summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/freedreno/Makefile.am 4
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/Makefile.am 27
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/a3xx.xml.h 1838
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c 946
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_blend.c 87
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_blend.h 52
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_compiler.c 1240
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_compiler.h 38
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_context.c 118
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_context.h 68
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_draw.c 236
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_draw.h 38
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_emit.c 581
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_emit.h 89
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_gmem.c 486
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_gmem.h 36
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_program.c 642
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_program.h 116
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c 92
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h 56
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_screen.c 105
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_screen.h 36
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_texture.c 140
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_texture.h 68
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_util.c 348
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_util.h 56
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_zsa.c 100
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_zsa.h 56
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/instr-a3xx.h 532
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/ir-a3xx.c 527
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/ir-a3xx.h 190
-rw-r--r-- src/gallium/drivers/freedreno/adreno_common.xml.h 2
-rw-r--r-- src/gallium/drivers/freedreno/adreno_pm4.xml.h 2
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_screen.c 4
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_util.c 2
35 files changed, 8955 insertions, 3 deletions
diff --git a/src/gallium/drivers/freedreno/Makefile.am b/src/gallium/drivers/freedreno/Makefile.am
index 64dfda62399..b6dbbd19183 100644
--- a/src/gallium/drivers/freedreno/Makefile.am
+++ b/src/gallium/drivers/freedreno/Makefile.am
@@ -2,18 +2,19 @@ include $(top_srcdir)/src/gallium/Automake.inc
noinst_LTLIBRARIES = libfreedreno.la
AM_CFLAGS = \
-Wno-packed-bitfield-compat \
-I$(top_srcdir)/src/gallium/drivers \
+ -I$(top_srcdir)/src/gallium/drivers/freedreno/a3xx \
-I$(top_srcdir)/src/gallium/drivers/freedreno/a2xx \
$(GALLIUM_CFLAGS) \
$(FREEDRENO_CFLAGS) \
$(VISIBILITY_CFLAGS)
-SUBDIRS = a2xx
+SUBDIRS = a2xx a3xx
libfreedreno_la_SOURCES = \
freedreno_util.c \
freedreno_fence.c \
freedreno_resource.c \
freedreno_surface.c \
@@ -22,8 +23,9 @@ libfreedreno_la_SOURCES = \
freedreno_texture.c \
freedreno_context.c \
freedreno_screen.c \
freedreno_gmem.c
libfreedreno_la_LIBADD = \
+ a3xx/libfd3xx.la \
a2xx/libfd2xx.la
diff --git a/src/gallium/drivers/freedreno/a3xx/Makefile.am b/src/gallium/drivers/freedreno/a3xx/Makefile.am
new file mode 100644
index 00000000000..a7e415f7fbd
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/Makefile.am
@@ -0,0 +1,27 @@
+include $(top_srcdir)/src/gallium/Automake.inc
+
+noinst_LTLIBRARIES = libfd3xx.la
+
+AM_CFLAGS = \
+ -Wno-packed-bitfield-compat \
+ -I$(top_srcdir)/src/gallium/drivers \
+ -I$(top_srcdir)/src/gallium/drivers/freedreno \
+ $(GALLIUM_CFLAGS) \
+ $(FREEDRENO_CFLAGS) \
+ $(VISIBILITY_CFLAGS)
+
+libfd3xx_la_SOURCES = \
+ fd3_blend.c \
+ fd3_compiler.c \
+ fd3_context.c \
+ fd3_draw.c \
+ fd3_emit.c \
+ fd3_gmem.c \
+ fd3_program.c \
+ fd3_rasterizer.c \
+ fd3_screen.c \
+ fd3_texture.c \
+ fd3_util.c \
+ fd3_zsa.c \
+ disasm-a3xx.c \
+ ir-a3xx.c
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
new file mode 100644
index 00000000000..c7f5085d032
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -0,0 +1,1838 @@
+#ifndef A3XX_XML
+#define A3XX_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 42578 bytes, from 2013-06-02 13:10:46)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37)
+
+Copyright (C) 2013 by the following authors:
+- Rob Clark <robdclark@gmail.com> (robclark)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+enum a3xx_render_mode {
+ RB_RENDERING_PASS = 0,
+ RB_TILING_PASS = 1,
+ RB_RESOLVE_PASS = 2,
+};
+
+enum a3xx_tile_mode {
+ LINEAR = 0,
+ TILE_32X32 = 2,
+};
+
+enum a3xx_threadmode {
+ MULTI = 0,
+ SINGLE = 1,
+};
+
+enum a3xx_instrbuffermode {
+ BUFFER = 1,
+};
+
+enum a3xx_threadsize {
+ TWO_QUADS = 0,
+ FOUR_QUADS = 1,
+};
+
+enum a3xx_state_block_id {
+ HLSQ_BLOCK_ID_TP_TEX = 2,
+ HLSQ_BLOCK_ID_TP_MIPMAP = 3,
+ HLSQ_BLOCK_ID_SP_VS = 4,
+ HLSQ_BLOCK_ID_SP_FS = 6,
+};
+
+enum a3xx_cache_opcode {
+ INVALIDATE = 1,
+};
+
+enum a3xx_vtx_fmt {
+ VFMT_FLOAT_32 = 0,
+ VFMT_FLOAT_32_32 = 1,
+ VFMT_FLOAT_32_32_32 = 2,
+ VFMT_FLOAT_32_32_32_32 = 3,
+ VFMT_FLOAT_16 = 4,
+ VFMT_FLOAT_16_16 = 5,
+ VFMT_FLOAT_16_16_16 = 6,
+ VFMT_FLOAT_16_16_16_16 = 7,
+ VFMT_FIXED_32 = 8,
+ VFMT_FIXED_32_32 = 9,
+ VFMT_FIXED_32_32_32 = 10,
+ VFMT_FIXED_32_32_32_32 = 11,
+ VFMT_SHORT_16 = 16,
+ VFMT_SHORT_16_16 = 17,
+ VFMT_SHORT_16_16_16 = 18,
+ VFMT_SHORT_16_16_16_16 = 19,
+ VFMT_USHORT_16 = 20,
+ VFMT_USHORT_16_16 = 21,
+ VFMT_USHORT_16_16_16 = 22,
+ VFMT_USHORT_16_16_16_16 = 23,
+ VFMT_NORM_SHORT_16 = 24,
+ VFMT_NORM_SHORT_16_16 = 25,
+ VFMT_NORM_SHORT_16_16_16 = 26,
+ VFMT_NORM_SHORT_16_16_16_16 = 27,
+ VFMT_NORM_USHORT_16 = 28,
+ VFMT_NORM_USHORT_16_16 = 29,
+ VFMT_NORM_USHORT_16_16_16 = 30,
+ VFMT_NORM_USHORT_16_16_16_16 = 31,
+ VFMT_UBYTE_8 = 40,
+ VFMT_UBYTE_8_8 = 41,
+ VFMT_UBYTE_8_8_8 = 42,
+ VFMT_UBYTE_8_8_8_8 = 43,
+ VFMT_NORM_UBYTE_8 = 44,
+ VFMT_NORM_UBYTE_8_8 = 45,
+ VFMT_NORM_UBYTE_8_8_8 = 46,
+ VFMT_NORM_UBYTE_8_8_8_8 = 47,
+ VFMT_BYTE_8 = 48,
+ VFMT_BYTE_8_8 = 49,
+ VFMT_BYTE_8_8_8 = 50,
+ VFMT_BYTE_8_8_8_8 = 51,
+ VFMT_NORM_BYTE_8 = 52,
+ VFMT_NORM_BYTE_8_8 = 53,
+ VFMT_NORM_BYTE_8_8_8 = 54,
+ VFMT_NORM_BYTE_8_8_8_8 = 55,
+ VFMT_UINT_10_10_10_2 = 60,
+ VFMT_NORM_UINT_10_10_10_2 = 61,
+ VFMT_INT_10_10_10_2 = 62,
+ VFMT_NORM_INT_10_10_10_2 = 63,
+};
+
+enum a3xx_tex_fmt {
+ TFMT_NORM_USHORT_565 = 4,
+ TFMT_NORM_USHORT_5551 = 6,
+ TFMT_NORM_USHORT_4444 = 7,
+ TFMT_NORM_UINT_X8Z24 = 10,
+ TFMT_NORM_UINT_2_10_10_10 = 41,
+ TFMT_NORM_UINT_A8 = 44,
+ TFMT_NORM_UINT_L8_A8 = 47,
+ TFMT_NORM_UINT_8 = 48,
+ TFMT_NORM_UINT_8_8 = 49,
+ TFMT_NORM_UINT_8_8_8 = 50,
+ TFMT_NORM_UINT_8_8_8_8 = 51,
+ TFMT_FLOAT_16 = 64,
+ TFMT_FLOAT_16_16 = 65,
+ TFMT_FLOAT_16_16_16_16 = 67,
+ TFMT_FLOAT_32 = 84,
+ TFMT_FLOAT_32_32 = 85,
+ TFMT_FLOAT_32_32_32_32 = 87,
+};
+
+enum a3xx_tex_fetchsize {
+ TFETCH_DISABLE = 0,
+ TFETCH_1_BYTE = 1,
+ TFETCH_2_BYTE = 2,
+ TFETCH_4_BYTE = 3,
+ TFETCH_8_BYTE = 4,
+ TFETCH_16_BYTE = 5,
+};
+
+enum a3xx_color_fmt {
+ RB_R8G8B8_UNORM = 4,
+ RB_R8G8B8A8_UNORM = 8,
+ RB_Z16_UNORM = 12,
+ RB_A8_UNORM = 20,
+};
+
+enum a3xx_color_swap {
+ WZYX = 0,
+ WXYZ = 1,
+ ZYXW = 2,
+ XYZW = 3,
+};
+
+enum a3xx_msaa_samples {
+ MSAA_ONE = 0,
+ MSAA_TWO = 1,
+ MSAA_FOUR = 2,
+};
+
+enum a3xx_sp_perfcounter_select {
+ SP_FS_CFLOW_INSTRUCTIONS = 12,
+ SP_FS_FULL_ALU_INSTRUCTIONS = 14,
+ SP0_ICL1_MISSES = 26,
+ SP_ALU_ACTIVE_CYCLES = 29,
+};
+
+enum adreno_rb_copy_control_mode {
+ RB_COPY_RESOLVE = 1,
+ RB_COPY_DEPTH_STENCIL = 5,
+};
+
+enum a3xx_tex_filter {
+ A3XX_TEX_NEAREST = 0,
+ A3XX_TEX_LINEAR = 1,
+};
+
+enum a3xx_tex_clamp {
+ A3XX_TEX_REPEAT = 0,
+ A3XX_TEX_CLAMP_TO_EDGE = 1,
+ A3XX_TEX_MIRROR_REPEAT = 2,
+ A3XX_TEX_CLAMP_NONE = 3,
+};
+
+enum a3xx_tex_swiz {
+ A3XX_TEX_X = 0,
+ A3XX_TEX_Y = 1,
+ A3XX_TEX_Z = 2,
+ A3XX_TEX_W = 3,
+ A3XX_TEX_ZERO = 4,
+ A3XX_TEX_ONE = 5,
+};
+
+#define REG_A3XX_RBBM_HW_VERSION 0x00000000
+
+#define REG_A3XX_RBBM_HW_RELEASE 0x00000001
+
+#define REG_A3XX_RBBM_HW_CONFIGURATION 0x00000002
+
+#define REG_A3XX_RBBM_CLOCK_CTL 0x00000010
+
+#define REG_A3XX_RBBM_SP_HYST_CNT 0x00000012
+
+#define REG_A3XX_RBBM_SW_RESET_CMD 0x00000018
+
+#define REG_A3XX_RBBM_AHB_CTL0 0x00000020
+
+#define REG_A3XX_RBBM_AHB_CTL1 0x00000021
+
+#define REG_A3XX_RBBM_AHB_CMD 0x00000022
+
+#define REG_A3XX_RBBM_AHB_ERROR_STATUS 0x00000027
+
+#define REG_A3XX_RBBM_GPR0_CTL 0x0000002e
+
+#define REG_A3XX_RBBM_STATUS 0x00000030
+
+#define REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x00000033
+
+#define REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x00000050
+
+#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL0 0x00000051
+
+#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL1 0x00000054
+
+#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL2 0x00000057
+
+#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL3 0x0000005a
+
+#define REG_A3XX_RBBM_INT_CLEAR_CMD 0x00000061
+
+#define REG_A3XX_RBBM_INT_0_MASK 0x00000063
+
+#define REG_A3XX_RBBM_INT_0_STATUS 0x00000064
+
+#define REG_A3XX_RBBM_PERFCTR_CTL 0x00000080
+
+#define REG_A3XX_RBBM_GPU_BUSY_MASKED 0x00000088
+
+#define REG_A3XX_RBBM_PERFCTR_SP_7_LO 0x000000e0
+
+#define REG_A3XX_RBBM_PERFCTR_SP_7_HI 0x000000e1
+
+#define REG_A3XX_RBBM_PERFCTR_PWR_1_LO 0x000000ec
+
+#define REG_A3XX_RBBM_PERFCTR_PWR_1_HI 0x000000ed
+
+#define REG_A3XX_RBBM_RBBM_CTL 0x00000100
+
+#define REG_A3XX_RBBM_RBBM_CTL 0x00000100
+
+#define REG_A3XX_RBBM_DEBUG_BUS_CTL 0x00000111
+
+#define REG_A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x00000112
+
+#define REG_A3XX_CP_PFP_UCODE_ADDR 0x000001c9
+
+#define REG_A3XX_CP_PFP_UCODE_DATA 0x000001ca
+
+#define REG_A3XX_CP_ROQ_ADDR 0x000001cc
+
+#define REG_A3XX_CP_ROQ_DATA 0x000001cd
+
+#define REG_A3XX_CP_MERCIU_ADDR 0x000001d1
+
+#define REG_A3XX_CP_MERCIU_DATA 0x000001d2
+
+#define REG_A3XX_CP_MERCIU_DATA2 0x000001d3
+
+#define REG_A3XX_CP_MEQ_ADDR 0x000001da
+
+#define REG_A3XX_CP_MEQ_DATA 0x000001db
+
+#define REG_A3XX_CP_HW_FAULT 0x0000045c
+
+#define REG_A3XX_CP_PROTECT_CTRL 0x0000045e
+
+#define REG_A3XX_CP_PROTECT_STATUS 0x0000045f
+
+#define REG_A3XX_CP_PROTECT(i0) (0x00000460 + 0x1*(i0))
+
+#define REG_A3XX_CP_PROTECT_REG(i0) (0x00000460 + 0x1*(i0))
+
+#define REG_A3XX_CP_AHB_FAULT 0x0000054d
+
+#define REG_A3XX_CP_SCRATCH_REG2 0x0000057a
+
+#define REG_A3XX_CP_SCRATCH_REG3 0x0000057b
+
+#define REG_A3XX_GRAS_CL_CLIP_CNTL 0x00002040
+#define A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 0x00001000
+#define A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000
+#define A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 0x00020000
+#define A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE 0x00080000
+#define A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE 0x00100000
+#define A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE 0x00200000
+
+#define REG_A3XX_GRAS_CL_GB_CLIP_ADJ 0x00002044
+#define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK 0x000003ff
+#define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT 0
+static inline uint32_t A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(uint32_t val)
+{
+ return ((val) << A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT) & A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK;
+}
+#define A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK 0x000ffc00
+#define A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT 10
+static inline uint32_t A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(uint32_t val)
+{
+ return ((val) << A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT) & A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK;
+}
+
+#define REG_A3XX_GRAS_CL_VPORT_XOFFSET 0x00002048
+#define A3XX_GRAS_CL_VPORT_XOFFSET__MASK 0xffffffff
+#define A3XX_GRAS_CL_VPORT_XOFFSET__SHIFT 0
+static inline uint32_t A3XX_GRAS_CL_VPORT_XOFFSET(float val)
+{
+ return ((fui(val)) << A3XX_GRAS_CL_VPORT_XOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_XOFFSET__MASK;
+}
+
+#define REG_A3XX_GRAS_CL_VPORT_XSCALE 0x00002049
+#define A3XX_GRAS_CL_VPORT_XSCALE__MASK 0xffffffff
+#define A3XX_GRAS_CL_VPORT_XSCALE__SHIFT 0
+static inline uint32_t A3XX_GRAS_CL_VPORT_XSCALE(float val)
+{
+ return ((fui(val)) << A3XX_GRAS_CL_VPORT_XSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_XSCALE__MASK;
+}
+
+#define REG_A3XX_GRAS_CL_VPORT_YOFFSET 0x0000204a
+#define A3XX_GRAS_CL_VPORT_YOFFSET__MASK 0xffffffff
+#define A3XX_GRAS_CL_VPORT_YOFFSET__SHIFT 0
+static inline uint32_t A3XX_GRAS_CL_VPORT_YOFFSET(float val)
+{
+ return ((fui(val)) << A3XX_GRAS_CL_VPORT_YOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_YOFFSET__MASK;
+}
+
+#define REG_A3XX_GRAS_CL_VPORT_YSCALE 0x0000204b
+#define A3XX_GRAS_CL_VPORT_YSCALE__MASK 0xffffffff
+#define A3XX_GRAS_CL_VPORT_YSCALE__SHIFT 0
+static inline uint32_t A3XX_GRAS_CL_VPORT_YSCALE(float val)
+{
+ return ((fui(val)) << A3XX_GRAS_CL_VPORT_YSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_YSCALE__MASK;
+}
+
+#define REG_A3XX_GRAS_CL_VPORT_ZOFFSET 0x0000204c
+#define A3XX_GRAS_CL_VPORT_ZOFFSET__MASK 0xffffffff
+#define A3XX_GRAS_CL_VPORT_ZOFFSET__SHIFT 0
+static inline uint32_t A3XX_GRAS_CL_VPORT_ZOFFSET(float val)
+{
+ return ((fui(val)) << A3XX_GRAS_CL_VPORT_ZOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_ZOFFSET__MASK;
+}
+
+#define REG_A3XX_GRAS_CL_VPORT_ZSCALE 0x0000204d
+#define A3XX_GRAS_CL_VPORT_ZSCALE__MASK 0xffffffff
+#define A3XX_GRAS_CL_VPORT_ZSCALE__SHIFT 0
+static inline uint32_t A3XX_GRAS_CL_VPORT_ZSCALE(float val)
+{
+ return ((fui(val)) << A3XX_GRAS_CL_VPORT_ZSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_ZSCALE__MASK;
+}
+
+#define REG_A3XX_GRAS_SU_POINT_MINMAX 0x00002068
+
+#define REG_A3XX_GRAS_SU_POINT_SIZE 0x00002069
+
+#define REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x0000206c
+#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK 0x00ffffff
+#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT 0
+static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(float val)
+{
+ return ((((uint32_t)(val * 40.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK;
+}
+
+#define REG_A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x0000206d
+#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff
+#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0
+static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_OFFSET(float val)
+{
+ return ((((uint32_t)(val * 44.0))) << A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK;
+}
+
+#define REG_A3XX_GRAS_SU_MODE_CONTROL 0x00002070
+#define A3XX_GRAS_SU_MODE_CONTROL_CULL_FRONT 0x00000001
+#define A3XX_GRAS_SU_MODE_CONTROL_CULL_BACK 0x00000002
+#define A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK 0x000007fc
+#define A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT 2
+static inline uint32_t A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(uint32_t val)
+{
+ return ((val) << A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT) & A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK;
+}
+#define A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET 0x00000800
+
+#define REG_A3XX_GRAS_SC_CONTROL 0x00002072
+#define A3XX_GRAS_SC_CONTROL_RENDER_MODE__MASK 0x000000f0
+#define A3XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT 4
+static inline uint32_t A3XX_GRAS_SC_CONTROL_RENDER_MODE(enum a3xx_render_mode val)
+{
+ return ((val) << A3XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT) & A3XX_GRAS_SC_CONTROL_RENDER_MODE__MASK;
+}
+#define A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK 0x00000f00
+#define A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT 8
+static inline uint32_t A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(enum a3xx_msaa_samples val)
+{
+ return ((val) << A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT) & A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK;
+}
+#define A3XX_GRAS_SC_CONTROL_RASTER_MODE__MASK 0x0000f000
+#define A3XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT 12
+static inline uint32_t A3XX_GRAS_SC_CONTROL_RASTER_MODE(uint32_t val)
+{
+ return ((val) << A3XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT) & A3XX_GRAS_SC_CONTROL_RASTER_MODE__MASK;
+}
+
+#define REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL 0x00002074
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK 0x00007fff
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT 0
+static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(uint32_t val)
+{
+ return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK;
+}
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK 0x7fff0000
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT 16
+static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(uint32_t val)
+{
+ return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK;
+}
+
+#define REG_A3XX_GRAS_SC_SCREEN_SCISSOR_BR 0x00002075
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK 0x00007fff
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT 0
+static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(uint32_t val)
+{
+ return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK;
+}
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK 0x7fff0000
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT 16
+static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(uint32_t val)
+{
+ return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK;
+}
+
+#define REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL 0x00002079
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0
+static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val)
+{
+ return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK;
+}
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16
+static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val)
+{
+ return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK;
+}
+
+#define REG_A3XX_GRAS_SC_WINDOW_SCISSOR_BR 0x0000207a
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0
+static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val)
+{
+ return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK;
+}
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16
+static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val)
+{
+ return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK;
+}
+
+#define REG_A3XX_RB_MODE_CONTROL 0x000020c0
+#define A3XX_RB_MODE_CONTROL_GMEM_BYPASS 0x00000080
+#define A3XX_RB_MODE_CONTROL_RENDER_MODE__MASK 0x00000700
+#define A3XX_RB_MODE_CONTROL_RENDER_MODE__SHIFT 8
+static inline uint32_t A3XX_RB_MODE_CONTROL_RENDER_MODE(enum a3xx_render_mode val)
+{
+ return ((val) << A3XX_RB_MODE_CONTROL_RENDER_MODE__SHIFT) & A3XX_RB_MODE_CONTROL_RENDER_MODE__MASK;
+}
+#define A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE 0x00008000
+#define A3XX_RB_MODE_CONTROL_PACKER_TIMER_ENABLE 0x00010000
+
+#define REG_A3XX_RB_RENDER_CONTROL 0x000020c1
+#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK 0x00000ff0
+#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT 4
+static inline uint32_t A3XX_RB_RENDER_CONTROL_BIN_WIDTH(uint32_t val)
+{
+ return ((val >> 5) << A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT) & A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK;
+}
+#define A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE 0x00001000
+#define A3XX_RB_RENDER_CONTROL_ENABLE_GMEM 0x00002000
+#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK 0x07000000
+#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT 24
+static inline uint32_t A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val)
+{
+ return ((val) << A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK;
+}
+
+#define REG_A3XX_RB_MSAA_CONTROL 0x000020c2
+#define A3XX_RB_MSAA_CONTROL_DISABLE 0x00000400
+#define A3XX_RB_MSAA_CONTROL_SAMPLES__MASK 0x0000f000
+#define A3XX_RB_MSAA_CONTROL_SAMPLES__SHIFT 12
+static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLES(enum a3xx_msaa_samples val)
+{
+ return ((val) << A3XX_RB_MSAA_CONTROL_SAMPLES__SHIFT) & A3XX_RB_MSAA_CONTROL_SAMPLES__MASK;
+}
+#define A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__MASK 0xffff0000
+#define A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__SHIFT 16
+static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(uint32_t val)
+{
+ return ((val) << A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__SHIFT) & A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__MASK;
+}
+
+#define REG_A3XX_UNKNOWN_20C3 0x000020c3
+
+#define REG_A3XX_RB_MRT(i0) (0x000020c4 + 0x4*(i0))
+
+#define REG_A3XX_RB_MRT_CONTROL(i0) (0x000020c4 + 0x4*(i0))
+#define A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008
+#define A3XX_RB_MRT_CONTROL_BLEND 0x00000010
+#define A3XX_RB_MRT_CONTROL_BLEND2 0x00000020
+#define A3XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000f00
+#define A3XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 8
+static inline uint32_t A3XX_RB_MRT_CONTROL_ROP_CODE(uint32_t val)
+{
+ return ((val) << A3XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A3XX_RB_MRT_CONTROL_ROP_CODE__MASK;
+}
+#define A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK 0x00003000
+#define A3XX_RB_MRT_CONTROL_DITHER_MODE__SHIFT 12
+static inline uint32_t A3XX_RB_MRT_CONTROL_DITHER_MODE(enum adreno_rb_dither_mode val)
+{
+ return ((val) << A3XX_RB_MRT_CONTROL_DITHER_MODE__SHIFT) & A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK;
+}
+#define A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x0f000000
+#define A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 24
+static inline uint32_t A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val)
+{
+ return ((val) << A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
+}
+
+#define REG_A3XX_RB_MRT_BUF_INFO(i0) (0x000020c5 + 0x4*(i0))
+#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x0000003f
+#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0
+static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a3xx_color_fmt val)
+{
+ return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK;
+}
+#define A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x000000c0
+#define A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 6
+static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a3xx_tile_mode val)
+{
+ return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK;
+}
+#define A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00000c00
+#define A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 10
+static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val)
+{
+ return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK;
+}
+#define A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK 0xfffe0000
+#define A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT 17
+static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val)
+{
+ return ((val >> 5) << A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK;
+}
+
+#define REG_A3XX_RB_MRT_BUF_BASE(i0) (0x000020c6 + 0x4*(i0))
+#define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK 0xfffffff0
+#define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT 4
+static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val)
+{
+ return ((val >> 5) << A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT) & A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK;
+}
+
+#define REG_A3XX_RB_MRT_BLEND_CONTROL(i0) (0x000020c7 + 0x4*(i0))
+#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f
+#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0
+static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val)
+{
+ return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK;
+}
+#define A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0
+#define A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5
+static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum adreno_rb_blend_opcode val)
+{
+ return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK;
+}
+#define A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00
+#define A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8
+static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val)
+{
+ return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK;
+}
+#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000
+#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16
+static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val)
+{
+ return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK;
+}
+#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000
+#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21
+static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum adreno_rb_blend_opcode val)
+{
+ return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK;
+}
+#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000
+#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24
+static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val)
+{
+ return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK;
+}
+#define A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE 0x20000000
+
+#define REG_A3XX_RB_BLEND_RED 0x000020e4
+
+#define REG_A3XX_RB_BLEND_GREEN 0x000020e5
+
+#define REG_A3XX_RB_BLEND_BLUE 0x000020e6
+
+#define REG_A3XX_RB_BLEND_ALPHA 0x000020e7
+
+#define REG_A3XX_UNKNOWN_20E8 0x000020e8
+
+#define REG_A3XX_UNKNOWN_20E9 0x000020e9
+
+#define REG_A3XX_UNKNOWN_20EA 0x000020ea
+
+#define REG_A3XX_UNKNOWN_20EB 0x000020eb
+
+#define REG_A3XX_RB_COPY_CONTROL 0x000020ec
+#define A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK 0x00000003
+#define A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT 0
+static inline uint32_t A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(enum a3xx_msaa_samples val)
+{
+ return ((val) << A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT) & A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK;
+}
+#define A3XX_RB_COPY_CONTROL_MODE__MASK 0x00000070
+#define A3XX_RB_COPY_CONTROL_MODE__SHIFT 4
+static inline uint32_t A3XX_RB_COPY_CONTROL_MODE(enum adreno_rb_copy_control_mode val)
+{
+ return ((val) << A3XX_RB_COPY_CONTROL_MODE__SHIFT) & A3XX_RB_COPY_CONTROL_MODE__MASK;
+}
+#define A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK 0xfffffc00
+#define A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT 10
+static inline uint32_t A3XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val)
+{
+ return ((val >> 10) << A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT) & A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK;
+}
+
+#define REG_A3XX_RB_COPY_DEST_BASE 0x000020ed
+#define A3XX_RB_COPY_DEST_BASE_BASE__MASK 0xfffffff0
+#define A3XX_RB_COPY_DEST_BASE_BASE__SHIFT 4
+static inline uint32_t A3XX_RB_COPY_DEST_BASE_BASE(uint32_t val)
+{
+ return ((val >> 5) << A3XX_RB_COPY_DEST_BASE_BASE__SHIFT) & A3XX_RB_COPY_DEST_BASE_BASE__MASK;
+}
+
+#define REG_A3XX_RB_COPY_DEST_PITCH 0x000020ee
+#define A3XX_RB_COPY_DEST_PITCH_PITCH__MASK 0xffffffff
+#define A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT 0
+static inline uint32_t A3XX_RB_COPY_DEST_PITCH_PITCH(uint32_t val)
+{
+ return ((val >> 5) << A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT) & A3XX_RB_COPY_DEST_PITCH_PITCH__MASK;
+}
+
+#define REG_A3XX_RB_COPY_DEST_INFO 0x000020ef
+#define A3XX_RB_COPY_DEST_INFO_TILE__MASK 0x00000003
+#define A3XX_RB_COPY_DEST_INFO_TILE__SHIFT 0
+static inline uint32_t A3XX_RB_COPY_DEST_INFO_TILE(enum a3xx_tile_mode val)
+{
+ return ((val) << A3XX_RB_COPY_DEST_INFO_TILE__SHIFT) & A3XX_RB_COPY_DEST_INFO_TILE__MASK;
+}
+#define A3XX_RB_COPY_DEST_INFO_FORMAT__MASK 0x000000fc
+#define A3XX_RB_COPY_DEST_INFO_FORMAT__SHIFT 2
+static inline uint32_t A3XX_RB_COPY_DEST_INFO_FORMAT(enum a3xx_color_fmt val)
+{
+ return ((val) << A3XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A3XX_RB_COPY_DEST_INFO_FORMAT__MASK;
+}
+#define A3XX_RB_COPY_DEST_INFO_SWAP__MASK 0x00000300
+#define A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT 8
+static inline uint32_t A3XX_RB_COPY_DEST_INFO_SWAP(enum a3xx_color_swap val)
+{
+ return ((val) << A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A3XX_RB_COPY_DEST_INFO_SWAP__MASK;
+}
+#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK 0x0003c000
+#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT 14
+static inline uint32_t A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(uint32_t val)
+{
+ return ((val) << A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT) & A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK;
+}
+#define A3XX_RB_COPY_DEST_INFO_ENDIAN__MASK 0x001c0000
+#define A3XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT 18
+static inline uint32_t A3XX_RB_COPY_DEST_INFO_ENDIAN(enum adreno_rb_surface_endian val)
+{
+ return ((val) << A3XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT) & A3XX_RB_COPY_DEST_INFO_ENDIAN__MASK;
+}
+
+#define REG_A3XX_RB_DEPTH_CONTROL 0x00002100
+#define A3XX_RB_DEPTH_CONTROL_Z_ENABLE 0x00000002
+#define A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE 0x00000004
+#define A3XX_RB_DEPTH_CONTROL_EARLY_Z_ENABLE 0x00000008
+#define A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK 0x00000070
+#define A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT 4
+static inline uint32_t A3XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val)
+{
+ return ((val) << A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK;
+}
+#define A3XX_RB_DEPTH_CONTROL_BF_ENABLE 0x00000080
+#define A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000
+
+#define REG_A3XX_UNKNOWN_2101 0x00002101
+
+#define REG_A3XX_RB_DEPTH_INFO 0x00002102
+#define A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK 0x00000001
+#define A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT 0
+static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum adreno_rb_depth_format val)
+{
+ return ((val) << A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK;
+}
+#define A3XX_RB_DEPTH_INFO_DEPTH_BASE__MASK 0xfffff800
+#define A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 11
+static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val)
+{
+ return ((val >> 10) << A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A3XX_RB_DEPTH_INFO_DEPTH_BASE__MASK;
+}
+
+/* RB_DEPTH_PITCH: the register holds the argument divided by 8 (val >> 3). */
+#define REG_A3XX_RB_DEPTH_PITCH 0x00002103
+#define A3XX_RB_DEPTH_PITCH__MASK 0xffffffff
+#define A3XX_RB_DEPTH_PITCH__SHIFT 0
+static inline uint32_t A3XX_RB_DEPTH_PITCH(uint32_t val)
+{
+ const uint32_t scaled = val >> 3;
+ const uint32_t bits = scaled << A3XX_RB_DEPTH_PITCH__SHIFT;
+ return bits & A3XX_RB_DEPTH_PITCH__MASK;
+}
+
+/*
+ * RB_STENCIL_CONTROL: enable flags plus compare-function / stencil-op fields
+ * for front faces and their *_BF counterparts.
+ *
+ * Each helper converts its enum argument to uint32_t before shifting. The
+ * integer type backing an enum is implementation-defined, and the ZFAIL_BF
+ * field extends to bit 31, so shifting in the enum's own type could overflow
+ * a signed int. Shifting an unsigned 32-bit value is always well defined.
+ */
+#define REG_A3XX_RB_STENCIL_CONTROL 0x00002104
+#define A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001
+#define A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000004
+#define A3XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700
+#define A3XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val)
+{
+ return ((uint32_t)val << A3XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A3XX_RB_STENCIL_CONTROL_FUNC__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800
+#define A3XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val)
+{
+ return ((uint32_t)val << A3XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A3XX_RB_STENCIL_CONTROL_FAIL__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000
+#define A3XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val)
+{
+ return ((uint32_t)val << A3XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZPASS__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000
+#define A3XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val)
+{
+ return ((uint32_t)val << A3XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZFAIL__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000
+#define A3XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val)
+{
+ return ((uint32_t)val << A3XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_FUNC_BF__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000
+#define A3XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val)
+{
+ return ((uint32_t)val << A3XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_FAIL_BF__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000
+#define A3XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val)
+{
+ return ((uint32_t)val << A3XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000
+#define A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val)
+{
+ /* Field occupies bits 29..31: the shift must happen in an unsigned type. */
+ return ((uint32_t)val << A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK;
+}
+
+#define REG_A3XX_UNKNOWN_2105 0x00002105
+
+#define REG_A3XX_UNKNOWN_2106 0x00002106
+
+#define REG_A3XX_UNKNOWN_2107 0x00002107
+
+/*
+ * RB_STENCILREFMASK: three byte-wide fields (reference, mask, write mask).
+ * Each helper shifts its argument into place and masks off excess bits.
+ */
+#define REG_A3XX_RB_STENCILREFMASK 0x00002108
+#define A3XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff
+#define A3XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0
+static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILREF(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_RB_STENCILREFMASK_STENCILREF__SHIFT;
+ return bits & A3XX_RB_STENCILREFMASK_STENCILREF__MASK;
+}
+#define A3XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00
+#define A3XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8
+static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_RB_STENCILREFMASK_STENCILMASK__SHIFT;
+ return bits & A3XX_RB_STENCILREFMASK_STENCILMASK__MASK;
+}
+#define A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000
+#define A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16
+static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT;
+ return bits & A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK;
+}
+
+/*
+ * RB_STENCILREFMASK_BF: same three byte-wide fields as RB_STENCILREFMASK,
+ * at the next register offset.
+ */
+#define REG_A3XX_RB_STENCILREFMASK_BF 0x00002109
+#define A3XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff
+#define A3XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0
+static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT;
+ return bits & A3XX_RB_STENCILREFMASK_BF_STENCILREF__MASK;
+}
+#define A3XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00
+#define A3XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8
+static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT;
+ return bits & A3XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK;
+}
+#define A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000
+#define A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16
+static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT;
+ return bits & A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK;
+}
+
+/* PA_SC_WINDOW_OFFSET: X in the low 16 bits, Y in the high 16 bits. */
+#define REG_A3XX_PA_SC_WINDOW_OFFSET 0x0000210e
+#define A3XX_PA_SC_WINDOW_OFFSET_X__MASK 0x0000ffff
+#define A3XX_PA_SC_WINDOW_OFFSET_X__SHIFT 0
+static inline uint32_t A3XX_PA_SC_WINDOW_OFFSET_X(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_PA_SC_WINDOW_OFFSET_X__SHIFT;
+ return bits & A3XX_PA_SC_WINDOW_OFFSET_X__MASK;
+}
+#define A3XX_PA_SC_WINDOW_OFFSET_Y__MASK 0xffff0000
+#define A3XX_PA_SC_WINDOW_OFFSET_Y__SHIFT 16
+static inline uint32_t A3XX_PA_SC_WINDOW_OFFSET_Y(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_PA_SC_WINDOW_OFFSET_Y__SHIFT;
+ return bits & A3XX_PA_SC_WINDOW_OFFSET_Y__MASK;
+}
+
+/*
+ * PC register offsets. Only PC_PRIM_VTX_CNTL has bitfield helpers in this
+ * section; each one shifts its argument into place and masks it.
+ */
+#define REG_A3XX_PC_VSTREAM_CONTROL 0x000021e4
+
+#define REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL 0x000021ea
+
+#define REG_A3XX_PC_PRIM_VTX_CNTL 0x000021ec
+#define A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__MASK 0x0000001f
+#define A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__SHIFT 0
+static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__SHIFT;
+ return bits & A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__MASK;
+}
+#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__MASK 0x000000e0
+#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__SHIFT 5
+static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val)
+{
+ const uint32_t bits = val << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__SHIFT;
+ return bits & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__MASK;
+}
+#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK 0x00000700
+#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT 8
+static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val)
+{
+ const uint32_t bits = val << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT;
+ return bits & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK;
+}
+#define A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000
+
+#define REG_A3XX_PC_RESTART_INDEX 0x000021ed
+
+/*
+ * HLSQ_CONTROL_0_REG: one single-bit FSTHREADSIZE field with a helper, plus
+ * stand-alone flag masks.
+ */
+#define REG_A3XX_HLSQ_CONTROL_0_REG 0x00002200
+#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000010
+#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4
+static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val)
+{
+ const uint32_t bits = val << A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT;
+ return bits & A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK;
+}
+#define A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE 0x00000040
+#define A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART 0x00000200
+#define A3XX_HLSQ_CONTROL_0_REG_RESERVED2 0x00000400
+#define A3XX_HLSQ_CONTROL_0_REG_CHUNKDISABLE 0x04000000
+#define A3XX_HLSQ_CONTROL_0_REG_CONSTSWITCHMODE 0x08000000
+#define A3XX_HLSQ_CONTROL_0_REG_LAZYUPDATEDISABLE 0x10000000
+#define A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE 0x20000000
+#define A3XX_HLSQ_CONTROL_0_REG_TPFULLUPDATE 0x40000000
+#define A3XX_HLSQ_CONTROL_0_REG_SINGLECONTEXT 0x80000000
+
+/* HLSQ_CONTROL_1_REG: single-bit VSTHREADSIZE field helper plus two flag masks. */
+#define REG_A3XX_HLSQ_CONTROL_1_REG 0x00002201
+#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK 0x00000040
+#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT 6
+static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(enum a3xx_threadsize val)
+{
+ const uint32_t bits = val << A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT;
+ return bits & A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK;
+}
+#define A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE 0x00000100
+#define A3XX_HLSQ_CONTROL_1_REG_RESERVED1 0x00000200
+
+/* HLSQ_CONTROL_2_REG: PRIMALLOCTHRESHOLD occupies the top six bits. */
+#define REG_A3XX_HLSQ_CONTROL_2_REG 0x00002202
+#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK 0xfc000000
+#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT 26
+static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT;
+ return bits & A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK;
+}
+
+#define REG_A3XX_HLSQ_CONTROL_3_REG 0x00002203
+
+/*
+ * HLSQ_VS_CONTROL_REG: CONSTLENGTH (12 bits), CONSTSTARTOFFSET (12 bits) and
+ * INSTRLENGTH (8 bits). Helpers shift into place and mask.
+ */
+#define REG_A3XX_HLSQ_VS_CONTROL_REG 0x00002204
+#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x00000fff
+#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT 0
+static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT;
+ return bits & A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK;
+}
+#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x00fff000
+#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12
+static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT;
+ return bits & A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK;
+}
+#define A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000
+#define A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT 24
+static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT;
+ return bits & A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK;
+}
+
+/*
+ * HLSQ_FS_CONTROL_REG: same field layout as HLSQ_VS_CONTROL_REG
+ * (CONSTLENGTH, CONSTSTARTOFFSET, INSTRLENGTH).
+ */
+#define REG_A3XX_HLSQ_FS_CONTROL_REG 0x00002205
+#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK 0x00000fff
+#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT 0
+static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT;
+ return bits & A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK;
+}
+#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x00fff000
+#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12
+static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT;
+ return bits & A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK;
+}
+#define A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000
+#define A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT 24
+static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT;
+ return bits & A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK;
+}
+
+/* HLSQ_CONST_VSPRESV_RANGE_REG: STARTENTRY in the low half, ENDENTRY in the high half. */
+#define REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG 0x00002206
+#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK 0x0000ffff
+#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT 0
+static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT;
+ return bits & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK;
+}
+#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK 0xffff0000
+#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT 16
+static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT;
+ return bits & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK;
+}
+
+/* HLSQ_CONST_FSPRESV_RANGE_REG: STARTENTRY in the low half, ENDENTRY in the high half. */
+#define REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x00002207
+#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK 0x0000ffff
+#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT 0
+static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT;
+ return bits & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK;
+}
+#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK 0xffff0000
+#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT 16
+static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT;
+ return bits & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK;
+}
+
+/*
+ * HLSQ_CL_* register offsets. Only the offsets are defined here; this
+ * section provides no bitfield masks or packing helpers for them.
+ */
+#define REG_A3XX_HLSQ_CL_NDRANGE_0_REG 0x0000220a
+
+#define REG_A3XX_HLSQ_CL_NDRANGE_1_REG 0x0000220b
+
+#define REG_A3XX_HLSQ_CL_NDRANGE_2_REG 0x0000220c
+
+#define REG_A3XX_HLSQ_CL_CONTROL_0_REG 0x00002211
+
+#define REG_A3XX_HLSQ_CL_CONTROL_1_REG 0x00002212
+
+#define REG_A3XX_HLSQ_CL_KERNEL_CONST_REG 0x00002214
+
+#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG 0x00002215
+
+#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x00002217
+
+#define REG_A3XX_HLSQ_CL_WG_OFFSET_REG 0x0000221a
+
+/*
+ * VFD_CONTROL_0: four packed fields (TOTALATTRTOVS, PACKETSIZE,
+ * STRMDECINSTRCNT, STRMFETCHINSTRCNT). Helpers shift into place and mask.
+ */
+#define REG_A3XX_VFD_CONTROL_0 0x00002240
+#define A3XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK 0x0003ffff
+#define A3XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT 0
+static inline uint32_t A3XX_VFD_CONTROL_0_TOTALATTRTOVS(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT;
+ return bits & A3XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK;
+}
+#define A3XX_VFD_CONTROL_0_PACKETSIZE__MASK 0x003c0000
+#define A3XX_VFD_CONTROL_0_PACKETSIZE__SHIFT 18
+static inline uint32_t A3XX_VFD_CONTROL_0_PACKETSIZE(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VFD_CONTROL_0_PACKETSIZE__SHIFT;
+ return bits & A3XX_VFD_CONTROL_0_PACKETSIZE__MASK;
+}
+#define A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK 0x07c00000
+#define A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT 22
+static inline uint32_t A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT;
+ return bits & A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK;
+}
+#define A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK 0xf8000000
+#define A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT 27
+static inline uint32_t A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT;
+ return bits & A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK;
+}
+
+/* VFD_CONTROL_1: MAXSTORAGE (16 bits), REGID4VTX (8 bits), REGID4INST (8 bits). */
+#define REG_A3XX_VFD_CONTROL_1 0x00002241
+#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000ffff
+#define A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT 0
+static inline uint32_t A3XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT;
+ return bits & A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK;
+}
+#define A3XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000
+#define A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16
+static inline uint32_t A3XX_VFD_CONTROL_1_REGID4VTX(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT;
+ return bits & A3XX_VFD_CONTROL_1_REGID4VTX__MASK;
+}
+#define A3XX_VFD_CONTROL_1_REGID4INST__MASK 0xff000000
+#define A3XX_VFD_CONTROL_1_REGID4INST__SHIFT 24
+static inline uint32_t A3XX_VFD_CONTROL_1_REGID4INST(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VFD_CONTROL_1_REGID4INST__SHIFT;
+ return bits & A3XX_VFD_CONTROL_1_REGID4INST__MASK;
+}
+
+/*
+ * VFD index-related register offsets. Offsets only: no bitfield masks or
+ * helpers are defined for these registers in this section.
+ */
+#define REG_A3XX_VFD_INDEX_MIN 0x00002242
+
+#define REG_A3XX_VFD_INDEX_MAX 0x00002243
+
+#define REG_A3XX_VFD_INSTANCEID_OFFSET 0x00002244
+
+#define REG_A3XX_VFD_INDEX_OFFSET 0x00002245
+
+/*
+ * VFD_FETCH register array: element i0 spans two consecutive registers,
+ * FETCH_INSTR_0 at 0x2246 + 2*i0 and FETCH_INSTR_1 at 0x2247 + 2*i0.
+ * Only FETCH_INSTR_0 has bitfield helpers here.
+ */
+#define REG_A3XX_VFD_FETCH(i0) (0x00002246 + 0x2*(i0))
+
+#define REG_A3XX_VFD_FETCH_INSTR_0(i0) (0x00002246 + 0x2*(i0))
+#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK 0x0000007f
+#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT 0
+static inline uint32_t A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT;
+ return bits & A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK;
+}
+#define A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK 0x0001ff80
+#define A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT 7
+static inline uint32_t A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT;
+ return bits & A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK;
+}
+#define A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT 0x00020000
+#define A3XX_VFD_FETCH_INSTR_0_INDEXCODE__MASK 0x00fc0000
+#define A3XX_VFD_FETCH_INSTR_0_INDEXCODE__SHIFT 18
+static inline uint32_t A3XX_VFD_FETCH_INSTR_0_INDEXCODE(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VFD_FETCH_INSTR_0_INDEXCODE__SHIFT;
+ return bits & A3XX_VFD_FETCH_INSTR_0_INDEXCODE__MASK;
+}
+#define A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK 0xff000000
+#define A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT 24
+static inline uint32_t A3XX_VFD_FETCH_INSTR_0_STEPRATE(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT;
+ return bits & A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK;
+}
+
+#define REG_A3XX_VFD_FETCH_INSTR_1(i0) (0x00002247 + 0x2*(i0))
+
+/*
+ * VFD_DECODE register array: one register per element at 0x2266 + i0.
+ * Multi-bit fields have shift-and-mask helpers; CONSTFILL, LASTCOMPVALID
+ * and SWITCHNEXT are single-bit flags.
+ */
+#define REG_A3XX_VFD_DECODE(i0) (0x00002266 + 0x1*(i0))
+
+#define REG_A3XX_VFD_DECODE_INSTR(i0) (0x00002266 + 0x1*(i0))
+#define A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK 0x0000000f
+#define A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT 0
+static inline uint32_t A3XX_VFD_DECODE_INSTR_WRITEMASK(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT;
+ return bits & A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK;
+}
+#define A3XX_VFD_DECODE_INSTR_CONSTFILL 0x00000010
+#define A3XX_VFD_DECODE_INSTR_FORMAT__MASK 0x00000fc0
+#define A3XX_VFD_DECODE_INSTR_FORMAT__SHIFT 6
+static inline uint32_t A3XX_VFD_DECODE_INSTR_FORMAT(enum a3xx_vtx_fmt val)
+{
+ const uint32_t bits = val << A3XX_VFD_DECODE_INSTR_FORMAT__SHIFT;
+ return bits & A3XX_VFD_DECODE_INSTR_FORMAT__MASK;
+}
+#define A3XX_VFD_DECODE_INSTR_REGID__MASK 0x000ff000
+#define A3XX_VFD_DECODE_INSTR_REGID__SHIFT 12
+static inline uint32_t A3XX_VFD_DECODE_INSTR_REGID(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VFD_DECODE_INSTR_REGID__SHIFT;
+ return bits & A3XX_VFD_DECODE_INSTR_REGID__MASK;
+}
+#define A3XX_VFD_DECODE_INSTR_SHIFTCNT__MASK 0x1f000000
+#define A3XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT 24
+static inline uint32_t A3XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT;
+ return bits & A3XX_VFD_DECODE_INSTR_SHIFTCNT__MASK;
+}
+#define A3XX_VFD_DECODE_INSTR_LASTCOMPVALID 0x20000000
+#define A3XX_VFD_DECODE_INSTR_SWITCHNEXT 0x40000000
+
+/* VFD_VS_THREADING_THRESHOLD: REGID_THRESHOLD (4 bits) and REGID_VTXCNT (8 bits). */
+#define REG_A3XX_VFD_VS_THREADING_THRESHOLD 0x0000227e
+#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__MASK 0x0000000f
+#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__SHIFT 0
+static inline uint32_t A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__SHIFT;
+ return bits & A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__MASK;
+}
+#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__MASK 0x0000ff00
+#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__SHIFT 8
+static inline uint32_t A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__SHIFT;
+ return bits & A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__MASK;
+}
+
+/* VPC_ATTR: TOTALATTR (12 bits), THRDASSIGN (16 bits), LMSIZE (4 bits). */
+#define REG_A3XX_VPC_ATTR 0x00002280
+#define A3XX_VPC_ATTR_TOTALATTR__MASK 0x00000fff
+#define A3XX_VPC_ATTR_TOTALATTR__SHIFT 0
+static inline uint32_t A3XX_VPC_ATTR_TOTALATTR(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VPC_ATTR_TOTALATTR__SHIFT;
+ return bits & A3XX_VPC_ATTR_TOTALATTR__MASK;
+}
+#define A3XX_VPC_ATTR_THRDASSIGN__MASK 0x0ffff000
+#define A3XX_VPC_ATTR_THRDASSIGN__SHIFT 12
+static inline uint32_t A3XX_VPC_ATTR_THRDASSIGN(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VPC_ATTR_THRDASSIGN__SHIFT;
+ return bits & A3XX_VPC_ATTR_THRDASSIGN__MASK;
+}
+#define A3XX_VPC_ATTR_LMSIZE__MASK 0xf0000000
+#define A3XX_VPC_ATTR_LMSIZE__SHIFT 28
+static inline uint32_t A3XX_VPC_ATTR_LMSIZE(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VPC_ATTR_LMSIZE__SHIFT;
+ return bits & A3XX_VPC_ATTR_LMSIZE__MASK;
+}
+
+/* VPC_PACK: NUMFPNONPOSVAR in bits 8..15, NUMNONPOSVSVAR in bits 16..23. */
+#define REG_A3XX_VPC_PACK 0x00002281
+#define A3XX_VPC_PACK_NUMFPNONPOSVAR__MASK 0x0000ff00
+#define A3XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT 8
+static inline uint32_t A3XX_VPC_PACK_NUMFPNONPOSVAR(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT;
+ return bits & A3XX_VPC_PACK_NUMFPNONPOSVAR__MASK;
+}
+#define A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK 0x00ff0000
+#define A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT 16
+static inline uint32_t A3XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT;
+ return bits & A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK;
+}
+
+/*
+ * VPC varying register offsets. The (i0) macros index register arrays with
+ * a stride of one register; each array name and its *_MODE companion expand
+ * to the same address. No bitfield helpers are defined for them here.
+ */
+#define REG_A3XX_VPC_VARYING_INTERP(i0) (0x00002282 + 0x1*(i0))
+
+#define REG_A3XX_VPC_VARYING_INTERP_MODE(i0) (0x00002282 + 0x1*(i0))
+
+#define REG_A3XX_VPC_VARYING_PS_REPL(i0) (0x00002286 + 0x1*(i0))
+
+#define REG_A3XX_VPC_VARYING_PS_REPL_MODE(i0) (0x00002286 + 0x1*(i0))
+
+#define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0 0x0000228a
+
+#define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_1 0x0000228b
+
+/*
+ * SP_SP_CTRL_REG: RESOLVE flag plus three 2-bit fields (CONSTMODE,
+ * SLEEPMODE, LOMODE) packed by shift-and-mask helpers.
+ */
+#define REG_A3XX_SP_SP_CTRL_REG 0x000022c0
+#define A3XX_SP_SP_CTRL_REG_RESOLVE 0x00010000
+#define A3XX_SP_SP_CTRL_REG_CONSTMODE__MASK 0x000c0000
+#define A3XX_SP_SP_CTRL_REG_CONSTMODE__SHIFT 18
+static inline uint32_t A3XX_SP_SP_CTRL_REG_CONSTMODE(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_SP_CTRL_REG_CONSTMODE__SHIFT;
+ return bits & A3XX_SP_SP_CTRL_REG_CONSTMODE__MASK;
+}
+#define A3XX_SP_SP_CTRL_REG_SLEEPMODE__MASK 0x00300000
+#define A3XX_SP_SP_CTRL_REG_SLEEPMODE__SHIFT 20
+static inline uint32_t A3XX_SP_SP_CTRL_REG_SLEEPMODE(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_SP_CTRL_REG_SLEEPMODE__SHIFT;
+ return bits & A3XX_SP_SP_CTRL_REG_SLEEPMODE__MASK;
+}
+#define A3XX_SP_SP_CTRL_REG_LOMODE__MASK 0x00c00000
+#define A3XX_SP_SP_CTRL_REG_LOMODE__SHIFT 22
+static inline uint32_t A3XX_SP_SP_CTRL_REG_LOMODE(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_SP_CTRL_REG_LOMODE__SHIFT;
+ return bits & A3XX_SP_SP_CTRL_REG_LOMODE__MASK;
+}
+
+/*
+ * SP_VS_CTRL_REG0: packed fields with shift-and-mask helpers, plus the
+ * CACHEINVALID, SUPERTHREADMODE and PIXLODENABLE single-bit flags.
+ */
+#define REG_A3XX_SP_VS_CTRL_REG0 0x000022c4
+#define A3XX_SP_VS_CTRL_REG0_THREADMODE__MASK 0x00000001
+#define A3XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT 0
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT;
+ return bits & A3XX_SP_VS_CTRL_REG0_THREADMODE__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK 0x00000002
+#define A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT 1
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffermode val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT;
+ return bits & A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG0_CACHEINVALID 0x00000004
+#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0
+#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT;
+ return bits & A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0003fc00
+#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT;
+ return bits & A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000
+#define A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT;
+ return bits & A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000
+#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT;
+ return bits & A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE 0x00200000
+#define A3XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00400000
+#define A3XX_SP_VS_CTRL_REG0_LENGTH__MASK 0xff000000
+#define A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT 24
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_LENGTH(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT;
+ return bits & A3XX_SP_VS_CTRL_REG0_LENGTH__MASK;
+}
+
+/*
+ * SP_VS_CTRL_REG1: CONSTLENGTH (10 bits), CONSTFOOTPRINT (10 bits) and
+ * INITIALOUTSTANDING (6 bits starting at bit 24).
+ */
+#define REG_A3XX_SP_VS_CTRL_REG1 0x000022c5
+#define A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK 0x000003ff
+#define A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT 0
+static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT;
+ return bits & A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK 0x000ffc00
+#define A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT 10
+static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT;
+ return bits & A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x3f000000
+#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 24
+static inline uint32_t A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT;
+ return bits & A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK;
+}
+
+/* SP_VS_PARAM_REG: POSREGID (8 bits), PSIZEREGID (8 bits), TOTALVSOUTVAR (top 12 bits). */
+#define REG_A3XX_SP_VS_PARAM_REG 0x000022c6
+#define A3XX_SP_VS_PARAM_REG_POSREGID__MASK 0x000000ff
+#define A3XX_SP_VS_PARAM_REG_POSREGID__SHIFT 0
+static inline uint32_t A3XX_SP_VS_PARAM_REG_POSREGID(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_PARAM_REG_POSREGID__SHIFT;
+ return bits & A3XX_SP_VS_PARAM_REG_POSREGID__MASK;
+}
+#define A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK 0x0000ff00
+#define A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT 8
+static inline uint32_t A3XX_SP_VS_PARAM_REG_PSIZEREGID(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT;
+ return bits & A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK;
+}
+#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK 0xfff00000
+#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT 20
+static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT;
+ return bits & A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK;
+}
+
+/*
+ * SP_VS_OUT register array (0x22c7 + i0). Each register packs two
+ * REGID/COMPMASK pairs: "A" in the low half-word, "B" in the high half-word.
+ */
+#define REG_A3XX_SP_VS_OUT(i0) (0x000022c7 + 0x1*(i0))
+
+#define REG_A3XX_SP_VS_OUT_REG(i0) (0x000022c7 + 0x1*(i0))
+#define A3XX_SP_VS_OUT_REG_A_REGID__MASK 0x000001ff
+#define A3XX_SP_VS_OUT_REG_A_REGID__SHIFT 0
+static inline uint32_t A3XX_SP_VS_OUT_REG_A_REGID(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_OUT_REG_A_REGID__SHIFT;
+ return bits & A3XX_SP_VS_OUT_REG_A_REGID__MASK;
+}
+#define A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00001e00
+#define A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 9
+static inline uint32_t A3XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT;
+ return bits & A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK;
+}
+#define A3XX_SP_VS_OUT_REG_B_REGID__MASK 0x01ff0000
+#define A3XX_SP_VS_OUT_REG_B_REGID__SHIFT 16
+static inline uint32_t A3XX_SP_VS_OUT_REG_B_REGID(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_OUT_REG_B_REGID__SHIFT;
+ return bits & A3XX_SP_VS_OUT_REG_B_REGID__MASK;
+}
+#define A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x1e000000
+#define A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 25
+static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT;
+ return bits & A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK;
+}
+
+/*
+ * SP_VS_VPC_DST register array (0x22d0 + i0). Each register packs four
+ * byte-wide OUTLOC fields, OUTLOC0 in the lowest byte through OUTLOC3 in
+ * the highest.
+ */
+#define REG_A3XX_SP_VS_VPC_DST(i0) (0x000022d0 + 0x1*(i0))
+
+#define REG_A3XX_SP_VS_VPC_DST_REG(i0) (0x000022d0 + 0x1*(i0))
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0
+static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT;
+ return bits & A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK;
+}
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8
+static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT;
+ return bits & A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK;
+}
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16
+static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT;
+ return bits & A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK;
+}
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24
+static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT;
+ return bits & A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK;
+}
+
+/* SP_VS_OBJ_OFFSET_REG: CONSTOBJECTOFFSET in bits 16..24, SHADEROBJOFFSET in bits 25..31. */
+#define REG_A3XX_SP_VS_OBJ_OFFSET_REG 0x000022d4
+#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000
+#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16
+static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT;
+ return bits & A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK;
+}
+#define A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000
+#define A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25
+static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT;
+ return bits & A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK;
+}
+
+/*
+ * SP_VS object-start and private-memory register offsets. Offsets only: no
+ * bitfield masks or helpers are defined for them in this section.
+ */
+#define REG_A3XX_SP_VS_OBJ_START_REG 0x000022d5
+
+#define REG_A3XX_SP_VS_PVT_MEM_CTRL_REG 0x000022d6
+
+#define REG_A3XX_SP_VS_PVT_MEM_ADDR_REG 0x000022d7
+
+#define REG_A3XX_SP_VS_PVT_MEM_SIZE_REG 0x000022d8
+
+/* SP_VS_LENGTH_REG: SHADERLENGTH spans the whole 32-bit register. */
+#define REG_A3XX_SP_VS_LENGTH_REG 0x000022df
+#define A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__MASK 0xffffffff
+#define A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__SHIFT 0
+static inline uint32_t A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__SHIFT;
+ return bits & A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__MASK;
+}
+
+/*
+ * SP_FS_CTRL_REG0: same field layout as SP_VS_CTRL_REG0. Multi-bit fields
+ * use shift-and-mask helpers; CACHEINVALID, SUPERTHREADMODE and PIXLODENABLE
+ * are single-bit flags.
+ */
+#define REG_A3XX_SP_FS_CTRL_REG0 0x000022e0
+#define A3XX_SP_FS_CTRL_REG0_THREADMODE__MASK 0x00000001
+#define A3XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT 0
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val)
+{
+ const uint32_t bits = val << A3XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT;
+ return bits & A3XX_SP_FS_CTRL_REG0_THREADMODE__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK 0x00000002
+#define A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT 1
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffermode val)
+{
+ const uint32_t bits = val << A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT;
+ return bits & A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG0_CACHEINVALID 0x00000004
+#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0
+#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT;
+ return bits & A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0003fc00
+#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT;
+ return bits & A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000
+#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT;
+ return bits & A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000
+#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
+{
+ const uint32_t bits = val << A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT;
+ return bits & A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE 0x00200000
+#define A3XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00400000
+#define A3XX_SP_FS_CTRL_REG0_LENGTH__MASK 0xff000000
+#define A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT 24
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_LENGTH(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT;
+ return bits & A3XX_SP_FS_CTRL_REG0_LENGTH__MASK;
+}
+
+/*
+ * SP_FS_CTRL_REG1: CONSTLENGTH (10 bits), CONSTFOOTPRINT (10 bits),
+ * INITIALOUTSTANDING (4 bits at bit 20) and HALFPRECVAROFFSET (6 bits at
+ * bit 24).
+ */
+#define REG_A3XX_SP_FS_CTRL_REG1 0x000022e1
+#define A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK 0x000003ff
+#define A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT 0
+static inline uint32_t A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT;
+ return bits & A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__MASK 0x000ffc00
+#define A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__SHIFT 10
+static inline uint32_t A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__SHIFT;
+ return bits & A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x00f00000
+#define A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 20
+static inline uint32_t A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT;
+ return bits & A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK 0x3f000000
+#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT 24
+static inline uint32_t A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(uint32_t val)
+{
+ const uint32_t bits = val << A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT;
+ return bits & A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK;
+}
+
+#define REG_A3XX_SP_FS_OBJ_OFFSET_REG 0x000022e2
+#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000
+#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16
+/* CONSTOBJECTOFFSET: bits [24:16]. */
+static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT;
+	return shifted & A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK;
+}
+#define A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000
+#define A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25
+/* SHADEROBJOFFSET: bits [31:25]. */
+static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT;
+	return shifted & A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK;
+}
+
+#define REG_A3XX_SP_FS_OBJ_START_REG 0x000022e3
+
+#define REG_A3XX_SP_FS_PVT_MEM_CTRL_REG 0x000022e4
+
+#define REG_A3XX_SP_FS_PVT_MEM_ADDR_REG 0x000022e5
+
+#define REG_A3XX_SP_FS_PVT_MEM_SIZE_REG 0x000022e6
+
+#define REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x000022e8
+
+#define REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x000022e9
+
+#define REG_A3XX_SP_FS_OUTPUT_REG 0x000022ec
+
+#define REG_A3XX_SP_FS_MRT(i0) (0x000022f0 + 0x1*(i0))
+
+#define REG_A3XX_SP_FS_MRT_REG(i0) (0x000022f0 + 0x1*(i0))
+#define A3XX_SP_FS_MRT_REG_REGID__MASK 0x000000ff
+#define A3XX_SP_FS_MRT_REG_REGID__SHIFT 0
+/* REGID: bits [7:0]. */
+static inline uint32_t A3XX_SP_FS_MRT_REG_REGID(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_SP_FS_MRT_REG_REGID__SHIFT;
+	return shifted & A3XX_SP_FS_MRT_REG_REGID__MASK;
+}
+#define A3XX_SP_FS_MRT_REG_HALF_PRECISION 0x00000100
+
+#define REG_A3XX_SP_FS_IMAGE_OUTPUT(i0) (0x000022f4 + 0x1*(i0))
+
+#define REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i0) (0x000022f4 + 0x1*(i0))
+#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK 0x0000003f
+#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT 0
+/* MRTFORMAT: bits [5:0], taken from an a3xx_color_fmt value. */
+static inline uint32_t A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(enum a3xx_color_fmt val)
+{
+	const uint32_t shifted = val << A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT;
+	return shifted & A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK;
+}
+
+#define REG_A3XX_SP_FS_LENGTH_REG 0x000022ff
+#define A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__MASK 0xffffffff
+#define A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__SHIFT 0
+/* SHADERLENGTH: occupies the whole 32-bit register, so val passes through. */
+static inline uint32_t A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__SHIFT;
+	return shifted & A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__MASK;
+}
+
+#define REG_A3XX_TPL1_TP_VS_TEX_OFFSET 0x00002340
+#define A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__MASK 0x000000ff
+#define A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__SHIFT 0
+/* SAMPLEROFFSET: bits [7:0]. */
+static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__SHIFT;
+	return shifted & A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__MASK;
+}
+#define A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__MASK 0x0000ff00
+#define A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__SHIFT 8
+/* MEMOBJOFFSET: bits [15:8]. */
+static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__SHIFT;
+	return shifted & A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__MASK;
+}
+#define A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__MASK 0xffff0000
+#define A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__SHIFT 16
+/* BASETABLEPTR: bits [31:16]. */
+static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__SHIFT;
+	return shifted & A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__MASK;
+}
+
+#define REG_A3XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR 0x00002341
+
+#define REG_A3XX_TPL1_TP_FS_TEX_OFFSET 0x00002342
+#define A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__MASK 0x000000ff
+#define A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__SHIFT 0
+/* SAMPLEROFFSET: bits [7:0]. */
+static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__SHIFT;
+	return shifted & A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__MASK;
+}
+#define A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__MASK 0x0000ff00
+#define A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__SHIFT 8
+/* MEMOBJOFFSET: bits [15:8]. */
+static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__SHIFT;
+	return shifted & A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__MASK;
+}
+#define A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__MASK 0xffff0000
+#define A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__SHIFT 16
+/* BASETABLEPTR: bits [31:16]. */
+static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__SHIFT;
+	return shifted & A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__MASK;
+}
+
+#define REG_A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x00002343
+
+#define REG_A3XX_VBIF_CLKON 0x00003001
+
+#define REG_A3XX_VBIF_FIXED_SORT_EN 0x0000300c
+
+#define REG_A3XX_VBIF_FIXED_SORT_SEL0 0x0000300d
+
+#define REG_A3XX_VBIF_FIXED_SORT_SEL1 0x0000300e
+
+#define REG_A3XX_VBIF_ABIT_SORT 0x0000301c
+
+#define REG_A3XX_VBIF_ABIT_SORT_CONF 0x0000301d
+
+#define REG_A3XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a
+
+#define REG_A3XX_VBIF_IN_RD_LIM_CONF0 0x0000302c
+
+#define REG_A3XX_VBIF_IN_RD_LIM_CONF1 0x0000302d
+
+#define REG_A3XX_VBIF_IN_WR_LIM_CONF0 0x00003030
+
+#define REG_A3XX_VBIF_IN_WR_LIM_CONF1 0x00003031
+
+#define REG_A3XX_VBIF_OUT_RD_LIM_CONF0 0x00003034
+
+#define REG_A3XX_VBIF_OUT_WR_LIM_CONF0 0x00003035
+
+#define REG_A3XX_VBIF_DDR_OUT_MAX_BURST 0x00003036
+
+#define REG_A3XX_VBIF_ARB_CTL 0x0000303c
+
+#define REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049
+
+#define REG_A3XX_VBIF_OUT_AXI_AMEMTYPE_CONF0 0x00003058
+
+#define REG_A3XX_VBIF_OUT_AXI_AOOO_EN 0x0000305e
+
+#define REG_A3XX_VBIF_OUT_AXI_AOOO 0x0000305f
+
+#define REG_A3XX_VSC_BIN_SIZE 0x00000c01
+#define A3XX_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f
+#define A3XX_VSC_BIN_SIZE_WIDTH__SHIFT 0
+/* WIDTH: bits [4:0]; val is divided by 32 (>> 5) before being stored. */
+static inline uint32_t A3XX_VSC_BIN_SIZE_WIDTH(uint32_t val)
+{
+	const uint32_t units = val >> 5;
+	return (units << A3XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A3XX_VSC_BIN_SIZE_WIDTH__MASK;
+}
+#define A3XX_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0
+#define A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT 5
+/* HEIGHT: bits [9:5]; val is divided by 32 (>> 5) before being stored. */
+static inline uint32_t A3XX_VSC_BIN_SIZE_HEIGHT(uint32_t val)
+{
+	const uint32_t units = val >> 5;
+	return (units << A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A3XX_VSC_BIN_SIZE_HEIGHT__MASK;
+}
+
+#define REG_A3XX_VSC_SIZE_ADDRESS 0x00000c02
+
+#define REG_A3XX_VSC_PIPE(i0) (0x00000c06 + 0x3*(i0))
+
+#define REG_A3XX_VSC_PIPE_CONFIG(i0) (0x00000c06 + 0x3*(i0))
+#define A3XX_VSC_PIPE_CONFIG_X__MASK 0x000003ff
+#define A3XX_VSC_PIPE_CONFIG_X__SHIFT 0
+/* X: bits [9:0]. */
+static inline uint32_t A3XX_VSC_PIPE_CONFIG_X(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_VSC_PIPE_CONFIG_X__SHIFT;
+	return shifted & A3XX_VSC_PIPE_CONFIG_X__MASK;
+}
+#define A3XX_VSC_PIPE_CONFIG_Y__MASK 0x000ffc00
+#define A3XX_VSC_PIPE_CONFIG_Y__SHIFT 10
+/* Y: bits [19:10]. */
+static inline uint32_t A3XX_VSC_PIPE_CONFIG_Y(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_VSC_PIPE_CONFIG_Y__SHIFT;
+	return shifted & A3XX_VSC_PIPE_CONFIG_Y__MASK;
+}
+#define A3XX_VSC_PIPE_CONFIG_W__MASK 0x00f00000
+#define A3XX_VSC_PIPE_CONFIG_W__SHIFT 20
+/* W: bits [23:20]. */
+static inline uint32_t A3XX_VSC_PIPE_CONFIG_W(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_VSC_PIPE_CONFIG_W__SHIFT;
+	return shifted & A3XX_VSC_PIPE_CONFIG_W__MASK;
+}
+#define A3XX_VSC_PIPE_CONFIG_H__MASK 0x0f000000
+#define A3XX_VSC_PIPE_CONFIG_H__SHIFT 24
+/* H: bits [27:24]. */
+static inline uint32_t A3XX_VSC_PIPE_CONFIG_H(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_VSC_PIPE_CONFIG_H__SHIFT;
+	return shifted & A3XX_VSC_PIPE_CONFIG_H__MASK;
+}
+
+#define REG_A3XX_VSC_PIPE_DATA_ADDRESS(i0) (0x00000c07 + 0x3*(i0))
+
+#define REG_A3XX_VSC_PIPE_DATA_LENGTH(i0) (0x00000c08 + 0x3*(i0))
+
+#define REG_A3XX_UNKNOWN_0C3D 0x00000c3d
+
+#define REG_A3XX_UNKNOWN_0C81 0x00000c81
+
+#define REG_A3XX_GRAS_CL_USER_PLANE(i0) (0x00000ca0 + 0x4*(i0))
+
+#define REG_A3XX_GRAS_CL_USER_PLANE_X(i0) (0x00000ca0 + 0x4*(i0))
+
+#define REG_A3XX_GRAS_CL_USER_PLANE_Y(i0) (0x00000ca1 + 0x4*(i0))
+
+#define REG_A3XX_GRAS_CL_USER_PLANE_Z(i0) (0x00000ca2 + 0x4*(i0))
+
+#define REG_A3XX_GRAS_CL_USER_PLANE_W(i0) (0x00000ca3 + 0x4*(i0))
+
+#define REG_A3XX_RB_GMEM_BASE_ADDR 0x00000cc0
+
+#define REG_A3XX_RB_WINDOW_SIZE 0x00000ce0
+#define A3XX_RB_WINDOW_SIZE_WIDTH__MASK 0x00003fff
+#define A3XX_RB_WINDOW_SIZE_WIDTH__SHIFT 0
+/* WIDTH: bits [13:0]. */
+static inline uint32_t A3XX_RB_WINDOW_SIZE_WIDTH(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_RB_WINDOW_SIZE_WIDTH__SHIFT;
+	return shifted & A3XX_RB_WINDOW_SIZE_WIDTH__MASK;
+}
+#define A3XX_RB_WINDOW_SIZE_HEIGHT__MASK 0x0fffc000
+#define A3XX_RB_WINDOW_SIZE_HEIGHT__SHIFT 14
+/* HEIGHT: bits [27:14]. */
+static inline uint32_t A3XX_RB_WINDOW_SIZE_HEIGHT(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_RB_WINDOW_SIZE_HEIGHT__SHIFT;
+	return shifted & A3XX_RB_WINDOW_SIZE_HEIGHT__MASK;
+}
+
+#define REG_A3XX_UNKNOWN_0E00 0x00000e00
+
+#define REG_A3XX_UNKNOWN_0E43 0x00000e43
+
+#define REG_A3XX_VFD_PERFCOUNTER0_SELECT 0x00000e44
+
+#define REG_A3XX_VPC_VPC_DEBUG_RAM_SEL 0x00000e61
+
+#define REG_A3XX_VPC_VPC_DEBUG_RAM_READ 0x00000e62
+
+#define REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG 0x00000e82
+
+#define REG_A3XX_UCHE_CACHE_INVALIDATE0_REG 0x00000ea0
+#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK 0x0fffffff
+#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT 0
+/* ADDR: bits [27:0]. */
+static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT;
+	return shifted & A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK;
+}
+
+#define REG_A3XX_UCHE_CACHE_INVALIDATE1_REG 0x00000ea1
+#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__MASK 0x0fffffff
+#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__SHIFT 0
+/* ADDR: bits [27:0]. */
+static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__SHIFT;
+	return shifted & A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__MASK;
+}
+#define A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__MASK 0x30000000
+#define A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__SHIFT 28
+/* OPCODE: bits [29:28], taken from an a3xx_cache_opcode value. */
+static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(enum a3xx_cache_opcode val)
+{
+	const uint32_t shifted = val << A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__SHIFT;
+	return shifted & A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__MASK;
+}
+#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE 0x80000000
+
+#define REG_A3XX_SP_PERFCOUNTER0_SELECT 0x00000ec4
+
+#define REG_A3XX_SP_PERFCOUNTER1_SELECT 0x00000ec5
+
+#define REG_A3XX_SP_PERFCOUNTER2_SELECT 0x00000ec6
+
+#define REG_A3XX_SP_PERFCOUNTER3_SELECT 0x00000ec7
+
+#define REG_A3XX_SP_PERFCOUNTER4_SELECT 0x00000ec8
+
+#define REG_A3XX_SP_PERFCOUNTER5_SELECT 0x00000ec9
+
+#define REG_A3XX_SP_PERFCOUNTER6_SELECT 0x00000eca
+
+#define REG_A3XX_SP_PERFCOUNTER7_SELECT 0x00000ecb
+
+#define REG_A3XX_UNKNOWN_0EE0 0x00000ee0
+
+#define REG_A3XX_UNKNOWN_0F03 0x00000f03
+
+#define REG_A3XX_TEX_SAMP_0 0x00000000
+#define A3XX_TEX_SAMP_0_XY_MAG__MASK 0x0000000c
+#define A3XX_TEX_SAMP_0_XY_MAG__SHIFT 2
+/* XY_MAG: bits [3:2], taken from an a3xx_tex_filter value. */
+static inline uint32_t A3XX_TEX_SAMP_0_XY_MAG(enum a3xx_tex_filter val)
+{
+	const uint32_t shifted = val << A3XX_TEX_SAMP_0_XY_MAG__SHIFT;
+	return shifted & A3XX_TEX_SAMP_0_XY_MAG__MASK;
+}
+#define A3XX_TEX_SAMP_0_XY_MIN__MASK 0x00000030
+#define A3XX_TEX_SAMP_0_XY_MIN__SHIFT 4
+/* XY_MIN: bits [5:4], taken from an a3xx_tex_filter value. */
+static inline uint32_t A3XX_TEX_SAMP_0_XY_MIN(enum a3xx_tex_filter val)
+{
+	const uint32_t shifted = val << A3XX_TEX_SAMP_0_XY_MIN__SHIFT;
+	return shifted & A3XX_TEX_SAMP_0_XY_MIN__MASK;
+}
+#define A3XX_TEX_SAMP_0_WRAP_S__MASK 0x000001c0
+#define A3XX_TEX_SAMP_0_WRAP_S__SHIFT 6
+/* WRAP_S: bits [8:6], taken from an a3xx_tex_clamp value. */
+static inline uint32_t A3XX_TEX_SAMP_0_WRAP_S(enum a3xx_tex_clamp val)
+{
+	const uint32_t shifted = val << A3XX_TEX_SAMP_0_WRAP_S__SHIFT;
+	return shifted & A3XX_TEX_SAMP_0_WRAP_S__MASK;
+}
+#define A3XX_TEX_SAMP_0_WRAP_T__MASK 0x00000e00
+#define A3XX_TEX_SAMP_0_WRAP_T__SHIFT 9
+/* WRAP_T: bits [11:9], taken from an a3xx_tex_clamp value. */
+static inline uint32_t A3XX_TEX_SAMP_0_WRAP_T(enum a3xx_tex_clamp val)
+{
+	const uint32_t shifted = val << A3XX_TEX_SAMP_0_WRAP_T__SHIFT;
+	return shifted & A3XX_TEX_SAMP_0_WRAP_T__MASK;
+}
+#define A3XX_TEX_SAMP_0_WRAP_R__MASK 0x00007000
+#define A3XX_TEX_SAMP_0_WRAP_R__SHIFT 12
+/* WRAP_R: bits [14:12], taken from an a3xx_tex_clamp value. */
+static inline uint32_t A3XX_TEX_SAMP_0_WRAP_R(enum a3xx_tex_clamp val)
+{
+	const uint32_t shifted = val << A3XX_TEX_SAMP_0_WRAP_R__SHIFT;
+	return shifted & A3XX_TEX_SAMP_0_WRAP_R__MASK;
+}
+#define A3XX_TEX_SAMP_0_UNNORM_COORDS 0x80000000
+
+#define REG_A3XX_TEX_SAMP_1 0x00000001
+
+#define REG_A3XX_TEX_CONST_0 0x00000000
+#define A3XX_TEX_CONST_0_TILED 0x00000001
+#define A3XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070
+#define A3XX_TEX_CONST_0_SWIZ_X__SHIFT 4
+/* SWIZ_X: bits [6:4], taken from an a3xx_tex_swiz value. */
+static inline uint32_t A3XX_TEX_CONST_0_SWIZ_X(enum a3xx_tex_swiz val)
+{
+	const uint32_t shifted = val << A3XX_TEX_CONST_0_SWIZ_X__SHIFT;
+	return shifted & A3XX_TEX_CONST_0_SWIZ_X__MASK;
+}
+#define A3XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380
+#define A3XX_TEX_CONST_0_SWIZ_Y__SHIFT 7
+/* SWIZ_Y: bits [9:7], taken from an a3xx_tex_swiz value. */
+static inline uint32_t A3XX_TEX_CONST_0_SWIZ_Y(enum a3xx_tex_swiz val)
+{
+	const uint32_t shifted = val << A3XX_TEX_CONST_0_SWIZ_Y__SHIFT;
+	return shifted & A3XX_TEX_CONST_0_SWIZ_Y__MASK;
+}
+#define A3XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00
+#define A3XX_TEX_CONST_0_SWIZ_Z__SHIFT 10
+/* SWIZ_Z: bits [12:10], taken from an a3xx_tex_swiz value. */
+static inline uint32_t A3XX_TEX_CONST_0_SWIZ_Z(enum a3xx_tex_swiz val)
+{
+	const uint32_t shifted = val << A3XX_TEX_CONST_0_SWIZ_Z__SHIFT;
+	return shifted & A3XX_TEX_CONST_0_SWIZ_Z__MASK;
+}
+#define A3XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000
+#define A3XX_TEX_CONST_0_SWIZ_W__SHIFT 13
+/* SWIZ_W: bits [15:13], taken from an a3xx_tex_swiz value. */
+static inline uint32_t A3XX_TEX_CONST_0_SWIZ_W(enum a3xx_tex_swiz val)
+{
+	const uint32_t shifted = val << A3XX_TEX_CONST_0_SWIZ_W__SHIFT;
+	return shifted & A3XX_TEX_CONST_0_SWIZ_W__MASK;
+}
+#define A3XX_TEX_CONST_0_FMT__MASK 0x1fc00000
+#define A3XX_TEX_CONST_0_FMT__SHIFT 22
+/* FMT: bits [28:22], taken from an a3xx_tex_fmt value. */
+static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val)
+{
+	const uint32_t shifted = val << A3XX_TEX_CONST_0_FMT__SHIFT;
+	return shifted & A3XX_TEX_CONST_0_FMT__MASK;
+}
+
+#define REG_A3XX_TEX_CONST_1 0x00000001
+#define A3XX_TEX_CONST_1_HEIGHT__MASK 0x00003fff
+#define A3XX_TEX_CONST_1_HEIGHT__SHIFT 0
+/* HEIGHT: bits [13:0]. */
+static inline uint32_t A3XX_TEX_CONST_1_HEIGHT(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_TEX_CONST_1_HEIGHT__SHIFT;
+	return shifted & A3XX_TEX_CONST_1_HEIGHT__MASK;
+}
+#define A3XX_TEX_CONST_1_WIDTH__MASK 0x0fffc000
+#define A3XX_TEX_CONST_1_WIDTH__SHIFT 14
+/* WIDTH: bits [27:14]. */
+static inline uint32_t A3XX_TEX_CONST_1_WIDTH(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_TEX_CONST_1_WIDTH__SHIFT;
+	return shifted & A3XX_TEX_CONST_1_WIDTH__MASK;
+}
+#define A3XX_TEX_CONST_1_FETCHSIZE__MASK 0xf0000000
+#define A3XX_TEX_CONST_1_FETCHSIZE__SHIFT 28
+/* FETCHSIZE: bits [31:28], taken from an a3xx_tex_fetchsize value. */
+static inline uint32_t A3XX_TEX_CONST_1_FETCHSIZE(enum a3xx_tex_fetchsize val)
+{
+	const uint32_t shifted = val << A3XX_TEX_CONST_1_FETCHSIZE__SHIFT;
+	return shifted & A3XX_TEX_CONST_1_FETCHSIZE__MASK;
+}
+
+#define REG_A3XX_TEX_CONST_2 0x00000002
+#define A3XX_TEX_CONST_2_INDX__MASK 0x000000ff
+#define A3XX_TEX_CONST_2_INDX__SHIFT 0
+/* INDX: bits [7:0]. */
+static inline uint32_t A3XX_TEX_CONST_2_INDX(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_TEX_CONST_2_INDX__SHIFT;
+	return shifted & A3XX_TEX_CONST_2_INDX__MASK;
+}
+#define A3XX_TEX_CONST_2_PITCH__MASK 0x3ffff000
+#define A3XX_TEX_CONST_2_PITCH__SHIFT 12
+/* PITCH: bits [29:12]. */
+static inline uint32_t A3XX_TEX_CONST_2_PITCH(uint32_t val)
+{
+	const uint32_t shifted = val << A3XX_TEX_CONST_2_PITCH__SHIFT;
+	return shifted & A3XX_TEX_CONST_2_PITCH__MASK;
+}
+#define A3XX_TEX_CONST_2_SWAP__MASK 0xc0000000
+#define A3XX_TEX_CONST_2_SWAP__SHIFT 30
+/* SWAP: bits [31:30], taken from an a3xx_color_swap value. */
+static inline uint32_t A3XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val)
+{
+	const uint32_t shifted = val << A3XX_TEX_CONST_2_SWAP__SHIFT;
+	return shifted & A3XX_TEX_CONST_2_SWAP__MASK;
+}
+
+#define REG_A3XX_TEX_CONST_3 0x00000003
+
+
+#endif /* A3XX_XML */
diff --git a/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c b/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c
new file mode 100644
index 00000000000..4db095f5679
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c
@@ -0,0 +1,946 @@
+/*
+ * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+
+#include <util/u_debug.h>
+
+#include "disasm.h"
+#include "instr-a3xx.h"
+
+static enum debug_t debug;
+
+#define printf debug_printf
+
+/* Line prefixes indexed by nesting level: N tabs for levels 0..9, and an
+ * "x" placeholder for levels 10..15. */
+static const char *levels[] = {
+	/*  0 */ "",
+	/*  1 */ "\t",
+	/*  2 */ "\t\t",
+	/*  3 */ "\t\t\t",
+	/*  4 */ "\t\t\t\t",
+	/*  5 */ "\t\t\t\t\t",
+	/*  6 */ "\t\t\t\t\t\t",
+	/*  7 */ "\t\t\t\t\t\t\t",
+	/*  8 */ "\t\t\t\t\t\t\t\t",
+	/*  9 */ "\t\t\t\t\t\t\t\t\t",
+	/* 10 */ "x",
+	/* 11 */ "x",
+	/* 12 */ "x",
+	/* 13 */ "x",
+	/* 14 */ "x",
+	/* 15 */ "x",
+};
+
+static const char *component = "xyzw";
+
+/* Printable suffix for each TYPE_* code, grouped here by element width. */
+static const char *type[] = {
+	/* 32-bit */
+	[TYPE_F32] = "f32",
+	[TYPE_U32] = "u32",
+	[TYPE_S32] = "s32",
+	/* 16-bit */
+	[TYPE_F16] = "f16",
+	[TYPE_U16] = "u16",
+	[TYPE_S16] = "s16",
+	/* 8-bit */
+	[TYPE_U8] = "u8",
+	[TYPE_S8] = "s8",
+};
+
+/*
+ * Print a single register operand.
+ *
+ *   full     - false adds the "h" prefix in front of the register name
+ *   r        - emit the "(r)" marker
+ *   c        - use the 'c' register file letter instead of 'r'
+ *   im       - operand is an immediate; print reg.iim_val as a number
+ *   neg/abs  - emit "(neg)", "(abs)" or "(absneg)"
+ *   addr_rel - operand is addressed relative to a0.x by reg.iim_val
+ *
+ * The modifier spelling intentionally follows libllvm-a3xx output so that
+ * the two disassemblies can be diffed.
+ */
+static void print_reg(reg_t reg, bool full, bool r, bool c, bool im,
+		bool neg, bool abs, bool addr_rel)
+{
+	const char file = c ? 'c' : 'r';
+	const char *half = full ? "" : "h";
+
+	if (neg) {
+		if (abs)
+			printf("(absneg)");
+		else
+			printf("(neg)");
+	} else if (abs) {
+		printf("(abs)");
+	}
+
+	if (r)
+		printf("(r)");
+
+	if (im) {
+		printf("%d", reg.iim_val);
+		return;
+	}
+
+	if (addr_rel) {
+		/* explicit sign handling instead of %+d, to stay diff'able
+		 * with libllvm-a3xx
+		 */
+		if (reg.iim_val < 0)
+			printf("%s%c<a0.x - %d>", half, file, -reg.iim_val);
+		else if (reg.iim_val > 0)
+			printf("%s%c<a0.x + %d>", half, file, reg.iim_val);
+		else
+			printf("%s%c<a0.x>", half, file);
+		return;
+	}
+
+	if (!c && (reg.num == REG_A0)) {
+		printf("a0.%c", component[reg.comp]);
+	} else if (!c && (reg.num == REG_P0)) {
+		printf("p0.%c", component[reg.comp]);
+	} else {
+		printf("%s%c%d.%c", half, file, reg.num, component[reg.comp]);
+	}
+}
+
+/* Tracking for registers used, read-before-write (input), and
+ * write-after-read (output.. but not 100%)..
+ */
+
+#define MAX_REG 128
+
+/* One bit per register slot, tracked separately for full and half
+ * registers.
+ */
+typedef struct {
+	uint8_t full[MAX_REG/8];
+	uint8_t half[MAX_REG/8];
+} regmask_t;
+
+/*
+ * Set or clear the bit for slot `num` in the full or half mask.
+ *
+ * Only bit 0 of `val` is used, so a wider value cannot spill into the bits
+ * of neighbouring slots.  An out-of-range `num` still trips the assert in
+ * debug builds; when asserts are compiled out (NDEBUG) the request is
+ * ignored instead of writing past the end of the mask.
+ */
+static void regmask_set(regmask_t *regmask, unsigned num, bool full, unsigned val)
+{
+	uint8_t *bits = full ? regmask->full : regmask->half;
+	const unsigned i = num / 8;
+	const unsigned j = num % 8;
+
+	assert(num < MAX_REG);
+	if (num >= MAX_REG)
+		return;
+
+	bits[i] = (uint8_t)((bits[i] & ~(1u << j)) | ((val & 1u) << j));
+}
+
+/*
+ * Return 1 if the bit for slot `num` is set in the full or half mask,
+ * otherwise 0.  An out-of-range `num` asserts in debug builds and reads as 0
+ * when asserts are compiled out.
+ */
+static unsigned regmask_get(regmask_t *regmask, unsigned num, bool full)
+{
+	const uint8_t *bits = full ? regmask->full : regmask->half;
+
+	assert(num < MAX_REG);
+	if (num >= MAX_REG)
+		return 0;
+
+	return (bits[num / 8] >> (num % 8)) & 0x1;
+}
+
+/* Flatten a register into a scalar slot index: four component slots per
+ * register number, plus the component.
+ */
+static unsigned regidx(reg_t reg)
+{
+	const unsigned base = 4 * reg.num;
+
+	return base + reg.comp;
+}
+
+/* Register usage gathered while disassembling; each member is a
+ * full/half bitmask pair.
+ */
+static struct {
+	regmask_t cnst;   /* used consts */
+	regmask_t used;
+	regmask_t rbw;    /* read before write */
+	regmask_t war;    /* write after read */
+} regs;
+
+/* Print the run [first, last] as " N" or " N-M".  Nothing is printed while
+ * `first` still holds the MAX_REG "no run started" sentinel.
+ */
+static void print_reg_sequence(int first, int last)
+{
+	if (first == MAX_REG)
+		return;
+
+	if (first == last)
+		printf(" %d", first);
+	else
+		printf(" %d-%d", first, last);
+}
+
+/*
+ * Print every slot set in the full (or half) mask as a list of collapsed
+ * ranges, followed by " (cnt=<number of set slots>, max=<highest set slot>)".
+ *
+ * The range printer is a plain static helper rather than a nested function:
+ * nested functions are a GNU C extension and are not accepted by every
+ * compiler.
+ */
+static void print_regs(regmask_t *regmask, bool full)
+{
+	int num, max = 0, cnt = 0;
+	int first = MAX_REG, last = MAX_REG;
+
+	for (num = 0; num < MAX_REG; num++) {
+		if (!regmask_get(regmask, num, full))
+			continue;
+
+		/* not adjacent to the open run: flush it and start a new one */
+		if (num != (last + 1)) {
+			print_reg_sequence(first, last);
+			first = num;
+		}
+		last = num;
+		max = num;
+		cnt++;
+	}
+
+	print_reg_sequence(first, last);
+
+	printf(" (cnt=%d, max=%d)", cnt, max);
+}
+
+/* Dump the collected register statistics (used / input / const / output,
+ * half and full), each line prefixed with the indentation for `level`.
+ */
+static void print_reg_stats(int level)
+{
+	const char *indent = levels[level];
+
+	printf("%sRegister Stats:\n", indent);
+
+	printf("%s- used (half):", indent);
+	print_regs(&regs.used, false);
+	printf("\n");
+
+	printf("%s- used (full):", indent);
+	print_regs(&regs.used, true);
+	printf("\n");
+
+	printf("%s- input (half):", indent);
+	print_regs(&regs.rbw, false);
+	printf("\n");
+
+	printf("%s- input (full):", indent);
+	print_regs(&regs.rbw, true);
+	printf("\n");
+
+	printf("%s- const (half):", indent);
+	print_regs(&regs.cnst, false);
+	printf("\n");
+
+	printf("%s- const (full):", indent);
+	print_regs(&regs.cnst, true);
+	printf("\n");
+
+	/* outputs are derived from the write-after-read mask, hence estimated */
+	printf("%s- output (half):", indent);
+	print_regs(&regs.war, false);
+	printf(" (estimated)\n");
+
+	printf("%s- output (full):", indent);
+	print_regs(&regs.war, true);
+	printf(" (estimated)\n");
+}
+
+/* we have to process the dst register after src to avoid tripping up
+ * the read-before-write detection
+ */
+static unsigned last_dst;
+static bool last_dst_full;
+static bool last_dst_valid = false;
+
+/* current instruction repeat flag: */
+static unsigned repeat;
+
+/* Commit the destination recorded by print_reg_dst(): mark it, and the
+ * following `repeat` slots, as written and used, then drop the record.
+ */
+static void process_reg_dst(void)
+{
+	unsigned n;
+
+	if (last_dst_valid) {
+		for (n = 0; n <= repeat; n++) {
+			const unsigned slot = last_dst + n;
+
+			regmask_set(&regs.war, slot, last_dst_full, 1);
+			regmask_set(&regs.used, slot, last_dst_full, 1);
+		}
+
+		last_dst_valid = false;
+	}
+}
+
+/* Print a destination operand and remember it so process_reg_dst() can
+ * account for it later.
+ */
+static void print_reg_dst(reg_t reg, bool full, bool addr_rel)
+{
+	/* presumably the special registers a0.c and p0.c don't count.. */
+	const bool tracked = !addr_rel && !reg_special(reg);
+
+	if (tracked) {
+		last_dst_valid = true;
+		last_dst_full = full;
+		last_dst = regidx(reg);
+	}
+
+	print_reg(reg, full, false, false, false, false, false, addr_rel);
+}
+
+/*
+ * Print a source operand and update the usage statistics.
+ *
+ * Plain registers (not relative, const, immediate or special) are recorded
+ * as used, flagged as read-before-write if not yet seen, and cleared from
+ * the write-after-read mask.  Const operands are recorded in the const mask.
+ * With `r` set the bookkeeping covers `repeat` additional slots, otherwise
+ * only the first one.
+ */
+static void print_reg_src(reg_t reg, bool full, bool r, bool c, bool im,
+		bool neg, bool abs, bool addr_rel)
+{
+	/* presumably the special registers a0.c and p0.c don't count.. */
+	const bool is_gpr = !(addr_rel || c || im || reg_special(reg));
+
+	if (is_gpr || c) {
+		const unsigned base = regidx(reg);
+		unsigned n;
+
+		for (n = 0; n <= repeat; n++) {
+			const unsigned src = base + n;
+
+			if (is_gpr) {
+				if (!regmask_get(&regs.used, src, full))
+					regmask_set(&regs.rbw, src, full, 1);
+
+				regmask_set(&regs.war, src, full, 0);
+				regmask_set(&regs.used, src, full, 1);
+			} else {
+				regmask_set(&regs.cnst, src, full, 1);
+			}
+
+			if (!r)
+				break;
+		}
+	}
+
+	print_reg(reg, full, r, c, im, neg, abs, addr_rel);
+}
+
+
+/* Print the operands of a category 0 instruction: the p0 predicate for
+ * kill/br and the immediate target for br/jump/call.
+ */
+static void print_instr_cat0(instr_t *instr)
+{
+	instr_cat0_t *cat0 = &instr->cat0;
+	const char *inv = cat0->inv ? "!" : "";
+
+	switch (cat0->opc) {
+	case OPC_KILL:
+		printf(" %sp0.%c", inv, component[cat0->comp]);
+		break;
+	case OPC_BR:
+		printf(" %sp0.%c, #%d", inv, component[cat0->comp], cat0->immed);
+		break;
+	case OPC_JUMP:
+	case OPC_CALL:
+		printf(" #%d", cat0->immed);
+		break;
+	}
+
+	/* in verbose mode, show unknown bits that turned out to be non-zero */
+	if (debug & PRINT_VERBOSE) {
+		if (cat0->dummy1|cat0->dummy2|cat0->dummy3|cat0->dummy4)
+			printf("\t{0: %x,%x,%x,%x}", cat0->dummy1, cat0->dummy2, cat0->dummy3, cat0->dummy4);
+	}
+}
+
+/* Print a category 1 instruction (mov / cov / mova) with its operands. */
+static void print_instr_cat1(instr_t *instr)
+{
+	instr_cat1_t *cat1 = &instr->cat1;
+	const bool converts = (cat1->src_type != cat1->dst_type);
+
+	// XXX maybe a bug in libllvm disassembler?
+	if (cat1->src_rel)
+		printf("(ul)");
+
+	if (converts) {
+		printf("cov.%s%s", type[cat1->src_type], type[cat1->dst_type]);
+	} else if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) {
+		/* special case (mnemonic?): s16 move into a0 */
+		printf("mova");
+	} else {
+		printf("mov.%s%s", type[cat1->src_type], type[cat1->dst_type]);
+	}
+
+	printf(" ");
+
+	if (cat1->even)
+		printf("(even)");
+
+	if (cat1->pos_inf)
+		printf("(pos_infinity)");
+
+	print_reg_dst((reg_t)(cat1->dst), type_size(cat1->dst_type) == 32,
+			cat1->dst_rel);
+
+	printf(", ");
+
+	/* the source needs handling that print_reg() does not provide */
+	if (cat1->src_im) {
+		if (type_float(cat1->src_type))
+			printf("(%f)", cat1->fim_val);
+		else
+			printf("%d", cat1->iim_val);
+	} else if (cat1->src_rel && !cat1->src_c) {
+		/* explicit sign handling instead of %+d, to stay diff'able
+		 * with libllvm-a3xx
+		 */
+		if (cat1->off < 0)
+			printf("c<a0.x - %d>", -cat1->off);
+		else if (cat1->off > 0)
+			printf("c<a0.x + %d>", cat1->off);
+		else
+			printf("c<a0.x>");
+	} else {
+		print_reg_src((reg_t)(cat1->src), type_size(cat1->src_type) == 32,
+				cat1->src_r, cat1->src_c, cat1->src_im, false, false, false);
+	}
+
+	if (debug & PRINT_VERBOSE) {
+		if (cat1->must_be_0)
+			printf("\t{1: %x}", cat1->must_be_0);
+	}
+}
+
+/*
+ * Print the operands of a category 2 (ALU) instruction: an optional
+ * ".<cond>" suffix for the compare opcodes, the destination, and one or two
+ * sources depending on the opcode.
+ */
+static void print_instr_cat2(instr_t *instr)
+{
+	instr_cat2_t *cat2 = &instr->cat2;
+	static const char *cond[] = {
+		"lt",
+		"le",
+		"gt",
+		"ge",
+		"eq",
+		"ne",
+		"?6?",
+		"?7?",
+	};
+	const unsigned ncond = sizeof(cond) / sizeof(cond[0]);
+
+	switch (cat2->opc) {
+	case OPC_CMPS_F:
+	case OPC_CMPS_U:
+	case OPC_CMPS_S:
+	case OPC_CMPV_F:
+	case OPC_CMPV_U:
+	case OPC_CMPV_S:
+		/* never index past the name table, whatever the encoded
+		 * condition value is
+		 */
+		printf(".%s", (cat2->cond < ncond) ? cond[cat2->cond] : "??");
+		break;
+	}
+
+	printf(" ");
+	if (cat2->ei)
+		printf("(ei)");
+	print_reg_dst((reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false);
+	printf(", ");
+	print_reg_src((reg_t)(cat2->src1), cat2->full, cat2->src1_r,
+			cat2->src1_c, cat2->src1_im, cat2->src1_neg,
+			cat2->src1_abs, cat2->src1_rel);
+	switch (cat2->opc) {
+	case OPC_ABSNEG_F:
+	case OPC_ABSNEG_S:
+	case OPC_CLZ_B:
+	case OPC_CLZ_S:
+	case OPC_SIGN_F:
+	case OPC_FLOOR_F:
+	case OPC_CEIL_F:
+	case OPC_RNDNE_F:
+	case OPC_RNDAZ_F:
+	case OPC_TRUNC_F:
+	case OPC_NOT_B:
+	case OPC_BFREV_B:
+	case OPC_SETRM:
+	case OPC_CBITS_B:
+		/* these only have one src reg */
+		break;
+	default:
+		printf(", ");
+		print_reg_src((reg_t)(cat2->src2), cat2->full, cat2->src2_r,
+				cat2->src2_c, cat2->src2_im, cat2->src2_neg,
+				cat2->src2_abs, cat2->src2_rel);
+		break;
+	}
+}
+
+/* Print the destination and three sources of a category 3 instruction. */
+static void print_instr_cat3(instr_t *instr)
+{
+	instr_cat3_t *cat3 = &instr->cat3;
+	bool full;
+
+	// XXX is this based on opc or some other bit?
+	switch (cat3->opc) {
+	case OPC_MAD_F16:
+	case OPC_MAD_U16:
+	case OPC_MAD_S16:
+	case OPC_SEL_B16:
+	case OPC_SEL_S16:
+	case OPC_SEL_F16:
+	case OPC_SAD_S16:
+	case OPC_SAD_S32: // really??
+		/* these opcodes are printed with half registers */
+		full = false;
+		break;
+	default:
+		full = true;
+		break;
+	}
+
+	printf(" ");
+	print_reg_dst((reg_t)(cat3->dst), full ^ cat3->dst_half, false);
+
+	printf(", ");
+	print_reg_src((reg_t)(cat3->src1), full,
+			cat3->src1_r, cat3->src1_c, false, cat3->src1_neg,
+			false, cat3->src1_rel);
+
+	/* src2 is always printed as non-relative */
+	printf(", ");
+	print_reg_src((reg_t)cat3->src2, full,
+			cat3->src2_r, cat3->src2_c, false, cat3->src2_neg,
+			false, false);
+
+	printf(", ");
+	print_reg_src((reg_t)(cat3->src3), full,
+			cat3->src3_r, cat3->src3_c, false, cat3->src3_neg,
+			false, cat3->src3_rel);
+}
+
+/* Print the destination and single source of a category 4 instruction. */
+static void print_instr_cat4(instr_t *instr)
+{
+	instr_cat4_t *cat4 = &instr->cat4;
+
+	printf(" ");
+	print_reg_dst((reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false);
+
+	printf(", ");
+	print_reg_src((reg_t)(cat4->src), cat4->full,
+			cat4->src_r, cat4->src_c, cat4->src_im,
+			cat4->src_neg, cat4->src_abs, cat4->src_rel);
+
+	/* in verbose mode, show unknown bits that turned out to be non-zero */
+	if (debug & PRINT_VERBOSE) {
+		if (cat4->dummy1|cat4->dummy2)
+			printf("\t{4: %x,%x}", cat4->dummy1, cat4->dummy2);
+	}
+}
+
+/*
+ * Print the flags and operands of a category 5 (texture sample / fetch)
+ * instruction.
+ *
+ * The per-opcode table records which operands each opcode takes.  Opcodes
+ * outside the table are treated as taking none, so the lookup can never
+ * read past the end of the table.
+ */
+static void print_instr_cat5(instr_t *instr)
+{
+	static const struct {
+		bool src1, src2, samp, tex;
+	} info[0x1f] = {
+		[OPC_ISAM] = { true, false, true, true, },
+		[OPC_ISAML] = { true, true, true, true, },
+		[OPC_ISAMM] = { true, false, true, true, },
+		[OPC_SAM] = { true, false, true, true, },
+		[OPC_SAMB] = { true, true, true, true, },
+		[OPC_SAML] = { true, true, true, true, },
+		[OPC_SAMGQ] = { true, false, true, true, },
+		[OPC_GETLOD] = { true, false, true, true, },
+		[OPC_CONV] = { true, true, true, true, },
+		[OPC_CONVM] = { true, true, true, true, },
+		[OPC_GETSIZE] = { true, false, false, true, },
+		[OPC_GETBUF] = { false, false, false, true, },
+		[OPC_GETPOS] = { true, false, false, true, },
+		[OPC_GETINFO] = { false, false, false, true, },
+		[OPC_DSX] = { true, false, false, false, },
+		[OPC_DSY] = { true, false, false, false, },
+		[OPC_GATHER4R] = { true, false, true, true, },
+		[OPC_GATHER4G] = { true, false, true, true, },
+		[OPC_GATHER4B] = { true, false, true, true, },
+		[OPC_GATHER4A] = { true, false, true, true, },
+		[OPC_SAMGP0] = { true, false, true, true, },
+		[OPC_SAMGP1] = { true, false, true, true, },
+		[OPC_SAMGP2] = { true, false, true, true, },
+		[OPC_SAMGP3] = { true, false, true, true, },
+		[OPC_DSXPP_1] = { true, false, false, false, },
+		[OPC_DSYPP_1] = { true, false, false, false, },
+		[OPC_RGETPOS] = { false, false, false, false, },
+		[OPC_RGETINFO] = { false, false, false, false, },
+	};
+	instr_cat5_t *cat5 = &instr->cat5;
+	const bool known = cat5->opc < (sizeof(info) / sizeof(info[0]));
+	const bool has_src1 = known && info[cat5->opc].src1;
+	const bool has_src2 = known && info[cat5->opc].src2;
+	const bool has_samp = known && info[cat5->opc].samp;
+	const bool has_tex = known && info[cat5->opc].tex;
+	int i;
+
+	if (cat5->is_3d) printf(".3d");
+	if (cat5->is_a) printf(".a");
+	if (cat5->is_o) printf(".o");
+	if (cat5->is_p) printf(".p");
+	if (cat5->is_s) printf(".s");
+	if (cat5->is_s2en) printf(".s2en");
+
+	printf(" ");
+
+	switch (cat5->opc) {
+	case OPC_DSXPP_1:
+	case OPC_DSYPP_1:
+		/* no type prefix for these two */
+		break;
+	default:
+		printf("(%s)", type[cat5->type]);
+		break;
+	}
+
+	/* write mask, one letter per enabled component */
+	printf("(");
+	for (i = 0; i < 4; i++)
+		if (cat5->wrmask & (1 << i))
+			printf("%c", "xyzw"[i]);
+	printf(")");
+
+	print_reg_dst((reg_t)(cat5->dst), type_size(cat5->type) == 32, false);
+
+	if (has_src1) {
+		printf(", ");
+		print_reg_src((reg_t)(cat5->src1), cat5->full, false, false, false,
+				false, false, false);
+	}
+
+	if (cat5->is_s2en) {
+		printf(", ");
+		print_reg_src((reg_t)(cat5->s2en.src2), cat5->full, false, false, false,
+				false, false, false);
+		printf(", ");
+		print_reg_src((reg_t)(cat5->s2en.src3), false, false, false, false,
+				false, false, false);
+	} else {
+		if (cat5->is_o || has_src2) {
+			printf(", ");
+			print_reg_src((reg_t)(cat5->norm.src2), cat5->full,
+					false, false, false, false, false, false);
+		}
+		if (has_samp)
+			printf(", s#%d", cat5->norm.samp);
+		if (has_tex)
+			printf(", t#%d", cat5->norm.tex);
+	}
+
+	/* in verbose mode, show unknown bits that turned out to be non-zero */
+	if (debug & PRINT_VERBOSE) {
+		if (cat5->is_s2en) {
+			if (cat5->s2en.dummy1|cat5->s2en.dummy2|cat5->dummy2)
+				printf("\t{5: %x,%x,%x}", cat5->s2en.dummy1, cat5->s2en.dummy2, cat5->dummy2);
+		} else {
+			if (cat5->norm.dummy1|cat5->dummy2)
+				printf("\t{5: %x,%x}", cat5->norm.dummy1, cat5->dummy2);
+		}
+	}
+}
+
+/* Sign-extend the low `nbits` bits of `val`: when bit (nbits - 1) is set,
+ * all higher bits are filled with ones.  `val` is expected to carry no bits
+ * above `nbits`.
+ */
+static int32_t u2i(uint32_t val, int nbits)
+{
+	const uint32_t sign = val >> (nbits - 1);
+	const uint32_t upper = ~((1 << nbits) - 1);
+
+	return (sign * upper) | val;
+}
+
+/*
+ * Print a category 6 (load / store) instruction: the ".<type>" suffix, the
+ * memory operand with a g/p/l prefix chosen by the opcode, the register
+ * operand, and finally the immediate value.
+ */
+static void print_instr_cat6(instr_t *instr)
+{
+	instr_cat6_t *cat6 = &instr->cat6;
+
+	printf(".%s ", type[cat6->type]);
+
+	switch (cat6->opc) {
+	case OPC_LDG:
+	case OPC_LDP:
+	case OPC_LDL:
+	case OPC_LDLW:
+	case OPC_LDLV:
+		/* load instructions: */
+		print_reg_dst((reg_t)(cat6->a.dst), type_size(cat6->type) == 32, false);
+		printf(",");
+		if (cat6->opc == OPC_LDG)
+			printf("g");
+		else if (cat6->opc == OPC_LDP)
+			printf("p");
+		else
+			printf("l");
+		printf("[");
+		print_reg_src((reg_t)(cat6->a.src), true,
+				false, false, false, false, false, false);
+		if (cat6->a.off)
+			printf("%+d", cat6->a.off);
+		printf("]");
+		break;
+
+	case OPC_PREFETCH:
+		/* similar to load instructions: */
+		printf("g[");
+		print_reg_src((reg_t)(cat6->a.src), true,
+				false, false, false, false, false, false);
+		if (cat6->a.off)
+			printf("%+d", cat6->a.off);
+		printf("]");
+		break;
+
+	case OPC_STG:
+	case OPC_STP:
+	case OPC_STL:
+	case OPC_STLW:
+		/* store instructions: */
+		if (cat6->opc == OPC_STG)
+			printf("g");
+		else if (cat6->opc == OPC_STP)
+			printf("p");
+		else
+			printf("l");
+		printf("[");
+		print_reg_dst((reg_t)(cat6->b.dst), true, false);
+		/* the offset is split across two fields and is signed, 13 bits */
+		if (cat6->b.off || cat6->b.off_hi)
+			printf("%+d", u2i((cat6->b.off_hi << 8) | cat6->b.off, 13));
+		printf("]");
+		printf(",");
+		print_reg_src((reg_t)(cat6->b.src), type_size(cat6->type) == 32,
+				false, false, false, false, false, false);
+		break;
+
+	case OPC_STI:
+		/* sti has same encoding as other store instructions, but
+		 * slightly different syntax:
+		 */
+		print_reg_dst((reg_t)(cat6->b.dst), false /* XXX is it always half? */, false);
+		if (cat6->b.off || cat6->b.off_hi)
+			printf("%+d", u2i((cat6->b.off_hi << 8) | cat6->b.off, 13));
+		printf(",");
+		print_reg_src((reg_t)(cat6->b.src), type_size(cat6->type) == 32,
+				false, false, false, false, false, false);
+		break;
+	}
+
+	printf(", %d", cat6->iim_val);
+
+	if (!(debug & PRINT_VERBOSE))
+		return;
+
+	/* verbose: show unknown bits and "must be" fields with unexpected values */
+	switch (cat6->opc) {
+	case OPC_LDG:
+	case OPC_LDP:
+		/* load instructions: */
+		if (cat6->a.dummy1|cat6->a.dummy2|cat6->a.dummy3)
+			printf("\t{6: %x,%x,%x}", cat6->a.dummy1, cat6->a.dummy2, cat6->a.dummy3);
+		if ((cat6->a.must_be_one1 != 1) || (cat6->a.must_be_one2 != 1))
+			printf("{?? %d,%d ??}", cat6->a.must_be_one1, cat6->a.must_be_one2);
+		break;
+	case OPC_STG:
+	case OPC_STP:
+	case OPC_STI:
+		/* store instructions: */
+		if (cat6->b.dummy1|cat6->b.dummy2)
+			printf("\t{6: %x,%x}", cat6->b.dummy1, cat6->b.dummy2);
+		if ((cat6->b.must_be_one1 != 1) || (cat6->b.must_be_one2 != 1) ||
+				(cat6->b.must_be_zero1 != 0))
+			printf("{?? %d,%d,%d ??}", cat6->b.must_be_one1, cat6->b.must_be_one2,
+					cat6->b.must_be_zero1);
+		break;
+	}
+}
+
+/* size of largest OPC field of all the instruction categories: */
+#define NOPC_BITS 6
+
+/*
+ * Opcode lookup table, indexed by (cat << NOPC_BITS) | opc (see the OPC()
+ * macro below) and sized for 3 bits of category.  Each entry records its
+ * category and opcode, the mnemonic to print, and the per-category
+ * operand printer (print_instr_cat<cat>).  Slots that have no OPC() entry
+ * are zero-initialized, so their 'name' is NULL.
+ */
+struct opc_info {
+ uint16_t cat;
+ uint16_t opc;
+ const char *name;
+ void (*print)(instr_t *instr);
+} opcs[1 << (3+NOPC_BITS)] = {
+#define OPC(cat, opc, name) [((cat) << NOPC_BITS) | (opc)] = { (cat), (opc), #name, print_instr_cat##cat }
+ /* category 0: */
+ OPC(0, OPC_NOP, nop),
+ OPC(0, OPC_BR, br),
+ OPC(0, OPC_JUMP, jump),
+ OPC(0, OPC_CALL, call),
+ OPC(0, OPC_RET, ret),
+ OPC(0, OPC_KILL, kill),
+ OPC(0, OPC_END, end),
+ OPC(0, OPC_EMIT, emit),
+ OPC(0, OPC_CUT, cut),
+ OPC(0, OPC_CHMASK, chmask),
+ OPC(0, OPC_CHSH, chsh),
+ OPC(0, OPC_FLOW_REV, flow_rev),
+
+ /* category 1: */
+ /* single slot with an empty (but non-NULL) mnemonic: */
+ OPC(1, 0, ),
+
+ /* category 2: */
+ OPC(2, OPC_ADD_F, add.f),
+ OPC(2, OPC_MIN_F, min.f),
+ OPC(2, OPC_MAX_F, max.f),
+ OPC(2, OPC_MUL_F, mul.f),
+ OPC(2, OPC_SIGN_F, sign.f),
+ OPC(2, OPC_CMPS_F, cmps.f),
+ OPC(2, OPC_ABSNEG_F, absneg.f),
+ OPC(2, OPC_CMPV_F, cmpv.f),
+ OPC(2, OPC_FLOOR_F, floor.f),
+ OPC(2, OPC_CEIL_F, ceil.f),
+ OPC(2, OPC_RNDNE_F, rndne.f),
+ OPC(2, OPC_RNDAZ_F, rndaz.f),
+ OPC(2, OPC_TRUNC_F, trunc.f),
+ OPC(2, OPC_ADD_U, add.u),
+ OPC(2, OPC_ADD_S, add.s),
+ OPC(2, OPC_SUB_U, sub.u),
+ OPC(2, OPC_SUB_S, sub.s),
+ OPC(2, OPC_CMPS_U, cmps.u),
+ OPC(2, OPC_CMPS_S, cmps.s),
+ OPC(2, OPC_MIN_U, min.u),
+ OPC(2, OPC_MIN_S, min.s),
+ OPC(2, OPC_MAX_U, max.u),
+ OPC(2, OPC_MAX_S, max.s),
+ OPC(2, OPC_ABSNEG_S, absneg.s),
+ OPC(2, OPC_AND_B, and.b),
+ OPC(2, OPC_OR_B, or.b),
+ OPC(2, OPC_NOT_B, not.b),
+ OPC(2, OPC_XOR_B, xor.b),
+ OPC(2, OPC_CMPV_U, cmpv.u),
+ OPC(2, OPC_CMPV_S, cmpv.s),
+ OPC(2, OPC_MUL_U, mul.u),
+ OPC(2, OPC_MUL_S, mul.s),
+ OPC(2, OPC_MULL_U, mull.u),
+ OPC(2, OPC_BFREV_B, bfrev.b),
+ OPC(2, OPC_CLZ_S, clz.s),
+ OPC(2, OPC_CLZ_B, clz.b),
+ OPC(2, OPC_SHL_B, shl.b),
+ OPC(2, OPC_SHR_B, shr.b),
+ OPC(2, OPC_ASHR_B, ashr.b),
+ OPC(2, OPC_BARY_F, bary.f),
+ OPC(2, OPC_MGEN_B, mgen.b),
+ OPC(2, OPC_GETBIT_B, getbit.b),
+ OPC(2, OPC_SETRM, setrm),
+ OPC(2, OPC_CBITS_B, cbits.b),
+ OPC(2, OPC_SHB, shb),
+ OPC(2, OPC_MSAD, msad),
+
+ /* category 3: */
+ OPC(3, OPC_MAD_U16, mad.u16),
+ OPC(3, OPC_MADSH_U16, madsh.u16),
+ OPC(3, OPC_MAD_S16, mad.s16),
+ OPC(3, OPC_MADSH_M16, madsh.m16),
+ OPC(3, OPC_MAD_U24, mad.u24),
+ OPC(3, OPC_MAD_S24, mad.s24),
+ OPC(3, OPC_MAD_F16, mad.f16),
+ OPC(3, OPC_MAD_F32, mad.f32),
+ OPC(3, OPC_SEL_B16, sel.b16),
+ OPC(3, OPC_SEL_B32, sel.b32),
+ OPC(3, OPC_SEL_S16, sel.s16),
+ OPC(3, OPC_SEL_S32, sel.s32),
+ OPC(3, OPC_SEL_F16, sel.f16),
+ OPC(3, OPC_SEL_F32, sel.f32),
+ OPC(3, OPC_SAD_S16, sad.s16),
+ OPC(3, OPC_SAD_S32, sad.s32),
+
+ /* category 4: */
+ OPC(4, OPC_RCP, rcp),
+ OPC(4, OPC_RSQ, rsq),
+ OPC(4, OPC_LOG2, log2),
+ OPC(4, OPC_EXP2, exp2),
+ OPC(4, OPC_SIN, sin),
+ OPC(4, OPC_COS, cos),
+ OPC(4, OPC_SQRT, sqrt),
+
+ /* category 5: */
+ OPC(5, OPC_ISAM, isam),
+ OPC(5, OPC_ISAML, isaml),
+ OPC(5, OPC_ISAMM, isamm),
+ OPC(5, OPC_SAM, sam),
+ OPC(5, OPC_SAMB, samb),
+ OPC(5, OPC_SAML, saml),
+ OPC(5, OPC_SAMGQ, samgq),
+ OPC(5, OPC_GETLOD, getlod),
+ OPC(5, OPC_CONV, conv),
+ OPC(5, OPC_CONVM, convm),
+ OPC(5, OPC_GETSIZE, getsize),
+ OPC(5, OPC_GETBUF, getbuf),
+ OPC(5, OPC_GETPOS, getpos),
+ OPC(5, OPC_GETINFO, getinfo),
+ OPC(5, OPC_DSX, dsx),
+ OPC(5, OPC_DSY, dsy),
+ OPC(5, OPC_GATHER4R, gather4r),
+ OPC(5, OPC_GATHER4G, gather4g),
+ OPC(5, OPC_GATHER4B, gather4b),
+ OPC(5, OPC_GATHER4A, gather4a),
+ OPC(5, OPC_SAMGP0, samgp0),
+ OPC(5, OPC_SAMGP1, samgp1),
+ OPC(5, OPC_SAMGP2, samgp2),
+ OPC(5, OPC_SAMGP3, samgp3),
+ OPC(5, OPC_DSXPP_1, dsxpp.1),
+ OPC(5, OPC_DSYPP_1, dsypp.1),
+ OPC(5, OPC_RGETPOS, rgetpos),
+ OPC(5, OPC_RGETINFO, rgetinfo),
+
+
+ /* category 6: */
+ OPC(6, OPC_LDG, ldg),
+ OPC(6, OPC_LDL, ldl),
+ OPC(6, OPC_LDP, ldp),
+ OPC(6, OPC_STG, stg),
+ OPC(6, OPC_STL, stl),
+ OPC(6, OPC_STP, stp),
+ OPC(6, OPC_STI, sti),
+ OPC(6, OPC_G2L, g2l),
+ OPC(6, OPC_L2G, l2g),
+ OPC(6, OPC_PREFETCH, prefetch),
+ OPC(6, OPC_LDLW, ldlw),
+ OPC(6, OPC_STLW, stlw),
+ OPC(6, OPC_RESFMT, resfmt),
+ /* NOTE(review): mnemonic is spelled "resinf" here; confirm whether "resinfo" was intended */
+ OPC(6, OPC_RESINFO, resinf),
+ OPC(6, OPC_ATOMIC_ADD_L, atomic.add.l),
+ OPC(6, OPC_ATOMIC_SUB_L, atomic.sub.l),
+ OPC(6, OPC_ATOMIC_XCHG_L, atomic.xchg.l),
+ OPC(6, OPC_ATOMIC_INC_L, atomic.inc.l),
+ OPC(6, OPC_ATOMIC_DEC_L, atomic.dec.l),
+ OPC(6, OPC_ATOMIC_CMPXCHG_L, atomic.cmpxchg.l),
+ OPC(6, OPC_ATOMIC_MIN_L, atomic.min.l),
+ OPC(6, OPC_ATOMIC_MAX_L, atomic.max.l),
+ OPC(6, OPC_ATOMIC_AND_L, atomic.and.l),
+ OPC(6, OPC_ATOMIC_OR_L, atomic.or.l),
+ OPC(6, OPC_ATOMIC_XOR_L, atomic.xor.l),
+ OPC(6, OPC_LDGB_TYPED_4D, ldgb.typed.4d),
+ OPC(6, OPC_STGB_4D_4, stgb.4d.4),
+ OPC(6, OPC_STIB, stib),
+ OPC(6, OPC_LDC_4, ldc.4),
+ OPC(6, OPC_LDLV, ldlv),
+
+
+#undef OPC
+};
+
+/* look up the opcs[] entry for an instruction; the index is built the same way as in OPC() */
+#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | getopc(instr)]))
+
+/*
+ * Return the opcode field of 'instr', read from the layout that matches
+ * its category.  Category 1 and any category not handled here report 0.
+ */
+static uint32_t getopc(instr_t *instr)
+{
+	uint32_t opc = 0;
+
+	switch (instr->opc_cat) {
+	case 0:
+		opc = instr->cat0.opc;
+		break;
+	case 2:
+		opc = instr->cat2.opc;
+		break;
+	case 3:
+		opc = instr->cat3.opc;
+		break;
+	case 4:
+		opc = instr->cat4.opc;
+		break;
+	case 5:
+		opc = instr->cat5.opc;
+		break;
+	case 6:
+		opc = instr->cat6.opc;
+		break;
+	default:
+		/* includes category 1, which has no opcode field to read */
+		break;
+	}
+
+	return opc;
+}
+
+/*
+ * Disassemble and print one instruction (two dwords).
+ *
+ * dwords: the instruction, low dword first
+ * level:  index into levels[] selecting the line prefix
+ * n:      instruction number printed at the start of the line
+ *
+ * Also updates the file-scope 'repeat' value and calls process_reg_dst()
+ * once the line has been printed.
+ */
+static void print_instr(uint32_t *dwords, int level, int n)
+{
+	instr_t *instr = (instr_t *)dwords;
+	uint32_t opc = getopc(instr);
+	struct opc_info *info;
+
+	printf("%s%04d[%08xx_%08xx] ", levels[level], n, dwords[1], dwords[0]);
+
+#if 0
+	/* print unknown bits: */
+	if (debug & PRINT_RAW)
+		printf("[%08xx_%08xx] ", dwords[1] & 0x001ff800, dwords[0] & 0x00000000);
+
+	if (debug & PRINT_VERBOSE)
+		printf("%d,%02d ", instr->opc_cat, opc);
+#endif
+
+	/* NOTE: the flag order below is kept as-is so that output can be
+	 * diff'd against the llvm-a3xx disassembler.
+	 */
+
+	if (instr->sync)
+		printf("(sy)");
+
+	/* (ss) and (rptN) are only printed for categories 0..4: */
+	if (instr->ss && (instr->opc_cat <= 4))
+		printf("(ss)");
+
+	if (instr->jmp_tgt)
+		printf("(jp)");
+
+	repeat = 0;
+	if (instr->repeat && (instr->opc_cat <= 4)) {
+		repeat = instr->repeat;
+		printf("(rpt%d)", instr->repeat);
+	}
+
+	/* (ul) is only printed for categories 2..4: */
+	if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4)))
+		printf("(ul)");
+
+	info = GETINFO(instr);
+
+	if (!info->name) {
+		/* no table entry for this category/opcode pair */
+		printf("unknown(%d,%d)", instr->opc_cat, opc);
+	} else {
+		printf("%s", info->name);
+		info->print(instr);
+	}
+
+	printf("\n");
+
+	process_reg_dst();
+}
+
+/*
+ * Disassemble 'sizedwords' dwords of shader code, two dwords per
+ * instruction, and print register statistics afterwards.
+ *
+ * The register tracking state in 'regs' is cleared first.  'level' is
+ * passed through to the printers; 'type' is not used.  Always returns 0.
+ */
+int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, enum shader_t type)
+{
+	int pos = 0;
+
+	assert((sizedwords % 2) == 0);
+
+	memset(&regs, 0, sizeof(regs));
+
+	while (pos < sizedwords) {
+		print_instr(dwords + pos, level, pos / 2);
+		pos += 2;
+	}
+
+	print_reg_stats(level);
+
+	return 0;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_blend.c b/src/gallium/drivers/freedreno/a3xx/fd3_blend.c
new file mode 100644
index 00000000000..395228d4589
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_blend.c
@@ -0,0 +1,87 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "fd3_blend.h"
+#include "fd3_context.h"
+#include "fd3_util.h"
+
+/*
+ * Create the a3xx blend state object for 'cso'.
+ *
+ * Returns a newly allocated fd3_blend_stateobj holding a copy of 'cso'
+ * plus the precomputed per-MRT register values, or NULL if the state
+ * uses logicop or independent blend (both unsupported here) or if the
+ * allocation fails.
+ */
+void *
+fd3_blend_state_create(struct pipe_context *pctx,
+		const struct pipe_blend_state *cso)
+{
+	const struct pipe_rt_blend_state *rt;
+	struct fd3_blend_stateobj *so;
+	uint32_t blend_control, control;
+	unsigned i;
+
+	if (cso->logicop_enable) {
+		DBG("Unsupported! logicop");
+		return NULL;
+	}
+
+	if (cso->independent_blend_enable) {
+		DBG("Unsupported! independent blend state");
+		return NULL;
+	}
+
+	so = CALLOC_STRUCT(fd3_blend_stateobj);
+	if (!so)
+		return NULL;
+
+	so->base = *cso;
+
+	/* Independent blend was rejected above, and without it only rt[0]
+	 * is meaningful: it applies to every render target.  Reading
+	 * rt[1..] would pick up whatever the state tracker left there
+	 * (e.g. a zero colormask), so derive all MRT slots from rt[0].
+	 */
+	rt = &cso->rt[0];
+
+	blend_control =
+		A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
+		A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(fd_blend_func(rt->rgb_func)) |
+		A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) |
+		A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) |
+		A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(fd_blend_func(rt->alpha_func)) |
+		A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor)) |
+		A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE;
+
+	control =
+		A3XX_RB_MRT_CONTROL_ROP_CODE(12) |
+		A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
+
+	if (rt->blend_enable)
+		control |=
+			A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
+			A3XX_RB_MRT_CONTROL_BLEND |
+			A3XX_RB_MRT_CONTROL_BLEND2;
+
+	if (cso->dither)
+		control |= A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS);
+
+	for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
+		so->rb_mrt[i].blend_control = blend_control;
+		so->rb_mrt[i].control = control;
+	}
+
+	return so;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_blend.h b/src/gallium/drivers/freedreno/a3xx/fd3_blend.h
new file mode 100644
index 00000000000..d269d74dd74
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_blend.h
@@ -0,0 +1,52 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_BLEND_H_
+#define FD3_BLEND_H_
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+/*
+ * Driver side blend state: the gallium blend state it was created from,
+ * followed by register values precomputed per render target.
+ */
+struct fd3_blend_stateobj {
+ /* must stay the first member: fd3_blend_stateobj() casts from a pointer to it */
+ struct pipe_blend_state base;
+ struct {
+ /* built from A3XX_RB_MRT_BLEND_CONTROL_* fields */
+ uint32_t blend_control;
+ /* built from A3XX_RB_MRT_CONTROL_* fields */
+ uint32_t control;
+ } rb_mrt[4];
+};
+
+/* Convert a pointer to the embedded gallium blend state back to its fd3 wrapper. */
+static INLINE struct fd3_blend_stateobj *
+fd3_blend_stateobj(struct pipe_blend_state *blend)
+{
+	struct fd3_blend_stateobj *so = (struct fd3_blend_stateobj *)blend;
+
+	return so;
+}
+
+void * fd3_blend_state_create(struct pipe_context *pctx,
+ const struct pipe_blend_state *cso);
+
+#endif /* FD3_BLEND_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
new file mode 100644
index 00000000000..d844cc0f164
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -0,0 +1,1240 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include <stdarg.h>
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_ureg.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_strings.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_scan.h"
+
+#include "fd3_compiler.h"
+#include "fd3_program.h"
+#include "fd3_util.h"
+
+#include "instr-a3xx.h"
+#include "ir-a3xx.h"
+
+/* ************************************************************************* */
+/* TODO: split this out, or find an existing helper to use (e.g. main/bitset.h) */
+
+/* upper bound (exclusive) on ir3_register::num accepted by regmask_idx() */
+#define MAX_REG 256
+
+/* bitmask with one bit per register: MAX_REG bits for full registers
+ * followed by MAX_REG bits for half (IR3_REG_HALF) registers
+ */
+typedef uint8_t regmask_t[2 * MAX_REG / 8];
+
+/* Bit position of 'reg' within a regmask_t; half registers use the upper MAX_REG bits. */
+static unsigned regmask_idx(struct ir3_register *reg)
+{
+	unsigned idx = reg->num;
+
+	assert(idx < MAX_REG);
+
+	return (reg->flags & IR3_REG_HALF) ? (idx + MAX_REG) : idx;
+}
+
+/* Mark 'reg' in 'regmask'. */
+static void regmask_set(regmask_t regmask, struct ir3_register *reg)
+{
+	unsigned bit = regmask_idx(reg);
+	uint8_t *byte = &regmask[bit / 8];
+
+	*byte |= 1 << (bit % 8);
+}
+
+/* Test 'reg' in 'regmask'; returns the (non-normalized) masked bit, so non-zero means set. */
+static unsigned regmask_get(regmask_t regmask, struct ir3_register *reg)
+{
+	unsigned bit = regmask_idx(reg);
+	uint8_t byte = regmask[bit / 8];
+
+	return byte & (1 << (bit % 8));
+}
+
+/* ************************************************************************* */
+
+/* per-shader state used while translating TGSI tokens into ir3 instructions */
+struct fd3_compile_context {
+ const struct tgsi_token *tokens;
+ struct ir3_shader *ir;
+ struct fd3_shader_stateobj *so;
+
+ struct tgsi_parse_context parser;
+ /* TGSI processor type, taken from the parsed header in compile_init() */
+ unsigned type;
+
+ /* filled in by tgsi_scan_shader() in compile_init() */
+ struct tgsi_shader_info info;
+
+ /* last input dst (for setting (ei) flag): */
+ struct ir3_register *last_input;
+
+ unsigned next_inloc;
+ /* number of internal temporaries handed out by get_internal_temp() */
+ unsigned num_internal_temps;
+
+ /* track registers which need to synchronize w/ "complex alu" cat3
+ * instruction pipeline:
+ */
+ regmask_t needs_ss;
+
+ /* track registers which need to synchronize with texture fetch
+ * pipeline:
+ */
+ regmask_t needs_sy;
+
+ /* inputs start at r0, outputs start after the last input, and
+ * temporaries start after the last output (this is the layout
+ * that compile_init() sets up).
+ *
+ * We could be more clever, because this is not a hw restriction,
+ * but probably best just to implement an optimizing pass to
+ * reduce the # of registers used and get rid of redundant mov's
+ * (to output register).
+ */
+ unsigned base_reg[TGSI_FILE_COUNT];
+
+ /* idx/slot for last compiler generated immediate */
+ unsigned immediate_idx;
+
+ /* stack of the instructions that start (potentially nested)
+ * branches, so that we can fix up the branch target on the
+ * corresponding END instruction
+ */
+ struct ir3_instruction *branch[16];
+ unsigned int branch_count;
+
+ /* used when dst is same as one of the src, to avoid overwriting a
+ * src element before the remaining scalar instructions that make
+ * up the vector operation
+ */
+ struct tgsi_dst_register tmp_dst;
+ struct tgsi_src_register tmp_src;
+};
+
+/*
+ * Prepare 'ctx' for compiling 'tokens' into 'so': reset the bookkeeping,
+ * scan the shader, assign the base register of each TGSI file and start
+ * the TGSI parser.
+ *
+ * Returns the tgsi_parse_init() result; anything other than
+ * TGSI_PARSE_OK means the parser could not be set up and ctx->type was
+ * not assigned.
+ */
+static unsigned
+compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
+		const struct tgsi_token *tokens)
+{
+	unsigned status;
+
+	ctx->so = so;
+	ctx->ir = so->ir;
+	ctx->tokens = tokens;
+	ctx->branch_count = 0;
+	ctx->num_internal_temps = 0;
+	ctx->next_inloc = 8;
+	ctx->last_input = NULL;
+
+	memset(ctx->base_reg, 0, sizeof(ctx->base_reg));
+	memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
+	memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
+
+	tgsi_scan_shader(tokens, &ctx->info);
+
+	/* constants first, immediates right behind them: */
+	ctx->base_reg[TGSI_FILE_CONSTANT] = 0;
+	ctx->base_reg[TGSI_FILE_IMMEDIATE] =
+		ctx->info.file_count[TGSI_FILE_CONSTANT];
+
+	/* inputs, then outputs, then temporaries: */
+	ctx->base_reg[TGSI_FILE_INPUT] = 0;
+	ctx->base_reg[TGSI_FILE_OUTPUT] =
+		ctx->info.file_count[TGSI_FILE_INPUT];
+	ctx->base_reg[TGSI_FILE_TEMPORARY] =
+		ctx->info.file_count[TGSI_FILE_INPUT] +
+		ctx->info.file_count[TGSI_FILE_OUTPUT];
+
+	/* compiler generated immediates are appended after the shader's own
+	 * (immediate_idx counts scalar slots, hence the factor of 4):
+	 */
+	ctx->immediate_idx = 4 * (ctx->info.file_count[TGSI_FILE_CONSTANT] +
+			ctx->info.file_count[TGSI_FILE_IMMEDIATE]);
+	so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE];
+
+	status = tgsi_parse_init(&ctx->parser, tokens);
+	if (status == TGSI_PARSE_OK)
+		ctx->type = ctx->parser.FullHeader.Processor.Processor;
+
+	return status;
+}
+
+/* Release the TGSI parser state set up by compile_init(). */
+static void
+compile_free(struct fd3_compile_context *ctx)
+{
+	struct tgsi_parse_context *parser = &ctx->parser;
+
+	tgsi_parse_free(parser);
+}
+
+/* describes how one TGSI opcode is translated: a handler plus its parameters */
+struct instr_translater {
+ /* handler; receives its own table entry as 't' */
+ void (*fxn)(const struct instr_translater *t,
+ struct fd3_compile_context *ctx,
+ struct tgsi_full_instruction *inst);
+ opc_t opc;
+ opc_t hopc; /* opc to use for half_precision mode, if different */
+ /* handler specific argument (e.g. trans_dotp uses it as a component count) */
+ unsigned arg;
+};
+
+/*
+ * Append a destination register for component 'chan' of TGSI register
+ * 'dst' to 'instr' and return it.  Only OUTPUT and TEMPORARY files are
+ * supported; the register is flagged half when the shader is compiled
+ * in half precision mode.
+ */
+static struct ir3_register *
+add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
+		const struct tgsi_dst_register *dst, unsigned chan)
+{
+	unsigned flags = ctx->so->half_precision ? IR3_REG_HALF : 0;
+	unsigned num = 0;
+
+	if ((dst->File == TGSI_FILE_OUTPUT) ||
+			(dst->File == TGSI_FILE_TEMPORARY)) {
+		/* offset the TGSI index by where that file starts: */
+		num = dst->Index + ctx->base_reg[dst->File];
+	} else {
+		DBG("unsupported dst register file: %s",
+			tgsi_file_name(dst->File));
+		assert(0);
+	}
+
+	return ir3_reg_create(instr, regid(num, chan), flags);
+}
+
+/*
+ * Append a source register for component 'chan' of TGSI register 'src'
+ * to 'instr' and return it.
+ *
+ * IMMEDIATE and CONSTANT sources become const registers, INPUT and
+ * TEMPORARY sources become normal registers.  If the register is
+ * currently tracked in needs_ss / needs_sy, the matching sync flag is
+ * set on 'instr' and that whole mask is cleared.
+ */
+static struct ir3_register *
+add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
+		const struct tgsi_src_register *src, unsigned chan)
+{
+	struct ir3_register *reg;
+	unsigned flags = 0, num = 0;
+
+	if ((src->File == TGSI_FILE_IMMEDIATE) ||
+			(src->File == TGSI_FILE_CONSTANT)) {
+		/* TODO if possible, use actual immediate instead of const.. but
+		 * TGSI has vec4 immediates, we can only embed scalar (of limited
+		 * size, depending on instruction..)
+		 */
+		flags |= IR3_REG_CONST;
+		num = src->Index + ctx->base_reg[src->File];
+	} else if ((src->File == TGSI_FILE_INPUT) ||
+			(src->File == TGSI_FILE_TEMPORARY)) {
+		num = src->Index + ctx->base_reg[src->File];
+	} else {
+		DBG("unsupported src register file: %s",
+			tgsi_file_name(src->File));
+		assert(0);
+	}
+
+	if (ctx->so->half_precision)
+		flags |= IR3_REG_HALF;
+	if (src->Negate)
+		flags |= IR3_REG_NEGATE;
+	if (src->Absolute)
+		flags |= IR3_REG_ABS;
+
+	reg = ir3_reg_create(instr, regid(num, chan), flags);
+
+	/* one sync flag covers everything pending, so drop the whole mask: */
+	if (regmask_get(ctx->needs_ss, reg)) {
+		instr->flags |= IR3_INSTR_SS;
+		memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
+	}
+
+	if (regmask_get(ctx->needs_sy, reg)) {
+		instr->flags |= IR3_INSTR_SY;
+		memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
+	}
+
+	return reg;
+}
+
+/*
+ * Fill in 'src' so that it reads back the register described by 'dst':
+ * same file and index, identity swizzle, no abs/negate modifiers.
+ */
+static void
+src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
+{
+	/* which register: */
+	src->File      = dst->File;
+	src->Index     = dst->Index;
+	src->Indirect  = dst->Indirect;
+	src->Dimension = dst->Dimension;
+
+	/* how it is read: */
+	src->SwizzleX  = TGSI_SWIZZLE_X;
+	src->SwizzleY  = TGSI_SWIZZLE_Y;
+	src->SwizzleZ  = TGSI_SWIZZLE_Z;
+	src->SwizzleW  = TGSI_SWIZZLE_W;
+	src->Negate    = 0;
+	src->Absolute  = 0;
+}
+
+/* Hand out the next internal temporary, as a dst/src pair naming the
+ * same register, for use by the instruction sequence generated for a
+ * single TGSI op.  Internal temporaries are numbered after the
+ * temporaries declared by the shader.
+ */
+static void
+get_internal_temp(struct fd3_compile_context *ctx,
+		struct tgsi_dst_register *tmp_dst,
+		struct tgsi_src_register *tmp_src)
+{
+	int slot = ctx->num_internal_temps++;
+
+	tmp_dst->File = TGSI_FILE_TEMPORARY;
+	tmp_dst->Index = ctx->info.file_count[TGSI_FILE_TEMPORARY] + slot;
+	tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
+	tmp_dst->Dimension = 0;
+	tmp_dst->Indirect = 0;
+
+	src_from_dst(tmp_src, tmp_dst);
+}
+
+/*
+ * Make 'reg' refer to an immediate holding 'val', replicated to all four
+ * channels through the swizzle.
+ *
+ * 'val' is the raw bit pattern of a float (the caller visible in this
+ * file passes fui(1.0)).  Existing slots are searched first: an exact
+ * match is reused directly, and a slot holding the same value with the
+ * opposite sign is reused with reg->Negate set.  Otherwise a new scalar
+ * slot is appended.
+ *
+ * NOTE(review): the capacity of ctx->so->immediates[] is not visible
+ * here, so no bounds check is done when appending -- confirm it is large
+ * enough for the worst case.
+ */
+static void
+get_immediate(struct fd3_compile_context *ctx,
+		struct tgsi_src_register *reg, uint32_t val)
+{
+	/* Negating a float flips its sign bit.  Integer negation of the bit
+	 * pattern is something else entirely (for 1.0f, 0x3f800000, it is
+	 * 0xc0800000, i.e. -4.0f), so compare against the sign-flipped
+	 * pattern when looking for a slot to reuse with Negate.
+	 */
+	const uint32_t negated = val ^ 0x80000000u;
+	unsigned neg = 0, swiz = 0, idx = 0, i;
+	/* actually maps 1:1 currently.. not sure if that is safe to rely on: */
+	static const unsigned swiz2tgsi[] = {
+		TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
+	};
+
+	for (i = 0; i < ctx->immediate_idx; i++) {
+		uint32_t cur;
+
+		swiz = i % 4;
+		idx = i / 4;
+		cur = ctx->so->immediates[idx].val[swiz];
+
+		if (cur == val) {
+			neg = 0;
+			break;
+		}
+
+		if (cur == negated) {
+			neg = 1;
+			break;
+		}
+	}
+
+	if (i == ctx->immediate_idx) {
+		/* need to generate a new immediate: */
+		swiz = i % 4;
+		idx = i / 4;
+		neg = 0;
+		ctx->so->immediates[idx].val[swiz] = val;
+		ctx->so->immediates_count = idx + 1;
+		ctx->immediate_idx++;
+	}
+
+	reg->File      = TGSI_FILE_IMMEDIATE;
+	reg->Indirect  = 0;
+	reg->Dimension = 0;
+	reg->Index     = idx;
+	reg->Absolute  = 0;
+	reg->Negate    = neg;
+	reg->SwizzleX  = swiz2tgsi[swiz];
+	reg->SwizzleY  = swiz2tgsi[swiz];
+	reg->SwizzleZ  = swiz2tgsi[swiz];
+	reg->SwizzleW  = swiz2tgsi[swiz];
+}
+
+/* Float type matching the shader's precision mode. */
+static type_t
+get_type(struct fd3_compile_context *ctx)
+{
+	if (ctx->so->half_precision)
+		return TYPE_F16;
+
+	return TYPE_F32;
+}
+
+/* Return the swizzle of 'src' for channel 'chan' (0..3 = X..W); any
+ * other channel trips the assert and yields 0.
+ */
+static unsigned
+src_swiz(struct tgsi_src_register *src, int chan)
+{
+	const unsigned swizzles[4] = {
+		src->SwizzleX, src->SwizzleY, src->SwizzleZ, src->SwizzleW,
+	};
+
+	if ((chan >= 0) && (chan < 4))
+		return swizzles[chan];
+
+	assert(0);
+	return 0;
+}
+
+/*
+ * Emit a vec4 move from 'src' to 'dst' as four scalar slots: a cat1 mov
+ * for every channel in dst's writemask and a nop for every channel that
+ * is masked out.
+ */
+static void
+create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
+		struct tgsi_src_register *src)
+{
+	type_t mov_type = get_type(ctx);
+	unsigned chan;
+
+	for (chan = 0; chan < 4; chan++) {
+		struct ir3_instruction *mov;
+
+		if (!(dst->WriteMask & (1 << chan))) {
+			/* channel not written, keep the slot occupied: */
+			ir3_instr_create(ctx->ir, 0, OPC_NOP);
+			continue;
+		}
+
+		mov = ir3_instr_create(ctx->ir, 1, 0);
+		mov->cat1.src_type = mov_type;
+		mov->cat1.dst_type = mov_type;
+		add_dst_reg(ctx, mov, dst, chan);
+		add_src_reg(ctx, mov, src, src_swiz(src, chan));
+	}
+}
+
+/*
+ * Pick the destination an instruction sequence should write to.
+ *
+ * Normally this is the instruction's own dst.  If any source reads the
+ * same register (same file and index), an internal temporary carrying
+ * the same writemask is returned instead; put_dst() then copies it back.
+ */
+static struct tgsi_dst_register *
+get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst)
+{
+	struct tgsi_dst_register *real_dst = &inst->Dst[0].Register;
+	unsigned n;
+
+	for (n = 0; n < inst->Instruction.NumSrcRegs; n++) {
+		struct tgsi_src_register *src = &inst->Src[n].Register;
+
+		if ((src->File != real_dst->File) ||
+				(src->Index != real_dst->Index))
+			continue;
+
+		/* dst aliases this src, go through a temporary: */
+		get_internal_temp(ctx, &ctx->tmp_dst, &ctx->tmp_src);
+		ctx->tmp_dst.WriteMask = real_dst->WriteMask;
+		return &ctx->tmp_dst;
+	}
+
+	return real_dst;
+}
+
+/*
+ * Counterpart of get_dst(): if 'dst' is not the instruction's own
+ * destination, move the temporary result (ctx->tmp_src) into it.
+ */
+static void
+put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst,
+		struct tgsi_dst_register *dst)
+{
+	struct tgsi_dst_register *real_dst = &inst->Dst[0].Register;
+
+	if (dst == real_dst)
+		return;
+
+	create_mov(ctx, real_dst, &ctx->tmp_src);
+}
+
+/* helper to generate the necessary repeat and/or additional instructions
+ * to turn a scalar instruction into a vector operation:
+ *
+ * 'instr' is a freshly created instruction without registers yet.  The
+ * variable arguments are 'nsrcs' pairs of
+ * (struct tgsi_src_register *src, unsigned flags), where flags are extra
+ * register flags OR'd into that source.
+ *
+ * 'instr' itself is used for the first channel set in dst's writemask
+ * and a clone of it for each further one; the total is then padded with
+ * nop's up to four instructions.
+ */
+static void
+vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
+ struct tgsi_dst_register *dst, int nsrcs, ...)
+{
+ va_list ap;
+ int i, j, n = 0;
+
+ /* registers are first added for component 0 and patched per channel below: */
+ add_dst_reg(ctx, instr, dst, 0);
+
+ va_start(ap, nsrcs);
+ for (j = 0; j < nsrcs; j++) {
+ struct tgsi_src_register *src =
+ va_arg(ap, struct tgsi_src_register *);
+ unsigned flags = va_arg(ap, unsigned);
+ add_src_reg(ctx, instr, src, 0)->flags |= flags;
+ }
+ va_end(ap);
+
+ for (i = 0; i < 4; i++) {
+ if (dst->WriteMask & (1 << i)) {
+ struct ir3_instruction *cur;
+
+ if (n++ == 0) {
+ cur = instr;
+ } else {
+ /* sync/jump-target flags stay on the first instruction only: */
+ cur = ir3_instr_clone(instr);
+ cur->flags &= ~(IR3_INSTR_SY | IR3_INSTR_SS | IR3_INSTR_JP);
+ }
+
+ /* fix-up dst register component: */
+ cur->regs[0]->num = regid(cur->regs[0]->num >> 2, i);
+
+ /* fix-up src register component: */
+ va_start(ap, nsrcs);
+ for (j = 0; j < nsrcs; j++) {
+ struct tgsi_src_register *src =
+ va_arg(ap, struct tgsi_src_register *);
+ /* skip the flags argument, it was applied in the first pass: */
+ (void)va_arg(ap, unsigned);
+ cur->regs[j+1]->num =
+ regid(cur->regs[j+1]->num >> 2,
+ src_swiz(src, i));
+ }
+ va_end(ap);
+ }
+ }
+
+ /* pad w/ nop's.. at least until we are clever enough to
+ * figure out if we really need to..
+ */
+ for (; n < 4; n++) {
+ ir3_instr_create(instr->shader, 0, OPC_NOP);
+ }
+}
+
+/*
+ * Handlers for TGSI instructions which do not have a 1:1 mapping to
+ * native instructions:
+ */
+
+/*
+ * Dot product over the first t->arg components of src0 and src1.
+ *
+ * Emitted as one mul.f followed by (t->arg - 1) mad's that accumulate
+ * into component 0 of an internal temporary, with a nop after each of
+ * them; the result is then moved to the instruction's dst.
+ */
+static void
+trans_dotp(const struct instr_translater *t,
+ struct fd3_compile_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct ir3_instruction *instr;
+ struct tgsi_dst_register tmp_dst;
+ struct tgsi_src_register tmp_src;
+ struct tgsi_dst_register *dst = &inst->Dst[0].Register;
+ struct tgsi_src_register *src0 = &inst->Src[0].Register;
+ struct tgsi_src_register *src1 = &inst->Src[1].Register;
+ unsigned swiz0[] = { src0->SwizzleX, src0->SwizzleY, src0->SwizzleZ, src0->SwizzleW };
+ unsigned swiz1[] = { src1->SwizzleX, src1->SwizzleY, src1->SwizzleZ, src1->SwizzleW };
+ opc_t opc_mad = ctx->so->half_precision ? OPC_MAD_F16 : OPC_MAD_F32;
+ unsigned i;
+
+ assert(inst->Instruction.NumSrcRegs == 2);
+ assert(inst->Instruction.NumDstRegs == 1);
+
+ get_internal_temp(ctx, &tmp_dst, &tmp_src);
+
+ /* Blob compiler never seems to use a const in src1 position for
+ * mad.*, although there does seem (according to disassembler
+ * hidden in libllvm-a3xx.so) to be a bit to indicate that src1
+ * is a const. Not sure if this is a hw bug, or simply that the
+ * disassembler lies.
+ */
+ if ((src1->File == TGSI_FILE_IMMEDIATE) ||
+ (src1->File == TGSI_FILE_CONSTANT)) {
+
+ /* the mov to tmp unswizzles src1, so now we have tmp.xyzw:
+ */
+ for (i = 0; i < 4; i++)
+ swiz1[i] = i;
+
+ /* the first mul.f will clobber tmp.x, but that is ok
+ * because after that point we no longer need tmp.x:
+ */
+ create_mov(ctx, &tmp_dst, src1);
+ src1 = &tmp_src;
+ }
+
+ /* tmp.x = src0[0] * src1[0] */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+ add_dst_reg(ctx, instr, &tmp_dst, 0);
+ add_src_reg(ctx, instr, src0, swiz0[0]);
+ add_src_reg(ctx, instr, src1, swiz1[0]);
+
+ /* tmp.x = src0[i] * src1[i] + tmp.x, for the remaining components */
+ for (i = 1; i < t->arg; i++) {
+ ir3_instr_create(ctx->ir, 0, OPC_NOP);
+
+ instr = ir3_instr_create(ctx->ir, 3, opc_mad);
+ add_dst_reg(ctx, instr, &tmp_dst, 0);
+ add_src_reg(ctx, instr, src0, swiz0[i]);
+ add_src_reg(ctx, instr, src1, swiz1[i]);
+ add_src_reg(ctx, instr, &tmp_src, 0);
+ }
+
+ ir3_instr_create(ctx->ir, 0, OPC_NOP);
+
+ /* 2 * t->arg instructions (mul/mad + nop pairs) were emitted above: */
+ /* pad out to multiple of 4 scalar instructions: */
+ for (i = 2 * t->arg; i % 4; i++) {
+ ir3_instr_create(ctx->ir, 0, OPC_NOP);
+ }
+
+ create_mov(ctx, dst, &tmp_src);
+}
+
+/* LRP(a,b,c) = (a * b) + ((1 - a) * c)
+ *
+ * Built from four vectorized cat2 instructions using two internal
+ * temporaries and an immediate holding 1.0.
+ */
+static void
+trans_lrp(const struct instr_translater *t,
+		struct fd3_compile_context *ctx,
+		struct tgsi_full_instruction *inst)
+{
+	struct tgsi_src_register *a = &inst->Src[0].Register;
+	struct tgsi_src_register *b = &inst->Src[1].Register;
+	struct tgsi_src_register *c = &inst->Src[2].Register;
+	struct tgsi_dst_register prod_dst, rest_dst;
+	struct tgsi_src_register prod_src, rest_src;
+	struct tgsi_src_register one;
+	struct ir3_instruction *instr;
+
+	get_internal_temp(ctx, &prod_dst, &prod_src);
+	get_internal_temp(ctx, &rest_dst, &rest_src);
+
+	get_immediate(ctx, &one, fui(1.0));
+
+	/* prod = a * b */
+	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+	vectorize(ctx, instr, &prod_dst, 2, a, 0, b, 0);
+
+	/* rest = 1 + (-a) */
+	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+	vectorize(ctx, instr, &rest_dst, 2, &one, 0, a, IR3_REG_NEGATE);
+
+	/* rest = rest * c */
+	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+	vectorize(ctx, instr, &rest_dst, 2, &rest_src, 0, c, 0);
+
+	/* dst = prod + rest */
+	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+	vectorize(ctx, instr, &inst->Dst[0].Register, 2,
+			&prod_src, 0, &rest_src, 0);
+}
+
+/* FRC(x) = x - FLOOR(x)
+ *
+ * The floor goes to an internal temporary, which is then subtracted
+ * from x by adding it with the negate flag.
+ */
+static void
+trans_frac(const struct instr_translater *t,
+		struct fd3_compile_context *ctx,
+		struct tgsi_full_instruction *inst)
+{
+	struct tgsi_src_register *x = &inst->Src[0].Register;
+	struct tgsi_dst_register floor_dst;
+	struct tgsi_src_register floor_src;
+	struct ir3_instruction *instr;
+
+	get_internal_temp(ctx, &floor_dst, &floor_src);
+
+	/* floor = FLOOR(x) */
+	instr = ir3_instr_create(ctx->ir, 2, OPC_FLOOR_F);
+	vectorize(ctx, instr, &floor_dst, 1, x, 0);
+
+	/* dst = x + (-floor) */
+	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+	vectorize(ctx, instr, &inst->Dst[0].Register, 2,
+			x, 0, &floor_src, IR3_REG_NEGATE);
+}
+
+/* POW(a,b) = EXP2(b * LOG2(a))
+ *
+ * Scalar: only the X swizzle of each source is used.  All three steps
+ * work in component 0 of an internal temporary, which is finally moved
+ * to the instruction's dst.
+ */
+static void
+trans_pow(const struct instr_translater *t,
+		struct fd3_compile_context *ctx,
+		struct tgsi_full_instruction *inst)
+{
+	struct tgsi_src_register *base = &inst->Src[0].Register;
+	struct tgsi_src_register *expo = &inst->Src[1].Register;
+	struct tgsi_dst_register tmp_dst;
+	struct tgsi_src_register tmp_src;
+	struct ir3_instruction *instr, *nop;
+	struct ir3_register *result;
+
+	assert(inst->Instruction.NumSrcRegs == 2);
+	assert(inst->Instruction.NumDstRegs == 1);
+
+	get_internal_temp(ctx, &tmp_dst, &tmp_src);
+
+	/* log2 Rtmp, Rsrc0 (preceded by a repeated nop) */
+	nop = ir3_instr_create(ctx->ir, 0, OPC_NOP);
+	nop->repeat = 5;
+
+	instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2);
+	result = add_dst_reg(ctx, instr, &tmp_dst, 0);
+	add_src_reg(ctx, instr, base, base->SwizzleX);
+	regmask_set(ctx->needs_ss, result);
+
+	/* mul.f Rtmp, Rtmp, Rsrc1 */
+	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+	add_dst_reg(ctx, instr, &tmp_dst, 0);
+	add_src_reg(ctx, instr, &tmp_src, 0);
+	add_src_reg(ctx, instr, expo, expo->SwizzleX);
+
+	/* blob compiler seems to ensure there are at least 6 instructions
+	 * between a "simple" (non-cat4) instruction and a dependent cat4..
+	 * probably we need to handle this in some other places too.
+	 */
+	nop = ir3_instr_create(ctx->ir, 0, OPC_NOP);
+	nop->repeat = 5;
+
+	/* exp2 Rtmp, Rtmp */
+	instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2);
+	result = add_dst_reg(ctx, instr, &tmp_dst, 0);
+	add_src_reg(ctx, instr, &tmp_src, 0);
+	regmask_set(ctx->needs_ss, result);
+
+	/* mov Rdst, Rtmp */
+	create_mov(ctx, &inst->Dst[0].Register, &tmp_src);
+}
+
+/* texture fetch/sample instructions:
+ *
+ * t->arg selects the TGSI opcode being translated (TEX or TXP) and
+ * t->opc is the native opcode to emit.  The coordinate comes from
+ * Src[0], the sampler (also used as the texture index) from Src[1].
+ * The destination register is added to needs_sy.
+ */
+static void
+trans_samp(const struct instr_translater *t,
+		struct fd3_compile_context *ctx,
+		struct tgsi_full_instruction *inst)
+{
+	/* For each coordinate slot, the coord component that is moved into
+	 * it; -1 ends the list.  These are static tables rather than
+	 * compound literals created inside the switch: such a literal only
+	 * lives until the end of the switch block, while 'order' is still
+	 * dereferenced after it.
+	 */
+	static const int8_t order_tex_2d[4] = { 0, 1, -1, -1 };
+	static const int8_t order_tex_3d[4] = { 0, 1,  2, -1 };
+	static const int8_t order_txp_2d[4] = { 0, 1,  3, -1 };
+	static const int8_t order_txp_3d[4] = { 0, 1,  2,  3 };
+	struct ir3_register *r;
+	struct ir3_instruction *instr;
+	struct tgsi_dst_register tmp_dst;
+	struct tgsi_src_register tmp_src;
+	struct tgsi_src_register *coord = &inst->Src[0].Register;
+	struct tgsi_src_register *samp = &inst->Src[1].Register;
+	unsigned tex = inst->Texture.Texture;
+	/* defined fallback so the unexpected-opcode path below never reads
+	 * an uninitialized pointer when asserts are compiled out:
+	 */
+	const int8_t *order = order_tex_3d;
+	unsigned i, j, flags = 0;
+
+	switch (t->arg) {
+	case TGSI_OPCODE_TEX:
+		order = (tex == TGSI_TEXTURE_2D) ? order_tex_2d : order_tex_3d;
+		break;
+	case TGSI_OPCODE_TXP:
+		order = (tex == TGSI_TEXTURE_2D) ? order_txp_2d : order_txp_3d;
+		flags |= IR3_INSTR_P;
+		break;
+	default:
+		assert(0);
+		break;
+	}
+
+	if (tex == TGSI_TEXTURE_3D)
+		flags |= IR3_INSTR_3D;
+
+	assert(inst->Instruction.NumSrcRegs == 2);
+	assert(inst->Instruction.NumDstRegs == 1);
+
+	/* The texture sample instructions need to coord in successive
+	 * registers/components (ie. src.xy but not src.yx).  And TXP
+	 * needs the .w component in .z for 2D..  so in some cases we
+	 * might need to emit some mov instructions to shuffle things
+	 * around:
+	 *
+	 * NOTE(review): as written, the test below is also true for an
+	 * identity swizzle (x != y + 1), so the mov path is taken in that
+	 * case too.  The operands may have been meant the other way around;
+	 * behavior is left unchanged here -- confirm against the hw
+	 * requirements before changing it.
+	 */
+	for (i = 1; (i < 4) && (order[i] >= 0); i++) {
+		if (src_swiz(coord, 0) != (src_swiz(coord, i) + order[i])) {
+			type_t type_mov = get_type(ctx);
+
+			/* need to move things around: */
+			get_internal_temp(ctx, &tmp_dst, &tmp_src);
+
+			for (j = 0; (j < 4) && (order[j] >= 0); j++) {
+				instr = ir3_instr_create(ctx->ir, 1, 0);
+				instr->cat1.src_type = type_mov;
+				instr->cat1.dst_type = type_mov;
+				add_dst_reg(ctx, instr, &tmp_dst, j);
+				add_src_reg(ctx, instr, coord,
+						src_swiz(coord, order[j]));
+			}
+
+			coord = &tmp_src;
+
+			/* fewer than four mov's: fill the rest with one repeated nop */
+			if (j < 4)
+				ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 4 - j - 1;
+
+			break;
+		}
+	}
+
+	instr = ir3_instr_create(ctx->ir, 5, t->opc);
+	instr->cat5.type = get_type(ctx);
+	instr->cat5.samp = samp->Index;
+	instr->cat5.tex  = samp->Index;
+	instr->flags |= flags;
+
+	r = add_dst_reg(ctx, instr, &inst->Dst[0].Register, 0);
+	r->wrmask = inst->Dst[0].Register.WriteMask;
+
+	add_src_reg(ctx, instr, coord, coord->SwizzleX);
+
+	regmask_set(ctx->needs_sy, r);
+}
+
+/* CMP(a,b,c) = (a < 0) ? b : c
+ *
+ * Emitted as three steps:
+ *   cmps.f.ge tmp, src0, 0.0
+ *   add.s     tmp, tmp, -1
+ *   sel       dst, src2, tmp, src1
+ *
+ * The sel is vectorized and still reads the original src1/src2, so it
+ * must write the register returned by get_dst(); put_dst() then
+ * finishes the write to the real destination.
+ */
+static void
+trans_cmp(const struct instr_translater *t,
+		struct fd3_compile_context *ctx,
+		struct tgsi_full_instruction *inst)
+{
+	struct ir3_instruction *instr;
+	struct tgsi_dst_register tmp_dst;
+	struct tgsi_src_register tmp_src;
+	struct tgsi_src_register constval;
+	/* final instruction uses original src1 and src2, so we need get_dst() */
+	struct tgsi_dst_register *dst = get_dst(ctx, inst);
+
+	get_internal_temp(ctx, &tmp_dst, &tmp_src);
+
+	/* cmps.f.ge tmp, src0, 0.0 */
+	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
+	instr->cat2.condition = IR3_COND_GE;
+	get_immediate(ctx, &constval, fui(0.0));
+	vectorize(ctx, instr, &tmp_dst, 2,
+			&inst->Src[0].Register, 0,
+			&constval, 0);
+
+	/* add.s tmp, tmp, -1 */
+	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
+	instr->repeat = 3;
+	add_dst_reg(ctx, instr, &tmp_dst, 0);
+	add_src_reg(ctx, instr, &tmp_src, 0);
+	ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
+
+	/* sel.{f32,f16} dst, src2, tmp, src1 */
+	instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ?
+			OPC_SEL_F16 : OPC_SEL_F32);
+	vectorize(ctx, instr, dst, 3,
+			&inst->Src[2].Register, 0,
+			&tmp_src, 0,
+			&inst->Src[1].Register, 0);
+
+	put_dst(ctx, inst, dst);
+}
+
+/*
+ * Conditional / Flow control
+ */
+
+/* Return the index of 'instr' within the shader's instruction list, or
+ * ~0 if the instruction is not present.
+ */
+static unsigned
+find_instruction(struct fd3_compile_context *ctx, struct ir3_instruction *instr)
+{
+	unsigned idx = 0;
+
+	while (idx < ctx->ir->instrs_count) {
+		if (ctx->ir->instrs[idx] == instr)
+			return idx;
+		idx++;
+	}
+
+	return ~0;
+}
+
+/* Push a branch/jump instruction whose target is not yet known; it is
+ * patched later by pop_branch().
+ */
+static void
+push_branch(struct fd3_compile_context *ctx, struct ir3_instruction *instr)
+{
+	ctx->branch[ctx->branch_count] = instr;
+	ctx->branch_count++;
+}
+
+/* Close the innermost open branch: emit a nop carrying the (jp) flag as
+ * the landing point, then pop the most recently pushed branch
+ * instruction and set its immediate to the distance from that branch to
+ * the nop just emitted.
+ */
+static void
+pop_branch(struct fd3_compile_context *ctx)
+{
+ struct ir3_instruction *instr;
+
+ /* if we were clever enough, we'd patch this up after the fact,
+ * and set (jp) flag on whatever the next instruction was, rather
+ * than inserting an extra nop..
+ */
+ instr = ir3_instr_create(ctx->ir, 0, OPC_NOP);
+ instr->flags |= IR3_INSTR_JP;
+
+ /* pop the branch instruction from the stack and fix up branch target: */
+ instr = ctx->branch[--ctx->branch_count];
+ /* (index of last instruction) - (index of branch).
+ * NOTE(review): find_instruction() returns ~0 when not found and that
+ * case is not checked here.
+ */
+ instr->cat0.immed = ctx->ir->instrs_count - find_instruction(ctx, instr) - 1;
+}
+
+/* We probably don't really want to translate if/else/endif into branches..
+ * the blob driver evaluates both legs of the if and then uses the sel
+ * instruction to pick which sides of the branch to "keep".. but figuring
+ * that out will take somewhat more compiler smarts. So hopefully branches
+ * don't kill performance too badly.
+ *
+ * TGSI IF: compare src.x against 0.0 (EQ) into predicate register p0.x,
+ * then emit a br whose target is filled in later by pop_branch() when
+ * the matching ELSE/ENDIF is translated.
+ */
+static void
+trans_if(const struct instr_translater *t,
+ struct fd3_compile_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct ir3_instruction *instr;
+ struct tgsi_src_register *src = &inst->Src[0].Register;
+ struct tgsi_src_register constval;
+
+ get_immediate(ctx, &constval, fui(0.0));
+
+ /* cmps.f.eq p0.x, 0.0, src.x */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
+ ir3_reg_create(instr, regid(REG_P0, 0), 0);
+ add_src_reg(ctx, instr, &constval, constval.SwizzleX);
+ add_src_reg(ctx, instr, src, src->SwizzleX);
+ instr->cat2.condition = IR3_COND_EQ;
+
+ /* branch target is not known yet, remember it for pop_branch(): */
+ instr = ir3_instr_create(ctx->ir, 0, OPC_BR);
+ push_branch(ctx, instr);
+}
+
+/* TGSI ELSE: emit a jump at the end of the "if" leg, resolve the pending
+ * branch from trans_if() to land after that jump, and leave the jump
+ * pending so the matching ENDIF can resolve it.
+ */
+static void
+trans_else(const struct instr_translater *t,
+ struct fd3_compile_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct ir3_instruction *instr;
+
+ /* for first half of if/else/endif, generate a jump past the else: */
+ instr = ir3_instr_create(ctx->ir, 0, OPC_JUMP);
+
+ /* the jump must be created before pop_branch() so that the popped
+ * branch lands after it:
+ */
+ pop_branch(ctx);
+ push_branch(ctx, instr);
+}
+
+/* TGSI ENDIF: resolve the innermost pending branch (the br from IF, or
+ * the jump from ELSE) to land here.
+ */
+static void
+trans_endif(const struct instr_translater *t,
+ struct fd3_compile_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ pop_branch(ctx);
+}
+
+/*
+ * Handlers for TGSI instructions which do have 1:1 mapping to native
+ * instructions:
+ */
+
+/* Category 0: instructions with no source or destination registers;
+ * emits t->opc as-is.
+ */
+static void
+instr_cat0(const struct instr_translater *t,
+ struct fd3_compile_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ assert(inst->Instruction.NumSrcRegs == 0);
+ assert(inst->Instruction.NumDstRegs == 0);
+
+ ir3_instr_create(ctx->ir, 0, t->opc);
+}
+
+/* Category 1 (mov): copy Src[0] to the destination.  A negated source
+ * is handled by emitting "add.f dst, src, 0.0" instead of a mov.
+ */
+static void
+instr_cat1(const struct instr_translater *t,
+ struct fd3_compile_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct tgsi_dst_register *dst = get_dst(ctx, inst);
+ struct tgsi_src_register *src = &inst->Src[0].Register;
+
+ assert(inst->Instruction.NumSrcRegs == 1);
+ assert(inst->Instruction.NumDstRegs == 1);
+
+ /* mov instructions can't handle a negate on src: */
+ if (src->Negate) {
+ struct tgsi_src_register constval;
+ struct ir3_instruction *instr;
+
+ /* since right now, we are using uniformly either TYPE_F16 or
+ * TYPE_F32, and we don't utilize the conversion possibilities
+ * of mov instructions, we can get away with substituting an
+ * add.f which can handle negate. Might need to revisit this
+ * in the future if we start supporting widening/narrowing or
+ * conversion to/from integer..
+ */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+ get_immediate(ctx, &constval, fui(0.0));
+ vectorize(ctx, instr, dst, 2, src, 0, &constval, 0);
+ } else {
+ create_mov(ctx, dst, src);
+ /* create_mov() generates vector sequence, so no vectorize() */
+ }
+ put_dst(ctx, inst, dst);
+}
+
+/* Category 2 (ALU): emit t->opc with t->arg as the compare condition.
+ *
+ * Most cat2 opcodes take two sources, but the ones listed in the switch
+ * below take only one (e.g. TGSI FLR is routed here as OPC_FLOOR_F), so
+ * the source-count check is made per case rather than up front.
+ */
+static void
+instr_cat2(const struct instr_translater *t,
+		struct fd3_compile_context *ctx,
+		struct tgsi_full_instruction *inst)
+{
+	struct tgsi_dst_register *dst = get_dst(ctx, inst);
+	struct ir3_instruction *instr;
+
+	assert(inst->Instruction.NumDstRegs == 1);
+
+	instr = ir3_instr_create(ctx->ir, 2, t->opc);
+	instr->cat2.condition = t->arg;
+
+	switch (t->opc) {
+	case OPC_ABSNEG_F:
+	case OPC_ABSNEG_S:
+	case OPC_CLZ_B:
+	case OPC_CLZ_S:
+	case OPC_SIGN_F:
+	case OPC_FLOOR_F:
+	case OPC_CEIL_F:
+	case OPC_RNDNE_F:
+	case OPC_RNDAZ_F:
+	case OPC_TRUNC_F:
+	case OPC_NOT_B:
+	case OPC_BFREV_B:
+	case OPC_SETRM:
+	case OPC_CBITS_B:
+		/* these only have one src reg */
+		assert(inst->Instruction.NumSrcRegs == 1);
+		vectorize(ctx, instr, dst, 1,
+				&inst->Src[0].Register, 0);
+		break;
+	default:
+		assert(inst->Instruction.NumSrcRegs == 2);
+		vectorize(ctx, instr, dst, 2,
+				&inst->Src[0].Register, 0,
+				&inst->Src[1].Register, 0);
+		break;
+	}
+
+	put_dst(ctx, inst, dst);
+}
+
+/* Category 3 (three-source ALU, e.g. mad): emit t->opc, or t->hopc when
+ * the shader uses half precision.  A constant/immediate in the src1
+ * position is first copied to an internal temporary.
+ */
+static void
+instr_cat3(const struct instr_translater *t,
+ struct fd3_compile_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct tgsi_dst_register *dst = get_dst(ctx, inst);
+ struct tgsi_src_register *src1 = &inst->Src[1].Register;
+ struct tgsi_dst_register tmp_dst;
+ struct tgsi_src_register tmp_src;
+ struct ir3_instruction *instr;
+
+ assert(inst->Instruction.NumSrcRegs == 3);
+ assert(inst->Instruction.NumDstRegs == 1);
+
+ /* Blob compiler never seems to use a const in src1 position..
+ * although there does seem (according to disassembler hidden
+ * in libllvm-a3xx.so) to be a bit to indicate that src1 is a
+ * const. Not sure if this is a hw bug, or simply that the
+ * disassembler lies.
+ */
+ if ((src1->File == TGSI_FILE_CONSTANT) ||
+ (src1->File == TGSI_FILE_IMMEDIATE)) {
+ get_internal_temp(ctx, &tmp_dst, &tmp_src);
+ create_mov(ctx, &tmp_dst, src1);
+ src1 = &tmp_src;
+ }
+
+ instr = ir3_instr_create(ctx->ir, 3,
+ ctx->so->half_precision ? t->hopc : t->opc);
+ vectorize(ctx, instr, dst, 3,
+ &inst->Src[0].Register, 0,
+ src1, 0,
+ &inst->Src[2].Register, 0);
+ put_dst(ctx, inst, dst);
+}
+
+/* Category 4 (single-source ops such as rcp/rsq/sqrt/exp2/log2/sin/cos):
+ * emit a nop with repeat=5 as padding, then t->opc, and record the
+ * result register in the needs_ss mask.
+ */
+static void
+instr_cat4(const struct instr_translater *t,
+ struct fd3_compile_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct tgsi_dst_register *dst = get_dst(ctx, inst);
+ struct ir3_instruction *instr;
+
+ assert(inst->Instruction.NumSrcRegs == 1);
+ assert(inst->Instruction.NumDstRegs == 1);
+
+ /* padding ahead of the cat4 instruction: */
+ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
+ instr = ir3_instr_create(ctx->ir, 4, t->opc);
+
+ vectorize(ctx, instr, dst, 1,
+ &inst->Src[0].Register, 0);
+
+ /* regs[0] is the destination register of the instruction created
+ * above -- presumably; TODO confirm how vectorize() lays out regs
+ */
+ regmask_set(ctx->needs_ss, instr->regs[0]);
+
+ put_dst(ctx, inst, dst);
+}
+
+/* Dispatch table indexed by TGSI opcode.  Each populated entry names the
+ * handler (.fxn) and, where the handler needs them, the native opcode
+ * (.opc), its half-precision variant (.hopc) and a handler-specific
+ * argument (.arg).  Entries left zeroed have no handler and are reported
+ * as unknown opcodes by compile_instructions().
+ */
+static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
+#define INSTR(n, f, ...) \
+	[TGSI_OPCODE_ ## n] = { .fxn = (f), ##__VA_ARGS__ }
+
+	INSTR(MOV,          instr_cat1),
+	INSTR(RCP,          instr_cat4, .opc = OPC_RCP),
+	INSTR(RSQ,          instr_cat4, .opc = OPC_RSQ),
+	INSTR(SQRT,         instr_cat4, .opc = OPC_SQRT),
+	INSTR(MUL,          instr_cat2, .opc = OPC_MUL_F),
+	INSTR(ADD,          instr_cat2, .opc = OPC_ADD_F),
+	INSTR(DP2,          trans_dotp, .arg = 2),
+	INSTR(DP3,          trans_dotp, .arg = 3),
+	INSTR(DP4,          trans_dotp, .arg = 4),
+	INSTR(MIN,          instr_cat2, .opc = OPC_MIN_F),
+	INSTR(MAX,          instr_cat2, .opc = OPC_MAX_F),
+	INSTR(SLT,          instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_LT),
+	INSTR(SGE,          instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_GE),
+	INSTR(MAD,          instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
+	INSTR(LRP,          trans_lrp),
+	INSTR(FRC,          trans_frac),
+	INSTR(FLR,          instr_cat2, .opc = OPC_FLOOR_F),
+	INSTR(EX2,          instr_cat4, .opc = OPC_EXP2),
+	INSTR(LG2,          instr_cat4, .opc = OPC_LOG2),
+	INSTR(POW,          trans_pow),
+	/* cosine maps to the cos opcode and sine to the sin opcode: */
+	INSTR(COS,          instr_cat4, .opc = OPC_COS),
+	INSTR(SIN,          instr_cat4, .opc = OPC_SIN),
+	INSTR(TEX,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX),
+	INSTR(TXP,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP),
+	INSTR(CMP,          trans_cmp),
+	INSTR(IF,           trans_if),
+	INSTR(ELSE,         trans_else),
+	INSTR(ENDIF,        trans_endif),
+	INSTR(END,          instr_cat0, .opc = OPC_END),
+};
+
+/* Handle a TGSI input declaration: for every register in the declared
+ * range, append an entry to so->inputs (component mask, register id and
+ * input location) and, for fragment shaders, emit the bary.f instruction
+ * that loads the input into that register.
+ */
+static void
+decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
+{
+ struct fd3_shader_stateobj *so = ctx->so;
+ unsigned base = ctx->base_reg[TGSI_FILE_INPUT];
+ unsigned i, flags = 0;
+
+ if (ctx->so->half_precision)
+ flags |= IR3_REG_HALF;
+
+ for (i = decl->Range.First; i <= decl->Range.Last; i++) {
+ unsigned n = so->inputs_count++;
+ unsigned r = regid(i + base, 0);
+ unsigned ncomp;
+
+ /* TODO use ctx->info.input_usage_mask[decl->Range.n] to figure out ncomp: */
+ ncomp = 4;
+
+ DBG("decl in -> r%d", i + base); // XXX
+
+ so->inputs[n].compmask = (1 << ncomp) - 1;
+ so->inputs[n].regid = r;
+ so->inputs[n].inloc = ctx->next_inloc;
+ ctx->next_inloc += ncomp;
+
+ so->total_in += ncomp;
+
+ /* for frag shaders, we need to generate the corresponding bary instr: */
+ if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+ struct ir3_instruction *instr;
+
+ instr = ir3_instr_create(ctx->ir, 2, OPC_BARY_F);
+ /* one instruction covers all ncomp components via repeat: */
+ instr->repeat = ncomp - 1;
+
+ /* dst register: */
+ /* remembered so compile_instructions() can flag the last input */
+ ctx->last_input = ir3_reg_create(instr, r, flags);
+
+ /* input position: */
+ /* NOTE(review): the "- 8" bias on inloc is not explained in this
+ * file section; presumably next_inloc starts at 8 -- confirm.
+ */
+ ir3_reg_create(instr, 0, IR3_REG_IMMED | IR3_REG_R)->iim_val =
+ so->inputs[n].inloc - 8;
+
+ /* input base (always r0.x): */
+ ir3_reg_create(instr, regid(0,0), 0);
+ }
+ }
+}
+
+/* Handle a TGSI output declaration: record which register holds each
+ * output, keyed on the declaration's semantic name.
+ *
+ * Vertex shaders:   POSITION -> so->pos_regid, PSIZE -> so->psize_regid,
+ *                   COLOR/GENERIC -> one so->outputs[] entry per register
+ *                   in the declared range.
+ * Fragment shaders: COLOR -> so->color_regid.
+ *
+ * Any other semantic is logged and asserts.
+ */
+static void
+decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
+{
+	struct fd3_shader_stateobj *so = ctx->so;
+	unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT];
+	unsigned name = decl->Semantic.Name;
+	unsigned i;
+
+	assert(decl->Declaration.Semantic);  // TODO is this ever not true?
+
+	DBG("decl out[%d] -> r%d", name, decl->Range.First + base);   // XXX
+
+	if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+		switch (name) {
+		case TGSI_SEMANTIC_POSITION:
+			so->pos_regid = regid(decl->Range.First + base, 0);
+			break;
+		case TGSI_SEMANTIC_PSIZE:
+			so->psize_regid = regid(decl->Range.First + base, 0);
+			break;
+		case TGSI_SEMANTIC_COLOR:
+		case TGSI_SEMANTIC_GENERIC:
+			for (i = decl->Range.First; i <= decl->Range.Last; i++)
+				so->outputs[so->outputs_count++].regid = regid(i + base, 0);
+			break;
+		default:
+			DBG("unknown VS semantic name: %s",
+					tgsi_semantic_names[name]);
+			assert(0);
+		}
+	} else {
+		switch (name) {
+		case TGSI_SEMANTIC_COLOR:
+			so->color_regid = regid(decl->Range.First + base, 0);
+			break;
+		default:
+			/* this is the fragment shader path, so say so in the log: */
+			DBG("unknown FS semantic name: %s",
+					tgsi_semantic_names[name]);
+			assert(0);
+		}
+	}
+}
+
+/* Handle a TGSI sampler declaration: just count it. */
+static void
+decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
+{
+	struct fd3_shader_stateobj *so = ctx->so;
+
+	so->samplers_count += 1;
+}
+
+/* Main translation loop: walk the TGSI token stream, dispatching
+ * declarations to decl_in/decl_out/decl_samp, copying immediates into
+ * so->immediates, and translating instructions through the translaters[]
+ * table.  Afterwards, flag the first emitted instruction with (ss)(sy)
+ * and mark the last input register with the EI flag.
+ */
+static void
+compile_instructions(struct fd3_compile_context *ctx)
+{
+ struct ir3_shader *ir = ctx->ir;
+
+ while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
+ tgsi_parse_token(&ctx->parser);
+
+ switch (ctx->parser.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION: {
+ struct tgsi_full_declaration *decl =
+ &ctx->parser.FullToken.FullDeclaration;
+ /* declarations for other register files are ignored here */
+ if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
+ decl_out(ctx, decl);
+ } else if (decl->Declaration.File == TGSI_FILE_INPUT) {
+ decl_in(ctx, decl);
+ } else if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
+ decl_samp(ctx, decl);
+ }
+ break;
+ }
+ case TGSI_TOKEN_TYPE_IMMEDIATE: {
+ /* TODO: if we know the immediate is small enough, and only
+ * used with instructions that can embed an immediate, we
+ * can skip this:
+ */
+ struct tgsi_full_immediate *imm =
+ &ctx->parser.FullToken.FullImmediate;
+ unsigned n = ctx->so->immediates_count++;
+ /* 16 bytes copied per immediate -- presumably a vec4 of 32-bit
+ * values; NOTE(review): no bounds check against the size of
+ * so->immediates is visible here.
+ */
+ memcpy(ctx->so->immediates[n].val, imm->u, 16);
+ break;
+ }
+ case TGSI_TOKEN_TYPE_INSTRUCTION: {
+ struct tgsi_full_instruction *inst =
+ &ctx->parser.FullToken.FullInstruction;
+ unsigned opc = inst->Instruction.Opcode;
+ const struct instr_translater *t = &translaters[opc];
+
+ if (t->fxn) {
+ t->fxn(t, ctx, inst);
+ /* internal temporaries only live for one TGSI instruction: */
+ ctx->num_internal_temps = 0;
+ } else {
+ debug_printf("unknown TGSI opc: %s\n",
+ tgsi_get_opcode_name(opc));
+ tgsi_dump(ctx->tokens, 0);
+ assert(0);
+ }
+
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ /* the first instruction always gets both sync flags: */
+ if (ir->instrs_count > 0)
+ ir->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
+
+ /* last_input is only set by decl_in() for fragment shaders: */
+ if (ctx->last_input)
+ ctx->last_input->flags |= IR3_REG_EI;
+}
+
+/* Compile a TGSI token stream into so->ir.
+ *
+ * 'so' must not already have an IR attached.  The color/position/psize
+ * register ids are preset to regid(63,0) and are overwritten only if
+ * the shader declares the corresponding output.
+ *
+ * Returns 0 on success, -1 if compile_init() fails.
+ */
+int
+fd3_compile_shader(struct fd3_shader_stateobj *so,
+ const struct tgsi_token *tokens)
+{
+ struct fd3_compile_context ctx;
+
+ assert(!so->ir);
+
+ so->ir = ir3_shader_create();
+
+ so->color_regid = regid(63,0);
+ so->pos_regid = regid(63,0);
+ so->psize_regid = regid(63,0);
+
+ /* NOTE(review): on this failure path so->ir stays allocated and
+ * compile_free() is not called -- confirm the caller cleans up.
+ */
+ if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK)
+ return -1;
+
+ compile_instructions(&ctx);
+
+ compile_free(&ctx);
+
+ return 0;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h
new file mode 100644
index 00000000000..1116f598a58
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h
@@ -0,0 +1,38 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_COMPILER_H_
+#define FD3_COMPILER_H_
+
+#include "fd3_program.h"
+#include "fd3_util.h"
+
+/* Compile a TGSI token stream into so->ir; returns 0 on success and a
+ * negative value on failure.
+ */
+int fd3_compile_shader(struct fd3_shader_stateobj *so,
+ const struct tgsi_token *tokens);
+
+#endif /* FD3_COMPILER_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
new file mode 100644
index 00000000000..3ae9b2953e4
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
@@ -0,0 +1,118 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+
+#include "fd3_context.h"
+#include "fd3_blend.h"
+#include "fd3_draw.h"
+#include "fd3_emit.h"
+#include "fd3_gmem.h"
+#include "fd3_program.h"
+#include "fd3_rasterizer.h"
+#include "fd3_texture.h"
+#include "fd3_zsa.h"
+
+/* pipe_context::destroy hook: tear down a3xx program state, then the
+ * generic freedreno context.
+ * NOTE(review): the buffers allocated in fd3_context_create() are not
+ * released here -- confirm whether they are freed elsewhere.
+ */
+static void
+fd3_context_destroy(struct pipe_context *pctx)
+{
+ fd3_prog_fini(pctx);
+ fd_context_destroy(pctx);
+}
+
+/* TODO we could combine a few of these small buffers (solid_vbuf,
+ * blit_texcoord_vbuf, and vsc_size_mem) into a single buffer and
+ * save a tiny bit of memory.
+ */
+
+/* Create an immutable buffer holding two 3-float vertices,
+ * (-1, +1, +1) and (+1, -1, +1).
+ */
+static struct pipe_resource *
+create_solid_vertexbuf(struct pipe_context *pctx)
+{
+ static const float init_shader_const[] = {
+ -1.000000, +1.000000, +1.000000,
+ +1.000000, -1.000000, +1.000000,
+ };
+ struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
+ PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const));
+ /* NOTE(review): prsc is not checked for NULL before the write */
+ pipe_buffer_write(pctx, prsc, 0,
+ sizeof(init_shader_const), init_shader_const);
+ return prsc;
+}
+
+/* Create the 16-byte dynamic buffer stored in blit_texcoord_vbuf; its
+ * contents are filled in elsewhere.
+ */
+static struct pipe_resource *
+create_blit_texcoord_vertexbuf(struct pipe_context *pctx)
+{
+	return pipe_buffer_create(pctx->screen,
+			PIPE_BIND_CUSTOM, PIPE_USAGE_DYNAMIC, 16);
+}
+
+/* Create an a3xx pipe context: allocate the fd3_context, install the
+ * a3xx-specific state-object constructors and draw/gmem/texture/program
+ * hooks, run the generic freedreno context init, then allocate the
+ * private-memory and VSC buffers and the helper vertex buffers.
+ *
+ * Returns NULL if allocation or fd_context_init() fails.
+ */
+struct pipe_context *
+fd3_context_create(struct pipe_screen *pscreen, void *priv)
+{
+ struct fd_screen *screen = fd_screen(pscreen);
+ struct fd3_context *fd3_ctx = CALLOC_STRUCT(fd3_context);
+ struct pipe_context *pctx;
+
+ if (!fd3_ctx)
+ return NULL;
+
+ pctx = &fd3_ctx->base.base;
+
+ fd3_ctx->base.screen = fd_screen(pscreen);
+
+ pctx->destroy = fd3_context_destroy;
+ pctx->create_blend_state = fd3_blend_state_create;
+ pctx->create_rasterizer_state = fd3_rasterizer_state_create;
+ pctx->create_depth_stencil_alpha_state = fd3_zsa_state_create;
+
+ fd3_draw_init(pctx);
+ fd3_gmem_init(pctx);
+ fd3_texture_init(pctx);
+ fd3_prog_init(pctx);
+
+ /* NOTE(review): fd3_ctx is not freed here on failure -- presumably
+ * fd_context_init() destroys the context itself; confirm.
+ */
+ pctx = fd_context_init(&fd3_ctx->base, pscreen, priv);
+ if (!pctx)
+ return NULL;
+
+ /* NOTE(review): none of the allocations below are checked for failure */
+ fd3_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
+ DRM_FREEDRENO_GEM_TYPE_KMEM);
+
+ fd3_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
+ DRM_FREEDRENO_GEM_TYPE_KMEM);
+
+ fd3_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000,
+ DRM_FREEDRENO_GEM_TYPE_KMEM);
+
+ fd3_ctx->vsc_pipe_mem = fd_bo_new(screen->dev, 0x40000,
+ DRM_FREEDRENO_GEM_TYPE_KMEM);
+
+ fd3_ctx->solid_vbuf = create_solid_vertexbuf(pctx);
+ fd3_ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx);
+
+ return pctx;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.h b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
new file mode 100644
index 00000000000..3829ab52675
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
@@ -0,0 +1,68 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_CONTEXT_H_
+#define FD3_CONTEXT_H_
+
+#include "freedreno_drmif.h"
+
+#include "freedreno_context.h"
+
+/* a3xx-specific context: the generic fd_context plus the buffers that
+ * the a3xx backend allocates once per context.
+ */
+struct fd3_context {
+ /* must stay the first member: fd3_context() casts fd_context pointers */
+ struct fd_context base;
+
+ /* vertex / fragment shader private memory buffers */
+ struct fd_bo *vs_pvt_mem, *fs_pvt_mem;
+
+ /* not sure how big this actually needs to be.. the blob driver
+ * combines it w/ the solid_vertexbuf, we could probably do the
+ * same to save an extra bo allocation..
+ */
+ struct fd_bo *vsc_size_mem;
+
+ struct fd_bo *vsc_pipe_mem;
+
+ /* vertex buf used for clear/gmem->mem vertices, and mem->gmem
+ * vertices:
+ */
+ struct pipe_resource *solid_vbuf;
+
+ /* vertex buf used for mem->gmem tex coords:
+ */
+ struct pipe_resource *blit_texcoord_vbuf;
+};
+
+/* Downcast a generic fd_context to the a3xx context that embeds it as
+ * its first member.
+ */
+static INLINE struct fd3_context *
+fd3_context(struct fd_context *ctx)
+{
+ return (struct fd3_context *)ctx;
+}
+
+/* Create an a3xx pipe context for the given screen; returns NULL on
+ * failure.
+ */
+struct pipe_context *
+fd3_context_create(struct pipe_screen *pscreen, void *priv);
+
+#endif /* FD3_CONTEXT_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
new file mode 100644
index 00000000000..953d45e1738
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -0,0 +1,236 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_prim.h"
+
+#include "freedreno_state.h"
+#include "freedreno_resource.h"
+
+#include "fd3_draw.h"
+#include "fd3_context.h"
+#include "fd3_emit.h"
+#include "fd3_program.h"
+#include "fd3_util.h"
+#include "fd3_zsa.h"
+
+
+/* Gather offset/stride/resource/format for every bound vertex element
+ * and hand the resulting array to fd3_emit_vertex_bufs().  Nothing is
+ * emitted when there are no vertex elements.
+ */
+static void
+emit_vertexbufs(struct fd_context *ctx)
+{
+	struct fd_vertex_stateobj *vtx = ctx->vtx;
+	struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vertexbuf;
+	struct fd3_vertex_buf bufs[PIPE_MAX_ATTRIBS];
+	unsigned idx;
+
+	if (vtx->num_elements == 0)
+		return;
+
+	for (idx = 0; idx < vtx->num_elements; idx++) {
+		struct pipe_vertex_element *elem = &vtx->pipe[idx];
+		struct pipe_vertex_buffer *vb =
+				&vertexbuf->vb[elem->vertex_buffer_index];
+		struct fd3_vertex_buf *out = &bufs[idx];
+
+		out->offset = vb->buffer_offset + elem->src_offset;
+		out->stride = vb->stride;
+		out->prsc   = vb->buffer;
+		out->format = elem->src_format;
+	}
+
+	fd3_emit_vertex_bufs(ctx->ring, &ctx->prog, bufs, vtx->num_elements);
+}
+
+/* Draw hook: emit dirty state (and vertex buffers when they are dirty),
+ * program the index range / restart index registers, then let the
+ * generic fd_draw_emit() issue the draw.
+ */
+static void
+fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
+{
+ struct fd_ringbuffer *ring = ctx->ring;
+ unsigned dirty = ctx->dirty;
+
+ fd3_emit_state(ctx, dirty);
+
+ if (dirty & FD_DIRTY_VTXBUF)
+ emit_vertexbufs(ctx);
+
+ OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
+ OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
+
+ OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
+ OUT_RING(ring, 0x0000000);
+
+ /* four consecutive registers starting at VFD_INDEX_MIN: */
+ OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
+ OUT_RING(ring, info->min_index); /* VFD_INDEX_MIN */
+ OUT_RING(ring, info->max_index + 1); /* VFD_INDEX_MAX */
+ OUT_RING(ring, info->start_instance); /* VFD_INSTANCEID_OFFSET */
+ OUT_RING(ring, info->start); /* VFD_INDEX_OFFSET */
+
+ /* 0xffffffff is written when primitive restart is disabled: */
+ OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
+ OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
+ info->restart_index : 0xffffffff);
+
+ fd_draw_emit(ctx, info);
+}
+
+/* Clear hook: implemented as a draw.  Depth, stencil and color-write
+ * state are programmed according to 'buffers', the solid program is
+ * bound with the clear color uploaded as a fragment shader constant,
+ * and a RECTLIST is drawn from solid_vbuf.
+ */
+static void
+fd3_clear(struct fd_context *ctx, unsigned buffers,
+ const union pipe_color_union *color, double depth, unsigned stencil)
+{
+ struct fd3_context *fd3_ctx = fd3_context(ctx);
+ struct fd_ringbuffer *ring = ctx->ring;
+ unsigned ce, i;
+
+ /* emit generic state now: */
+ fd3_emit_state(ctx, ctx->dirty & (FD_DIRTY_VIEWPORT |
+ FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR));
+
+ OUT_PKT0(ring, REG_A3XX_RB_BLEND_ALPHA, 1);
+ OUT_RING(ring, 0X3c0000ff);
+
+ fd3_emit_rbrc_draw_state(ring,
+ A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
+
+ /* depth: always pass and write when clearing depth, with the clear
+ * value supplied through the viewport z-scale; otherwise never pass
+ */
+ if (buffers & PIPE_CLEAR_DEPTH) {
+ OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
+ A3XX_RB_DEPTH_CONTROL_Z_ENABLE |
+ A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_ZOFFSET, 2);
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(depth));
+ } else {
+ OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
+ }
+
+ /* stencil: when clearing, front-face func ALWAYS with REPLACE on
+ * z-pass and the clear value as the reference; otherwise stencil is
+ * left disabled with all-zero masks
+ */
+ if (buffers & PIPE_CLEAR_STENCIL) {
+ OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
+ OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(stencil) |
+ A3XX_RB_STENCILREFMASK_STENCILMASK(stencil) |
+ A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+ OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(0) |
+ A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
+ 0xff000000 | // XXX ???
+ A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+
+ OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
+ A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
+ A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_REPLACE) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
+ A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+ } else {
+ OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
+ OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(0) |
+ A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
+ A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0));
+ OUT_RING(ring, A3XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
+ A3XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
+ A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0));
+
+ OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
+ A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
+ A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+ }
+
+ /* color component write-enable: all four channels or none */
+ if (buffers & PIPE_CLEAR_COLOR) {
+ ce = 0xf;
+ } else {
+ ce = 0x0;
+ }
+
+ /* same control/blend setup for each of the 4 render targets: */
+ for (i = 0; i < 4; i++) {
+ OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
+ OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(12) |
+ A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS) |
+ A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce));
+
+ OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
+ OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
+ A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
+ A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
+ A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
+ A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
+ A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO) |
+ A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE);
+ }
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
+
+ fd3_program_emit(ring, &ctx->solid_prog);
+
+ /* single vertex buffer: 3 floats (stride 12) per vertex */
+ fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
+ { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
+ }, 1);
+
+ /* clear color goes to the fragment shader as a constant: */
+ fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
+
+ OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
+ OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
+ OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
+ OUT_RING(ring, 0); /* VFD_INDEX_MIN */
+ OUT_RING(ring, 2); /* VFD_INDEX_MAX */
+ OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
+ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
+ OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
+ OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */
+
+ OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, PERFCOUNTER_STOP);
+
+ /* auto-indexed rectlist built from the two vertices in solid_vbuf: */
+ OUT_PKT3(ring, CP_DRAW_INDX, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX,
+ INDEX_SIZE_IGN, IGNORE_VISIBILITY));
+ OUT_RING(ring, 2); /* NumIndices */
+
+ OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
+ OUT_RING(ring, 0x00000000);
+}
+
+/* Install the a3xx draw and clear hooks on the context. */
+void
+fd3_draw_init(struct pipe_context *pctx)
+{
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->draw = fd3_draw;
+ ctx->clear = fd3_clear;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.h b/src/gallium/drivers/freedreno/a3xx/fd3_draw.h
new file mode 100644
index 00000000000..09b1243dbb3
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.h
@@ -0,0 +1,38 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_DRAW_H_
+#define FD3_DRAW_H_
+
+#include "pipe/p_context.h"
+
+#include "freedreno_draw.h"
+
+void fd3_draw_init(struct pipe_context *pctx);
+
+#endif /* FD3_DRAW_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
new file mode 100644
index 00000000000..1d048b08c83
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -0,0 +1,581 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_helpers.h"
+#include "util/u_format.h"
+
+#include "freedreno_resource.h"
+
+#include "fd3_emit.h"
+#include "fd3_blend.h"
+#include "fd3_context.h"
+#include "fd3_program.h"
+#include "fd3_rasterizer.h"
+#include "fd3_texture.h"
+#include "fd3_util.h"
+#include "fd3_zsa.h"
+
+/* Emit a UCHE cache invalidate followed by a CP_LOAD_STATE packet that
+ * loads constants into state block sb.
+ *
+ * regid:          base const register
+ * offset:         offset into prsc, or byte offset into dwords, at which
+ *                 the constant values start
+ * sizedwords:     size of const value buffer
+ * prsc or dwords: buffer containing constant values; with prsc the packet
+ *                 references the buffer object (SS_INDIRECT), otherwise
+ *                 the values are copied inline into the packet (SS_DIRECT)
+ */
+void
+fd3_emit_constant(struct fd_ringbuffer *ring,
+		enum adreno_state_block sb,
+		uint32_t regid, uint32_t offset, uint32_t sizedwords,
+		const uint32_t *dwords, struct pipe_resource *prsc)
+{
+	const bool indirect = (prsc != NULL);
+	/* only the direct path carries payload dwords inside the packet: */
+	const uint32_t payload = indirect ? 0 : sizedwords;
+	const enum adreno_state_src src = indirect ? SS_INDIRECT : SS_DIRECT;
+
+	/* we have this sometimes, not others.. perhaps we could be clever
+	 * and figure out actually when we need to invalidate cache:
+	 */
+	OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
+	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
+	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
+			A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
+			A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
+
+	OUT_PKT3(ring, CP_LOAD_STATE, 2 + payload);
+	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
+			CP_LOAD_STATE_0_STATE_SRC(src) |
+			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+			CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2));
+
+	if (indirect) {
+		OUT_RELOC(ring, fd_resource(prsc)->bo, offset,
+				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+	} else {
+		const uint32_t *vals =
+			(const uint32_t *)((const uint8_t *)dwords + offset);
+		uint32_t n;
+
+		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+		for (n = 0; n < payload; n++)
+			OUT_RING(ring, vals[n]);
+	}
+}
+
+/* Emit the user constant buffers enabled in constbuf, followed by the
+ * shader's immediates, into state block sb.
+ *
+ * constbuf: bound constant buffers; every enabled buffer is (re)marked
+ *           dirty on entry and its dirty bit is cleared once emitted
+ * shader:   shader whose constlen bounds the user constants and whose
+ *           immediates are emitted.  May be NULL (fd3_emit_state() passes
+ *           NULL when that stage's program is not flagged dirty), in
+ *           which case no immediates are emitted and the constlen bound
+ *           cannot be applied.
+ */
+static void
+emit_constants(struct fd_ringbuffer *ring,
+		enum adreno_state_block sb,
+		struct fd_constbuf_stateobj *constbuf,
+		struct fd3_shader_stateobj *shader)
+{
+	uint32_t enabled_mask = constbuf->enabled_mask;
+	uint32_t base = 0;   /* running base const register, in dwords */
+	unsigned i;
+
+	// XXX TODO only emit dirty consts.. but we need to keep track if
+	// they are clobbered by a clear, gmem2mem, or mem2gmem..
+	constbuf->dirty_mask = enabled_mask;
+
+	/* emit user constants: */
+	while (enabled_mask) {
+		unsigned index = ffs(enabled_mask) - 1;
+		struct pipe_constant_buffer *cb = &constbuf->cb[index];
+		unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
+
+		// I expect that size should be a multiple of vec4's:
+		assert(size == align(size, 4));
+
+		/* gallium could have const-buffer still bound, even though the
+		 * shader is not using it.  Writing consts above constlen (or
+		 * rather, HLSQ_{VS,FS}_CONTROL_REG.CONSTLENGTH) will cause a
+		 * hang.
+		 *
+		 * shader is NULL when the caller did not flag the program as
+		 * dirty, so it must be checked before reading constlen:
+		 */
+		if (shader && ((base / 4) >= shader->constlen))
+			break;
+
+		/* unsigned literal: index may be 31, and shifting a signed 1
+		 * into the sign bit is undefined:
+		 */
+		if (constbuf->dirty_mask & (1u << index)) {
+			fd3_emit_constant(ring, sb, base,
+					cb->buffer_offset, size,
+					cb->user_buffer, cb->buffer);
+			constbuf->dirty_mask &= ~(1u << index);
+		}
+
+		base += size;
+		enabled_mask &= ~(1u << index);
+	}
+
+	/* emit shader immediates: */
+	if (shader) {
+		for (i = 0; i < shader->immediates_count; i++) {
+			fd3_emit_constant(ring, sb,
+					4 * (shader->first_immediate + i),
+					0, 4, shader->immediates[i].val, NULL);
+		}
+	}
+}
+
+/* Base offsets used as DST_OFF when loading vertex / fragment sampler and
+ * texture state, and programmed into TPL1_TP_{VS,FS}_TEX_OFFSET:
+ */
+#define VERT_TEX_OFF 0
+#define FRAG_TEX_OFF 16
+/* number of mipaddr entries emitted per texture: */
+#define BASETABLE_SZ 14
+
+/* Emit sampler state, texture state and mipmap base addresses for every
+ * texture bound in tex.
+ *
+ * sb:  texture state block to load (SB_VERT_TEX or SB_FRAG_TEX); it also
+ *      selects the destination offset and the matching MIPADDR block
+ * tex: bound samplers and sampler views; nothing is emitted when no
+ *      samplers are bound
+ */
+static void
+emit_textures(struct fd_ringbuffer *ring,
+		enum adreno_state_block sb,
+		struct fd_texture_stateobj *tex)
+{
+	static const unsigned tex_off[] = {
+			[SB_VERT_TEX] = VERT_TEX_OFF,
+			[SB_FRAG_TEX] = FRAG_TEX_OFF,
+	};
+	static const enum adreno_state_block mipaddr[] = {
+			[SB_VERT_TEX] = SB_VERT_MIPADDR,
+			[SB_FRAG_TEX] = SB_FRAG_MIPADDR,
+	};
+	unsigned off;
+	unsigned t, lvl;
+
+	assert(tex->num_samplers == tex->num_textures);  // TODO check..
+
+	if (tex->num_samplers == 0)
+		return;
+
+	off = tex_off[sb];
+
+	/* output sampler state: */
+	OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * tex->num_samplers));
+	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(off) |
+			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+			CP_LOAD_STATE_0_NUM_UNIT(tex->num_samplers));
+	OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
+			CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+	for (t = 0; t < tex->num_samplers; t++) {
+		struct fd3_sampler_stateobj *samp =
+				fd3_sampler_stateobj(tex->samplers[t]);
+
+		OUT_RING(ring, samp->texsamp0);
+		OUT_RING(ring, samp->texsamp1);
+	}
+
+	/* emit texture state: */
+	OUT_PKT3(ring, CP_LOAD_STATE, 2 + (4 * tex->num_textures));
+	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(off) |
+			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+			CP_LOAD_STATE_0_NUM_UNIT(tex->num_textures));
+	OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
+			CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+	for (t = 0; t < tex->num_textures; t++) {
+		struct fd3_pipe_sampler_view *sv =
+				fd3_pipe_sampler_view(tex->textures[t]);
+
+		OUT_RING(ring, sv->texconst0);
+		OUT_RING(ring, sv->texconst1);
+		/* each texture indexes its own BASETABLE_SZ-entry mipaddr slot: */
+		OUT_RING(ring, sv->texconst2 |
+				A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * t));
+		OUT_RING(ring, sv->texconst3);
+	}
+
+	/* emit mipaddrs: */
+	OUT_PKT3(ring, CP_LOAD_STATE, 2 + (BASETABLE_SZ * tex->num_textures));
+	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * off) |
+			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+			CP_LOAD_STATE_0_STATE_BLOCK(mipaddr[sb]) |
+			CP_LOAD_STATE_0_NUM_UNIT(BASETABLE_SZ * tex->num_textures));
+	OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
+			CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+	for (t = 0; t < tex->num_textures; t++) {
+		struct fd3_pipe_sampler_view *sv =
+				fd3_pipe_sampler_view(tex->textures[t]);
+
+		OUT_RELOC(ring, sv->tex_resource->bo, 0, 0);
+		/* I think each entry is a ptr to mipmap level.. for now, just
+		 * pad w/ null's until I get around to actually implementing
+		 * mipmap support..
+		 */
+		for (lvl = 1; lvl < BASETABLE_SZ; lvl++)
+			OUT_RING(ring, 0x00000000);
+	}
+}
+
+/* Emit a CACHE_FLUSH event, a zero-index point-list draw, four dwords of
+ * NOP payload and finally a wait-for-idle.
+ */
+static void
+emit_cache_flush(struct fd_ringbuffer *ring)
+{
+	unsigned pad;
+
+	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+	OUT_RING(ring, CACHE_FLUSH);
+
+	OUT_PKT3(ring, CP_DRAW_INDX, 3);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, DRAW(DI_PT_POINTLIST, DI_SRC_SEL_AUTO_INDEX,
+			INDEX_SIZE_IGN, IGNORE_VISIBILITY));
+	OUT_RING(ring, 0);    /* NumIndices */
+
+	OUT_PKT3(ring, CP_NOP, 4);
+	for (pad = 0; pad < 4; pad++)
+		OUT_RING(ring, 0x00000000);
+
+	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
+	OUT_RING(ring, 0x00000000);
+}
+
+/* emit texture state for mem->gmem restore operation.. eventually it would
+ * be good to get rid of this and use normal CSO/etc state for more of these
+ * special cases, but for now the compiler is not sufficient..
+ *
+ * Loads one nearest-filtered sampler, one texture descriptor built from
+ * psurf, and one mipaddr entry pointing at the surface's buffer object,
+ * all at the fragment texture offset.
+ */
+void
+fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf)
+{
+	struct fd_resource *rsc = fd_resource(psurf->texture);
+	uint32_t dword;
+
+	/* output sampler state: */
+	OUT_PKT3(ring, CP_LOAD_STATE, 4);
+	dword = CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
+			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+			CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
+			CP_LOAD_STATE_0_NUM_UNIT(1);
+	OUT_RING(ring, dword);
+	dword = CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
+			CP_LOAD_STATE_1_EXT_SRC_ADDR(0);
+	OUT_RING(ring, dword);
+	dword = A3XX_TEX_SAMP_0_XY_MAG(A3XX_TEX_NEAREST) |
+			A3XX_TEX_SAMP_0_XY_MIN(A3XX_TEX_NEAREST) |
+			A3XX_TEX_SAMP_0_WRAP_S(A3XX_TEX_CLAMP_TO_EDGE) |
+			A3XX_TEX_SAMP_0_WRAP_T(A3XX_TEX_CLAMP_TO_EDGE) |
+			A3XX_TEX_SAMP_0_WRAP_R(A3XX_TEX_REPEAT);
+	OUT_RING(ring, dword);
+	OUT_RING(ring, 0x00000000);
+
+	/* emit texture state: */
+	OUT_PKT3(ring, CP_LOAD_STATE, 6);
+	dword = CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
+			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+			CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
+			CP_LOAD_STATE_0_NUM_UNIT(1);
+	OUT_RING(ring, dword);
+	dword = CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
+			CP_LOAD_STATE_1_EXT_SRC_ADDR(0);
+	OUT_RING(ring, dword);
+	dword = A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(psurf->format)) |
+			0x40000000 | // XXX
+			fd3_tex_swiz(psurf->format, PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_GREEN,
+					PIPE_SWIZZLE_RED, PIPE_SWIZZLE_ALPHA);
+	OUT_RING(ring, dword);
+	dword = A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(psurf->format)) |
+			A3XX_TEX_CONST_1_WIDTH(psurf->width) |
+			A3XX_TEX_CONST_1_HEIGHT(psurf->height);
+	OUT_RING(ring, dword);
+	dword = A3XX_TEX_CONST_2_PITCH(rsc->pitch * rsc->cpp) |
+			A3XX_TEX_CONST_2_INDX(0);
+	OUT_RING(ring, dword);
+	OUT_RING(ring, 0x00000000);
+
+	/* emit mipaddrs: */
+	OUT_PKT3(ring, CP_LOAD_STATE, 3);
+	dword = CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * FRAG_TEX_OFF) |
+			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+			CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_MIPADDR) |
+			CP_LOAD_STATE_0_NUM_UNIT(1);
+	OUT_RING(ring, dword);
+	dword = CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
+			CP_LOAD_STATE_1_EXT_SRC_ADDR(0);
+	OUT_RING(ring, dword);
+	OUT_RELOC(ring, rsc->bo, 0, 0);
+}
+
+/* Emit VFD fetch and decode instructions for up to n vertex buffers.
+ *
+ * prog:  program whose vertex shader inputs are paired, by index, with
+ *        the entries of vbufs
+ * vbufs: array of vertex buffer descriptions
+ * n:     number of entries in vbufs; clamped to the vertex shader's
+ *        input count
+ */
+void
+fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
+		struct fd_program_stateobj *prog,
+		struct fd3_vertex_buf *vbufs, uint32_t n)
+{
+	struct fd3_shader_stateobj *vp = prog->vp;
+	uint32_t count = MIN2(n, vp->inputs_count);
+	uint32_t idx;
+
+	for (idx = 0; idx < count; idx++) {
+		struct fd3_vertex_buf *vb = &vbufs[idx];
+		struct fd_resource *rsc = fd_resource(vb->prsc);
+		enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(vb->format);
+		/* every entry except the last one chains to the next: */
+		bool more = (idx != (count - 1));
+		uint32_t fs = util_format_get_blocksize(vb->format);
+
+		OUT_PKT0(ring, REG_A3XX_VFD_FETCH(idx), 2);
+		OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
+				A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
+				COND(more, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
+				A3XX_VFD_FETCH_INSTR_0_INDEXCODE(idx) |
+				A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
+		OUT_RELOC(ring, rsc->bo, vb->offset, 0);
+
+		OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(idx), 1);
+		OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
+				A3XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[idx].compmask) |
+				A3XX_VFD_DECODE_INSTR_FORMAT(fmt) |
+				A3XX_VFD_DECODE_INSTR_REGID(vp->inputs[idx].regid) |
+				A3XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
+				A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+				COND(more, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
+	}
+}
+
+/* Emit hardware state for every state group flagged in dirty into
+ * ctx->ring, then clear those flags from ctx->dirty.
+ *
+ * ctx:   context whose currently bound state objects are read
+ * dirty: mask of FD_DIRTY_* bits selecting which state groups to emit
+ */
+void
+fd3_emit_state(struct fd_context *ctx, uint32_t dirty)
+{
+ struct fd_ringbuffer *ring = ctx->ring;
+
+ if (dirty & FD_DIRTY_SAMPLE_MASK) {
+ OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
+ A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
+ A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(ctx->sample_mask));
+ }
+
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
+ struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
+ struct pipe_stencil_ref *sr = &ctx->stencil_ref;
+
+ /* RB_RENDER_CONTROL is updated via the RMW helper rather than
+ * written outright:
+ */
+ fd3_emit_rbrc_draw_state(ring, zsa->rb_render_control);
+
+ OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, zsa->rb_depth_control);
+
+ OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
+ OUT_RING(ring, zsa->rb_stencil_control);
+
+ /* stencil ref values are merged into the precomputed ref/mask
+ * words; ref_value[0] goes with the first register and
+ * ref_value[1] with the _BF one:
+ */
+ OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
+ OUT_RING(ring, zsa->rb_stencilrefmask |
+ A3XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
+ OUT_RING(ring, zsa->rb_stencilrefmask_bf |
+ A3XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
+ }
+
+ if (dirty & FD_DIRTY_RASTERIZER) {
+ struct fd3_rasterizer_stateobj *rasterizer =
+ fd3_rasterizer_stateobj(ctx->rasterizer);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
+ OUT_RING(ring, rasterizer->gras_su_mode_control);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SU_POINT_MINMAX, 2);
+ OUT_RING(ring, rasterizer->gras_su_point_minmax);
+ OUT_RING(ring, rasterizer->gras_su_point_size);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE, 2);
+ OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
+ OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, rasterizer->gras_cl_clip_cntl);
+ }
+
+ /* PC_PRIM_VTX_CNTL combines rasterizer state with a value derived
+ * from the fragment shader, so it is re-emitted when either changes:
+ */
+ if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
+ struct fd3_rasterizer_stateobj *rasterizer =
+ fd3_rasterizer_stateobj(ctx->rasterizer);
+ struct fd3_shader_stateobj *fp = ctx->prog.fp;
+ uint32_t stride_in_vpc;
+
+ /* total_in rounded up to a multiple of 4, divided by 4; a
+ * non-zero result is raised to at least 2:
+ */
+ stride_in_vpc = align(fp->total_in, 4) / 4;
+ if (stride_in_vpc > 0)
+ stride_in_vpc = MAX2(stride_in_vpc, 2);
+
+ OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
+ OUT_RING(ring, rasterizer->pc_prim_vtx_cntl |
+ A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(stride_in_vpc));
+ }
+
+ if (dirty & FD_DIRTY_SCISSOR) {
+ /* bottom-right is emitted as max - 1: */
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(ctx->scissor.minx) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(ctx->scissor.miny));
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(ctx->scissor.maxx - 1) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(ctx->scissor.maxy - 1));
+
+ /* grow max_scissor so that it also covers the scissor just emitted: */
+ ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, ctx->scissor.minx);
+ ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, ctx->scissor.miny);
+ ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, ctx->scissor.maxx);
+ ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, ctx->scissor.maxy);
+ }
+
+ if (dirty & FD_DIRTY_VIEWPORT) {
+ /* X and Y offsets are the viewport translate minus 0.5; Z offset
+ * and all scales are passed through unchanged:
+ */
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(ctx->viewport.translate[0] - 0.5));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(ctx->viewport.scale[0]));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(ctx->viewport.translate[1] - 0.5));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(ctx->viewport.scale[1]));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(ctx->viewport.translate[2]));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
+ }
+
+ if (dirty & FD_DIRTY_PROG)
+ fd3_program_emit(ring, &ctx->prog);
+
+ if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
+ struct fd_program_stateobj *prog = &ctx->prog;
+
+ /* the shader is only handed to emit_constants() when that stage
+ * is flagged in prog->dirty; otherwise NULL is passed:
+ */
+ emit_constants(ring, SB_VERT_SHADER,
+ &ctx->constbuf[PIPE_SHADER_VERTEX],
+ (prog->dirty & FD_SHADER_DIRTY_VP) ? prog->vp : NULL);
+ emit_constants(ring, SB_FRAG_SHADER,
+ &ctx->constbuf[PIPE_SHADER_FRAGMENT],
+ (prog->dirty & FD_SHADER_DIRTY_FP) ? prog->fp : NULL);
+ }
+
+ if (dirty & FD_DIRTY_BLEND) {
+ struct fd3_blend_stateobj *blend = fd3_blend_stateobj(ctx->blend);
+ uint32_t i;
+
+ /* one control + blend-control pair per entry of blend->rb_mrt: */
+ for (i = 0; i < ARRAY_SIZE(blend->rb_mrt); i++) {
+ OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
+ OUT_RING(ring, blend->rb_mrt[i].control);
+
+ OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
+ OUT_RING(ring, blend->rb_mrt[i].blend_control);
+ }
+ }
+
+ if (dirty & FD_DIRTY_VERTTEX)
+ emit_textures(ring, SB_VERT_TEX, &ctx->verttex);
+
+ if (dirty & FD_DIRTY_FRAGTEX)
+ emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex);
+
+ /* everything requested has been emitted: */
+ ctx->dirty &= ~dirty;
+}
+
+/* emit setup at begin of new cmdstream buffer (don't rely on previous
+ * state, there could have been a context switch between ioctls):
+ *
+ * Writes a fixed set of register values into ctx->ring, referencing the
+ * vs_pvt_mem, fs_pvt_mem and vsc_size_mem buffers held by the a3xx
+ * context, and finishes with emit_cache_flush().
+ */
+void
+fd3_emit_restore(struct fd_context *ctx)
+{
+ struct fd3_context *fd3_ctx = fd3_context(ctx);
+ struct fd_ringbuffer *ring = ctx->ring;
+ int i;
+
+ /* NOTE(review): mask/value meaning of these magic numbers is not
+ * visible here -- confirm against the CP_REG_RMW packet definition.
+ */
+ OUT_PKT3(ring, CP_REG_RMW, 3);
+ OUT_RING(ring, REG_A3XX_RBBM_CLOCK_CTL);
+ OUT_RING(ring, 0xfffcffff);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
+ OUT_RING(ring, 0x00007fff);
+
+ OUT_PKT0(ring, REG_A3XX_SP_VS_PVT_MEM_CTRL_REG, 3);
+ OUT_RING(ring, 0x08000001); /* SP_VS_PVT_MEM_CTRL_REG */
+ OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0, 0); /* SP_VS_PVT_MEM_ADDR_REG */
+ OUT_RING(ring, 0x00000000); /* SP_VS_PVT_MEM_SIZE_REG */
+
+ OUT_PKT0(ring, REG_A3XX_SP_FS_PVT_MEM_CTRL_REG, 3);
+ OUT_RING(ring, 0x08000001); /* SP_FS_PVT_MEM_CTRL_REG */
+ OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0, 0); /* SP_FS_PVT_MEM_ADDR_REG */
+ OUT_RING(ring, 0x00000000); /* SP_FS_PVT_MEM_SIZE_REG */
+
+ OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
+ OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+
+ /* two consecutive registers: RB_MSAA_CONTROL and the one after it */
+ OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 2);
+ OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
+ A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
+ A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff));
+ OUT_RING(ring, 0x00000000); /* UNKNOWN_20C3 */
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1);
+ OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
+ A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
+
+ OUT_PKT0(ring, REG_A3XX_UNKNOWN_0C81, 1);
+ OUT_RING(ring, 0x00000001); /* UNKNOWN_0C81 */
+
+ /* the same VERT_TEX_OFF / FRAG_TEX_OFF / BASETABLE_SZ values are used
+ * as CP_LOAD_STATE destination offsets when texture state is emitted:
+ */
+ OUT_PKT0(ring, REG_A3XX_TPL1_TP_VS_TEX_OFFSET, 1);
+ OUT_RING(ring, A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET(VERT_TEX_OFF) |
+ A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET(VERT_TEX_OFF) |
+ A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR(BASETABLE_SZ * VERT_TEX_OFF));
+
+ OUT_PKT0(ring, REG_A3XX_TPL1_TP_FS_TEX_OFFSET, 1);
+ OUT_RING(ring, A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET(FRAG_TEX_OFF) |
+ A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET(FRAG_TEX_OFF) |
+ A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(BASETABLE_SZ * FRAG_TEX_OFF));
+
+ OUT_PKT0(ring, REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0, 2);
+ OUT_RING(ring, 0x00000000); /* VPC_VARY_CYLWRAP_ENABLE_0 */
+ OUT_RING(ring, 0x00000000); /* VPC_VARY_CYLWRAP_ENABLE_1 */
+
+ OUT_PKT0(ring, REG_A3XX_UNKNOWN_0E43, 1);
+ OUT_RING(ring, 0x00000001); /* UNKNOWN_0E43 */
+
+ OUT_PKT0(ring, REG_A3XX_UNKNOWN_0F03, 1);
+ OUT_RING(ring, 0x00000001); /* UNKNOWN_0F03 */
+
+ OUT_PKT0(ring, REG_A3XX_UNKNOWN_0EE0, 1);
+ OUT_RING(ring, 0x00000003); /* UNKNOWN_0EE0 */
+
+ OUT_PKT0(ring, REG_A3XX_UNKNOWN_0C3D, 1);
+ OUT_RING(ring, 0x00000001); /* UNKNOWN_0C3D */
+
+ OUT_PKT0(ring, REG_A3XX_UNKNOWN_0E00, 1);
+ OUT_RING(ring, 0x00000000); /* UNKNOWN_0E00 */
+
+ /* VS range register followed by the FS range register: */
+ OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 2);
+ OUT_RING(ring, A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(0) |
+ A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(0));
+ OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0) |
+ A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0));
+
+ OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 1);
+ OUT_RING(ring, 0x00000001); /* UCHE_CACHE_MODE_CONTROL_REG */
+
+ OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);
+ OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0); /* VSC_SIZE_ADDRESS */
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SU_POINT_MINMAX, 2);
+ OUT_RING(ring, 0xffc00010); /* GRAS_SU_POINT_MINMAX */
+ OUT_RING(ring, 0x00000008); /* GRAS_SU_POINT_SIZE */
+
+ OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
+ OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */
+
+ OUT_PKT0(ring, REG_A3XX_PA_SC_WINDOW_OFFSET, 1);
+ OUT_RING(ring, A3XX_PA_SC_WINDOW_OFFSET_X(0) |
+ A3XX_PA_SC_WINDOW_OFFSET_Y(0));
+
+ OUT_PKT0(ring, REG_A3XX_RB_BLEND_RED, 4);
+ OUT_RING(ring, 0x00000000); /* RB_BLEND_RED */
+ OUT_RING(ring, 0x00000000); /* RB_BLEND_GREEN */
+ OUT_RING(ring, 0x00000000); /* RB_BLEND_BLUE */
+ OUT_RING(ring, 0x3c0000ff); /* RB_BLEND_ALPHA */
+
+ /* zero all six user clip planes: */
+ for (i = 0; i < 6; i++) {
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(i), 4);
+ OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].X */
+ OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].Y */
+ OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].Z */
+ OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].W */
+ }
+
+ emit_cache_flush(ring);
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
new file mode 100644
index 00000000000..668e5ddd095
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
@@ -0,0 +1,89 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_EMIT_H
+#define FD3_EMIT_H
+
+#include "pipe/p_context.h"
+
+#include "freedreno_context.h"
+#include "fd3_util.h"
+
+
+struct fd_ringbuffer;
+enum adreno_state_block;
+
+void fd3_emit_constant(struct fd_ringbuffer *ring,
+ enum adreno_state_block sb,
+ uint32_t regid, uint32_t offset, uint32_t sizedwords,
+ const uint32_t *dwords, struct pipe_resource *prsc);
+
+void fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
+ struct pipe_surface *psurf);
+
+/* NOTE: this just exists because we don't have proper vertex/vertexbuf
+ * state objs for clear, and mem2gmem/gmem2mem operations..
+ *
+ * Each entry describes one vertex buffer for fd3_emit_vertex_bufs().
+ */
+struct fd3_vertex_buf {
+ /* offset: reloc offset applied to the buffer address;
+ * stride: value programmed as the fetch instruction's BUFSTRIDE
+ */
+ unsigned offset, stride;
+ /* resource whose buffer object is fetched from: */
+ struct pipe_resource *prsc;
+ /* element format; determines the vertex format and fetch size: */
+ enum pipe_format format;
+};
+
+void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
+ struct fd_program_stateobj *prog,
+ struct fd3_vertex_buf *vbufs, uint32_t n);
+void fd3_emit_state(struct fd_context *ctx, uint32_t dirty);
+void fd3_emit_restore(struct fd_context *ctx);
+
+
+/* use RMW (read-modify-write) to update RB_RENDER_CONTROL since the
+ * GMEM/binning code is deciding on the bin-width (and whether to
+ * use binning) after the draw/clear state is emitted.
+ *
+ * The draw-state variant emits the BIN_WIDTH and ENABLE_GMEM bits as the
+ * mask dword, followed by val.
+ */
+static inline void
+fd3_emit_rbrc_draw_state(struct fd_ringbuffer *ring, uint32_t val)
+{
+	const uint32_t tile_bits = A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK |
+			A3XX_RB_RENDER_CONTROL_ENABLE_GMEM;
+
+	OUT_PKT3(ring, CP_REG_RMW, 3);
+	OUT_RING(ring, REG_A3XX_RB_RENDER_CONTROL);
+	OUT_RING(ring, tile_bits);
+	OUT_RING(ring, val);
+}
+
+/* Tile-state counterpart of fd3_emit_rbrc_draw_state(): updates
+ * RB_RENDER_CONTROL via CP_REG_RMW using the complement of the
+ * BIN_WIDTH | ENABLE_GMEM bits as the mask dword, followed by val.
+ */
+static inline void
+fd3_emit_rbrc_tile_state(struct fd_ringbuffer *ring, uint32_t val)
+{
+	const uint32_t tile_bits = A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK |
+			A3XX_RB_RENDER_CONTROL_ENABLE_GMEM;
+
+	OUT_PKT3(ring, CP_REG_RMW, 3);
+	OUT_RING(ring, REG_A3XX_RB_RENDER_CONTROL);
+	OUT_RING(ring, ~tile_bits);
+	OUT_RING(ring, val);
+}
+
+#endif /* FD3_EMIT_H */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
new file mode 100644
index 00000000000..16ec95972a0
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -0,0 +1,486 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "freedreno_state.h"
+#include "freedreno_resource.h"
+
+#include "fd3_gmem.h"
+#include "fd3_context.h"
+#include "fd3_emit.h"
+#include "fd3_program.h"
+#include "fd3_util.h"
+#include "fd3_zsa.h"
+
+
+/* Program all four MRT (render target) slots.
+ *
+ * ring:    ringbuffer the register writes are emitted to
+ * nr_bufs: number of valid entries in 'bufs'; slots at or beyond this
+ *          index are programmed with format 0, zero pitch and zero base
+ * bufs:    color surfaces, one per used slot
+ * bases:   optional per-slot base (scaled by the resource's cpp); may be
+ *          NULL, in which case every base is 0
+ * bin_w:   bin width in pixels, used to derive the buffer pitch
+ */
+static void
+emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
+		struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w)
+{
+	unsigned i;
+
+	for (i = 0; i < 4; i++) {
+		enum a3xx_color_fmt format = 0;
+		enum a3xx_color_swap swap = WZYX;
+		uint32_t stride = 0;
+		uint32_t base = 0;
+
+		if (i < nr_bufs) {
+			struct pipe_surface *psurf = bufs[i];
+			struct fd_resource *res = fd_resource(psurf->texture);
+
+			format = fd3_pipe2color(psurf->format);
+			swap = fd3_pipe2swap(psurf->format);
+			stride = bin_w * res->cpp;
+
+			if (bases) {
+				base = bases[i] * res->cpp;
+			}
+		}
+
+		OUT_PKT0(ring, REG_A3XX_RB_MRT_BUF_INFO(i), 2);
+		OUT_RING(ring, A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
+				A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
+				A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE_32X32) |
+				A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap));
+		OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base));
+
+		/* the shader-side output format has to match the RB format */
+		OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1);
+		OUT_RING(ring, A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(format));
+	}
+}
+
+/* Returns the product of the bin width and height rounded up with align()
+ * to a multiple of 0x4000; callers use it as the GMEM base of the
+ * depth/stencil buffer.
+ */
+static uint32_t
+depth_base(struct fd_gmem_stateobj *gmem)
+{
+	uint32_t bin_pixels = gmem->bin_w * gmem->bin_h;
+
+	return align(bin_pixels, 0x4000);
+}
+
+/* transfer from gmem to system memory (ie. normal RAM) */
+
+/* Emit the RB copy setup plus one rectangle draw that resolves a single
+ * surface out of GMEM.
+ *
+ * ring:  ringbuffer to emit to
+ * mode:  RB copy mode (callers pass RB_COPY_RESOLVE or
+ *        RB_COPY_DEPTH_STENCIL)
+ * base:  value for the GMEM_BASE field of RB_COPY_CONTROL
+ * psurf: destination surface; its backing bo, pitch, cpp and format
+ *        describe the copy destination
+ */
+static void
+emit_gmem2mem_surf(struct fd_ringbuffer *ring,
+ enum adreno_rb_copy_control_mode mode,
+ uint32_t base, struct pipe_surface *psurf)
+{
+ struct fd_resource *rsc = fd_resource(psurf->texture);
+
+ /* four consecutive registers: COPY_CONTROL, DEST_BASE, DEST_PITCH,
+  * DEST_INFO
+  */
+ OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
+ OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
+ A3XX_RB_COPY_CONTROL_MODE(mode) |
+ A3XX_RB_COPY_CONTROL_GMEM_BASE(base));
+ OUT_RELOCS(ring, rsc->bo, 0, 0, -1); /* RB_COPY_DEST_BASE */
+ OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(rsc->pitch * rsc->cpp));
+ OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
+ A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(psurf->format)) |
+ A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
+ A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
+ A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(psurf->format)));
+
+ /* auto-indexed rectangle draw that triggers the copy */
+ OUT_PKT3(ring, CP_DRAW_INDX, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX,
+ INDEX_SIZE_IGN, IGNORE_VISIBILITY));
+ OUT_RING(ring, 2); /* NumIndices */
+}
+
+/* Resolve pass for one tile (gmem -> system memory).
+ *
+ * Puts the RB and GRAS into RB_RESOLVE_PASS mode with depth, stencil and
+ * alpha tests set to FUNC_NEVER, emits the solid program and its vertex
+ * buffer, copies out each buffer flagged in ctx->resolve, and finally
+ * switches RB/GRAS back to RB_RENDERING_PASS.
+ *
+ * The xoff/yoff/bin_w/bin_h parameters are not read by this function.
+ */
+static void
+fd3_emit_tile_gmem2mem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
+ uint32_t bin_w, uint32_t bin_h)
+{
+ struct fd3_context *fd3_ctx = fd3_context(ctx);
+ struct fd_ringbuffer *ring = ctx->ring;
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+
+ OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
+
+ OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
+ A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
+ A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+
+ /* two dwords: front-face value followed by the next register (same
+  * value is written to both)
+  */
+ OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
+ OUT_RING(ring, 0xff000000 |
+ A3XX_RB_STENCILREFMASK_STENCILREF(0) |
+ A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
+ A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+ OUT_RING(ring, 0xff000000 |
+ A3XX_RB_STENCILREFMASK_STENCILREF(0) |
+ A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
+ A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */
+
+ /* enter resolve mode */
+ OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+
+ fd3_emit_rbrc_draw_state(ring,
+ A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
+ A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
+
+ OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
+ OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
+
+ /* window scissor covers the whole framebuffer (TL then BR) */
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
+
+ OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
+ OUT_RING(ring, 0); /* VFD_INDEX_MIN */
+ OUT_RING(ring, 2); /* VFD_INDEX_MAX */
+ OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
+ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
+
+ fd3_program_emit(ring, &ctx->solid_prog);
+
+ fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
+ { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
+ }, 1);
+
+ /* NOTE(review): the depth/stencil GMEM base is scaled by the cpp of
+  * cbufs[0], and cbufs[0] is dereferenced without a NULL check --
+  * confirm a color buffer is always bound when depth/stencil is
+  * resolved.
+  */
+ if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+ uint32_t base = depth_base(&ctx->gmem) *
+ fd_resource(pfb->cbufs[0]->texture)->cpp;
+ emit_gmem2mem_surf(ring, RB_COPY_DEPTH_STENCIL, base, pfb->zsbuf);
+ }
+
+ if (ctx->resolve & FD_BUFFER_COLOR) {
+ emit_gmem2mem_surf(ring, RB_COPY_RESOLVE, 0, pfb->cbufs[0]);
+ }
+
+ /* back to normal rendering mode */
+ OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+}
+
+/* transfer from system memory to gmem */
+
+/* Restore a single surface into GMEM: points MRT slot 0 at 'base' (the
+ * other three slots are programmed as unused by emit_mrt()), binds the
+ * surface as the restore texture and emits one auto-indexed rectangle
+ * draw.
+ *
+ * ring:  ringbuffer to emit to
+ * base:  base passed through to emit_mrt() for slot 0
+ * psurf: surface whose contents are restored
+ * bin_w: bin width passed through to emit_mrt()
+ */
+static void
+emit_mem2gmem_surf(struct fd_ringbuffer *ring, uint32_t base,
+ struct pipe_surface *psurf, uint32_t bin_w)
+{
+ emit_mrt(ring, 1, &psurf, &base, bin_w);
+
+ fd3_emit_gmem_restore_tex(ring, psurf);
+
+ OUT_PKT3(ring, CP_DRAW_INDX, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX,
+ INDEX_SIZE_IGN, IGNORE_VISIBILITY));
+ OUT_RING(ring, 2); /* NumIndices */
+}
+
+/* Restore pass for one tile (system memory -> gmem).
+ *
+ * Writes the tile's rectangle, normalised to the framebuffer size, into
+ * blit_texcoord_vbuf, sets up MRT/blend, depth, stencil, viewport and
+ * scissor state for a bin-sized draw, emits the blit program with its two
+ * vertex buffers, and then draws one rectangle for each buffer flagged in
+ * ctx->restore.
+ *
+ * xoff/yoff:   position of the tile within the framebuffer, in pixels
+ * bin_w/bin_h: size of this tile; used for texcoords, viewport and
+ *              scissor, then replaced by gmem->bin_w/bin_h for the
+ *              pitch/base calculations (see comment below)
+ */
+static void
+fd3_emit_tile_mem2gmem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
+ uint32_t bin_w, uint32_t bin_h)
+{
+ struct fd3_context *fd3_ctx = fd3_context(ctx);
+ struct fd_gmem_stateobj *gmem = &ctx->gmem;
+ struct fd_ringbuffer *ring = ctx->ring;
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ float x0, y0, x1, y1;
+ unsigned i;
+
+ /* write texture coordinates to vertexbuf: */
+ x0 = ((float)xoff) / ((float)pfb->width);
+ x1 = ((float)xoff + bin_w) / ((float)pfb->width);
+ y0 = ((float)yoff) / ((float)pfb->height);
+ y1 = ((float)yoff + bin_h) / ((float)pfb->height);
+
+ /* packet payload: destination address followed by the four floats */
+ OUT_PKT3(ring, CP_MEM_WRITE, 5);
+ OUT_RELOC(ring, fd_resource(fd3_ctx->blit_texcoord_vbuf)->bo, 0, 0);
+ OUT_RING(ring, fui(x0));
+ OUT_RING(ring, fui(y0));
+ OUT_RING(ring, fui(x1));
+ OUT_RING(ring, fui(y1));
+
+ /* same control/blend setup for all four MRT slots: src factor ONE,
+  * dest factor ZERO
+  */
+ for (i = 0; i < 4; i++) {
+ OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
+ OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(12) |
+ A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) |
+ A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
+
+ OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
+ OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
+ A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
+ A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
+ A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
+ A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
+ A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO) |
+ A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE);
+ }
+
+ /* tile-side RB_RENDER_CONTROL bits: bin width only, ENABLE_GMEM is
+  * not set here
+  */
+ fd3_emit_rbrc_tile_state(ring,
+ A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
+
+ OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER); /* GRAS_CL_CLIP_CNTL */
+
+ /* viewport sized to this tile; y scale is negated */
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)bin_w/2.0 - 0.5));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)bin_w/2.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)bin_h/2.0 - 0.5));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)bin_h/2.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
+
+ /* window and screen scissors both span (0,0)..(bin_w-1,bin_h-1) */
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
+ OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) |
+ A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1));
+
+ /* NOTE(review): the raw 0x2 bit has no symbolic name here -- meaning
+  * not visible from this file.
+  */
+ OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
+ OUT_RING(ring, 0x2 |
+ A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
+ A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) |
+ A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+
+ fd3_emit_rbrc_draw_state(ring,
+ A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS));
+
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
+
+ OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
+ OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(2) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
+ A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
+
+ OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
+ OUT_RING(ring, 0); /* VFD_INDEX_MIN */
+ OUT_RING(ring, 2); /* VFD_INDEX_MAX */
+ OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
+ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
+
+ fd3_program_emit(ring, &ctx->blit_prog);
+
+ /* buffer 0: the texcoords written above; buffer 1: solid_vbuf */
+ fd3_emit_vertex_bufs(ring, &ctx->blit_prog, (struct fd3_vertex_buf[]) {
+ { .prsc = fd3_ctx->blit_texcoord_vbuf, .stride = 8, .format = PIPE_FORMAT_R32G32_FLOAT },
+ { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
+ }, 2);
+
+ /* for gmem pitch/base calculations, we need to use the non-
+ * truncated tile sizes:
+ */
+ bin_w = gmem->bin_w;
+ bin_h = gmem->bin_h;
+
+ if (ctx->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
+ emit_mem2gmem_surf(ring, depth_base(gmem), pfb->zsbuf, bin_w);
+
+ if (ctx->restore & FD_BUFFER_COLOR)
+ emit_mem2gmem_surf(ring, 0, pfb->cbufs[0], bin_w);
+
+ /* same GRAS_SC_CONTROL as above but with RASTER_MODE back to 0 */
+ OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+}
+
+/* Program the eight VSC pipes.  Binning is not used, so pipe 0 is given
+ * the whole bin grid (nbins_x by nbins_y) backed by vsc_pipe_mem, and
+ * pipes 1..7 are written with an all-zero config, address and length.
+ */
+static void
+update_vsc_pipe(struct fd_context *ctx)
+{
+	struct fd_bo *pipe_bo = fd3_context(ctx)->vsc_pipe_mem;
+	struct fd_gmem_stateobj *gmem = &ctx->gmem;
+	struct fd_ringbuffer *ring = ctx->ring;
+	int pipe;
+
+	/* pipe 0 covers every bin: */
+	OUT_PKT0(ring, REG_A3XX_VSC_PIPE(0), 3);
+	OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(0) |
+			A3XX_VSC_PIPE_CONFIG_Y(0) |
+			A3XX_VSC_PIPE_CONFIG_W(gmem->nbins_x) |
+			A3XX_VSC_PIPE_CONFIG_H(gmem->nbins_y));
+	OUT_RELOC(ring, pipe_bo, 0, 0);            /* VSC_PIPE[0].DATA_ADDRESS */
+	OUT_RING(ring, fd_bo_size(pipe_bo) - 32);  /* VSC_PIPE[0].DATA_LENGTH */
+
+	/* remaining pipes are left empty: */
+	pipe = 1;
+	while (pipe < 8) {
+		OUT_PKT0(ring, REG_A3XX_VSC_PIPE(pipe), 3);
+		OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(0) |
+				A3XX_VSC_PIPE_CONFIG_Y(0) |
+				A3XX_VSC_PIPE_CONFIG_W(0) |
+				A3XX_VSC_PIPE_CONFIG_H(0));
+		OUT_RING(ring, 0x00000000);    /* VSC_PIPE[pipe].DATA_ADDRESS */
+		OUT_RING(ring, 0x00000000);    /* VSC_PIPE[pipe].DATA_LENGTH */
+		pipe++;
+	}
+}
+
+/* Tile-pass hook run once before the first tile: emits the restore
+ * state, the VSC bin size and the VSC pipe configuration.
+ */
+static void
+fd3_emit_tile_init(struct fd_context *ctx)
+{
+	struct fd_gmem_stateobj *gmem = &ctx->gmem;
+	struct fd_ringbuffer *ring = ctx->ring;
+	uint32_t bin_size;
+
+	fd3_emit_restore(ctx);
+
+	/* the nominal bin size from the gmem state is used here; per-tile
+	 * bin_w/h values may be truncated at the right and bottom edges
+	 */
+	bin_size = A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
+			A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h);
+
+	OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);
+	OUT_RING(ring, bin_size);
+
+	/* TODO this is only needed when the gmem stateobj changes.. or in
+	 * particular when the # of bins changes..
+	 */
+	update_vsc_pipe(ctx);
+}
+
+/* Tile-pass hook run before mem2gmem: programs the depth buffer info and
+ * pitch, the RB window size, and puts the RB into rendering-pass mode.
+ * The xoff/yoff/bin_w/bin_h parameters are not read.
+ */
+static void
+fd3_emit_tile_prep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
+		uint32_t bin_w, uint32_t bin_h)
+{
+	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+	struct fd_gmem_stateobj *gmem = &ctx->gmem;
+	struct fd_ringbuffer *ring = ctx->ring;
+	uint32_t depth_info, depth_pitch;
+
+	/* the depth base is always programmed; format and pitch only when a
+	 * depth/stencil surface is bound
+	 */
+	depth_info = A3XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(gmem));
+	depth_pitch = 0x00000000;
+	if (pfb->zsbuf) {
+		uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format);
+
+		depth_info |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
+		depth_pitch = A3XX_RB_DEPTH_PITCH(cpp * gmem->bin_w);
+	}
+
+	OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
+	OUT_RING(ring, depth_info);
+	OUT_RING(ring, depth_pitch);
+
+	OUT_PKT0(ring, REG_A3XX_RB_WINDOW_SIZE, 1);
+	OUT_RING(ring, A3XX_RB_WINDOW_SIZE_WIDTH(pfb->width) |
+			A3XX_RB_WINDOW_SIZE_HEIGHT(pfb->height));
+
+	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+}
+
+/* Tile-pass hook run before the IB to the rendering commands: selects the
+ * current bin, points the MRTs at GMEM, enables GMEM rendering with the
+ * nominal bin width, and sets the window offset and screen scissor to the
+ * tile rectangle.
+ */
+static void
+fd3_emit_tile_renderprep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
+		uint32_t bin_w, uint32_t bin_h)
+{
+	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+	struct fd_gmem_stateobj *gmem = &ctx->gmem;
+	struct fd_ringbuffer *ring = ctx->ring;
+
+	/* inclusive bottom-right corner of the tile: */
+	uint32_t right = xoff + bin_w - 1;
+	uint32_t bottom = yoff + bin_h - 1;
+
+	OUT_PKT3(ring, CP_SET_BIN, 3);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, CP_SET_BIN_1_X1(xoff) | CP_SET_BIN_1_Y1(yoff));
+	OUT_RING(ring, CP_SET_BIN_2_X2(right) | CP_SET_BIN_2_Y2(bottom));
+
+	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w);
+
+	fd3_emit_rbrc_tile_state(ring,
+			A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
+			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
+
+	/* window offset and screen scissor for the current tile: */
+	OUT_PKT0(ring, REG_A3XX_PA_SC_WINDOW_OFFSET, 1);
+	OUT_RING(ring, A3XX_PA_SC_WINDOW_OFFSET_X(xoff) |
+			A3XX_PA_SC_WINDOW_OFFSET_Y(yoff));
+
+	OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(xoff) |
+			A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(yoff));
+	OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(right) |
+			A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bottom));
+}
+
+/* Install the a3xx tile-pass hooks on the context. */
+void
+fd3_gmem_init(struct pipe_context *pctx)
+{
+	struct fd_context *ctx = fd_context(pctx);
+
+	/* once per pass, then per tile: */
+	ctx->emit_tile_init       = fd3_emit_tile_init;
+	ctx->emit_tile_prep       = fd3_emit_tile_prep;
+
+	/* transfers between system memory and gmem: */
+	ctx->emit_tile_mem2gmem   = fd3_emit_tile_mem2gmem;
+	ctx->emit_tile_gmem2mem   = fd3_emit_tile_gmem2mem;
+
+	ctx->emit_tile_renderprep = fd3_emit_tile_renderprep;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.h b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.h
new file mode 100644
index 00000000000..91b02866c84
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.h
@@ -0,0 +1,36 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_GMEM_H_
+#define FD3_GMEM_H_
+
+#include "pipe/p_context.h"
+
+void fd3_gmem_init(struct pipe_context *pctx);
+
+#endif /* FD3_GMEM_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
new file mode 100644
index 00000000000..b5a027e6503
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -0,0 +1,642 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "fd3_program.h"
+#include "fd3_compiler.h"
+#include "fd3_texture.h"
+#include "fd3_util.h"
+
+/* Release a shader state object together with the IR and buffer object it
+ * owns.
+ *
+ * Safe to call on a partially constructed object: create_shader() jumps
+ * here when compilation or assembly failed, at which point so->bo (and
+ * possibly so->ir) is still NULL from CALLOC_STRUCT, so each member is
+ * only released if it was actually set.
+ */
+static void
+delete_shader(struct fd3_shader_stateobj *so)
+{
+	if (!so)
+		return;
+
+	if (so->ir)
+		ir3_shader_destroy(so->ir);
+	if (so->bo)
+		fd_bo_del(so->bo);
+	free(so);
+}
+
+/* Assemble so->ir into machine code and upload it into a newly allocated
+ * buffer object.
+ *
+ * On success so->bo holds the shader binary and so->instrlen/constlen are
+ * derived from the assembler info.  On any failure (assembler returned
+ * NULL, bo allocation failed, or the bo could not be mapped) so->bo is
+ * left NULL, which is the condition callers test to detect the error.
+ */
+static void
+assemble_shader(struct pipe_context *pctx, struct fd3_shader_stateobj *so)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	uint32_t sz, *bin;
+	void *map;
+
+	bin = ir3_shader_assemble(so->ir, &so->info);
+	if (!bin)
+		return;
+
+	sz = so->info.sizedwords * 4;
+
+	so->bo = fd_bo_new(ctx->screen->dev, sz,
+			DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
+			DRM_FREEDRENO_GEM_TYPE_KMEM);
+	if (!so->bo)
+		goto out;
+
+	map = fd_bo_map(so->bo);
+	if (!map) {
+		/* don't leave an unusable bo behind for the caller */
+		fd_bo_del(so->bo);
+		so->bo = NULL;
+		goto out;
+	}
+
+	memcpy(map, bin, sz);
+
+	/* instrlen is sizedwords / 8; constlen is the highest const index
+	 * used plus one
+	 */
+	so->instrlen = so->info.sizedwords / 8;
+	so->constlen = so->info.max_const + 1;
+
+out:
+	free(bin);
+}
+
+/* Vertex shader inputs are loaded into registers before the shader runs,
+ * so the max_reg computed from the shader instructions alone may
+ * under-report the registers in use.  Raise info.max_reg to cover every
+ * input's register (regid >> 2).
+ */
+static void
+fixup_vp_regfootprint(struct fd3_shader_stateobj *so)
+{
+	unsigned n = 0;
+
+	while (n < so->inputs_count) {
+		if (!(so->info.max_reg > so->inputs[n].regid >> 2))
+			so->info.max_reg = so->inputs[n].regid >> 2;
+		n++;
+	}
+}
+
+/* Compile and assemble a TGSI shader into a new shader state object.
+ *
+ * pctx: context the shader is created for
+ * cso:  gallium shader state holding the TGSI tokens
+ * type: SHADER_VERTEX or SHADER_FRAGMENT
+ *
+ * Returns the new state object, or NULL if allocation, compilation or
+ * assembly failed.  With FD_DBG_DISASM set, the TGSI and the resulting
+ * disassembly are dumped.
+ */
+static struct fd3_shader_stateobj *
+create_shader(struct pipe_context *pctx, const struct pipe_shader_state *cso,
+		enum shader_t type)
+{
+	struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj);
+
+	if (!so)
+		return NULL;
+
+	so->type = type;
+
+	if (fd_mesa_debug & FD_DBG_DISASM) {
+		DBG("dump tgsi: type=%d", so->type);
+		tgsi_dump(cso->tokens, 0);
+	}
+
+	/* we seem to get wrong colors (maybe swap/endianness or hw issue?)
+	 * with a full precision color reg.  And the blob driver only seems
+	 * to use a half precision register for color output (that I can
+	 * find so far), even with highp precision.  So force half precision
+	 * for the frag shader:
+	 */
+	if (type == SHADER_FRAGMENT)
+		so->half_precision = true;
+
+	if (fd3_compile_shader(so, cso->tokens)) {
+		debug_error("compile failed!");
+		goto fail;
+	}
+
+	assemble_shader(pctx, so);
+	if (!so->bo) {
+		debug_error("assemble failed!");
+		goto fail;
+	}
+
+	if (type == SHADER_VERTEX)
+		fixup_vp_regfootprint(so);
+
+	if (fd_mesa_debug & FD_DBG_DISASM) {
+		DBG("disassemble: type=%d", so->type);
+		disasm_a3xx(fd_bo_map(so->bo), so->info.sizedwords, 0, so->type);
+	}
+
+	return so;
+
+fail:
+	delete_shader(so);
+	return NULL;
+}
+
+/* Create a fragment shader state object from 'cso'; returns NULL on
+ * failure.
+ */
+static void *
+fd3_fp_state_create(struct pipe_context *pctx,
+		const struct pipe_shader_state *cso)
+{
+	struct fd3_shader_stateobj *fp = create_shader(pctx, cso, SHADER_FRAGMENT);
+
+	return fp;
+}
+
+/* Destroy a fragment shader state object created by
+ * fd3_fp_state_create().
+ */
+static void
+fd3_fp_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+	delete_shader(hwcso);
+}
+
+/* Bind 'hwcso' as the current fragment shader and flag the program state
+ * (and its fragment half) as dirty.
+ */
+static void
+fd3_fp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+	struct fd_context *ctx = fd_context(pctx);
+
+	ctx->dirty |= FD_DIRTY_PROG;
+	ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+	ctx->prog.fp = hwcso;
+}
+
+/* Create a vertex shader state object from 'cso'; returns NULL on
+ * failure.
+ */
+static void *
+fd3_vp_state_create(struct pipe_context *pctx,
+		const struct pipe_shader_state *cso)
+{
+	struct fd3_shader_stateobj *vp = create_shader(pctx, cso, SHADER_VERTEX);
+
+	return vp;
+}
+
+/* Destroy a vertex shader state object created by fd3_vp_state_create(). */
+static void
+fd3_vp_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+	delete_shader(hwcso);
+}
+
+/* Bind 'hwcso' as the current vertex shader and flag the program state
+ * (and its vertex half) as dirty.
+ */
+static void
+fd3_vp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+	struct fd_context *ctx = fd_context(pctx);
+
+	ctx->dirty |= FD_DIRTY_PROG;
+	ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
+	ctx->prog.vp = hwcso;
+}
+
+/* Upload a shader binary inline in the command stream with a
+ * CP_LOAD_STATE packet: two header dwords followed by every dword of the
+ * shader, read back from the shader's buffer object.
+ */
+static void
+emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so)
+{
+	struct ir3_shader_info *si = &so->info;
+	enum adreno_state_block sb =
+		(so->type == SHADER_VERTEX) ? SB_VERT_SHADER : SB_FRAG_SHADER;
+	uint32_t *bin;
+	uint32_t n;
+
+	// XXX use SS_INDIRECT
+	bin = fd_bo_map(so->bo);
+
+	OUT_PKT3(ring, CP_LOAD_STATE, 2 + si->sizedwords);
+	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
+			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+			CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
+	OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
+			CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+
+	n = 0;
+	while (n < si->sizedwords) {
+		OUT_RING(ring, bin[n]);
+		n++;
+	}
+}
+
+/* Emit the complete program state for a vertex/fragment shader pair:
+ * HLSQ and SP control registers, VS output -> FS input routing, FS
+ * output/MRT setup, VPC varying configuration, the two shader binaries
+ * (via emit_shader()) and the VFD control registers.
+ *
+ * ring: ringbuffer to emit to
+ * prog: program whose vp and fp members are both dereferenced, so both
+ *       must be set
+ */
+void
+fd3_program_emit(struct fd_ringbuffer *ring,
+ struct fd_program_stateobj *prog)
+{
+ struct fd3_shader_stateobj *vp = prog->vp;
+ struct fd3_shader_stateobj *fp = prog->fp;
+ struct ir3_shader_info *vsi = &vp->info;
+ struct ir3_shader_info *fsi = &fp->info;
+ int i;
+
+ /* we could probably divide this up into things that need to be
+ * emitted if frag-prog is dirty vs if vert-prog is dirty..
+ */
+
+ /* six consecutive registers: HLSQ_CONTROL_0..3, then the VS and FS
+  * control regs
+  */
+ OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
+ A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
+ A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
+ A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE);
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
+ OUT_RING(ring, 0x00000000); /* HLSQ_CONTROL_3_REG */
+ OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
+ A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
+ A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vp->instrlen));
+ OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) |
+ A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) |
+ A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fp->instrlen));
+
+ OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
+ OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(0) |
+ A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
+ // XXX "resolve" (?) bit set on gmem->mem pass..
+// COND(!uniforms, A3XX_SP_SP_CTRL_REG_RESOLVE) |
+ // XXX sometimes 0, sometimes 1:
+ A3XX_SP_SP_CTRL_REG_LOMODE(1));
+
+ /* emit unknown sequence of perfcounter disables that the blob
+ * emits as part of the program state..
+ */
+ for (i = 0; i < 6; i++) {
+ OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER0_SELECT, 1);
+ OUT_RING(ring, 0x00000000); /* SP_PERFCOUNTER0_SELECT */
+
+ OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER4_SELECT, 1);
+ OUT_RING(ring, 0x00000000); /* SP_PERFCOUNTER4_SELECT */
+ }
+
+ OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1);
+ OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen));
+
+ /* three consecutive registers: VS_CTRL_REG0, VS_CTRL_REG1,
+  * VS_PARAM_REG
+  */
+ OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3);
+ OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
+ A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
+ A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
+ A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
+ A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
+ A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
+ A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
+ COND(vp->samplers_count > 0, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) |
+ A3XX_SP_VS_CTRL_REG0_LENGTH(vp->instrlen));
+ OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
+ A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
+ A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vsi->max_const, 0)));
+ OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(vp->pos_regid) |
+ A3XX_SP_VS_PARAM_REG_PSIZEREGID(vp->psize_regid) |
+ A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(vp->outputs_count));
+
+ assert(vp->outputs_count >= fp->inputs_count);
+
+ /* Route VS outputs to FS inputs, two per SP_VS_OUT_REG.
+  * NOTE(review): i advances by two per iteration, so with an odd
+  * fp->inputs_count the B half reads vp->outputs[] / fp->inputs[] at
+  * index inputs_count -- presumably these are fixed-size, zeroed
+  * arrays; confirm.
+  */
+ for (i = 0; i < fp->inputs_count; ) {
+ uint32_t reg = 0;
+
+ OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i/2), 1);
+
+ reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[i].regid);
+ reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[i].compmask);
+ i++;
+
+ reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[i].regid);
+ reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[i].compmask);
+ i++;
+
+ OUT_RING(ring, reg);
+ }
+
+ /* VPC destination locations, four per SP_VS_VPC_DST_REG.
+  * NOTE(review): same over-read pattern as above, up to three entries
+  * past inputs_count -- confirm.
+  */
+ for (i = 0; i < fp->inputs_count; ) {
+ uint32_t reg = 0;
+
+ OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i/4), 1);
+
+ reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(fp->inputs[i++].inloc);
+ reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(fp->inputs[i++].inloc);
+ reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(fp->inputs[i++].inloc);
+ reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(fp->inputs[i++].inloc);
+
+ OUT_RING(ring, reg);
+ }
+
+#if 0
+ /* for some reason, when I write SP_{VS,FS}_OBJ_START_REG I get:
+[ 666.663665] kgsl kgsl-3d0: |a3xx_err_callback| RBBM | AHB bus error | READ | addr=201 | ports=1:3
+[ 666.664001] kgsl kgsl-3d0: |a3xx_err_callback| ringbuffer AHB error interrupt
+[ 670.680909] kgsl kgsl-3d0: |adreno_idle| spun too long waiting for RB to idle
+[ 670.681062] kgsl kgsl-3d0: |kgsl-3d0| Dump Started
+[ 670.681123] kgsl kgsl-3d0: POWER: FLAGS = 00000007 | ACTIVE POWERLEVEL = 00000001
+[ 670.681214] kgsl kgsl-3d0: POWER: INTERVAL TIMEOUT = 0000000A
+[ 670.681367] kgsl kgsl-3d0: GRP_CLK = 325000000
+[ 670.681489] kgsl kgsl-3d0: BUS CLK = 0
+ */
+ OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2);
+ OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) |
+ A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
+ OUT_RELOC(ring, vp->bo, 0, 0); /* SP_VS_OBJ_START_REG */
+#endif
+
+ OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
+ OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));
+
+ /* two consecutive registers: FS_CTRL_REG0, FS_CTRL_REG1 */
+ OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
+ OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
+ A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
+ A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
+ A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
+ A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
+ A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
+ A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
+ COND(fp->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
+ A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen));
+ OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
+ A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
+ A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fsi->max_const, 0)) |
+ A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
+
+#if 0
+ OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
+ OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
+ A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(128 - fp->instrlen));
+ OUT_RELOC(ring, fp->bo, 0, 0); /* SP_FS_OBJ_START_REG */
+#endif
+
+ OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
+ OUT_RING(ring, 0x00000000); /* SP_FS_FLAT_SHAD_MODE_REG_0 */
+ OUT_RING(ring, 0x00000000); /* SP_FS_FLAT_SHAD_MODE_REG_1 */
+
+ OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
+ OUT_RING(ring, 0x00000000); /* SP_FS_OUTPUT_REG */
+
+ /* only MRT 0 gets the shader's color register; the other three slots
+  * are written with regid 0
+  */
+ OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
+ OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(fp->color_regid) |
+ COND(fp->half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION));
+ OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
+ OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
+ OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
+
+ /* two consecutive registers: VPC_ATTR, VPC_PACK */
+ OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
+ OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) |
+ A3XX_VPC_ATTR_THRDASSIGN(1) |
+ A3XX_VPC_ATTR_LMSIZE(1));
+ OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) |
+ A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
+
+ OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
+ OUT_RING(ring, fp->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
+ OUT_RING(ring, fp->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
+ OUT_RING(ring, fp->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
+ OUT_RING(ring, fp->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
+
+ OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
+ OUT_RING(ring, fp->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */
+ OUT_RING(ring, fp->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */
+ OUT_RING(ring, fp->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */
+ OUT_RING(ring, fp->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */
+
+ OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
+ OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
+ A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(252));
+
+ /* each shader upload is followed by a VFD_PERFCOUNTER0_SELECT write */
+ emit_shader(ring, vp);
+
+ OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
+ OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
+
+ emit_shader(ring, fp);
+
+ OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
+ OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
+
+ /* two consecutive registers: VFD_CONTROL_0, VFD_CONTROL_1 */
+ OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
+ OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(vp->total_in) |
+ A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
+ A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(vp->inputs_count) |
+ A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(vp->inputs_count));
+ OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
+ A3XX_VFD_CONTROL_1_REGID4VTX(regid(63,0)) |
+ A3XX_VFD_CONTROL_1_REGID4INST(regid(63,0)));
+}
+
+/* once the compiler is good enough, we should construct TGSI in the
+ * core freedreno driver, and then let the a2xx/a3xx parts compile
+ * the internal shaders from TGSI the same as regular shaders. This
+ * would be the first step towards handling most of clear (and the
+ * gmem<->mem blits) from the core via normal state changes and shader
+ * state objects.
+ *
+ * (Well, there would still be some special bits, because there are
+ * some registers that don't get set for normal draw, but this should
+ * be relatively small and could be handled via callbacks from core
+ * into a2xx/a3xx..)
+ */
+static struct fd3_shader_stateobj *
+create_internal_shader(struct pipe_context *pctx, enum shader_t type,
+		struct ir3_shader *ir)
+{
+	/* Wraps a hand-built ir3 shader in a state object and assembles it.
+	 * The ir is consumed either way: stored in the new object on
+	 * success, destroyed here if the object cannot be allocated.
+	 */
+	struct fd3_shader_stateobj *shader = CALLOC_STRUCT(fd3_shader_stateobj);
+
+	if (shader) {
+		shader->ir = ir;
+		shader->type = type;
+
+		assemble_shader(pctx, shader);
+		assert(shader->bo);
+	} else {
+		/* nobody else will free the ir for us */
+		ir3_shader_destroy(ir);
+	}
+
+	return shader;
+}
+
+/* Creates shader:
+ * (sy)(ss)(rpt1)bary.f (ei)r0.z, (r)0, r0.x
+ * (rpt5)nop
+ * sam (f32)(xyzw)r0.x, r0.z, s#0, t#0
+ * (sy)(rpt3)cov.f32f16 hr0.x, (r)r0.x
+ * end
+ */
+static struct fd3_shader_stateobj *
+create_blit_fp(struct pipe_context *pctx)
+{
+	struct ir3_shader *prog = ir3_shader_create();
+	struct fd3_shader_stateobj *shader;
+	struct ir3_instruction *insn;
+	unsigned i;
+
+	/* 1st instruction: (sy)(ss)(rpt1)bary.f (ei)r0.z, (r)0, r0.x */
+	insn = ir3_instr_create(prog, 2, OPC_BARY_F);
+	insn->repeat = 1;
+	insn->flags = IR3_INSTR_SY | IR3_INSTR_SS;
+	/* operands, in order: (ei)r0.z, then immediate (r)0, then r0.x */
+	ir3_reg_create(insn, regid(0,2), IR3_REG_EI);
+	ir3_reg_create(insn, 0, IR3_REG_R | IR3_REG_IMMED)->iim_val = 0;
+	ir3_reg_create(insn, regid(0,0), 0);
+
+	/* 2nd instruction: (rpt5)nop */
+	insn = ir3_instr_create(prog, 0, OPC_NOP);
+	insn->repeat = 5;
+
+	/* 3rd instruction: sam (f32)(xyzw)r0.x, r0.z, s#0, t#0 */
+	insn = ir3_instr_create(prog, 5, OPC_SAM);
+	insn->cat5.type = TYPE_F32;
+	insn->cat5.tex = 0;
+	insn->cat5.samp = 0;
+	/* operands, in order: (xyzw)r0.x with all four components, then r0.z */
+	ir3_reg_create(insn, regid(0,0), 0)->wrmask = 0xf;
+	ir3_reg_create(insn, regid(0,2), 0);
+
+	/* 4th instruction: (sy)(rpt3)cov.f32f16 hr0.x, (r)r0.x
+	 * (mov/cov instructions have no opc)
+	 */
+	insn = ir3_instr_create(prog, 1, 0);
+	insn->repeat = 3;
+	insn->flags = IR3_INSTR_SY;
+	insn->cat1.dst_type = TYPE_F16;
+	insn->cat1.src_type = TYPE_F32;
+	/* operands, in order: hr0.x, then (r)r0.x */
+	ir3_reg_create(insn, regid(0,0), IR3_REG_HALF);
+	ir3_reg_create(insn, regid(0,0), IR3_REG_R);
+
+	/* last instruction: end */
+	ir3_instr_create(prog, 0, OPC_END);
+
+	shader = create_internal_shader(pctx, SHADER_FRAGMENT, prog);
+	if (shader) {
+		/* one input at inloc 8 using two components: */
+		shader->inputs_count = 1;
+		shader->inputs[0].inloc = 8;
+		shader->inputs[0].compmask = 0x3;
+		shader->total_in = 2;
+
+		/* half-precision color output in register 0.x: */
+		shader->half_precision = true;
+		shader->color_regid = regid(0,0);
+
+		shader->samplers_count = 1;
+
+		/* same VPC_VARYING_PS_REPL mode word for all four registers: */
+		for (i = 0; i < 4; i++)
+			shader->vpsrepl[i] = 0x99999999;
+	}
+
+	return shader;
+}
+
+/* Creates shader:
+ * (sy)(ss)end
+ */
+static struct fd3_shader_stateobj *
+create_blit_vp(struct pipe_context *pctx)
+{
+	struct ir3_shader *prog = ir3_shader_create();
+	struct fd3_shader_stateobj *shader;
+	struct ir3_instruction *insn;
+
+	/* the whole program is a single instruction: (sy)(ss)end */
+	insn = ir3_instr_create(prog, 0, OPC_END);
+	insn->flags = IR3_INSTR_SY | IR3_INSTR_SS;
+
+	shader = create_internal_shader(pctx, SHADER_VERTEX, prog);
+	if (shader) {
+		/* two full vec4 inputs, in r0 and r1: */
+		shader->inputs_count = 2;
+		shader->inputs[0].compmask = 0xf;
+		shader->inputs[0].regid = regid(0,0);
+		shader->inputs[1].compmask = 0xf;
+		shader->inputs[1].regid = regid(1,0);
+		shader->total_in = 8;
+
+		/* one output, taken from r0: */
+		shader->outputs_count = 1;
+		shader->outputs[0].regid = regid(0,0);
+
+		/* special output registers: */
+		shader->psize_regid = regid(63,0);
+		shader->pos_regid = regid(1,0);
+
+		/* called last, once every field above has been filled in */
+		fixup_vp_regfootprint(shader);
+	}
+
+	return shader;
+}
+
+/* Creates shader:
+ * (sy)(ss)(rpt3)mov.f16f16 hr0.x, (r)hc0.x
+ * end
+ */
+static struct fd3_shader_stateobj *
+create_solid_fp(struct pipe_context *pctx)
+{
+	struct ir3_shader *prog = ir3_shader_create();
+	struct fd3_shader_stateobj *shader;
+	struct ir3_instruction *insn;
+
+	/* 1st instruction: (sy)(ss)(rpt3)mov.f16f16 hr0.x, (r)hc0.x
+	 * (mov/cov instructions have no opc)
+	 */
+	insn = ir3_instr_create(prog, 1, 0);
+	insn->repeat = 3;
+	insn->flags = IR3_INSTR_SY | IR3_INSTR_SS;
+	insn->cat1.dst_type = TYPE_F16;
+	insn->cat1.src_type = TYPE_F16;
+	/* operands, in order: hr0.x, then (r)hc0.x */
+	ir3_reg_create(insn, regid(0,0), IR3_REG_HALF);
+	ir3_reg_create(insn, regid(0,0),
+			IR3_REG_HALF | IR3_REG_CONST | IR3_REG_R);
+
+	/* last instruction: end */
+	ir3_instr_create(prog, 0, OPC_END);
+
+	shader = create_internal_shader(pctx, SHADER_FRAGMENT, prog);
+	if (shader) {
+		/* no inputs: */
+		shader->total_in = 0;
+		shader->inputs_count = 0;
+
+		/* half-precision color output in register 0.x: */
+		shader->half_precision = true;
+		shader->color_regid = regid(0,0);
+	}
+
+	return shader;
+}
+
+/* Creates shader:
+ * (sy)(ss)end
+ */
+static struct fd3_shader_stateobj *
+create_solid_vp(struct pipe_context *pctx)
+{
+	struct ir3_shader *prog = ir3_shader_create();
+	struct fd3_shader_stateobj *shader;
+	struct ir3_instruction *insn;
+
+	/* the whole program is a single instruction: (sy)(ss)end */
+	insn = ir3_instr_create(prog, 0, OPC_END);
+	insn->flags = IR3_INSTR_SY | IR3_INSTR_SS;
+
+	shader = create_internal_shader(pctx, SHADER_VERTEX, prog);
+	if (shader) {
+		/* a single full vec4 input in r0: */
+		shader->inputs_count = 1;
+		shader->inputs[0].compmask = 0xf;
+		shader->inputs[0].regid = regid(0,0);
+		shader->total_in = 4;
+
+		/* no varyings: */
+		shader->outputs_count = 0;
+
+		/* special output registers: */
+		shader->psize_regid = regid(63,0);
+		shader->pos_regid = regid(0,0);
+
+		/* called last, once every field above has been filled in */
+		fixup_vp_regfootprint(shader);
+	}
+
+	return shader;
+}
+
+void
+fd3_prog_init(struct pipe_context *pctx)
+{
+	struct fd_context *fdctx = fd_context(pctx);
+
+	/* vertex shader state object hooks: */
+	pctx->create_vs_state = fd3_vp_state_create;
+	pctx->delete_vs_state = fd3_vp_state_delete;
+	pctx->bind_vs_state = fd3_vp_state_bind;
+
+	/* fragment shader state object hooks: */
+	pctx->create_fs_state = fd3_fp_state_create;
+	pctx->delete_fs_state = fd3_fp_state_delete;
+	pctx->bind_fs_state = fd3_fp_state_bind;
+
+	/* built-in "solid" and "blit" programs, released again in
+	 * fd3_prog_fini():
+	 */
+	fdctx->solid_prog.fp = create_solid_fp(pctx);
+	fdctx->solid_prog.vp = create_solid_vp(pctx);
+	fdctx->blit_prog.fp = create_blit_fp(pctx);
+	fdctx->blit_prog.vp = create_blit_vp(pctx);
+}
+
+void
+fd3_prog_fini(struct pipe_context *pctx)
+{
+	struct fd_context *ctx = fd_context(pctx);
+
+	/* Release the built-in solid/blit programs.
+	 *
+	 * The create_*() helpers return NULL when allocation fails and
+	 * fd3_prog_init() stores their result unchecked, so any of these
+	 * pointers may be NULL here.  Skip those rather than relying on
+	 * delete_shader() to cope, and clear each pointer afterwards so the
+	 * context is not left holding a dangling reference.
+	 */
+	if (ctx->solid_prog.vp) {
+		delete_shader(ctx->solid_prog.vp);
+		ctx->solid_prog.vp = NULL;
+	}
+	if (ctx->solid_prog.fp) {
+		delete_shader(ctx->solid_prog.fp);
+		ctx->solid_prog.fp = NULL;
+	}
+	if (ctx->blit_prog.vp) {
+		delete_shader(ctx->blit_prog.vp);
+		ctx->blit_prog.vp = NULL;
+	}
+	if (ctx->blit_prog.fp) {
+		delete_shader(ctx->blit_prog.fp);
+		ctx->blit_prog.fp = NULL;
+	}
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
new file mode 100644
index 00000000000..9b50d34f756
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
@@ -0,0 +1,116 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_PROGRAM_H_
+#define FD3_PROGRAM_H_
+
+#include "pipe/p_context.h"
+
+#include "freedreno_context.h"
+
+#include "ir-a3xx.h"
+#include "disasm.h"
+
+struct fd3_shader_stateobj { /* state for one a3xx shader */
+ enum shader_t type; /* stage, e.g. SHADER_VERTEX or SHADER_FRAGMENT */
+
+ struct fd_bo *bo; /* NOTE(review): presumably holds the assembled shader; asserted non-NULL after assemble_shader() */
+
+ struct ir3_shader_info info;
+ struct ir3_shader *ir; /* ir3 program this object was created from */
+
+ /* is shader using (or more precisely, is color_regid) half-
+ * precision register?
+ */
+ bool half_precision;
+
+ /* special output register locations: */
+ uint8_t pos_regid, psize_regid, color_regid;
+
+ /* the instructions length is in units of instruction groups
+ * (4 instructions, 8 dwords):
+ */
+ unsigned instrlen;
+
+ /* the constants length is in units of vec4's, and is the sum of
+ * the uniforms and the built-in compiler constants
+ */
+ unsigned constlen;
+
+ /* About Linkage:
+ * + Let the frag shader determine the position/compmask for the
+ * varyings, since it is the place where we know if the varying
+ * is actually used, and if so, which components are used. So
+ * what the hw calls "outloc" is taken from the "inloc" of the
+ * frag shader.
+ * + From the vert shader, we only need the output regid
+ */
+
+ /* varyings/outputs: */
+ unsigned outputs_count; /* number of valid entries in outputs[] */
+ struct {
+ uint8_t regid;
+ } outputs[16];
+
+ /* vertices/inputs: */
+ unsigned inputs_count; /* number of valid entries in inputs[] */
+ struct {
+ uint8_t regid;
+ uint8_t compmask; /* one bit per component, e.g. 0xf = all four */
+ /* in theory inloc of fs should match outloc of vs: */
+ uint8_t inloc;
+ } inputs[16];
+
+ unsigned total_in; /* sum of inputs (scalar) */
+
+ /* samplers: */
+ unsigned samplers_count;
+
+ /* const reg # of first immediate, ie. 1 == c1
+ * (not regid, because TGSI thinks in terms of vec4 registers,
+ * not scalar registers)
+ */
+ unsigned first_immediate;
+ unsigned immediates_count; /* NOTE(review): presumably the number of valid entries in immediates[] */
+ struct {
+ uint32_t val[4]; /* one vec4 worth of raw dwords */
+ } immediates[64];
+
+ /* so far, only used for blit_prog shader.. values for
+ * VPC_VARYING_INTERP[i].MODE and VPC_VARYING_PS_REPL[i].MODE
+ */
+ uint32_t vinterp[4], vpsrepl[4];
+};
+
+void fd3_program_emit(struct fd_ringbuffer *ring,
+ struct fd_program_stateobj *prog);
+
+void fd3_prog_init(struct pipe_context *pctx);
+void fd3_prog_fini(struct pipe_context *pctx);
+
+#endif /* FD3_PROGRAM_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c
new file mode 100644
index 00000000000..8f6c0fe95cf
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c
@@ -0,0 +1,92 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "fd3_rasterizer.h"
+#include "fd3_context.h"
+#include "fd3_util.h"
+
+void *
+fd3_rasterizer_state_create(struct pipe_context *pctx,
+		const struct pipe_rasterizer_state *cso)
+{
+	struct fd3_rasterizer_stateobj *rast;
+	uint32_t su_mode, prim_vtx;
+
+	rast = CALLOC_STRUCT(fd3_rasterizer_stateobj);
+	if (!rast)
+		return NULL;
+
+	/* keep a copy of the gallium state alongside the register values */
+	rast->base = *cso;
+
+	/* Not handled yet (TODO):
+	 *   cso->line_stipple_enable
+	 *   cso->half_pixel_center
+	 *   cso->point_size, psize_min/psize_max
+	 *   cso->front_ccw
+	 *   cso->multisample
+	 */
+
+	/* fixed values for now: */
+	rast->gras_su_point_size = 0x00000008;     /* ??? */
+	rast->gras_su_point_minmax = 0xffc00010;   /* ??? */
+	rast->gras_cl_clip_cntl = A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER; /* ??? */
+
+	/* polygon offset: */
+	rast->gras_su_poly_offset_offset =
+		A3XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units);
+	rast->gras_su_poly_offset_scale =
+		A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(cso->offset_scale);
+
+	/* GRAS_SU_MODE_CONTROL: line width, face culling, polygon offset */
+	su_mode = A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2);
+	if (cso->cull_face & PIPE_FACE_FRONT)
+		su_mode |= A3XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
+	if (cso->cull_face & PIPE_FACE_BACK)
+		su_mode |= A3XX_GRAS_SU_MODE_CONTROL_CULL_BACK;
+	if (cso->offset_tri)
+		su_mode |= A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
+
+	/* PC_PRIM_VTX_CNTL: front/back polygon mode and provoking vertex */
+	prim_vtx =
+		A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) |
+		A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
+	if (!cso->flatshade_first)
+		prim_vtx |= A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST;
+
+	rast->gras_su_mode_control = su_mode;
+	rast->pc_prim_vtx_cntl = prim_vtx;
+
+	return rast;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h
new file mode 100644
index 00000000000..7e9c1f51f59
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h
@@ -0,0 +1,56 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_RASTERIZER_H_
+#define FD3_RASTERIZER_H_
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+struct fd3_rasterizer_stateobj { /* a3xx rasterizer state: gallium state plus precomputed register values */
+ struct pipe_rasterizer_state base; /* copy of the gallium state; first member, so a base pointer can be cast to this struct */
+ uint32_t gras_su_point_minmax; /* currently a fixed value */
+ uint32_t gras_su_point_size; /* currently a fixed value */
+ uint32_t gras_su_poly_offset_scale; /* built from offset_scale */
+ uint32_t gras_su_poly_offset_offset; /* built from offset_units */
+
+ uint32_t gras_su_mode_control; /* line half-width, cull front/back, poly offset enable */
+ uint32_t gras_cl_clip_cntl; /* currently a fixed value */
+ uint32_t pc_prim_vtx_cntl; /* front/back polygon mode, provoking vertex */
+};
+
+static INLINE struct fd3_rasterizer_stateobj *
+fd3_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
+{
+	/* downcast: the gallium state is the first member of the a3xx object */
+	struct fd3_rasterizer_stateobj *so = (struct fd3_rasterizer_stateobj *)rast;
+
+	return so;
+}
+
+void * fd3_rasterizer_state_create(struct pipe_context *pctx,
+ const struct pipe_rasterizer_state *cso);
+
+#endif /* FD3_RASTERIZER_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c
new file mode 100644
index 00000000000..9bb19abc2a8
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c
@@ -0,0 +1,105 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_screen.h"
+#include "util/u_format.h"
+
+#include "fd3_screen.h"
+#include "fd3_context.h"
+#include "fd3_util.h"
+
+static boolean
+fd3_screen_is_format_supported(struct pipe_screen *pscreen,
+		enum pipe_format format,
+		enum pipe_texture_target target,
+		unsigned sample_count,
+		unsigned usage)
+{
+	/* bind flags that are all handled by the "can we render to it" check */
+	const unsigned rt_binds = PIPE_BIND_RENDER_TARGET |
+			PIPE_BIND_DISPLAY_TARGET |
+			PIPE_BIND_SCANOUT |
+			PIPE_BIND_SHARED;
+	/* bind flags that are accepted unconditionally */
+	const unsigned xfer_binds = PIPE_BIND_TRANSFER_READ |
+			PIPE_BIND_TRANSFER_WRITE;
+	/* accumulates the subset of 'usage' we can actually satisfy */
+	unsigned supported = 0;
+
+	/* cheap rejects first (TODO add MSAA): */
+	if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
+			(sample_count > 1) ||
+			!util_format_is_supported(format, usage)) {
+		DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
+				util_format_name(format), target, sample_count, usage);
+		return FALSE;
+	}
+
+	if (usage & PIPE_BIND_VERTEX_BUFFER) {
+		if (fd3_pipe2vtx(format) != ~0)
+			supported |= PIPE_BIND_VERTEX_BUFFER;
+	}
+
+	if (usage & PIPE_BIND_SAMPLER_VIEW) {
+		if (fd3_pipe2tex(format) != ~0)
+			supported |= PIPE_BIND_SAMPLER_VIEW;
+	}
+
+	/* render targets need both a color format and a texture format */
+	if (usage & rt_binds) {
+		if ((fd3_pipe2color(format) != ~0) &&
+				(fd3_pipe2tex(format) != ~0))
+			supported |= usage & rt_binds;
+	}
+
+	/* likewise depth/stencil needs a depth format and a texture format */
+	if (usage & PIPE_BIND_DEPTH_STENCIL) {
+		if ((fd_pipe2depth(format) != ~0) &&
+				(fd3_pipe2tex(format) != ~0))
+			supported |= PIPE_BIND_DEPTH_STENCIL;
+	}
+
+	if (usage & PIPE_BIND_INDEX_BUFFER) {
+		if (fd_pipe2index(format) != ~0)
+			supported |= PIPE_BIND_INDEX_BUFFER;
+	}
+
+	supported |= usage & xfer_binds;
+
+	/* the format is usable only if every requested bind was satisfied */
+	if (supported != usage) {
+		DBG("not supported: format=%s, target=%d, sample_count=%d, "
+				"usage=%x, retval=%x", util_format_name(format),
+				target, sample_count, usage, supported);
+	}
+
+	return supported == usage;
+}
+
+void
+fd3_screen_init(struct pipe_screen *pscreen)
+{
+	/* install the a3xx specific screen entry points */
+	pscreen->is_format_supported = fd3_screen_is_format_supported;
+	pscreen->context_create = fd3_context_create;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_screen.h b/src/gallium/drivers/freedreno/a3xx/fd3_screen.h
new file mode 100644
index 00000000000..38204d387a3
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_screen.h
@@ -0,0 +1,36 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_SCREEN_H_
+#define FD3_SCREEN_H_
+
+#include "pipe/p_screen.h"
+
+void fd3_screen_init(struct pipe_screen *pscreen);
+
+#endif /* FD3_SCREEN_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
new file mode 100644
index 00000000000..ae08b8ac90a
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -0,0 +1,140 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+
+#include "fd3_texture.h"
+#include "fd3_util.h"
+
+static enum a3xx_tex_clamp
+tex_clamp(unsigned wrap)
+{
+	/* Translate a gallium PIPE_TEX_WRAP_x mode to the a3xx clamp mode.
+	 *
+	 * Only the modes that could be coaxed out of the GLESv2 blob driver
+	 * are known; the hardware probably supports more.  Every clamp
+	 * variant is therefore mapped to clamp-to-edge and every mirror
+	 * variant to mirror-repeat.
+	 *
+	 * TODO once we have basics working, go back and just try
+	 * different values and see what happens
+	 */
+	enum a3xx_tex_clamp mode = 0;
+
+	switch (wrap) {
+	case PIPE_TEX_WRAP_REPEAT:
+		mode = A3XX_TEX_REPEAT;
+		break;
+	case PIPE_TEX_WRAP_CLAMP:
+	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+		mode = A3XX_TEX_CLAMP_TO_EDGE;
+		break;
+	case PIPE_TEX_WRAP_MIRROR_CLAMP:
+	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+	case PIPE_TEX_WRAP_MIRROR_REPEAT:
+		mode = A3XX_TEX_MIRROR_REPEAT;
+		break;
+	default:
+		/* unknown mode: complain and fall back to 0 */
+		DBG("invalid wrap: %u", wrap);
+		break;
+	}
+
+	return mode;
+}
+
+static enum a3xx_tex_filter
+tex_filter(unsigned filter)
+{
+	/* translate a gallium PIPE_TEX_FILTER_x value to the a3xx filter */
+	if (filter == PIPE_TEX_FILTER_NEAREST)
+		return A3XX_TEX_NEAREST;
+	if (filter == PIPE_TEX_FILTER_LINEAR)
+		return A3XX_TEX_LINEAR;
+
+	/* unknown filter: complain and fall back to 0 */
+	DBG("invalid filter: %u", filter);
+	return 0;
+}
+
+static void *
+fd3_sampler_state_create(struct pipe_context *pctx,
+		const struct pipe_sampler_state *cso)
+{
+	struct fd3_sampler_stateobj *samp;
+	uint32_t filters, wraps;
+
+	samp = CALLOC_STRUCT(fd3_sampler_stateobj);
+	if (!samp)
+		return NULL;
+
+	/* keep a copy of the gallium state alongside the register values */
+	samp->base = *cso;
+
+	/* TEX_SAMP_0 holds the mag/min filters and the per-axis wrap modes */
+	filters =
+		A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter)) |
+		A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter));
+	wraps =
+		A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) |
+		A3XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t)) |
+		A3XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r));
+
+	samp->texsamp0 = filters | wraps;
+	samp->texsamp1 = 0x00000000;   /* ??? */
+
+	return samp;
+}
+
+static struct pipe_sampler_view *
+fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
+		const struct pipe_sampler_view *cso)
+{
+	struct fd_resource *rsc = fd_resource(prsc);
+	struct fd3_pipe_sampler_view *view;
+
+	view = CALLOC_STRUCT(fd3_pipe_sampler_view);
+	if (!view)
+		return NULL;
+
+	/* start from the template, then fix up the fields that must refer
+	 * to this view rather than to the template:
+	 */
+	view->base = *cso;
+	/* the view holds its own reference on the resource */
+	pipe_reference(NULL, &prsc->reference);
+	view->base.texture = prsc;
+	view->base.context = pctx;
+	view->base.reference.count = 1;
+
+	view->tex_resource = rsc;
+
+	/* TEX_CONST_0: format and swizzle */
+	view->texconst0 =
+		0x40000000 | /* ??? */
+		A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) |
+		fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
+				cso->swizzle_b, cso->swizzle_a);
+
+	/* TEX_CONST_1: fetch size and dimensions */
+	view->texconst1 =
+		A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
+		A3XX_TEX_CONST_1_WIDTH(prsc->width0) |
+		A3XX_TEX_CONST_1_HEIGHT(prsc->height0);
+
+	/* TEX_CONST_2: pitch only; when emitted, A3XX_TEX_CONST_2_INDX()
+	 * must be OR'd in:
+	 */
+	view->texconst2 = A3XX_TEX_CONST_2_PITCH(rsc->pitch * rsc->cpp);
+
+	view->texconst3 = 0x00000000;   /* ??? */
+
+	return &view->base;
+}
+
+void
+fd3_texture_init(struct pipe_context *pctx)
+{
+	/* install the a3xx sampler state / sampler view constructors */
+	pctx->create_sampler_view = fd3_sampler_view_create;
+	pctx->create_sampler_state = fd3_sampler_state_create;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.h b/src/gallium/drivers/freedreno/a3xx/fd3_texture.h
new file mode 100644
index 00000000000..a83f527366b
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.h
@@ -0,0 +1,68 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_TEXTURE_H_
+#define FD3_TEXTURE_H_
+
+#include "pipe/p_context.h"
+
+#include "freedreno_texture.h"
+#include "freedreno_resource.h"
+
+#include "fd3_context.h"
+#include "fd3_util.h"
+
+struct fd3_sampler_stateobj { /* a3xx sampler state: gallium state plus precomputed TEX_SAMP words */
+ struct pipe_sampler_state base; /* copy of the gallium state; first member, so a base pointer can be cast to this struct */
+ uint32_t texsamp0, texsamp1; /* texsamp0: mag/min filter and S/T/R wrap bits; texsamp1: currently always 0 */
+};
+
+static INLINE struct fd3_sampler_stateobj *
+fd3_sampler_stateobj(struct pipe_sampler_state *samp)
+{
+	/* downcast: the gallium state is the first member of the a3xx object */
+	struct fd3_sampler_stateobj *so = (struct fd3_sampler_stateobj *)samp;
+
+	return so;
+}
+
+struct fd3_pipe_sampler_view { /* a3xx sampler view: gallium view plus precomputed TEX_CONST words */
+ struct pipe_sampler_view base; /* gallium view; first member, so a base pointer can be cast to this struct */
+ struct fd_resource *tex_resource; /* fd_resource() of the viewed texture */
+ uint32_t texconst0, texconst1, texconst2, texconst3; /* 0: format+swizzle, 1: fetchsize+width+height, 2: pitch (index OR'd in at emit time), 3: currently 0 */
+};
+
+static INLINE struct fd3_pipe_sampler_view *
+fd3_pipe_sampler_view(struct pipe_sampler_view *pview)
+{
+	/* downcast: the gallium view is the first member of the a3xx object */
+	struct fd3_pipe_sampler_view *view = (struct fd3_pipe_sampler_view *)pview;
+
+	return view;
+}
+
+unsigned fd3_get_const_idx(struct fd_context *ctx,
+ struct fd_texture_stateobj *tex, unsigned samp_id);
+
+void fd3_texture_init(struct pipe_context *pctx);
+
+#endif /* FD3_TEXTURE_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_util.c b/src/gallium/drivers/freedreno/a3xx/fd3_util.c
new file mode 100644
index 00000000000..a08bc2349eb
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_util.c
@@ -0,0 +1,348 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_format.h"
+
+#include "fd3_util.h"
+
+/* convert pipe format to vertex buffer format (returns ~0 when there is no mapping): */
+enum a3xx_vtx_fmt
+fd3_pipe2vtx(enum pipe_format format)
+{
+ switch (format) {
+ /* 8-bit buffers. */
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_L8_SRGB: /* sRGB shares the plain unorm vertex format here */
+ return VFMT_NORM_UBYTE_8;
+
+ case PIPE_FORMAT_A8_SNORM:
+ case PIPE_FORMAT_I8_SNORM:
+ case PIPE_FORMAT_L8_SNORM:
+ case PIPE_FORMAT_R8_SNORM:
+ return VFMT_NORM_BYTE_8;
+
+ case PIPE_FORMAT_A8_UINT:
+ case PIPE_FORMAT_I8_UINT:
+ case PIPE_FORMAT_L8_UINT:
+ case PIPE_FORMAT_R8_UINT:
+ return VFMT_UBYTE_8;
+
+ case PIPE_FORMAT_A8_SINT:
+ case PIPE_FORMAT_I8_SINT:
+ case PIPE_FORMAT_L8_SINT:
+ case PIPE_FORMAT_R8_SINT:
+ return VFMT_BYTE_8;
+
+ /* 16-bit buffers. */
+ case PIPE_FORMAT_R16_UNORM:
+ case PIPE_FORMAT_A16_UNORM:
+ case PIPE_FORMAT_L16_UNORM:
+ case PIPE_FORMAT_I16_UNORM:
+ case PIPE_FORMAT_Z16_UNORM: /* 16-bit depth is treated like any other 16-bit unorm channel */
+ return VFMT_NORM_USHORT_16;
+
+ case PIPE_FORMAT_R16_SNORM:
+ case PIPE_FORMAT_A16_SNORM:
+ case PIPE_FORMAT_L16_SNORM:
+ case PIPE_FORMAT_I16_SNORM:
+ return VFMT_NORM_SHORT_16;
+
+ case PIPE_FORMAT_R16_UINT:
+ case PIPE_FORMAT_A16_UINT:
+ case PIPE_FORMAT_L16_UINT:
+ case PIPE_FORMAT_I16_UINT:
+ return VFMT_USHORT_16;
+
+ case PIPE_FORMAT_R16_SINT:
+ case PIPE_FORMAT_A16_SINT:
+ case PIPE_FORMAT_L16_SINT:
+ case PIPE_FORMAT_I16_SINT:
+ return VFMT_SHORT_16;
+
+ case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_R8G8_UNORM:
+ return VFMT_NORM_UBYTE_8_8;
+
+ case PIPE_FORMAT_L8A8_SNORM:
+ case PIPE_FORMAT_R8G8_SNORM:
+ return VFMT_NORM_BYTE_8_8;
+
+ case PIPE_FORMAT_L8A8_UINT:
+ case PIPE_FORMAT_R8G8_UINT:
+ return VFMT_UBYTE_8_8;
+
+ case PIPE_FORMAT_L8A8_SINT:
+ case PIPE_FORMAT_R8G8_SINT:
+ return VFMT_BYTE_8_8;
+
+ /* 24-bit buffers. */
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ return VFMT_NORM_UBYTE_8_8_8;
+
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ return VFMT_NORM_BYTE_8_8_8;
+
+ case PIPE_FORMAT_R8G8B8_UINT:
+ return VFMT_UBYTE_8_8_8;
+
+ case PIPE_FORMAT_R8G8B8_SINT:
+ return VFMT_BYTE_8_8_8;
+
+ /* 32-bit buffers. */
+ case PIPE_FORMAT_A8B8G8R8_UNORM: /* every channel ordering below maps to the same vertex format */
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ case PIPE_FORMAT_R8G8B8X8_UNORM:
+ case PIPE_FORMAT_X8B8G8R8_UNORM:
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ case PIPE_FORMAT_A8B8G8R8_SRGB:
+ case PIPE_FORMAT_B8G8R8A8_SRGB:
+ return VFMT_NORM_UBYTE_8_8_8_8;
+
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ case PIPE_FORMAT_R8G8B8X8_SNORM:
+ return VFMT_NORM_BYTE_8_8_8_8;
+
+ case PIPE_FORMAT_R8G8B8A8_UINT:
+ case PIPE_FORMAT_R8G8B8X8_UINT:
+ return VFMT_UBYTE_8_8_8_8;
+
+ case PIPE_FORMAT_R8G8B8A8_SINT:
+ case PIPE_FORMAT_R8G8B8X8_SINT:
+ return VFMT_BYTE_8_8_8_8;
+
+/* TODO probably need gles3 blob drivers to find the 32bit int formats:
+ case PIPE_FORMAT_R32_UINT:
+ case PIPE_FORMAT_R32_SINT:
+ case PIPE_FORMAT_A32_UINT:
+ case PIPE_FORMAT_A32_SINT:
+ case PIPE_FORMAT_L32_UINT:
+ case PIPE_FORMAT_L32_SINT:
+ case PIPE_FORMAT_I32_UINT:
+ case PIPE_FORMAT_I32_SINT:
+*/
+
+ case PIPE_FORMAT_R32_FLOAT:
+ case PIPE_FORMAT_A32_FLOAT:
+ case PIPE_FORMAT_L32_FLOAT:
+ case PIPE_FORMAT_I32_FLOAT:
+ case PIPE_FORMAT_Z32_FLOAT:
+ return VFMT_FLOAT_32;
+
+ case PIPE_FORMAT_R32_FIXED:
+ return VFMT_FIXED_32;
+
+ /* 64-bit buffers. */
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ return VFMT_NORM_USHORT_16_16_16_16;
+
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ return VFMT_NORM_SHORT_16_16_16_16;
+
+ case PIPE_FORMAT_R16G16B16A16_UINT:
+ return VFMT_USHORT_16_16_16_16;
+
+ case PIPE_FORMAT_R16G16B16A16_SINT:
+ return VFMT_SHORT_16_16_16_16;
+
+ case PIPE_FORMAT_R32G32_FLOAT:
+ case PIPE_FORMAT_L32A32_FLOAT:
+ return VFMT_FLOAT_32_32;
+
+ case PIPE_FORMAT_R32G32_FIXED:
+ return VFMT_FIXED_32_32;
+
+/* TODO probably need gles3 blob drivers to find the 32bit int formats:
+ case PIPE_FORMAT_R32G32_SINT:
+ case PIPE_FORMAT_R32G32_UINT:
+ case PIPE_FORMAT_L32A32_UINT:
+ case PIPE_FORMAT_L32A32_SINT:
+*/
+
+ /* 96-bit buffers. */
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return VFMT_FLOAT_32_32_32;
+
+ case PIPE_FORMAT_R32G32B32_FIXED:
+ return VFMT_FIXED_32_32_32;
+
+ /* 128-bit buffers. */
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return VFMT_FLOAT_32_32_32_32;
+
+ case PIPE_FORMAT_R32G32B32A32_FIXED:
+ return VFMT_FIXED_32_32_32_32;
+
+/* TODO probably need gles3 blob drivers to find the 32bit int formats:
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ case PIPE_FORMAT_R32G32B32A32_SINT:
+ case PIPE_FORMAT_R32G32B32A32_UINT:
+*/
+
+ default: /* anything not listed above, including the commented-out 32-bit int formats */
+ return ~0; /* out-of-range marker: callers must compare against ~0 to detect "unsupported" */
+ }
+}
+
+/* map a gallium pipe format onto the a3xx texture sampler format;
+ * formats without a known mapping yield ~0:
+ */
+enum a3xx_tex_fmt
+fd3_pipe2tex(enum pipe_format format)
+{
+	switch (format) {
+	/* one 8-bit channel: */
+	case PIPE_FORMAT_A8_UNORM:
+	case PIPE_FORMAT_I8_UNORM:
+	case PIPE_FORMAT_L8_UNORM:
+		return TFMT_NORM_UINT_8;
+
+	/* 16-bit depth is sampled as two 8-bit channels: */
+	case PIPE_FORMAT_Z16_UNORM:
+		return TFMT_NORM_UINT_8_8;
+
+	/* four 8-bit channels, packed depth/stencil included: */
+	case PIPE_FORMAT_B8G8R8A8_UNORM:
+	case PIPE_FORMAT_B8G8R8X8_UNORM:
+	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+		return TFMT_NORM_UINT_8_8_8_8;
+
+	case PIPE_FORMAT_Z24X8_UNORM:
+		return TFMT_NORM_UINT_X8Z24;
+
+	// TODO add more..
+
+	default:
+		return ~0;
+	}
+}
+
+/* pick the texture fetch size for a pipe format; formats that are not
+ * listed fall back to TFETCH_DISABLE:
+ */
+enum a3xx_tex_fetchsize
+fd3_pipe2fetchsize(enum pipe_format format)
+{
+	switch (format) {
+	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+	case PIPE_FORMAT_Z24X8_UNORM:
+	case PIPE_FORMAT_B8G8R8X8_UNORM:
+	case PIPE_FORMAT_B8G8R8A8_UNORM:
+		return TFETCH_4_BYTE;
+
+	case PIPE_FORMAT_Z16_UNORM:
+		return TFETCH_2_BYTE;
+
+	case PIPE_FORMAT_I8_UNORM:
+	case PIPE_FORMAT_A8_UNORM:
+	case PIPE_FORMAT_L8_UNORM:
+		return TFETCH_1_BYTE;
+
+	// TODO add more..
+
+	default:
+		return TFETCH_DISABLE;  /* safe default */
+	}
+}
+
+/* map a pipe format onto the MRT / copydest format used when the surface
+ * is a render target; formats without a known mapping yield ~0:
+ */
+enum a3xx_color_fmt
+fd3_pipe2color(enum pipe_format format)
+{
+	switch (format) {
+	case PIPE_FORMAT_Z16_UNORM:
+		return RB_Z16_UNORM;
+
+	case PIPE_FORMAT_L8_UNORM:
+	case PIPE_FORMAT_R8_UNORM:
+		return RB_A8_UNORM;
+
+	case PIPE_FORMAT_B8G8R8A8_UNORM:
+	case PIPE_FORMAT_B8G8R8X8_UNORM:
+	/* for DEPTHX_24_8, blob driver also seems to use R8G8B8A8 fmt.. */
+	case PIPE_FORMAT_Z24X8_UNORM:
+	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+		return RB_R8G8B8A8_UNORM;
+
+	// TODO add more..
+
+	default:
+		return ~0;
+	}
+}
+
+/* channel swap for a pipe format: the two BGRA/BGRX formats need WXYZ,
+ * everything else (depth/stencil included) uses WZYX:
+ */
+enum a3xx_color_swap
+fd3_pipe2swap(enum pipe_format format)
+{
+	if ((format == PIPE_FORMAT_B8G8R8A8_UNORM) ||
+			(format == PIPE_FORMAT_B8G8R8X8_UNORM))
+		return WXYZ;
+
+	return WZYX;
+}
+
+/* translate one PIPE_SWIZZLE_x selector into the a3xx texture swizzle;
+ * unrecognised selectors are treated like PIPE_SWIZZLE_RED:
+ */
+static inline enum a3xx_tex_swiz
+tex_swiz(unsigned swiz)
+{
+	switch (swiz) {
+	case PIPE_SWIZZLE_GREEN:
+		return A3XX_TEX_Y;
+	case PIPE_SWIZZLE_BLUE:
+		return A3XX_TEX_Z;
+	case PIPE_SWIZZLE_ALPHA:
+		return A3XX_TEX_W;
+	case PIPE_SWIZZLE_ZERO:
+		return A3XX_TEX_ZERO;
+	case PIPE_SWIZZLE_ONE:
+		return A3XX_TEX_ONE;
+	case PIPE_SWIZZLE_RED:
+	default:
+		return A3XX_TEX_X;
+	}
+}
+
+/* combine the format's built-in swizzle with the sampler-view swizzle and
+ * return the result packed into the A3XX_TEX_CONST_0 SWIZ_X..SWIZ_W fields:
+ */
+uint32_t
+fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
+		unsigned swizzle_b, unsigned swizzle_a)
+{
+	const struct util_format_description *desc;
+	enum a3xx_tex_swiz x, y, z, w;
+	/* looked up with the format's own per-channel swizzle: entries 0-3
+	 * forward the view swizzle, the remaining ones are constants
+	 */
+	uint8_t view_swiz[] = {
+		swizzle_r, swizzle_g, swizzle_b, swizzle_a,
+		PIPE_SWIZZLE_ZERO, PIPE_SWIZZLE_ONE,
+		PIPE_SWIZZLE_ONE, PIPE_SWIZZLE_ONE,
+	};
+
+	desc = util_format_description(format);
+
+	x = tex_swiz(view_swiz[desc->swizzle[0]]);
+	y = tex_swiz(view_swiz[desc->swizzle[1]]);
+	z = tex_swiz(view_swiz[desc->swizzle[2]]);
+	w = tex_swiz(view_swiz[desc->swizzle[3]]);
+
+	return A3XX_TEX_CONST_0_SWIZ_X(x) |
+			A3XX_TEX_CONST_0_SWIZ_Y(y) |
+			A3XX_TEX_CONST_0_SWIZ_Z(z) |
+			A3XX_TEX_CONST_0_SWIZ_W(w);
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_util.h b/src/gallium/drivers/freedreno/a3xx/fd3_util.h
new file mode 100644
index 00000000000..e9ec15f5a3f
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_util.h
@@ -0,0 +1,56 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_UTIL_H_
+#define FD3_UTIL_H_
+
+#include "freedreno_util.h"
+
+#include "a3xx.xml.h"
+
+enum a3xx_vtx_fmt fd3_pipe2vtx(enum pipe_format format); /* vertex buffer format, ~0 if unsupported */
+enum a3xx_tex_fmt fd3_pipe2tex(enum pipe_format format); /* texture sampler format, ~0 if unsupported */
+enum a3xx_tex_fetchsize fd3_pipe2fetchsize(enum pipe_format format); /* texture fetch size, TFETCH_DISABLE if unknown */
+enum a3xx_color_fmt fd3_pipe2color(enum pipe_format format); /* MRT / copydest (render-target) format, ~0 if unsupported */
+enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format); /* channel swap, WZYX unless the format is BGRA/BGRX */
+
+uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r,
+ unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); /* packed A3XX_TEX_CONST_0 SWIZ_X..SWIZ_W bits */
+
+/* pack a register number and a component selector into one register id. comp:
+ * 0 - x
+ * 1 - y
+ * 2 - z
+ * 3 - w
+ */
+static inline uint32_t regid(int num, int comp)
+{
+ return (num << 2) | (comp & 0x3); /* component in bits 0-1 (higher bits of comp are dropped), number from bit 2 up */
+}
+
+#endif /* FD3_UTIL_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_zsa.c b/src/gallium/drivers/freedreno/a3xx/fd3_zsa.c
new file mode 100644
index 00000000000..857ab8f106a
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_zsa.c
@@ -0,0 +1,100 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "fd3_zsa.h"
+#include "fd3_context.h"
+#include "fd3_util.h"
+
+void * /* returns a new fd3_zsa_stateobj, or NULL if the allocation fails */
+fd3_zsa_state_create(struct pipe_context *pctx, /* pctx is not used in this function */
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct fd3_zsa_stateobj *so;
+
+ so = CALLOC_STRUCT(fd3_zsa_stateobj); /* presumably zero-filled (calloc-style), which the |= below rely on */
+ if (!so)
+ return NULL;
+
+ so->base = *cso; /* keep a copy of the gallium state alongside the register words */
+
+ so->rb_depth_control |= /* the compare func is written even when depth test is disabled */
+ A3XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth.func); /* maps 1:1 */
+
+ if (cso->depth.enabled)
+ so->rb_depth_control |=
+ A3XX_RB_DEPTH_CONTROL_Z_ENABLE |
+ A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE;
+
+ if (cso->depth.writemask)
+ so->rb_depth_control |= A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE;
+
+ if (cso->stencil[0].enabled) { /* stencil[0] is programmed through the non-_BF fields */
+ const struct pipe_stencil_state *s = &cso->stencil[0];
+
+ so->rb_stencil_control |=
+ A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
+ A3XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
+ A3XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
+ so->rb_stencilrefmask |=
+ 0xff000000 | /* ??? */
+ A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
+ A3XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
+
+ if (cso->stencil[1].enabled) { /* stencil[1] (_BF fields) is only looked at when stencil[0] is enabled */
+ const struct pipe_stencil_state *bs = &cso->stencil[1];
+
+ so->rb_stencil_control |=
+ A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
+ A3XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
+ A3XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
+ A3XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
+ A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
+ so->rb_stencilrefmask_bf |= /* NOTE(review): the non-_BF STENCILREFMASK macros are reused below -- confirm both words share one layout */
+ 0xff000000 | /* ??? */
+ A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(bs->writemask) |
+ A3XX_RB_STENCILREFMASK_STENCILMASK(bs->valuemask);
+ }
+ }
+
+ if (cso->alpha.enabled) { /* only the alpha compare function is recorded for now */
+ so->rb_render_control =
+ A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(cso->alpha.func);
+ // TODO alpha_ref and alpha_test_enable??
+ }
+
+ so->rb_render_control |= 0x2000; /* ??? */
+
+ return so; /* no release path is visible in this file */
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h b/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h
new file mode 100644
index 00000000000..0cc80a8cf0a
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h
@@ -0,0 +1,56 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_ZSA_H_
+#define FD3_ZSA_H_
+
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+#include "freedreno_util.h"
+
+struct fd3_zsa_stateobj { /* depth/stencil/alpha state plus the register words derived from it */
+ struct pipe_depth_stencil_alpha_state base; /* first member, so a pointer to base can be cast to this struct */
+ uint32_t rb_render_control; /* alpha test func bits plus an unexplained 0x2000 */
+ uint32_t rb_depth_control; /* depth func and depth test / write enables */
+ uint32_t rb_stencil_control; /* stencil enable, func and ops, including the _BF fields */
+ uint32_t rb_stencilrefmask; /* write mask and value mask taken from stencil[0] */
+ uint32_t rb_stencilrefmask_bf; /* write mask and value mask taken from stencil[1] */
+};
+
+static INLINE struct fd3_zsa_stateobj * /* view a gallium zsa state as the a3xx wrapper */
+fd3_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
+{
+ return (struct fd3_zsa_stateobj *)zsa; /* plain pointer cast, no type check is performed */
+}
+
+void * fd3_zsa_state_create(struct pipe_context *pctx, /* builds a struct fd3_zsa_stateobj from cso */
+ const struct pipe_depth_stencil_alpha_state *cso); /* result is NULL when allocation fails */
+
+#endif /* FD3_ZSA_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h b/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h
new file mode 100644
index 00000000000..464a7e9d757
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h
@@ -0,0 +1,532 @@
+/*
+ * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef INSTR_A3XX_H_
+#define INSTR_A3XX_H_
+
+#define PACKED __attribute__((__packed__))
+
+#include <stdint.h>
+#include <assert.h>
+
+typedef enum { /* numbering restarts in every category, so enumerators of different categories share values */
+ /* category 0: */
+ OPC_NOP = 0,
+ OPC_BR = 1,
+ OPC_JUMP = 2,
+ OPC_CALL = 3,
+ OPC_RET = 4,
+ OPC_KILL = 5,
+ OPC_END = 6,
+ OPC_EMIT = 7,
+ OPC_CUT = 8,
+ OPC_CHMASK = 9,
+ OPC_CHSH = 10,
+ OPC_FLOW_REV = 11,
+
+ /* category 1: */
+ /* no opc.. all category 1 are variants of mov */
+
+ /* category 2: */
+ OPC_ADD_F = 0,
+ OPC_MIN_F = 1,
+ OPC_MAX_F = 2,
+ OPC_MUL_F = 3,
+ OPC_SIGN_F = 4,
+ OPC_CMPS_F = 5,
+ OPC_ABSNEG_F = 6,
+ OPC_CMPV_F = 7,
+ /* 8 - invalid */
+ OPC_FLOOR_F = 9,
+ OPC_CEIL_F = 10,
+ OPC_RNDNE_F = 11,
+ OPC_RNDAZ_F = 12,
+ OPC_TRUNC_F = 13,
+ /* 14-15 - invalid */
+ OPC_ADD_U = 16,
+ OPC_ADD_S = 17,
+ OPC_SUB_U = 18,
+ OPC_SUB_S = 19,
+ OPC_CMPS_U = 20,
+ OPC_CMPS_S = 21,
+ OPC_MIN_U = 22,
+ OPC_MIN_S = 23,
+ OPC_MAX_U = 24,
+ OPC_MAX_S = 25,
+ OPC_ABSNEG_S = 26,
+ /* 27 - invalid */
+ OPC_AND_B = 28,
+ OPC_OR_B = 29,
+ OPC_NOT_B = 30,
+ OPC_XOR_B = 31,
+ /* 32 - invalid */
+ OPC_CMPV_U = 33,
+ OPC_CMPV_S = 34,
+ /* 35-47 - invalid */
+ OPC_MUL_U = 48,
+ OPC_MUL_S = 49,
+ OPC_MULL_U = 50,
+ OPC_BFREV_B = 51,
+ OPC_CLZ_S = 52,
+ OPC_CLZ_B = 53,
+ OPC_SHL_B = 54,
+ OPC_SHR_B = 55,
+ OPC_ASHR_B = 56,
+ OPC_BARY_F = 57,
+ OPC_MGEN_B = 58,
+ OPC_GETBIT_B = 59,
+ OPC_SETRM = 60,
+ OPC_CBITS_B = 61,
+ OPC_SHB = 62,
+ OPC_MSAD = 63, /* highest value that fits the 6-bit opc field of instr_cat2_t */
+
+ /* category 3: */
+ OPC_MAD_U16 = 0,
+ OPC_MADSH_U16 = 1,
+ OPC_MAD_S16 = 2,
+ OPC_MADSH_M16 = 3, /* should this be .s16? */
+ OPC_MAD_U24 = 4,
+ OPC_MAD_S24 = 5,
+ OPC_MAD_F16 = 6,
+ OPC_MAD_F32 = 7,
+ OPC_SEL_B16 = 8,
+ OPC_SEL_B32 = 9,
+ OPC_SEL_S16 = 10,
+ OPC_SEL_S32 = 11,
+ OPC_SEL_F16 = 12,
+ OPC_SEL_F32 = 13,
+ OPC_SAD_S16 = 14,
+ OPC_SAD_S32 = 15, /* highest value that fits the 4-bit opc field of instr_cat3_t */
+
+ /* category 4: */
+ OPC_RCP = 0,
+ OPC_RSQ = 1,
+ OPC_LOG2 = 2,
+ OPC_EXP2 = 3,
+ OPC_SIN = 4,
+ OPC_COS = 5,
+ OPC_SQRT = 6,
+ // 7-63 - invalid
+
+ /* category 5: */
+ OPC_ISAM = 0,
+ OPC_ISAML = 1,
+ OPC_ISAMM = 2,
+ OPC_SAM = 3,
+ OPC_SAMB = 4,
+ OPC_SAML = 5,
+ OPC_SAMGQ = 6,
+ OPC_GETLOD = 7,
+ OPC_CONV = 8,
+ OPC_CONVM = 9,
+ OPC_GETSIZE = 10,
+ OPC_GETBUF = 11,
+ OPC_GETPOS = 12,
+ OPC_GETINFO = 13,
+ OPC_DSX = 14,
+ OPC_DSY = 15,
+ OPC_GATHER4R = 16,
+ OPC_GATHER4G = 17,
+ OPC_GATHER4B = 18,
+ OPC_GATHER4A = 19,
+ OPC_SAMGP0 = 20,
+ OPC_SAMGP1 = 21,
+ OPC_SAMGP2 = 22,
+ OPC_SAMGP3 = 23,
+ OPC_DSXPP_1 = 24,
+ OPC_DSYPP_1 = 25,
+ OPC_RGETPOS = 26,
+ OPC_RGETINFO = 27,
+
+ /* category 6: */
+ OPC_LDG = 0, /* load-global */
+ OPC_LDL = 1,
+ OPC_LDP = 2,
+ OPC_STG = 3, /* store-global */
+ OPC_STL = 4,
+ OPC_STP = 5,
+ OPC_STI = 6,
+ OPC_G2L = 7,
+ OPC_L2G = 8,
+ OPC_PREFETCH = 9,
+ OPC_LDLW = 10,
+ OPC_STLW = 11,
+ OPC_RESFMT = 14, /* 12 and 13 are not listed */
+ OPC_RESINFO = 15,
+ OPC_ATOMIC_ADD_L = 16,
+ OPC_ATOMIC_SUB_L = 17,
+ OPC_ATOMIC_XCHG_L = 18,
+ OPC_ATOMIC_INC_L = 19,
+ OPC_ATOMIC_DEC_L = 20,
+ OPC_ATOMIC_CMPXCHG_L = 21,
+ OPC_ATOMIC_MIN_L = 22,
+ OPC_ATOMIC_MAX_L = 23,
+ OPC_ATOMIC_AND_L = 24,
+ OPC_ATOMIC_OR_L = 25,
+ OPC_ATOMIC_XOR_L = 26,
+ OPC_LDGB_TYPED_4D = 27,
+ OPC_STGB_4D_4 = 28,
+ OPC_STIB = 29,
+ OPC_LDC_4 = 30,
+ OPC_LDLV = 31, /* highest value that fits the 5-bit opc field of the cat6 encodings */
+
+} opc_t;
+
+typedef enum { /* operand data types; values 0-7 fit the 3-bit type fields of the instruction encodings */
+ TYPE_F16 = 0, /* 16-bit float */
+ TYPE_F32 = 1, /* 32-bit float */
+ TYPE_U16 = 2,
+ TYPE_U32 = 3,
+ TYPE_S16 = 4,
+ TYPE_S32 = 5,
+ TYPE_U8 = 6,
+ TYPE_S8 = 7, // XXX I assume?
+} type_t;
+
+/* width in bits of a value of the given type; an unknown type trips the
+ * assert in debug builds and yields 0 otherwise:
+ */
+static inline uint32_t type_size(type_t type)
+{
+	uint32_t bits = 0;
+
+	switch (type) {
+	case TYPE_U8:
+	case TYPE_S8:
+		bits = 8;
+		break;
+	case TYPE_F16:
+	case TYPE_U16:
+	case TYPE_S16:
+		bits = 16;
+		break;
+	case TYPE_F32:
+	case TYPE_U32:
+	case TYPE_S32:
+		bits = 32;
+		break;
+	default:
+		assert(0); /* invalid type */
+		break;
+	}
+
+	return bits;
+}
+
+/* non-zero when the type is one of the two float types: */
+static inline int type_float(type_t type)
+{
+	switch (type) {
+	case TYPE_F16:
+	case TYPE_F32:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+typedef union PACKED { /* overlapping views of one register / immediate operand field */
+ /* normal gpr or const src register: */
+ struct PACKED {
+ uint32_t comp : 2; /* component selector (x/y/z/w per the regid() numbering -- presumably the same encoding) */
+ uint32_t num : 9; /* register number; comp + num together take 11 bits */
+ };
+ /* for immediate val: */
+ int32_t iim_val : 11; /* signed, same 11 bits as comp + num */
+ /* to make compiler happy: */
+ uint32_t dummy32; /* makes the union a full 32 bits wide */
+ uint32_t dummy11 : 11;
+ uint32_t dummy8 : 8;
+} reg_t;
+
+/* special registers: */
+#define REG_A0 61 /* address register */
+#define REG_P0 62 /* predicate register */
+
+/* non-zero when reg names the address (a0) or predicate (p0) register: */
+static inline int reg_special(reg_t reg)
+{
+	switch (reg.num) {
+	case REG_A0:
+	case REG_P0:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+typedef struct PACKED { /* category 0 encoding: two 32-bit dwords, each group of fields below adds up to 32 bits */
+ /* dword0: */
+ int16_t immed : 16; /* signed 16-bit immediate */
+ uint32_t dummy1 : 16;
+
+ /* dword1: */
+ uint32_t dummy2 : 8;
+ uint32_t repeat : 3;
+ uint32_t dummy3 : 1;
+ uint32_t ss : 1;
+ uint32_t dummy4 : 7;
+ uint32_t inv : 1;
+ uint32_t comp : 2;
+ uint32_t opc : 4; /* the category 0 values of opc_t (0-11) fit in 4 bits */
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3; /* top three bits of dword1, at the same position in every category */
+} instr_cat0_t;
+
+typedef struct PACKED { /* category 1 encoding (mov variants): dword0 is a union of source forms, dword1 adds up to 32 bits */
+ /* dword0: */
+ union PACKED {
+ /* for normal src register: */
+ struct PACKED {
+ uint32_t src : 11;
+ uint32_t pad : 21;
+ };
+ /* for address relative: */
+ struct PACKED {
+ int32_t off : 10; /* signed offset */
+ uint32_t must_be_3 : 2;
+ uint32_t unknown : 20;
+ };
+ /* for immediate: */
+ int32_t iim_val; /* full 32-bit integer immediate */
+ float fim_val; /* the same 32 bits viewed as a float immediate */
+ };
+
+ /* dword1: */
+ uint32_t dst : 8;
+ uint32_t repeat : 3;
+ uint32_t src_r : 1;
+ uint32_t ss : 1;
+ uint32_t src_rel : 1;
+ uint32_t dst_type : 3; /* 3 bits, wide enough for the type_t values */
+ uint32_t dst_rel : 1;
+ uint32_t src_type : 3; /* 3 bits, wide enough for the type_t values */
+ uint32_t src_c : 1;
+ uint32_t src_im : 1;
+ uint32_t even : 1;
+ uint32_t pos_inf : 1;
+ uint32_t must_be_0 : 2; /* occupies the slot where other categories hold opc; category 1 has no opcode */
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat1_t;
+
+typedef struct PACKED { /* category 2 encoding: two sources of 16 bits each in dword0, controls in dword1 */
+ /* dword0: */
+ uint32_t src1 : 11;
+ uint32_t src1_rel : 1; /* relative address */
+ uint32_t src1_c : 1; /* const */
+ uint32_t src1_im : 1; /* immediate */
+ uint32_t src1_neg : 1; /* negate */
+ uint32_t src1_abs : 1; /* absolute value */
+
+ uint32_t src2 : 11;
+ uint32_t src2_rel : 1; /* relative address */
+ uint32_t src2_c : 1; /* const */
+ uint32_t src2_im : 1; /* immediate */
+ uint32_t src2_neg : 1; /* negate */
+ uint32_t src2_abs : 1; /* absolute value */
+
+ /* dword1: */
+ uint32_t dst : 8;
+ uint32_t repeat : 3;
+ uint32_t src1_r : 1;
+ uint32_t ss : 1;
+ uint32_t ul : 1; /* dunno */
+ uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
+ uint32_t ei : 1;
+ uint32_t cond : 3;
+ uint32_t src2_r : 1;
+ uint32_t full : 1; /* not half */
+ uint32_t opc : 6; /* the category 2 values of opc_t (0-63) fit in 6 bits */
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat2_t;
+
+typedef struct PACKED { /* category 3 encoding (three sources): src1 and src3 live in dword0, src2 in dword1 */
+ /* dword0: */
+ uint32_t src1 : 11;
+ uint32_t src1_rel : 1;
+ uint32_t src1_c : 1;
+ uint32_t src2_c : 1; /* src2 flag stored next to the src1 fields */
+ uint32_t src1_neg : 1;
+ uint32_t src2_r : 1; /* src2 flag stored next to the src1 fields */
+ uint32_t src3 : 11;
+ uint32_t src3_rel : 1;
+ uint32_t src3_c : 1;
+ uint32_t src3_r : 1;
+ uint32_t src2_neg : 1; /* src2 flag stored next to the src3 fields */
+ uint32_t src3_neg : 1;
+
+ /* dword1: */
+ uint32_t dst : 8;
+ uint32_t repeat : 3;
+ uint32_t src1_r : 1;
+ uint32_t ss : 1;
+ uint32_t ul : 1;
+ uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
+ uint32_t src2 : 8; /* only 8 bits wide, unlike the 11-bit src1 / src3 */
+ uint32_t opc : 4; /* the category 3 values of opc_t (0-15) fit in 4 bits */
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat3_t;
+
+typedef struct PACKED { /* category 4 encoding: a single source in dword0, controls in dword1 */
+ /* dword0: */
+ uint32_t src : 11;
+ uint32_t src_rel : 1;
+ uint32_t src_c : 1;
+ uint32_t src_im : 1;
+ uint32_t src_neg : 1;
+ uint32_t src_abs : 1;
+ uint32_t dummy1 : 16; /* seem to be ignored */
+
+ /* dword1: */
+ uint32_t dst : 8;
+ uint32_t repeat : 3;
+ uint32_t src_r : 1;
+ uint32_t ss : 1;
+ uint32_t ul : 1;
+ uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
+ uint32_t dummy2 : 5; /* seem to be ignored */
+ uint32_t full : 1; /* not half */
+ uint32_t opc : 6; /* 6-bit field, although only the values 0-6 are defined for category 4 */
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat4_t;
+
+typedef struct PACKED { /* category 5 encoding: dword0 has a normal and an s2en layout, dword1 is shared */
+ /* dword0: */
+ union PACKED {
+ /* normal case: */
+ struct PACKED {
+ uint32_t full : 1; /* not half */
+ uint32_t src1 : 8;
+ uint32_t src2 : 8;
+ uint32_t dummy1 : 4; /* seem to be ignored */
+ uint32_t samp : 4;
+ uint32_t tex : 7;
+ } norm;
+ /* s2en case: */
+ struct PACKED {
+ uint32_t full : 1; /* not half */
+ uint32_t src1 : 8;
+ uint32_t src2 : 11;
+ uint32_t dummy1 : 1;
+ uint32_t src3 : 8;
+ uint32_t dummy2 : 3;
+ } s2en;
+ /* same in either case: */
+ // XXX I think, confirm this
+ struct PACKED {
+ uint32_t full : 1; /* not half */
+ uint32_t src1 : 8;
+ uint32_t pad : 23;
+ };
+ };
+
+ /* dword1: */
+ uint32_t dst : 8;
+ uint32_t wrmask : 4; /* write-mask */
+ uint32_t type : 3;
+ uint32_t dummy2 : 1; /* seems to be ignored */
+ uint32_t is_3d : 1;
+
+ uint32_t is_a : 1;
+ uint32_t is_s : 1;
+ uint32_t is_s2en : 1; /* presumably selects the s2en layout of dword0 -- TODO confirm */
+ uint32_t is_o : 1;
+ uint32_t is_p : 1;
+
+ uint32_t opc : 5; /* the category 5 values of opc_t (0-27) fit in 5 bits */
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat5_t;
+
+/* used for load instructions: */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t must_be_one1 : 1; /* instr_cat6b_t has must_be_zero1 in this position */
+ int16_t off : 13; /* signed 13-bit offset in one piece */
+ uint32_t src : 8;
+ uint32_t dummy1 : 1;
+ uint32_t must_be_one2 : 1;
+ int32_t iim_val : 8;
+
+ /* dword1: */
+ uint32_t dst : 8;
+ uint32_t dummy2 : 9;
+ uint32_t type : 3;
+ uint32_t dummy3 : 2;
+ uint32_t opc : 5; /* the category 6 values of opc_t (0-31) fit in 5 bits */
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat6a_t;
+
+/* used for store instructions: */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t must_be_zero1 : 1; /* instr_cat6a_t has must_be_one1 in this position */
+ uint32_t src : 8;
+ uint32_t off_hi : 5; /* high bits of 'off'... ugly! */
+ uint32_t dummy1 : 9;
+ uint32_t must_be_one1 : 1;
+ int32_t iim_val : 8;
+
+ /* dword1: */
+ uint16_t off : 8; /* low 8 bits of the offset; the high 5 bits are off_hi in dword0 */
+ uint32_t must_be_one2 : 1;
+ uint32_t dst : 8;
+ uint32_t type : 3;
+ uint32_t dummy2 : 2;
+ uint32_t opc : 5;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat6b_t;
+
+typedef union PACKED { /* category 6 instruction: load view, store view, and the fields both share */
+ instr_cat6a_t a; /* load layout */
+ instr_cat6b_t b; /* store layout */
+ struct PACKED { /* fields that sit at the same bit positions in both a and b */
+ /* dword0: */
+ uint32_t pad1 : 24;
+ int32_t iim_val : 8;
+
+ /* dword1: */
+ uint32_t pad2 : 17;
+ uint32_t type : 3;
+ uint32_t pad3 : 2;
+ uint32_t opc : 5;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+ };
+} instr_cat6_t;
+
+typedef union PACKED { /* one 64-bit instruction, viewable through any of the per-category layouts */
+ instr_cat0_t cat0;
+ instr_cat1_t cat1;
+ instr_cat2_t cat2;
+ instr_cat3_t cat3;
+ instr_cat4_t cat4;
+ instr_cat5_t cat5;
+ instr_cat6_t cat6;
+ struct PACKED { /* category-independent view; the fields below span both dwords (64 bits in total) */
+ /* dword0: */
+ uint64_t pad1 : 40; /* all of dword0 plus the low 8 bits of dword1 */
+ uint32_t repeat : 3; /* cat0-cat4 */
+ uint32_t pad2 : 1;
+ uint32_t ss : 1; /* cat1-cat4 (cat0??) */
+ uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
+ uint32_t pad3 : 13;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3; /* top three bits; presumably what selects the catN view to use -- TODO confirm */
+
+ };
+} instr_t;
+
+#endif /* INSTR_A3XX_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.c b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.c
new file mode 100644
index 00000000000..76e8b113ac1
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.c
@@ -0,0 +1,527 @@
+/*
+ * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ir-a3xx.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <stdbool.h>
+#include <errno.h>
+
+#include "freedreno_util.h"
+#include "instr-a3xx.h"
+
+/* Simple bump allocator that carves allocations out of the heap embedded
+ * in the shader, so that everything can be freed in one shot together
+ * with the shader itself.
+ *
+ * Returns a pointer to 'sz' bytes inside shader->heap.  The memory is not
+ * cleared here.
+ *
+ * NOTE: heap_idx indexes uint32_t slots but is advanced by the rounded-up
+ * size in *bytes*, so every allocation reserves more slots than it strictly
+ * needs.  That stride is deliberately left alone: tightening it would change
+ * the alignment of the returned pointers.
+ */
+static void * ir3_alloc(struct ir3_shader *shader, int sz)
+{
+	unsigned step = align(sz, 4);
+	void *ptr;
+
+	/* the heap never grows, so running off the end would silently
+	 * scribble over whatever follows it.  Catch that in debug builds,
+	 * the same way insert_instr() guards the instruction table:
+	 */
+	assert(shader->heap_idx + step <= ARRAY_SIZE(shader->heap));
+
+	ptr = &shader->heap[shader->heap_idx];
+	shader->heap_idx += step;
+	return ptr;
+}
+
+/* Allocate a new, empty shader.  The whole object (instruction table and
+ * embedded heap included) comes back zero-filled; returns NULL if the
+ * allocation fails.  Release it with ir3_shader_destroy().
+ */
+struct ir3_shader * ir3_shader_create(void)
+{
+	struct ir3_shader *shader = calloc(1, sizeof(*shader));
+
+	return shader;
+}
+
+/* Free a shader.  Instructions and registers are allocated from the heap
+ * embedded in the shader (see ir3_alloc()), so this single free() releases
+ * them as well.
+ */
+void ir3_shader_destroy(struct ir3_shader *shader)
+{
+ free(shader);
+}
+
+/* "instruction assert": validate a condition inside one of the emit
+ * functions.  When the condition is false, assert() aborts in debug
+ * builds; if assert() is compiled out (NDEBUG) the enclosing function
+ * returns -1 instead.  Only usable in functions that return int.
+ */
+#define iassert(cond) do { \
+ if (!(cond)) { \
+ assert(cond); \
+ return -1; \
+ } } while (0)
+
+/* Encode one register operand and record it in the shader statistics.
+ *
+ *   reg:         operand to encode
+ *   info:        max_reg / max_half_reg / max_const are raised as needed
+ *   repeat:      repeat count of the instruction; only counted towards the
+ *                highest register touched if the operand has IR3_REG_R set
+ *   valid_flags: flags the operand may carry in this position (asserted)
+ *
+ * Returns the raw bits of the encoded operand (an immediate value for
+ * IR3_REG_IMMED operands, otherwise component + register number).
+ */
+static uint32_t reg(struct ir3_register *reg, struct ir3_shader_info *info,
+		uint32_t repeat, uint32_t valid_flags)
+{
+	reg_t val = { .dummy32 = 0 };
+	uint32_t rpt = (reg->flags & IR3_REG_R) ? repeat : 0;
+	int8_t *highest = NULL;
+	int8_t max;
+
+	assert(!(reg->flags & ~valid_flags));
+
+	/* immediates carry no register number, so there is nothing to track: */
+	if (reg->flags & IR3_REG_IMMED) {
+		val.iim_val = reg->iim_val;
+		return val.dummy32;
+	}
+
+	/* the component lives in the low two bits of the register number: */
+	val.num = reg->num >> 2;
+	val.comp = reg->num & 0x3;
+
+	/* last register touched, taking the repeat count into account: */
+	max = (reg->num + rpt) >> 2;
+
+	/* pick the counter this operand contributes to; REG_A0 and REG_P0
+	 * are not counted as registers:
+	 */
+	if (reg->flags & IR3_REG_CONST)
+		highest = &info->max_const;
+	else if ((max != REG_A0) && (max != REG_P0))
+		highest = (reg->flags & IR3_REG_HALF) ?
+				&info->max_half_reg : &info->max_reg;
+
+	if (highest)
+		*highest = MAX2(*highest, max);
+
+	return val.dummy32;
+}
+
+/* Encode a category 0 instruction into the 64-bit slot at 'ptr'.
+ * These instructions have no register operands, so 'info' is not
+ * touched.  Always returns 0.
+ */
+static int emit_cat0(struct ir3_instruction *instr, void *ptr,
+		struct ir3_shader_info *info)
+{
+	const unsigned iflags = instr->flags;
+	instr_cat0_t *enc = ptr;
+
+	enc->immed   = instr->cat0.immed;
+	enc->repeat  = instr->repeat;
+	enc->ss      = (iflags & IR3_INSTR_SS) ? 1 : 0;
+	enc->inv     = instr->cat0.inv;
+	enc->comp    = instr->cat0.comp;
+	enc->opc     = instr->opc;
+	enc->jmp_tgt = (iflags & IR3_INSTR_JP) ? 1 : 0;
+	enc->sync    = (iflags & IR3_INSTR_SY) ? 1 : 0;
+	enc->opc_cat = 0;
+
+	return 0;
+}
+
+/* Register flags implied by a data type: none for 32-bit types,
+ * IR3_REG_HALF for everything else.
+ */
+static uint32_t type_flags(type_t type)
+{
+	if (type_size(type) == 32)
+		return 0;
+
+	return IR3_REG_HALF;
+}
+
+/* Encode a category 1 instruction (one destination, one source, with
+ * separate source and destination types) into the 64-bit slot at 'ptr'.
+ *
+ * Returns 0 on success, or -1 (via iassert) if the instruction does not
+ * have exactly two registers or a register's half/full precision does not
+ * match its declared type.
+ */
+static int emit_cat1(struct ir3_instruction *instr, void *ptr,
+ struct ir3_shader_info *info)
+{
+ struct ir3_register *dst = instr->regs[0];
+ struct ir3_register *src = instr->regs[1];
+ instr_cat1_t *cat1 = ptr;
+
+ iassert(instr->regs_count == 2);
+ /* half-ness of each register must agree with its type; an immediate
+ * source is exempt:
+ */
+ iassert(!((dst->flags ^ type_flags(instr->cat1.dst_type)) & IR3_REG_HALF));
+ iassert((src->flags & IR3_REG_IMMED) ||
+ !((src->flags ^ type_flags(instr->cat1.src_type)) & IR3_REG_HALF));
+
+ /* the source is encoded in one of three forms.  Note that only the
+ * plain-register form goes through reg(), so only that form is
+ * flag-checked and counted in 'info':
+ */
+ if (src->flags & IR3_REG_IMMED) {
+ cat1->iim_val = src->iim_val;
+ cat1->src_im = 1;
+ } else if (src->flags & IR3_REG_RELATIV) {
+ cat1->off = src->offset;
+ cat1->src_rel = 1;
+ cat1->must_be_3 = 3;
+ } else {
+ cat1->src = reg(src, info, instr->repeat,
+ IR3_REG_IMMED | IR3_REG_RELATIV |
+ IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF);
+ }
+
+ cat1->dst = reg(dst, info, instr->repeat,
+ IR3_REG_RELATIV | IR3_REG_EVEN |
+ IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF);
+ cat1->repeat = instr->repeat;
+ cat1->src_r = !!(src->flags & IR3_REG_R);
+ cat1->ss = !!(instr->flags & IR3_INSTR_SS);
+ cat1->dst_type = instr->cat1.dst_type;
+ cat1->dst_rel = !!(dst->flags & IR3_REG_RELATIV);
+ cat1->src_type = instr->cat1.src_type;
+ cat1->src_c = !!(src->flags & IR3_REG_CONST);
+ cat1->even = !!(dst->flags & IR3_REG_EVEN);
+ cat1->pos_inf = !!(dst->flags & IR3_REG_POS_INF);
+ cat1->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
+ cat1->sync = !!(instr->flags & IR3_INSTR_SY);
+ cat1->opc_cat = 1;
+
+ return 0;
+}
+
+/* Encode a category 2 instruction (destination plus one or two sources)
+ * into the 64-bit slot at 'ptr'.
+ *
+ * Returns 0 on success, or -1 (via iassert) if the register count is not
+ * 2 or 3, or if a non-immediate second source differs in half/full
+ * precision from the first source.
+ */
+static int emit_cat2(struct ir3_instruction *instr, void *ptr,
+		struct ir3_shader_info *info)
+{
+	/* flags accepted on either source operand: */
+	const uint32_t src_valid =
+		IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_IMMED |
+		IR3_REG_NEGATE | IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF;
+	struct ir3_register *rd = instr->regs[0];
+	struct ir3_register *ra = instr->regs[1];
+	struct ir3_register *rb = instr->regs[2];   /* NULL for one-source ops */
+	instr_cat2_t *enc = ptr;
+
+	iassert((instr->regs_count == 2) || (instr->regs_count == 3));
+
+	enc->src1     = reg(ra, info, instr->repeat, src_valid);
+	enc->src1_rel = (ra->flags & IR3_REG_RELATIV) ? 1 : 0;
+	enc->src1_c   = (ra->flags & IR3_REG_CONST) ? 1 : 0;
+	enc->src1_im  = (ra->flags & IR3_REG_IMMED) ? 1 : 0;
+	enc->src1_neg = (ra->flags & IR3_REG_NEGATE) ? 1 : 0;
+	enc->src1_abs = (ra->flags & IR3_REG_ABS) ? 1 : 0;
+	enc->src1_r   = (ra->flags & IR3_REG_R) ? 1 : 0;
+
+	if (rb) {
+		/* both sources must have the same precision, unless the second
+		 * one is an immediate:
+		 */
+		iassert((rb->flags & IR3_REG_IMMED) ||
+				!((ra->flags ^ rb->flags) & IR3_REG_HALF));
+		enc->src2     = reg(rb, info, instr->repeat, src_valid);
+		enc->src2_rel = (rb->flags & IR3_REG_RELATIV) ? 1 : 0;
+		enc->src2_c   = (rb->flags & IR3_REG_CONST) ? 1 : 0;
+		enc->src2_im  = (rb->flags & IR3_REG_IMMED) ? 1 : 0;
+		enc->src2_neg = (rb->flags & IR3_REG_NEGATE) ? 1 : 0;
+		enc->src2_abs = (rb->flags & IR3_REG_ABS) ? 1 : 0;
+		enc->src2_r   = (rb->flags & IR3_REG_R) ? 1 : 0;
+	}
+
+	enc->dst      = reg(rd, info, instr->repeat,
+			IR3_REG_R | IR3_REG_EI | IR3_REG_HALF);
+	enc->repeat   = instr->repeat;
+	enc->ss       = (instr->flags & IR3_INSTR_SS) ? 1 : 0;
+	enc->ul       = (instr->flags & IR3_INSTR_UL) ? 1 : 0;
+	/* set when the destination precision differs from the first source: */
+	enc->dst_half = ((ra->flags ^ rd->flags) & IR3_REG_HALF) ? 1 : 0;
+	enc->ei       = (rd->flags & IR3_REG_EI) ? 1 : 0;
+	enc->cond     = instr->cat2.condition;
+	enc->full     = (ra->flags & IR3_REG_HALF) ? 0 : 1;
+	enc->opc      = instr->opc;
+	enc->jmp_tgt  = (instr->flags & IR3_INSTR_JP) ? 1 : 0;
+	enc->sync     = (instr->flags & IR3_INSTR_SY) ? 1 : 0;
+	enc->opc_cat  = 2;
+
+	return 0;
+}
+
+/* Encode a category 3 instruction (destination plus three sources) into
+ * the 64-bit slot at 'ptr'.
+ *
+ * Whether the sources must be half or full precision is dictated by the
+ * opcode.  Returns 0 on success, or -1 (via iassert) if the instruction
+ * does not have exactly four registers or a source has the wrong
+ * precision for the opcode.
+ */
+static int emit_cat3(struct ir3_instruction *instr, void *ptr,
+		struct ir3_shader_info *info)
+{
+	/* flags accepted on src1 and src3; src2 takes the same set except
+	 * for IR3_REG_RELATIV:
+	 */
+	const uint32_t src13_valid = IR3_REG_RELATIV | IR3_REG_CONST |
+			IR3_REG_NEGATE | IR3_REG_R | IR3_REG_HALF;
+	const uint32_t src2_valid = IR3_REG_CONST | IR3_REG_NEGATE |
+			IR3_REG_R | IR3_REG_HALF;
+	struct ir3_register *rd = instr->regs[0];
+	struct ir3_register *ra = instr->regs[1];
+	struct ir3_register *rb = instr->regs[2];
+	struct ir3_register *rc = instr->regs[3];
+	instr_cat3_t *enc = ptr;
+	uint32_t want_half;
+
+	/* opcodes operating on half precision sources: */
+	switch (instr->opc) {
+	case OPC_MAD_F16:
+	case OPC_MAD_U16:
+	case OPC_MAD_S16:
+	case OPC_SEL_B16:
+	case OPC_SEL_S16:
+	case OPC_SEL_F16:
+	case OPC_SAD_S16:
+	case OPC_SAD_S32:  // really??
+		want_half = IR3_REG_HALF;
+		break;
+	default:
+		want_half = 0;
+		break;
+	}
+
+	iassert(instr->regs_count == 4);
+	iassert(!((ra->flags ^ want_half) & IR3_REG_HALF));
+	iassert(!((rb->flags ^ want_half) & IR3_REG_HALF));
+	iassert(!((rc->flags ^ want_half) & IR3_REG_HALF));
+
+	enc->src1     = reg(ra, info, instr->repeat, src13_valid);
+	enc->src1_rel = (ra->flags & IR3_REG_RELATIV) ? 1 : 0;
+	enc->src1_c   = (ra->flags & IR3_REG_CONST) ? 1 : 0;
+	enc->src1_neg = (ra->flags & IR3_REG_NEGATE) ? 1 : 0;
+	enc->src1_r   = (ra->flags & IR3_REG_R) ? 1 : 0;
+
+	enc->src2     = reg(rb, info, instr->repeat, src2_valid);
+	enc->src2_c   = (rb->flags & IR3_REG_CONST) ? 1 : 0;
+	enc->src2_neg = (rb->flags & IR3_REG_NEGATE) ? 1 : 0;
+	enc->src2_r   = (rb->flags & IR3_REG_R) ? 1 : 0;
+
+	enc->src3     = reg(rc, info, instr->repeat, src13_valid);
+	enc->src3_rel = (rc->flags & IR3_REG_RELATIV) ? 1 : 0;
+	enc->src3_c   = (rc->flags & IR3_REG_CONST) ? 1 : 0;
+	enc->src3_neg = (rc->flags & IR3_REG_NEGATE) ? 1 : 0;
+	enc->src3_r   = (rc->flags & IR3_REG_R) ? 1 : 0;
+
+	enc->dst      = reg(rd, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+	enc->repeat   = instr->repeat;
+	enc->ss       = (instr->flags & IR3_INSTR_SS) ? 1 : 0;
+	enc->ul       = (instr->flags & IR3_INSTR_UL) ? 1 : 0;
+	/* set when the destination precision differs from the sources: */
+	enc->dst_half = ((want_half ^ rd->flags) & IR3_REG_HALF) ? 1 : 0;
+	enc->opc      = instr->opc;
+	enc->jmp_tgt  = (instr->flags & IR3_INSTR_JP) ? 1 : 0;
+	enc->sync     = (instr->flags & IR3_INSTR_SY) ? 1 : 0;
+	enc->opc_cat  = 3;
+
+	return 0;
+}
+
+/* Encode a category 4 instruction (destination plus a single source)
+ * into the 64-bit slot at 'ptr'.
+ *
+ * Returns 0 on success, or -1 (via iassert) if the instruction does not
+ * have exactly two registers.
+ */
+static int emit_cat4(struct ir3_instruction *instr, void *ptr,
+		struct ir3_shader_info *info)
+{
+	struct ir3_register *rd = instr->regs[0];
+	struct ir3_register *rs = instr->regs[1];
+	instr_cat4_t *enc = ptr;
+
+	iassert(instr->regs_count == 2);
+
+	enc->src      = reg(rs, info, instr->repeat,
+			IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_IMMED |
+			IR3_REG_NEGATE | IR3_REG_ABS | IR3_REG_R |
+			IR3_REG_HALF);
+	enc->src_rel  = (rs->flags & IR3_REG_RELATIV) ? 1 : 0;
+	enc->src_c    = (rs->flags & IR3_REG_CONST) ? 1 : 0;
+	enc->src_im   = (rs->flags & IR3_REG_IMMED) ? 1 : 0;
+	enc->src_neg  = (rs->flags & IR3_REG_NEGATE) ? 1 : 0;
+	enc->src_abs  = (rs->flags & IR3_REG_ABS) ? 1 : 0;
+	enc->src_r    = (rs->flags & IR3_REG_R) ? 1 : 0;
+
+	enc->dst      = reg(rd, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+	enc->repeat   = instr->repeat;
+	enc->ss       = (instr->flags & IR3_INSTR_SS) ? 1 : 0;
+	enc->ul       = (instr->flags & IR3_INSTR_UL) ? 1 : 0;
+	/* set when the destination precision differs from the source: */
+	enc->dst_half = ((rs->flags ^ rd->flags) & IR3_REG_HALF) ? 1 : 0;
+	enc->full     = (rs->flags & IR3_REG_HALF) ? 0 : 1;
+	enc->opc      = instr->opc;
+	enc->jmp_tgt  = (instr->flags & IR3_INSTR_JP) ? 1 : 0;
+	enc->sync     = (instr->flags & IR3_INSTR_SY) ? 1 : 0;
+	enc->opc_cat  = 4;
+
+	return 0;
+}
+
+/* Encode a category 5 instruction (destination plus up to three optional
+ * sources, with sampler/texture indices) into the 64-bit slot at 'ptr'.
+ *
+ * Two operand layouts exist, selected by IR3_INSTR_S2EN: the 's2en' form
+ * may take a third source but requires samp and tex to both be zero,
+ * while the 'norm' form carries samp/tex and takes no third source.
+ *
+ * Returns 0 on success, or -1 (via iassert) if a precision or operand
+ * constraint is violated.  Unlike the other emitters there is no check
+ * on regs_count here; absent sources are detected as NULL register
+ * pointers.
+ */
+static int emit_cat5(struct ir3_instruction *instr, void *ptr,
+ struct ir3_shader_info *info)
+{
+ struct ir3_register *dst = instr->regs[0];
+ struct ir3_register *src1 = instr->regs[1];
+ struct ir3_register *src2 = instr->regs[2];
+ struct ir3_register *src3 = instr->regs[3];
+ instr_cat5_t *cat5 = ptr;
+
+ /* destination precision must match the instruction's type: */
+ iassert(!((dst->flags ^ type_flags(instr->cat5.type)) & IR3_REG_HALF));
+
+ if (src1) {
+ cat5->full = ! (src1->flags & IR3_REG_HALF);
+ cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF);
+ }
+
+
+ if (instr->flags & IR3_INSTR_S2EN) {
+ if (src2) {
+ /* src2 must have the same precision as src1: */
+ iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
+ cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
+ }
+ if (src3) {
+ /* src3 is always a half register: */
+ iassert(src3->flags & IR3_REG_HALF);
+ cat5->s2en.src3 = reg(src3, info, instr->repeat, IR3_REG_HALF);
+ }
+ iassert(!(instr->cat5.samp | instr->cat5.tex));
+ } else {
+ iassert(!src3);
+ if (src2) {
+ /* src2 must have the same precision as src1: */
+ iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
+ cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
+ }
+ cat5->norm.samp = instr->cat5.samp;
+ cat5->norm.tex = instr->cat5.tex;
+ }
+
+ cat5->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+ cat5->wrmask = dst->wrmask;
+ cat5->type = instr->cat5.type;
+ cat5->is_3d = !!(instr->flags & IR3_INSTR_3D);
+ cat5->is_a = !!(instr->flags & IR3_INSTR_A);
+ cat5->is_s = !!(instr->flags & IR3_INSTR_S);
+ cat5->is_s2en = !!(instr->flags & IR3_INSTR_S2EN);
+ cat5->is_o = !!(instr->flags & IR3_INSTR_O);
+ cat5->is_p = !!(instr->flags & IR3_INSTR_P);
+ cat5->opc = instr->opc;
+ cat5->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
+ cat5->sync = !!(instr->flags & IR3_INSTR_SY);
+ cat5->opc_cat = 5;
+
+ return 0;
+}
+
+/* Encode a category 6 instruction into the 64-bit slot at 'ptr'.
+ *
+ * The operand bits use one of two layouts depending on the opcode
+ * (instr_cat6a_t for the load opcodes, instr_cat6b_t for the store
+ * opcodes); the fields common to both are then written through the
+ * generic instr_cat6_t view.  All three pointers alias the same 64 bits.
+ *
+ * Returns 0 on success, or -1 (via iassert) if the instruction does not
+ * have exactly two registers or a register's precision does not match
+ * the instruction type.
+ */
+static int emit_cat6(struct ir3_instruction *instr, void *ptr,
+ struct ir3_shader_info *info)
+{
+ struct ir3_register *dst = instr->regs[0];
+ struct ir3_register *src = instr->regs[1];
+ instr_cat6_t *cat6 = ptr;
+
+ iassert(instr->regs_count == 2);
+
+ switch (instr->opc) {
+ /* load instructions: */
+ case OPC_LDG:
+ case OPC_LDP:
+ case OPC_LDL:
+ case OPC_LDLW:
+ case OPC_LDLV:
+ case OPC_PREFETCH: {
+ instr_cat6a_t *cat6a = ptr;
+
+ /* for loads the type describes the destination: */
+ iassert(!((dst->flags ^ type_flags(instr->cat6.type)) & IR3_REG_HALF));
+
+ cat6a->must_be_one1 = 1;
+ cat6a->must_be_one2 = 1;
+ cat6a->off = instr->cat6.offset;
+ /* no flags at all are accepted on a load source: */
+ cat6a->src = reg(src, info, instr->repeat, 0);
+ cat6a->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+ break;
+ }
+ /* store instructions: */
+ case OPC_STG:
+ case OPC_STP:
+ case OPC_STL:
+ case OPC_STLW:
+ case OPC_STI: {
+ instr_cat6b_t *cat6b = ptr;
+ /* for stores the type describes the source: */
+ uint32_t src_flags = type_flags(instr->cat6.type);
+ /* only OPC_STI may have a half destination register: */
+ uint32_t dst_flags = (instr->opc == OPC_STI) ? IR3_REG_HALF : 0;
+
+ iassert(!((src->flags ^ src_flags) & IR3_REG_HALF));
+
+ cat6b->must_be_one1 = 1;
+ cat6b->must_be_one2 = 1;
+ cat6b->src = reg(src, info, instr->repeat, src_flags);
+ /* the offset is split: bits above the low 8 go into off_hi, the
+ * assignment to the 8-bit 'off' field keeps only the low bits:
+ */
+ cat6b->off_hi = instr->cat6.offset >> 8;
+ cat6b->off = instr->cat6.offset;
+ cat6b->dst = reg(dst, info, instr->repeat, IR3_REG_R | dst_flags);
+
+ break;
+ }
+ default:
+ // TODO
+ /* NOTE(review): any other opcode is emitted with only the common
+ * fields below, i.e. without src/dst/offset being encoded.
+ */
+ break;
+ }
+
+ /* fields at the same position in both layouts: */
+ cat6->iim_val = instr->cat6.iim_val;
+ cat6->type = instr->cat6.type;
+ cat6->opc = instr->opc;
+ cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
+ cat6->sync = !!(instr->flags & IR3_INSTR_SY);
+ cat6->opc_cat = 6;
+
+ return 0;
+}
+
+/* Per-category encoders, indexed by ir3_instruction::category (0..6).
+ * Each one writes a single 64-bit instruction at 'ptr' and returns 0 on
+ * success or non-zero on failure.
+ */
+static int (*emit[])(struct ir3_instruction *instr, void *ptr,
+ struct ir3_shader_info *info) = {
+ emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6,
+};
+
+/* Assemble the shader into its binary form.
+ *
+ * The instruction list is first padded with NOPs to a multiple of four
+ * instructions (this modifies 'shader'), then every instruction is
+ * encoded into a freshly allocated buffer of info->sizedwords dwords.
+ *
+ * On return 'info' holds the size of the binary and the highest
+ * register / half register / const touched (-1 if none).
+ *
+ * Returns the buffer, which the caller owns and must free(), or NULL if
+ * the allocation fails, an instruction has an unknown category, or an
+ * instruction fails to encode.
+ */
+void * ir3_shader_assemble(struct ir3_shader *shader, struct ir3_shader_info *info)
+{
+	uint32_t *ptr, *dwords;
+	uint32_t i;
+
+	info->max_reg = -1;
+	info->max_half_reg = -1;
+	info->max_const = -1;
+
+	/* need an integer number of instruction "groups" (sets of four
+	 * instructions), so pad out w/ NOPs if needed:
+	 */
+	while (shader->instrs_count != align(shader->instrs_count, 4))
+		ir3_instr_create(shader, 0, OPC_NOP);
+
+	/* each instruction is 64bits: */
+	info->sizedwords = 2 * shader->instrs_count;
+
+	ptr = dwords = calloc(1, 4 * info->sizedwords);
+	if (!ptr)
+		return NULL;
+
+	for (i = 0; i < shader->instrs_count; i++) {
+		struct ir3_instruction *instr = shader->instrs[i];
+		int ret;
+
+		/* category is used directly as an index into emit[], so
+		 * reject anything that would read outside of the table:
+		 */
+		if ((instr->category < 0) ||
+				(instr->category >= (int)ARRAY_SIZE(emit)))
+			goto fail;
+
+		ret = emit[instr->category](instr, dwords, info);
+		if (ret)
+			goto fail;
+		dwords += 2;
+	}
+
+	return ptr;
+
+fail:
+	free(ptr);
+	return NULL;
+}
+
+/* Allocate a register from the shader's heap and set its number and
+ * flags.  The register is not attached to any instruction; see
+ * ir3_reg_create() for that.
+ */
+static struct ir3_register * reg_create(struct ir3_shader *shader,
+		int num, int flags)
+{
+	struct ir3_register *r = ir3_alloc(shader, sizeof(*r));
+
+	r->num = num;
+	r->flags = flags;
+
+	return r;
+}
+
+/* Append an instruction to the end of the shader's instruction list.
+ * Asserts that the fixed-size list still has room.
+ */
+static void insert_instr(struct ir3_shader *shader,
+		struct ir3_instruction *instr)
+{
+	unsigned slot = shader->instrs_count;
+
+	assert(slot < ARRAY_SIZE(shader->instrs));
+	shader->instrs[slot] = instr;
+	shader->instrs_count = slot + 1;
+}
+
+/* Create a new instruction of the given category/opcode and append it to
+ * the shader.  Only shader, category and opc are set here; all other
+ * fields keep whatever the heap memory contained.
+ */
+struct ir3_instruction * ir3_instr_create(struct ir3_shader *shader,
+		int category, opc_t opc)
+{
+	struct ir3_instruction *created = ir3_alloc(shader, sizeof(*created));
+
+	created->opc = opc;
+	created->category = category;
+	created->shader = shader;
+
+	insert_instr(shader, created);
+
+	return created;
+}
+
+/* Duplicate an instruction, appending the copy to the same shader.  The
+ * copy gets its own freshly allocated registers, so modifying a register
+ * of the clone does not affect the original.
+ */
+struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
+{
+	struct ir3_shader *shader = instr->shader;
+	struct ir3_instruction *copy = ir3_alloc(shader, sizeof(*copy));
+	unsigned n;
+
+	*copy = *instr;
+	insert_instr(shader, copy);
+
+	/* the struct copy above duplicated the register *pointers*; start
+	 * over with an empty list and give the clone registers of its own:
+	 */
+	copy->regs_count = 0;
+	for (n = 0; n < instr->regs_count; n++) {
+		struct ir3_register *from = instr->regs[n];
+		struct ir3_register *to =
+				ir3_reg_create(copy, from->num, from->flags);
+
+		*to = *from;
+	}
+
+	return copy;
+}
+
+/* Create a register and append it to the instruction's register list.
+ * Asserts that the instruction still has a free register slot.
+ */
+struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
+		int num, int flags)
+{
+	struct ir3_register *r = reg_create(instr->shader, num, flags);
+
+	assert(instr->regs_count < ARRAY_SIZE(instr->regs));
+	instr->regs[instr->regs_count] = r;
+	instr->regs_count++;
+
+	return r;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h
new file mode 100644
index 00000000000..2fedc7bee38
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IR3_H_
+#define IR3_H_
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "instr-a3xx.h"
+
+/* low level intermediate representation of an adreno shader program */
+
+struct ir3_shader;
+
+struct ir3_shader * fd_asm_parse(const char *src);
+
+/* Statistics about an assembled shader, filled in by
+ * ir3_shader_assemble().
+ */
+struct ir3_shader_info {
+ /* size of the assembled binary in dwords (two per instruction): */
+ uint16_t sizedwords;
+ /* NOTE: max_reg, etc, does not include registers not touched
+ * by the shader (ie. vertex fetched via VFD_DECODE but not
+ * touched by shader)
+ *
+ * The values are register numbers without the component bits, and
+ * are -1 if no register of that kind is used.
+ */
+ int8_t max_reg; /* highest GPR # used by shader */
+ int8_t max_half_reg; /* highest half-precision GPR # used by shader */
+ int8_t max_const; /* highest const # used by shader */
+};
+
+/* One operand (destination or source) of an instruction.  'flags' decides
+ * which member of the union is meaningful.
+ */
+struct ir3_register {
+ enum {
+ /* operand is a const rather than a GPR: */
+ IR3_REG_CONST = 0x001,
+ /* operand is an immediate value (iim_val/fim_val): */
+ IR3_REG_IMMED = 0x002,
+ /* half-precision register: */
+ IR3_REG_HALF = 0x004,
+ /* relative addressing (uses 'offset'): */
+ IR3_REG_RELATIV= 0x008,
+ /* (r) flag; the instruction's repeat count only extends the
+ * range of registers counted as used when this is set:
+ */
+ IR3_REG_R = 0x010,
+ IR3_REG_NEGATE = 0x020,
+ IR3_REG_ABS = 0x040,
+ IR3_REG_EVEN = 0x080,
+ IR3_REG_POS_INF= 0x100,
+ /* (ei) flag, end-input? Set on last bary, presumably to signal
+ * that the shader needs no more input:
+ */
+ IR3_REG_EI = 0x200,
+ } flags;
+ union {
+ /* normal registers: */
+ struct {
+ /* the component is in the low two bits of the reg #, so
+ * rN.x becomes: (n << 2) | x
+ */
+ int num;
+ int wrmask;
+ };
+ /* immediate: */
+ int iim_val;
+ float fim_val;
+ /* relative: */
+ int offset;
+ };
+};
+
+/* One instruction in the IR.  Instructions are created with
+ * ir3_instr_create() and their operands are added with ir3_reg_create().
+ */
+struct ir3_instruction {
+ /* shader this instruction belongs to (and is allocated from): */
+ struct ir3_shader *shader;
+ /* instruction category, 0..6; selects the encoder used at assembly: */
+ int category;
+ opc_t opc;
+ enum {
+ /* (sy) flag is set on first instruction, and after sample
+ * instructions (probably just on RAW hazard).
+ */
+ IR3_INSTR_SY = 0x001,
+ /* (ss) flag is set on first instruction, and first instruction
+ * to depend on the result of "long" instructions (RAW hazard):
+ *
+ * rcp, rsq, log2, exp2, sin, cos, sqrt
+ *
+ * It seems to synchronize until all in-flight instructions are
+ * completed, for example:
+ *
+ * rsq hr1.w, hr1.w
+ * add.f hr2.z, (neg)hr2.z, hc0.y
+ * mul.f hr2.w, (neg)hr2.y, (neg)hr2.y
+ * rsq hr2.x, hr2.x
+ * (rpt1)nop
+ * mad.f16 hr2.w, hr2.z, hr2.z, hr2.w
+ * nop
+ * mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w
+ * (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w
+ * (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x
+ *
+ * The last mul.f does not have (ss) set, presumably because the
+ * (ss) on the previous instruction does the job.
+ *
+ * The blob driver also seems to set it on WAR hazards, although
+ * not really clear if this is needed or just blob compiler being
+ * sloppy. So far I haven't found a case where removing the (ss)
+ * causes problems for WAR hazard, but I could just be getting
+ * lucky:
+ *
+ * rcp r1.y, r3.y
+ * (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z
+ *
+ */
+ IR3_INSTR_SS = 0x002,
+ /* (jp) flag is set on jump targets:
+ */
+ IR3_INSTR_JP = 0x004,
+ IR3_INSTR_UL = 0x008,
+ /* the remaining flags are only encoded for category 5
+ * instructions:
+ */
+ IR3_INSTR_3D = 0x010,
+ IR3_INSTR_A = 0x020,
+ IR3_INSTR_O = 0x040,
+ IR3_INSTR_P = 0x080,
+ IR3_INSTR_S = 0x100,
+ IR3_INSTR_S2EN = 0x200,
+ } flags;
+ /* repeat count, the (rptN) prefix: */
+ int repeat;
+ /* number of valid entries in regs[]: */
+ unsigned regs_count;
+ /* operands: regs[0] is the destination, followed by the sources: */
+ struct ir3_register *regs[4];
+ /* category specific parameters: */
+ union {
+ struct {
+ char inv;
+ char comp;
+ int immed;
+ } cat0;
+ struct {
+ type_t src_type, dst_type;
+ } cat1;
+ struct {
+ enum {
+ IR3_COND_LT = 0,
+ IR3_COND_LE = 1,
+ IR3_COND_GT = 2,
+ IR3_COND_GE = 3,
+ IR3_COND_EQ = 4,
+ IR3_COND_NE = 5,
+ } condition;
+ } cat2;
+ struct {
+ unsigned samp, tex;
+ type_t type;
+ } cat5;
+ struct {
+ type_t type;
+ int offset;
+ int iim_val;
+ } cat6;
+ };
+};
+
+/* this is just large to cope w/ the large test *.asm: */
+#define MAX_INSTRS 10240
+
+/* A shader program: a flat list of instructions plus the fixed-size heap
+ * that the instructions and their registers are allocated from.  Since
+ * everything lives inside this one struct, freeing the shader frees all
+ * of it.
+ */
+struct ir3_shader {
+ /* number of valid entries in instrs[]: */
+ unsigned instrs_count;
+ struct ir3_instruction *instrs[MAX_INSTRS];
+ /* backing store for instructions and registers: */
+ uint32_t heap[128 * MAX_INSTRS];
+ /* index of the first unused heap[] entry: */
+ unsigned heap_idx;
+};
+
+/* shader lifetime: create an empty (zero-filled) shader, and free a
+ * shader together with all of its instructions and registers:
+ */
+struct ir3_shader * ir3_shader_create(void);
+void ir3_shader_destroy(struct ir3_shader *shader);
+/* encode the shader to its binary form; returns a buffer the caller must
+ * free(), or NULL on failure.  'info' is filled in with the binary size
+ * and register usage:
+ */
+void * ir3_shader_assemble(struct ir3_shader *shader,
+ struct ir3_shader_info *info);
+
+/* append a new instruction to the shader, resp. append a copy of an
+ * existing instruction (including copies of its registers):
+ */
+struct ir3_instruction * ir3_instr_create(struct ir3_shader *shader, int category, opc_t opc);
+struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);
+
+/* append a new register (operand) to the instruction: */
+struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
+ int num, int flags);
+
+#endif /* IR3_H_ */
diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h
index 42057da0737..b1198125e97 100644
--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -5,13 +5,13 @@
This file was generated by the rules-ng-ng headergen tool in this git repository:
http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 38794 bytes, from 2013-05-05 22:47:28)
+- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 42578 bytes, from 2013-06-02 13:10:46)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22)
- /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37)
Copyright (C) 2013 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
index 853206d3757..d3a7baca0e9 100644
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -5,13 +5,13 @@
This file was generated by the rules-ng-ng headergen tool in this git repository:
http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 38794 bytes, from 2013-05-05 22:47:28)
+- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 42578 bytes, from 2013-06-02 13:10:46)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22)
- /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37)
Copyright (C) 2013 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 9c47a58a81f..f88fa08aa7f 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -47,12 +47,13 @@
#include "freedreno_screen.h"
#include "freedreno_resource.h"
#include "freedreno_fence.h"
#include "freedreno_util.h"
#include "fd2_screen.h"
+#include "fd3_screen.h"
/* XXX this should go away */
#include "state_tracker/drm_driver.h"
static const struct debug_named_value debug_options[] = {
{"msgs", FD_DBG_MSGS, "Print debug messages"},
@@ -409,12 +410,15 @@ fd_screen_create(struct fd_device *dev)
* send a patch ;-)
*/
switch (screen->gpu_id) {
case 220:
fd2_screen_init(pscreen);
break;
+ case 320:
+ fd3_screen_init(pscreen);
+ break;
default:
debug_printf("unsupported GPU: a%03d\n", screen->gpu_id);
goto fail;
}
pscreen->destroy = fd_screen_destroy;
diff --git a/src/gallium/drivers/freedreno/freedreno_util.c b/src/gallium/drivers/freedreno/freedreno_util.c
index 83a33db8f5b..0462e5fb515 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.c
+++ b/src/gallium/drivers/freedreno/freedreno_util.c
@@ -36,12 +36,14 @@ fd_pipe2depth(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
return DEPTHX_16;
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM:
return DEPTHX_24_8;
default:
return ~0;
}
}