summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex Deucher <alexdeucher@gmail.com>2011-03-03 01:44:19 (GMT)
committerDave Airlie <airlied@redhat.com>2011-05-24 05:55:58 (GMT)
commitb8ade97c9d0fa5aacb0e3166868bb72e9bc679a6 (patch)
tree6e30e7d614cc47b9a031f762418f5d878f964c9d
parent01b646ed800732985c1638b147716641a99082f9 (diff)
cayman: first pass at exa/Xv shaders
Main differences with evergreen: - 4-way rather than 5-way - END_OF_PROGRAM bit removed from CF instructions, use CF_INST_END instead. - MEGA_FETCH* fields removed from VTX commands - no more VC, all fetches go through the TC Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
-rw-r--r--src/Makefile.am3
-rw-r--r--src/cayman_shader.c3125
-rw-r--r--src/cayman_shader.h279
-rw-r--r--src/evergreen_exa.c76
-rw-r--r--src/radeon.h1
5 files changed, 3479 insertions, 5 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index f52b761..8d4cf17 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -54,7 +54,7 @@ endif
if USE_EXA
RADEON_EXA_SOURCES = radeon_exa.c r600_exa.c r6xx_accel.c r600_textured_videofuncs.c r600_shader.c radeon_exa_shared.c \
- evergreen_exa.c evergreen_accel.c evergreen_shader.c evergreen_textured_videofuncs.c cayman_accel.c
+ evergreen_exa.c evergreen_accel.c evergreen_shader.c evergreen_textured_videofuncs.c cayman_accel.c cayman_shader.c
endif
AM_CFLAGS = \
@@ -136,6 +136,7 @@ EXTRA_DIST = \
evergreen_state.h \
cayman_reg.h \
cayman_reg_auto.h \
+ cayman_shader.h \
ati.h \
ativersion.h \
bicubic_table.h \
diff --git a/src/cayman_shader.c b/src/cayman_shader.c
new file mode 100644
index 0000000..2cdcc68
--- /dev/null
+++ b/src/cayman_shader.c
@@ -0,0 +1,3125 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Alex Deucher <alexander.deucher@amd.com>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef XF86DRM_MODE
+
+#include "xf86.h"
+
+#include "cayman_shader.h"
+#include "cayman_reg.h"
+
+/* solid vs: fills shader[] with the program dwords and returns how many were written */
+int cayman_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0; /* next free dword slot in shader[] */
+
+ /* 0 - TC clause: I_COUNT(1), ADDR(4) (the vertex fetch labelled 4/5 below) */
+ shader[i++] = CF_DWORD0(ADDR(4),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TC),
+ BARRIER(1));
+ /* 1 - position export (CF_POS0) from GPR 1 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 2 - always export a param whether it's used or not (param 0 from GPR 0) */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(0));
+ /* 3 - end of the CF program (SQ_CF_INST_END) */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_END),
+ BARRIER(1));
+ /* 4/5 - vertex fetch, buffer 0 at OFFSET(0), into GPR 1 (z = SQ_SEL_0, w = SQ_SEL_1) */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+ LDS_REQ(0),
+ COALESCED_READ(0));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+
+ return i;
+}
+
+/* solid ps: fills shader[] with the program dwords and returns how many were written */
+int cayman_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0; /* next free dword slot in shader[] */
+
+ /* 0 - ALU clause: I_COUNT(4), ADDR(3), kcache set 0 in SQ_CF_KCACHE_LOCK_1 mode */
+ shader[i++] = CF_ALU_DWORD0(ADDR(3),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 1 - pixel export (CF_PIXEL_MRT0) from GPR 0 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+
+ /* 2 - end of the CF program (SQ_CF_INST_END) */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_END),
+ BARRIER(1));
+ /* 3 - MOV GPR0.x <- kcache0[0].x, with CLAMP(1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ /* 4 - MOV GPR0.y <- kcache0[0].y, with CLAMP(1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ /* 5 - MOV GPR0.z <- kcache0[0].z, with CLAMP(1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ /* 6 - MOV GPR0.w <- kcache0[0].w, with CLAMP(1); LAST(1) marks the final slot */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+
+ return i;
+}
+
+/* copy vs: fills shader[] with the program dwords and returns how many were written */
+int cayman_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0; /* next free dword slot in shader[] */
+
+ /* 0 - TC clause: I_COUNT(2), ADDR(4) (the two vertex fetches labelled 4/5 and 6/7 below) */
+ shader[i++] = CF_DWORD0(ADDR(4),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TC),
+ BARRIER(1));
+ /* 1 - position export (CF_POS0) from GPR 1 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 2 - param export (param 0) from GPR 0 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(0));
+ /* 3 - end of the CF program (SQ_CF_INST_END) */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_END),
+ BARRIER(1));
+ /* 4/5 - vertex fetch, buffer 0 at OFFSET(0), into GPR 1 (exported as position) */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+ LDS_REQ(0),
+ COALESCED_READ(0));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+ /* 6/7 - vertex fetch, buffer 0 at OFFSET(8), into GPR 0 (exported as param 0) */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+ LDS_REQ(0),
+ COALESCED_READ(0));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(8),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+
+ return i;
+}
+
+/* copy ps: fills shader[] with the program dwords and returns how many were written */
+int cayman_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0; /* next free dword slot in shader[] */
+
+ /* CF INST 0 - ALU clause: I_COUNT(4), ADDR(4) (the interpolation slots 4-7 below) */
+ shader[i++] = CF_ALU_DWORD0(ADDR(4),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* CF INST 1 - TC clause: I_COUNT(1), ADDR(8) (the TEX instruction labelled 8/9 below) */
+ shader[i++] = CF_DWORD0(ADDR(8),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TC),
+ BARRIER(1));
+ /* CF INST 2 - pixel export (CF_PIXEL_MRT0) from GPR 0 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* CF INST 3 - end of the CF program (SQ_CF_INST_END) */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_END),
+ BARRIER(1));
+ /* 4 interpolate tex coords: INTERP_XY, src0 GPR0.y, src1 param 0, dst GPR0.x (written) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 5 - INTERP_XY, src0 GPR0.x, src1 param 0, dst GPR0.y (written) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* 6 - INTERP_XY, src0 GPR0.y, src1 param 0, dst GPR0.z with WRITE_MASK(0) (result not written) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 7 - INTERP_XY, src0 GPR0.x, src1 param 0, dst GPR0.w with WRITE_MASK(0); LAST(1) marks the final slot */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 8/9 TEX INST 0 - SAMPLE resource 0 / sampler 0, coords from GPR 0 (TEX_UNNORMALIZED), result to GPR 0 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X), /* R */
+ DST_SEL_Y(SQ_SEL_Y), /* G */
+ DST_SEL_Z(SQ_SEL_Z), /* B */
+ DST_SEL_W(SQ_SEL_W), /* A */
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_UNNORMALIZED),
+ COORD_TYPE_Y(TEX_UNNORMALIZED),
+ COORD_TYPE_Z(TEX_UNNORMALIZED),
+ COORD_TYPE_W(TEX_UNNORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+
+ return i;
+}
+
+int cayman_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) /* xv vs: fills shader[], returns the dword count written */
+{
+ int i = 0; /* next free dword slot in shader[] */
+
+ /* 0 - TC clause: I_COUNT(2), ADDR(8) (the two vertex fetches labelled 8/9 and 10/11 below) */
+ shader[i++] = CF_DWORD0(ADDR(8),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TC),
+ BARRIER(1));
+
+ /* 1 - ALU clause: I_COUNT(2), ADDR(5), kcache set 0 in SQ_CF_KCACHE_LOCK_1 mode */
+ shader[i++] = CF_ALU_DWORD0(ADDR(5),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(2),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 2 - position export (CF_POS0) from GPR 1 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(3));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 3 - param export (param 0) from GPR 0 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(3));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(0));
+ /* 4 - end of the CF program (SQ_CF_INST_END) */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_END),
+ BARRIER(1));
+ /* 5 texX / w: GPR0.x = GPR0.x * kcache0[0].x (MUL; constant presumably holds 1/w - confirm against caller) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 6 texY / h: GPR0.y = GPR0.y * kcache0[0].y (MUL; constant presumably holds 1/h - confirm against caller) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 7 - padding: two zero dwords so the fetches below start at slot 8 */
+ shader[i++] = 0x00000000;
+ shader[i++] = 0x00000000;
+ /* 8/9 - vertex fetch, buffer 0 at OFFSET(0), into GPR 1 (exported as position) */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+ LDS_REQ(0),
+ COALESCED_READ(0));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+ /* 10/11 - vertex fetch, buffer 0 at OFFSET(8), into GPR 0 (scaled by the ALU clause, exported as param 0) */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+ LDS_REQ(0),
+ COALESCED_READ(0));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(8),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+
+ return i;
+}
+
+int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0;
+
+ /* 0 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(6),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 1 */
+ shader[i++] = CF_DWORD0(ADDR(22),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_CALL),
+ BARRIER(0));
+ /* 2 */
+ shader[i++] = CF_DWORD0(ADDR(30),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_NOT_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_CALL),
+ BARRIER(0));
+ /* 3 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(10),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(12),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 4 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(2),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(3));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 5 - end */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_END),
+ BARRIER(1));
+ /* 6 interpolate tex coords */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 7 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* 8 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 9 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 10,11,12,13 */
+ /* r2.x = MAD(c0.w, r1.x, c0.x) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* r2.y = MAD(c0.w, r1.x, c0.y) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Y),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* r2.z = MAD(c0.w, r1.x, c0.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* r2.w = MAD(0, 0, 1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 14,15,16,17 */
+ /* r2.x = MAD(c1.x, r1.y, pv.x) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* r2.y = MAD(c1.y, r1.y, pv.y) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Y),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* r2.z = MAD(c1.z, r1.y, pv.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* r2.w = MAD(0, 0, 1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+ /* 18,19,20,21 */
+ /* r2.x = MAD(c2.x, r1.z, pv.x) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ /* r2.y = MAD(c2.y, r1.z, pv.y) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Y),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ /* r2.z = MAD(c2.z, r1.z, pv.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ /* r2.w = MAD(0, 0, 1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+
+ /* 22 */
+ shader[i++] = CF_DWORD0(ADDR(24),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(3),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TC),
+ BARRIER(1));
+ /* 23 */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ BARRIER(1));
+ /* 24/25 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_MASK),
+ DST_SEL_Z(SQ_SEL_MASK),
+ DST_SEL_W(SQ_SEL_1),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+ /* 26/27 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(1),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_MASK),
+ DST_SEL_Y(SQ_SEL_MASK),
+ DST_SEL_Z(SQ_SEL_X),
+ DST_SEL_W(SQ_SEL_MASK),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(1),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+ /* 28/29 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(2),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_MASK),
+ DST_SEL_Y(SQ_SEL_X),
+ DST_SEL_Z(SQ_SEL_MASK),
+ DST_SEL_W(SQ_SEL_MASK),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(2),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+ /* 30 */
+ shader[i++] = CF_DWORD0(ADDR(32),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TC),
+ BARRIER(1));
+ /* 31 */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ BARRIER(1));
+ /* 32/33 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_MASK),
+ DST_SEL_Z(SQ_SEL_MASK),
+ DST_SEL_W(SQ_SEL_1),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+ /* 34/35 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(1),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_MASK),
+ DST_SEL_Y(SQ_SEL_X),
+ DST_SEL_Z(SQ_SEL_Y),
+ DST_SEL_W(SQ_SEL_MASK),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(1),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+
+ return i;
+}
+
+/* comp vs ---------------------------------------
+ * Writes the composite vertex shader program into shader[], one 32-bit
+ * word per shader[i++] store starting at index 0, and returns the number
+ * of words written (108 as laid out here).  ChipSet is not referenced in
+ * this function, and no bounds checking is done on shader[].
+ *
+ * The numbers in the comments below are two-dword slot indices; the
+ * ADDR() values of the CF instructions refer to the same numbering:
+ *   0-13   control flow (two CALLs, END, then a mask and a non-mask sub)
+ *   14-33  ALU clause of the mask sub (20 instructions)
+ *   34-43  ALU clause of the non-mask sub (10 instructions)
+ *   44-49  the three fetches of the mask sub (two slots each, incl. pad)
+ *   50-53  the two fetches of the non-mask sub
+ * In the ALU comments rN is GPR N and cN is ALU_SRC_KCACHE0_BASE + N.
+ */
+int cayman_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0;
+
+ /* 0 - CALL the mask sub at slot 3, COND_BOOL on CF_CONST(0) */
+ shader[i++] = CF_DWORD0(ADDR(3),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_CALL),
+ BARRIER(0));
+ /* 1 - CALL the non-mask sub at slot 9, COND_NOT_BOOL on CF_CONST(0) */
+ shader[i++] = CF_DWORD0(ADDR(9),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_NOT_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_CALL),
+ BARRIER(0));
+ /* 2 - end: CF_INST_END marks the end of the program */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_END),
+ BARRIER(1));
+ /* 3 - mask sub: TC clause running the 3 fetches that start at slot 44 */
+ shader[i++] = CF_DWORD0(ADDR(44),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(3),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TC),
+ BARRIER(1));
+
+ /* 4 - ALU: 20 instructions starting at slot 14, kcache bank 0 in LOCK_1 mode */
+ shader[i++] = CF_ALU_DWORD0(ADDR(14),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(20),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 5 - dst: export r2 as position. NOTE(review): BURST_COUNT(1) here and in 6/7 vs. 0 in the non-mask exports - confirm intended */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(2),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 6 - src: export r1 as param 0 (plain EXPORT; the DONE variant is on the mask export below) */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT),
+ MARK(0),
+ BARRIER(0));
+ /* 7 - mask: export r0 as param 1 (EXPORT_DONE) */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(0));
+ /* 8 - RETURN from the mask sub */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ BARRIER(1));
+ /* 9 - non-mask sub: TC clause running the 2 fetches that start at slot 50 */
+ shader[i++] = CF_DWORD0(ADDR(50),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TC),
+ BARRIER(1));
+
+ /* 10 - ALU: 10 instructions starting at slot 34, kcache bank 0 in LOCK_1 mode */
+ shader[i++] = CF_ALU_DWORD0(ADDR(34),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(10),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 11 - dst: export r1 as position */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
+ BURST_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 12 - src: export r0 as param 0 (EXPORT_DONE) */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
+ BURST_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(0));
+ /* 13 - RETURN from the non-mask sub */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ BARRIER(1));
+
+ /* 14 srcX.x DOT4 - mask: slots 14-17 are one DOT4 group over r1 and c0 (LAST on 17); only this .x slot writes r3 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 15 srcX.y DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 16 srcX.z DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 17 srcX.w DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 18 srcY.x DOT4 - mask: slots 18-21 are one DOT4 group over r1 and c1; the write-enabled slot is .y (19) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 19 srcY.y DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 20 srcY.z DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 21 srcY.w DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 22 maskX.x DOT4 - mask: slots 22-25 are one DOT4 group over r0 and c2; only this .x slot writes r4 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 23 maskX.y DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 24 maskX.z DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 25 maskX.w DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 26 maskY.x DOT4 - mask: slots 26-29 are one DOT4 group over r0 and c3; the write-enabled slot is .y (27) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 27 maskY.y DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 28 maskY.z DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 29 maskY.w DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 30 srcX / w: r1.x = r3.x * c0.w (a MUL; c0.w presumably holds 1/width - confirm against the constant upload) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 31 srcY / h: r1.y = r3.y * c1.w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 32 maskX / w: r0.x = r4.x * c2.w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 33 maskY / h: r0.y = r4.y * c3.w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 34 srcX.x DOT4 - non-mask: slots 34-37 are one DOT4 group over r0 and c0; only this .x slot writes r2 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 35 srcX.y DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 36 srcX.z DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 37 srcX.w DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 38 srcY.x DOT4 - non-mask: slots 38-41 are one DOT4 group over r0 and c1; the write-enabled slot is .y (39) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 39 srcY.y DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 40 srcY.z DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 41 srcY.w DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 42 srcX / w: r0.x = r2.x * c0.w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 43 srcY / h: r0.y = r2.y * c1.w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* mask vfetch - 44/45 - dst: FMT_32_32_FLOAT at offset 0 of buffer 0 -> r2.xy, with z = 0 and w = 1 */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+ LDS_REQ(0),
+ COALESCED_READ(0));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+ /* 46/47 - src: FMT_32_32_FLOAT at offset 8 -> r1.xy, with z = 1 and w = 0 */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+ LDS_REQ(0),
+ COALESCED_READ(0));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_1),
+ DST_SEL_W(SQ_SEL_0),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(8),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+ /* 48/49 - mask: FMT_32_32_FLOAT at offset 16 -> r0.xy, with z = 1 and w = 0 */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+ LDS_REQ(0),
+ COALESCED_READ(0));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_1),
+ DST_SEL_W(SQ_SEL_0),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(16),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+
+ /* no mask vfetch - 50/51 - dst: FMT_32_32_FLOAT at offset 0 of buffer 0 -> r1.xy, with z = 0 and w = 1 */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+ LDS_REQ(0),
+ COALESCED_READ(0));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+ /* 52/53 - src: FMT_32_32_FLOAT at offset 8 -> r0.xy, with z = 1 and w = 0 */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+ LDS_REQ(0),
+ COALESCED_READ(0));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_1),
+ DST_SEL_W(SQ_SEL_0),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(8),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+
+ return i;
+}
+
+/* comp ps ---------------------------------------
+ *
+ * Writes the composite pixel shader into shader[] starting at index 0 and
+ * returns the number of dwords written.  ChipSet is not referenced here.
+ *
+ * The slot numbers in the comments below match the ADDR() values used by
+ * the CF instructions:
+ *    0-11  CF instructions
+ *   12-27  ALU instructions
+ *   28-33  TEX fetches (each fetch is commented as a pair of slots)
+ *
+ * CF 0 and CF 1 each call one subroutine, conditional on
+ * SQ_CF_COND_BOOL and SQ_CF_COND_NOT_BOOL of CF_CONST(0) respectively:
+ *   mask sub (CF 3-7): interpolate the src coords into GPR1 and the mask
+ *     coords into GPR0, sample both textures, write the clamped
+ *     per-channel product to GPR2 and export GPR2 to MRT0.
+ *   non-mask sub (CF 8-11): interpolate the coords into GPR0, sample
+ *     resource 0 into GPR0 and export GPR0 to MRT0.
+ */
+int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0;
+
+ /* 0 - call mask sub (CF 3), SQ_CF_COND_BOOL on CF_CONST(0) */
+ shader[i++] = CF_DWORD0(ADDR(3),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_CALL),
+ BARRIER(0));
+ /* 1 - call non-mask sub (CF 8), SQ_CF_COND_NOT_BOOL on CF_CONST(0) */
+ shader[i++] = CF_DWORD0(ADDR(8),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_NOT_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_CALL),
+ BARRIER(0));
+ /* 2 - end */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_END),
+ BARRIER(1));
+ /* 3 - mask sub: interpolate src and mask tex coords (ALU 12-19) */
+ shader[i++] = CF_ALU_DWORD0(ADDR(12),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(8),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 4 - fetch src and mask textures (TEX 28-31) */
+ shader[i++] = CF_DWORD0(ADDR(28),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TC),
+ BARRIER(1));
+
+ /* 5 - multiply the two fetched values into GPR2 (ALU 20-23) */
+ shader[i++] = CF_ALU_DWORD0(ADDR(20),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 6 - export GPR2 to MRT0 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(2),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 7 - return from mask sub */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ BARRIER(1));
+
+ /* 8 - non-mask sub: interpolate tex coords (ALU 24-27) */
+ shader[i++] = CF_ALU_DWORD0(ADDR(24),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 9 - fetch src texture (TEX 32/33) */
+ shader[i++] = CF_DWORD0(ADDR(32),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TC),
+ BARRIER(1));
+
+ /* 10 - export GPR0 to MRT0 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+
+ /* 11 - return from non-mask sub */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ BARRIER(1));
+
+ /* 12 interpolate src tex coords - mask (12-15 write GPR1; only X and Y have WRITE_MASK set) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 13 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* 14 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 15 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 16 interpolate mask tex coords (16-19 write GPR0 from param 1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 17 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* 18 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 19 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 20 - alu 0 */
+ /* MUL gpr[2].x gpr[0].x gpr[1].x */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ /* 21 - alu 1 */
+ /* MUL gpr[2].y gpr[0].y gpr[1].y */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ /* 22 - alu 2 */
+ /* MUL gpr[2].z gpr[0].z gpr[1].z */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ /* 23 - alu 3 */
+ /* MUL gpr[2].w gpr[0].w gpr[1].w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+
+ /* 24 - interpolate tex coords - non-mask (24-27 write GPR0 from param 0) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 25 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* 26 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 27 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 28/29 - src - mask (resource 0 / sampler 0, coords in GPR1, result to GPR1) */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(1),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+ /* 30/31 - mask (resource 1 / sampler 1, coords in GPR0, result to GPR0) */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(1),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(1),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+
+ /* 32/33 - src - non-mask (resource 0 / sampler 0, coords in GPR0, result to GPR0) */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+
+ return i;
+}
+
+#endif
diff --git a/src/cayman_shader.h b/src/cayman_shader.h
new file mode 100644
index 0000000..76a74d8
--- /dev/null
+++ b/src/cayman_shader.h
@@ -0,0 +1,279 @@
+/*
+ * Cayman shaders
+ *
+ * Copyright (C) 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Shader macros
+ */
+
+#ifndef __SHADER_H__
+#define __SHADER_H__
+
+#include <stdint.h>
+
+#include "radeon.h"
+
+/* Order of instructions: All CF, All ALU, All Tex/Vtx fetches */
+
+
+// CF insts
+// addr
+#define ADDR(x) (x)
+// jumptable
+#define JUMPTABLE_SEL(x) (x)
+// pc
+#define POP_COUNT(x) (x)
+// const
+#define CF_CONST(x) (x)
+// cond
+#define COND(x) (x) // SQ_COND_*
+// count
+#define I_COUNT(x) ((x) ? ((x) - 1) : 0)
+// vpm
+#define VALID_PIXEL_MODE(x) (x)
+// cf inst
+#define CF_INST(x) (x) // SQ_CF_INST_*
+// wqm
+#define WHOLE_QUAD_MODE(x) (x)
+// barrier
+#define BARRIER(x) (x)
+//kb0
+#define KCACHE_BANK0(x) (x)
+//kb1
+#define KCACHE_BANK1(x) (x)
+// km0/1
+#define KCACHE_MODE0(x) (x)
+#define KCACHE_MODE1(x) (x) // SQ_CF_KCACHE_*
+//
+#define KCACHE_ADDR0(x) (x)
+#define KCACHE_ADDR1(x) (x)
+
+#define ALT_CONST(x) (x)
+
+#define ARRAY_BASE(x) (x)
+// export pixel
+#define CF_PIXEL_MRT0 0
+#define CF_PIXEL_MRT1 1
+#define CF_PIXEL_MRT2 2
+#define CF_PIXEL_MRT3 3
+#define CF_PIXEL_MRT4 4
+#define CF_PIXEL_MRT5 5
+#define CF_PIXEL_MRT6 6
+#define CF_PIXEL_MRT7 7
+// computed Z
+#define CF_COMPUTED_Z 61
+// export pos
+#define CF_POS0 60
+#define CF_POS1 61
+#define CF_POS2 62
+#define CF_POS3 63
+// export param
+// 0...31
+#define TYPE(x) (x) // SQ_EXPORT_*
+#define RW_GPR(x) (x)
+#define RW_REL(x) (x)
+#define ABSOLUTE 0
+#define RELATIVE 1
+#define INDEX_GPR(x) (x)
+/*
+ * ELEM_SIZE / BURST_COUNT take a count and encode it as count - 1, with 0
+ * staying 0 (same convention as I_COUNT).  The argument is parenthesized so
+ * that an expression argument such as (1 << 2) is evaluated as a whole
+ * before the decrement.
+ */
+#define ELEM_SIZE(x) ((x) ? ((x) - 1) : 0)
+#define BURST_COUNT(x) ((x) ? ((x) - 1) : 0)
+#define MARK(x) (x)
+
+// swiz
+#define SRC_SEL_X(x) (x) // SQ_SEL_* each
+#define SRC_SEL_Y(x) (x)
+#define SRC_SEL_Z(x) (x)
+#define SRC_SEL_W(x) (x)
+
+/*
+ * CF instruction word packers.  Each argument is placed at the bit offset
+ * given by its shift below.  Every field is converted to uint32_t before
+ * shifting: several fields end at bit 31 (barrier at bit 31, two-bit fields
+ * at bit 30), and shifting such a value as a signed int overflows, which is
+ * undefined behaviour in C.
+ */
+
+/* addr at bit 0, jumptable select at bit 24 */
+#define CF_DWORD0(addr, jmptbl) \
+    ((uint32_t)(addr) | ((uint32_t)(jmptbl) << 24))
+/* pop count 0, cf const 3, cond 8, count 10, valid pixel mode 20, inst 22, barrier 31 */
+#define CF_DWORD1(pc, cf_const, cond, count, vpm, cf_inst, b) \
+    (((uint32_t)(pc) << 0) | ((uint32_t)(cf_const) << 3) | \
+     ((uint32_t)(cond) << 8) | ((uint32_t)(count) << 10) | \
+     ((uint32_t)(vpm) << 20) | ((uint32_t)(cf_inst) << 22) | \
+     ((uint32_t)(b) << 31))
+
+/* addr at bit 0, kcache bank0 22, kcache bank1 26, kcache mode0 30 */
+#define CF_ALU_DWORD0(addr, kb0, kb1, km0) \
+    (((uint32_t)(addr) << 0) | ((uint32_t)(kb0) << 22) | \
+     ((uint32_t)(kb1) << 26) | ((uint32_t)(km0) << 30))
+/* kcache mode1 0, kcache addr0 2, kcache addr1 10, count 18, alt const 25,
+ * inst 26, whole quad mode 30, barrier 31 */
+#define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, alt_const, cf_inst, wqm, b) \
+    (((uint32_t)(km1) << 0) | ((uint32_t)(kcache_addr0) << 2) | \
+     ((uint32_t)(kcache_addr1) << 10) | ((uint32_t)(count) << 18) | \
+     ((uint32_t)(alt_const) << 25) | ((uint32_t)(cf_inst) << 26) | \
+     ((uint32_t)(wqm) << 30) | ((uint32_t)(b) << 31))
+
+/* array base 0, type 13, rw gpr 15, rw rel 22, index gpr 23, elem size 30 */
+#define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \
+    (((uint32_t)(array_base) << 0) | ((uint32_t)(type) << 13) | \
+     ((uint32_t)(rw_gpr) << 15) | ((uint32_t)(rr) << 22) | \
+     ((uint32_t)(index_gpr) << 23) | ((uint32_t)(es) << 30))
+/* sel x/y/z/w at 0/3/6/9, burst count 16, valid pixel mode 20, inst 22,
+ * mark 30, barrier 31 */
+#define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, bc, vpm, cf_inst, m, b) \
+    (((uint32_t)(sel_x) << 0) | ((uint32_t)(sel_y) << 3) | \
+     ((uint32_t)(sel_z) << 6) | ((uint32_t)(sel_w) << 9) | \
+     ((uint32_t)(bc) << 16) | ((uint32_t)(vpm) << 20) | \
+     ((uint32_t)(cf_inst) << 22) | ((uint32_t)(m) << 30) | \
+     ((uint32_t)(b) << 31))
+
+// ALU clause insts
+#define SRC0_SEL(x) (x)
+#define SRC1_SEL(x) (x)
+#define SRC2_SEL(x) (x)
+// src[0-2]_sel
+// 0-127 GPR
+// 128-159 kcache constants bank 0
+// 160-191 kcache constants bank 1
+// 192-255 inline const values
+// 256-287 kcache constants bank 2
+// 288-319 kcache constants bank 3
+// 219-255 special SQ_ALU_SRC_* (0, 1, etc.)
+// 448+ src param space (see ALU_SRC_PARAM_BASE)
+#define ALU_SRC_GPR_BASE 0
+#define ALU_SRC_KCACHE0_BASE 128
+#define ALU_SRC_KCACHE1_BASE 160
+#define ALU_SRC_INLINE_K_BASE 192
+#define ALU_SRC_KCACHE2_BASE 256
+#define ALU_SRC_KCACHE3_BASE 288
+#define ALU_SRC_PARAM_BASE 448
+
+#define SRC0_REL(x) (x)
+#define SRC1_REL(x) (x)
+#define SRC2_REL(x) (x)
+// elem
+#define SRC0_ELEM(x) (x)
+#define SRC1_ELEM(x) (x)
+#define SRC2_ELEM(x) (x)
+#define ELEM_X 0
+#define ELEM_Y 1
+#define ELEM_Z 2
+#define ELEM_W 3
+// neg
+#define SRC0_NEG(x) (x)
+#define SRC1_NEG(x) (x)
+#define SRC2_NEG(x) (x)
+// im
+#define INDEX_MODE(x) (x) // SQ_INDEX_*
+// ps
+#define PRED_SEL(x) (x) // SQ_PRED_SEL_*
+// last
+#define LAST(x) (x)
+// abs
+#define SRC0_ABS(x) (x)
+#define SRC1_ABS(x) (x)
+// uem
+#define UPDATE_EXECUTE_MASK(x) (x)
+// up
+#define UPDATE_PRED(x) (x)
+// wm
+#define WRITE_MASK(x) (x)
+// omod
+#define OMOD(x) (x) // SQ_ALU_OMOD_*
+// alu inst
+#define ALU_INST(x) (x) // SQ_ALU_INST_*
+//bs
+#define BANK_SWIZZLE(x) (x) // SQ_ALU_VEC_*
+#define DST_GPR(x) (x)
+#define DST_REL(x) (x)
+#define DST_ELEM(x) (x)
+#define CLAMP(x) (x)
+
+/*
+ * ALU instruction word packers.  Each argument is placed at the bit offset
+ * given by its shift below.  Fields are converted to uint32_t before
+ * shifting because "last" and "clamp" land in bit 31, and shifting a signed
+ * int 1 into the sign bit is undefined behaviour in C.
+ */
+
+/* src0: sel 0, rel 9, elem 10, neg 12; src1: sel 13, rel 22, elem 23, neg 25;
+ * index mode 26, pred sel 29, last 31 */
+#define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \
+    (((uint32_t)(src0_sel) << 0) | ((uint32_t)(s0r) << 9) | \
+     ((uint32_t)(s0e) << 10) | ((uint32_t)(s0n) << 12) | \
+     ((uint32_t)(src1_sel) << 13) | ((uint32_t)(s1r) << 22) | \
+     ((uint32_t)(s1e) << 23) | ((uint32_t)(s1n) << 25) | \
+     ((uint32_t)(im) << 26) | ((uint32_t)(ps) << 29) | \
+     ((uint32_t)(last) << 31))
+
+/* src0 abs 0, src1 abs 1, update exec mask 2, update pred 3, write mask 4,
+ * omod 5, inst 7, bank swizzle 18, dst gpr 21, dst rel 28, dst elem 29,
+ * clamp 31 */
+#define ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
+    (((uint32_t)(s0a) << 0) | ((uint32_t)(s1a) << 1) | \
+     ((uint32_t)(uem) << 2) | ((uint32_t)(up) << 3) | \
+     ((uint32_t)(wm) << 4) | ((uint32_t)(omod) << 5) | \
+     ((uint32_t)(alu_inst) << 7) | ((uint32_t)(bs) << 18) | \
+     ((uint32_t)(dst_gpr) << 21) | ((uint32_t)(dr) << 28) | \
+     ((uint32_t)(de) << 29) | ((uint32_t)(clamp) << 31))
+
+/* src2: sel 0, rel 9, elem 10, neg 12; inst 13, bank swizzle 18, dst gpr 21,
+ * dst rel 28, dst elem 29, clamp 31 */
+#define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \
+    (((uint32_t)(src2_sel) << 0) | ((uint32_t)(s2r) << 9) | \
+     ((uint32_t)(s2e) << 10) | ((uint32_t)(s2n) << 12) | \
+     ((uint32_t)(alu_inst) << 13) | ((uint32_t)(bs) << 18) | \
+     ((uint32_t)(dst_gpr) << 21) | ((uint32_t)(dr) << 28) | \
+     ((uint32_t)(de) << 29) | ((uint32_t)(clamp) << 31))
+
+// VTX clause insts
+// vxt insts
+#define VTX_INST(x) (x) // SQ_VTX_INST_*
+
+// fetch type
+#define FETCH_TYPE(x) (x) // SQ_VTX_FETCH_*
+
+#define FETCH_WHOLE_QUAD(x) (x)
+#define BUFFER_ID(x) (x)
+#define SRC_GPR(x) (x)
+#define SRC_REL(x) (x)
+
+#define STRUCTURED_READ(x) (x)
+#define LDS_REQ(x) (x)
+#define COALESCED_READ(x) (x)
+
+#define DST_SEL_X(x) (x)
+#define DST_SEL_Y(x) (x)
+#define DST_SEL_Z(x) (x)
+#define DST_SEL_W(x) (x)
+#define USE_CONST_FIELDS(x) (x)
+#define DATA_FORMAT(x) (x)
+// num format
+#define NUM_FORMAT_ALL(x) (x) // SQ_NUM_FORMAT_*
+// format comp
+#define FORMAT_COMP_ALL(x) (x) // SQ_FORMAT_COMP_*
+// sma
+#define SRF_MODE_ALL(x) (x)
+#define SRF_MODE_ZERO_CLAMP_MINUS_ONE 0
+#define SRF_MODE_NO_ZERO 1
+#define OFFSET(x) (x)
+// endian swap
+#define ENDIAN_SWAP(x) (x) // SQ_ENDIAN_*
+#define CONST_BUF_NO_STRIDE(x) (x)
+#define BUFFER_INDEX_MODE(x) (x)
+
+/*
+ * Vertex fetch word packers.  Each argument is placed at the bit offset
+ * given by its shift below.  Fields are converted to uint32_t before
+ * shifting because the last field of DWORD0 and DWORD1 lands in bit 31, and
+ * shifting a signed int 1 into the sign bit is undefined behaviour in C.
+ */
+
+/* inst 0, fetch type 5, fetch whole quad 7, buffer id 8, src gpr 16,
+ * src rel 23, src sel x 24, src sel y 26, structured read 28, lds req 30,
+ * coalesced read 31 */
+#define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, ssy, str, ldsr, cr) \
+    (((uint32_t)(vtx_inst) << 0) | ((uint32_t)(ft) << 5) | \
+     ((uint32_t)(fwq) << 7) | ((uint32_t)(buffer_id) << 8) | \
+     ((uint32_t)(src_gpr) << 16) | ((uint32_t)(sr) << 23) | \
+     ((uint32_t)(ssx) << 24) | ((uint32_t)(ssy) << 26) | \
+     ((uint32_t)(str) << 28) | ((uint32_t)(ldsr) << 30) | \
+     ((uint32_t)(cr) << 31))
+/* dst gpr 0, dst rel 7, dst sel x/y/z/w at 9/12/15/18, use const fields 21,
+ * data format 22, num format 28, format comp 30, srf mode 31 */
+#define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
+    (((uint32_t)(dst_gpr) << 0) | ((uint32_t)(dr) << 7) | \
+     ((uint32_t)(dsx) << 9) | ((uint32_t)(dsy) << 12) | \
+     ((uint32_t)(dsz) << 15) | ((uint32_t)(dsw) << 18) | \
+     ((uint32_t)(ucf) << 21) | ((uint32_t)(data_format) << 22) | \
+     ((uint32_t)(nfa) << 28) | ((uint32_t)(fca) << 30) | \
+     ((uint32_t)(sma) << 31))
+/* offset 0, endian swap 16, const buf no stride 18, alt const 20,
+ * buffer index mode 21 */
+#define VTX_DWORD2(offset, es, cbns, alt_const, bim) \
+    (((uint32_t)(offset) << 0) | ((uint32_t)(es) << 16) | \
+     ((uint32_t)(cbns) << 18) | ((uint32_t)(alt_const) << 20) | \
+     ((uint32_t)(bim) << 21))
+/* fourth dword of a vertex fetch is always zero */
+#define VTX_DWORD_PAD 0x00000000
+
+// TEX clause insts
+// tex insts
+#define TEX_INST(x) (x) // SQ_TEX_INST_*
+#define INST_MOD(x) (x)
+#define FETCH_WHOLE_QUAD(x) (x)
+#define RESOURCE_ID(x) (x)
+#define RESOURCE_INDEX_MODE(x) (x)
+#define SAMPLER_INDEX_MODE(x) (x)
+
+#define LOD_BIAS(x) (x)
+//ct
+#define COORD_TYPE_X(x) (x)
+#define COORD_TYPE_Y(x) (x)
+#define COORD_TYPE_Z(x) (x)
+#define COORD_TYPE_W(x) (x)
+#define TEX_UNNORMALIZED 0
+#define TEX_NORMALIZED 1
+#define OFFSET_X(x) (((int)(x) * 2) & 0x1f) /* 4:1-bits 2's-complement fixed-point: [-8.0..7.5] */
+#define OFFSET_Y(x) (((int)(x) * 2) & 0x1f)
+#define OFFSET_Z(x) (((int)(x) * 2) & 0x1f)
+#define SAMPLER_ID(x) (x)
+
+/*
+ * Texture fetch word packers.  Each argument is placed at the bit offset
+ * given by its shift below.  Fields are converted to uint32_t before
+ * shifting because coord type W sits in bit 31 and src sel W starts at
+ * bit 29; shifting those as signed ints can overflow, which is undefined
+ * behaviour in C.
+ */
+
+/* inst 0, inst mod 5, fetch whole quad 7, resource id 8, src gpr 16,
+ * src rel 23, alt const 24, resource index mode 25, sampler index mode 27 */
+#define TEX_DWORD0(tex_inst, im, fwq, resource_id, src_gpr, sr, ac, rim, sim) \
+    (((uint32_t)(tex_inst) << 0) | ((uint32_t)(im) << 5) | \
+     ((uint32_t)(fwq) << 7) | ((uint32_t)(resource_id) << 8) | \
+     ((uint32_t)(src_gpr) << 16) | ((uint32_t)(sr) << 23) | \
+     ((uint32_t)(ac) << 24) | ((uint32_t)(rim) << 25) | \
+     ((uint32_t)(sim) << 27))
+/* dst gpr 0, dst rel 7, dst sel x/y/z/w at 9/12/15/18, lod bias 21,
+ * coord type x/y/z/w at 28/29/30/31 */
+#define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \
+    (((uint32_t)(dst_gpr) << 0) | ((uint32_t)(dr) << 7) | \
+     ((uint32_t)(dsx) << 9) | ((uint32_t)(dsy) << 12) | \
+     ((uint32_t)(dsz) << 15) | ((uint32_t)(dsw) << 18) | \
+     ((uint32_t)(lod_bias) << 21) | ((uint32_t)(ctx) << 28) | \
+     ((uint32_t)(cty) << 29) | ((uint32_t)(ctz) << 30) | \
+     ((uint32_t)(ctw) << 31))
+/* offset x/y/z at 0/5/10, sampler id 15, src sel x/y/z/w at 20/23/26/29 */
+#define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \
+    (((uint32_t)(offset_x) << 0) | ((uint32_t)(offset_y) << 5) | \
+     ((uint32_t)(offset_z) << 10) | ((uint32_t)(sampler_id) << 15) | \
+     ((uint32_t)(ssx) << 20) | ((uint32_t)(ssy) << 23) | \
+     ((uint32_t)(ssz) << 26) | ((uint32_t)(ssw) << 29))
+/* fourth dword of a texture fetch is always zero */
+#define TEX_DWORD_PAD 0x00000000
+
+#endif
diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index 8879d01..58f3bf1 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -43,6 +43,18 @@
#include "radeon_exa_shared.h"
#include "radeon_vbo.h"
+extern int cayman_solid_vs(RADEONChipFamily ChipSet, uint32_t* vs);
+extern int cayman_solid_ps(RADEONChipFamily ChipSet, uint32_t* ps);
+
+extern int cayman_copy_vs(RADEONChipFamily ChipSet, uint32_t* vs);
+extern int cayman_copy_ps(RADEONChipFamily ChipSet, uint32_t* ps);
+
+extern int cayman_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader);
+extern int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader);
+
+extern int cayman_comp_vs(RADEONChipFamily ChipSet, uint32_t* vs);
+extern int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* ps);
+
static void
EVERGREENDoneSolid(PixmapPtr pPix);
@@ -1685,7 +1697,7 @@ EVERGREENAllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
return TRUE;
}
-Bool
+static Bool
EVERGREENLoadShaders(ScrnInfoPtr pScrn)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
@@ -1738,6 +1750,59 @@ EVERGREENLoadShaders(ScrnInfoPtr pScrn)
return TRUE;
}
+/*
+ * Map the shader buffer object, write the eight Cayman shaders (solid, copy,
+ * comp and xv; one vertex and one pixel shader each) into it, and unmap it.
+ *
+ * Each shader gets its own slot, 512 bytes apart, and the slot's byte offset
+ * is recorded in accel_state.  The offsets are divided by 4 when indexing
+ * because the mapping is addressed as uint32_t.
+ *
+ * Returns TRUE on success.  A mapping failure is reported via FatalError().
+ */
+static Bool
+CAYMANLoadShaders(ScrnInfoPtr pScrn)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel = info->accel_state;
+    RADEONChipFamily family = info->ChipFamily;
+    uint32_t *base;
+    int err = radeon_bo_map(accel->shaders_bo, 1);
+
+    if (err) {
+        FatalError("failed to map shader %d\n", err);
+        return FALSE;
+    }
+    base = accel->shaders_bo->ptr;
+
+    /* byte offsets of the shader slots inside the buffer object */
+    accel->solid_vs_offset = 0;
+    accel->solid_ps_offset = 512;
+    accel->copy_vs_offset = 1024;
+    accel->copy_ps_offset = 1536;
+    accel->comp_vs_offset = 2048;
+    accel->comp_ps_offset = 2560;
+    accel->xv_vs_offset = 3072;
+    accel->xv_ps_offset = 3584;
+
+    /* solid fill */
+    cayman_solid_vs(family, &base[accel->solid_vs_offset / 4]);
+    cayman_solid_ps(family, &base[accel->solid_ps_offset / 4]);
+
+    /* copy */
+    cayman_copy_vs(family, &base[accel->copy_vs_offset / 4]);
+    cayman_copy_ps(family, &base[accel->copy_ps_offset / 4]);
+
+    /* composite */
+    cayman_comp_vs(family, &base[accel->comp_vs_offset / 4]);
+    cayman_comp_ps(family, &base[accel->comp_ps_offset / 4]);
+
+    /* Xv */
+    cayman_xv_vs(family, &base[accel->xv_vs_offset / 4]);
+    cayman_xv_ps(family, &base[accel->xv_ps_offset / 4]);
+
+    radeon_bo_unmap(accel->shaders_bo);
+
+    return TRUE;
+}
+
+
Bool
EVERGREENDrawInit(ScreenPtr pScreen)
{
@@ -1837,8 +1902,13 @@ EVERGREENDrawInit(ScreenPtr pScreen)
if (!EVERGREENAllocShaders(pScrn, pScreen))
return FALSE;
- if (!EVERGREENLoadShaders(pScrn))
- return FALSE;
+ if (info->ChipFamily == CHIP_FAMILY_CAYMAN) {
+ if (!CAYMANLoadShaders(pScrn))
+ return FALSE;
+ } else {
+ if (!EVERGREENLoadShaders(pScrn))
+ return FALSE;
+ }
exaMarkSync(pScreen);
diff --git a/src/radeon.h b/src/radeon.h
index a9a2b69..dd83a69 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -1314,7 +1314,6 @@ extern Bool R600DrawInit(ScreenPtr pScreen);
extern Bool R600LoadShaders(ScrnInfoPtr pScrn);
#ifdef XF86DRM_MODE
extern Bool EVERGREENDrawInit(ScreenPtr pScreen);
-extern Bool EVERGREENLoadShaders(ScrnInfoPtr pScrn);
#endif
#endif