summary refs log tree commit diff
diff options
context:
space:
mode:
author Alex Deucher <alexdeucher@gmail.com> 2009-05-13 15:48:32 -0400
committer Alex Deucher <alexdeucher@gmail.com> 2009-05-13 15:48:32 -0400
commit 026b6f820d6caea17d2a082193e850713d5770a8 (patch)
tree 7e38f1478599d1e9251bf906a3a114c94f160abe
parent cd89241396d1931b04cfbdd8d553be16dbf9c360 (diff)
R6xx/R7xx: do EXA transforms in the vertex shader
-rw-r--r-- src/r600_exa.c 93
-rw-r--r-- src/r600_shader.c 537
2 files changed, 550 insertions, 80 deletions
diff --git a/src/r600_exa.c b/src/r600_exa.c
index 2dc33a83..18831f7f 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -924,17 +924,6 @@ do { \
#define xFixedToFloat(f) (((float) (f)) / 65536)
-static inline void transformPoint(PictTransform *transform, xPointFixed *point)
-{
- PictVector v;
- v.vector[0] = point->x;
- v.vector[1] = point->y;
- v.vector[2] = xFixed1;
- PictureTransformPoint(transform, &v);
- point->x = v.vector[0];
- point->y = v.vector[1];
-}
-
struct blendinfo {
Bool dst_alpha;
Bool src_alpha;
@@ -1099,6 +1088,7 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
tex_resource_t tex_res;
tex_sampler_t tex_samp;
int pix_r, pix_g, pix_b, pix_a;
+ float vs_alu_consts[8];
CLEAR (tex_res);
CLEAR (tex_samp);
@@ -1118,9 +1108,6 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
break;
}
- accel_state->texW[unit] = w;
- accel_state->texH[unit] = h;
-
/* ErrorF("Tex %d setup %dx%d\n", unit, w, h); */
/* flush texture cache */
@@ -1294,9 +1281,34 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
if (pPict->transform != 0) {
accel_state->is_transform[unit] = TRUE;
accel_state->transform[unit] = pPict->transform;
- } else
+
+ vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]);
+ vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]);
+ vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]);
+ vs_alu_consts[3] = 1.0 / w;
+
+ vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]);
+ vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]);
+ vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]);
+ vs_alu_consts[7] = 1.0 / h;
+ } else {
accel_state->is_transform[unit] = FALSE;
+ vs_alu_consts[0] = 1.0;
+ vs_alu_consts[1] = 0.0;
+ vs_alu_consts[2] = 0.0;
+ vs_alu_consts[3] = 1.0 / w;
+
+ vs_alu_consts[4] = 0.0;
+ vs_alu_consts[5] = 1.0;
+ vs_alu_consts[6] = 0.0;
+ vs_alu_consts[7] = 1.0 / h;
+ }
+
+ /* VS alu constants */
+ set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2),
+ sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
+
return TRUE;
}
@@ -1586,14 +1598,6 @@ static void R600Composite(PixmapPtr pDst,
srcBottomRight.x = IntToxFixed(srcX + w);
srcBottomRight.y = IntToxFixed(srcY + h);
- /* XXX do transform in vertex shader */
- if (accel_state->is_transform[0]) {
- transformPoint(accel_state->transform[0], &srcTopLeft);
- transformPoint(accel_state->transform[0], &srcTopRight);
- transformPoint(accel_state->transform[0], &srcBottomLeft);
- transformPoint(accel_state->transform[0], &srcBottomRight);
- }
-
if (accel_state->has_mask) {
xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight;
@@ -1616,33 +1620,26 @@ static void R600Composite(PixmapPtr pDst,
maskBottomRight.x = IntToxFixed(maskX + w);
maskBottomRight.y = IntToxFixed(maskY + h);
- if (accel_state->is_transform[1]) {
- transformPoint(accel_state->transform[1], &maskTopLeft);
- transformPoint(accel_state->transform[1], &maskTopRight);
- transformPoint(accel_state->transform[1], &maskBottomLeft);
- transformPoint(accel_state->transform[1], &maskBottomRight);
- }
-
vb[0] = (float)dstX;
vb[1] = (float)dstY;
- vb[2] = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0];
- vb[3] = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0];
- vb[4] = xFixedToFloat(maskTopLeft.x) / accel_state->texW[1];
- vb[5] = xFixedToFloat(maskTopLeft.y) / accel_state->texH[1];
+ vb[2] = xFixedToFloat(srcTopLeft.x);
+ vb[3] = xFixedToFloat(srcTopLeft.y);
+ vb[4] = xFixedToFloat(maskTopLeft.x);
+ vb[5] = xFixedToFloat(maskTopLeft.y);
vb[6] = (float)dstX;
vb[7] = (float)(dstY + h);
- vb[8] = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0];
- vb[9] = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0];
- vb[10] = xFixedToFloat(maskBottomLeft.x) / accel_state->texW[1];
- vb[11] = xFixedToFloat(maskBottomLeft.y) / accel_state->texH[1];
+ vb[8] = xFixedToFloat(srcBottomLeft.x);
+ vb[9] = xFixedToFloat(srcBottomLeft.y);
+ vb[10] = xFixedToFloat(maskBottomLeft.x);
+ vb[11] = xFixedToFloat(maskBottomLeft.y);
vb[12] = (float)(dstX + w);
vb[13] = (float)(dstY + h);
- vb[14] = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0];
- vb[15] = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0];
- vb[16] = xFixedToFloat(maskBottomRight.x) / accel_state->texW[1];
- vb[17] = xFixedToFloat(maskBottomRight.y) / accel_state->texH[1];
+ vb[14] = xFixedToFloat(srcBottomRight.x);
+ vb[15] = xFixedToFloat(srcBottomRight.y);
+ vb[16] = xFixedToFloat(maskBottomRight.x);
+ vb[17] = xFixedToFloat(maskBottomRight.y);
} else {
if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) {
@@ -1657,18 +1654,18 @@ static void R600Composite(PixmapPtr pDst,
vb[0] = (float)dstX;
vb[1] = (float)dstY;
- vb[2] = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0];
- vb[3] = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0];
+ vb[2] = xFixedToFloat(srcTopLeft.x);
+ vb[3] = xFixedToFloat(srcTopLeft.y);
vb[4] = (float)dstX;
vb[5] = (float)(dstY + h);
- vb[6] = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0];
- vb[7] = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0];
+ vb[6] = xFixedToFloat(srcBottomLeft.x);
+ vb[7] = xFixedToFloat(srcBottomLeft.y);
vb[8] = (float)(dstX + w);
vb[9] = (float)(dstY + h);
- vb[10] = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0];
- vb[11] = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0];
+ vb[10] = xFixedToFloat(srcBottomRight.x);
+ vb[11] = xFixedToFloat(srcBottomRight.y);
}
accel_state->vb_index += 3;
diff --git a/src/r600_shader.c b/src/r600_shader.c
index 0a820cf3..fba8dcb4 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -1322,7 +1322,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(0));
/* 1 */
- shader[i++] = CF_DWORD0(ADDR(14));
+ shader[i++] = CF_DWORD0(ADDR(28));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_NOT_BOOL),
@@ -1346,7 +1346,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(1));
/* 3 - mask sub */
- shader[i++] = CF_DWORD0(ADDR(8));
+ shader[i++] = CF_DWORD0(ADDR(22));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
@@ -1357,7 +1357,22 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_VTX),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 4 - dst */
+
+ /* 4 - ALU */
+ shader[i++] = CF_ALU_DWORD0(ADDR(9),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(12),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 5 - dst */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
TYPE(SQ_EXPORT_POS),
RW_GPR(2),
@@ -1366,8 +1381,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
ELEM_SIZE(0));
shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
R6xx_ELEM_LOOP(0),
BURST_COUNT(1),
END_OF_PROGRAM(0),
@@ -1375,7 +1390,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 5 - src */
+ /* 6 - src */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
TYPE(SQ_EXPORT_PARAM),
RW_GPR(1),
@@ -1384,8 +1399,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
ELEM_SIZE(0));
shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
R6xx_ELEM_LOOP(0),
BURST_COUNT(1),
END_OF_PROGRAM(0),
@@ -1393,7 +1408,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT),
WHOLE_QUAD_MODE(0),
BARRIER(0));
- /* 6 - mask */
+ /* 7 - mask */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
TYPE(SQ_EXPORT_PARAM),
RW_GPR(0),
@@ -1402,8 +1417,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
ELEM_SIZE(0));
shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
R6xx_ELEM_LOOP(0),
BURST_COUNT(1),
END_OF_PROGRAM(0),
@@ -1411,7 +1426,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(0));
- /* 7 */
+ /* 8 */
shader[i++] = CF_DWORD0(ADDR(0));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -1423,7 +1438,301 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_RETURN),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 8/9 - dst */
+
+
+ /* 9 srcX MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 10 srcY MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(257),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 11 srcX MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 12 srcY MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(257),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 13 maskX MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(258),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(258),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 14 maskY MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(259),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 15 maskX MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(258),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 16 maskY MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(259),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 17 srcX / w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+
+ /* 18 srcY / h */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+
+ /* 19 maskX / w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(258),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+
+ /* 20 maskY / h */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(259),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ /* 21 */
+ shader[i++] = 0x00000000;
+ shader[i++] = 0x00000000;
+
+ /* 22/23 - dst */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -1448,7 +1757,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(1));
shader[i++] = VTX_DWORD_PAD;
- /* 10/11 - src */
+ /* 24/25 - src */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -1461,8 +1770,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(0),
DST_SEL_X(SQ_SEL_X),
DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_0),
- DST_SEL_W(SQ_SEL_1),
+ DST_SEL_Z(SQ_SEL_1),
+ DST_SEL_W(SQ_SEL_0),
USE_CONST_FIELDS(0),
DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
@@ -1473,7 +1782,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(0));
shader[i++] = VTX_DWORD_PAD;
- /* 12/13 - mask */
+ /* 26/27 - mask */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -1486,8 +1795,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(0),
DST_SEL_X(SQ_SEL_X),
DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_0),
- DST_SEL_W(SQ_SEL_1),
+ DST_SEL_Z(SQ_SEL_1),
+ DST_SEL_W(SQ_SEL_0),
USE_CONST_FIELDS(0),
DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
@@ -1499,8 +1808,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
MEGA_FETCH(0));
shader[i++] = VTX_DWORD_PAD;
- /* 14 - non-mask sub */
- shader[i++] = CF_DWORD0(ADDR(18));
+ /* 28 - non-mask sub */
+ shader[i++] = CF_DWORD0(ADDR(40));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
@@ -1511,7 +1820,22 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_VTX),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 15 - dst */
+
+ /* 29 - ALU */
+ shader[i++] = CF_ALU_DWORD0(ADDR(33),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(6),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 30 - dst */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
TYPE(SQ_EXPORT_POS),
RW_GPR(1),
@@ -1520,8 +1844,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
ELEM_SIZE(0));
shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
R6xx_ELEM_LOOP(0),
BURST_COUNT(0),
END_OF_PROGRAM(0),
@@ -1529,7 +1853,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 16 - src */
+ /* 31 - src */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
TYPE(SQ_EXPORT_PARAM),
RW_GPR(0),
@@ -1538,8 +1862,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
ELEM_SIZE(0));
shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
R6xx_ELEM_LOOP(0),
BURST_COUNT(0),
END_OF_PROGRAM(0),
@@ -1547,7 +1871,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(0));
- /* 17 */
+ /* 32 */
shader[i++] = CF_DWORD0(ADDR(0));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -1559,7 +1883,156 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_RETURN),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 18/19 - dst */
+
+
+ /* 33 srcX MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 34 srcY MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(257),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 35 srcX MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 36 srcY MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(257),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* 37 srcX / w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+
+ /* 38 srcY / h */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+
+ /* 39 */
+ shader[i++] = 0x00000000;
+ shader[i++] = 0x00000000;
+
+ /* 40/41 - dst */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -1584,7 +2057,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(1));
shader[i++] = VTX_DWORD_PAD;
- /* 20/21 - src */
+ /* 42/43 - src */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -1597,8 +2070,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(0),
DST_SEL_X(SQ_SEL_X),
DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_0),
- DST_SEL_W(SQ_SEL_1),
+ DST_SEL_Z(SQ_SEL_1),
+ DST_SEL_W(SQ_SEL_0),
USE_CONST_FIELDS(0),
DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */