From ce9c5a1a27b4e59dbf49b066baaae6ef371b04bc Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Mon, 20 Jul 2009 15:27:28 -0400
Subject: r600: add logicop support
---
src/mesa/drivers/dri/r600/r700_state.c | 43 ++++++++++++++++++++++++++++++++--
1 file changed, 41 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index eda224def33..1ccd793512b 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -30,6 +30,7 @@
#include "main/imports.h"
#include "main/enums.h"
#include "main/macros.h"
+#include "main/context.h"
#include "main/dd.h"
#include "main/simple_list.h"
@@ -322,6 +323,43 @@ static void r700BlendFuncSeparate(GLcontext * ctx,
{
}
+/**
+ * Translate LogicOp enums into hardware representation.
+ * Both use a very logical bit-wise layout, but unfortunately the order
+ * of bits is reversed.
+ */
+static GLuint translate_logicop(GLenum logicop)
+{
+ GLuint bits = logicop - GL_CLEAR;
+ bits = ((bits & 1) << 3) | ((bits & 2) << 1) | ((bits & 4) >> 1) | ((bits & 8) >> 3);
+ return bits;
+}
+
+/**
+ * Used internally to update the r300->hw hardware state to match the
+ * current OpenGL state.
+ */
+static void r700SetLogicOpState(GLcontext *ctx)
+{
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw);
+
+ if (RGBA_LOGICOP_ENABLED(ctx))
+ SETfield(r700->CB_COLOR_CONTROL.u32All,
+ translate_logicop(ctx->Color.LogicOp), ROP3_shift, ROP3_mask);
+ else
+ SETfield(r700->CB_COLOR_CONTROL.u32All, 0xCC, ROP3_shift, ROP3_mask);
+}
+
+/**
+ * Called by Mesa when an application program changes the LogicOp state
+ * via glLogicOp.
+ */
+static void r700LogicOpcode(GLcontext *ctx, GLenum logicop)
+{
+ if (RGBA_LOGICOP_ENABLED(ctx))
+ r700SetLogicOpState(ctx);
+}
+
static void r700UpdateCulling(GLcontext * ctx)
{
R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw);
@@ -397,7 +435,7 @@ static void r700Enable(GLcontext * ctx, GLenum cap, GLboolean state) //---------
//r700SetAlphaState(ctx);
break;
case GL_COLOR_LOGIC_OP:
- //r700SetLogicOpState(ctx);
+ r700SetLogicOpState(ctx);
/* fall-through, because logic op overrides blending */
case GL_BLEND:
//r700SetBlendState(ctx);
@@ -1083,7 +1121,7 @@ void r700InitState(GLcontext * ctx) //-------------------
if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770)
SETfield(r700->SPI_THREAD_GROUPING.u32All, 1, PS_GROUPING_shift, PS_GROUPING_mask);
- SETfield(r700->CB_COLOR_CONTROL.u32All, 0xCC, ROP3_shift, ROP3_mask);
+ r700SetLogicOpState(ctx);
CLEARbit(r700->CB_COLOR_CONTROL.u32All, PER_MRT_BLEND_bit);
r700->DB_SHADER_CONTROL.u32All = 0;
@@ -1228,6 +1266,7 @@ void r700InitStateFuncs(struct dd_function_table *functions) //-----------------
functions->Fogfv = r700Fogfv;
functions->FrontFace = r700FrontFace;
functions->ShadeModel = r700ShadeModel;
+ functions->LogicOpcode = r700LogicOpcode;
/* ARB_point_parameters */
functions->PointParameterfv = r700PointParameter;
--
cgit v1.2.3
From a369963b18fc7ef75f6f5354e0d685cef9ecb70d Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Mon, 20 Jul 2009 17:22:59 -0400
Subject: r600: add user clip plane support
---
src/mesa/drivers/dri/r600/r700_chip.c | 22 +++++++++++++++++++++
src/mesa/drivers/dri/r600/r700_render.c | 1 +
src/mesa/drivers/dri/r600/r700_state.c | 35 +++++++++++++++++++++++++++++++--
3 files changed, 56 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index 087d17312e9..91aa8fc8fc6 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -722,3 +722,25 @@ GLboolean r700SendSQConfig(context_t *context)
return GL_TRUE;
}
+GLboolean r700SendUCPState(context_t *context)
+{
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ BATCH_LOCALS(&context->radeon);
+ int i;
+
+ for (i = 0; i < R700_MAX_UCP; i++) {
+ if (r700->ucp[i].enabled) {
+ BEGIN_BATCH_NO_AUTOSTATE(6);
+ R600_OUT_BATCH_REGSEQ(PA_CL_UCP_0_X + (16 * i), 4);
+ R600_OUT_BATCH(r700->ucp[i].PA_CL_UCP_0_X.u32All);
+ R600_OUT_BATCH(r700->ucp[i].PA_CL_UCP_0_Y.u32All);
+ R600_OUT_BATCH(r700->ucp[i].PA_CL_UCP_0_Z.u32All);
+ R600_OUT_BATCH(r700->ucp[i].PA_CL_UCP_0_W.u32All);
+ END_BATCH();
+ COMMIT_BATCH();
+ }
+ }
+
+ return GL_TRUE;
+}
+
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index f1e467a317f..77cbe3cfd0f 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -322,6 +322,7 @@ static GLboolean r700RunRender(GLcontext * ctx,
r700SendPSState(context);
r700SendVSState(context);
+ r700SendUCPState(context);
r700SendContextStates(context);
r700SendViewportState(context, 0);
r700SendRenderTargetState(context, 0);
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index 1ccd793512b..1d6d398f63e 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -56,6 +56,8 @@
#include "r700_vertprog.h"
+static void r700SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state);
+
void r700SetDefaultStates(context_t *context) //--------------------
{
@@ -446,7 +448,7 @@ static void r700Enable(GLcontext * ctx, GLenum cap, GLboolean state) //---------
case GL_CLIP_PLANE3:
case GL_CLIP_PLANE4:
case GL_CLIP_PLANE5:
- //r700SetClipPlaneState(ctx, cap, state);
+ r700SetClipPlaneState(ctx, cap, state);
break;
case GL_DEPTH_TEST:
r700SetDepthState(ctx);
@@ -675,8 +677,37 @@ static void r700RenderMode(GLcontext * ctx, GLenum mode) //---------------------
{
}
-static void r700ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) //-----------------
+static void r700ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ GLint p;
+ GLint *ip;
+
+ p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
+ ip = (GLint *)ctx->Transform._ClipUserPlane[p];
+
+ r700->ucp[p].PA_CL_UCP_0_X.u32All = ip[0];
+ r700->ucp[p].PA_CL_UCP_0_Y.u32All = ip[1];
+ r700->ucp[p].PA_CL_UCP_0_Z.u32All = ip[2];
+ r700->ucp[p].PA_CL_UCP_0_W.u32All = ip[3];
+}
+
+static void r700SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ GLuint p;
+
+ p = cap - GL_CLIP_PLANE0;
+ if (state) {
+ r700->PA_CL_CLIP_CNTL.u32All |= (UCP_ENA_0_bit << p);
+ r700->ucp[p].enabled = GL_TRUE;
+ r700ClipPlane(ctx, cap, NULL);
+ } else {
+ r700->PA_CL_CLIP_CNTL.u32All &= ~(UCP_ENA_0_bit << p);
+ r700->ucp[p].enabled = GL_FALSE;
+ }
}
void r700SetScissor(context_t *context) //---------------
--
cgit v1.2.3
From 265d5eba658f38f5a9d12d57b701e4ffe49100eb Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Mon, 20 Jul 2009 18:50:59 -0400
Subject: r600: add blending support
---
src/mesa/drivers/dri/r600/r700_state.c | 210 +++++++++++++++++++++++++++++++--
1 file changed, 197 insertions(+), 13 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index 1d6d398f63e..070cea56f8e 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -312,17 +312,211 @@ static void r700AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) //---------
static void r700BlendColor(GLcontext * ctx, const GLfloat cf[4]) //----------------
{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ r700->CB_BLEND_RED.f32All = cf[0];
+ r700->CB_BLEND_GREEN.f32All = cf[1];
+ r700->CB_BLEND_BLUE.f32All = cf[2];
+ r700->CB_BLEND_ALPHA.f32All = cf[3];
+}
+
+static int blend_factor(GLenum factor, GLboolean is_src)
+{
+ switch (factor) {
+ case GL_ZERO:
+ return BLEND_ZERO;
+ break;
+ case GL_ONE:
+ return BLEND_ONE;
+ break;
+ case GL_DST_COLOR:
+ return BLEND_DST_COLOR;
+ break;
+ case GL_ONE_MINUS_DST_COLOR:
+ return BLEND_ONE_MINUS_DST_COLOR;
+ break;
+ case GL_SRC_COLOR:
+ return BLEND_SRC_COLOR;
+ break;
+ case GL_ONE_MINUS_SRC_COLOR:
+ return BLEND_ONE_MINUS_SRC_COLOR;
+ break;
+ case GL_SRC_ALPHA:
+ return BLEND_SRC_ALPHA;
+ break;
+ case GL_ONE_MINUS_SRC_ALPHA:
+ return BLEND_ONE_MINUS_SRC_ALPHA;
+ break;
+ case GL_DST_ALPHA:
+ return BLEND_DST_ALPHA;
+ break;
+ case GL_ONE_MINUS_DST_ALPHA:
+ return BLEND_ONE_MINUS_DST_ALPHA;
+ break;
+ case GL_SRC_ALPHA_SATURATE:
+ return (is_src) ? BLEND_SRC_ALPHA_SATURATE : BLEND_ZERO;
+ break;
+ case GL_CONSTANT_COLOR:
+ return BLEND_CONSTANT_COLOR;
+ break;
+ case GL_ONE_MINUS_CONSTANT_COLOR:
+ return BLEND_ONE_MINUS_CONSTANT_COLOR;
+ break;
+ case GL_CONSTANT_ALPHA:
+ return BLEND_CONSTANT_ALPHA;
+ break;
+ case GL_ONE_MINUS_CONSTANT_ALPHA:
+ return BLEND_ONE_MINUS_CONSTANT_ALPHA;
+ break;
+ default:
+ fprintf(stderr, "unknown blend factor %x\n", factor);
+ return (is_src) ? BLEND_ONE : BLEND_ZERO;
+ break;
+ }
+}
+
+static void r700SetBlendState(GLcontext * ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ int id = 0;
+ uint32_t blend_reg = 0, eqn, eqnA;
+
+ if (RGBA_LOGICOP_ENABLED(ctx) || !ctx->Color.BlendEnabled) {
+ SETfield(blend_reg,
+ BLEND_ONE, COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask);
+ SETfield(blend_reg,
+ BLEND_ZERO, COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask);
+ SETfield(blend_reg,
+ COMB_DST_PLUS_SRC, COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask);
+ SETfield(blend_reg,
+ BLEND_ONE, ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);
+ SETfield(blend_reg,
+ BLEND_ZERO, ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask);
+ SETfield(blend_reg,
+ COMB_DST_PLUS_SRC, ALPHA_COMB_FCN_shift, ALPHA_COMB_FCN_mask);
+ if (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_R600)
+ r700->CB_BLEND_CONTROL.u32All = blend_reg;
+ else
+ r700->render_target[id].CB_BLEND0_CONTROL.u32All = blend_reg;
+ return;
+ }
+
+ SETfield(blend_reg,
+ blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE),
+ COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask);
+ SETfield(blend_reg,
+ blend_factor(ctx->Color.BlendDstRGB, GL_FALSE),
+ COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask);
+
+ switch (ctx->Color.BlendEquationRGB) {
+ case GL_FUNC_ADD:
+ eqn = COMB_DST_PLUS_SRC;
+ break;
+ case GL_FUNC_SUBTRACT:
+ eqn = COMB_SRC_MINUS_DST;
+ break;
+ case GL_FUNC_REVERSE_SUBTRACT:
+ eqn = COMB_DST_MINUS_SRC;
+ break;
+ case GL_MIN:
+ eqn = COMB_MIN_DST_SRC;
+ SETfield(blend_reg,
+ BLEND_ONE,
+ COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask);
+ SETfield(blend_reg,
+ BLEND_ONE,
+ COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask);
+ break;
+ case GL_MAX:
+ eqn = COMB_MAX_DST_SRC;
+ SETfield(blend_reg,
+ BLEND_ONE,
+ COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask);
+ SETfield(blend_reg,
+ BLEND_ONE,
+ COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask);
+ break;
+
+ default:
+ fprintf(stderr,
+ "[%s:%u] Invalid RGB blend equation (0x%04x).\n",
+ __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB);
+ return;
+ }
+ SETfield(blend_reg,
+ eqn, COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask);
+
+ SETfield(blend_reg,
+ blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE),
+ ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);
+ SETfield(blend_reg,
+ blend_factor(ctx->Color.BlendDstRGB, GL_FALSE),
+ ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask);
+
+ switch (ctx->Color.BlendEquationA) {
+ case GL_FUNC_ADD:
+ eqnA = COMB_DST_PLUS_SRC;
+ break;
+ case GL_FUNC_SUBTRACT:
+ eqnA = COMB_SRC_MINUS_DST;
+ break;
+ case GL_FUNC_REVERSE_SUBTRACT:
+ eqnA = COMB_DST_MINUS_SRC;
+ break;
+ case GL_MIN:
+ eqnA = COMB_MIN_DST_SRC;
+ SETfield(blend_reg,
+ BLEND_ONE,
+ ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);
+ SETfield(blend_reg,
+ BLEND_ONE,
+ ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask);
+ break;
+ case GL_MAX:
+ eqnA = COMB_MAX_DST_SRC;
+ SETfield(blend_reg,
+ BLEND_ONE,
+ ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);
+ SETfield(blend_reg,
+ BLEND_ONE,
+ ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask);
+ break;
+ default:
+ fprintf(stderr,
+ "[%s:%u] Invalid A blend equation (0x%04x).\n",
+ __FUNCTION__, __LINE__, ctx->Color.BlendEquationA);
+ return;
+ }
+
+ SETfield(blend_reg,
+ eqnA, ALPHA_COMB_FCN_shift, ALPHA_COMB_FCN_mask);
+
+ SETbit(r700->render_target[id].CB_BLEND0_CONTROL.u32All, SEPARATE_ALPHA_BLEND_bit);
+
+ if (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_R600)
+ r700->CB_BLEND_CONTROL.u32All = blend_reg;
+ else {
+ r700->render_target[id].CB_BLEND0_CONTROL.u32All = blend_reg;
+ SETbit(r700->CB_COLOR_CONTROL.u32All, PER_MRT_BLEND_bit);
+ }
+ SETfield(r700->CB_COLOR_CONTROL.u32All, (1 << id),
+ TARGET_BLEND_ENABLE_shift, TARGET_BLEND_ENABLE_mask);
+
}
static void r700BlendEquationSeparate(GLcontext * ctx,
GLenum modeRGB, GLenum modeA) //-----------------
{
+ r700SetBlendState(ctx);
}
static void r700BlendFuncSeparate(GLcontext * ctx,
GLenum sfactorRGB, GLenum dfactorRGB,
GLenum sfactorA, GLenum dfactorA) //------------------------
{
+ r700SetBlendState(ctx);
}
/**
@@ -440,7 +634,7 @@ static void r700Enable(GLcontext * ctx, GLenum cap, GLboolean state) //---------
r700SetLogicOpState(ctx);
/* fall-through, because logic op overrides blending */
case GL_BLEND:
- //r700SetBlendState(ctx);
+ r700SetBlendState(ctx);
break;
case GL_CLIP_PLANE0:
case GL_CLIP_PLANE1:
@@ -471,7 +665,7 @@ static void r700Enable(GLcontext * ctx, GLenum cap, GLboolean state) //---------
break;
case GL_LINE_STIPPLE:
r700UpdateLineStipple(ctx);
- break;
+ break;
default:
break;
}
@@ -833,9 +1027,6 @@ void r700SetRenderTarget(context_t *context, int id)
SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, BLEND_CLAMP_bit);
SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
- CLEARfield(r700->render_target[id].CB_BLEND0_CONTROL.u32All, COLOR_SRCBLEND_mask); /* no dst blend */
- CLEARfield(r700->render_target[id].CB_BLEND0_CONTROL.u32All, ALPHA_SRCBLEND_mask); /* no dst blend */
-
r700->render_target[id].enabled = GL_TRUE;
}
@@ -1152,8 +1343,8 @@ void r700InitState(GLcontext * ctx) //-------------------
if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770)
SETfield(r700->SPI_THREAD_GROUPING.u32All, 1, PS_GROUPING_shift, PS_GROUPING_mask);
+ r700SetBlendState(ctx);
r700SetLogicOpState(ctx);
- CLEARbit(r700->CB_COLOR_CONTROL.u32All, PER_MRT_BLEND_bit);
r700->DB_SHADER_CONTROL.u32All = 0;
SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit);
@@ -1245,13 +1436,6 @@ void r700InitState(GLcontext * ctx) //-------------------
r700->CB_FOG_GREEN_R6XX.u32All = 0; //r6xx only
r700->CB_FOG_BLUE_R6XX.u32All = 0; //r6xx only
- r700->CB_BLEND_RED.u32All = 0;
- r700->CB_BLEND_GREEN.u32All = 0;
- r700->CB_BLEND_BLUE.u32All = 0;
- r700->CB_BLEND_ALPHA.u32All = 0;
-
- r700->CB_BLEND_CONTROL.u32All = 0;
-
/* Disable color compares */
SETfield(r700->CB_CLRCMP_CONTROL.u32All, CLRCMP_DRAW_ALWAYS,
CLRCMP_FCN_SRC_shift, CLRCMP_FCN_SRC_mask);
--
cgit v1.2.3
From cc893d9a98255d3c26df7123ba5cc02e478c9328 Mon Sep 17 00:00:00 2001
From: Kevin DeKorte
Date: Mon, 20 Jul 2009 18:56:47 -0400
Subject: r600: fix dst reg indexing
This fixes segfaults in apps like teapot and tunnel
---
src/mesa/drivers/dri/r600/r700_assembler.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index 1d41c5cf785..2d40dfa708e 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -2193,6 +2193,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
GLboolean next_ins(r700_AssemblerBase *pAsm)
{
struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+ uint index;
if( GL_TRUE == IsTex(pILInst->Opcode) )
{
@@ -2213,14 +2214,20 @@ GLboolean next_ins(r700_AssemblerBase *pAsm)
if(pAsm->D.dst.rtype == DST_REG_OUT)
{
+ if (pAsm->starting_export_register_number >= pAsm->D.dst.reg) {
+ index = 0;
+ } else {
+ index = pAsm->D.dst.reg - pAsm->starting_export_register_number;
+ }
+
if(pAsm->D.dst.op3)
{
// There is no mask for OP3 instructions, so all channels are written
- pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
+ pAsm->pucOutMask[index] = 0xF;
}
else
{
- pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
+ pAsm->pucOutMask[index]
|= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
}
}
--
cgit v1.2.3
From 6617fa6fab2df5d3d8085affac019f90101bb7a8 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Mon, 20 Jul 2009 19:33:05 -0400
Subject: r600: fix typo in blend code
---
src/mesa/drivers/dri/r600/r700_state.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index 070cea56f8e..44584430036 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -493,7 +493,7 @@ static void r700SetBlendState(GLcontext * ctx)
SETfield(blend_reg,
eqnA, ALPHA_COMB_FCN_shift, ALPHA_COMB_FCN_mask);
- SETbit(r700->render_target[id].CB_BLEND0_CONTROL.u32All, SEPARATE_ALPHA_BLEND_bit);
+ SETbit(blend_reg, SEPARATE_ALPHA_BLEND_bit);
if (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_R600)
r700->CB_BLEND_CONTROL.u32All = blend_reg;
--
cgit v1.2.3
From 81d555068408d4343d7627c8bedda5675f09bd21 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 20 Jul 2009 17:58:12 -0700
Subject: i965: Don't clip everything if FRONT_AND_BACK culling while culling
disabled.
Fixes everything-black with meta_clear_tris on quake4-mpdemo and doom3-demo.
Bug #18844, 22077.
---
src/mesa/drivers/dri/i965/brw_clip.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c
index 54d30a3f422..20a927cf386 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.c
+++ b/src/mesa/drivers/dri/i965/brw_clip.c
@@ -166,7 +166,8 @@ static void upload_clip_prog(struct brw_context *brw)
/* _NEW_POLYGON */
if (key.primitive == GL_TRIANGLES) {
- if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
+ if (ctx->Polygon.CullFlag &&
+ ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
key.clip_mode = BRW_CLIPMODE_REJECT_ALL;
else {
GLuint fill_front = CLIP_CULL;
--
cgit v1.2.3
From 5358e54d1ae64ccfa81199b343a2931b415fcc0a Mon Sep 17 00:00:00 2001
From: Peter Hutterer
Date: Mon, 20 Jul 2009 16:11:26 +1000
Subject: Add missing X11_INCLUDES to egl/drivers/demo and egl/main.
Compiling mesa on a system with no X headers installed in the default
include paths fails due to missing X11 includes. The header includes are
picked up by configure but not applied.
Signed-off-by: Peter Hutterer
Signed-off-by: Dave Airlie
---
src/egl/drivers/demo/Makefile | 2 +-
src/egl/main/Makefile | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/egl/drivers/demo/Makefile b/src/egl/drivers/demo/Makefile
index 26694c92fa6..444dfb35bde 100644
--- a/src/egl/drivers/demo/Makefile
+++ b/src/egl/drivers/demo/Makefile
@@ -4,7 +4,7 @@ TOP = ../../../..
include $(TOP)/configs/current
-INCLUDE_DIRS = -I$(TOP)/include -I$(TOP)/src/egl/main
+INCLUDE_DIRS = -I$(TOP)/include -I$(TOP)/src/egl/main $(X11_INCLUDES)
SOURCES = demo.c
diff --git a/src/egl/main/Makefile b/src/egl/main/Makefile
index e1ff8794b35..7cab005214d 100644
--- a/src/egl/main/Makefile
+++ b/src/egl/main/Makefile
@@ -4,7 +4,7 @@ TOP = ../../..
include $(TOP)/configs/current
-INCLUDE_DIRS = -I$(TOP)/include -I$(TOP)/src/mesa/glapi
+INCLUDE_DIRS = -I$(TOP)/include -I$(TOP)/src/mesa/glapi $(X11_INCLUDES)
HEADERS = \
eglcompiler.h \
--
cgit v1.2.3
From 1b445f96737cf5a1a28e81ff94a2e07b2cac3a96 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Mon, 20 Jul 2009 17:58:12 -0700
Subject: i965: Don't clip everything if FRONT_AND_BACK culling while culling
disabled.
Fixes everything-black with meta_clear_tris on quake4-mpdemo and doom3-demo.
Bug #18844, 22077.
(cherry picked from commit 81d555068408d4343d7627c8bedda5675f09bd21)
---
src/mesa/drivers/dri/i965/brw_clip.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c
index 5cffcebde43..8fc9f89cb70 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.c
+++ b/src/mesa/drivers/dri/i965/brw_clip.c
@@ -152,7 +152,8 @@ static void upload_clip_prog(struct brw_context *brw)
/* _NEW_POLYGON */
if (key.primitive == GL_TRIANGLES) {
- if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
+ if (ctx->Polygon.CullFlag &&
+ ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
key.clip_mode = BRW_CLIPMODE_REJECT_ALL;
else {
GLuint fill_front = CLIP_CULL;
--
cgit v1.2.3
From a6b314150c141f4c73e408b114181e57237540d9 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Thu, 2 Jul 2009 19:21:22 -0700
Subject: intel: Fall back on glBitmap with fog enabled.
We would have to build the program with the appropriate fog mode, and
also supply the fog coordinate if appropriate.
Bug #19413.
(cherry picked from commit 8ae02a3919bf31bd33f86208472e100eedb58497)
---
src/mesa/drivers/dri/intel/intel_pixel_bitmap.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index a2ccae1b7d6..d137aef13d2 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -409,6 +409,12 @@ intel_texture_bitmap(GLcontext * ctx,
return GL_FALSE;
}
+ if (ctx->Fog.Enabled) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glBitmap() fallback: fog\n");
+ return GL_FALSE;
+ }
+
/* Check that we can load in a texture this big. */
if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) ||
height > (1 << (ctx->Const.MaxTextureLevels - 1))) {
--
cgit v1.2.3
From cf24119d38360bfb25fa2683fe86a139826084f0 Mon Sep 17 00:00:00 2001
From: Michel Dänzer
Date: Tue, 21 Jul 2009 10:46:29 +0200
Subject: Track Radeon driver symlinks in Git.
---
src/mesa/drivers/dri/r200/.gitignore | 15 -------
src/mesa/drivers/dri/r200/Makefile | 48 +----------------------
src/mesa/drivers/dri/r200/radeon_bo_legacy.c | 1 +
src/mesa/drivers/dri/r200/radeon_bo_legacy.h | 1 +
src/mesa/drivers/dri/r200/radeon_bocs_wrapper.h | 1 +
src/mesa/drivers/dri/r200/radeon_chipset.h | 1 +
src/mesa/drivers/dri/r200/radeon_cmdbuf.h | 1 +
src/mesa/drivers/dri/r200/radeon_common.c | 1 +
src/mesa/drivers/dri/r200/radeon_common.h | 1 +
src/mesa/drivers/dri/r200/radeon_common_context.c | 1 +
src/mesa/drivers/dri/r200/radeon_common_context.h | 1 +
src/mesa/drivers/dri/r200/radeon_cs_legacy.c | 1 +
src/mesa/drivers/dri/r200/radeon_cs_legacy.h | 1 +
src/mesa/drivers/dri/r200/radeon_cs_space_drm.c | 1 +
src/mesa/drivers/dri/r200/radeon_dma.c | 1 +
src/mesa/drivers/dri/r200/radeon_dma.h | 1 +
src/mesa/drivers/dri/r200/radeon_fbo.c | 1 +
src/mesa/drivers/dri/r200/radeon_lock.c | 1 +
src/mesa/drivers/dri/r200/radeon_lock.h | 1 +
src/mesa/drivers/dri/r200/radeon_mipmap_tree.c | 1 +
src/mesa/drivers/dri/r200/radeon_mipmap_tree.h | 1 +
src/mesa/drivers/dri/r200/radeon_screen.c | 1 +
src/mesa/drivers/dri/r200/radeon_screen.h | 1 +
src/mesa/drivers/dri/r200/radeon_span.c | 1 +
src/mesa/drivers/dri/r200/radeon_span.h | 1 +
src/mesa/drivers/dri/r200/radeon_texture.c | 1 +
src/mesa/drivers/dri/r200/radeon_texture.h | 1 +
src/mesa/drivers/dri/r200/server/radeon.h | 1 +
src/mesa/drivers/dri/r200/server/radeon_dri.c | 1 +
src/mesa/drivers/dri/r200/server/radeon_dri.h | 1 +
src/mesa/drivers/dri/r200/server/radeon_egl.c | 1 +
src/mesa/drivers/dri/r200/server/radeon_macros.h | 1 +
src/mesa/drivers/dri/r200/server/radeon_reg.h | 1 +
src/mesa/drivers/dri/r300/.gitignore | 15 -------
src/mesa/drivers/dri/r300/Makefile | 46 +---------------------
src/mesa/drivers/dri/r300/radeon_bo_legacy.c | 1 +
src/mesa/drivers/dri/r300/radeon_bo_legacy.h | 1 +
src/mesa/drivers/dri/r300/radeon_bocs_wrapper.h | 1 +
src/mesa/drivers/dri/r300/radeon_chipset.h | 1 +
src/mesa/drivers/dri/r300/radeon_cmdbuf.h | 1 +
src/mesa/drivers/dri/r300/radeon_common.c | 1 +
src/mesa/drivers/dri/r300/radeon_common.h | 1 +
src/mesa/drivers/dri/r300/radeon_common_context.c | 1 +
src/mesa/drivers/dri/r300/radeon_common_context.h | 1 +
src/mesa/drivers/dri/r300/radeon_cs_legacy.c | 1 +
src/mesa/drivers/dri/r300/radeon_cs_legacy.h | 1 +
src/mesa/drivers/dri/r300/radeon_cs_space_drm.c | 1 +
src/mesa/drivers/dri/r300/radeon_dma.c | 1 +
src/mesa/drivers/dri/r300/radeon_dma.h | 1 +
src/mesa/drivers/dri/r300/radeon_fbo.c | 1 +
src/mesa/drivers/dri/r300/radeon_lock.c | 1 +
src/mesa/drivers/dri/r300/radeon_lock.h | 1 +
src/mesa/drivers/dri/r300/radeon_mipmap_tree.c | 1 +
src/mesa/drivers/dri/r300/radeon_mipmap_tree.h | 1 +
src/mesa/drivers/dri/r300/radeon_screen.c | 1 +
src/mesa/drivers/dri/r300/radeon_screen.h | 1 +
src/mesa/drivers/dri/r300/radeon_span.c | 1 +
src/mesa/drivers/dri/r300/radeon_span.h | 1 +
src/mesa/drivers/dri/r300/radeon_texture.c | 1 +
src/mesa/drivers/dri/r300/radeon_texture.h | 1 +
src/mesa/drivers/dri/r300/server/radeon.h | 1 +
src/mesa/drivers/dri/r300/server/radeon_dri.c | 1 +
src/mesa/drivers/dri/r300/server/radeon_dri.h | 1 +
src/mesa/drivers/dri/r300/server/radeon_egl.c | 1 +
src/mesa/drivers/dri/r300/server/radeon_macros.h | 1 +
src/mesa/drivers/dri/r300/server/radeon_reg.h | 1 +
src/mesa/drivers/dri/r600/Makefile | 46 +---------------------
src/mesa/drivers/dri/r600/radeon_bo_legacy.c | 1 +
src/mesa/drivers/dri/r600/radeon_bo_legacy.h | 1 +
src/mesa/drivers/dri/r600/radeon_bocs_wrapper.h | 1 +
src/mesa/drivers/dri/r600/radeon_chipset.h | 1 +
src/mesa/drivers/dri/r600/radeon_cmdbuf.h | 1 +
src/mesa/drivers/dri/r600/radeon_common.c | 1 +
src/mesa/drivers/dri/r600/radeon_common.h | 1 +
src/mesa/drivers/dri/r600/radeon_common_context.c | 1 +
src/mesa/drivers/dri/r600/radeon_common_context.h | 1 +
src/mesa/drivers/dri/r600/radeon_cs_legacy.c | 1 +
src/mesa/drivers/dri/r600/radeon_cs_legacy.h | 1 +
src/mesa/drivers/dri/r600/radeon_cs_space_drm.c | 1 +
src/mesa/drivers/dri/r600/radeon_dma.c | 1 +
src/mesa/drivers/dri/r600/radeon_dma.h | 1 +
src/mesa/drivers/dri/r600/radeon_fbo.c | 1 +
src/mesa/drivers/dri/r600/radeon_lock.c | 1 +
src/mesa/drivers/dri/r600/radeon_lock.h | 1 +
src/mesa/drivers/dri/r600/radeon_mipmap_tree.c | 1 +
src/mesa/drivers/dri/r600/radeon_mipmap_tree.h | 1 +
src/mesa/drivers/dri/r600/radeon_screen.c | 1 +
src/mesa/drivers/dri/r600/radeon_screen.h | 1 +
src/mesa/drivers/dri/r600/radeon_span.c | 1 +
src/mesa/drivers/dri/r600/radeon_span.h | 1 +
src/mesa/drivers/dri/r600/radeon_texture.c | 1 +
src/mesa/drivers/dri/r600/radeon_texture.h | 1 +
src/mesa/drivers/dri/r600/server/radeon.h | 1 +
src/mesa/drivers/dri/r600/server/radeon_dri.c | 1 +
src/mesa/drivers/dri/r600/server/radeon_dri.h | 1 +
src/mesa/drivers/dri/r600/server/radeon_egl.c | 1 +
src/mesa/drivers/dri/r600/server/radeon_macros.h | 1 +
src/mesa/drivers/dri/r600/server/radeon_reg.h | 1 +
98 files changed, 96 insertions(+), 167 deletions(-)
delete mode 100644 src/mesa/drivers/dri/r200/.gitignore
create mode 120000 src/mesa/drivers/dri/r200/radeon_bo_legacy.c
create mode 120000 src/mesa/drivers/dri/r200/radeon_bo_legacy.h
create mode 120000 src/mesa/drivers/dri/r200/radeon_bocs_wrapper.h
create mode 120000 src/mesa/drivers/dri/r200/radeon_chipset.h
create mode 120000 src/mesa/drivers/dri/r200/radeon_cmdbuf.h
create mode 120000 src/mesa/drivers/dri/r200/radeon_common.c
create mode 120000 src/mesa/drivers/dri/r200/radeon_common.h
create mode 120000 src/mesa/drivers/dri/r200/radeon_common_context.c
create mode 120000 src/mesa/drivers/dri/r200/radeon_common_context.h
create mode 120000 src/mesa/drivers/dri/r200/radeon_cs_legacy.c
create mode 120000 src/mesa/drivers/dri/r200/radeon_cs_legacy.h
create mode 120000 src/mesa/drivers/dri/r200/radeon_cs_space_drm.c
create mode 120000 src/mesa/drivers/dri/r200/radeon_dma.c
create mode 120000 src/mesa/drivers/dri/r200/radeon_dma.h
create mode 120000 src/mesa/drivers/dri/r200/radeon_fbo.c
create mode 120000 src/mesa/drivers/dri/r200/radeon_lock.c
create mode 120000 src/mesa/drivers/dri/r200/radeon_lock.h
create mode 120000 src/mesa/drivers/dri/r200/radeon_mipmap_tree.c
create mode 120000 src/mesa/drivers/dri/r200/radeon_mipmap_tree.h
create mode 120000 src/mesa/drivers/dri/r200/radeon_screen.c
create mode 120000 src/mesa/drivers/dri/r200/radeon_screen.h
create mode 120000 src/mesa/drivers/dri/r200/radeon_span.c
create mode 120000 src/mesa/drivers/dri/r200/radeon_span.h
create mode 120000 src/mesa/drivers/dri/r200/radeon_texture.c
create mode 120000 src/mesa/drivers/dri/r200/radeon_texture.h
create mode 120000 src/mesa/drivers/dri/r200/server/radeon.h
create mode 120000 src/mesa/drivers/dri/r200/server/radeon_dri.c
create mode 120000 src/mesa/drivers/dri/r200/server/radeon_dri.h
create mode 120000 src/mesa/drivers/dri/r200/server/radeon_egl.c
create mode 120000 src/mesa/drivers/dri/r200/server/radeon_macros.h
create mode 120000 src/mesa/drivers/dri/r200/server/radeon_reg.h
delete mode 100644 src/mesa/drivers/dri/r300/.gitignore
create mode 120000 src/mesa/drivers/dri/r300/radeon_bo_legacy.c
create mode 120000 src/mesa/drivers/dri/r300/radeon_bo_legacy.h
create mode 120000 src/mesa/drivers/dri/r300/radeon_bocs_wrapper.h
create mode 120000 src/mesa/drivers/dri/r300/radeon_chipset.h
create mode 120000 src/mesa/drivers/dri/r300/radeon_cmdbuf.h
create mode 120000 src/mesa/drivers/dri/r300/radeon_common.c
create mode 120000 src/mesa/drivers/dri/r300/radeon_common.h
create mode 120000 src/mesa/drivers/dri/r300/radeon_common_context.c
create mode 120000 src/mesa/drivers/dri/r300/radeon_common_context.h
create mode 120000 src/mesa/drivers/dri/r300/radeon_cs_legacy.c
create mode 120000 src/mesa/drivers/dri/r300/radeon_cs_legacy.h
create mode 120000 src/mesa/drivers/dri/r300/radeon_cs_space_drm.c
create mode 120000 src/mesa/drivers/dri/r300/radeon_dma.c
create mode 120000 src/mesa/drivers/dri/r300/radeon_dma.h
create mode 120000 src/mesa/drivers/dri/r300/radeon_fbo.c
create mode 120000 src/mesa/drivers/dri/r300/radeon_lock.c
create mode 120000 src/mesa/drivers/dri/r300/radeon_lock.h
create mode 120000 src/mesa/drivers/dri/r300/radeon_mipmap_tree.c
create mode 120000 src/mesa/drivers/dri/r300/radeon_mipmap_tree.h
create mode 120000 src/mesa/drivers/dri/r300/radeon_screen.c
create mode 120000 src/mesa/drivers/dri/r300/radeon_screen.h
create mode 120000 src/mesa/drivers/dri/r300/radeon_span.c
create mode 120000 src/mesa/drivers/dri/r300/radeon_span.h
create mode 120000 src/mesa/drivers/dri/r300/radeon_texture.c
create mode 120000 src/mesa/drivers/dri/r300/radeon_texture.h
create mode 120000 src/mesa/drivers/dri/r300/server/radeon.h
create mode 120000 src/mesa/drivers/dri/r300/server/radeon_dri.c
create mode 120000 src/mesa/drivers/dri/r300/server/radeon_dri.h
create mode 120000 src/mesa/drivers/dri/r300/server/radeon_egl.c
create mode 120000 src/mesa/drivers/dri/r300/server/radeon_macros.h
create mode 120000 src/mesa/drivers/dri/r300/server/radeon_reg.h
create mode 120000 src/mesa/drivers/dri/r600/radeon_bo_legacy.c
create mode 120000 src/mesa/drivers/dri/r600/radeon_bo_legacy.h
create mode 120000 src/mesa/drivers/dri/r600/radeon_bocs_wrapper.h
create mode 120000 src/mesa/drivers/dri/r600/radeon_chipset.h
create mode 120000 src/mesa/drivers/dri/r600/radeon_cmdbuf.h
create mode 120000 src/mesa/drivers/dri/r600/radeon_common.c
create mode 120000 src/mesa/drivers/dri/r600/radeon_common.h
create mode 120000 src/mesa/drivers/dri/r600/radeon_common_context.c
create mode 120000 src/mesa/drivers/dri/r600/radeon_common_context.h
create mode 120000 src/mesa/drivers/dri/r600/radeon_cs_legacy.c
create mode 120000 src/mesa/drivers/dri/r600/radeon_cs_legacy.h
create mode 120000 src/mesa/drivers/dri/r600/radeon_cs_space_drm.c
create mode 120000 src/mesa/drivers/dri/r600/radeon_dma.c
create mode 120000 src/mesa/drivers/dri/r600/radeon_dma.h
create mode 120000 src/mesa/drivers/dri/r600/radeon_fbo.c
create mode 120000 src/mesa/drivers/dri/r600/radeon_lock.c
create mode 120000 src/mesa/drivers/dri/r600/radeon_lock.h
create mode 120000 src/mesa/drivers/dri/r600/radeon_mipmap_tree.c
create mode 120000 src/mesa/drivers/dri/r600/radeon_mipmap_tree.h
create mode 120000 src/mesa/drivers/dri/r600/radeon_screen.c
create mode 120000 src/mesa/drivers/dri/r600/radeon_screen.h
create mode 120000 src/mesa/drivers/dri/r600/radeon_span.c
create mode 120000 src/mesa/drivers/dri/r600/radeon_span.h
create mode 120000 src/mesa/drivers/dri/r600/radeon_texture.c
create mode 120000 src/mesa/drivers/dri/r600/radeon_texture.h
create mode 120000 src/mesa/drivers/dri/r600/server/radeon.h
create mode 120000 src/mesa/drivers/dri/r600/server/radeon_dri.c
create mode 120000 src/mesa/drivers/dri/r600/server/radeon_dri.h
create mode 120000 src/mesa/drivers/dri/r600/server/radeon_egl.c
create mode 120000 src/mesa/drivers/dri/r600/server/radeon_macros.h
create mode 120000 src/mesa/drivers/dri/r600/server/radeon_reg.h
diff --git a/src/mesa/drivers/dri/r200/.gitignore b/src/mesa/drivers/dri/r200/.gitignore
deleted file mode 100644
index 2f9cd1a987b..00000000000
--- a/src/mesa/drivers/dri/r200/.gitignore
+++ /dev/null
@@ -1,15 +0,0 @@
-radeon_bocs_wrapper.h
-radeon_bo_legacy.[ch]
-radeon_chipset.h
-radeon_cmdbuf.h
-radeon_common.[ch]
-radeon_common_context.[ch]
-radeon_cs_legacy.[ch]
-radeon_dma.[ch]
-radeon_fbo.c
-radeon_lock.[ch]
-radeon_mipmap_tree.[ch]
-radeon_screen.[ch]
-radeon_span.[ch]
-radeon_texture.[ch]
-server
diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile
index 4686241957b..e81a1b38ac3 100644
--- a/src/mesa/drivers/dri/r200/Makefile
+++ b/src/mesa/drivers/dri/r200/Makefile
@@ -55,41 +55,6 @@ X86_SOURCES =
DRIVER_DEFINES = -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R200
-SYMLINKS = \
- server/radeon_egl.c \
- server/radeon_dri.c \
- server/radeon_dri.h \
- server/radeon.h \
- server/radeon_macros.h \
- server/radeon_reg.h
-
-COMMON_SYMLINKS = \
- radeon_chipset.h \
- radeon_screen.c \
- radeon_screen.h \
- radeon_bo_legacy.c \
- radeon_cs_legacy.c \
- radeon_bo_legacy.h \
- radeon_cs_legacy.h \
- radeon_bocs_wrapper.h \
- radeon_span.h \
- radeon_span.c \
- radeon_lock.c \
- radeon_lock.h \
- radeon_common.c \
- radeon_common_context.c \
- radeon_common_context.h \
- radeon_common.h \
- radeon_cmdbuf.h \
- radeon_mipmap_tree.c \
- radeon_mipmap_tree.h \
- radeon_texture.c \
- radeon_texture.h \
- radeon_dma.c \
- radeon_dma.h \
- radeon_fbo.c \
- $(CS_SOURCES)
-
DRI_LIB_DEPS += $(RADEON_LDFLAGS)
##### TARGETS #####
@@ -99,15 +64,4 @@ include ../Makefile.template
#INCLUDES += -I../radeon/server
-server:
- mkdir -p server
-
-$(SYMLINKS): server
- @[ -e $@ ] || ln -sf ../../radeon/$@ server/
-
-
-$(COMMON_SYMLINKS):
- @[ -e $@ ] || ln -sf ../radeon/$@ ./
-
-symlinks: $(SYMLINKS) $(COMMON_SYMLINKS)
-
+symlinks:
diff --git a/src/mesa/drivers/dri/r200/radeon_bo_legacy.c b/src/mesa/drivers/dri/r200/radeon_bo_legacy.c
new file mode 120000
index 00000000000..79ad050e6b6
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_bo_legacy.c
@@ -0,0 +1 @@
+../radeon/radeon_bo_legacy.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_bo_legacy.h b/src/mesa/drivers/dri/r200/radeon_bo_legacy.h
new file mode 120000
index 00000000000..83b0f7ffabe
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_bo_legacy.h
@@ -0,0 +1 @@
+../radeon/radeon_bo_legacy.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/r200/radeon_bocs_wrapper.h
new file mode 120000
index 00000000000..ca894b2443c
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_bocs_wrapper.h
@@ -0,0 +1 @@
+../radeon/radeon_bocs_wrapper.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_chipset.h b/src/mesa/drivers/dri/r200/radeon_chipset.h
new file mode 120000
index 00000000000..eba99001ff8
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_chipset.h
@@ -0,0 +1 @@
+../radeon/radeon_chipset.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_cmdbuf.h b/src/mesa/drivers/dri/r200/radeon_cmdbuf.h
new file mode 120000
index 00000000000..a799e1dc6df
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_cmdbuf.h
@@ -0,0 +1 @@
+../radeon/radeon_cmdbuf.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_common.c b/src/mesa/drivers/dri/r200/radeon_common.c
new file mode 120000
index 00000000000..67b19ba940d
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_common.c
@@ -0,0 +1 @@
+../radeon/radeon_common.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_common.h b/src/mesa/drivers/dri/r200/radeon_common.h
new file mode 120000
index 00000000000..5bcb696a9f7
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_common.h
@@ -0,0 +1 @@
+../radeon/radeon_common.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_common_context.c b/src/mesa/drivers/dri/r200/radeon_common_context.c
new file mode 120000
index 00000000000..86800f3819c
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_common_context.c
@@ -0,0 +1 @@
+../radeon/radeon_common_context.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_common_context.h b/src/mesa/drivers/dri/r200/radeon_common_context.h
new file mode 120000
index 00000000000..4d663125500
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_common_context.h
@@ -0,0 +1 @@
+../radeon/radeon_common_context.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_cs_legacy.c b/src/mesa/drivers/dri/r200/radeon_cs_legacy.c
new file mode 120000
index 00000000000..006720f8a46
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_cs_legacy.c
@@ -0,0 +1 @@
+../radeon/radeon_cs_legacy.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_cs_legacy.h b/src/mesa/drivers/dri/r200/radeon_cs_legacy.h
new file mode 120000
index 00000000000..a5f95e0a3dc
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_cs_legacy.h
@@ -0,0 +1 @@
+../radeon/radeon_cs_legacy.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_cs_space_drm.c b/src/mesa/drivers/dri/r200/radeon_cs_space_drm.c
new file mode 120000
index 00000000000..c248ea7d1a5
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_cs_space_drm.c
@@ -0,0 +1 @@
+../radeon/radeon_cs_space_drm.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_dma.c b/src/mesa/drivers/dri/r200/radeon_dma.c
new file mode 120000
index 00000000000..43be0006255
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_dma.c
@@ -0,0 +1 @@
+../radeon/radeon_dma.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_dma.h b/src/mesa/drivers/dri/r200/radeon_dma.h
new file mode 120000
index 00000000000..82e50634e3c
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_dma.h
@@ -0,0 +1 @@
+../radeon/radeon_dma.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_fbo.c b/src/mesa/drivers/dri/r200/radeon_fbo.c
new file mode 120000
index 00000000000..0d738d8d780
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_fbo.c
@@ -0,0 +1 @@
+../radeon/radeon_fbo.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_lock.c b/src/mesa/drivers/dri/r200/radeon_lock.c
new file mode 120000
index 00000000000..af4108a8e30
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_lock.c
@@ -0,0 +1 @@
+../radeon/radeon_lock.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_lock.h b/src/mesa/drivers/dri/r200/radeon_lock.h
new file mode 120000
index 00000000000..64bdf94ee7e
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_lock.h
@@ -0,0 +1 @@
+../radeon/radeon_lock.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_mipmap_tree.c b/src/mesa/drivers/dri/r200/radeon_mipmap_tree.c
new file mode 120000
index 00000000000..31c0cfbe942
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_mipmap_tree.c
@@ -0,0 +1 @@
+../radeon/radeon_mipmap_tree.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_mipmap_tree.h b/src/mesa/drivers/dri/r200/radeon_mipmap_tree.h
new file mode 120000
index 00000000000..254d50cf8c5
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_mipmap_tree.h
@@ -0,0 +1 @@
+../radeon/radeon_mipmap_tree.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_screen.c b/src/mesa/drivers/dri/r200/radeon_screen.c
new file mode 120000
index 00000000000..86161118dd3
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_screen.c
@@ -0,0 +1 @@
+../radeon/radeon_screen.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_screen.h b/src/mesa/drivers/dri/r200/radeon_screen.h
new file mode 120000
index 00000000000..23bb6bd4598
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_screen.h
@@ -0,0 +1 @@
+../radeon/radeon_screen.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_span.c b/src/mesa/drivers/dri/r200/radeon_span.c
new file mode 120000
index 00000000000..232868c4c9e
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_span.c
@@ -0,0 +1 @@
+../radeon/radeon_span.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_span.h b/src/mesa/drivers/dri/r200/radeon_span.h
new file mode 120000
index 00000000000..f9d634508c2
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_span.h
@@ -0,0 +1 @@
+../radeon/radeon_span.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_texture.c b/src/mesa/drivers/dri/r200/radeon_texture.c
new file mode 120000
index 00000000000..a822710915f
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_texture.c
@@ -0,0 +1 @@
+../radeon/radeon_texture.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_texture.h b/src/mesa/drivers/dri/r200/radeon_texture.h
new file mode 120000
index 00000000000..17fac3d5ea5
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_texture.h
@@ -0,0 +1 @@
+../radeon/radeon_texture.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/server/radeon.h b/src/mesa/drivers/dri/r200/server/radeon.h
new file mode 120000
index 00000000000..81274a54f11
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/server/radeon.h
@@ -0,0 +1 @@
+../../radeon/server/radeon.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/server/radeon_dri.c b/src/mesa/drivers/dri/r200/server/radeon_dri.c
new file mode 120000
index 00000000000..d05847d650f
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/server/radeon_dri.c
@@ -0,0 +1 @@
+../../radeon/server/radeon_dri.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/server/radeon_dri.h b/src/mesa/drivers/dri/r200/server/radeon_dri.h
new file mode 120000
index 00000000000..27c591d3c9d
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/server/radeon_dri.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_dri.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/server/radeon_egl.c b/src/mesa/drivers/dri/r200/server/radeon_egl.c
new file mode 120000
index 00000000000..d7735a76438
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/server/radeon_egl.c
@@ -0,0 +1 @@
+../../radeon/server/radeon_egl.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/server/radeon_macros.h b/src/mesa/drivers/dri/r200/server/radeon_macros.h
new file mode 120000
index 00000000000..c56cd735b83
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/server/radeon_macros.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_macros.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/server/radeon_reg.h b/src/mesa/drivers/dri/r200/server/radeon_reg.h
new file mode 120000
index 00000000000..e2349dcb685
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/server/radeon_reg.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_reg.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/.gitignore b/src/mesa/drivers/dri/r300/.gitignore
deleted file mode 100644
index 2f9cd1a987b..00000000000
--- a/src/mesa/drivers/dri/r300/.gitignore
+++ /dev/null
@@ -1,15 +0,0 @@
-radeon_bocs_wrapper.h
-radeon_bo_legacy.[ch]
-radeon_chipset.h
-radeon_cmdbuf.h
-radeon_common.[ch]
-radeon_common_context.[ch]
-radeon_cs_legacy.[ch]
-radeon_dma.[ch]
-radeon_fbo.c
-radeon_lock.[ch]
-radeon_mipmap_tree.[ch]
-radeon_screen.[ch]
-radeon_span.[ch]
-radeon_texture.[ch]
-server
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
index a77209074ae..7460410ee65 100644
--- a/src/mesa/drivers/dri/r300/Makefile
+++ b/src/mesa/drivers/dri/r300/Makefile
@@ -73,54 +73,10 @@ DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \
# -DRADEON_BO_TRACK \
-Wall
-SYMLINKS = \
- server/radeon_dri.c \
- server/radeon_dri.h \
- server/radeon.h \
- server/radeon_macros.h \
- server/radeon_reg.h \
- server/radeon_egl.c
-
-COMMON_SYMLINKS = \
- radeon_chipset.h \
- radeon_screen.c \
- radeon_screen.h \
- radeon_span.h \
- radeon_span.c \
- radeon_bo_legacy.c \
- radeon_cs_legacy.c \
- radeon_bo_legacy.h \
- radeon_cs_legacy.h \
- radeon_bocs_wrapper.h \
- radeon_lock.c \
- radeon_lock.h \
- radeon_common.c \
- radeon_common.h \
- radeon_common_context.c \
- radeon_common_context.h \
- radeon_cmdbuf.h \
- radeon_dma.c \
- radeon_dma.h \
- radeon_mipmap_tree.c \
- radeon_mipmap_tree.h \
- radeon_texture.c \
- radeon_texture.h \
- radeon_fbo.c \
- $(CS_SOURCES)
-
DRI_LIB_DEPS += $(RADEON_LDFLAGS)
##### TARGETS #####
include ../Makefile.template
-server:
- mkdir -p server
-
-$(SYMLINKS): server
- @[ -e $@ ] || ln -sf ../../radeon/$@ server/
-
-$(COMMON_SYMLINKS):
- @[ -e $@ ] || ln -sf ../radeon/$@ ./
-
-symlinks: $(SYMLINKS) $(COMMON_SYMLINKS)
+symlinks:
diff --git a/src/mesa/drivers/dri/r300/radeon_bo_legacy.c b/src/mesa/drivers/dri/r300/radeon_bo_legacy.c
new file mode 120000
index 00000000000..79ad050e6b6
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_bo_legacy.c
@@ -0,0 +1 @@
+../radeon/radeon_bo_legacy.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_bo_legacy.h b/src/mesa/drivers/dri/r300/radeon_bo_legacy.h
new file mode 120000
index 00000000000..83b0f7ffabe
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_bo_legacy.h
@@ -0,0 +1 @@
+../radeon/radeon_bo_legacy.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/r300/radeon_bocs_wrapper.h
new file mode 120000
index 00000000000..ca894b2443c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_bocs_wrapper.h
@@ -0,0 +1 @@
+../radeon/radeon_bocs_wrapper.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_chipset.h b/src/mesa/drivers/dri/r300/radeon_chipset.h
new file mode 120000
index 00000000000..eba99001ff8
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_chipset.h
@@ -0,0 +1 @@
+../radeon/radeon_chipset.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_cmdbuf.h b/src/mesa/drivers/dri/r300/radeon_cmdbuf.h
new file mode 120000
index 00000000000..a799e1dc6df
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_cmdbuf.h
@@ -0,0 +1 @@
+../radeon/radeon_cmdbuf.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_common.c b/src/mesa/drivers/dri/r300/radeon_common.c
new file mode 120000
index 00000000000..67b19ba940d
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_common.c
@@ -0,0 +1 @@
+../radeon/radeon_common.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_common.h b/src/mesa/drivers/dri/r300/radeon_common.h
new file mode 120000
index 00000000000..5bcb696a9f7
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_common.h
@@ -0,0 +1 @@
+../radeon/radeon_common.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_common_context.c b/src/mesa/drivers/dri/r300/radeon_common_context.c
new file mode 120000
index 00000000000..86800f3819c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_common_context.c
@@ -0,0 +1 @@
+../radeon/radeon_common_context.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_common_context.h b/src/mesa/drivers/dri/r300/radeon_common_context.h
new file mode 120000
index 00000000000..4d663125500
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_common_context.h
@@ -0,0 +1 @@
+../radeon/radeon_common_context.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_cs_legacy.c b/src/mesa/drivers/dri/r300/radeon_cs_legacy.c
new file mode 120000
index 00000000000..006720f8a46
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_cs_legacy.c
@@ -0,0 +1 @@
+../radeon/radeon_cs_legacy.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_cs_legacy.h b/src/mesa/drivers/dri/r300/radeon_cs_legacy.h
new file mode 120000
index 00000000000..a5f95e0a3dc
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_cs_legacy.h
@@ -0,0 +1 @@
+../radeon/radeon_cs_legacy.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_cs_space_drm.c b/src/mesa/drivers/dri/r300/radeon_cs_space_drm.c
new file mode 120000
index 00000000000..c248ea7d1a5
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_cs_space_drm.c
@@ -0,0 +1 @@
+../radeon/radeon_cs_space_drm.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_dma.c b/src/mesa/drivers/dri/r300/radeon_dma.c
new file mode 120000
index 00000000000..43be0006255
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_dma.c
@@ -0,0 +1 @@
+../radeon/radeon_dma.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_dma.h b/src/mesa/drivers/dri/r300/radeon_dma.h
new file mode 120000
index 00000000000..82e50634e3c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_dma.h
@@ -0,0 +1 @@
+../radeon/radeon_dma.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_fbo.c b/src/mesa/drivers/dri/r300/radeon_fbo.c
new file mode 120000
index 00000000000..0d738d8d780
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_fbo.c
@@ -0,0 +1 @@
+../radeon/radeon_fbo.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c
new file mode 120000
index 00000000000..af4108a8e30
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_lock.c
@@ -0,0 +1 @@
+../radeon/radeon_lock.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_lock.h b/src/mesa/drivers/dri/r300/radeon_lock.h
new file mode 120000
index 00000000000..64bdf94ee7e
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_lock.h
@@ -0,0 +1 @@
+../radeon/radeon_lock.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_mipmap_tree.c b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.c
new file mode 120000
index 00000000000..31c0cfbe942
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.c
@@ -0,0 +1 @@
+../radeon/radeon_mipmap_tree.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_mipmap_tree.h b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.h
new file mode 120000
index 00000000000..254d50cf8c5
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.h
@@ -0,0 +1 @@
+../radeon/radeon_mipmap_tree.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_screen.c b/src/mesa/drivers/dri/r300/radeon_screen.c
new file mode 120000
index 00000000000..86161118dd3
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_screen.c
@@ -0,0 +1 @@
+../radeon/radeon_screen.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_screen.h b/src/mesa/drivers/dri/r300/radeon_screen.h
new file mode 120000
index 00000000000..23bb6bd4598
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_screen.h
@@ -0,0 +1 @@
+../radeon/radeon_screen.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c
new file mode 120000
index 00000000000..232868c4c9e
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_span.c
@@ -0,0 +1 @@
+../radeon/radeon_span.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_span.h b/src/mesa/drivers/dri/r300/radeon_span.h
new file mode 120000
index 00000000000..f9d634508c2
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_span.h
@@ -0,0 +1 @@
+../radeon/radeon_span.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_texture.c b/src/mesa/drivers/dri/r300/radeon_texture.c
new file mode 120000
index 00000000000..a822710915f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_texture.c
@@ -0,0 +1 @@
+../radeon/radeon_texture.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_texture.h b/src/mesa/drivers/dri/r300/radeon_texture.h
new file mode 120000
index 00000000000..17fac3d5ea5
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_texture.h
@@ -0,0 +1 @@
+../radeon/radeon_texture.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/server/radeon.h b/src/mesa/drivers/dri/r300/server/radeon.h
new file mode 120000
index 00000000000..81274a54f11
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/server/radeon.h
@@ -0,0 +1 @@
+../../radeon/server/radeon.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/server/radeon_dri.c b/src/mesa/drivers/dri/r300/server/radeon_dri.c
new file mode 120000
index 00000000000..d05847d650f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/server/radeon_dri.c
@@ -0,0 +1 @@
+../../radeon/server/radeon_dri.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/server/radeon_dri.h b/src/mesa/drivers/dri/r300/server/radeon_dri.h
new file mode 120000
index 00000000000..27c591d3c9d
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/server/radeon_dri.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_dri.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/server/radeon_egl.c b/src/mesa/drivers/dri/r300/server/radeon_egl.c
new file mode 120000
index 00000000000..d7735a76438
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/server/radeon_egl.c
@@ -0,0 +1 @@
+../../radeon/server/radeon_egl.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/server/radeon_macros.h b/src/mesa/drivers/dri/r300/server/radeon_macros.h
new file mode 120000
index 00000000000..c56cd735b83
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/server/radeon_macros.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_macros.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/server/radeon_reg.h b/src/mesa/drivers/dri/r300/server/radeon_reg.h
new file mode 120000
index 00000000000..e2349dcb685
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/server/radeon_reg.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_reg.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile
index 2f225f7cb5e..5bdc1afbf07 100644
--- a/src/mesa/drivers/dri/r600/Makefile
+++ b/src/mesa/drivers/dri/r600/Makefile
@@ -68,54 +68,10 @@ DRIVER_DEFINES = -DCOMPILE_R600 -DR200_MERGED=0 \
# -DRADEON_BO_TRACK \
-Wall
-SYMLINKS = \
- server/radeon_dri.c \
- server/radeon_dri.h \
- server/radeon.h \
- server/radeon_macros.h \
- server/radeon_reg.h \
- server/radeon_egl.c
-
-COMMON_SYMLINKS = \
- radeon_chipset.h \
- radeon_screen.c \
- radeon_screen.h \
- radeon_span.h \
- radeon_span.c \
- radeon_bo_legacy.c \
- radeon_cs_legacy.c \
- radeon_bo_legacy.h \
- radeon_cs_legacy.h \
- radeon_bocs_wrapper.h \
- radeon_lock.c \
- radeon_lock.h \
- radeon_common.c \
- radeon_common.h \
- radeon_common_context.c \
- radeon_common_context.h \
- radeon_cmdbuf.h \
- radeon_dma.c \
- radeon_dma.h \
- radeon_mipmap_tree.c \
- radeon_mipmap_tree.h \
- radeon_texture.c \
- radeon_texture.h \
- radeon_fbo.c \
- $(CS_SOURCES)
-
DRI_LIB_DEPS += $(RADEON_LDFLAGS)
##### TARGETS #####
include ../Makefile.template
-server:
- mkdir -p server
-
-$(SYMLINKS): server
- @[ -e $@ ] || ln -sf ../../radeon/$@ server/
-
-$(COMMON_SYMLINKS):
- @[ -e $@ ] || ln -sf ../radeon/$@ ./
-
-symlinks: $(SYMLINKS) $(COMMON_SYMLINKS)
+symlinks:
diff --git a/src/mesa/drivers/dri/r600/radeon_bo_legacy.c b/src/mesa/drivers/dri/r600/radeon_bo_legacy.c
new file mode 120000
index 00000000000..79ad050e6b6
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_bo_legacy.c
@@ -0,0 +1 @@
+../radeon/radeon_bo_legacy.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_bo_legacy.h b/src/mesa/drivers/dri/r600/radeon_bo_legacy.h
new file mode 120000
index 00000000000..83b0f7ffabe
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_bo_legacy.h
@@ -0,0 +1 @@
+../radeon/radeon_bo_legacy.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/r600/radeon_bocs_wrapper.h
new file mode 120000
index 00000000000..ca894b2443c
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_bocs_wrapper.h
@@ -0,0 +1 @@
+../radeon/radeon_bocs_wrapper.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_chipset.h b/src/mesa/drivers/dri/r600/radeon_chipset.h
new file mode 120000
index 00000000000..eba99001ff8
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_chipset.h
@@ -0,0 +1 @@
+../radeon/radeon_chipset.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_cmdbuf.h b/src/mesa/drivers/dri/r600/radeon_cmdbuf.h
new file mode 120000
index 00000000000..a799e1dc6df
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_cmdbuf.h
@@ -0,0 +1 @@
+../radeon/radeon_cmdbuf.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_common.c b/src/mesa/drivers/dri/r600/radeon_common.c
new file mode 120000
index 00000000000..67b19ba940d
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_common.c
@@ -0,0 +1 @@
+../radeon/radeon_common.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_common.h b/src/mesa/drivers/dri/r600/radeon_common.h
new file mode 120000
index 00000000000..5bcb696a9f7
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_common.h
@@ -0,0 +1 @@
+../radeon/radeon_common.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_common_context.c b/src/mesa/drivers/dri/r600/radeon_common_context.c
new file mode 120000
index 00000000000..86800f3819c
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_common_context.c
@@ -0,0 +1 @@
+../radeon/radeon_common_context.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_common_context.h b/src/mesa/drivers/dri/r600/radeon_common_context.h
new file mode 120000
index 00000000000..4d663125500
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_common_context.h
@@ -0,0 +1 @@
+../radeon/radeon_common_context.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_cs_legacy.c b/src/mesa/drivers/dri/r600/radeon_cs_legacy.c
new file mode 120000
index 00000000000..006720f8a46
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_cs_legacy.c
@@ -0,0 +1 @@
+../radeon/radeon_cs_legacy.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_cs_legacy.h b/src/mesa/drivers/dri/r600/radeon_cs_legacy.h
new file mode 120000
index 00000000000..a5f95e0a3dc
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_cs_legacy.h
@@ -0,0 +1 @@
+../radeon/radeon_cs_legacy.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_cs_space_drm.c b/src/mesa/drivers/dri/r600/radeon_cs_space_drm.c
new file mode 120000
index 00000000000..c248ea7d1a5
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_cs_space_drm.c
@@ -0,0 +1 @@
+../radeon/radeon_cs_space_drm.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_dma.c b/src/mesa/drivers/dri/r600/radeon_dma.c
new file mode 120000
index 00000000000..43be0006255
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_dma.c
@@ -0,0 +1 @@
+../radeon/radeon_dma.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_dma.h b/src/mesa/drivers/dri/r600/radeon_dma.h
new file mode 120000
index 00000000000..82e50634e3c
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_dma.h
@@ -0,0 +1 @@
+../radeon/radeon_dma.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_fbo.c b/src/mesa/drivers/dri/r600/radeon_fbo.c
new file mode 120000
index 00000000000..0d738d8d780
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_fbo.c
@@ -0,0 +1 @@
+../radeon/radeon_fbo.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_lock.c b/src/mesa/drivers/dri/r600/radeon_lock.c
new file mode 120000
index 00000000000..af4108a8e30
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_lock.c
@@ -0,0 +1 @@
+../radeon/radeon_lock.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_lock.h b/src/mesa/drivers/dri/r600/radeon_lock.h
new file mode 120000
index 00000000000..64bdf94ee7e
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_lock.h
@@ -0,0 +1 @@
+../radeon/radeon_lock.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_mipmap_tree.c b/src/mesa/drivers/dri/r600/radeon_mipmap_tree.c
new file mode 120000
index 00000000000..31c0cfbe942
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_mipmap_tree.c
@@ -0,0 +1 @@
+../radeon/radeon_mipmap_tree.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_mipmap_tree.h b/src/mesa/drivers/dri/r600/radeon_mipmap_tree.h
new file mode 120000
index 00000000000..254d50cf8c5
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_mipmap_tree.h
@@ -0,0 +1 @@
+../radeon/radeon_mipmap_tree.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_screen.c b/src/mesa/drivers/dri/r600/radeon_screen.c
new file mode 120000
index 00000000000..86161118dd3
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_screen.c
@@ -0,0 +1 @@
+../radeon/radeon_screen.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_screen.h b/src/mesa/drivers/dri/r600/radeon_screen.h
new file mode 120000
index 00000000000..23bb6bd4598
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_screen.h
@@ -0,0 +1 @@
+../radeon/radeon_screen.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_span.c b/src/mesa/drivers/dri/r600/radeon_span.c
new file mode 120000
index 00000000000..232868c4c9e
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_span.c
@@ -0,0 +1 @@
+../radeon/radeon_span.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_span.h b/src/mesa/drivers/dri/r600/radeon_span.h
new file mode 120000
index 00000000000..f9d634508c2
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_span.h
@@ -0,0 +1 @@
+../radeon/radeon_span.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_texture.c b/src/mesa/drivers/dri/r600/radeon_texture.c
new file mode 120000
index 00000000000..a822710915f
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_texture.c
@@ -0,0 +1 @@
+../radeon/radeon_texture.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_texture.h b/src/mesa/drivers/dri/r600/radeon_texture.h
new file mode 120000
index 00000000000..17fac3d5ea5
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_texture.h
@@ -0,0 +1 @@
+../radeon/radeon_texture.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/server/radeon.h b/src/mesa/drivers/dri/r600/server/radeon.h
new file mode 120000
index 00000000000..81274a54f11
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/server/radeon.h
@@ -0,0 +1 @@
+../../radeon/server/radeon.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/server/radeon_dri.c b/src/mesa/drivers/dri/r600/server/radeon_dri.c
new file mode 120000
index 00000000000..d05847d650f
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/server/radeon_dri.c
@@ -0,0 +1 @@
+../../radeon/server/radeon_dri.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/server/radeon_dri.h b/src/mesa/drivers/dri/r600/server/radeon_dri.h
new file mode 120000
index 00000000000..27c591d3c9d
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/server/radeon_dri.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_dri.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/server/radeon_egl.c b/src/mesa/drivers/dri/r600/server/radeon_egl.c
new file mode 120000
index 00000000000..d7735a76438
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/server/radeon_egl.c
@@ -0,0 +1 @@
+../../radeon/server/radeon_egl.c
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/server/radeon_macros.h b/src/mesa/drivers/dri/r600/server/radeon_macros.h
new file mode 120000
index 00000000000..c56cd735b83
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/server/radeon_macros.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_macros.h
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/server/radeon_reg.h b/src/mesa/drivers/dri/r600/server/radeon_reg.h
new file mode 120000
index 00000000000..e2349dcb685
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/server/radeon_reg.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_reg.h
\ No newline at end of file
--
cgit v1.2.3
From 9e6d38f8a2cc89e3d45ef2bb169b72c3c41fc27b Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 21 Jul 2009 01:58:05 -0400
Subject: r600: add alpha test support
---
src/mesa/drivers/dri/r600/r700_chip.c | 1 +
src/mesa/drivers/dri/r600/r700_chip.h | 3 +-
src/mesa/drivers/dri/r600/r700_state.c | 55 +++++++++++++++++++++++++++++++---
3 files changed, 54 insertions(+), 5 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index 91aa8fc8fc6..635dd58e72c 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -139,6 +139,7 @@ GLboolean r700InitChipObject(context_t *context)
// SX
LINK_STATES(SX_MISC);
LINK_STATES(SX_ALPHA_TEST_CONTROL);
+ LINK_STATES(SX_ALPHA_REF);
// VGT
LINK_STATES(VGT_MAX_VTX_INDX);
diff --git a/src/mesa/drivers/dri/r600/r700_chip.h b/src/mesa/drivers/dri/r600/r700_chip.h
index 04af4bc3b98..fa419aa4995 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.h
+++ b/src/mesa/drivers/dri/r600/r700_chip.h
@@ -372,6 +372,7 @@ typedef struct _R700_CHIP_CONTEXT
// SX
union UINT_FLOAT SX_MISC ; /* 0xA0D4 */
union UINT_FLOAT SX_ALPHA_TEST_CONTROL ; /* 0xA104 */
+ union UINT_FLOAT SX_ALPHA_REF ; /* 0xA10E */
// VGT
union UINT_FLOAT VGT_MAX_VTX_INDX ; /* 0xA100 */
@@ -422,7 +423,7 @@ typedef struct _R700_CHIP_CONTEXT
union UINT_FLOAT SPI_FOG_FUNC_SCALE ; /* 0xA1B8 */
union UINT_FLOAT SPI_FOG_FUNC_BIAS ; /* 0xA1B9 */
- union UINT_FLOAT SQ_VTX_SEMANTIC_0 ; /* 0xA0E0 */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_0 ; /* 0xA0E0 */
union UINT_FLOAT SQ_VTX_SEMANTIC_1 ; /* 0xA0E1 */
union UINT_FLOAT SQ_VTX_SEMANTIC_2 ; /* 0xA0E2 */
union UINT_FLOAT SQ_VTX_SEMANTIC_3 ; /* 0xA0E3 */
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index 44584430036..5fe4b36b710 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -305,8 +305,57 @@ static void r700SetDepthState(GLcontext * ctx)
}
}
+static void r700SetAlphaState(GLcontext * ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ uint32_t alpha_func;
+ GLboolean really_enabled = ctx->Color.AlphaEnabled;
+
+ switch (ctx->Color.AlphaFunc) {
+ case GL_NEVER:
+ alpha_func = REF_NEVER;
+ break;
+ case GL_LESS:
+ alpha_func = REF_LESS;
+ break;
+ case GL_EQUAL:
+ alpha_func = REF_EQUAL;
+ break;
+ case GL_LEQUAL:
+ alpha_func = REF_LEQUAL;
+ break;
+ case GL_GREATER:
+ alpha_func = REF_GREATER;
+ break;
+ case GL_NOTEQUAL:
+ alpha_func = REF_NOTEQUAL;
+ break;
+ case GL_GEQUAL:
+ alpha_func = REF_GEQUAL;
+ break;
+ case GL_ALWAYS:
+ /*alpha_func = REF_ALWAYS; */
+ really_enabled = GL_FALSE;
+ break;
+ }
+
+ if (really_enabled) {
+ SETfield(r700->SX_ALPHA_TEST_CONTROL.u32All, alpha_func,
+ ALPHA_FUNC_shift, ALPHA_FUNC_mask);
+ SETbit(r700->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_TEST_ENABLE_bit);
+ r700->SX_ALPHA_REF.f32All = ctx->Color.AlphaRef;
+ } else {
+ CLEARbit(r700->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_TEST_ENABLE_bit);
+ }
+
+}
+
static void r700AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) //---------------
{
+ (void)func;
+ (void)ref;
+ r700SetAlphaState(ctx);
}
@@ -628,7 +677,7 @@ static void r700Enable(GLcontext * ctx, GLenum cap, GLboolean state) //---------
/* empty */
break;
case GL_ALPHA_TEST:
- //r700SetAlphaState(ctx);
+ r700SetAlphaState(ctx);
break;
case GL_COLOR_LOGIC_OP:
r700SetLogicOpState(ctx);
@@ -1327,9 +1376,7 @@ void r700InitState(GLcontext * ctx) //-------------------
/* Specify the number of instances */
r700->VGT_DMA_NUM_INSTANCES.u32All = 1;
- /* not alpha blend */
- CLEARfield(r700->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_FUNC_mask);
- CLEARbit(r700->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_TEST_ENABLE_bit);
+ r700AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef);
/* default shader connections. */
r700->SPI_VS_OUT_ID_0.u32All = 0x03020100;
--
cgit v1.2.3
From ecead301112ad24f5ddb1616c99cde67930751a3 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 21 Jul 2009 11:09:05 -0400
Subject: R600: fix up some build problems
---
src/mesa/drivers/dri/r600/r600_context.c | 14 +-----
src/mesa/drivers/dri/r600/radeon_context.h | 76 ------------------------------
2 files changed, 1 insertion(+), 89 deletions(-)
delete mode 100644 src/mesa/drivers/dri/r600/radeon_context.h
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index 78bad8726b2..eacf8112825 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -57,7 +57,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "drivers/common/driverfuncs.h"
#include "r600_context.h"
-#include "radeon_context.h"
+#include "radeon_common_context.h"
#include "radeon_span.h"
#include "r600_cmdbuf.h"
#include "r600_emit.h"
@@ -218,7 +218,6 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
struct dd_function_table functions;
context_t *r600;
GLcontext *ctx;
- int tcl_mode;
assert(glVisual);
assert(driContextPriv);
@@ -377,23 +376,12 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
TNL_CONTEXT(ctx)->Driver.RunPipeline = r600RunPipeline;
- tcl_mode = driQueryOptioni(&r600->radeon.optionCache, "tcl_mode");
if (driQueryOptionb(&r600->radeon.optionCache, "no_rast")) {
fprintf(stderr, "disabling 3D acceleration\n");
#if R200_MERGED
FALLBACK(&r600->radeon, RADEON_FALLBACK_DISABLE, 1);
#endif
}
- if (tcl_mode == DRI_CONF_TCL_SW ||
- !(r600->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
- if (r600->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
- r600->radeon.radeonScreen->chip_flags &=
- ~RADEON_CHIPSET_TCL;
- fprintf(stderr, "Disabling HW TCL support\n");
- }
- TCL_FALLBACK(r600->radeon.glCtx,
- RADEON_TCL_FALLBACK_TCL_DISABLE, 1);
- }
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/r600/radeon_context.h b/src/mesa/drivers/dri/r600/radeon_context.h
deleted file mode 100644
index 250570f6b89..00000000000
--- a/src/mesa/drivers/dri/r600/radeon_context.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/**************************************************************************
-
-Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
- VA Linux Systems Inc., Fremont, California.
-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
-
-The Weather Channel (TM) funded Tungsten Graphics to develop the
-initial release of the Radeon 8500 driver under the XFree86 license.
-This notice must be preserved.
-
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- * Gareth Hughes
- * Keith Whitwell
- * Kevin E. Martin
- * Nicolai Haehnle
- */
-
-#ifndef __RADEON_CONTEXT_H__
-#define __RADEON_CONTEXT_H__
-
-#include "main/mtypes.h"
-#include "main/colormac.h"
-#include "radeon_screen.h"
-#include "drm.h"
-#include "dri_util.h"
-
-#include "radeon_screen.h"
-
-#if R200_MERGED
-extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
-
-#define FALLBACK( radeon, bit, mode ) do { \
- if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \
- __FUNCTION__, bit, mode ); \
- radeonFallback( (radeon)->glCtx, bit, mode ); \
-} while (0)
-#else
-#define FALLBACK( radeon, bit, mode ) fprintf(stderr, "%s:%s\n", __LINE__, __FILE__);
-#endif
-
-/* TCL fallbacks */
-extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
-
-#if R200_MERGED
-#define TCL_FALLBACK( ctx, bit, mode ) radeonTclFallback( ctx, bit, mode )
-#else
-#define TCL_FALLBACK( ctx, bit, mode ) ;
-#endif
-
-
-#endif /* __RADEON_CONTEXT_H__ */
--
cgit v1.2.3
From 7247446ba81b6bafede9ead750e5b5e81f3f1a10 Mon Sep 17 00:00:00 2001
From: Jerome Glisse
Date: Tue, 21 Jul 2009 21:12:40 +0200
Subject: radeon: fix colorbuffer pitch emission regarding tiling in KMS/CS
case
We need to emit a relocation for pitch register so that kernel can
check and properly setup tiling on the color buffer.
---
src/mesa/drivers/dri/radeon/radeon_state_init.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c
index aaa82b1d6a4..57aa7f1ca46 100644
--- a/src/mesa/drivers/dri/radeon/radeon_state_init.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c
@@ -423,7 +423,7 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
if (drb)
dwords += 6;
if (rrb)
- dwords += 6;
+ dwords += 8;
BEGIN_BATCH_NO_AUTOSTATE(dwords);
/* In the CS case we need to split this up */
@@ -449,7 +449,7 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
- OUT_BATCH(cbpitch);
+ OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
}
// if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) {
--
cgit v1.2.3
From 549b8e6f1a372b6cce4a9013854b3c97ba95af2b Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 21 Jul 2009 13:10:26 -0400
Subject: r600: first pass at polyoffset support
not working yet
---
src/mesa/drivers/dri/r600/r700_state.c | 119 +++++++++++++++++++++++++++++----
1 file changed, 106 insertions(+), 13 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index 5fe4b36b710..eee14b076cc 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -57,6 +57,8 @@
static void r700SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state);
+static void r700UpdatePolygonMode(GLcontext * ctx);
+static void r700SetPolygonOffsetState(GLcontext * ctx, GLboolean state);
void r700SetDefaultStates(context_t *context) //--------------------
{
@@ -613,11 +615,11 @@ static void r700UpdateCulling(GLcontext * ctx)
CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
- if (ctx->Polygon.CullFlag)
+ if (ctx->Polygon.CullFlag)
{
- switch (ctx->Polygon.CullFaceMode)
+ switch (ctx->Polygon.CullFaceMode)
{
- case GL_FRONT:
+ case GL_FRONT:
SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
break;
@@ -636,13 +638,13 @@ static void r700UpdateCulling(GLcontext * ctx)
}
}
- switch (ctx->Polygon.FrontFace)
+ switch (ctx->Polygon.FrontFace)
{
case GL_CW:
SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit);
break;
case GL_CCW:
- CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit);
+ CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit);
break;
default:
CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); /* default: ccw */
@@ -705,7 +707,7 @@ static void r700Enable(GLcontext * ctx, GLenum cap, GLboolean state) //---------
case GL_POLYGON_OFFSET_POINT:
case GL_POLYGON_OFFSET_LINE:
case GL_POLYGON_OFFSET_FILL:
- //r700SetPolygonOffsetState(ctx, state);
+ r700SetPolygonOffsetState(ctx, state);
break;
case GL_SCISSOR_TEST:
radeon_firevertices(&context->radeon);
@@ -782,6 +784,7 @@ static void r700Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) //--
static void r700FrontFace(GLcontext * ctx, GLenum mode) //------------------
{
r700UpdateCulling(ctx);
+ r700UpdatePolygonMode(ctx);
}
static void r700ShadeModel(GLcontext * ctx, GLenum mode) //--------------------
@@ -907,15 +910,111 @@ static void r700LineStipple(GLcontext *ctx, GLint factor, GLushort pattern)
SETfield(r700->PA_SC_LINE_STIPPLE.u32All, 1, AUTO_RESET_CNTL_shift, AUTO_RESET_CNTL_mask);
}
+static void r700SetPolygonOffsetState(GLcontext * ctx, GLboolean state)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ if (state) {
+ SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_FRONT_ENABLE_bit);
+ SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_BACK_ENABLE_bit);
+ SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_PARA_ENABLE_bit);
+ } else {
+ CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_FRONT_ENABLE_bit);
+ CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_BACK_ENABLE_bit);
+ CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_PARA_ENABLE_bit);
+ }
+}
+
static void r700PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units) //--------------
{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ GLfloat constant = units;
+
+ switch (ctx->Visual.depthBits) {
+ case 16:
+ constant *= 4.0;
+ break;
+ case 24:
+ constant *= 2.0;
+ break;
+ }
+
+ factor *= 12.0;
+
+ r700->PA_SU_POLY_OFFSET_FRONT_SCALE.f32All = factor;
+ r700->PA_SU_POLY_OFFSET_FRONT_OFFSET.f32All = constant;
+ r700->PA_SU_POLY_OFFSET_BACK_SCALE.f32All = factor;
+ r700->PA_SU_POLY_OFFSET_BACK_OFFSET.f32All = constant;
}
+static void r700UpdatePolygonMode(GLcontext * ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DISABLE_POLY_MODE, POLY_MODE_shift, POLY_MODE_mask);
+
+ /* Only do something if a polygon mode is wanted, default is GL_FILL */
+ if (ctx->Polygon.FrontMode != GL_FILL ||
+ ctx->Polygon.BackMode != GL_FILL) {
+ GLenum f, b;
+
+ /* Handle GL_CW (clock wise and GL_CCW (counter clock wise)
+ * correctly by selecting the correct front and back face
+ */
+ if (ctx->Polygon.FrontFace == GL_CCW) {
+ f = ctx->Polygon.FrontMode;
+ b = ctx->Polygon.BackMode;
+ } else {
+ f = ctx->Polygon.BackMode;
+ b = ctx->Polygon.FrontMode;
+ }
+
+ /* Enable polygon mode */
+ SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DUAL_MODE, POLY_MODE_shift, POLY_MODE_mask);
+
+ switch (f) {
+ case GL_LINE:
+ SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_LINES,
+ POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask);
+ break;
+ case GL_POINT:
+ SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_POINTS,
+ POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask);
+ break;
+ case GL_FILL:
+ SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_TRIANGLES,
+ POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask);
+ break;
+ }
+
+ switch (b) {
+ case GL_LINE:
+ SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_LINES,
+ POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask);
+ break;
+ case GL_POINT:
+ SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_POINTS,
+ POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask);
+ break;
+ case GL_FILL:
+ SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_TRIANGLES,
+ POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask);
+ break;
+ }
+ }
+}
static void r700PolygonMode(GLcontext * ctx, GLenum face, GLenum mode) //------------------
{
+ (void)face;
+ (void)mode;
+
+ r700UpdatePolygonMode(ctx);
}
-
+
static void r700RenderMode(GLcontext * ctx, GLenum mode) //---------------------
{
}
@@ -1396,12 +1495,6 @@ void r700InitState(GLcontext * ctx) //-------------------
r700->DB_SHADER_CONTROL.u32All = 0;
SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit);
- /* Set up the culling control register */
- SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_TRIANGLES,
- POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask);
- SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_TRIANGLES,
- POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask);
-
/* screen */
r700->PA_SC_SCREEN_SCISSOR_TL.u32All = 0x0;
--
cgit v1.2.3
From 60787df1e549436557393bbeae5951d8f27c1976 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 21 Jul 2009 13:19:46 -0400
Subject: r600: fill in r700UpdateViewportOffset
---
src/mesa/drivers/dri/r600/r700_state.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index eee14b076cc..d54c6e5c146 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -118,10 +118,21 @@ void r700UpdateShaders (GLcontext * ctx) //----------------------------------
*/
void r700UpdateViewportOffset(GLcontext * ctx) //------------------
{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ __DRIdrawablePrivate *dPriv = radeon_get_drawable(&context->radeon);
+ GLfloat xoffset = (GLfloat) dPriv->x;
+ GLfloat yoffset = (GLfloat) dPriv->y + dPriv->h;
+ const GLfloat *v = ctx->Viewport._WindowMap.m;
+ int id = 0;
- //radeonUpdateScissor(ctx);
+ GLfloat tx = v[MAT_TX] + xoffset;
+ GLfloat ty = (-v[MAT_TY]) + yoffset;
+
+ r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx;
+ r700->viewport[id].PA_CL_VPORT_YOFFSET.f32All = ty;
- return;
+ radeonUpdateScissor(ctx);
}
/**
@@ -311,7 +322,7 @@ static void r700SetAlphaState(GLcontext * ctx)
{
context_t *context = R700_CONTEXT(ctx);
R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
- uint32_t alpha_func;
+ uint32_t alpha_func = REF_ALWAYS;
GLboolean really_enabled = ctx->Color.AlphaEnabled;
switch (ctx->Color.AlphaFunc) {
--
cgit v1.2.3
From 5a615e7e653c1aeab280c6ddfdba3ecb763302ec Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 21 Jul 2009 13:28:12 -0400
Subject: r600: set provoking vertex to last vertex for OGL
---
src/mesa/drivers/dri/r600/r700_state.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index d54c6e5c146..9f7e45a4b78 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -1557,6 +1557,9 @@ void r700InitState(GLcontext * ctx) //-------------------
CLEARfield(r700->PA_SU_POINT_MINMAX.u32All, MIN_SIZE_mask);
SETfield(r700->PA_SU_POINT_MINMAX.u32All, 0x8000, MAX_SIZE_shift, MAX_SIZE_mask);
+ /* GL uses last vtx for flat shading components */
+ SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, PROVOKING_VTX_LAST_bit);
+
/* Set up line control */
SETfield(r700->PA_SU_LINE_CNTL.u32All, 0x8,
PA_SU_LINE_CNTL__WIDTH_shift, PA_SU_LINE_CNTL__WIDTH_mask);
--
cgit v1.2.3
From bd62cd735f8ccf1c983d57440e41a6959260cf9f Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 21 Jul 2009 13:46:15 -0400
Subject: r600: fill in point functions
---
src/mesa/drivers/dri/r600/r700_state.c | 53 +++++++++++++++++++++++++++++-----
1 file changed, 45 insertions(+), 8 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index 9f7e45a4b78..6b0aa686e7b 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -816,8 +816,51 @@ static void r700ShadeModel(GLcontext * ctx, GLenum mode) //--------------------
}
}
+/* =============================================================
+ * Point state
+ */
+static void r700PointSize(GLcontext * ctx, GLfloat size)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ /* We need to clamp to user defined range here, because
+ * the HW clamping happens only for per vertex point size. */
+ size = CLAMP(size, ctx->Point.MinSize, ctx->Point.MaxSize);
+
+ /* same size limits for AA, non-AA points */
+ size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize);
+
+ /* format is 12.4 fixed point */
+ SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 16),
+ PA_SU_POINT_SIZE__HEIGHT_shift, PA_SU_POINT_SIZE__HEIGHT_mask);
+ SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 16),
+ PA_SU_POINT_SIZE__WIDTH_shift, PA_SU_POINT_SIZE__WIDTH_mask);
+
+}
+
static void r700PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * param) //---------------
{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ /* format is 12.4 fixed point */
+ switch (pname) {
+ case GL_POINT_SIZE_MIN:
+ SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MinSize * 16.0),
+ MIN_SIZE_shift, MIN_SIZE_mask);
+ break;
+ case GL_POINT_SIZE_MAX:
+ SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MaxSize * 16.0),
+ MAX_SIZE_shift, MAX_SIZE_mask);
+ break;
+ case GL_POINT_DISTANCE_ATTENUATION:
+ break;
+ case GL_POINT_FADE_THRESHOLD_SIZE:
+ break;
+ default:
+ break;
+ }
}
static void r700StencilFuncSeparate(GLcontext * ctx, GLenum face,
@@ -896,10 +939,6 @@ static void r700DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) /
r700UpdateWindow(ctx, 0);
}
-static void r700PointSize(GLcontext * ctx, GLfloat size) //-------------------
-{
-}
-
static void r700LineWidth(GLcontext * ctx, GLfloat widthf) //---------------
{
context_t *context = R700_CONTEXT(ctx);
@@ -1550,10 +1589,8 @@ void r700InitState(GLcontext * ctx) //-------------------
SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Z_OFFSET_ENA_bit);
/* Set up point sizes and min/max values */
- SETfield(r700->PA_SU_POINT_SIZE.u32All, 0x8,
- PA_SU_POINT_SIZE__HEIGHT_shift, PA_SU_POINT_SIZE__HEIGHT_mask);
- SETfield(r700->PA_SU_POINT_SIZE.u32All, 0x8,
- PA_SU_POINT_SIZE__WIDTH_shift, PA_SU_POINT_SIZE__WIDTH_mask);
+ r700PointSize(ctx, 1.0);
+
CLEARfield(r700->PA_SU_POINT_MINMAX.u32All, MIN_SIZE_mask);
SETfield(r700->PA_SU_POINT_MINMAX.u32All, 0x8000, MAX_SIZE_shift, MAX_SIZE_mask);
--
cgit v1.2.3
From db0160bb9d80843c28965890814f0e957e8bf975 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 21 Jul 2009 14:06:47 -0400
Subject: r600: use state functions to set default state
---
src/mesa/drivers/dri/r600/r700_state.c | 92 ++++++++++++++++++++++------------
1 file changed, 59 insertions(+), 33 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index 6b0aa686e7b..186b811cc3a 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -1525,8 +1525,6 @@ void r700InitState(GLcontext * ctx) //-------------------
/* Specify the number of instances */
r700->VGT_DMA_NUM_INSTANCES.u32All = 1;
- r700AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef);
-
/* default shader connections. */
r700->SPI_VS_OUT_ID_0.u32All = 0x03020100;
r700->SPI_VS_OUT_ID_1.u32All = 0x07060504;
@@ -1539,12 +1537,6 @@ void r700InitState(GLcontext * ctx) //-------------------
if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770)
SETfield(r700->SPI_THREAD_GROUPING.u32All, 1, PS_GROUPING_shift, PS_GROUPING_mask);
- r700SetBlendState(ctx);
- r700SetLogicOpState(ctx);
-
- r700->DB_SHADER_CONTROL.u32All = 0;
- SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit);
-
/* screen */
r700->PA_SC_SCREEN_SCISSOR_TL.u32All = 0x0;
@@ -1588,23 +1580,9 @@ void r700InitState(GLcontext * ctx) //-------------------
SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Z_SCALE_ENA_bit);
SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Z_OFFSET_ENA_bit);
- /* Set up point sizes and min/max values */
- r700PointSize(ctx, 1.0);
-
- CLEARfield(r700->PA_SU_POINT_MINMAX.u32All, MIN_SIZE_mask);
- SETfield(r700->PA_SU_POINT_MINMAX.u32All, 0x8000, MAX_SIZE_shift, MAX_SIZE_mask);
-
/* GL uses last vtx for flat shading components */
SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, PROVOKING_VTX_LAST_bit);
- /* Set up line control */
- SETfield(r700->PA_SU_LINE_CNTL.u32All, 0x8,
- PA_SU_LINE_CNTL__WIDTH_shift, PA_SU_LINE_CNTL__WIDTH_mask);
-
- r700->PA_SC_LINE_CNTL.u32All = 0;
- CLEARbit(r700->PA_SC_LINE_CNTL.u32All, EXPAND_LINE_WIDTH_bit);
- SETbit(r700->PA_SC_LINE_CNTL.u32All, LAST_PIXEL_bit);
-
/* Set up vertex control */
r700->PA_SU_VTX_CNTL.u32All = 0;
CLEARfield(r700->PA_SU_VTX_CNTL.u32All, QUANT_MODE_mask);
@@ -1618,7 +1596,66 @@ void r700InitState(GLcontext * ctx) //-------------------
r700->PA_CL_GB_HORZ_CLIP_ADJ.u32All = 0x3F800000;
r700->PA_CL_GB_HORZ_DISC_ADJ.u32All = 0x3F800000;
+ /* Enable all samples for multi-sample anti-aliasing */
+ r700->PA_SC_AA_MASK.u32All = 0xFFFFFFFF;
+ /* Turn off AA */
+ r700->PA_SC_AA_CONFIG.u32All = 0;
+
+ r700->SX_MISC.u32All = 0;
+
+ r700InitSQConfig(ctx);
+
+ r700ColorMask(ctx,
+ ctx->Color.ColorMask[RCOMP],
+ ctx->Color.ColorMask[GCOMP],
+ ctx->Color.ColorMask[BCOMP],
+ ctx->Color.ColorMask[ACOMP]);
+
+ r700Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test);
+ r700DepthMask(ctx, ctx->Depth.Mask);
+ r700DepthFunc(ctx, ctx->Depth.Func);
+ SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit);
+
+ /* stencil */
+ r700Enable(ctx, GL_STENCIL_TEST, ctx->Stencil._Enabled);
+ r700StencilMaskSeparate(ctx, 0, ctx->Stencil.WriteMask[0]);
+ r700StencilFuncSeparate(ctx, 0, ctx->Stencil.Function[0],
+ ctx->Stencil.Ref[0], ctx->Stencil.ValueMask[0]);
+ r700StencilOpSeparate(ctx, 0, ctx->Stencil.FailFunc[0],
+ ctx->Stencil.ZFailFunc[0],
+ ctx->Stencil.ZPassFunc[0]);
+
+ r700UpdateCulling(ctx);
+
+ r700SetBlendState(ctx);
+ r700SetLogicOpState(ctx);
+
+ r700AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef);
+ r700Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled);
+
+ r700PointSize(ctx, 1.0);
+
+ CLEARfield(r700->PA_SU_POINT_MINMAX.u32All, MIN_SIZE_mask);
+ SETfield(r700->PA_SU_POINT_MINMAX.u32All, 0x8000, MAX_SIZE_shift, MAX_SIZE_mask);
+
+ r700LineWidth(ctx, 1.0);
+
+ r700->PA_SC_LINE_CNTL.u32All = 0;
+ CLEARbit(r700->PA_SC_LINE_CNTL.u32All, EXPAND_LINE_WIDTH_bit);
+ SETbit(r700->PA_SC_LINE_CNTL.u32All, LAST_PIXEL_bit);
+
+ r700ShadeModel(ctx, ctx->Light.ShadeModel);
+ r700PolygonMode(ctx, GL_FRONT, ctx->Polygon.FrontMode);
+ r700PolygonMode(ctx, GL_BACK, ctx->Polygon.BackMode);
+ r700PolygonOffset(ctx, ctx->Polygon.OffsetFactor,
+ ctx->Polygon.OffsetUnits);
+ r700Enable(ctx, GL_POLYGON_OFFSET_POINT, ctx->Polygon.OffsetPoint);
+ r700Enable(ctx, GL_POLYGON_OFFSET_LINE, ctx->Polygon.OffsetLine);
+ r700Enable(ctx, GL_POLYGON_OFFSET_FILL, ctx->Polygon.OffsetFill);
+
/* CB */
+ r700BlendColor(ctx, ctx->Color.BlendColor);
+
r700->CB_CLEAR_RED_R6XX.f32All = 1.0; //r6xx only
r700->CB_CLEAR_GREEN_R6XX.f32All = 0.0; //r6xx only
r700->CB_CLEAR_BLUE_R6XX.f32All = 1.0; //r6xx only
@@ -1644,17 +1681,6 @@ void r700InitState(GLcontext * ctx) //-------------------
/* Set up color compare mask */
r700->CB_CLRCMP_MSK.u32All = 0xFFFFFFFF;
- /* default color mask */
- SETfield(r700->CB_SHADER_MASK.u32All, 0xF, OUTPUT0_ENABLE_shift, OUTPUT0_ENABLE_mask);
-
- /* Enable all samples for multi-sample anti-aliasing */
- r700->PA_SC_AA_MASK.u32All = 0xFFFFFFFF;
- /* Turn off AA */
- r700->PA_SC_AA_CONFIG.u32All = 0;
-
- r700->SX_MISC.u32All = 0;
-
- r700InitSQConfig(ctx);
}
void r700InitStateFuncs(struct dd_function_table *functions) //-----------------
--
cgit v1.2.3
From 72a3d8845f55b4a82f7e3734c8b3aa51b65e220d Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 21 Jul 2009 17:07:17 -0400
Subject: r600: add stencil support
---
src/mesa/drivers/dri/r600/r700_state.c | 161 ++++++++++++++++++++++++++++++---
1 file changed, 146 insertions(+), 15 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index 186b811cc3a..a2ddebb1cee 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -59,6 +59,7 @@
static void r700SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state);
static void r700UpdatePolygonMode(GLcontext * ctx);
static void r700SetPolygonOffsetState(GLcontext * ctx, GLboolean state);
+static void r700SetStencilState(GLcontext * ctx, GLboolean state);
void r700SetDefaultStates(context_t *context) //--------------------
{
@@ -273,40 +274,40 @@ static void r700SetDepthState(GLcontext * ctx)
switch (ctx->Depth.Func)
{
- case GL_NEVER:
- SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_NEVER,
+ case GL_NEVER:
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_NEVER,
ZFUNC_shift, ZFUNC_mask);
break;
case GL_LESS:
- SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_LESS,
- ZFUNC_shift, ZFUNC_mask);
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_LESS,
+ ZFUNC_shift, ZFUNC_mask);
break;
case GL_EQUAL:
- SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_EQUAL,
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_EQUAL,
ZFUNC_shift, ZFUNC_mask);
break;
case GL_LEQUAL:
- SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_LEQUAL,
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_LEQUAL,
ZFUNC_shift, ZFUNC_mask);
break;
case GL_GREATER:
- SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_GREATER,
- ZFUNC_shift, ZFUNC_mask);
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_GREATER,
+ ZFUNC_shift, ZFUNC_mask);
break;
case GL_NOTEQUAL:
- SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_NOTEQUAL,
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_NOTEQUAL,
ZFUNC_shift, ZFUNC_mask);
break;
case GL_GEQUAL:
- SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_GEQUAL,
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_GEQUAL,
ZFUNC_shift, ZFUNC_mask);
break;
case GL_ALWAYS:
- SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_ALWAYS,
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_ALWAYS,
ZFUNC_shift, ZFUNC_mask);
break;
default:
- SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_ALWAYS,
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_ALWAYS,
ZFUNC_shift, ZFUNC_mask);
break;
}
@@ -710,7 +711,7 @@ static void r700Enable(GLcontext * ctx, GLenum cap, GLboolean state) //---------
r700SetDepthState(ctx);
break;
case GL_STENCIL_TEST:
- //r700SetStencilState(ctx, state);
+ r700SetStencilState(ctx, state);
break;
case GL_CULL_FACE:
r700UpdateCulling(ctx);
@@ -863,24 +864,154 @@ static void r700PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * pa
}
}
+static int translate_stencil_func(int func)
+{
+ switch (func) {
+ case GL_NEVER:
+ return REF_NEVER;
+ case GL_LESS:
+ return REF_LESS;
+ case GL_EQUAL:
+ return REF_EQUAL;
+ case GL_LEQUAL:
+ return REF_LEQUAL;
+ case GL_GREATER:
+ return REF_GREATER;
+ case GL_NOTEQUAL:
+ return REF_NOTEQUAL;
+ case GL_GEQUAL:
+ return REF_GEQUAL;
+ case GL_ALWAYS:
+ return REF_ALWAYS;
+ }
+ return 0;
+}
+
+static int translate_stencil_op(int op)
+{
+ switch (op) {
+ case GL_KEEP:
+ return STENCIL_KEEP;
+ case GL_ZERO:
+ return STENCIL_ZERO;
+ case GL_REPLACE:
+ return STENCIL_REPLACE;
+ case GL_INCR:
+ return STENCIL_INCR_CLAMP;
+ case GL_DECR:
+ return STENCIL_DECR_CLAMP;
+ case GL_INCR_WRAP_EXT:
+ return STENCIL_INCR_WRAP;
+ case GL_DECR_WRAP_EXT:
+ return STENCIL_DECR_WRAP;
+ case GL_INVERT:
+ return STENCIL_INVERT;
+ default:
+ WARN_ONCE("Do not know how to translate stencil op");
+ return STENCIL_KEEP;
+ }
+ return 0;
+}
+
+static void r700SetStencilState(GLcontext * ctx, GLboolean state)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ GLboolean hw_stencil = GL_FALSE;
+
+ //fixme
+ //r300CatchStencilFallback(ctx);
+
+ if (ctx->DrawBuffer) {
+ struct radeon_renderbuffer *rrbStencil
+ = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
+ hw_stencil = (rrbStencil && rrbStencil->bo);
+ }
+
+ if (hw_stencil) {
+ if (state)
+ SETbit(r700->DB_DEPTH_CONTROL.u32All, STENCIL_ENABLE_bit);
+ else
+ CLEARbit(r700->DB_DEPTH_CONTROL.u32All, STENCIL_ENABLE_bit);
+ }
+}
+
static void r700StencilFuncSeparate(GLcontext * ctx, GLenum face,
GLenum func, GLint ref, GLuint mask) //---------------------
{
-}
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ const unsigned back = ctx->Stencil._BackFace;
+
+ //fixme
+ //r300CatchStencilFallback(ctx);
+
+ //front
+ SETfield(r700->DB_STENCILREFMASK.u32All, ctx->Stencil.Ref[0],
+ STENCILREF_shift, STENCILREF_mask);
+ SETfield(r700->DB_STENCILREFMASK.u32All, ctx->Stencil.ValueMask[0],
+ STENCILMASK_shift, STENCILMASK_mask);
+
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, translate_stencil_func(ctx->Stencil.Function[0]),
+ STENCILFUNC_shift, STENCILFUNC_mask);
+ //back
+ SETfield(r700->DB_STENCILREFMASK_BF.u32All, ctx->Stencil.Ref[back],
+ STENCILREF_BF_shift, STENCILREF_BF_mask);
+ SETfield(r700->DB_STENCILREFMASK_BF.u32All, ctx->Stencil.ValueMask[back],
+ STENCILMASK_BF_shift, STENCILMASK_BF_mask);
+
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, translate_stencil_func(ctx->Stencil.Function[back]),
+ STENCILFUNC_BF_shift, STENCILFUNC_BF_mask);
+
+}
static void r700StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) //--------------
{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ const unsigned back = ctx->Stencil._BackFace;
+
+ //fixme
+ //r300CatchStencilFallback(ctx);
+
+ // front
+ SETfield(r700->DB_STENCILREFMASK.u32All, ctx->Stencil.WriteMask[0],
+ STENCILWRITEMASK_shift, STENCILWRITEMASK_mask);
+
+ // back
+ SETfield(r700->DB_STENCILREFMASK_BF.u32All, ctx->Stencil.WriteMask[back],
+ STENCILWRITEMASK_BF_shift, STENCILWRITEMASK_BF_mask);
+
}
static void r700StencilOpSeparate(GLcontext * ctx, GLenum face,
GLenum fail, GLenum zfail, GLenum zpass) //--------------------
{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ const unsigned back = ctx->Stencil._BackFace;
+
+ //fixme
+ //r300CatchStencilFallback(ctx);
+
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, translate_stencil_op(ctx->Stencil.FailFunc[0]),
+ STENCILFAIL_shift, STENCILFAIL_mask);
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, translate_stencil_op(ctx->Stencil.ZFailFunc[0]),
+ STENCILZFAIL_shift, STENCILZFAIL_mask);
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, translate_stencil_op(ctx->Stencil.ZPassFunc[0]),
+ STENCILZPASS_shift, STENCILZPASS_mask);
+
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, translate_stencil_op(ctx->Stencil.FailFunc[back]),
+ STENCILFAIL_BF_shift, STENCILFAIL_BF_mask);
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, translate_stencil_op(ctx->Stencil.ZFailFunc[back]),
+ STENCILZFAIL_BF_shift, STENCILZFAIL_BF_mask);
+ SETfield(r700->DB_DEPTH_CONTROL.u32All, translate_stencil_op(ctx->Stencil.ZPassFunc[back]),
+ STENCILZPASS_BF_shift, STENCILZPASS_BF_mask);
}
static void r700UpdateWindow(GLcontext * ctx, int id) //--------------------
{
-
context_t *context = R700_CONTEXT(ctx);
R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
__DRIdrawablePrivate *dPriv = radeon_get_drawable(&context->radeon);
--
cgit v1.2.3
From 7edb2a9eef698c386042e6cead68ac516ec15616 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 21 Jul 2009 17:44:36 -0400
Subject: Revert "r600: fix dst reg indexing"
This reverts commit cc893d9a98255d3c26df7123ba5cc02e478c9328.
Richard has the proper fix.
---
src/mesa/drivers/dri/r600/r700_assembler.c | 11 ++---------
1 file changed, 2 insertions(+), 9 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index 2d40dfa708e..1d41c5cf785 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -2193,7 +2193,6 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
GLboolean next_ins(r700_AssemblerBase *pAsm)
{
struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
- uint index;
if( GL_TRUE == IsTex(pILInst->Opcode) )
{
@@ -2214,20 +2213,14 @@ GLboolean next_ins(r700_AssemblerBase *pAsm)
if(pAsm->D.dst.rtype == DST_REG_OUT)
{
- if (pAsm->starting_export_register_number >= pAsm->D.dst.reg) {
- index = 0;
- } else {
- index = pAsm->D.dst.reg - pAsm->starting_export_register_number;
- }
-
if(pAsm->D.dst.op3)
{
// There is no mask for OP3 instructions, so all channels are written
- pAsm->pucOutMask[index] = 0xF;
+ pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
}
else
{
- pAsm->pucOutMask[index]
+ pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
|= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
}
}
--
cgit v1.2.3
From 5e3c62b550407111cf5f7a700abb18f947bd6903 Mon Sep 17 00:00:00 2001
From: Richard Li
Date: Tue, 21 Jul 2009 17:56:06 -0400
Subject: r600: fix dst reg indexing for real
This fixes segfaults in apps like teapot and tunnel
---
src/mesa/drivers/dri/r600/r700_vertprog.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index cbfeaf071b4..172e6ee501e 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -86,11 +86,11 @@ unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm,
//}
//TODO : dealing fog.
- //unBit = 1 << VERT_RESULT_FOGC;
- //if(mesa_vp->Base.OutputsWritten & unBit)
- //{
- // pAsm->ucVP_OutputMap[VERT_RESULT_FOGC] = unTotal++;
- //}
+ unBit = 1 << VERT_RESULT_FOGC;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_FOGC] = unTotal++;
+ }
//TODO : dealing point size.
//unBit = 1 << VERT_RESULT_PSIZ;
--
cgit v1.2.3
From fd31f92cea0ce8613a22d8f4b3c75b340bcc5689 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Wed, 22 Jul 2009 00:39:00 +0100
Subject: gallium: simplify tgsi_full_immediate struct
Remove the need to have a pointer in this struct by just including
the immediate data inline. Having a pointer in the struct introduces
complications like needing to alloc/free the data pointed to, uncertainty
about who owns the data, etc. There doesn't seem to be a need for it,
and it is unlikely to make much difference plus or minus to performance.
Added some asserts as we now will trip up on immediates with more
than four elements. There were actually already quite a few such asserts,
but the >4 case could be used in the future to specify indexable immediate
ranges, such as lookup tables.
---
src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 5 ++++-
src/gallium/auxiliary/draw/draw_vs_aos.c | 3 ++-
src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 6 ++++--
src/gallium/auxiliary/tgsi/tgsi_build.c | 23 +++++++++++++---------
src/gallium/auxiliary/tgsi/tgsi_build.h | 2 +-
src/gallium/auxiliary/tgsi/tgsi_dump.c | 4 +++-
src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 3 ++-
src/gallium/auxiliary/tgsi/tgsi_exec.c | 10 +++++-----
src/gallium/auxiliary/tgsi/tgsi_parse.c | 13 ++----------
src/gallium/auxiliary/tgsi/tgsi_parse.h | 6 +-----
src/gallium/auxiliary/tgsi/tgsi_ppc.c | 2 +-
src/gallium/auxiliary/tgsi/tgsi_sse2.c | 2 +-
src/gallium/auxiliary/tgsi/tgsi_text.c | 5 ++++-
src/gallium/drivers/cell/ppu/cell_gen_fp.c | 4 ++--
.../drivers/i915simple/i915_fpc_translate.c | 3 ++-
src/gallium/drivers/i965simple/brw_vs_emit.c | 8 ++++----
src/gallium/drivers/nv20/nv20_vertprog.c | 8 ++++----
src/gallium/drivers/nv30/nv30_fragprog.c | 8 ++++----
src/gallium/drivers/nv30/nv30_vertprog.c | 8 ++++----
src/gallium/drivers/nv40/nv40_fragprog.c | 8 ++++----
src/gallium/drivers/nv40/nv40_vertprog.c | 8 ++++----
src/gallium/drivers/nv50/nv50_program.c | 8 ++++----
src/gallium/drivers/r300/r300_fs.c | 3 +--
src/gallium/drivers/r300/r300_vs.c | 3 +--
src/gallium/include/pipe/p_shader_tokens.h | 2 +-
src/mesa/state_tracker/st_mesa_to_tgsi.c | 6 +++++-
26 files changed, 84 insertions(+), 77 deletions(-)
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 30a6d2919d9..283502cdf3e 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -256,7 +256,10 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
uint size = 4;
immed = tgsi_default_full_immediate();
immed.Immediate.NrTokens = 1 + size; /* one for the token itself */
- immed.u.Pointer = (void *) value;
+ immed.u[0].Float = value[0];
+ immed.u[1].Float = value[1];
+ immed.u[2].Float = value[2];
+ immed.u[3].Float = value[3];
ctx->emit_immediate(ctx, &immed);
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 9e37a26c1e2..68402bed5f3 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -1891,8 +1891,9 @@ static boolean note_immediate( struct aos_compilation *cp,
unsigned pos = cp->num_immediates++;
unsigned j;
+ assert( imm->Immediate.NrTokens <= 4 + 1 );
for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
- cp->vaos->machine->immediate[pos][j] = imm->u.ImmediateFloat32[j].Float;
+ cp->vaos->machine->immediate[pos][j] = imm->u[j].Float;
}
return TRUE;
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
index 5b08200d142..9c8f89d5206 100644
--- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -160,10 +160,11 @@ translate_immediate(Storage *storage,
{
float vec[4];
int i;
+ assert( imm->Immediate.NrTokens <= 4 + 1 );
for (i = 0; i < imm->Immediate.NrTokens - 1; ++i) {
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
- vec[i] = imm->u.ImmediateFloat32[i].Float;
+ vec[i] = imm->u[i].Float;
break;
default:
assert(0);
@@ -179,10 +180,11 @@ translate_immediateir(StorageSoa *storage,
{
float vec[4];
int i;
+ assert( imm->Immediate.NrTokens <= 4 + 1 );
for (i = 0; i < imm->Immediate.NrTokens - 1; ++i) {
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
- vec[i] = imm->u.ImmediateFloat32[i].Float;
+ vec[i] = imm->u[i].Float;
break;
default:
assert(0);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index d272533d63c..010d501c601 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -335,7 +335,10 @@ tgsi_default_full_immediate( void )
struct tgsi_full_immediate fullimm;
fullimm.Immediate = tgsi_default_immediate();
- fullimm.u.Pointer = (void *) 0;
+ fullimm.u[0].Float = 0.0f;
+ fullimm.u[1].Float = 0.0f;
+ fullimm.u[2].Float = 0.0f;
+ fullimm.u[3].Float = 0.0f;
return fullimm;
}
@@ -352,19 +355,19 @@ immediate_grow(
header_bodysize_grow( header );
}
-struct tgsi_immediate_float32
+union tgsi_immediate_data
tgsi_build_immediate_float32(
float value,
struct tgsi_immediate *immediate,
struct tgsi_header *header )
{
- struct tgsi_immediate_float32 immediate_float32;
+ union tgsi_immediate_data immediate_data;
- immediate_float32.Float = value;
+ immediate_data.Float = value;
immediate_grow( immediate, header );
- return immediate_float32;
+ return immediate_data;
}
unsigned
@@ -384,16 +387,18 @@ tgsi_build_full_immediate(
*immediate = tgsi_build_immediate( header );
+ assert( full_imm->Immediate.NrTokens <= 4 + 1 );
+
for( i = 0; i < full_imm->Immediate.NrTokens - 1; i++ ) {
- struct tgsi_immediate_float32 *if32;
+ union tgsi_immediate_data *data;
if( maxsize <= size )
return 0;
- if32 = (struct tgsi_immediate_float32 *) &tokens[size];
+ data = (union tgsi_immediate_data *) &tokens[size];
size++;
- *if32 = tgsi_build_immediate_float32(
- full_imm->u.ImmediateFloat32[i].Float,
+ *data = tgsi_build_immediate_float32(
+ full_imm->u[i].Float,
immediate,
header );
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h
index 9a3a077cf2b..17d977b0597 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.h
@@ -119,7 +119,7 @@ tgsi_build_immediate(
struct tgsi_full_immediate
tgsi_default_full_immediate( void );
-struct tgsi_immediate_float32
+union tgsi_immediate_data
tgsi_build_immediate_float32(
float value,
struct tgsi_immediate *immediate,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index a6994ecd48b..e1cd8479cb4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -295,10 +295,12 @@ iter_immediate(
ENM( imm->Immediate.DataType, immediate_type_names );
TXT( " { " );
+
+ assert( imm->Immediate.NrTokens <= 4 + 1 );
for (i = 0; i < imm->Immediate.NrTokens - 1; i++) {
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
- FLT( imm->u.ImmediateFloat32[i].Float );
+ FLT( imm->u[i].Float );
break;
default:
assert( 0 );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
index 3dc61c48ca3..c944760ca67 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -283,12 +283,13 @@ dump_immediate_verbose(
UIX( imm->Immediate.Padding );
}
+ assert( imm->Immediate.NrTokens <= 4 + 1 );
for( i = 0; i < imm->Immediate.NrTokens - 1; i++ ) {
EOL();
switch( imm->Immediate.DataType ) {
case TGSI_IMM_FLOAT32:
TXT( "\nFloat: " );
- FLT( imm->u.ImmediateFloat32[i].Float );
+ FLT( imm->u[i].Float );
break;
default:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index fe571a86bca..8c68a10a38e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -301,14 +301,14 @@ tgsi_exec_machine_bind_shader(
case TGSI_TOKEN_TYPE_IMMEDIATE:
{
uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
- assert( size % 4 == 0 );
- assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
+ assert( size <= 4 );
+ assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
for( i = 0; i < size; i++ ) {
- mach->Imms[mach->ImmLimit + i / 4][i % 4] =
- parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+ mach->Imms[mach->ImmLimit][i] =
+ parse.FullToken.FullImmediate.u[i].Float;
}
- mach->ImmLimit += size / 4;
+ mach->ImmLimit += 1;
}
break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 7f2cfb7988f..4870f82b6bd 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -42,9 +42,6 @@ void
tgsi_full_token_free(
union tgsi_full_token *full_token )
{
- if( full_token->Token.Type == TGSI_TOKEN_TYPE_IMMEDIATE ) {
- FREE( (void *) full_token->FullImmediate.u.Pointer );
- }
}
unsigned
@@ -156,14 +153,8 @@ tgsi_parse_token(
case TGSI_IMM_FLOAT32:
{
uint imm_count = imm->Immediate.NrTokens - 1;
- struct tgsi_immediate_float32 *data;
-
- data = (struct tgsi_immediate_float32 *) MALLOC(sizeof(struct tgsi_immediate_float32) * imm_count);
- if (data) {
- for (i = 0; i < imm_count; i++) {
- next_token(ctx, &data[i]);
- }
- imm->u.ImmediateFloat32 = data;
+ for (i = 0; i < imm_count; i++) {
+ next_token(ctx, &imm->u[i]);
}
}
break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index a289e26e3ac..1035bda1a87 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -73,11 +73,7 @@ struct tgsi_full_declaration
struct tgsi_full_immediate
{
struct tgsi_immediate Immediate;
- union
- {
- const void *Pointer;
- const struct tgsi_immediate_float32 *ImmediateFloat32;
- } u;
+ union tgsi_immediate_data u[4];
};
#define TGSI_FULL_MAX_DST_REGISTERS 2
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 0c64ae57131..fddf54460a2 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -1333,7 +1333,7 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
for (i = 0; i < size; i++) {
immediates[num_immediates][i] =
- parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+ parse.FullToken.FullImmediate.u[i].Float;
}
num_immediates++;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 4c3343d26c3..c3470176f93 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -2953,7 +2953,7 @@ tgsi_emit_sse2(
assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
for( i = 0; i < size; i++ ) {
immediates[num_immediates][i] =
- parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+ parse.FullToken.FullImmediate.u[i].Float;
}
#if 0
debug_printf("SSE FS immediate[%d] = %f %f %f %f\n",
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index a76bbc91400..3024da6a328 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -1091,7 +1091,10 @@ static boolean parse_immediate( struct translate_ctx *ctx )
imm = tgsi_default_full_immediate();
imm.Immediate.NrTokens += 4;
imm.Immediate.DataType = TGSI_IMM_FLOAT32;
- imm.u.Pointer = values;
+ imm.u[0].Float = values[0];
+ imm.u[1].Float = values[1];
+ imm.u[2].Float = values[2];
+ imm.u[3].Float = values[3];
advance = tgsi_build_full_immediate(
&imm,
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 5a889a6119d..7cd5656a7e6 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -1875,9 +1875,9 @@ emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
assert(gen->num_imm < MAX_TEMPS);
for (ch = 0; ch < 4; ch++) {
- float val = immed->u.ImmediateFloat32[ch].Float;
+ float val = immed->u[ch].Float;
- if (ch > 0 && val == immed->u.ImmediateFloat32[ch - 1].Float) {
+ if (ch > 0 && val == immed->u[ch - 1].Float) {
/* re-use previous register */
gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1];
}
diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c
index 961c1bf2134..89504ced276 100644
--- a/src/gallium/drivers/i915simple/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c
@@ -975,8 +975,9 @@ i915_translate_instructions(struct i915_fp_compile *p,
= &parse.FullToken.FullImmediate;
const uint pos = p->num_immediates++;
uint j;
+ assert( imm->Immediate.NrTokens <= 4 + 1 );
for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
- p->immediates[pos][j] = imm->u.ImmediateFloat32[j].Float;
+ p->immediates[pos][j] = imm->u[j].Float;
}
}
break;
diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c
index e03d6534821..3ee82d95b3a 100644
--- a/src/gallium/drivers/i965simple/brw_vs_emit.c
+++ b/src/gallium/drivers/i965simple/brw_vs_emit.c
@@ -1294,10 +1294,10 @@ void brw_vs_emit(struct brw_vs_compile *c)
case TGSI_TOKEN_TYPE_IMMEDIATE: {
struct tgsi_full_immediate *imm = &parse.FullToken.FullImmediate;
assert(imm->Immediate.NrTokens == 4 + 1);
- c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u.ImmediateFloat32[0].Float;
- c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u.ImmediateFloat32[1].Float;
- c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u.ImmediateFloat32[2].Float;
- c->prog_data.imm_buf[c->prog_data.num_imm][3] = imm->u.ImmediateFloat32[3].Float;
+ c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u[0].Float;
+ c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u[1].Float;
+ c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u[2].Float;
+ c->prog_data.imm_buf[c->prog_data.num_imm][3] = imm->u[3].Float;
c->prog_data.num_imm++;
}
break;
diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c
index c1e588902b2..388245ecb04 100644
--- a/src/gallium/drivers/nv20/nv20_vertprog.c
+++ b/src/gallium/drivers/nv20/nv20_vertprog.c
@@ -617,10 +617,10 @@ nv20_vertprog_translate(struct nv20_context *nv20,
assert(imm->Immediate.NrTokens == 4 + 1);
vpc->imm[vpc->nr_imm++] =
constant(vpc, -1,
- imm->u.ImmediateFloat32[0].Float,
- imm->u.ImmediateFloat32[1].Float,
- imm->u.ImmediateFloat32[2].Float,
- imm->u.ImmediateFloat32[3].Float);
+ imm->u[0].Float,
+ imm->u[1].Float,
+ imm->u[2].Float,
+ imm->u[3].Float);
}
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index 1d1c556fb11..a48ba9782b3 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -704,10 +704,10 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc)
assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
assert(fpc->nr_imm < MAX_IMM);
- vals[0] = imm->u.ImmediateFloat32[0].Float;
- vals[1] = imm->u.ImmediateFloat32[1].Float;
- vals[2] = imm->u.ImmediateFloat32[2].Float;
- vals[3] = imm->u.ImmediateFloat32[3].Float;
+ vals[0] = imm->u[0].Float;
+ vals[1] = imm->u[1].Float;
+ vals[2] = imm->u[2].Float;
+ vals[3] = imm->u[3].Float;
fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
}
break;
diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c
index c7514efcfea..14a5c0260d0 100644
--- a/src/gallium/drivers/nv30/nv30_vertprog.c
+++ b/src/gallium/drivers/nv30/nv30_vertprog.c
@@ -617,10 +617,10 @@ nv30_vertprog_translate(struct nv30_context *nv30,
assert(imm->Immediate.NrTokens == 4 + 1);
vpc->imm[vpc->nr_imm++] =
constant(vpc, -1,
- imm->u.ImmediateFloat32[0].Float,
- imm->u.ImmediateFloat32[1].Float,
- imm->u.ImmediateFloat32[2].Float,
- imm->u.ImmediateFloat32[3].Float);
+ imm->u[0].Float,
+ imm->u[1].Float,
+ imm->u[2].Float,
+ imm->u[3].Float);
}
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index 680976da56b..32d9ed1a7f8 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -790,10 +790,10 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc)
assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
assert(fpc->nr_imm < MAX_IMM);
- vals[0] = imm->u.ImmediateFloat32[0].Float;
- vals[1] = imm->u.ImmediateFloat32[1].Float;
- vals[2] = imm->u.ImmediateFloat32[2].Float;
- vals[3] = imm->u.ImmediateFloat32[3].Float;
+ vals[0] = imm->u[0].Float;
+ vals[1] = imm->u[1].Float;
+ vals[2] = imm->u[2].Float;
+ vals[3] = imm->u[3].Float;
fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
}
break;
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index e75e8d3f424..0382dbba8f6 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -788,10 +788,10 @@ nv40_vertprog_translate(struct nv40_context *nv40,
assert(imm->Immediate.NrTokens == 4 + 1);
vpc->imm[vpc->nr_imm++] =
constant(vpc, -1,
- imm->u.ImmediateFloat32[0].Float,
- imm->u.ImmediateFloat32[1].Float,
- imm->u.ImmediateFloat32[2].Float,
- imm->u.ImmediateFloat32[3].Float);
+ imm->u[0].Float,
+ imm->u[1].Float,
+ imm->u[2].Float,
+ imm->u[3].Float);
}
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 5f7d06dbecb..4ec9c03305f 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1809,10 +1809,10 @@ nv50_program_tx_prep(struct nv50_pc *pc)
const struct tgsi_full_immediate *imm =
&p.FullToken.FullImmediate;
- ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
- imm->u.ImmediateFloat32[1].Float,
- imm->u.ImmediateFloat32[2].Float,
- imm->u.ImmediateFloat32[3].Float);
+ ctor_immd(pc, imm->u[0].Float,
+ imm->u[1].Float,
+ imm->u[2].Float,
+ imm->u[3].Float);
}
break;
case TGSI_TOKEN_TYPE_DECLARATION:
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 4b304306d0f..8672e211bc4 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -67,8 +67,7 @@ void r300_translate_fragment_shader(struct r300_context* r300,
for (i = 0; i < 4; i++) {
consts->constants[assembler->imm_offset +
assembler->imm_count][i] =
- parser.FullToken.FullImmediate.u.ImmediateFloat32[i]
- .Float;
+ parser.FullToken.FullImmediate.u[i].Float;
}
assembler->imm_count++;
break;
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index f87435f9f07..a664a316e8c 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -378,8 +378,7 @@ void r300_translate_vertex_shader(struct r300_context* r300,
for (i = 0; i < 4; i++) {
consts->constants[assembler->imm_offset +
assembler->imm_count][i] =
- parser.FullToken.FullImmediate.u.ImmediateFloat32[i]
- .Float;
+ parser.FullToken.FullImmediate.u[i].Float;
}
assembler->imm_count++;
break;
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index b00cfe3423c..b87aae61973 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -152,7 +152,7 @@ struct tgsi_immediate
unsigned Extended : 1; /**< BOOL */
};
-struct tgsi_immediate_float32
+union tgsi_immediate_data
{
float Float;
};
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index e150dff9bbc..6380cd6b2a8 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -225,11 +225,15 @@ static struct tgsi_full_immediate
make_immediate(const float *value, uint size)
{
struct tgsi_full_immediate imm;
+ unsigned i;
imm = tgsi_default_full_immediate();
imm.Immediate.NrTokens += size;
imm.Immediate.DataType = TGSI_IMM_FLOAT32;
- imm.u.Pointer = value;
+
+ for (i = 0; i < size; i++)
+ imm.u[i].Float = value[i];
+
return imm;
}
--
cgit v1.2.3
From ede9f3b52ecb27ada81fee06a943bb595c60eaee Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Fri, 12 Jun 2009 11:59:01 +0100
Subject: gallium: remove multiple aliases for TGSI opcodes
This is a source of ongoing confusion. TGSI has multiple names for
opcodes where the same semantics originate in multiple shader APIs.
For instance, TGSI includes both Mesa/GLSL and DX/SM30 names for
opcodes with the same semantics, but aliases those names to the same
underlying opcode number.
This makes it very difficult to visually inspect two sets of opcodes
(eg in state tracker & driver) and check if they implement the same
functionality.
This patch arbitarily rips out the versions of the opcodes not currently
favoured by the mesa state tracker and leaves us with a single name
for each distinct operation.
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 14 +-
src/gallium/auxiliary/tgsi/tgsi_exec.c | 24 +--
src/gallium/auxiliary/tgsi/tgsi_info.c | 249 +++++++++++++++--------------
src/gallium/auxiliary/tgsi/tgsi_info.h | 3 +-
src/gallium/auxiliary/tgsi/tgsi_sse2.c | 24 +--
src/gallium/auxiliary/tgsi/tgsi_text.c | 10 --
src/gallium/include/pipe/p_shader_tokens.h | 80 ++++-----
7 files changed, 194 insertions(+), 210 deletions(-)
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 68402bed5f3..62e04a65f30 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -1758,24 +1758,24 @@ emit_instruction( struct aos_compilation *cp,
case TGSI_OPCODE_SUB:
return emit_SUB(cp, inst);
- case TGSI_OPCODE_LERP:
+ case TGSI_OPCODE_LRP:
// return emit_LERP(cp, inst);
return FALSE;
- case TGSI_OPCODE_FRAC:
+ case TGSI_OPCODE_FRC:
return emit_FRC(cp, inst);
case TGSI_OPCODE_CLAMP:
// return emit_CLAMP(cp, inst);
return FALSE;
- case TGSI_OPCODE_FLOOR:
+ case TGSI_OPCODE_FLR:
return emit_FLR(cp, inst);
case TGSI_OPCODE_ROUND:
return emit_RND(cp, inst);
- case TGSI_OPCODE_EXPBASE2:
+ case TGSI_OPCODE_EX2:
#if FAST_MATH
return emit_EXPBASE2(cp, inst);
#elif 0
@@ -1787,13 +1787,13 @@ emit_instruction( struct aos_compilation *cp,
return FALSE;
#endif
- case TGSI_OPCODE_LOGBASE2:
+ case TGSI_OPCODE_LG2:
return emit_LG2(cp, inst);
- case TGSI_OPCODE_POWER:
+ case TGSI_OPCODE_POW:
return emit_POW(cp, inst);
- case TGSI_OPCODE_CROSSPRODUCT:
+ case TGSI_OPCODE_XPD:
return emit_XPD(cp, inst);
case TGSI_OPCODE_ABS:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 8c68a10a38e..b193fd7a0a5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2020,8 +2020,7 @@ exec_instruction(
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
- case TGSI_OPCODE_FLOOR:
- /* TGSI_OPCODE_FLR */
+ case TGSI_OPCODE_FLR:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
micro_flr( &r[0], &r[0] );
@@ -2290,8 +2289,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_LERP:
- /* TGSI_OPCODE_LRP */
+ case TGSI_OPCODE_LRP:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
@@ -2325,8 +2323,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_DOT2ADD:
- /* TGSI_OPCODE_DP2A */
+ case TGSI_OPCODE_DP2A:
FETCH( &r[0], 0, CHAN_X );
FETCH( &r[1], 1, CHAN_X );
micro_mul( &r[0], &r[0], &r[1] );
@@ -2354,8 +2351,7 @@ exec_instruction(
assert (0);
break;
- case TGSI_OPCODE_FRAC:
- /* TGSI_OPCODE_FRC */
+ case TGSI_OPCODE_FRC:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
micro_frc( &r[0], &r[0] );
@@ -2383,8 +2379,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_EXPBASE2:
- /* TGSI_OPCODE_EX2 */
+ case TGSI_OPCODE_EX2:
FETCH(&r[0], 0, CHAN_X);
#if FAST_MATH
@@ -2398,8 +2393,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_LOGBASE2:
- /* TGSI_OPCODE_LG2 */
+ case TGSI_OPCODE_LG2:
FETCH( &r[0], 0, CHAN_X );
micro_lg2( &r[0], &r[0] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
@@ -2407,8 +2401,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_POWER:
- /* TGSI_OPCODE_POW */
+ case TGSI_OPCODE_POW:
FETCH(&r[0], 0, CHAN_X);
FETCH(&r[1], 1, CHAN_X);
@@ -2419,8 +2412,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_CROSSPRODUCT:
- /* TGSI_OPCODE_XPD */
+ case TGSI_OPCODE_XPD:
FETCH(&r[0], 0, CHAN_Y);
FETCH(&r[1], 1, CHAN_Z);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 37f2b66d1f6..29b81155609 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -26,136 +26,147 @@
**************************************************************************/
#include "util/u_debug.h"
+#include "util/u_memory.h"
#include "tgsi_info.h"
static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{
- { 1, 1, 0, 0, "ARL", NULL, NULL },
- { 1, 1, 0, 0, "MOV", NULL, NULL },
- { 1, 1, 0, 0, "LIT", NULL, NULL },
- { 1, 1, 0, 0, "RCP", "RECIP", NULL },
- { 1, 1, 0, 0, "RSQ", "RECIPSQRT", NULL },
- { 1, 1, 0, 0, "EXP", "EXPP", NULL },
- { 1, 1, 0, 0, "LOG", NULL, NULL },
- { 1, 2, 0, 0, "MUL", NULL, NULL },
- { 1, 2, 0, 0, "ADD", NULL, NULL },
- { 1, 2, 0, 0, "DP3", "DOT3", NULL },
- { 1, 2, 0, 0, "DP4", "DOT4", NULL },
- { 1, 2, 0, 0, "DST", NULL, NULL },
- { 1, 2, 0, 0, "MIN", NULL, NULL },
- { 1, 2, 0, 0, "MAX", NULL, NULL },
- { 1, 2, 0, 0, "SLT", "SETLT", NULL },
- { 1, 2, 0, 0, "SGE", "SETGE", NULL },
- { 1, 3, 0, 0, "MAD", "MADD", NULL },
- { 1, 2, 0, 0, "SUB", NULL, NULL },
- { 1, 3, 0, 0, "LRP", "LERP", NULL },
- { 1, 3, 0, 0, "CND", NULL, NULL },
- { 1, 3, 0, 0, "CND0", NULL, NULL },
- { 1, 3, 0, 0, "DP2A", "DP2ADD", "DOT2ADD" },
- { 1, 2, 0, 0, "INDEX", NULL, NULL },
- { 1, 1, 0, 0, "NEGATE", NULL, NULL },
- { 1, 1, 0, 0, "FRC", "FRAC", NULL },
- { 1, 3, 0, 0, "CLAMP", NULL, NULL },
- { 1, 1, 0, 0, "FLR", "FLOOR", NULL },
- { 1, 1, 0, 0, "ROUND", NULL, NULL },
- { 1, 1, 0, 0, "EX2", "EXPBASE2", NULL },
- { 1, 1, 0, 0, "LG2", "LOGBASE2", "LOGP" },
- { 1, 2, 0, 0, "POW", "POWER", NULL },
- { 1, 2, 0, 0, "XPD", "CRS", "CROSSPRODUCT" },
- { 1, 2, 0, 0, "M4X4", "MULTIPLYMATRIX", NULL },
- { 1, 1, 0, 0, "ABS", NULL, NULL },
- { 1, 1, 0, 0, "RCC", NULL, NULL },
- { 1, 2, 0, 0, "DPH", NULL, NULL },
- { 1, 1, 0, 0, "COS", NULL, NULL },
- { 1, 1, 0, 0, "DDX", "DSX", NULL },
- { 1, 1, 0, 0, "DDY", "DSY", NULL },
- { 0, 0, 0, 0, "KILP", NULL, NULL },
- { 1, 1, 0, 0, "PK2H", NULL, NULL },
- { 1, 1, 0, 0, "PK2US", NULL, NULL },
- { 1, 1, 0, 0, "PK4B", NULL, NULL },
- { 1, 1, 0, 0, "PK4UB", NULL, NULL },
- { 1, 2, 0, 0, "RFL", NULL, NULL },
- { 1, 2, 0, 0, "SEQ", NULL, NULL },
- { 1, 2, 0, 0, "SFL", NULL, NULL },
- { 1, 2, 0, 0, "SGT", NULL, NULL },
- { 1, 1, 0, 0, "SIN", NULL, NULL },
- { 1, 2, 0, 0, "SLE", NULL, NULL },
- { 1, 2, 0, 0, "SNE", NULL, NULL },
- { 1, 2, 0, 0, "STR", NULL, NULL },
- { 1, 2, 1, 0, "TEX", "TEXLD", NULL },
- { 1, 4, 1, 0, "TXD", "TEXLDD", NULL },
- { 1, 2, 1, 0, "TXP", NULL, NULL },
- { 1, 1, 0, 0, "UP2H", NULL, NULL },
- { 1, 1, 0, 0, "UP2US", NULL, NULL },
- { 1, 1, 0, 0, "UP4B", NULL, NULL },
- { 1, 1, 0, 0, "UP4UB", NULL, NULL },
- { 1, 3, 0, 0, "X2D", NULL, NULL },
- { 1, 1, 0, 0, "ARA", NULL, NULL },
- { 1, 1, 0, 0, "ARR", "MOVA", NULL },
- { 0, 1, 0, 0, "BRA", NULL, NULL },
- { 0, 0, 0, 1, "CAL", "CALL", NULL },
- { 0, 0, 0, 0, "RET", NULL, NULL },
- { 1, 1, 0, 0, "SGN", "SSG", NULL },
- { 1, 3, 0, 0, "CMP", NULL, NULL },
- { 1, 1, 0, 0, "SCS", "SINCOS", NULL },
- { 1, 2, 1, 0, "TXB", "TEXLDB", NULL },
- { 1, 1, 0, 0, "NRM", NULL, NULL },
- { 1, 2, 0, 0, "DIV", NULL, NULL },
- { 1, 2, 0, 0, "DP2", NULL, NULL },
- { 1, 2, 1, 0, "TXL", NULL, NULL },
- { 0, 0, 0, 0, "BRK", "BREAK", NULL },
- { 0, 1, 0, 1, "IF", NULL, NULL },
- { 0, 0, 0, 0, "LOOP", NULL, NULL },
- { 0, 1, 0, 0, "REP", NULL, NULL },
- { 0, 0, 0, 1, "ELSE", NULL, NULL },
- { 0, 0, 0, 0, "ENDIF", NULL, NULL },
- { 0, 0, 0, 0, "ENDLOOP", NULL, NULL },
- { 0, 0, 0, 0, "ENDREP", NULL, NULL },
- { 0, 1, 0, 0, "PUSHA", NULL, NULL },
- { 1, 0, 0, 0, "POPA", NULL, NULL },
- { 1, 1, 0, 0, "CEIL", NULL, NULL },
- { 1, 1, 0, 0, "I2F", NULL, NULL },
- { 1, 1, 0, 0, "NOT", NULL, NULL },
- { 1, 1, 0, 0, "INT", "TRUNC", NULL },
- { 1, 2, 0, 0, "SHL", NULL, NULL },
- { 1, 2, 0, 0, "SHR", NULL, NULL },
- { 1, 2, 0, 0, "AND", NULL, NULL },
- { 1, 2, 0, 0, "OR", NULL, NULL },
- { 1, 2, 0, 0, "MOD", NULL, NULL },
- { 1, 2, 0, 0, "XOR", NULL, NULL },
- { 1, 3, 0, 0, "SAD", NULL, NULL },
- { 1, 2, 1, 0, "TXF", NULL, NULL },
- { 1, 2, 1, 0, "TXQ", NULL, NULL },
- { 0, 0, 0, 0, "CONT", NULL, NULL },
- { 0, 0, 0, 0, "EMIT", NULL, NULL },
- { 0, 0, 0, 0, "ENDPRIM", NULL, NULL },
- { 0, 0, 0, 1, "BGNLOOP2", NULL, NULL },
- { 0, 0, 0, 0, "BGNSUB", NULL, NULL },
- { 0, 0, 0, 1, "ENDLOOP2", NULL, NULL },
- { 0, 0, 0, 0, "ENDSUB", NULL, NULL },
- { 1, 1, 0, 0, "NOISE1", NULL, NULL },
- { 1, 1, 0, 0, "NOISE2", NULL, NULL },
- { 1, 1, 0, 0, "NOISE3", NULL, NULL },
- { 1, 1, 0, 0, "NOISE4", NULL, NULL },
- { 0, 0, 0, 0, "NOP", NULL, NULL },
- { 1, 2, 0, 0, "M4X3", NULL, NULL },
- { 1, 2, 0, 0, "M3X4", NULL, NULL },
- { 1, 2, 0, 0, "M3X3", NULL, NULL },
- { 1, 2, 0, 0, "M3X2", NULL, NULL },
- { 1, 1, 0, 0, "NRM4", NULL, NULL },
- { 0, 1, 0, 0, "CALLNZ", NULL, NULL },
- { 0, 1, 0, 0, "IFC", NULL, NULL },
- { 0, 1, 0, 0, "BREAKC", NULL, NULL },
- { 0, 1, 0, 0, "KIL", "TEXKILL", NULL },
- { 0, 0, 0, 0, "END", NULL, NULL },
- { 1, 1, 0, 0, "SWZ", NULL, NULL }
+ { 1, 1, 0, 0, "ARL", TGSI_OPCODE_ARL },
+ { 1, 1, 0, 0, "MOV", TGSI_OPCODE_MOV },
+ { 1, 1, 0, 0, "LIT", TGSI_OPCODE_LIT },
+ { 1, 1, 0, 0, "RCP", TGSI_OPCODE_RCP },
+ { 1, 1, 0, 0, "RSQ", TGSI_OPCODE_RSQ },
+ { 1, 1, 0, 0, "EXP", TGSI_OPCODE_EXP },
+ { 1, 1, 0, 0, "LOG", TGSI_OPCODE_LOG },
+ { 1, 2, 0, 0, "MUL", TGSI_OPCODE_MUL },
+ { 1, 2, 0, 0, "ADD", TGSI_OPCODE_ADD },
+ { 1, 2, 0, 0, "DP3", TGSI_OPCODE_DP3 },
+ { 1, 2, 0, 0, "DP4", TGSI_OPCODE_DP4 },
+ { 1, 2, 0, 0, "DST", TGSI_OPCODE_DST },
+ { 1, 2, 0, 0, "MIN", TGSI_OPCODE_MIN },
+ { 1, 2, 0, 0, "MAX", TGSI_OPCODE_MAX },
+ { 1, 2, 0, 0, "SLT", TGSI_OPCODE_SLT },
+ { 1, 2, 0, 0, "SGE", TGSI_OPCODE_SGE },
+ { 1, 3, 0, 0, "MAD", TGSI_OPCODE_MAD },
+ { 1, 2, 0, 0, "SUB", TGSI_OPCODE_SUB },
+ { 1, 3, 0, 0, "LRP", TGSI_OPCODE_LRP },
+ { 1, 3, 0, 0, "CND", TGSI_OPCODE_CND },
+ { 1, 3, 0, 0, "CND0", TGSI_OPCODE_CND0 },
+ { 1, 3, 0, 0, "DP2A", TGSI_OPCODE_DP2A },
+ { 1, 2, 0, 0, "INDEX", TGSI_OPCODE_INDEX },
+ { 1, 1, 0, 0, "NEGATE", TGSI_OPCODE_NEGATE },
+ { 1, 1, 0, 0, "FRC", TGSI_OPCODE_FRC },
+ { 1, 3, 0, 0, "CLAMP", TGSI_OPCODE_CLAMP },
+ { 1, 1, 0, 0, "FLR", TGSI_OPCODE_FLR },
+ { 1, 1, 0, 0, "ROUND", TGSI_OPCODE_ROUND },
+ { 1, 1, 0, 0, "EX2", TGSI_OPCODE_EX2 },
+ { 1, 1, 0, 0, "LG2", TGSI_OPCODE_LG2 },
+ { 1, 2, 0, 0, "POW", TGSI_OPCODE_POW },
+ { 1, 2, 0, 0, "XPD", TGSI_OPCODE_XPD },
+ { 1, 2, 0, 0, "M4X4", TGSI_OPCODE_MULTIPLYMATRIX },
+ { 1, 1, 0, 0, "ABS", TGSI_OPCODE_ABS },
+ { 1, 1, 0, 0, "RCC", TGSI_OPCODE_RCC },
+ { 1, 2, 0, 0, "DPH", TGSI_OPCODE_DPH },
+ { 1, 1, 0, 0, "COS", TGSI_OPCODE_COS },
+ { 1, 1, 0, 0, "DDX", TGSI_OPCODE_DDX },
+ { 1, 1, 0, 0, "DDY", TGSI_OPCODE_DDY },
+ { 0, 0, 0, 0, "KILP", TGSI_OPCODE_KILP },
+ { 1, 1, 0, 0, "PK2H", TGSI_OPCODE_PK2H },
+ { 1, 1, 0, 0, "PK2US", TGSI_OPCODE_PK2US },
+ { 1, 1, 0, 0, "PK4B", TGSI_OPCODE_PK4B },
+ { 1, 1, 0, 0, "PK4UB", TGSI_OPCODE_PK4UB },
+ { 1, 2, 0, 0, "RFL", TGSI_OPCODE_RFL },
+ { 1, 2, 0, 0, "SEQ", TGSI_OPCODE_SEQ },
+ { 1, 2, 0, 0, "SFL", TGSI_OPCODE_SFL },
+ { 1, 2, 0, 0, "SGT", TGSI_OPCODE_SGT },
+ { 1, 1, 0, 0, "SIN", TGSI_OPCODE_SIN },
+ { 1, 2, 0, 0, "SLE", TGSI_OPCODE_SLE },
+ { 1, 2, 0, 0, "SNE", TGSI_OPCODE_SNE },
+ { 1, 2, 0, 0, "STR", TGSI_OPCODE_STR },
+ { 1, 2, 1, 0, "TEX", TGSI_OPCODE_TEX },
+ { 1, 4, 1, 0, "TXD", TGSI_OPCODE_TXD },
+ { 1, 2, 1, 0, "TXP", TGSI_OPCODE_TXP },
+ { 1, 1, 0, 0, "UP2H", TGSI_OPCODE_UP2H },
+ { 1, 1, 0, 0, "UP2US", TGSI_OPCODE_UP2US },
+ { 1, 1, 0, 0, "UP4B", TGSI_OPCODE_UP4B },
+ { 1, 1, 0, 0, "UP4UB", TGSI_OPCODE_UP4UB },
+ { 1, 3, 0, 0, "X2D", TGSI_OPCODE_X2D },
+ { 1, 1, 0, 0, "ARA", TGSI_OPCODE_ARA },
+ { 1, 1, 0, 0, "ARR", TGSI_OPCODE_ARR },
+ { 0, 1, 0, 0, "BRA", TGSI_OPCODE_BRA },
+ { 0, 0, 0, 1, "CAL", TGSI_OPCODE_CAL },
+ { 0, 0, 0, 0, "RET", TGSI_OPCODE_RET },
+ { 1, 1, 0, 0, "SSG", TGSI_OPCODE_SSG },
+ { 1, 3, 0, 0, "CMP", TGSI_OPCODE_CMP },
+ { 1, 1, 0, 0, "SCS", TGSI_OPCODE_SCS },
+ { 1, 2, 1, 0, "TXB", TGSI_OPCODE_TXB },
+ { 1, 1, 0, 0, "NRM", TGSI_OPCODE_NRM },
+ { 1, 2, 0, 0, "DIV", TGSI_OPCODE_DIV },
+ { 1, 2, 0, 0, "DP2", TGSI_OPCODE_DP2 },
+ { 1, 2, 1, 0, "TXL", TGSI_OPCODE_TXL },
+ { 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK },
+ { 0, 1, 0, 1, "IF", TGSI_OPCODE_IF },
+ { 0, 0, 0, 0, "LOOP", TGSI_OPCODE_LOOP },
+ { 0, 1, 0, 0, "REP", TGSI_OPCODE_REP },
+ { 0, 0, 0, 1, "ELSE", TGSI_OPCODE_ELSE },
+ { 0, 0, 0, 0, "ENDIF", TGSI_OPCODE_ENDIF },
+ { 0, 0, 0, 0, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
+ { 0, 0, 0, 0, "ENDREP", TGSI_OPCODE_ENDREP },
+ { 0, 1, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA },
+ { 1, 0, 0, 0, "POPA", TGSI_OPCODE_POPA },
+ { 1, 1, 0, 0, "CEIL", TGSI_OPCODE_CEIL },
+ { 1, 1, 0, 0, "I2F", TGSI_OPCODE_I2F },
+ { 1, 1, 0, 0, "NOT", TGSI_OPCODE_NOT },
+ { 1, 1, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC },
+ { 1, 2, 0, 0, "SHL", TGSI_OPCODE_SHL },
+ { 1, 2, 0, 0, "SHR", TGSI_OPCODE_SHR },
+ { 1, 2, 0, 0, "AND", TGSI_OPCODE_AND },
+ { 1, 2, 0, 0, "OR", TGSI_OPCODE_OR },
+ { 1, 2, 0, 0, "MOD", TGSI_OPCODE_MOD },
+ { 1, 2, 0, 0, "XOR", TGSI_OPCODE_XOR },
+ { 1, 3, 0, 0, "SAD", TGSI_OPCODE_SAD },
+ { 1, 2, 1, 0, "TXF", TGSI_OPCODE_TXF },
+ { 1, 2, 1, 0, "TXQ", TGSI_OPCODE_TXQ },
+ { 0, 0, 0, 0, "CONT", TGSI_OPCODE_CONT },
+ { 0, 0, 0, 0, "EMIT", TGSI_OPCODE_EMIT },
+ { 0, 0, 0, 0, "ENDPRIM", TGSI_OPCODE_ENDPRIM },
+ { 0, 0, 0, 1, "BGNLOOP2", TGSI_OPCODE_BGNLOOP2 },
+ { 0, 0, 0, 0, "BGNSUB", TGSI_OPCODE_BGNSUB },
+ { 0, 0, 0, 1, "ENDLOOP2", TGSI_OPCODE_ENDLOOP2 },
+ { 0, 0, 0, 0, "ENDSUB", TGSI_OPCODE_ENDSUB },
+ { 1, 1, 0, 0, "NOISE1", TGSI_OPCODE_NOISE1 },
+ { 1, 1, 0, 0, "NOISE2", TGSI_OPCODE_NOISE2 },
+ { 1, 1, 0, 0, "NOISE3", TGSI_OPCODE_NOISE3 },
+ { 1, 1, 0, 0, "NOISE4", TGSI_OPCODE_NOISE4 },
+ { 0, 0, 0, 0, "NOP", TGSI_OPCODE_NOP },
+ { 1, 2, 0, 0, "M4X3", TGSI_OPCODE_M4X3 },
+ { 1, 2, 0, 0, "M3X4", TGSI_OPCODE_M3X4 },
+ { 1, 2, 0, 0, "M3X3", TGSI_OPCODE_M3X3 },
+ { 1, 2, 0, 0, "M3X2", TGSI_OPCODE_M3X2 },
+ { 1, 1, 0, 0, "NRM4", TGSI_OPCODE_NRM4 },
+ { 0, 1, 0, 0, "CALLNZ", TGSI_OPCODE_CALLNZ },
+ { 0, 1, 0, 0, "IFC", TGSI_OPCODE_IFC },
+ { 0, 1, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC },
+ { 0, 1, 0, 0, "KIL", TGSI_OPCODE_KIL },
+ { 0, 0, 0, 0, "END", TGSI_OPCODE_END },
+ { 1, 1, 0, 0, "SWZ", TGSI_OPCODE_SWZ }
};
const struct tgsi_opcode_info *
tgsi_get_opcode_info( uint opcode )
{
+ static boolean firsttime = 1;
+
+ if (firsttime) {
+ unsigned i;
+ firsttime = 0;
+ for (i = 0; i < Elements(opcode_info); i++)
+ assert(opcode_info[i].opcode == i);
+ }
+
if (opcode < TGSI_OPCODE_LAST)
return &opcode_info[opcode];
+
assert( 0 );
return NULL;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h
index 077e25acd7f..16577598bb0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.h
@@ -41,8 +41,7 @@ struct tgsi_opcode_info
boolean is_tex;
boolean is_branch;
const char *mnemonic;
- const char *alt_mnemonic1;
- const char *alt_mnemonic2;
+ uint opcode;
};
const struct tgsi_opcode_info *
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index c3470176f93..a2d4627da9c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -2064,8 +2064,7 @@ emit_instruction(
}
break;
- case TGSI_OPCODE_LERP:
- /* TGSI_OPCODE_LRP */
+ case TGSI_OPCODE_LRP:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
FETCH( func, *inst, 1, 1, chan_index );
@@ -2085,8 +2084,7 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_DOT2ADD:
- /* TGSI_OPCODE_DP2A */
+ case TGSI_OPCODE_DP2A:
FETCH( func, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */
FETCH( func, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */
emit_mul( func, 0, 1 ); /* xmm0 = xmm0 * xmm1 */
@@ -2109,8 +2107,7 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_FRAC:
- /* TGSI_OPCODE_FRC */
+ case TGSI_OPCODE_FRC:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
emit_frc( func, 0, 0 );
@@ -2122,8 +2119,7 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_FLOOR:
- /* TGSI_OPCODE_FLR */
+ case TGSI_OPCODE_FLR:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
emit_flr( func, 0, 0 );
@@ -2139,8 +2135,7 @@ emit_instruction(
}
break;
- case TGSI_OPCODE_EXPBASE2:
- /* TGSI_OPCODE_EX2 */
+ case TGSI_OPCODE_EX2:
FETCH( func, *inst, 0, 0, CHAN_X );
emit_ex2( func, 0, 0 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
@@ -2148,8 +2143,7 @@ emit_instruction(
}
break;
- case TGSI_OPCODE_LOGBASE2:
- /* TGSI_OPCODE_LG2 */
+ case TGSI_OPCODE_LG2:
FETCH( func, *inst, 0, 0, CHAN_X );
emit_lg2( func, 0, 0 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
@@ -2157,8 +2151,7 @@ emit_instruction(
}
break;
- case TGSI_OPCODE_POWER:
- /* TGSI_OPCODE_POW */
+ case TGSI_OPCODE_POW:
FETCH( func, *inst, 0, 0, CHAN_X );
FETCH( func, *inst, 1, 1, CHAN_X );
emit_pow( func, 0, 0, 0, 1 );
@@ -2167,8 +2160,7 @@ emit_instruction(
}
break;
- case TGSI_OPCODE_CROSSPRODUCT:
- /* TGSI_OPCODE_XPD */
+ case TGSI_OPCODE_XPD:
if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
FETCH( func, *inst, 1, 1, CHAN_Z );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 3024da6a328..bfcbc40982a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -789,16 +789,6 @@ match_inst_mnemonic(const char **pcur,
if (str_match_no_case(pcur, info->mnemonic)) {
return TRUE;
}
- if (info->alt_mnemonic1) {
- if (str_match_no_case(pcur, info->alt_mnemonic1)) {
- return TRUE;
- }
- if (info->alt_mnemonic2) {
- if (str_match_no_case(pcur, info->alt_mnemonic2)) {
- return TRUE;
- }
- }
- }
return FALSE;
}
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index b87aae61973..3dfc914269a 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -182,31 +182,31 @@ union tgsi_immediate_data
* GL_ATI_fragment_shader
*/
#define TGSI_OPCODE_SUB 17
-#define TGSI_OPCODE_DOT3 TGSI_OPCODE_DP3
-#define TGSI_OPCODE_DOT4 TGSI_OPCODE_DP4
-#define TGSI_OPCODE_LERP 18
+/*#define TGSI_OPCODE_DOT3 TGSI_OPCODE_DP3*/
+/*#define TGSI_OPCODE_DOT4 TGSI_OPCODE_DP4*/
+#define TGSI_OPCODE_LRP 18
#define TGSI_OPCODE_CND 19
#define TGSI_OPCODE_CND0 20
-#define TGSI_OPCODE_DOT2ADD 21
+#define TGSI_OPCODE_DP2A 21
/*
* GL_EXT_vertex_shader
*/
#define TGSI_OPCODE_INDEX 22 /* considered for removal */
#define TGSI_OPCODE_NEGATE 23 /* considered for removal */
-#define TGSI_OPCODE_MADD TGSI_OPCODE_MAD
-#define TGSI_OPCODE_FRAC 24
-#define TGSI_OPCODE_SETGE TGSI_OPCODE_SGE
-#define TGSI_OPCODE_SETLT TGSI_OPCODE_SLT
+/*#define TGSI_OPCODE_MADD TGSI_OPCODE_MAD*/
+#define TGSI_OPCODE_FRC 24
+/*#define TGSI_OPCODE_SETGE TGSI_OPCODE_SGE*/
+/*#define TGSI_OPCODE_SETLT TGSI_OPCODE_SLT*/
#define TGSI_OPCODE_CLAMP 25
-#define TGSI_OPCODE_FLOOR 26
+#define TGSI_OPCODE_FLR 26
#define TGSI_OPCODE_ROUND 27
-#define TGSI_OPCODE_EXPBASE2 28
-#define TGSI_OPCODE_LOGBASE2 29
-#define TGSI_OPCODE_POWER 30
-#define TGSI_OPCODE_RECIP TGSI_OPCODE_RCP
-#define TGSI_OPCODE_RECIPSQRT TGSI_OPCODE_RSQ
-#define TGSI_OPCODE_CROSSPRODUCT 31
+#define TGSI_OPCODE_EX2 28
+#define TGSI_OPCODE_LG2 29
+#define TGSI_OPCODE_POW 30
+/*#define TGSI_OPCODE_RECIP TGSI_OPCODE_RCP*/
+/*#define TGSI_OPCODE_RECIPSQRT TGSI_OPCODE_RSQ*/
+#define TGSI_OPCODE_XPD 31
#define TGSI_OPCODE_MULTIPLYMATRIX 32 /* considered for removal */
/*
@@ -222,17 +222,17 @@ union tgsi_immediate_data
#define TGSI_OPCODE_COS 36
#define TGSI_OPCODE_DDX 37
#define TGSI_OPCODE_DDY 38
-#define TGSI_OPCODE_EX2 TGSI_OPCODE_EXPBASE2
-#define TGSI_OPCODE_FLR TGSI_OPCODE_FLOOR
-#define TGSI_OPCODE_FRC TGSI_OPCODE_FRAC
+/*#define TGSI_OPCODE_EX2 TGSI_OPCODE_EXPBASE2*/
+/*#define TGSI_OPCODE_FLR TGSI_OPCODE_FLOOR*/
+/*#define TGSI_OPCODE_FRC TGSI_OPCODE_FRAC*/
#define TGSI_OPCODE_KILP 39 /* predicated kill */
-#define TGSI_OPCODE_LG2 TGSI_OPCODE_LOGBASE2
-#define TGSI_OPCODE_LRP TGSI_OPCODE_LERP
+/*#define TGSI_OPCODE_LG2 TGSI_OPCODE_LOGBASE2*/
+/*#define TGSI_OPCODE_LRP TGSI_OPCODE_LERP*/
#define TGSI_OPCODE_PK2H 40
#define TGSI_OPCODE_PK2US 41
#define TGSI_OPCODE_PK4B 42
#define TGSI_OPCODE_PK4UB 43
-#define TGSI_OPCODE_POW TGSI_OPCODE_POWER
+/*#define TGSI_OPCODE_POW TGSI_OPCODE_POWER*/
#define TGSI_OPCODE_RFL 44
#define TGSI_OPCODE_SEQ 45
#define TGSI_OPCODE_SFL 46
@@ -264,7 +264,7 @@ union tgsi_immediate_data
* GL_ARB_vertex_program
*/
#define TGSI_OPCODE_SWZ 118
-#define TGSI_OPCODE_XPD TGSI_OPCODE_CROSSPRODUCT
+/*#define TGSI_OPCODE_XPD TGSI_OPCODE_CROSSPRODUCT*/
/*
* GL_ARB_fragment_program
@@ -285,7 +285,7 @@ union tgsi_immediate_data
#define TGSI_OPCODE_NRM 69
#define TGSI_OPCODE_DIV 70
#define TGSI_OPCODE_DP2 71
-#define TGSI_OPCODE_DP2A TGSI_OPCODE_DOT2ADD
+/*#define TGSI_OPCODE_DP2A TGSI_OPCODE_DOT2ADD*/
#define TGSI_OPCODE_TXL 72
#define TGSI_OPCODE_BRK 73
#define TGSI_OPCODE_IF 74
@@ -348,7 +348,7 @@ union tgsi_immediate_data
#define TGSI_OPCODE_BGNSUB 100
#define TGSI_OPCODE_ENDLOOP2 101
#define TGSI_OPCODE_ENDSUB 102
-#define TGSI_OPCODE_INT TGSI_OPCODE_TRUNC
+/*#define TGSI_OPCODE_INT TGSI_OPCODE_TRUNC*/
#define TGSI_OPCODE_NOISE1 103
#define TGSI_OPCODE_NOISE2 104
#define TGSI_OPCODE_NOISE3 105
@@ -358,7 +358,7 @@ union tgsi_immediate_data
/*
* ps_1_1
*/
-#define TGSI_OPCODE_TEXKILL TGSI_OPCODE_KIL
+/*#define TGSI_OPCODE_TEXKILL TGSI_OPCODE_KIL*/
/*
* ps_1_2
@@ -373,45 +373,45 @@ union tgsi_immediate_data
/*
* ps_1_4
*/
-#define TGSI_OPCODE_TEXLD TGSI_OPCODE_TEX
+/*#define TGSI_OPCODE_TEXLD TGSI_OPCODE_TEX*/
/*
* ps_2_0
*/
-#define TGSI_OPCODE_M4X4 TGSI_OPCODE_MULTIPLYMATRIX
+/*#define TGSI_OPCODE_M4X4 TGSI_OPCODE_MULTIPLYMATRIX*/
#define TGSI_OPCODE_M4X3 108
#define TGSI_OPCODE_M3X4 109
#define TGSI_OPCODE_M3X3 110
#define TGSI_OPCODE_M3X2 111
-#define TGSI_OPCODE_CRS TGSI_OPCODE_XPD
+/*#define TGSI_OPCODE_CRS TGSI_OPCODE_XPD*/
#define TGSI_OPCODE_NRM4 112
-#define TGSI_OPCODE_SINCOS TGSI_OPCODE_SCS
-#define TGSI_OPCODE_TEXLDB TGSI_OPCODE_TXB
-#define TGSI_OPCODE_DP2ADD TGSI_OPCODE_DP2A
+/*#define TGSI_OPCODE_SINCOS TGSI_OPCODE_SCS*/
+/*#define TGSI_OPCODE_TEXLDB TGSI_OPCODE_TXB*/
+/*#define TGSI_OPCODE_DP2ADD TGSI_OPCODE_DP2A*/
/*
* ps_2_x
*/
-#define TGSI_OPCODE_CALL TGSI_OPCODE_CAL
+/*#define TGSI_OPCODE_CALL TGSI_OPCODE_CAL*/
#define TGSI_OPCODE_CALLNZ 113
#define TGSI_OPCODE_IFC 114
-#define TGSI_OPCODE_BREAK TGSI_OPCODE_BRK
+/*#define TGSI_OPCODE_BREAK TGSI_OPCODE_BRK*/
#define TGSI_OPCODE_BREAKC 115
-#define TGSI_OPCODE_DSX TGSI_OPCODE_DDX
-#define TGSI_OPCODE_DSY TGSI_OPCODE_DDY
-#define TGSI_OPCODE_TEXLDD TGSI_OPCODE_TXD
+/*#define TGSI_OPCODE_DSX TGSI_OPCODE_DDX*/
+/*#define TGSI_OPCODE_DSY TGSI_OPCODE_DDY*/
+/*#define TGSI_OPCODE_TEXLDD TGSI_OPCODE_TXD*/
/*
* vs_1_1
*/
-#define TGSI_OPCODE_EXPP TGSI_OPCODE_EXP
-#define TGSI_OPCODE_LOGP TGSI_OPCODE_LG2
+/*#define TGSI_OPCODE_EXPP TGSI_OPCODE_EXP*/
+/*#define TGSI_OPCODE_LOGP TGSI_OPCODE_LG2*/
/*
* vs_2_0
*/
-#define TGSI_OPCODE_SGN TGSI_OPCODE_SSG
-#define TGSI_OPCODE_MOVA TGSI_OPCODE_ARR
+/*#define TGSI_OPCODE_SGN TGSI_OPCODE_SSG*/
+/*#define TGSI_OPCODE_MOVA TGSI_OPCODE_ARR*/
/* EXPP - use TGSI_OPCODE_EX2 */
/*
--
cgit v1.2.3
From b9e2e32daf5505896a662dc8df60104c0f51b4b9 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Wed, 22 Jul 2009 12:34:07 -0600
Subject: mesa: include GLEW sources in MesaDemos tarball
---
Makefile | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/Makefile b/Makefile
index 91515d6fdb2..3c83160687e 100644
--- a/Makefile
+++ b/Makefile
@@ -388,7 +388,14 @@ GLW_FILES = \
$(DIRECTORY)/src/glw/glw.pc.in \
$(DIRECTORY)/src/glw/depend
+GLEW_FILES = \
+ $(DIRECTORY)/src/glew/*.c \
+ $(DIRECTORY)/src/glew/Makefile \
+ $(DIRECTORY)/src/glew/SConscript \
+ $(DIRECTORY)/src/glew/LICENSE.txt
+
DEMO_FILES = \
+ $(GLEW_FILES) \
$(DIRECTORY)/progs/beos/*.cpp \
$(DIRECTORY)/progs/beos/Makefile \
$(DIRECTORY)/progs/images/*.rgb \
--
cgit v1.2.3
From 9c4b877519f73f46eac35885d3b8801753168e14 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Wed, 22 Jul 2009 12:46:53 -0600
Subject: mesa: bump version to 7.5.1
---
Makefile | 2 +-
configs/default | 2 +-
src/mesa/main/version.h | 8 ++++----
3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/Makefile b/Makefile
index 3c83160687e..9ac45bd93f2 100644
--- a/Makefile
+++ b/Makefile
@@ -182,7 +182,7 @@ ultrix-gcc:
# Rules for making release tarballs
-VERSION=7.5
+VERSION=7.5.1
DIRECTORY = Mesa-$(VERSION)
LIB_NAME = MesaLib-$(VERSION)
DEMO_NAME = MesaDemos-$(VERSION)
diff --git a/configs/default b/configs/default
index b5ef80afb36..2981bb9a8fc 100644
--- a/configs/default
+++ b/configs/default
@@ -10,7 +10,7 @@ CONFIG_NAME = default
# Version info
MESA_MAJOR=7
MESA_MINOR=5
-MESA_TINY=0
+MESA_TINY=1
MESA_VERSION = $(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY)
# external projects. This should be useless now that we use libdrm.
diff --git a/src/mesa/main/version.h b/src/mesa/main/version.h
index ba027465d48..50c1143ccc0 100644
--- a/src/mesa/main/version.h
+++ b/src/mesa/main/version.h
@@ -1,8 +1,8 @@
/*
* Mesa 3-D graphics library
- * Version: 7.5
+ * Version: 7.5.1
*
- * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
+ * Copyright (C) 1999-2009 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -30,8 +30,8 @@
/* Mesa version */
#define MESA_MAJOR 7
#define MESA_MINOR 5
-#define MESA_PATCH 0
-#define MESA_VERSION_STRING "7.5"
+#define MESA_PATCH 1
+#define MESA_VERSION_STRING "7.5.1"
/* To make version comparison easy */
#define MESA_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
--
cgit v1.2.3
From b4ba6a66b70b89e9c049f6ce9b65c1edda118651 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Wed, 22 Jul 2009 12:49:34 -0600
Subject: docs: initial release notes for 7.5.1
---
docs/relnotes-7.5.1.html | 54 ++++++++++++++++++++++++++++++++++++++++++++++++
docs/relnotes.html | 1 +
2 files changed, 55 insertions(+)
create mode 100644 docs/relnotes-7.5.1.html
diff --git a/docs/relnotes-7.5.1.html b/docs/relnotes-7.5.1.html
new file mode 100644
index 00000000000..0f475ad2b47
--- /dev/null
+++ b/docs/relnotes-7.5.1.html
@@ -0,0 +1,54 @@
+
+
+Mesa Release Notes
+
+
+
+
+
+
+
+Mesa 7.5.1 Release Notes / (date TBD)
+
+
+Mesa 7.5.1 is a bug-fix release fixing issues found since the 7.5 release.
+
+
+The main new feature of Mesa 7.5.x is the
+Gallium3D infrastructure.
+
+
+Mesa 7.5.1 implements the OpenGL 2.1 API, but the version reported by
+glGetString(GL_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 2.1.
+
+
+See the Compiling/Installing page for prerequisites
+for DRI hardware acceleration.
+
+
+
+MD5 checksums
+
+tbd
+
+
+
+New features
+
+
+
+Bug fixes
+
+Added missing GLEW library to MesaDemos tarballs.
+
+
+
+Changes
+
+
+
+
diff --git a/docs/relnotes.html b/docs/relnotes.html
index 4764eb689d8..7b91a3dc5ec 100644
--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -13,6 +13,7 @@ The release notes summarize what's new or changed in each Mesa release.
+7.5.1 release notes
7.5 release notes
7.4.4 release notes
7.4.3 release notes
--
cgit v1.2.3
From db40bb028726b10e86cd9d683dc9db18181b638d Mon Sep 17 00:00:00 2001
From: Patrice Mandin
Date: Wed, 22 Jul 2009 21:10:20 +0200
Subject: nouveau: Take into account sx,sy parameters to read from source
surface
---
src/gallium/drivers/nv04/nv04_surface_2d.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index f315cf54f05..d794c076f0f 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -153,8 +153,8 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
OUT_RING (chan, src_pitch |
NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER |
NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE);
- OUT_RELOCl(chan, src_bo, src->offset + cy * src_pitch +
- cx * src->texture->block.size, NOUVEAU_BO_GART |
+ OUT_RELOCl(chan, src_bo, src->offset + (cy+sy) * src_pitch +
+ (cx+sx) * src->texture->block.size, NOUVEAU_BO_GART |
NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
OUT_RING (chan, 0);
}
--
cgit v1.2.3
From b129c55f428b37325094a3f0da55c337ee94e515 Mon Sep 17 00:00:00 2001
From: Maciej Cencora
Date: Wed, 22 Jul 2009 22:10:22 +0200
Subject: r300: fix address register handling in NQSSADCE
For address register we always use X component
---
src/mesa/drivers/dri/r300/radeon_nqssadce.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c
index 840c9733b16..202a8532b6d 100644
--- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c
+++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.c
@@ -117,13 +117,15 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
struct register_state *regstate;
- if (inst->SrcReg[src].RelAddr)
+ if (inst->SrcReg[src].RelAddr) {
regstate = get_reg_state(s, PROGRAM_ADDRESS, 0);
- else
+ if (regstate)
+ regstate->Sourced |= WRITEMASK_X;
+ } else {
regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
-
- if (regstate)
- regstate->Sourced |= deswz_source & 0xf;
+ if (regstate)
+ regstate->Sourced |= deswz_source & 0xf;
+ }
return inst;
}
--
cgit v1.2.3
From b7042399fd0a1cf66f99340486c03374d15af36c Mon Sep 17 00:00:00 2001
From: Patrice Mandin
Date: Wed, 22 Jul 2009 22:25:36 +0200
Subject: nouveau: nv30: wrong variable for format
---
src/gallium/drivers/nv30/nv30_screen.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c
index c8b40784b05..f8285e4455f 100644
--- a/src/gallium/drivers/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nv30/nv30_screen.c
@@ -95,7 +95,7 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen,
}
} else
if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) {
- switch (tex_usage) {
+ switch (format) {
case PIPE_FORMAT_Z24S8_UNORM:
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z16_UNORM:
--
cgit v1.2.3
From d602966f89d78725f96d63f69b9f8691c1da04f4 Mon Sep 17 00:00:00 2001
From: Michel Dänzer
Date: Thu, 23 Jul 2009 00:47:32 +0200
Subject: gallium: Fix PPC build.
---
src/gallium/auxiliary/tgsi/tgsi_ppc.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index fddf54460a2..8fa21f6c408 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -619,17 +619,17 @@ emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
ppc_vandc(gen->f, v1, v0, bit31_vec); /* v1 = v0 & ~bit31 */
}
break;
- case TGSI_OPCODE_FLOOR:
+ case TGSI_OPCODE_FLR:
ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */
break;
- case TGSI_OPCODE_FRAC:
+ case TGSI_OPCODE_FRC:
ppc_vrfim(gen->f, v1, v0); /* tmp = floor(v0) */
ppc_vsubfp(gen->f, v1, v0, v1); /* v1 = v0 - v1 */
break;
- case TGSI_OPCODE_EXPBASE2:
+ case TGSI_OPCODE_EX2:
ppc_vexptefp(gen->f, v1, v0); /* v1 = 2^v0 */
break;
- case TGSI_OPCODE_LOGBASE2:
+ case TGSI_OPCODE_LG2:
/* XXX this may be broken! */
ppc_vlogefp(gen->f, v1, v0); /* v1 = log2(v0) */
break;
@@ -1111,10 +1111,10 @@ emit_instruction(struct gen_context *gen,
case TGSI_OPCODE_MOV:
case TGSI_OPCODE_SWZ:
case TGSI_OPCODE_ABS:
- case TGSI_OPCODE_FLOOR:
- case TGSI_OPCODE_FRAC:
- case TGSI_OPCODE_EXPBASE2:
- case TGSI_OPCODE_LOGBASE2:
+ case TGSI_OPCODE_FLR:
+ case TGSI_OPCODE_FRC:
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_LG2:
emit_unaryop(gen, inst);
break;
case TGSI_OPCODE_RSQ:
--
cgit v1.2.3
From 895c435defa83f49b2145f316a7d8d203b2fe374 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Wed, 22 Jul 2009 21:04:07 -0600
Subject: cell: update TGSI_OPCODE_ cases
---
src/gallium/drivers/cell/spu/spu_exec.c | 26 ++++++++------------------
1 file changed, 8 insertions(+), 18 deletions(-)
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index e27df2dfb38..6655842d593 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -952,7 +952,6 @@ exec_instruction(
break;
case TGSI_OPCODE_RCP:
- /* TGSI_OPCODE_RECIP */
FETCH( &r[0], 0, CHAN_X );
r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
@@ -961,7 +960,6 @@ exec_instruction(
break;
case TGSI_OPCODE_RSQ:
- /* TGSI_OPCODE_RECIPSQRT */
FETCH( &r[0], 0, CHAN_X );
r[0].q = micro_sqrt(r[0].q);
r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
@@ -1115,7 +1113,6 @@ exec_instruction(
break;
case TGSI_OPCODE_MAD:
- /* TGSI_OPCODE_MADD */
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
@@ -1136,8 +1133,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_LERP:
- /* TGSI_OPCODE_LRP */
+ case TGSI_OPCODE_LRP:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
@@ -1158,8 +1154,7 @@ exec_instruction(
ASSERT (0);
break;
- case TGSI_OPCODE_DOT2ADD:
- /* TGSI_OPCODE_DP2A */
+ case TGSI_OPCODE_DP2A:
ASSERT (0);
break;
@@ -1171,8 +1166,7 @@ exec_instruction(
ASSERT (0);
break;
- case TGSI_OPCODE_FRAC:
- /* TGSI_OPCODE_FRC */
+ case TGSI_OPCODE_FRC:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
r[0].q = micro_frc(r[0].q);
@@ -1184,8 +1178,7 @@ exec_instruction(
ASSERT (0);
break;
- case TGSI_OPCODE_FLOOR:
- /* TGSI_OPCODE_FLR */
+ case TGSI_OPCODE_FLR:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
r[0].q = micro_flr(r[0].q);
@@ -1201,8 +1194,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_EXPBASE2:
- /* TGSI_OPCODE_EX2 */
+ case TGSI_OPCODE_EX2:
FETCH(&r[0], 0, CHAN_X);
r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q);
@@ -1212,8 +1204,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_LOGBASE2:
- /* TGSI_OPCODE_LG2 */
+ case TGSI_OPCODE_LG2:
FETCH( &r[0], 0, CHAN_X );
r[0].q = micro_lg2(r[0].q);
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
@@ -1221,8 +1212,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_POWER:
- /* TGSI_OPCODE_POW */
+ case TGSI_OPCODE_POW:
FETCH(&r[0], 0, CHAN_X);
FETCH(&r[1], 1, CHAN_X);
@@ -1233,7 +1223,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_CROSSPRODUCT:
+ case TGSI_OPCODE_XPD:
/* TGSI_OPCODE_XPD */
FETCH(&r[0], 0, CHAN_Y);
FETCH(&r[1], 1, CHAN_Z);
--
cgit v1.2.3
From 07961bb05e5ba05205b9f53834863664f1023870 Mon Sep 17 00:00:00 2001
From: Corbin Simpson
Date: Wed, 22 Jul 2009 23:58:35 -0700
Subject: r300g: Actually mark shaders as translated/untranslated.
Also trust that Gallium will not give us TGSI that miscounts shader consts.
This creates a 20x speedup on glxgears, from 8 FPS to 160 FPS.
---
src/gallium/drivers/r300/r300_fs.c | 3 +++
src/gallium/drivers/r300/r300_state.c | 4 ++--
src/gallium/drivers/r300/r300_vs.c | 3 +++
3 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 8672e211bc4..ca8ef999024 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -105,4 +105,7 @@ void r300_translate_fragment_shader(struct r300_context* r300,
tgsi_parse_free(&parser);
FREE(assembler);
+
+ /* And, finally... */
+ fs->translated = TRUE;
}
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 68da0aa4cbb..162740f594d 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -155,7 +155,7 @@ static void
}
r300->dirty_state |= R300_NEW_CONSTANTS;
-
+#if 0
/* If the number of constants have changed, invalidate the shader. */
if (r300->shader_constants[shader].user_count != i) {
if (shader == PIPE_SHADER_FRAGMENT && r300->fs &&
@@ -168,6 +168,7 @@ static void
r300_translate_vertex_shader(r300, r300->vs);
}
}
+#endif
}
/* Create a new depth, stencil, and alpha state based on the CSO dsa state.
@@ -315,7 +316,6 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
r300_translate_fragment_shader(r300, fs);
}
- fs->translated = TRUE;
r300->fs = fs;
r300->dirty_state |= R300_NEW_FRAGMENT_SHADER;
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index a664a316e8c..741a1b69895 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -408,4 +408,7 @@ void r300_translate_vertex_shader(struct r300_context* r300,
tgsi_parse_free(&parser);
FREE(assembler);
+
+ /* And, finally... */
+ vs->translated = TRUE;
}
--
cgit v1.2.3
From af1163cc415265e125b5aa5041ce5c75b978bb1a Mon Sep 17 00:00:00 2001
From: Vinson Lee
Date: Thu, 23 Jul 2009 14:07:31 +0100
Subject: util: Add support for Mac OS.
---
src/gallium/auxiliary/util/u_time.c | 12 ++++++------
src/gallium/auxiliary/util/u_time.h | 6 +++---
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c
index 5268cbf79ce..c16cdd0b226 100644
--- a/src/gallium/auxiliary/util/u_time.c
+++ b/src/gallium/auxiliary/util/u_time.c
@@ -35,7 +35,7 @@
#include "pipe/p_config.h"
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
#include
#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
#include
@@ -77,7 +77,7 @@ util_time_get_frequency(void)
void
util_time_get(struct util_time *t)
{
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
gettimeofday(&t->tv, NULL);
#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
LONGLONG temp;
@@ -102,7 +102,7 @@ util_time_add(const struct util_time *t1,
int64_t usecs,
struct util_time *t2)
{
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
t2->tv.tv_sec = t1->tv.tv_sec + usecs / 1000000;
t2->tv.tv_usec = t1->tv.tv_usec + usecs % 1000000;
#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
@@ -124,7 +124,7 @@ int64_t
util_time_diff(const struct util_time *t1,
const struct util_time *t2)
{
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
return (t2->tv.tv_usec - t1->tv.tv_usec) +
(t2->tv.tv_sec - t1->tv.tv_sec)*1000000;
#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
@@ -144,7 +144,7 @@ util_time_micros( void )
util_time_get(&t1);
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
return t1.tv.tv_usec + t1.tv.tv_sec*1000000LL;
#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
util_time_get_frequency();
@@ -166,7 +166,7 @@ static INLINE int
util_time_compare(const struct util_time *t1,
const struct util_time *t2)
{
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
if (t1->tv.tv_sec < t2->tv.tv_sec)
return -1;
else if(t1->tv.tv_sec > t2->tv.tv_sec)
diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h
index 6bca6077a2a..7a5c54d9b23 100644
--- a/src/gallium/auxiliary/util/u_time.h
+++ b/src/gallium/auxiliary/util/u_time.h
@@ -38,7 +38,7 @@
#include "pipe/p_config.h"
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
#include /* timeval */
#include /* usleep */
#endif
@@ -58,7 +58,7 @@ extern "C" {
*/
struct util_time
{
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
struct timeval tv;
#else
int64_t counter;
@@ -89,7 +89,7 @@ util_time_timeout(const struct util_time *start,
const struct util_time *end,
const struct util_time *curr);
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
#define util_time_sleep usleep
#else
void
--
cgit v1.2.3
From 8b78294d21ffb2cba41328341457bf193087d969 Mon Sep 17 00:00:00 2001
From: José Fonseca
Date: Thu, 23 Jul 2009 14:11:10 +0100
Subject: r600: Remove CRLF line endings.
---
src/mesa/drivers/dri/r600/r700_vertprog.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index 172e6ee501e..f8d64f71d0b 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -359,7 +359,7 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx)
r600EmitShader(ctx,
&(vp->shaderbo),
(GLvoid *)(vp->r700Shader.pProgram),
- vp->r700Shader.uShaderBinaryDWORDSize,
+ vp->r700Shader.uShaderBinaryDWORDSize,
"VS");
vp->loaded = GL_TRUE;
--
cgit v1.2.3
From 78379abcbf853b2cff8d832b45ecf0eeb54b2c58 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Wed, 22 Jul 2009 11:25:26 +0100
Subject: gallium: remove deprecated TGSI opcodes
Various opcodes which can be implemented trivially with other TGSI opcodes,
such as matrix multiplication and negation. These were not used by any
state tracker or implemented by any of the drivers.
---
src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 26 --------------------------
src/gallium/auxiliary/tgsi/tgsi_exec.c | 15 ---------------
src/gallium/auxiliary/tgsi/tgsi_info.c | 14 +++++++-------
src/gallium/auxiliary/tgsi/tgsi_sse2.c | 12 ------------
src/gallium/drivers/cell/spu/spu_exec.c | 12 ------------
src/gallium/include/pipe/p_shader_tokens.h | 8 --------
6 files changed, 7 insertions(+), 80 deletions(-)
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
index 9c8f89d5206..71f79ef5f58 100644
--- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -354,12 +354,6 @@ translate_instruction(llvm::Module *module,
out = instr->dot2add(inputs[0], inputs[1], inputs[2]);
}
break;
- case TGSI_OPCODE_INDEX:
- break;
- case TGSI_OPCODE_NEGATE: {
- out = instr->neg(inputs[0]);
- }
- break;
case TGSI_OPCODE_FRAC: {
out = instr->frc(inputs[0]);
}
@@ -390,8 +384,6 @@ translate_instruction(llvm::Module *module,
out = instr->cross(inputs[0], inputs[1]);
}
break;
- case TGSI_OPCODE_MULTIPLYMATRIX:
- break;
case TGSI_OPCODE_ABS: {
out = instr->abs(inputs[0]);
}
@@ -619,14 +611,6 @@ translate_instruction(llvm::Module *module,
break;
case TGSI_OPCODE_NOP:
break;
- case TGSI_OPCODE_M4X3:
- break;
- case TGSI_OPCODE_M3X4:
- break;
- case TGSI_OPCODE_M3X3:
- break;
- case TGSI_OPCODE_M3X2:
- break;
case TGSI_OPCODE_CALLNZ:
break;
case TGSI_OPCODE_IFC:
@@ -816,8 +800,6 @@ translate_instructionir(llvm::Module *module,
case TGSI_OPCODE_CROSSPRODUCT: {
}
break;
- case TGSI_OPCODE_MULTIPLYMATRIX:
- break;
case TGSI_OPCODE_ABS: {
out = instr->abs(inputs[0]);
}
@@ -985,14 +967,6 @@ translate_instructionir(llvm::Module *module,
break;
case TGSI_OPCODE_NOP:
break;
- case TGSI_OPCODE_M4X3:
- break;
- case TGSI_OPCODE_M3X4:
- break;
- case TGSI_OPCODE_M3X3:
- break;
- case TGSI_OPCODE_M3X2:
- break;
case TGSI_OPCODE_NRM4:
break;
case TGSI_OPCODE_CALLNZ:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index b193fd7a0a5..0179bba5a21 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2341,16 +2341,6 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_INDEX:
- /* XXX: considered for removal */
- assert (0);
- break;
-
- case TGSI_OPCODE_NEGATE:
- /* XXX: considered for removal */
- assert (0);
- break;
-
case TGSI_OPCODE_FRC:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
@@ -2454,11 +2444,6 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_MULTIPLYMATRIX:
- /* XXX: considered for removal */
- assert (0);
- break;
-
case TGSI_OPCODE_ABS:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 29b81155609..fedda7bff90 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -53,8 +53,8 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{ 1, 3, 0, 0, "CND", TGSI_OPCODE_CND },
{ 1, 3, 0, 0, "CND0", TGSI_OPCODE_CND0 },
{ 1, 3, 0, 0, "DP2A", TGSI_OPCODE_DP2A },
- { 1, 2, 0, 0, "INDEX", TGSI_OPCODE_INDEX },
- { 1, 1, 0, 0, "NEGATE", TGSI_OPCODE_NEGATE },
+ { 0, 0, 0, 0, "", 22 }, /* removed */
+ { 0, 0, 0, 0, "", 23 }, /* removed */
{ 1, 1, 0, 0, "FRC", TGSI_OPCODE_FRC },
{ 1, 3, 0, 0, "CLAMP", TGSI_OPCODE_CLAMP },
{ 1, 1, 0, 0, "FLR", TGSI_OPCODE_FLR },
@@ -63,7 +63,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{ 1, 1, 0, 0, "LG2", TGSI_OPCODE_LG2 },
{ 1, 2, 0, 0, "POW", TGSI_OPCODE_POW },
{ 1, 2, 0, 0, "XPD", TGSI_OPCODE_XPD },
- { 1, 2, 0, 0, "M4X4", TGSI_OPCODE_MULTIPLYMATRIX },
+ { 0, 0, 0, 0, "", 32 }, /* removed */
{ 1, 1, 0, 0, "ABS", TGSI_OPCODE_ABS },
{ 1, 1, 0, 0, "RCC", TGSI_OPCODE_RCC },
{ 1, 2, 0, 0, "DPH", TGSI_OPCODE_DPH },
@@ -139,10 +139,10 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{ 1, 1, 0, 0, "NOISE3", TGSI_OPCODE_NOISE3 },
{ 1, 1, 0, 0, "NOISE4", TGSI_OPCODE_NOISE4 },
{ 0, 0, 0, 0, "NOP", TGSI_OPCODE_NOP },
- { 1, 2, 0, 0, "M4X3", TGSI_OPCODE_M4X3 },
- { 1, 2, 0, 0, "M3X4", TGSI_OPCODE_M3X4 },
- { 1, 2, 0, 0, "M3X3", TGSI_OPCODE_M3X3 },
- { 1, 2, 0, 0, "M3X2", TGSI_OPCODE_M3X2 },
+ { 0, 0, 0, 0, "", 108 }, /* removed */
+ { 0, 0, 0, 0, "", 109 }, /* removed */
+ { 0, 0, 0, 0, "", 110 }, /* removed */
+ { 0, 0, 0, 0, "", 111 }, /* removed */
{ 1, 1, 0, 0, "NRM4", TGSI_OPCODE_NRM4 },
{ 0, 1, 0, 0, "CALLNZ", TGSI_OPCODE_CALLNZ },
{ 0, 1, 0, 0, "IFC", TGSI_OPCODE_IFC },
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index a2d4627da9c..781ea1e75cf 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -2099,14 +2099,6 @@ emit_instruction(
}
break;
- case TGSI_OPCODE_INDEX:
- return 0;
- break;
-
- case TGSI_OPCODE_NEGATE:
- return 0;
- break;
-
case TGSI_OPCODE_FRC:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
@@ -2206,10 +2198,6 @@ emit_instruction(
}
break;
- case TGSI_OPCODE_MULTIPLYMATRIX:
- return 0;
- break;
-
case TGSI_OPCODE_ABS:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index 6655842d593..570553e1d68 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -1158,14 +1158,6 @@ exec_instruction(
ASSERT (0);
break;
- case TGSI_OPCODE_INDEX:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_NEGATE:
- ASSERT (0);
- break;
-
case TGSI_OPCODE_FRC:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
@@ -1265,10 +1257,6 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_MULTIPLYMATRIX:
- ASSERT (0);
- break;
-
case TGSI_OPCODE_ABS:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 3dfc914269a..89948d2d8e2 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -192,8 +192,6 @@ union tgsi_immediate_data
/*
* GL_EXT_vertex_shader
*/
-#define TGSI_OPCODE_INDEX 22 /* considered for removal */
-#define TGSI_OPCODE_NEGATE 23 /* considered for removal */
/*#define TGSI_OPCODE_MADD TGSI_OPCODE_MAD*/
#define TGSI_OPCODE_FRC 24
/*#define TGSI_OPCODE_SETGE TGSI_OPCODE_SGE*/
@@ -207,7 +205,6 @@ union tgsi_immediate_data
/*#define TGSI_OPCODE_RECIP TGSI_OPCODE_RCP*/
/*#define TGSI_OPCODE_RECIPSQRT TGSI_OPCODE_RSQ*/
#define TGSI_OPCODE_XPD 31
-#define TGSI_OPCODE_MULTIPLYMATRIX 32 /* considered for removal */
/*
* GL_NV_vertex_program1_1
@@ -378,11 +375,6 @@ union tgsi_immediate_data
/*
* ps_2_0
*/
-/*#define TGSI_OPCODE_M4X4 TGSI_OPCODE_MULTIPLYMATRIX*/
-#define TGSI_OPCODE_M4X3 108
-#define TGSI_OPCODE_M3X4 109
-#define TGSI_OPCODE_M3X3 110
-#define TGSI_OPCODE_M3X2 111
/*#define TGSI_OPCODE_CRS TGSI_OPCODE_XPD*/
#define TGSI_OPCODE_NRM4 112
/*#define TGSI_OPCODE_SINCOS TGSI_OPCODE_SCS*/
--
cgit v1.2.3
From d0d98f3ecfed7d7e0c4426185c13ec4f7c1761f9 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Wed, 22 Jul 2009 11:31:41 +0100
Subject: gallium: clean up opcode definitions
Remove commented-out opcodes. Remove information about API mappings
to opcodes, but add a reference to tgsi-instruction-set.txt where
that information is better presented.
---
src/gallium/include/pipe/p_shader_tokens.h | 156 ++---------------------------
1 file changed, 11 insertions(+), 145 deletions(-)
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 89948d2d8e2..e6e29a04ec0 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -157,8 +157,11 @@ union tgsi_immediate_data
float Float;
};
-/*
- * GL_NV_vertex_program
+/* TGSI opcodes.
+ *
+ * For more information on semantics of opcodes and
+ * which APIs are known to use which opcodes, see
+ * auxiliary/tgsi/tgsi-instruction-set.txt
*/
#define TGSI_OPCODE_ARL 0
#define TGSI_OPCODE_MOV 1
@@ -177,59 +180,32 @@ union tgsi_immediate_data
#define TGSI_OPCODE_SLT 14
#define TGSI_OPCODE_SGE 15
#define TGSI_OPCODE_MAD 16
-
-/*
- * GL_ATI_fragment_shader
- */
#define TGSI_OPCODE_SUB 17
-/*#define TGSI_OPCODE_DOT3 TGSI_OPCODE_DP3*/
-/*#define TGSI_OPCODE_DOT4 TGSI_OPCODE_DP4*/
#define TGSI_OPCODE_LRP 18
#define TGSI_OPCODE_CND 19
#define TGSI_OPCODE_CND0 20
#define TGSI_OPCODE_DP2A 21
-
-/*
- * GL_EXT_vertex_shader
- */
-/*#define TGSI_OPCODE_MADD TGSI_OPCODE_MAD*/
+ /* gap */
#define TGSI_OPCODE_FRC 24
-/*#define TGSI_OPCODE_SETGE TGSI_OPCODE_SGE*/
-/*#define TGSI_OPCODE_SETLT TGSI_OPCODE_SLT*/
#define TGSI_OPCODE_CLAMP 25
#define TGSI_OPCODE_FLR 26
#define TGSI_OPCODE_ROUND 27
#define TGSI_OPCODE_EX2 28
#define TGSI_OPCODE_LG2 29
#define TGSI_OPCODE_POW 30
-/*#define TGSI_OPCODE_RECIP TGSI_OPCODE_RCP*/
-/*#define TGSI_OPCODE_RECIPSQRT TGSI_OPCODE_RSQ*/
#define TGSI_OPCODE_XPD 31
-
-/*
- * GL_NV_vertex_program1_1
- */
+ /* gap */
#define TGSI_OPCODE_ABS 33
#define TGSI_OPCODE_RCC 34
#define TGSI_OPCODE_DPH 35
-
-/*
- * GL_NV_fragment_program
- */
#define TGSI_OPCODE_COS 36
#define TGSI_OPCODE_DDX 37
#define TGSI_OPCODE_DDY 38
-/*#define TGSI_OPCODE_EX2 TGSI_OPCODE_EXPBASE2*/
-/*#define TGSI_OPCODE_FLR TGSI_OPCODE_FLOOR*/
-/*#define TGSI_OPCODE_FRC TGSI_OPCODE_FRAC*/
#define TGSI_OPCODE_KILP 39 /* predicated kill */
-/*#define TGSI_OPCODE_LG2 TGSI_OPCODE_LOGBASE2*/
-/*#define TGSI_OPCODE_LRP TGSI_OPCODE_LERP*/
#define TGSI_OPCODE_PK2H 40
#define TGSI_OPCODE_PK2US 41
#define TGSI_OPCODE_PK4B 42
#define TGSI_OPCODE_PK4UB 43
-/*#define TGSI_OPCODE_POW TGSI_OPCODE_POWER*/
#define TGSI_OPCODE_RFL 44
#define TGSI_OPCODE_SEQ 45
#define TGSI_OPCODE_SFL 46
@@ -246,43 +222,18 @@ union tgsi_immediate_data
#define TGSI_OPCODE_UP4B 57
#define TGSI_OPCODE_UP4UB 58
#define TGSI_OPCODE_X2D 59
-
-/*
- * GL_NV_vertex_program2
- */
#define TGSI_OPCODE_ARA 60
#define TGSI_OPCODE_ARR 61
#define TGSI_OPCODE_BRA 62
#define TGSI_OPCODE_CAL 63
#define TGSI_OPCODE_RET 64
-#define TGSI_OPCODE_SSG 65
-
-/*
- * GL_ARB_vertex_program
- */
-#define TGSI_OPCODE_SWZ 118
-/*#define TGSI_OPCODE_XPD TGSI_OPCODE_CROSSPRODUCT*/
-
-/*
- * GL_ARB_fragment_program
- */
+#define TGSI_OPCODE_SSG 65 /* SGN */
#define TGSI_OPCODE_CMP 66
-#define TGSI_OPCODE_KIL 116 /* conditional kill */
#define TGSI_OPCODE_SCS 67
#define TGSI_OPCODE_TXB 68
-
-/*
- * GL_NV_fragment_program_option
- */
-/* No new opcode */
-
-/*
- * GL_NV_fragment_program2
- */
#define TGSI_OPCODE_NRM 69
#define TGSI_OPCODE_DIV 70
#define TGSI_OPCODE_DP2 71
-/*#define TGSI_OPCODE_DP2A TGSI_OPCODE_DOT2ADD*/
#define TGSI_OPCODE_TXL 72
#define TGSI_OPCODE_BRK 73
#define TGSI_OPCODE_IF 74
@@ -292,20 +243,8 @@ union tgsi_immediate_data
#define TGSI_OPCODE_ENDIF 78
#define TGSI_OPCODE_ENDLOOP 79
#define TGSI_OPCODE_ENDREP 80
-
-/*
- * GL_NV_vertex_program2_option
- */
-
-/*
- * GL_NV_vertex_program3
- */
#define TGSI_OPCODE_PUSHA 81
#define TGSI_OPCODE_POPA 82
-
-/*
- * GL_NV_gpu_program4
- */
#define TGSI_OPCODE_CEIL 83
#define TGSI_OPCODE_I2F 84
#define TGSI_OPCODE_NOT 85
@@ -320,98 +259,25 @@ union tgsi_immediate_data
#define TGSI_OPCODE_TXF 94
#define TGSI_OPCODE_TXQ 95
#define TGSI_OPCODE_CONT 96
-
-/*
- * GL_NV_vertex_program4
- */
-/* Same as GL_NV_gpu_program4 */
-
-/*
- * GL_NV_fragment_program4
- */
-/* Same as GL_NV_gpu_program4 */
-
-/*
- * GL_NV_geometry_program4
- */
-/* Same as GL_NV_gpu_program4 */
#define TGSI_OPCODE_EMIT 97
#define TGSI_OPCODE_ENDPRIM 98
-
-/*
- * GLSL
- */
#define TGSI_OPCODE_BGNLOOP2 99
#define TGSI_OPCODE_BGNSUB 100
#define TGSI_OPCODE_ENDLOOP2 101
#define TGSI_OPCODE_ENDSUB 102
-/*#define TGSI_OPCODE_INT TGSI_OPCODE_TRUNC*/
#define TGSI_OPCODE_NOISE1 103
#define TGSI_OPCODE_NOISE2 104
#define TGSI_OPCODE_NOISE3 105
#define TGSI_OPCODE_NOISE4 106
#define TGSI_OPCODE_NOP 107
-
-/*
- * ps_1_1
- */
-/*#define TGSI_OPCODE_TEXKILL TGSI_OPCODE_KIL*/
-
-/*
- * ps_1_2
- */
-/* CMP - use TGSI_OPCODE_CND0 */
-
-/*
- * ps_1_3
- */
-/* CMP - use TGSI_OPCODE_CND0 */
-
-/*
- * ps_1_4
- */
-/*#define TGSI_OPCODE_TEXLD TGSI_OPCODE_TEX*/
-
-/*
- * ps_2_0
- */
-/*#define TGSI_OPCODE_CRS TGSI_OPCODE_XPD*/
+ /* gap */
#define TGSI_OPCODE_NRM4 112
-/*#define TGSI_OPCODE_SINCOS TGSI_OPCODE_SCS*/
-/*#define TGSI_OPCODE_TEXLDB TGSI_OPCODE_TXB*/
-/*#define TGSI_OPCODE_DP2ADD TGSI_OPCODE_DP2A*/
-
-/*
- * ps_2_x
- */
-/*#define TGSI_OPCODE_CALL TGSI_OPCODE_CAL*/
#define TGSI_OPCODE_CALLNZ 113
#define TGSI_OPCODE_IFC 114
-/*#define TGSI_OPCODE_BREAK TGSI_OPCODE_BRK*/
#define TGSI_OPCODE_BREAKC 115
-/*#define TGSI_OPCODE_DSX TGSI_OPCODE_DDX*/
-/*#define TGSI_OPCODE_DSY TGSI_OPCODE_DDY*/
-/*#define TGSI_OPCODE_TEXLDD TGSI_OPCODE_TXD*/
-
-/*
- * vs_1_1
- */
-/*#define TGSI_OPCODE_EXPP TGSI_OPCODE_EXP*/
-/*#define TGSI_OPCODE_LOGP TGSI_OPCODE_LG2*/
-
-/*
- * vs_2_0
- */
-/*#define TGSI_OPCODE_SGN TGSI_OPCODE_SSG*/
-/*#define TGSI_OPCODE_MOVA TGSI_OPCODE_ARR*/
-/* EXPP - use TGSI_OPCODE_EX2 */
-
-/*
- * vs_2_x
- */
-
+#define TGSI_OPCODE_KIL 116 /* conditional kill */
#define TGSI_OPCODE_END 117 /* aka HALT */
-
+#define TGSI_OPCODE_SWZ 118
#define TGSI_OPCODE_LAST 119
#define TGSI_SAT_NONE 0 /* do not saturate */
--
cgit v1.2.3
From 256eacbde44829b6f3874743e8df6102ce7a6ef0 Mon Sep 17 00:00:00 2001
From: Corbin Simpson
Date: Thu, 23 Jul 2009 01:04:26 -0700
Subject: r300g: PIPE_CAP_BLEND_EQUATION_SEPARATE.
---
src/gallium/drivers/r300/r300_screen.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index da1d5ffe2fd..258e4ac7b2a 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -147,6 +147,8 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
case PIPE_CAP_TGSI_CONT_SUPPORTED:
/* XXX */
return 0;
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ return 1;
default:
debug_printf("r300: Implementation error: Bad param %d\n",
param);
--
cgit v1.2.3
From 27b3c435ba0b9da6ab25cbffac9f975e0adaa66e Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Thu, 23 Jul 2009 07:05:21 -0700
Subject: radeon-gallium: Build fixes wrt changed libdrm_radeon space check API
Had to be hacked up a bit to apply to master. Sorry 'bout that. :3
---
src/gallium/winsys/drm/radeon/core/radeon_buffer.h | 5 ---
src/gallium/winsys/drm/radeon/core/radeon_drm.c | 5 +++
src/gallium/winsys/drm/radeon/core/radeon_r300.c | 51 ++--------------------
src/gallium/winsys/drm/radeon/core/radeon_r300.h | 7 +++
4 files changed, 16 insertions(+), 52 deletions(-)
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h
index 8c8b61fa10b..f5153b06af5 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h
+++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h
@@ -61,11 +61,6 @@ struct radeon_winsys_priv {
/* Radeon BO manager. */
struct radeon_bo_manager* bom;
- /* Radeon BO space checker. */
- struct radeon_cs_space_check sc[RADEON_MAX_BOS];
- /* Current BO count. */
- unsigned bo_count;
-
/* Radeon CS manager. */
struct radeon_cs_manager* csm;
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
index da2010184a2..d6e4e4b5eb1 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
@@ -31,6 +31,11 @@
#include "radeon_drm.h"
#include "trace/tr_drm.h"
+#include "r300_screen.h"
+#include "xf86drm.h"
+
+#include
+
/* Create a pipe_screen. */
struct pipe_screen* radeon_create_screen(struct drm_api* api,
int drmFB,
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c
index 8c5f756ddf8..ac33ea4c6e2 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c
@@ -27,64 +27,23 @@ static boolean radeon_r300_add_buffer(struct r300_winsys* winsys,
uint32_t rd,
uint32_t wd)
{
- int i;
struct radeon_winsys_priv* priv =
(struct radeon_winsys_priv*)winsys->radeon_winsys;
- struct radeon_cs_space_check* sc = priv->sc;
struct radeon_bo* bo = ((struct radeon_pipe_buffer*)pbuffer)->bo;
- /* Check to see if this BO is already in line for validation;
- * find a slot for it otherwise. */
- for (i = 0; i < priv->bo_count; i++) {
- if (sc[i].bo == bo) {
- sc[i].read_domains |= rd;
- sc[i].write_domain |= wd;
- return TRUE;
- }
- }
-
- if (priv->bo_count >= RADEON_MAX_BOS) {
- /* Dohoho. Not falling for that one again. Request a flush. */
- return FALSE;
- }
-
- sc[priv->bo_count].bo = bo;
- sc[priv->bo_count].read_domains = rd;
- sc[priv->bo_count].write_domain = wd;
- priv->bo_count++;
-
+ radeon_cs_space_add_persistent_bo(priv->cs, bo, rd, wd);
return TRUE;
}
static boolean radeon_r300_validate(struct r300_winsys* winsys)
{
- int retval, i;
struct radeon_winsys_priv* priv =
(struct radeon_winsys_priv*)winsys->radeon_winsys;
- struct radeon_cs_space_check* sc = priv->sc;
- retval = radeon_cs_space_check(priv->cs, sc, priv->bo_count);
-
- if (retval == RADEON_CS_SPACE_OP_TO_BIG) {
- /* We might as well HCF, since this is not going to fit in the card,
- * period. */
- /* XXX just drop it on the floor instead */
- exit(1);
- } else if (retval == RADEON_CS_SPACE_FLUSH) {
- /* We must flush before more rendering can commence. */
+ if (radeon_cs_space_check(priv->cs) < 0) {
return TRUE;
}
- /* XXX should probably be its own function */
- for (i = 0; i < priv->bo_count; i++) {
- if (sc[i].read_domains && sc[i].write_domain) {
- /* Cute, cute. We need to flush first. */
- debug_printf("radeon: BO %p can't be read and written; "
- "requesting flush.\n", sc[i].bo);
- return TRUE;
- }
- }
-
/* Things are fine, we can proceed as normal. */
return FALSE;
}
@@ -151,8 +110,7 @@ static void radeon_r300_flush_cs(struct r300_winsys* winsys)
{
struct radeon_winsys_priv* priv =
(struct radeon_winsys_priv*)winsys->radeon_winsys;
- struct radeon_cs_space_check* sc = priv->sc;
- int retval = 1;
+ int retval;
/* Emit the CS. */
retval = radeon_cs_emit(priv->cs);
@@ -163,8 +121,7 @@ static void radeon_r300_flush_cs(struct r300_winsys* winsys)
radeon_cs_erase(priv->cs);
/* Clean out BOs. */
- memset(sc, 0, sizeof(struct radeon_cs_space_check) * RADEON_MAX_BOS);
- priv->bo_count = 0;
+ radeon_cs_space_reset_bos(priv->cs);
}
/* Helper function to do the ioctls needed for setup and init. */
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.h b/src/gallium/winsys/drm/radeon/core/radeon_r300.h
index a2e0e582485..7f0246cc7ad 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_r300.h
+++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.h
@@ -20,6 +20,9 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#ifndef RADEON_R300_H
+#define RADEON_R300_H
+
/* XXX WTF is this! I shouldn't have to include those first three! FUCK! */
#include
#include
@@ -44,5 +47,9 @@ struct drm_radeon_info {
};
#endif
+struct radeon_winsys;
+
struct r300_winsys*
radeon_create_r300_winsys(int fd, struct radeon_winsys* old_winsys);
+
+#endif /* RADEON_R300_H */
--
cgit v1.2.3
From ca83d5a8db510756eb95423a52b19ff52a2d6dc1 Mon Sep 17 00:00:00 2001
From: Corbin Simpson
Date: Thu, 23 Jul 2009 07:14:07 -0700
Subject: r300g, radeon-gallium: Fix API, cleanup.
Something called "validate" should return FALSE on failure, not TRUE.
---
src/gallium/drivers/r300/r300_emit.c | 3 ++-
src/gallium/drivers/r300/r300_surface.c | 6 ++++--
src/gallium/winsys/drm/radeon/core/radeon_drm.c | 9 ++-------
src/gallium/winsys/drm/radeon/core/radeon_drm.h | 8 ++++++++
src/gallium/winsys/drm/radeon/core/radeon_r300.c | 11 ++++++++---
src/gallium/winsys/drm/radeon/core/radeon_r300.h | 13 -------------
6 files changed, 24 insertions(+), 26 deletions(-)
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 7ba56cdc1d2..ac510ffc2ed 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -531,10 +531,11 @@ validate:
} else {
debug_printf("No VBO while emitting dirty state!\n");
}
- if (r300->winsys->validate(r300->winsys)) {
+ if (!r300->winsys->validate(r300->winsys)) {
r300->context.flush(&r300->context, 0, NULL);
if (invalid) {
/* Well, hell. */
+ debug_printf("r300: Stuck in validation loop, gonna quit now.");
exit(1);
}
invalid = TRUE;
diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c
index fdabe4d9cfe..25168ce5e95 100644
--- a/src/gallium/drivers/r300/r300_surface.c
+++ b/src/gallium/drivers/r300/r300_surface.c
@@ -125,9 +125,10 @@ validate:
r300->context.flush(&r300->context, 0, NULL);
goto validate;
}
- if (r300->winsys->validate(r300->winsys)) {
+ if (!r300->winsys->validate(r300->winsys)) {
r300->context.flush(&r300->context, 0, NULL);
if (invalid) {
+ debug_printf("r300: Stuck in validation loop, gonna fallback.");
goto fallback;
}
invalid = TRUE;
@@ -256,9 +257,10 @@ validate:
r300->context.flush(&r300->context, 0, NULL);
goto validate;
}
- if (r300->winsys->validate(r300->winsys)) {
+ if (!r300->winsys->validate(r300->winsys)) {
r300->context.flush(&r300->context, 0, NULL);
if (invalid) {
+ debug_printf("r300: Stuck in validation loop, gonna fallback.");
goto fallback;
}
invalid = TRUE;
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
index d6e4e4b5eb1..8d818cf8301 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
@@ -29,12 +29,6 @@
*/
#include "radeon_drm.h"
-#include "trace/tr_drm.h"
-
-#include "r300_screen.h"
-#include "xf86drm.h"
-
-#include
/* Create a pipe_screen. */
struct pipe_screen* radeon_create_screen(struct drm_api* api,
@@ -59,7 +53,8 @@ struct pipe_context* radeon_create_context(struct drm_api* api,
if (getenv("RADEON_SOFTPIPE")) {
return radeon_create_softpipe(screen->winsys);
} else {
- return r300_create_context(screen, screen->winsys);
+ return r300_create_context(screen,
+ (struct r300_winsys*)screen->winsys);
}
}
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.h b/src/gallium/winsys/drm/radeon/core/radeon_drm.h
index 8560f71db65..88a5c82b284 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_drm.h
+++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.h
@@ -30,8 +30,13 @@
#ifndef RADEON_DRM_H
#define RADEON_DRM_H
+#include
+
+#include "xf86drm.h"
+
#include "pipe/p_screen.h"
+#include "trace/tr_drm.h"
#include "util/u_memory.h"
#include "state_tracker/drm_api.h"
@@ -40,6 +45,9 @@
#include "radeon_r300.h"
#include "radeon_winsys_softpipe.h"
+/* XXX */
+#include "r300_screen.h"
+
struct pipe_screen* radeon_create_screen(struct drm_api* api,
int drmFB,
struct drm_create_screen_arg *arg);
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c
index ac33ea4c6e2..e927409e3ab 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c
@@ -41,11 +41,11 @@ static boolean radeon_r300_validate(struct r300_winsys* winsys)
(struct radeon_winsys_priv*)winsys->radeon_winsys;
if (radeon_cs_space_check(priv->cs) < 0) {
- return TRUE;
+ return FALSE;
}
/* Things are fine, we can proceed as normal. */
- return FALSE;
+ return TRUE;
}
static boolean radeon_r300_check_cs(struct r300_winsys* winsys, int size)
@@ -118,10 +118,15 @@ static void radeon_r300_flush_cs(struct r300_winsys* winsys)
debug_printf("radeon: Bad CS, dumping...\n");
radeon_cs_print(priv->cs, stderr);
}
- radeon_cs_erase(priv->cs);
/* Clean out BOs. */
radeon_cs_space_reset_bos(priv->cs);
+
+ /* Reset CS.
+ * Someday, when we care about performance, we should really find a way
+ * to rotate between two or three CS objects so that the GPU can be
+ * spinning through one CS while another one is being filled. */
+ radeon_cs_erase(priv->cs);
}
/* Helper function to do the ioctls needed for setup and init. */
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.h b/src/gallium/winsys/drm/radeon/core/radeon_r300.h
index 7f0246cc7ad..741c1371889 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_r300.h
+++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.h
@@ -34,19 +34,6 @@
#include "radeon_buffer.h"
-/* protect us from bonghits */
-#ifndef RADEON_INFO_DEVICE_ID
-#define RADEON_INFO_DEVICE_ID 0
-#endif
-#ifndef DRM_RADEON_INFO
-#define DRM_RADEON_INFO 0x1
-struct drm_radeon_info {
- uint32_t request;
- uint32_t pad;
- uint64_t value;
-};
-#endif
-
struct radeon_winsys;
struct r300_winsys*
--
cgit v1.2.3
From adc6f8cdfc8ca25d7480a50cfe0f85fdeddbfcfc Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Thu, 23 Jul 2009 17:56:41 +0100
Subject: gallivm: updates for TGSI changes
make linux-llvm succeeds, but doesn't seem to be working, at least with
llvm 2.5
---
src/gallium/auxiliary/gallivm/instructionssoa.cpp | 4 +--
src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 36 ++++++++++-------------
2 files changed, 18 insertions(+), 22 deletions(-)
diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp
index 2d2af3085e6..721b7d2d833 100644
--- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp
+++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp
@@ -128,7 +128,7 @@ void InstructionsSoa::createFunctionMap()
m_functionsMap[TGSI_OPCODE_DP4] = "dp4";
m_functionsMap[TGSI_OPCODE_MIN] = "min";
m_functionsMap[TGSI_OPCODE_MAX] = "max";
- m_functionsMap[TGSI_OPCODE_POWER] = "pow";
+ m_functionsMap[TGSI_OPCODE_POW] = "pow";
m_functionsMap[TGSI_OPCODE_LIT] = "lit";
m_functionsMap[TGSI_OPCODE_RSQ] = "rsq";
m_functionsMap[TGSI_OPCODE_SLT] = "slt";
@@ -311,7 +311,7 @@ std::vector InstructionsSoa::mul(const std::vector i
std::vector InstructionsSoa::pow(const std::vector in1,
const std::vector in2)
{
- llvm::Function *func = function(TGSI_OPCODE_POWER);
+ llvm::Function *func = function(TGSI_OPCODE_POW);
return callBuiltin(func, in1, in2);
}
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
index 71f79ef5f58..8d885e48be6 100644
--- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -338,7 +338,7 @@ translate_instruction(llvm::Module *module,
out = instr->sub(inputs[0], inputs[1]);
}
break;
- case TGSI_OPCODE_LERP: {
+ case TGSI_OPCODE_LRP: {
out = instr->lerp(inputs[0], inputs[1], inputs[2]);
}
break;
@@ -350,11 +350,11 @@ translate_instruction(llvm::Module *module,
out = instr->cnd0(inputs[0], inputs[1], inputs[2]);
}
break;
- case TGSI_OPCODE_DOT2ADD: {
+ case TGSI_OPCODE_DP2A: {
out = instr->dot2add(inputs[0], inputs[1], inputs[2]);
}
break;
- case TGSI_OPCODE_FRAC: {
+ case TGSI_OPCODE_FRC: {
out = instr->frc(inputs[0]);
}
break;
@@ -362,25 +362,25 @@ translate_instruction(llvm::Module *module,
out = instr->clamp(inputs[0]);
}
break;
- case TGSI_OPCODE_FLOOR: {
+ case TGSI_OPCODE_FLR: {
out = instr->floor(inputs[0]);
}
break;
case TGSI_OPCODE_ROUND:
break;
- case TGSI_OPCODE_EXPBASE2: {
+ case TGSI_OPCODE_EX2: {
out = instr->ex2(inputs[0]);
}
break;
- case TGSI_OPCODE_LOGBASE2: {
+ case TGSI_OPCODE_LG2: {
out = instr->lg2(inputs[0]);
}
break;
- case TGSI_OPCODE_POWER: {
+ case TGSI_OPCODE_POW: {
out = instr->pow(inputs[0], inputs[1]);
}
break;
- case TGSI_OPCODE_CROSSPRODUCT: {
+ case TGSI_OPCODE_XPD: {
out = instr->cross(inputs[0], inputs[1]);
}
break;
@@ -764,40 +764,36 @@ translate_instructionir(llvm::Module *module,
out = instr->sub(inputs[0], inputs[1]);
}
break;
- case TGSI_OPCODE_LERP: {
+ case TGSI_OPCODE_LRP: {
}
break;
case TGSI_OPCODE_CND:
break;
case TGSI_OPCODE_CND0:
break;
- case TGSI_OPCODE_DOT2ADD:
+ case TGSI_OPCODE_DP2A:
break;
- case TGSI_OPCODE_INDEX:
- break;
- case TGSI_OPCODE_NEGATE:
- break;
- case TGSI_OPCODE_FRAC: {
+ case TGSI_OPCODE_FRC: {
}
break;
case TGSI_OPCODE_CLAMP:
break;
- case TGSI_OPCODE_FLOOR: {
+ case TGSI_OPCODE_FLR: {
}
break;
case TGSI_OPCODE_ROUND:
break;
- case TGSI_OPCODE_EXPBASE2: {
+ case TGSI_OPCODE_EX2: {
}
break;
- case TGSI_OPCODE_LOGBASE2: {
+ case TGSI_OPCODE_LG2: {
}
break;
- case TGSI_OPCODE_POWER: {
+ case TGSI_OPCODE_POW: {
out = instr->pow(inputs[0], inputs[1]);
}
break;
- case TGSI_OPCODE_CROSSPRODUCT: {
+ case TGSI_OPCODE_XPD: {
}
break;
case TGSI_OPCODE_ABS: {
--
cgit v1.2.3
From aa99a765c15392d06e3a33d4eda377c58bc6afec Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Thu, 23 Jul 2009 18:48:04 +0100
Subject: draw: correct address for machine struct in llvm path
This changed after a recent commit.
---
src/gallium/auxiliary/draw/draw_vs_llvm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index 727977bc3af..b3535c0e48e 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -119,7 +119,7 @@ draw_create_vs_llvm(struct draw_context *draw,
vs->base.create_varient = draw_vs_varient_generic;
vs->base.run_linear = vs_llvm_run_linear;
vs->base.delete = vs_llvm_delete;
- vs->machine = &draw->vs.machine;
+ vs->machine = draw->vs.machine;
{
struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS);
--
cgit v1.2.3
From 3b4235d4eb5818d0e57b768c66a28249ac5d853c Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Thu, 23 Jul 2009 18:23:18 -0400
Subject: r600: fix segfault in morph3d
These attributes still need work, but it shouldn't hurt to
enable them.
---
src/mesa/drivers/dri/r600/r700_vertprog.c | 34 +++++++++++++++----------------
1 file changed, 17 insertions(+), 17 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index f8d64f71d0b..af6a6b8c295 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -52,7 +52,7 @@ unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm,
unsigned int unBit;
unsigned int unTotal = unStart;
- //!!!!!!! THE ORDER MATCH FS INPUT
+ //!!!!!!! THE ORDER MATCH FS INPUT
unBit = 1 << VERT_RESULT_HPOS;
if(mesa_vp->Base.OutputsWritten & unBit)
@@ -73,17 +73,17 @@ unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm,
}
//TODO : dealing back face.
- //unBit = 1 << VERT_RESULT_BFC0;
- //if(mesa_vp->Base.OutputsWritten & unBit)
- //{
- // pAsm->ucVP_OutputMap[VERT_RESULT_COL0] = unTotal++;
- //}
-
- //unBit = 1 << VERT_RESULT_BFC1;
- //if(mesa_vp->Base.OutputsWritten & unBit)
- //{
- // pAsm->ucVP_OutputMap[VERT_RESULT_COL1] = unTotal++;
- //}
+ unBit = 1 << VERT_RESULT_BFC0;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_BFC0] = unTotal++;
+ }
+
+ unBit = 1 << VERT_RESULT_BFC1;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_BFC1] = unTotal++;
+ }
//TODO : dealing fog.
unBit = 1 << VERT_RESULT_FOGC;
@@ -93,11 +93,11 @@ unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm,
}
//TODO : dealing point size.
- //unBit = 1 << VERT_RESULT_PSIZ;
- //if(mesa_vp->Base.OutputsWritten & unBit)
- //{
- // pAsm->ucVP_OutputMap[VERT_RESULT_PSIZ] = unTotal++;
- //}
+ unBit = 1 << VERT_RESULT_PSIZ;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_PSIZ] = unTotal++;
+ }
for(i=0; i<8; i++)
{
--
cgit v1.2.3
From c57d81ddc9ba3052ff7f6b72091accab2c2db0ae Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Thu, 23 Jul 2009 20:20:39 -0600
Subject: mesa: include glew headers in MesaDemos tarballs
---
Makefile | 3 +++
1 file changed, 3 insertions(+)
diff --git a/Makefile b/Makefile
index 9ac45bd93f2..6ad07e61379 100644
--- a/Makefile
+++ b/Makefile
@@ -389,6 +389,9 @@ GLW_FILES = \
$(DIRECTORY)/src/glw/depend
GLEW_FILES = \
+ $(DIRECTORY)/include/GL/glew.h \
+ $(DIRECTORY)/include/GL/glxew.h \
+ $(DIRECTORY)/include/GL/wglew.h \
$(DIRECTORY)/src/glew/*.c \
$(DIRECTORY)/src/glew/Makefile \
$(DIRECTORY)/src/glew/SConscript \
--
cgit v1.2.3
From 12b183d2506e49774aad23543f5bb477904cb1c7 Mon Sep 17 00:00:00 2001
From: Joakim Sindholt
Date: Fri, 24 Jul 2009 14:54:00 +0200
Subject: radeon-gallium: remove old getparam ioctl
---
src/gallium/winsys/drm/radeon/core/radeon_r300.c | 16 +++-------------
1 file changed, 3 insertions(+), 13 deletions(-)
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c
index e927409e3ab..4e9a2ddd161 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c
@@ -133,29 +133,19 @@ static void radeon_r300_flush_cs(struct r300_winsys* winsys)
static void do_ioctls(struct r300_winsys* winsys, int fd)
{
struct drm_radeon_gem_info gem_info = {0};
- drm_radeon_getparam_t gp = {0};
struct drm_radeon_info info = {0};
int target = 0;
int retval;
info.value = ⌖
- gp.value = ⌖
/* First, get PCI ID */
info.request = RADEON_INFO_DEVICE_ID;
retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
if (retval) {
- fprintf(stderr, "%s: New ioctl for PCI ID failed "
- "(error number %d), trying classic ioctl...\n",
- __FUNCTION__, retval);
- gp.param = RADEON_PARAM_DEVICE_ID;
- retval = drmCommandWriteRead(fd, DRM_RADEON_GETPARAM, &gp,
- sizeof(gp));
- if (retval) {
- fprintf(stderr, "%s: Failed to get PCI ID, "
- "error number %d\n", __FUNCTION__, retval);
- exit(1);
- }
+ fprintf(stderr, "%s: Failed to get PCI ID, "
+ "error number %d\n", __FUNCTION__, retval);
+ exit(1);
}
winsys->pci_id = target;
--
cgit v1.2.3
From 8c30292a6e48448318d84582df876f35c490f968 Mon Sep 17 00:00:00 2001
From: Patrice Mandin
Date: Fri, 24 Jul 2009 23:37:46 +0200
Subject: nouveau: use nv04_surface_copy_swizzle only for POT sizes
---
src/gallium/drivers/nv04/nv04_surface_2d.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index d794c076f0f..c0adf7ce85f 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -110,10 +110,12 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
unsigned cx;
unsigned cy;
+#if 0
/* POT or GTFO */
assert(!(w & (w - 1)) && !(h & (h - 1)));
/* That's the way she likes it */
assert(src_pitch == ((struct nv04_surface *)dst)->pitch);
+#endif
BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1);
OUT_RELOCo(chan, dst_bo,
@@ -258,7 +260,8 @@ nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
assert(src->format == dst->format);
/* Setup transfer to swizzle the texture to vram if needed */
- if (src_linear && !dst_linear && w > 1 && h > 1) {
+ if (src_linear && !dst_linear && w > 1 && h > 1 &&
+ !(w & (w - 1)) && !(h & (h - 1))) { /* POT only */
nv04_surface_copy_swizzle(ctx, dst, dx, dy, src, sx, sy, w, h);
return;
}
--
cgit v1.2.3
From 77a8a650e61047582794512ef61c8e6525aea059 Mon Sep 17 00:00:00 2001
From: Patrice Mandin
Date: Sun, 26 Jul 2009 12:30:12 +0200
Subject: nouveau: Recursively swizzle an NPOT sized copy
---
src/gallium/drivers/nv04/nv04_surface_2d.c | 26 +++++++++++++++++++++-----
1 file changed, 21 insertions(+), 5 deletions(-)
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index c0adf7ce85f..143b8589e8e 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -111,8 +111,6 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
unsigned cy;
#if 0
- /* POT or GTFO */
- assert(!(w & (w - 1)) && !(h & (h - 1)));
/* That's the way she likes it */
assert(src_pitch == ((struct nv04_surface *)dst)->pitch);
#endif
@@ -260,9 +258,27 @@ nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
assert(src->format == dst->format);
/* Setup transfer to swizzle the texture to vram if needed */
- if (src_linear && !dst_linear && w > 1 && h > 1 &&
- !(w & (w - 1)) && !(h & (h - 1))) { /* POT only */
- nv04_surface_copy_swizzle(ctx, dst, dx, dy, src, sx, sy, w, h);
+ if (src_linear && !dst_linear && w > 1 && h > 1) {
+ int potWidth = 1<0) {
+ nv04_surface_copy(ctx, dst, dx+potWidth, dy,
+ src, sx+potWidth, sy,
+ remainWidth, potHeight);
+ }
+
+ if (remainHeight>0) {
+ nv04_surface_copy(ctx, dst, dx, dy+potHeight,
+ src, sx, sy+potHeight,
+ w, remainHeight);
+ }
+
return;
}
--
cgit v1.2.3
From 2b8a8f75f33931622a46287a2bf633879f23285e Mon Sep 17 00:00:00 2001
From: Patrice Mandin
Date: Sun, 26 Jul 2009 16:18:06 +0200
Subject: nouveau: Take into account destination position for copy_swizzle,
need to split copy a bit more
---
src/gallium/drivers/nv04/nv04_surface_2d.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index 143b8589e8e..ff4e51178dc 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -133,7 +133,7 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
for (cy = 0; cy < h; cy += sub_h) {
for (cx = 0; cx < w; cx += sub_w) {
BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1);
- OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(cx, cy) *
+ OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(cx+dx, cy+dy) *
dst->texture->block.size, NOUVEAU_BO_GART |
NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
@@ -264,19 +264,29 @@ nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
int remainWidth = w-potWidth;
int remainHeight = h-potHeight;
+ /* top left is always POT */
nv04_surface_copy_swizzle(ctx, dst, dx, dy, src, sx, sy,
potWidth, potHeight);
+ /* top right */
if (remainWidth>0) {
nv04_surface_copy(ctx, dst, dx+potWidth, dy,
src, sx+potWidth, sy,
remainWidth, potHeight);
}
+ /* bottom left */
if (remainHeight>0) {
nv04_surface_copy(ctx, dst, dx, dy+potHeight,
src, sx, sy+potHeight,
- w, remainHeight);
+ potWidth, remainHeight);
+ }
+
+ /* bottom right */
+ if ((remainWidth>0) && (remainHeight>0)) {
+ nv04_surface_copy(ctx, dst, dx+potWidth, dy+potHeight,
+ src, sx+potWidth, sy+potHeight,
+ remainWidth, remainHeight);
}
return;
--
cgit v1.2.3
From 5276b049b46fd030d7b198a1ecb97248eb73299b Mon Sep 17 00:00:00 2001
From: José Fonseca
Date: Sun, 26 Jul 2009 20:31:11 +0100
Subject: util: fix typo.
---
src/gallium/auxiliary/util/u_tile.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c
index a0c8ed88f74..422bc76003a 100644
--- a/src/gallium/auxiliary/util/u_tile.c
+++ b/src/gallium/auxiliary/util/u_tile.c
@@ -153,7 +153,7 @@ a8r8g8b8_put_tile_rgba(unsigned *dst,
}
-/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/
+/*** PIPE_FORMAT_X8R8G8B8_UNORM ***/
static void
x8r8g8b8_get_tile_rgba(const unsigned *src,
--
cgit v1.2.3
From 8df35b7b57ff12721556fa7d00b4e337134da374 Mon Sep 17 00:00:00 2001
From: Patrice Mandin
Date: Sun, 26 Jul 2009 22:30:35 +0200
Subject: nouveau: only swizzle square textures for copy
---
src/gallium/drivers/nv04/nv04_surface_2d.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index ff4e51178dc..aba40cfaff7 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -263,10 +263,17 @@ nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
int potHeight = 1<potHeight ? potHeight : potWidth);
+ int x,y;
+
+ /* top left is always POT, but we can only swizzle squares */
+ for (y=0; y0) {
--
cgit v1.2.3
From 2cbd5ecfb666a757c4abef85dbe40fb53d647ec9 Mon Sep 17 00:00:00 2001
From: Corbin Simpson
Date: Fri, 24 Jul 2009 14:37:07 -0700
Subject: r300g: Add some debugging, correct little bits of math in texture
setup.
Simple stuff still works, but not sure about some of the more complex things.
---
src/gallium/drivers/r300/r300_texture.c | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 11c7858d422..1e86020d1fc 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -55,6 +55,9 @@ static void r300_setup_texture_state(struct r300_texture* tex,
if (height > 2048) {
state->format2 |= R500_TXHEIGHT_BIT11;
}
+
+ debug_printf("r300: Set texture state (%dx%d, pitch %d, %d levels)\n",
+ width, height, pitch, levels);
}
static void r300_setup_miptree(struct r300_texture* tex)
@@ -71,19 +74,25 @@ static void r300_setup_miptree(struct r300_texture* tex)
}
base->nblocksx[i] = pf_get_nblocksx(&base->block, base->width[i]);
- base->nblocksy[i] = pf_get_nblocksy(&base->block, base->width[i]);
+ base->nblocksy[i] = pf_get_nblocksy(&base->block, base->height[i]);
/* Radeons enjoy things in multiples of 64.
*
* XXX
* POT, uncompressed, unmippmapped textures can be aligned to 32,
* instead of 64. */
- stride = align(base->nblocksx[i] * base->block.size, 64);
+ stride = align(
+ (base->nblocksx[i] * base->block.size) / base->block.width,
+ 32);
size = stride * base->nblocksy[i] * base->depth[i];
- tex->offset[i] = align(tex->size, 64);
+ tex->offset[i] = align(tex->size, 32);
tex->size = tex->offset[i] + size;
+ debug_printf("r300: Texture miptree: Level %d "
+ "(%dx%dx%d px, pitch %d bytes)\n",
+ i, base->width[i], base->height[i], base->depth[i],
+ stride);
/* Save stride of first level to the texture. */
if (i == 0) {
tex->stride = stride;
--
cgit v1.2.3
From 725c1f004c9ec2f7b99146eecf0a35f1b3e54dca Mon Sep 17 00:00:00 2001
From: Corbin Simpson
Date: Sun, 26 Jul 2009 22:35:26 -0700
Subject: radeon-gallium: If BO allocation fails, return NULL.
---
src/gallium/winsys/drm/radeon/core/radeon_buffer.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
index 684a487f242..775bda8308f 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
@@ -72,6 +72,7 @@ static struct pipe_buffer *radeon_buffer_create(struct pipe_winsys *ws,
alignment, domain, 0);
if (radeon_buffer->bo == NULL) {
FREE(radeon_buffer);
+ return NULL;
}
return &radeon_buffer->base;
}
--
cgit v1.2.3
From 7a10472f095ef0f9f6109ca17d8be16836e56509 Mon Sep 17 00:00:00 2001
From: Corbin Simpson
Date: Sun, 26 Jul 2009 22:48:20 -0700
Subject: r300g: Fix two trivial texture size issues.
Next thing to fix: progs/tests/mipgen.
---
src/gallium/drivers/r300/r300_texture.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 1e86020d1fc..daf1647bee8 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -87,7 +87,7 @@ static void r300_setup_miptree(struct r300_texture* tex)
size = stride * base->nblocksy[i] * base->depth[i];
tex->offset[i] = align(tex->size, 32);
- tex->size = tex->offset[i] + size;
+ tex->size += tex->offset[i] + size;
debug_printf("r300: Texture miptree: Level %d "
"(%dx%dx%d px, pitch %d bytes)\n",
@@ -120,7 +120,7 @@ static struct pipe_texture*
r300_setup_texture_state(tex, template->width[0], template->height[0],
template->width[0], template->last_level);
- tex->buffer = screen->buffer_create(screen, 64,
+ tex->buffer = screen->buffer_create(screen, 1024,
PIPE_BUFFER_USAGE_PIXEL,
tex->size);
--
cgit v1.2.3
From 2c5e55d91992c5954e1d220a7ae497c7138595f5 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Fri, 24 Jul 2009 10:58:47 -0400
Subject: r600: switch tex code to use SETfield macros
for consistency with the rest of the code.
---
src/mesa/drivers/dri/r600/r600_tex.c | 39 ++-
src/mesa/drivers/dri/r600/r600_texstate.c | 503 ++++++++++++++++++------------
2 files changed, 332 insertions(+), 210 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c
index 43d9f641af4..a1e89453900 100644
--- a/src/mesa/drivers/dri/r600/r600_tex.c
+++ b/src/mesa/drivers/dri/r600/r600_tex.c
@@ -127,10 +127,18 @@ static void r600SetTexDefaultState(radeonTexObjPtr t)
SETfield(t->SQ_TEX_RESOURCE4, SQ_ENDIAN_NONE,
SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift, SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask);
SETfield(t->SQ_TEX_RESOURCE4, 1, REQUEST_SIZE_shift, REQUEST_SIZE_mask);
- t->SQ_TEX_RESOURCE4 |= SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift
- |SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift
- |SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift
- |SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift;
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
SETfield(t->SQ_TEX_RESOURCE4, 0, BASE_LEVEL_shift, BASE_LEVEL_mask); /* mip-maps */
t->SQ_TEX_RESOURCE5 = 0;
@@ -141,17 +149,18 @@ static void r600SetTexDefaultState(radeonTexObjPtr t)
/* Initialize sampler registers */
t->SQ_TEX_SAMPLER0 = 0;
- t->SQ_TEX_SAMPLER0 |=
- SQ_TEX_WRAP << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift
- |SQ_TEX_WRAP << CLAMP_Y_shift
- |SQ_TEX_WRAP << CLAMP_Z_shift
- |SQ_TEX_XY_FILTER_POINT << XY_MAG_FILTER_shift
- |SQ_TEX_XY_FILTER_POINT << XY_MIN_FILTER_shift
- |SQ_TEX_Z_FILTER_NONE << Z_FILTER_shift
- |SQ_TEX_Z_FILTER_NONE << MIP_FILTER_shift
- |SQ_TEX_BORDER_COLOR_TRANS_BLACK << BORDER_COLOR_TYPE_shift;
-
- t->SQ_TEX_SAMPLER1 = 0x7FF << MAX_LOD_shift;
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_WRAP, SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift,
+ SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_WRAP, CLAMP_Y_shift, CLAMP_Y_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_WRAP, CLAMP_Z_shift, CLAMP_Z_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_XY_FILTER_POINT, XY_MAG_FILTER_shift, XY_MAG_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_XY_FILTER_POINT, XY_MIN_FILTER_shift, XY_MIN_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_Z_FILTER_NONE, Z_FILTER_shift, Z_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_Z_FILTER_NONE, MIP_FILTER_shift, MIP_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_BORDER_COLOR_TRANS_BLACK, BORDER_COLOR_TYPE_shift, BORDER_COLOR_TYPE_mask);
+
+ t->SQ_TEX_SAMPLER1 = 0;
+ SETfield(t->SQ_TEX_SAMPLER1, MAX_LOD_mask, MAX_LOD_shift, MAX_LOD_mask);
t->SQ_TEX_SAMPLER2 = 0;
SETbit(t->SQ_TEX_SAMPLER2, SQ_TEX_SAMPLER_WORD2_0__TYPE_bit);
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index 2466aa95950..c76292a5f8e 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -75,10 +75,10 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
{
radeonTexObj *t = radeon_tex_obj(tObj);
- t->SQ_TEX_RESOURCE4 &= ~( SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask
- |SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask
- |SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask
- |SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask );
+ CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
switch (mesa_format) /* This is mesa format. */
{
@@ -86,163 +86,211 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_RGBA8888_REV:
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_ARGB8888:
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_ARGB8888_REV:
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_RGB888:
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_RGB565:
SETfield(t->SQ_TEX_RESOURCE1, FMT_5_6_5,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_RGB565_REV:
SETfield(t->SQ_TEX_RESOURCE1, FMT_5_6_5,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_ARGB4444:
SETfield(t->SQ_TEX_RESOURCE1, FMT_4_4_4_4,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_ARGB4444_REV:
SETfield(t->SQ_TEX_RESOURCE1, FMT_4_4_4_4,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_ARGB1555:
SETfield(t->SQ_TEX_RESOURCE1, FMT_1_5_5_5,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_ARGB1555_REV:
SETfield(t->SQ_TEX_RESOURCE1, FMT_1_5_5_5,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_AL88:
case MESA_FORMAT_AL88_REV: /* TODO : Check this. */
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_RGB332:
SETfield(t->SQ_TEX_RESOURCE1, FMT_3_3_2,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_A8: /* ZERO, ZERO, ZERO, X */
SETfield(t->SQ_TEX_RESOURCE1, FMT_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_L8: /* X, X, X, ONE */
SETfield(t->SQ_TEX_RESOURCE1, FMT_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_I8: /* X, X, X, X */
case MESA_FORMAT_CI8:
SETfield(t->SQ_TEX_RESOURCE1, FMT_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
/* YUV422 TODO conversion */ /* X, Y, Z, ONE, G8R8_G8B8 */
/*
@@ -272,121 +320,157 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
SETfield(t->SQ_TEX_RESOURCE1, FMT_32_32_32_32_FLOAT,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_RGBA_FLOAT16:
SETfield(t->SQ_TEX_RESOURCE1, FMT_16_16_16_16_FLOAT,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_RGB_FLOAT32: /* X, Y, Z, ONE */
SETfield(t->SQ_TEX_RESOURCE1, FMT_32_32_32_FLOAT,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_RGB_FLOAT16:
SETfield(t->SQ_TEX_RESOURCE1, FMT_16_16_16_FLOAT,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_ALPHA_FLOAT32: /* ZERO, ZERO, ZERO, X */
SETfield(t->SQ_TEX_RESOURCE1, FMT_32_FLOAT,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_ALPHA_FLOAT16: /* ZERO, ZERO, ZERO, X */
SETfield(t->SQ_TEX_RESOURCE1, FMT_16_FLOAT,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_LUMINANCE_FLOAT32: /* X, X, X, ONE */
SETfield(t->SQ_TEX_RESOURCE1, FMT_32_FLOAT,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_LUMINANCE_FLOAT16: /* X, X, X, ONE */
SETfield(t->SQ_TEX_RESOURCE1, FMT_16_FLOAT,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
SETfield(t->SQ_TEX_RESOURCE1, FMT_32_32_FLOAT,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
SETfield(t->SQ_TEX_RESOURCE1, FMT_16_16_FLOAT,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */
SETfield(t->SQ_TEX_RESOURCE1, FMT_32_FLOAT,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */
SETfield(t->SQ_TEX_RESOURCE1, FMT_16_FLOAT,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_Z16:
case MESA_FORMAT_Z24_S8:
@@ -407,25 +491,34 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
};
switch (tObj->DepthMode) {
case GL_LUMINANCE: /* X, X, X, ONE */
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case GL_INTENSITY: /* X, X, X, X */
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case GL_ALPHA: /* ZERO, ZERO, ZERO, X */
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
default:
return GL_FALSE;
@@ -441,7 +534,6 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
void r600SetDepthTexMode(struct gl_texture_object *tObj)
{
- const GLuint *format;
radeonTexObjPtr t;
if (!tObj)
@@ -616,11 +708,14 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
pitch_val /= 4;
break;
case 24:
@@ -628,22 +723,28 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
pitch_val /= 4;
break;
case 16:
SETfield(t->SQ_TEX_RESOURCE1, FMT_5_6_5,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
pitch_val /= 2;
break;
}
@@ -739,20 +840,26 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
} else {
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
}
pitch_val /= 4;
break;
@@ -762,22 +869,28 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
pitch_val /= 4;
break;
case 2:
SETfield(t->SQ_TEX_RESOURCE1, FMT_5_6_5,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- t->SQ_TEX_RESOURCE4 |=
- (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
- |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
- |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
- |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
pitch_val /= 2;
break;
}
--
cgit v1.2.3
From 0b3d51ee3118a7f12a44650d1c14e1bfcf50ed58 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Fri, 24 Jul 2009 12:39:51 -0400
Subject: r600: switch vtx resource setup to use SETfield macros
For consistency
---
src/mesa/drivers/dri/r600/r700_chip.c | 22 +++++++++++++---------
1 file changed, 13 insertions(+), 9 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index 635dd58e72c..899b464c547 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -306,15 +306,19 @@ void r700SetupVTXConstants(GLcontext * ctx,
uSQ_VTX_CONSTANT_WORD0_0 = paos->offset;
uSQ_VTX_CONSTANT_WORD1_0 = count * (size * 4) - 1;
- uSQ_VTX_CONSTANT_WORD2_0 |= 0 << BASE_ADDRESS_HI_shift /* TODO */
- |stride << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift
- |GetSurfaceFormat(GL_FLOAT, size, NULL) << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift /* TODO : trace back api for initial data type, not only GL_FLOAT */
- |SQ_NUM_FORMAT_SCALED << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift
- |SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
-
- uSQ_VTX_CONSTANT_WORD3_0 |= 1 << MEM_REQUEST_SIZE_shift;
-
- uSQ_VTX_CONSTANT_WORD6_0 |= SQ_TEX_VTX_VALID_BUFFER << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift;
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0, stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift,
+ SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask);
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(GL_FLOAT, size, NULL),
+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift,
+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED,
+ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
+ SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit);
+
+ SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask);
+ SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER,
+ SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, SQ_TEX_RESOURCE_WORD6_0__TYPE_mask);
BEGIN_BATCH_NO_AUTOSTATE(9);
--
cgit v1.2.3
From 600a53a32edea7d03efa21103ad7122670c4ed4a Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Mon, 27 Jul 2009 01:57:25 -0400
Subject: r600: Use R600_SCRATCH_REG_OFFSET rather than
RADEON_SCRATCH_REG_OFFSET
noticed by vehemens on IRC.
---
src/mesa/drivers/dri/radeon/radeon_screen.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index 290ef2394de..118e74008fe 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -1025,7 +1025,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
else
screen->scratch = (__volatile__ uint32_t *)
- ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
+ ((GLubyte *)screen->status.map + R600_SCRATCH_REG_OFFSET);
screen->buffers = drmMapBufs( sPriv->fd );
if ( !screen->buffers ) {
--
cgit v1.2.3
From 48b2fea142af93317e095461fc1f7ef6531268c2 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Mon, 27 Jul 2009 02:07:59 -0400
Subject: r600: set VGT NUM_INSTANCES as part of the draw command
set VGT NUM_INSTANCES as part of the draw command rather
than as state as recommended by the pm4 guide. Also,
use the NUM_INSTANCES packet.
---
src/mesa/drivers/dri/r600/r700_chip.c | 1 -
src/mesa/drivers/dri/r600/r700_chip.h | 1 -
src/mesa/drivers/dri/r600/r700_render.c | 30 ++++++++++++++++++------------
src/mesa/drivers/dri/r600/r700_state.c | 3 ---
4 files changed, 18 insertions(+), 17 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index 899b464c547..e683c8cf920 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -160,7 +160,6 @@ GLboolean r700InitChipObject(context_t *context)
LINK_STATES(VGT_GROUP_VECT_1_FMT_CNTL);
LINK_STATES(VGT_GS_MODE);
LINK_STATES(VGT_PRIMITIVEID_EN);
- LINK_STATES(VGT_DMA_NUM_INSTANCES);
LINK_STATES(VGT_MULTI_PRIM_IB_RESET_EN);
LINK_STATES(VGT_INSTANCE_STEP_RATE_0);
LINK_STATES(VGT_INSTANCE_STEP_RATE_1);
diff --git a/src/mesa/drivers/dri/r600/r700_chip.h b/src/mesa/drivers/dri/r600/r700_chip.h
index fa419aa4995..ca3364bb489 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.h
+++ b/src/mesa/drivers/dri/r600/r700_chip.h
@@ -393,7 +393,6 @@ typedef struct _R700_CHIP_CONTEXT
union UINT_FLOAT VGT_GROUP_VECT_1_FMT_CNTL ; /* 0xA28F */
union UINT_FLOAT VGT_GS_MODE ; /* 0xA290 */
union UINT_FLOAT VGT_PRIMITIVEID_EN ; /* 0xA2A1 */
- union UINT_FLOAT VGT_DMA_NUM_INSTANCES ; /* 0xA2A2 */
union UINT_FLOAT VGT_MULTI_PRIM_IB_RESET_EN; /* 0xA2A5 */
union UINT_FLOAT VGT_INSTANCE_STEP_RATE_0 ; /* 0xA2A8 */
union UINT_FLOAT VGT_INSTANCE_STEP_RATE_1 ; /* 0xA2A9 */
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 77cbe3cfd0f..0a5e0415479 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -329,7 +329,7 @@ static GLboolean r700RunRender(GLcontext * ctx,
r700SendDepthTargetState(context);
/* richard test code */
- for (i = 0; i < vb->PrimitiveCount; i++)
+ for (i = 0; i < vb->PrimitiveCount; i++)
{
GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
GLuint start = vb->Primitive[i].start;
@@ -341,25 +341,31 @@ static GLboolean r700RunRender(GLcontext * ctx,
unsigned int VGT_INDEX_TYPE = 0;
unsigned int VGT_PRIMITIVE_TYPE = 0;
unsigned int VGT_NUM_INDICES = 0;
-
- numEntires = 2 /* VGT_INDEX_TYPE */
- + 3 /* VGT_PRIMITIVE_TYPE */
- + numIndices + 3; /* DRAW_INDEX_IMMD */
-
- BEGIN_BATCH_NO_AUTOSTATE(numEntires);
- VGT_INDEX_TYPE |= DI_INDEX_SIZE_32_BIT << INDEX_TYPE_shift;
-
- R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
- R600_OUT_BATCH(VGT_INDEX_TYPE);
+ numEntires = 3 /* VGT_PRIMITIVE_TYPE */
+ + 2 /* VGT_INDEX_TYPE */
+ + 2 /* NUM_INSTANCES */
+ + numIndices + 3; /* DRAW_INDEX_IMMD */
- VGT_NUM_INDICES = numIndices;
+ BEGIN_BATCH_NO_AUTOSTATE(numEntires);
+ // prim
VGT_PRIMITIVE_TYPE |= r700PrimitiveType(prim) << VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift;
R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
R600_OUT_BATCH(VGT_PRIMITIVE_TYPE);
+ // index type
+ VGT_INDEX_TYPE |= DI_INDEX_SIZE_32_BIT << INDEX_TYPE_shift;
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+ R600_OUT_BATCH(VGT_INDEX_TYPE);
+
+ // num instances
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+ R600_OUT_BATCH(1);
+
+ // draw packet
+ VGT_NUM_INDICES = numIndices;
VGT_DRAW_INITIATOR |= DI_SRC_SEL_IMMEDIATE << SOURCE_SELECT_shift;
VGT_DRAW_INITIATOR |= DI_MAJOR_MODE_0 << MAJOR_MODE_shift;
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index a2ddebb1cee..bd0abc06e38 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -1653,9 +1653,6 @@ void r700InitState(GLcontext * ctx) //-------------------
r700->VGT_MIN_VTX_INDX.u32All = 0;
r700->VGT_INDX_OFFSET.u32All = 0;
- /* Specify the number of instances */
- r700->VGT_DMA_NUM_INSTANCES.u32All = 1;
-
/* default shader connections. */
r700->SPI_VS_OUT_ID_0.u32All = 0x03020100;
r700->SPI_VS_OUT_ID_1.u32All = 0x07060504;
--
cgit v1.2.3
From 7e6819f8430e77012d6cd9278cabaf1d4238117a Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Mon, 27 Jul 2009 02:25:24 -0400
Subject: r600: don't draw when num indices is 0
fixes engine demo
---
src/mesa/drivers/dri/r600/r700_render.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 0a5e0415479..5a2bf84b59e 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -342,6 +342,9 @@ static GLboolean r700RunRender(GLcontext * ctx,
unsigned int VGT_PRIMITIVE_TYPE = 0;
unsigned int VGT_NUM_INDICES = 0;
+ if (numIndices < 1)
+ continue;
+
numEntires = 3 /* VGT_PRIMITIVE_TYPE */
+ 2 /* VGT_INDEX_TYPE */
+ 2 /* NUM_INSTANCES */
--
cgit v1.2.3
From 7d3190a85b17e747981d0aafe13d1ab1946f1649 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Mon, 27 Jul 2009 03:52:37 -0400
Subject: r600: fix textures
We weren't allocating enough gprs for the fragment shader
in some cases. There are likely other issues that still need
to be sorted out for textures, but at least they now work.
---
src/mesa/drivers/dri/r600/r700_fragprog.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index 44de2aebee6..3afd0b05288 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -299,7 +299,7 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
ui = (r700->SPI_PS_IN_CONTROL_0.u32All & NUM_INTERP_mask) / (1 << NUM_INTERP_shift);
- ui = ui ? ui : unNumOfReg;
+ ui = (unNumOfReg < ui) ? ui : unNumOfReg;
SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask);
--
cgit v1.2.3
From 1ee3bcfff08599961c69549a1ad699c02df49bec Mon Sep 17 00:00:00 2001
From: Patrice Mandin
Date: Mon, 27 Jul 2009 10:57:53 +0200
Subject: nouveau: swizzle a single row or column, doing it one pixel at a time
---
src/gallium/drivers/nv04/nv04_surface_2d.c | 108 +++++++++++++++++++++--------
1 file changed, 80 insertions(+), 28 deletions(-)
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index aba40cfaff7..bbbcb54c467 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -209,6 +209,43 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx,
return 0;
}
+static int
+nv04_surface_copy_m2mf_swizzle(struct nv04_surface_2d *ctx,
+ struct pipe_surface *dst, int dx, int dy,
+ struct pipe_surface *src, int sx, int sy)
+{
+ struct nouveau_channel *chan = ctx->m2mf->channel;
+ struct nouveau_grobj *m2mf = ctx->m2mf;
+ struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src));
+ struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst));
+ unsigned src_pitch = ((struct nv04_surface *)src)->pitch;
+ unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch;
+ unsigned dst_offset = dst->offset + nv04_swizzle_bits(dx, dy) *
+ dst->texture->block.size;
+ unsigned src_offset = src->offset + sy * src_pitch +
+ sx * src->texture->block.size;
+
+ BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2);
+ OUT_RELOCo(chan, src_bo,
+ NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCo(chan, dst_bo,
+ NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+ BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
+ OUT_RELOCl(chan, src_bo, src_offset,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, dst_bo, dst_offset,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ OUT_RING (chan, src_pitch);
+ OUT_RING (chan, dst_pitch);
+ OUT_RING (chan, 1 * src->texture->block.size);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0x0101);
+ OUT_RING (chan, 0);
+
+ return 0;
+}
+
static int
nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
int dx, int dy, struct pipe_surface *src, int sx, int sy,
@@ -258,42 +295,57 @@ nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
assert(src->format == dst->format);
/* Setup transfer to swizzle the texture to vram if needed */
- if (src_linear && !dst_linear && w > 1 && h > 1) {
- int potWidth = 1<potHeight ? potHeight : potWidth);
+ if (src_linear && !dst_linear) {
int x,y;
- /* top left is always POT, but we can only swizzle squares */
- for (y=0; y1) && (h>1)) {
+ int potWidth = 1<potHeight ? potHeight : potWidth);
+
+ /* top left is always POT, but we can only swizzle squares */
+ for (y=0; y0) {
+ /* top right */
+ if (remainWidth>0) {
nv04_surface_copy(ctx, dst, dx+potWidth, dy,
- src, sx+potWidth, sy,
- remainWidth, potHeight);
- }
+ src, sx+potWidth, sy,
+ remainWidth, potHeight);
+ }
- /* bottom left */
- if (remainHeight>0) {
- nv04_surface_copy(ctx, dst, dx, dy+potHeight,
+ /* bottom left */
+ if (remainHeight>0) {
+ nv04_surface_copy(ctx, dst, dx, dy+potHeight,
src, sx, sy+potHeight,
- potWidth, remainHeight);
- }
+ potWidth, remainHeight);
+ }
- /* bottom right */
- if ((remainWidth>0) && (remainHeight>0)) {
- nv04_surface_copy(ctx, dst, dx+potWidth, dy+potHeight,
- src, sx+potWidth, sy+potHeight,
- remainWidth, remainHeight);
+ /* bottom right */
+ if ((remainWidth>0) && (remainHeight>0)) {
+ nv04_surface_copy(ctx, dst, dx+potWidth, dy+potHeight,
+ src, sx+potWidth, sy+potHeight,
+ remainWidth, remainHeight);
+ }
+ } else if (w==1) {
+ /* We have a column to copy to a swizzled texture */
+ for (y=0; y
Date: Mon, 27 Jul 2009 17:13:48 +0300
Subject: radeon: Add r6xx/r7xx chip family to get_chip_family_name
This fixes problem that glxinfo was reporting r600+ cards as unknown.
Signed-off-by: Pauli Nieminen
---
src/mesa/drivers/dri/radeon/radeon_common_context.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index 285e015c924..a50cd056e1c 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -85,6 +85,17 @@ static const char* get_chip_family_name(int chip_family)
case CHIP_FAMILY_R580: return "R580";
case CHIP_FAMILY_RV560: return "RV560";
case CHIP_FAMILY_RV570: return "RV570";
+ case CHIP_FAMILY_R600: return "R600";
+ case CHIP_FAMILY_RV610: return "RV610";
+ case CHIP_FAMILY_RV630: return "RV630";
+ case CHIP_FAMILY_RV670: return "RV670";
+ case CHIP_FAMILY_RV620: return "RV620";
+ case CHIP_FAMILY_RV635: return "RV635";
+ case CHIP_FAMILY_RS780: return "RS780";
+ case CHIP_FAMILY_RV770: return "RV770";
+ case CHIP_FAMILY_RV730: return "RV730";
+ case CHIP_FAMILY_RV710: return "RV710";
+ case CHIP_FAMILY_RV740: return "RV740";
default: return "unknown";
}
}
--
cgit v1.2.3
From e5bed439be4fd7c3a349aedc4bff7eec4e4d363e Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Wed, 15 Jul 2009 17:36:42 +0200
Subject: r300: Detangle fragment program compiler from driver-specific
structure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This is in preparation of sharing the fragment program compiler with Gallium:
Compiler code is moved into its own directory and modified so that it no
longer depends on driver structures.
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/Makefile | 22 +-
src/mesa/drivers/dri/r300/compiler/r300_fragprog.c | 450 +++++++++
src/mesa/drivers/dri/r300/compiler/r300_fragprog.h | 49 +
.../drivers/dri/r300/compiler/r300_fragprog_emit.c | 345 +++++++
.../dri/r300/compiler/r300_fragprog_swizzle.c | 225 +++++
.../dri/r300/compiler/r300_fragprog_swizzle.h | 42 +
src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 315 ++++++
src/mesa/drivers/dri/r300/compiler/r500_fragprog.c | 482 ++++++++++
src/mesa/drivers/dri/r300/compiler/r500_fragprog.h | 52 +
.../drivers/dri/r300/compiler/r500_fragprog_emit.c | 329 +++++++
.../drivers/dri/r300/compiler/radeon_compiler.h | 162 ++++
.../drivers/dri/r300/compiler/radeon_nqssadce.c | 297 ++++++
.../drivers/dri/r300/compiler/radeon_nqssadce.h | 93 ++
.../drivers/dri/r300/compiler/radeon_program.c | 128 +++
.../drivers/dri/r300/compiler/radeon_program.h | 131 +++
.../drivers/dri/r300/compiler/radeon_program_alu.c | 635 +++++++++++++
.../drivers/dri/r300/compiler/radeon_program_alu.h | 53 ++
.../dri/r300/compiler/radeon_program_pair.c | 1004 ++++++++++++++++++++
.../dri/r300/compiler/radeon_program_pair.h | 126 +++
src/mesa/drivers/dri/r300/r300_context.h | 136 +--
src/mesa/drivers/dri/r300/r300_fragprog.c | 451 ---------
src/mesa/drivers/dri/r300/r300_fragprog.h | 111 ---
src/mesa/drivers/dri/r300/r300_fragprog_common.c | 280 +-----
src/mesa/drivers/dri/r300/r300_fragprog_emit.c | 344 -------
src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c | 225 -----
src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h | 42 -
src/mesa/drivers/dri/r300/r300_ioctl.c | 63 +-
src/mesa/drivers/dri/r300/r300_state.c | 16 +-
src/mesa/drivers/dri/r300/r300_swtcl.c | 8 +-
src/mesa/drivers/dri/r300/r300_vertprog.c | 14 +-
src/mesa/drivers/dri/r300/r500_fragprog.c | 480 ----------
src/mesa/drivers/dri/r300/r500_fragprog.h | 52 -
src/mesa/drivers/dri/r300/r500_fragprog_emit.c | 327 -------
src/mesa/drivers/dri/r300/radeon_nqssadce.c | 297 ------
src/mesa/drivers/dri/r300/radeon_nqssadce.h | 93 --
src/mesa/drivers/dri/r300/radeon_program.c | 128 ---
src/mesa/drivers/dri/r300/radeon_program.h | 131 ---
src/mesa/drivers/dri/r300/radeon_program_alu.c | 635 -------------
src/mesa/drivers/dri/r300/radeon_program_alu.h | 53 --
src/mesa/drivers/dri/r300/radeon_program_pair.c | 1004 --------------------
src/mesa/drivers/dri/r300/radeon_program_pair.h | 126 ---
src/mesa/drivers/dri/radeon/radeon_screen.c | 4 +-
42 files changed, 5026 insertions(+), 4934 deletions(-)
create mode 100644 src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
create mode 100644 src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
create mode 100644 src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
create mode 100644 src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
create mode 100644 src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
create mode 100644 src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
create mode 100644 src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
create mode 100644 src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
create mode 100644 src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h
create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_program.c
create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_program.h
create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
delete mode 100644 src/mesa/drivers/dri/r300/r300_fragprog.c
delete mode 100644 src/mesa/drivers/dri/r300/r300_fragprog.h
delete mode 100644 src/mesa/drivers/dri/r300/r300_fragprog_emit.c
delete mode 100644 src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c
delete mode 100644 src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h
delete mode 100644 src/mesa/drivers/dri/r300/r500_fragprog.c
delete mode 100644 src/mesa/drivers/dri/r300/r500_fragprog.h
delete mode 100644 src/mesa/drivers/dri/r300/r500_fragprog_emit.c
delete mode 100644 src/mesa/drivers/dri/r300/radeon_nqssadce.c
delete mode 100644 src/mesa/drivers/dri/r300/radeon_nqssadce.h
delete mode 100644 src/mesa/drivers/dri/r300/radeon_program.c
delete mode 100644 src/mesa/drivers/dri/r300/radeon_program.h
delete mode 100644 src/mesa/drivers/dri/r300/radeon_program_alu.c
delete mode 100644 src/mesa/drivers/dri/r300/radeon_program_alu.h
delete mode 100644 src/mesa/drivers/dri/r300/radeon_program_pair.c
delete mode 100644 src/mesa/drivers/dri/r300/radeon_program_pair.h
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
index 7460410ee65..3a7de6d5bec 100644
--- a/src/mesa/drivers/dri/r300/Makefile
+++ b/src/mesa/drivers/dri/r300/Makefile
@@ -38,6 +38,18 @@ RADEON_COMMON_SOURCES = \
radeon_span.c \
radeon_fbo.c
+RADEON_COMPILER_SOURCES = \
+ compiler/radeon_nqssadce.c \
+ compiler/radeon_program.c \
+ compiler/radeon_program_alu.c \
+ compiler/radeon_program_pair.c \
+ compiler/r3xx_fragprog.c \
+ compiler/r300_fragprog.c \
+ compiler/r300_fragprog_swizzle.c \
+ compiler/r300_fragprog_emit.c \
+ compiler/r500_fragprog.c \
+ compiler/r500_fragprog_emit.c \
+
DRIVER_SOURCES = \
radeon_screen.c \
r300_context.c \
@@ -48,21 +60,13 @@ DRIVER_SOURCES = \
r300_render.c \
r300_tex.c \
r300_texstate.c \
- radeon_program.c \
- radeon_program_alu.c \
- radeon_program_pair.c \
- radeon_nqssadce.c \
r300_vertprog.c \
r300_fragprog_common.c \
- r300_fragprog.c \
- r300_fragprog_swizzle.c \
- r300_fragprog_emit.c \
- r500_fragprog.c \
- r500_fragprog_emit.c \
r300_shader.c \
r300_emit.c \
r300_swtcl.c \
$(RADEON_COMMON_SOURCES) \
+ $(RADEON_COMPILER_SOURCES) \
$(EGL_SOURCES) \
$(CS_SOURCES)
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
new file mode 100644
index 00000000000..00ef9645711
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
@@ -0,0 +1,450 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "compiler/r300_fragprog.h"
+
+#include "shader/prog_parameter.h"
+
+#include "r300_reg.h"
+
+static void reset_srcreg(struct prog_src_register* reg)
+{
+ _mesa_bzero(reg, sizeof(*reg));
+ reg->Swizzle = SWIZZLE_NOOP;
+}
+
+static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
+{
+ gl_state_index fail_value_tokens[STATE_LENGTH] = {
+ STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
+ };
+ struct prog_src_register reg = { 0, };
+
+ fail_value_tokens[2] = tmu;
+ reg.File = PROGRAM_STATE_VAR;
+ reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
+ reg.Swizzle = SWIZZLE_WWWW;
+ return reg;
+}
+
+/**
+ * Transform TEX, TXP, TXB, and KIL instructions in the following way:
+ * - premultiply texture coordinates for RECT
+ * - extract operand swizzles
+ * - introduce a temporary register when write masks are needed
+ *
+ * \todo If/when r5xx uses the radeon_program architecture, this can probably
+ * be reused.
+ */
+GLboolean r300_transform_TEX(
+ struct radeon_transform_context *t,
+ struct prog_instruction* orig_inst, void* data)
+{
+ struct r300_fragment_program_compiler *compiler =
+ (struct r300_fragment_program_compiler*)data;
+ struct prog_instruction inst = *orig_inst;
+ struct prog_instruction* tgt;
+ GLboolean destredirect = GL_FALSE;
+
+ if (inst.Opcode != OPCODE_TEX &&
+ inst.Opcode != OPCODE_TXB &&
+ inst.Opcode != OPCODE_TXP &&
+ inst.Opcode != OPCODE_KIL)
+ return GL_FALSE;
+
+ if (inst.Opcode != OPCODE_KIL &&
+ t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
+ GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func;
+
+ if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
+ tgt = radeonAppendInstructions(t->Program, 1);
+
+ tgt->Opcode = OPCODE_MOV;
+ tgt->DstReg = inst.DstReg;
+ if (comparefunc == GL_ALWAYS) {
+ tgt->SrcReg[0].File = PROGRAM_BUILTIN;
+ tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
+ } else {
+ tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
+ }
+ return GL_TRUE;
+ }
+
+ inst.DstReg.File = PROGRAM_TEMPORARY;
+ inst.DstReg.Index = radeonFindFreeTemporary(t);
+ inst.DstReg.WriteMask = WRITEMASK_XYZW;
+ }
+
+
+ /* Hardware uses [0..1]x[0..1] range for rectangle textures
+ * instead of [0..Width]x[0..Height].
+ * Add a scaling instruction.
+ */
+ if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) {
+ gl_state_index tokens[STATE_LENGTH] = {
+ STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
+ 0
+ };
+
+ int tempreg = radeonFindFreeTemporary(t);
+ int factor_index;
+
+ tokens[2] = inst.TexSrcUnit;
+ factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens);
+
+ tgt = radeonAppendInstructions(t->Program, 1);
+
+ tgt->Opcode = OPCODE_MUL;
+ tgt->DstReg.File = PROGRAM_TEMPORARY;
+ tgt->DstReg.Index = tempreg;
+ tgt->SrcReg[0] = inst.SrcReg[0];
+ tgt->SrcReg[1].File = PROGRAM_STATE_VAR;
+ tgt->SrcReg[1].Index = factor_index;
+
+ reset_srcreg(&inst.SrcReg[0]);
+ inst.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst.SrcReg[0].Index = tempreg;
+ }
+
+ if (inst.Opcode != OPCODE_KIL) {
+ if (inst.DstReg.File != PROGRAM_TEMPORARY ||
+ inst.DstReg.WriteMask != WRITEMASK_XYZW) {
+ int tempreg = radeonFindFreeTemporary(t);
+
+ inst.DstReg.File = PROGRAM_TEMPORARY;
+ inst.DstReg.Index = tempreg;
+ inst.DstReg.WriteMask = WRITEMASK_XYZW;
+ destredirect = GL_TRUE;
+ } else if (inst.SaturateMode) {
+ destredirect = GL_TRUE;
+ }
+ }
+
+ if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
+ int tmpreg = radeonFindFreeTemporary(t);
+ tgt = radeonAppendInstructions(t->Program, 1);
+ tgt->Opcode = OPCODE_MOV;
+ tgt->DstReg.File = PROGRAM_TEMPORARY;
+ tgt->DstReg.Index = tmpreg;
+ tgt->SrcReg[0] = inst.SrcReg[0];
+
+ reset_srcreg(&inst.SrcReg[0]);
+ inst.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst.SrcReg[0].Index = tmpreg;
+ }
+
+ tgt = radeonAppendInstructions(t->Program, 1);
+ _mesa_copy_instructions(tgt, &inst, 1);
+
+ if (inst.Opcode != OPCODE_KIL &&
+ t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
+ GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func;
+ GLuint depthmode = compiler->state.unit[inst.TexSrcUnit].depth_texture_mode;
+ int rcptemp = radeonFindFreeTemporary(t);
+ int pass, fail;
+
+ tgt = radeonAppendInstructions(t->Program, 3);
+
+ tgt[0].Opcode = OPCODE_RCP;
+ tgt[0].DstReg.File = PROGRAM_TEMPORARY;
+ tgt[0].DstReg.Index = rcptemp;
+ tgt[0].DstReg.WriteMask = WRITEMASK_W;
+ tgt[0].SrcReg[0] = inst.SrcReg[0];
+ tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
+
+ tgt[1].Opcode = OPCODE_MAD;
+ tgt[1].DstReg = inst.DstReg;
+ tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
+ tgt[1].SrcReg[0] = inst.SrcReg[0];
+ tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
+ tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
+ tgt[1].SrcReg[1].Index = rcptemp;
+ tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
+ tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
+ tgt[1].SrcReg[2].Index = inst.DstReg.Index;
+ if (depthmode == 0) /* GL_LUMINANCE */
+ tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
+ else if (depthmode == 2) /* GL_ALPHA */
+ tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
+
+ /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
+ * r < tex <=> -tex+r < 0
+ * r >= tex <=> not (-tex+r < 0 */
+ if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
+ tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW;
+ else
+ tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW;
+
+ tgt[2].Opcode = OPCODE_CMP;
+ tgt[2].DstReg = orig_inst->DstReg;
+ tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
+ tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
+
+ if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
+ pass = 1;
+ fail = 2;
+ } else {
+ pass = 2;
+ fail = 1;
+ }
+
+ tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
+ tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
+ tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
+ } else if (destredirect) {
+ tgt = radeonAppendInstructions(t->Program, 1);
+
+ tgt->Opcode = OPCODE_MOV;
+ tgt->DstReg = orig_inst->DstReg;
+ tgt->SaturateMode = inst.SaturateMode;
+ tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
+ tgt->SrcReg[0].Index = inst.DstReg.Index;
+ }
+
+ return GL_TRUE;
+}
+
+/* just some random things... */
+void r300FragmentProgramDump(struct rX00_fragment_program_code *c)
+{
+ struct r300_fragment_program_code *code = &c->code.r300;
+ int n, i, j;
+ static int pc = 0;
+
+ fprintf(stderr, "pc=%d*************************************\n", pc++);
+
+ fprintf(stderr, "Hardware program\n");
+ fprintf(stderr, "----------------\n");
+
+ for (n = 0; n < (code->cur_node + 1); n++) {
+ fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
+ "alu_end: %d, tex_end: %d, flags: %08x\n", n,
+ code->node[n].alu_offset,
+ code->node[n].tex_offset,
+ code->node[n].alu_end, code->node[n].tex_end,
+ code->node[n].flags);
+
+ if (n > 0 || code->first_node_has_tex) {
+ fprintf(stderr, " TEX:\n");
+ for (i = code->node[n].tex_offset;
+ i <= code->node[n].tex_offset + code->node[n].tex_end;
+ ++i) {
+ const char *instr;
+
+ switch ((code->tex.
+ inst[i] >> R300_TEX_INST_SHIFT) &
+ 15) {
+ case R300_TEX_OP_LD:
+ instr = "TEX";
+ break;
+ case R300_TEX_OP_KIL:
+ instr = "KIL";
+ break;
+ case R300_TEX_OP_TXP:
+ instr = "TXP";
+ break;
+ case R300_TEX_OP_TXB:
+ instr = "TXB";
+ break;
+ default:
+ instr = "UNKNOWN";
+ }
+
+ fprintf(stderr,
+ " %s t%i, %c%i, texture[%i] (%08x)\n",
+ instr,
+ (code->tex.
+ inst[i] >> R300_DST_ADDR_SHIFT) & 31,
+ 't',
+ (code->tex.
+ inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
+ (code->tex.
+ inst[i] & R300_TEX_ID_MASK) >>
+ R300_TEX_ID_SHIFT,
+ code->tex.inst[i]);
+ }
+ }
+
+ for (i = code->node[n].alu_offset;
+ i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) {
+ char srcc[3][10], dstc[20];
+ char srca[3][10], dsta[20];
+ char argc[3][20];
+ char arga[3][20];
+ char flags[5], tmp[10];
+
+ for (j = 0; j < 3; ++j) {
+ int regc = code->alu.inst[i].inst1 >> (j * 6);
+ int rega = code->alu.inst[i].inst3 >> (j * 6);
+
+ sprintf(srcc[j], "%c%i",
+ (regc & 32) ? 'c' : 't', regc & 31);
+ sprintf(srca[j], "%c%i",
+ (rega & 32) ? 'c' : 't', rega & 31);
+ }
+
+ dstc[0] = 0;
+ sprintf(flags, "%s%s%s",
+ (code->alu.inst[i].
+ inst1 & R300_ALU_DSTC_REG_X) ? "x" : "",
+ (code->alu.inst[i].
+ inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "",
+ (code->alu.inst[i].
+ inst1 & R300_ALU_DSTC_REG_Z) ? "z" : "");
+ if (flags[0] != 0) {
+ sprintf(dstc, "t%i.%s ",
+ (code->alu.inst[i].
+ inst1 >> R300_ALU_DSTC_SHIFT) & 31,
+ flags);
+ }
+ sprintf(flags, "%s%s%s",
+ (code->alu.inst[i].
+ inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
+ (code->alu.inst[i].
+ inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
+ (code->alu.inst[i].
+ inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
+ if (flags[0] != 0) {
+ sprintf(tmp, "o%i.%s",
+ (code->alu.inst[i].
+ inst1 >> R300_ALU_DSTC_SHIFT) & 31,
+ flags);
+ strcat(dstc, tmp);
+ }
+
+ dsta[0] = 0;
+ if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) {
+ sprintf(dsta, "t%i.w ",
+ (code->alu.inst[i].
+ inst3 >> R300_ALU_DSTA_SHIFT) & 31);
+ }
+ if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) {
+ sprintf(tmp, "o%i.w ",
+ (code->alu.inst[i].
+ inst3 >> R300_ALU_DSTA_SHIFT) & 31);
+ strcat(dsta, tmp);
+ }
+ if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) {
+ strcat(dsta, "Z");
+ }
+
+ fprintf(stderr,
+ "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
+ " w: %3s %3s %3s -> %-20s (%08x)\n", i,
+ srcc[0], srcc[1], srcc[2], dstc,
+ code->alu.inst[i].inst1, srca[0], srca[1],
+ srca[2], dsta, code->alu.inst[i].inst3);
+
+ for (j = 0; j < 3; ++j) {
+ int regc = code->alu.inst[i].inst0 >> (j * 7);
+ int rega = code->alu.inst[i].inst2 >> (j * 7);
+ int d;
+ char buf[20];
+
+ d = regc & 31;
+ if (d < 12) {
+ switch (d % 4) {
+ case R300_ALU_ARGC_SRC0C_XYZ:
+ sprintf(buf, "%s.xyz",
+ srcc[d / 4]);
+ break;
+ case R300_ALU_ARGC_SRC0C_XXX:
+ sprintf(buf, "%s.xxx",
+ srcc[d / 4]);
+ break;
+ case R300_ALU_ARGC_SRC0C_YYY:
+ sprintf(buf, "%s.yyy",
+ srcc[d / 4]);
+ break;
+ case R300_ALU_ARGC_SRC0C_ZZZ:
+ sprintf(buf, "%s.zzz",
+ srcc[d / 4]);
+ break;
+ }
+ } else if (d < 15) {
+ sprintf(buf, "%s.www", srca[d - 12]);
+ } else if (d == 20) {
+ sprintf(buf, "0.0");
+ } else if (d == 21) {
+ sprintf(buf, "1.0");
+ } else if (d == 22) {
+ sprintf(buf, "0.5");
+ } else if (d >= 23 && d < 32) {
+ d -= 23;
+ switch (d / 3) {
+ case 0:
+ sprintf(buf, "%s.yzx",
+ srcc[d % 3]);
+ break;
+ case 1:
+ sprintf(buf, "%s.zxy",
+ srcc[d % 3]);
+ break;
+ case 2:
+ sprintf(buf, "%s.Wzy",
+ srcc[d % 3]);
+ break;
+ }
+ } else {
+ sprintf(buf, "%i", d);
+ }
+
+ sprintf(argc[j], "%s%s%s%s",
+ (regc & 32) ? "-" : "",
+ (regc & 64) ? "|" : "",
+ buf, (regc & 64) ? "|" : "");
+
+ d = rega & 31;
+ if (d < 9) {
+ sprintf(buf, "%s.%c", srcc[d / 3],
+ 'x' + (char)(d % 3));
+ } else if (d < 12) {
+ sprintf(buf, "%s.w", srca[d - 9]);
+ } else if (d == 16) {
+ sprintf(buf, "0.0");
+ } else if (d == 17) {
+ sprintf(buf, "1.0");
+ } else if (d == 18) {
+ sprintf(buf, "0.5");
+ } else {
+ sprintf(buf, "%i", d);
+ }
+
+ sprintf(arga[j], "%s%s%s%s",
+ (rega & 32) ? "-" : "",
+ (rega & 64) ? "|" : "",
+ buf, (rega & 64) ? "|" : "");
+ }
+
+ fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n"
+ " w: %8s %8s %8s op: %08x\n",
+ argc[0], argc[1], argc[2],
+ code->alu.inst[i].inst0, arga[0], arga[1],
+ arga[2], code->alu.inst[i].inst2);
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
new file mode 100644
index 00000000000..186fad6490f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ * Ben Skeggs
+ * Jerome Glisse
+ */
+#ifndef __R300_FRAGPROG_H_
+#define __R300_FRAGPROG_H_
+
+#include "shader/program.h"
+#include "shader/prog_instruction.h"
+
+#include "compiler/radeon_compiler.h"
+#include "compiler/radeon_program.h"
+
+
+extern GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
+
+extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c);
+
+extern GLboolean r300_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
new file mode 100644
index 00000000000..1cfb565b6e0
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -0,0 +1,345 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * Emit the r300_fragment_program_code that can be understood by the hardware.
+ * Input is a pre-transformed radeon_program.
+ *
+ * \author Ben Skeggs
+ *
+ * \author Jerome Glisse
+ *
+ * \todo FogOption
+ */
+
+#include "compiler/r300_fragprog.h"
+
+#include "r300_reg.h"
+
+#include "compiler/radeon_program_pair.h"
+#include "compiler/r300_fragprog_swizzle.h"
+
+
+#define PROG_CODE \
+ struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
+ struct r300_fragment_program_code *code = &c->code->code.r300
+
+#define error(fmt, args...) do { \
+ fprintf(stderr, "%s::%s(): " fmt "\n", \
+ __FILE__, __FUNCTION__, ##args); \
+ } while(0)
+
+
+static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwindex)
+{
+ PROG_CODE;
+
+ for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) {
+ if (code->constant[*hwindex].File == file &&
+ code->constant[*hwindex].Index == index)
+ break;
+ }
+
+ if (*hwindex >= code->const_nr) {
+ if (*hwindex >= R300_PFS_NUM_CONST_REGS) {
+ error("Out of hw constants!\n");
+ return GL_FALSE;
+ }
+
+ code->const_nr++;
+ code->constant[*hwindex].File = file;
+ code->constant[*hwindex].Index = index;
+ }
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Mark a temporary register as used.
+ */
+static void use_temporary(struct r300_fragment_program_code *code, GLuint index)
+{
+ if (index > code->max_temp_idx)
+ code->max_temp_idx = index;
+}
+
+
+static GLuint translate_rgb_opcode(GLuint opcode)
+{
+ switch(opcode) {
+ case OPCODE_CMP: return R300_ALU_OUTC_CMP;
+ case OPCODE_DP3: return R300_ALU_OUTC_DP3;
+ case OPCODE_DP4: return R300_ALU_OUTC_DP4;
+ case OPCODE_FRC: return R300_ALU_OUTC_FRC;
+ default:
+ error("translate_rgb_opcode(%i): Unknown opcode", opcode);
+ /* fall through */
+ case OPCODE_NOP:
+ /* fall through */
+ case OPCODE_MAD: return R300_ALU_OUTC_MAD;
+ case OPCODE_MAX: return R300_ALU_OUTC_MAX;
+ case OPCODE_MIN: return R300_ALU_OUTC_MIN;
+ case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
+ }
+}
+
+static GLuint translate_alpha_opcode(GLuint opcode)
+{
+ switch(opcode) {
+ case OPCODE_CMP: return R300_ALU_OUTA_CMP;
+ case OPCODE_DP3: return R300_ALU_OUTA_DP4;
+ case OPCODE_DP4: return R300_ALU_OUTA_DP4;
+ case OPCODE_EX2: return R300_ALU_OUTA_EX2;
+ case OPCODE_FRC: return R300_ALU_OUTA_FRC;
+ case OPCODE_LG2: return R300_ALU_OUTA_LG2;
+ default:
+ error("translate_rgb_opcode(%i): Unknown opcode", opcode);
+ /* fall through */
+ case OPCODE_NOP:
+ /* fall through */
+ case OPCODE_MAD: return R300_ALU_OUTA_MAD;
+ case OPCODE_MAX: return R300_ALU_OUTA_MAX;
+ case OPCODE_MIN: return R300_ALU_OUTA_MIN;
+ case OPCODE_RCP: return R300_ALU_OUTA_RCP;
+ case OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
+ }
+}
+
+/**
+ * Emit one paired ALU instruction.
+ */
+static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
+{
+ PROG_CODE;
+
+ if (code->alu.length >= R300_PFS_MAX_ALU_INST) {
+ error("Too many ALU instructions");
+ return GL_FALSE;
+ }
+
+ int ip = code->alu.length++;
+ int j;
+ code->node[code->cur_node].alu_end++;
+
+ code->alu.inst[ip].inst0 = translate_rgb_opcode(inst->RGB.Opcode);
+ code->alu.inst[ip].inst2 = translate_alpha_opcode(inst->Alpha.Opcode);
+
+ for(j = 0; j < 3; ++j) {
+ GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5);
+ if (!inst->RGB.Src[j].Constant)
+ use_temporary(code, inst->RGB.Src[j].Index);
+ code->alu.inst[ip].inst1 |= src << (6*j);
+
+ src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5);
+ if (!inst->Alpha.Src[j].Constant)
+ use_temporary(code, inst->Alpha.Src[j].Index);
+ code->alu.inst[ip].inst3 |= src << (6*j);
+
+ GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
+ arg |= inst->RGB.Arg[j].Abs << 6;
+ arg |= inst->RGB.Arg[j].Negate << 5;
+ code->alu.inst[ip].inst0 |= arg << (7*j);
+
+ arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
+ arg |= inst->Alpha.Arg[j].Abs << 6;
+ arg |= inst->Alpha.Arg[j].Negate << 5;
+ code->alu.inst[ip].inst2 |= arg << (7*j);
+ }
+
+ if (inst->RGB.Saturate)
+ code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP;
+ if (inst->Alpha.Saturate)
+ code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP;
+
+ if (inst->RGB.WriteMask) {
+ use_temporary(code, inst->RGB.DestIndex);
+ code->alu.inst[ip].inst1 |=
+ (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) |
+ (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
+ }
+ if (inst->RGB.OutputWriteMask) {
+ code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT);
+ code->node[code->cur_node].flags |= R300_RGBA_OUT;
+ }
+
+ if (inst->Alpha.WriteMask) {
+ use_temporary(code, inst->Alpha.DestIndex);
+ code->alu.inst[ip].inst3 |=
+ (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) |
+ R300_ALU_DSTA_REG;
+ }
+ if (inst->Alpha.OutputWriteMask) {
+ code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT;
+ code->node[code->cur_node].flags |= R300_RGBA_OUT;
+ }
+ if (inst->Alpha.DepthWriteMask) {
+ code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH;
+ code->node[code->cur_node].flags |= R300_W_OUT;
+ c->code->writes_depth = GL_TRUE;
+ }
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Finish the current node without advancing to the next one.
+ */
+static GLboolean finish_node(struct r300_fragment_program_compiler *c)
+{
+ struct r300_fragment_program_code *code = &c->code->code.r300;
+ struct r300_fragment_program_node *node = &code->node[code->cur_node];
+
+ if (node->alu_end < 0) {
+ /* Generate a single NOP for this node */
+ struct radeon_pair_instruction inst;
+ _mesa_bzero(&inst, sizeof(inst));
+ if (!emit_alu(c, &inst))
+ return GL_FALSE;
+ }
+
+ if (node->tex_end < 0) {
+ if (code->cur_node == 0) {
+ node->tex_end = 0;
+ } else {
+ error("Node %i has no TEX instructions", code->cur_node);
+ return GL_FALSE;
+ }
+ } else {
+ if (code->cur_node == 0)
+ code->first_node_has_tex = 1;
+ }
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Begin a block of texture instructions.
+ * Create the necessary indirection.
+ */
+static GLboolean begin_tex(void* data)
+{
+ PROG_CODE;
+
+ if (code->cur_node == 0) {
+ if (code->node[0].alu_end < 0 &&
+ code->node[0].tex_end < 0)
+ return GL_TRUE;
+ }
+
+ if (code->cur_node == 3) {
+ error("Too many texture indirections");
+ return GL_FALSE;
+ }
+
+ if (!finish_node(c))
+ return GL_FALSE;
+
+ struct r300_fragment_program_node *node = &code->node[++code->cur_node];
+ node->alu_offset = code->alu.length;
+ node->alu_end = -1;
+ node->tex_offset = code->tex.length;
+ node->tex_end = -1;
+ return GL_TRUE;
+}
+
+
+static GLboolean emit_tex(void* data, struct prog_instruction* inst)
+{
+ PROG_CODE;
+
+ if (code->tex.length >= R300_PFS_MAX_TEX_INST) {
+ error("Too many TEX instructions");
+ return GL_FALSE;
+ }
+
+ GLuint unit = inst->TexSrcUnit;
+ GLuint dest = inst->DstReg.Index;
+ GLuint opcode;
+
+ switch(inst->Opcode) {
+ case OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
+ case OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
+ case OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
+ case OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
+ default:
+ error("Unknown texture opcode %i", inst->Opcode);
+ return GL_FALSE;
+ }
+
+ if (inst->Opcode == OPCODE_KIL) {
+ unit = 0;
+ dest = 0;
+ } else {
+ use_temporary(code, dest);
+ }
+
+ use_temporary(code, inst->SrcReg[0].Index);
+
+ code->node[code->cur_node].tex_end++;
+ code->tex.inst[code->tex.length++] =
+ (inst->SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
+ (dest << R300_DST_ADDR_SHIFT) |
+ (unit << R300_TEX_ID_SHIFT) |
+ (opcode << R300_TEX_INST_SHIFT);
+ return GL_TRUE;
+}
+
+
+static const struct radeon_pair_handler pair_handler = {
+ .EmitConst = &emit_const,
+ .EmitPaired = &emit_alu,
+ .EmitTex = &emit_tex,
+ .BeginTexBlock = &begin_tex,
+ .MaxHwTemps = R300_PFS_NUM_TEMP_REGS
+};
+
+/**
+ * Final compilation step: Turn the intermediate radeon_program into
+ * machine-readable instructions.
+ */
+GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
+{
+ struct r300_fragment_program_code *code = &compiler->code->code.r300;
+
+ _mesa_bzero(code, sizeof(struct r300_fragment_program_code));
+ code->node[0].alu_end = -1;
+ code->node[0].tex_end = -1;
+
+ if (!radeonPairProgram(compiler->ctx, compiler->program, &pair_handler, compiler))
+ return GL_FALSE;
+
+ if (!finish_node(compiler))
+ return GL_FALSE;
+
+ return GL_TRUE;
+}
+
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
new file mode 100644
index 00000000000..fc9d855bce6
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ * Utilities to deal with the somewhat odd restriction on R300 fragment
+ * program swizzles.
+ */
+
+#include "r300_fragprog_swizzle.h"
+
+#include "r300_reg.h"
+#include "radeon_nqssadce.h"
+
+#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO))
+
+struct swizzle_data {
+ GLuint hash; /**< swizzle value this matches */
+ GLuint base; /**< base value for hw swizzle */
+ GLuint stride; /**< difference in base between arg0/1/2 */
+};
+
+static const struct swizzle_data native_swizzles[] = {
+ {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4},
+ {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4},
+ {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4},
+ {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4},
+ {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1},
+ {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1},
+ {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1},
+ {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1},
+ {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0},
+ {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0}
+};
+
+static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]);
+
+
+/**
+ * Find a native RGB swizzle that matches the given swizzle.
+ * Returns 0 if none found.
+ */
+static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle)
+{
+ int i, comp;
+
+ for(i = 0; i < num_native_swizzles; ++i) {
+ const struct swizzle_data* sd = &native_swizzles[i];
+ for(comp = 0; comp < 3; ++comp) {
+ GLuint swz = GET_SWZ(swizzle, comp);
+ if (swz == SWIZZLE_NIL)
+ continue;
+ if (swz != GET_SWZ(sd->hash, comp))
+ break;
+ }
+ if (comp == 3)
+ return sd;
+ }
+
+ return 0;
+}
+
+
+/**
+ * Check whether the given instruction supports the swizzle and negate
+ * combinations in the given source register.
+ */
+GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg)
+{
+ if (reg.Abs)
+ reg.Negate = NEGATE_NONE;
+
+ if (opcode == OPCODE_KIL ||
+ opcode == OPCODE_TEX ||
+ opcode == OPCODE_TXB ||
+ opcode == OPCODE_TXP) {
+ int j;
+
+ if (reg.Abs || reg.Negate)
+ return GL_FALSE;
+
+ for(j = 0; j < 4; ++j) {
+ GLuint swz = GET_SWZ(reg.Swizzle, j);
+ if (swz == SWIZZLE_NIL)
+ continue;
+ if (swz != j)
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+ }
+
+ GLuint relevant = 0;
+ int j;
+
+ for(j = 0; j < 3; ++j)
+ if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL)
+ relevant |= 1 << j;
+
+ if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
+ return GL_FALSE;
+
+ if (!lookup_native_swizzle(reg.Swizzle))
+ return GL_FALSE;
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Generate MOV dst, src using only native swizzles.
+ */
+void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src)
+{
+ if (src.Abs)
+ src.Negate = NEGATE_NONE;
+
+ while(dst.WriteMask) {
+ const struct swizzle_data *best_swizzle = 0;
+ GLuint best_matchcount = 0;
+ GLuint best_matchmask = 0;
+ int i, comp;
+
+ for(i = 0; i < num_native_swizzles; ++i) {
+ const struct swizzle_data *sd = &native_swizzles[i];
+ GLuint matchcount = 0;
+ GLuint matchmask = 0;
+ for(comp = 0; comp < 3; ++comp) {
+ if (!GET_BIT(dst.WriteMask, comp))
+ continue;
+ GLuint swz = GET_SWZ(src.Swizzle, comp);
+ if (swz == SWIZZLE_NIL)
+ continue;
+ if (swz == GET_SWZ(sd->hash, comp)) {
+ /* check if the negate bit of current component
+ * is the same for already matched components */
+ if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp))))
+ continue;
+
+ matchcount++;
+ matchmask |= 1 << comp;
+ }
+ }
+ if (matchcount > best_matchcount) {
+ best_swizzle = sd;
+ best_matchcount = matchcount;
+ best_matchmask = matchmask;
+ if (matchmask == (dst.WriteMask & WRITEMASK_XYZ))
+ break;
+ }
+ }
+
+ struct prog_instruction *inst;
+
+ _mesa_insert_instructions(s->Program, s->IP, 1);
+ inst = s->Program->Instructions + s->IP++;
+ inst->Opcode = OPCODE_MOV;
+ inst->DstReg = dst;
+ inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W);
+ inst->SrcReg[0] = src;
+ inst->SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE;
+ /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */
+
+ dst.WriteMask &= ~inst->DstReg.WriteMask;
+ }
+}
+
+
+/**
+ * Translate an RGB (XYZ) swizzle into the hardware code for the given
+ * instruction source.
+ */
+GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle)
+{
+ const struct swizzle_data* sd = lookup_native_swizzle(swizzle);
+
+ if (!sd) {
+ _mesa_printf("Not a native swizzle: %08x\n", swizzle);
+ return 0;
+ }
+
+ return sd->base + src*sd->stride;
+}
+
+
+/**
+ * Translate an Alpha (W) swizzle into the hardware code for the given
+ * instruction source.
+ */
+GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle)
+{
+ if (swizzle < 3)
+ return swizzle + 3*src;
+
+ switch(swizzle) {
+ case SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src;
+ case SWIZZLE_ONE: return R300_ALU_ARGA_ONE;
+ case SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO;
+ default: return R300_ALU_ARGA_ONE;
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
new file mode 100644
index 00000000000..231bf4eef5f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __R300_FRAGPROG_SWIZZLE_H_
+#define __R300_FRAGPROG_SWIZZLE_H_
+
+#include "main/glheader.h"
+#include "shader/prog_instruction.h"
+
+struct nqssadce_state;
+
+GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg);
+void r300FPBuildSwizzle(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
+
+GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle);
+GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle);
+
+#endif /* __R300_FRAGPROG_SWIZZLE_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
new file mode 100644
index 00000000000..75abdcfc42a
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright 2009 Nicolai Hähnle
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_statevars.h"
+
+#include "radeon_nqssadce.h"
+#include "radeon_program_alu.h"
+#include "r300_fragprog.h"
+#include "r300_fragprog_swizzle.h"
+#include "r500_fragprog.h"
+
+
+static void nqssadce_init(struct nqssadce_state* s)
+{
+ s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW;
+ s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W;
+}
+
+/**
+ * Transform the program to support fragment.position.
+ *
+ * Introduce a small fragment at the start of the program that will be
+ * the only code that directly reads the FRAG_ATTRIB_WPOS input.
+ * All other code pieces that reference that input will be rewritten
+ * to read from a newly allocated temporary.
+ *
+ */
+static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
+{
+ GLuint InputsRead = compiler->program->InputsRead;
+
+ if (!(InputsRead & FRAG_BIT_WPOS)) {
+ compiler->code->wpos_attr = FRAG_ATTRIB_MAX;
+ return;
+ }
+
+ static gl_state_index tokens[STATE_LENGTH] = {
+ STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
+ };
+ struct prog_instruction *fpi;
+ GLuint window_index;
+ int i = 0;
+
+ for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
+ {
+ if (!(InputsRead & (1 << i))) {
+ InputsRead &= ~(1 << FRAG_ATTRIB_WPOS);
+ InputsRead |= 1 << i;
+ compiler->program->InputsRead = InputsRead;
+ compiler->code->wpos_attr = i;
+ break;
+ }
+ }
+
+ GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY);
+
+ _mesa_insert_instructions(compiler->program, 0, 3);
+ fpi = compiler->program->Instructions;
+ i = 0;
+
+ /* perspective divide */
+ fpi[i].Opcode = OPCODE_RCP;
+
+ fpi[i].DstReg.File = PROGRAM_TEMPORARY;
+ fpi[i].DstReg.Index = tempregi;
+ fpi[i].DstReg.WriteMask = WRITEMASK_W;
+ fpi[i].DstReg.CondMask = COND_TR;
+
+ fpi[i].SrcReg[0].File = PROGRAM_INPUT;
+ fpi[i].SrcReg[0].Index = compiler->code->wpos_attr;
+ fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
+ i++;
+
+ fpi[i].Opcode = OPCODE_MUL;
+
+ fpi[i].DstReg.File = PROGRAM_TEMPORARY;
+ fpi[i].DstReg.Index = tempregi;
+ fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
+ fpi[i].DstReg.CondMask = COND_TR;
+
+ fpi[i].SrcReg[0].File = PROGRAM_INPUT;
+ fpi[i].SrcReg[0].Index = compiler->code->wpos_attr;
+ fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
+ fpi[i].SrcReg[1].Index = tempregi;
+ fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW;
+ i++;
+
+ /* viewport transformation */
+ window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);
+
+ fpi[i].Opcode = OPCODE_MAD;
+
+ fpi[i].DstReg.File = PROGRAM_TEMPORARY;
+ fpi[i].DstReg.Index = tempregi;
+ fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
+ fpi[i].DstReg.CondMask = COND_TR;
+
+ fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY;
+ fpi[i].SrcReg[0].Index = tempregi;
+ fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+
+ fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR;
+ fpi[i].SrcReg[1].Index = window_index;
+ fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+
+ fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR;
+ fpi[i].SrcReg[2].Index = window_index;
+ fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+ i++;
+
+ for (; i < compiler->program->NumInstructions; ++i) {
+ int reg;
+ for (reg = 0; reg < 3; reg++) {
+ if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT &&
+ fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) {
+ fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY;
+ fpi[i].SrcReg[reg].Index = tempregi;
+ }
+ }
+ }
+}
+
+
+/**
+ * Rewrite fragment.fogcoord to use a texture coordinate slot.
+ * Note that fogcoord is forced into an X001 pattern, and this enforcement
+ * is done here.
+ *
+ * See also the counterpart rewriting for vertex programs.
+ */
+static void rewriteFog(struct r300_fragment_program_compiler *compiler)
+{
+ struct rX00_fragment_program_code *code = compiler->code;
+ GLuint InputsRead = compiler->program->InputsRead;
+ int i;
+
+ if (!(InputsRead & FRAG_BIT_FOGC)) {
+ code->fog_attr = FRAG_ATTRIB_MAX;
+ return;
+ }
+
+ for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
+ {
+ if (!(InputsRead & (1 << i))) {
+ InputsRead &= ~(1 << FRAG_ATTRIB_FOGC);
+ InputsRead |= 1 << i;
+ compiler->program->InputsRead = InputsRead;
+ code->fog_attr = i;
+ break;
+ }
+ }
+
+ {
+ struct prog_instruction *inst;
+
+ inst = compiler->program->Instructions;
+ while (inst->Opcode != OPCODE_END) {
+ const int src_regs = _mesa_num_inst_src_regs(inst->Opcode);
+ for (i = 0; i < src_regs; ++i) {
+ if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == FRAG_ATTRIB_FOGC) {
+ inst->SrcReg[i].Index = code->fog_attr;
+ inst->SrcReg[i].Swizzle = combine_swizzles(
+ MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE),
+ inst->SrcReg[i].Swizzle);
+ }
+ }
+ ++inst;
+ }
+ }
+}
+
+
+static void rewrite_depth_out(struct gl_program *prog)
+{
+ struct prog_instruction *inst;
+
+ for (inst = prog->Instructions; inst->Opcode != OPCODE_END; ++inst) {
+ if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != FRAG_RESULT_DEPTH)
+ continue;
+
+ if (inst->DstReg.WriteMask & WRITEMASK_Z) {
+ inst->DstReg.WriteMask = WRITEMASK_W;
+ } else {
+ inst->DstReg.WriteMask = 0;
+ continue;
+ }
+
+ switch (inst->Opcode) {
+ case OPCODE_FRC:
+ case OPCODE_MOV:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ break;
+ case OPCODE_ADD:
+ case OPCODE_MAX:
+ case OPCODE_MIN:
+ case OPCODE_MUL:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ break;
+ case OPCODE_CMP:
+ case OPCODE_MAD:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]);
+ break;
+ default:
+ // Scalar instructions needn't be reswizzled
+ break;
+ }
+ }
+}
+
+GLboolean r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
+{
+ GLboolean success = GL_FALSE;
+
+ if (c->debug) {
+ fflush(stdout);
+ _mesa_printf("Fragment Program: Initial program:\n");
+ _mesa_print_program(c->program);
+ fflush(stdout);
+ }
+
+ insert_WPOS_trailer(c);
+
+ rewriteFog(c);
+
+ rewrite_depth_out(c->program);
+
+ if (c->is_r500) {
+ struct radeon_program_transformation transformations[] = {
+ { &r500_transform_TEX, c },
+ { &radeonTransformALU, 0 },
+ { &radeonTransformDeriv, 0 },
+ { &radeonTransformTrigScale, 0 }
+ };
+ radeonLocalTransform(c->ctx, c->program, 4, transformations);
+ } else {
+ struct radeon_program_transformation transformations[] = {
+ { &r300_transform_TEX, c },
+ { &radeonTransformALU, 0 },
+ { &radeonTransformTrigSimple, 0 }
+ };
+ radeonLocalTransform(c->ctx, c->program, 3, transformations);
+ }
+
+ if (c->debug) {
+ _mesa_printf("Fragment Program: After native rewrite:\n");
+ _mesa_print_program(c->program);
+ fflush(stdout);
+ }
+
+ if (c->is_r500) {
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadce_init,
+ .IsNativeSwizzle = &r500FPIsNativeSwizzle,
+ .BuildSwizzle = &r500FPBuildSwizzle
+ };
+ radeonNqssaDce(c->ctx, c->program, &nqssadce);
+ } else {
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadce_init,
+ .IsNativeSwizzle = &r300FPIsNativeSwizzle,
+ .BuildSwizzle = &r300FPBuildSwizzle
+ };
+ radeonNqssaDce(c->ctx, c->program, &nqssadce);
+ }
+
+ if (c->debug) {
+ _mesa_printf("Compiler: after NqSSA-DCE:\n");
+ _mesa_print_program(c->program);
+ fflush(stdout);
+ }
+
+ if (c->is_r500) {
+ success = r500BuildFragmentProgramHwCode(c);
+ } else {
+ success = r300BuildFragmentProgramHwCode(c);
+ }
+
+ if (!success || c->debug) {
+ if (c->is_r500) {
+ r500FragmentProgramDump(c->code);
+ } else {
+ r300FragmentProgramDump(c->code);
+ }
+ }
+
+ return success;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
new file mode 100644
index 00000000000..fdc18caacb7
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -0,0 +1,482 @@
+/*
+ * Copyright 2008 Corbin Simpson
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "compiler/r500_fragprog.h"
+
+#include "r300_reg.h"
+
+static void reset_srcreg(struct prog_src_register* reg)
+{
+ _mesa_bzero(reg, sizeof(*reg));
+ reg->Swizzle = SWIZZLE_NOOP;
+}
+
+static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
+{
+ gl_state_index fail_value_tokens[STATE_LENGTH] = {
+ STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
+ };
+ struct prog_src_register reg = { 0, };
+
+ fail_value_tokens[2] = tmu;
+ reg.File = PROGRAM_STATE_VAR;
+ reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
+ reg.Swizzle = SWIZZLE_WWWW;
+ return reg;
+}
+
+/**
+ * Transform TEX, TXP, TXB, and KIL instructions in the following way:
+ * - premultiply texture coordinates for RECT
+ * - extract operand swizzles
+ * - introduce a temporary register when write masks are needed
+ *
+ */
+GLboolean r500_transform_TEX(
+ struct radeon_transform_context *t,
+ struct prog_instruction* orig_inst, void* data)
+{
+ struct r300_fragment_program_compiler *compiler =
+ (struct r300_fragment_program_compiler*)data;
+ struct prog_instruction inst = *orig_inst;
+ struct prog_instruction* tgt;
+ GLboolean destredirect = GL_FALSE;
+
+ if (inst.Opcode != OPCODE_TEX &&
+ inst.Opcode != OPCODE_TXB &&
+ inst.Opcode != OPCODE_TXP &&
+ inst.Opcode != OPCODE_KIL)
+ return GL_FALSE;
+
+ /* ARB_shadow & EXT_shadow_funcs */
+ if (inst.Opcode != OPCODE_KIL &&
+ t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
+ GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func;
+
+ if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
+ tgt = radeonAppendInstructions(t->Program, 1);
+
+ tgt->Opcode = OPCODE_MOV;
+ tgt->DstReg = inst.DstReg;
+ if (comparefunc == GL_ALWAYS) {
+ tgt->SrcReg[0].File = PROGRAM_BUILTIN;
+ tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
+ } else {
+ tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
+ }
+ return GL_TRUE;
+ }
+
+ inst.DstReg.File = PROGRAM_TEMPORARY;
+ inst.DstReg.Index = radeonFindFreeTemporary(t);
+ inst.DstReg.WriteMask = WRITEMASK_XYZW;
+ } else if (inst.Opcode != OPCODE_KIL && inst.DstReg.File != PROGRAM_TEMPORARY) {
+ int tempreg = radeonFindFreeTemporary(t);
+
+ inst.DstReg.File = PROGRAM_TEMPORARY;
+ inst.DstReg.Index = tempreg;
+ inst.DstReg.WriteMask = WRITEMASK_XYZW;
+ destredirect = GL_TRUE;
+ }
+
+ if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
+ int tmpreg = radeonFindFreeTemporary(t);
+ tgt = radeonAppendInstructions(t->Program, 1);
+ tgt->Opcode = OPCODE_MOV;
+ tgt->DstReg.File = PROGRAM_TEMPORARY;
+ tgt->DstReg.Index = tmpreg;
+ tgt->SrcReg[0] = inst.SrcReg[0];
+
+ reset_srcreg(&inst.SrcReg[0]);
+ inst.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst.SrcReg[0].Index = tmpreg;
+ }
+
+ tgt = radeonAppendInstructions(t->Program, 1);
+ _mesa_copy_instructions(tgt, &inst, 1);
+
+ if (inst.Opcode != OPCODE_KIL &&
+ t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
+ GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func;
+ GLuint depthmode = compiler->state.unit[inst.TexSrcUnit].depth_texture_mode;
+ int rcptemp = radeonFindFreeTemporary(t);
+ int pass, fail;
+
+ tgt = radeonAppendInstructions(t->Program, 3);
+
+ tgt[0].Opcode = OPCODE_RCP;
+ tgt[0].DstReg.File = PROGRAM_TEMPORARY;
+ tgt[0].DstReg.Index = rcptemp;
+ tgt[0].DstReg.WriteMask = WRITEMASK_W;
+ tgt[0].SrcReg[0] = inst.SrcReg[0];
+ tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
+
+ tgt[1].Opcode = OPCODE_MAD;
+ tgt[1].DstReg = inst.DstReg;
+ tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
+ tgt[1].SrcReg[0] = inst.SrcReg[0];
+ tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
+ tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
+ tgt[1].SrcReg[1].Index = rcptemp;
+ tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
+ tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
+ tgt[1].SrcReg[2].Index = inst.DstReg.Index;
+ if (depthmode == 0) /* GL_LUMINANCE */
+ tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
+ else if (depthmode == 2) /* GL_ALPHA */
+ tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
+
+ /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
+ * r < tex <=> -tex+r < 0
+ * r >= tex <=> not (-tex+r < 0 */
+ if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
+ tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW;
+ else
+ tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW;
+
+ tgt[2].Opcode = OPCODE_CMP;
+ tgt[2].DstReg = orig_inst->DstReg;
+ tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
+ tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
+
+ if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
+ pass = 1;
+ fail = 2;
+ } else {
+ pass = 2;
+ fail = 1;
+ }
+
+ tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
+ tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
+ tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
+ } else if (destredirect) {
+ tgt = radeonAppendInstructions(t->Program, 1);
+
+ tgt->Opcode = OPCODE_MOV;
+ tgt->DstReg = orig_inst->DstReg;
+ tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
+ tgt->SrcReg[0].Index = inst.DstReg.Index;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg)
+{
+ GLuint relevant;
+ int i;
+
+ if (opcode == OPCODE_TEX ||
+ opcode == OPCODE_TXB ||
+ opcode == OPCODE_TXP ||
+ opcode == OPCODE_KIL) {
+ if (reg.Abs)
+ return GL_FALSE;
+
+ if (opcode == OPCODE_KIL && (reg.Swizzle != SWIZZLE_NOOP || reg.Negate != NEGATE_NONE))
+ return GL_FALSE;
+
+ if (reg.Negate)
+ reg.Negate ^= NEGATE_XYZW;
+
+ for(i = 0; i < 4; ++i) {
+ GLuint swz = GET_SWZ(reg.Swizzle, i);
+ if (swz == SWIZZLE_NIL) {
+ reg.Negate &= ~(1 << i);
+ continue;
+ }
+ if (swz >= 4)
+ return GL_FALSE;
+ }
+
+ if (reg.Negate)
+ return GL_FALSE;
+
+ return GL_TRUE;
+ } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) {
+ /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
+ * if it doesn't fit perfectly into a .xyzw case... */
+ if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate)
+ return GL_TRUE;
+
+ return GL_FALSE;
+ } else {
+ /* ALU instructions support almost everything */
+ if (reg.Abs)
+ return GL_TRUE;
+
+ relevant = 0;
+ for(i = 0; i < 3; ++i) {
+ GLuint swz = GET_SWZ(reg.Swizzle, i);
+ if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
+ relevant |= 1 << i;
+ }
+ if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
+ return GL_FALSE;
+
+ return GL_TRUE;
+ }
+}
+
+/**
+ * Implement a MOV with a potentially non-native swizzle.
+ *
+ * The only thing we *cannot* do in an ALU instruction is per-component
+ * negation. Therefore, we split the MOV into two instructions when necessary.
+ */
+void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src)
+{
+ struct prog_instruction *inst;
+ GLuint negatebase[2] = { 0, 0 };
+ int i;
+
+ for(i = 0; i < 4; ++i) {
+ GLuint swz = GET_SWZ(src.Swizzle, i);
+ if (swz == SWIZZLE_NIL)
+ continue;
+ negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
+ }
+
+ _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0));
+ inst = s->Program->Instructions + s->IP;
+
+ for(i = 0; i <= 1; ++i) {
+ if (!negatebase[i])
+ continue;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->DstReg = dst;
+ inst->DstReg.WriteMask = negatebase[i];
+ inst->SrcReg[0] = src;
+ inst->SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW;
+ inst++;
+ s->IP++;
+ }
+}
+
+
+static char *toswiz(int swiz_val) {
+ switch(swiz_val) {
+ case 0: return "R";
+ case 1: return "G";
+ case 2: return "B";
+ case 3: return "A";
+ case 4: return "0";
+ case 5: return "1/2";
+ case 6: return "1";
+ case 7: return "U";
+ }
+ return NULL;
+}
+
+static char *toop(int op_val)
+{
+ char *str = NULL;
+ switch (op_val) {
+ case 0: str = "MAD"; break;
+ case 1: str = "DP3"; break;
+ case 2: str = "DP4"; break;
+ case 3: str = "D2A"; break;
+ case 4: str = "MIN"; break;
+ case 5: str = "MAX"; break;
+ case 6: str = "Reserved"; break;
+ case 7: str = "CND"; break;
+ case 8: str = "CMP"; break;
+ case 9: str = "FRC"; break;
+ case 10: str = "SOP"; break;
+ case 11: str = "MDH"; break;
+ case 12: str = "MDV"; break;
+ }
+ return str;
+}
+
+static char *to_alpha_op(int op_val)
+{
+ char *str = NULL;
+ switch (op_val) {
+ case 0: str = "MAD"; break;
+ case 1: str = "DP"; break;
+ case 2: str = "MIN"; break;
+ case 3: str = "MAX"; break;
+ case 4: str = "Reserved"; break;
+ case 5: str = "CND"; break;
+ case 6: str = "CMP"; break;
+ case 7: str = "FRC"; break;
+ case 8: str = "EX2"; break;
+ case 9: str = "LN2"; break;
+ case 10: str = "RCP"; break;
+ case 11: str = "RSQ"; break;
+ case 12: str = "SIN"; break;
+ case 13: str = "COS"; break;
+ case 14: str = "MDH"; break;
+ case 15: str = "MDV"; break;
+ }
+ return str;
+}
+
+static char *to_mask(int val)
+{
+ char *str = NULL;
+ switch(val) {
+ case 0: str = "NONE"; break;
+ case 1: str = "R"; break;
+ case 2: str = "G"; break;
+ case 3: str = "RG"; break;
+ case 4: str = "B"; break;
+ case 5: str = "RB"; break;
+ case 6: str = "GB"; break;
+ case 7: str = "RGB"; break;
+ case 8: str = "A"; break;
+ case 9: str = "AR"; break;
+ case 10: str = "AG"; break;
+ case 11: str = "ARG"; break;
+ case 12: str = "AB"; break;
+ case 13: str = "ARB"; break;
+ case 14: str = "AGB"; break;
+ case 15: str = "ARGB"; break;
+ }
+ return str;
+}
+
+static char *to_texop(int val)
+{
+ switch(val) {
+ case 0: return "NOP";
+ case 1: return "LD";
+ case 2: return "TEXKILL";
+ case 3: return "PROJ";
+ case 4: return "LODBIAS";
+ case 5: return "LOD";
+ case 6: return "DXDY";
+ }
+ return NULL;
+}
+
+void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
+{
+ struct r500_fragment_program_code *code = &c->code.r500;
+ fprintf(stderr, "R500 Fragment Program:\n--------\n");
+
+ int n;
+ uint32_t inst;
+ uint32_t inst0;
+ char *str = NULL;
+
+ if (code->const_nr) {
+ fprintf(stderr, "--------\nConstants:\n");
+ for (n = 0; n < code->const_nr; n++) {
+ fprintf(stderr, "Constant %d: %i[%i]\n", n,
+ code->constant[n].File, code->constant[n].Index);
+ }
+ fprintf(stderr, "--------\n");
+ }
+
+ for (n = 0; n < code->inst_end+1; n++) {
+ inst0 = inst = code->inst[n].inst0;
+ fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
+ switch(inst & 0x3) {
+ case R500_INST_TYPE_ALU: str = "ALU"; break;
+ case R500_INST_TYPE_OUT: str = "OUT"; break;
+ case R500_INST_TYPE_FC: str = "FC"; break;
+ case R500_INST_TYPE_TEX: str = "TEX"; break;
+ };
+ fprintf(stderr,"%s %s %s %s %s ", str,
+ inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
+ inst & R500_INST_LAST ? "LAST" : "",
+ inst & R500_INST_NOP ? "NOP" : "",
+ inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
+ fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
+ to_mask((inst >> 15) & 0xf));
+
+ switch(inst0 & 0x3) {
+ case 0:
+ case 1:
+ fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1);
+ inst = code->inst[n].inst1;
+
+ fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
+ inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
+ (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
+ (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
+ (inst >> 30));
+
+ fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2);
+ inst = code->inst[n].inst2;
+ fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
+ inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
+ (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
+ (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
+ (inst >> 30));
+ fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3);
+ inst = code->inst[n].inst3;
+ fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
+ (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
+ (inst >> 11) & 0x3,
+ (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
+ (inst >> 24) & 0x3);
+
+
+ fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);
+ inst = code->inst[n].inst4;
+ fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf),
+ (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
+ (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
+ (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
+ (inst >> 31) & 0x1);
+
+ fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);
+ inst = code->inst[n].inst5;
+ fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
+ (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
+ (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
+ (inst >> 23) & 0x3,
+ (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
+ break;
+ case 2:
+ break;
+ case 3:
+ inst = code->inst[n].inst1;
+ fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
+ to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
+ (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
+ inst = code->inst[n].inst2;
+ fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
+ inst & 127, inst & (1<<7) ? "(rel)" : "",
+ toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
+ toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
+ (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
+ toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
+ toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
+
+ fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3);
+ break;
+ }
+ fprintf(stderr,"\n");
+ }
+
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
new file mode 100644
index 00000000000..232993f5816
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ * Ben Skeggs
+ * Jerome Glisse
+ */
+#ifndef __R500_FRAGPROG_H_
+#define __R500_FRAGPROG_H_
+
+#include "shader/prog_parameter.h"
+#include "shader/prog_instruction.h"
+
+#include "compiler/radeon_compiler.h"
+#include "compiler/radeon_nqssadce.h"
+
+extern GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
+
+extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c);
+
+extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg);
+
+extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src);
+
+extern GLboolean r500_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
new file mode 100644
index 00000000000..237489e1199
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -0,0 +1,329 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * Copyright 2008 Corbin Simpson
+ * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * \author Ben Skeggs
+ *
+ * \author Jerome Glisse
+ *
+ * \author Corbin Simpson
+ *
+ * \todo Depth write, WPOS/FOGC inputs
+ *
+ * \todo FogOption
+ *
+ */
+
+#include "compiler/r500_fragprog.h"
+
+#include "r300_reg.h"
+
+#include "compiler/radeon_program_pair.h"
+
+
+#define PROG_CODE \
+ struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
+ struct r500_fragment_program_code *code = &c->code->code.r500
+
+#define error(fmt, args...) do { \
+ fprintf(stderr, "%s::%s(): " fmt "\n", \
+ __FILE__, __FUNCTION__, ##args); \
+ } while(0)
+
+
+/**
+ * Callback to register hardware constants.
+ */
+static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex)
+{
+ PROG_CODE;
+
+ for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) {
+ if (code->constant[*hwindex].File == file &&
+ code->constant[*hwindex].Index == idx)
+ break;
+ }
+
+ if (*hwindex >= code->const_nr) {
+ if (*hwindex >= R500_PFS_NUM_CONST_REGS) {
+ error("Out of hw constants!\n");
+ return GL_FALSE;
+ }
+
+ code->const_nr++;
+ code->constant[*hwindex].File = file;
+ code->constant[*hwindex].Index = idx;
+ }
+
+ return GL_TRUE;
+}
+
+static GLuint translate_rgb_op(GLuint opcode)
+{
+ switch(opcode) {
+ case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
+ case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
+ case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
+ case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
+ case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
+ case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
+ default:
+ error("translate_rgb_op(%d): unknown opcode\n", opcode);
+ /* fall through */
+ case OPCODE_NOP:
+ /* fall through */
+ case OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
+ case OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
+ case OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
+ case OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
+ }
+}
+
+static GLuint translate_alpha_op(GLuint opcode)
+{
+ switch(opcode) {
+ case OPCODE_CMP: return R500_ALPHA_OP_CMP;
+ case OPCODE_COS: return R500_ALPHA_OP_COS;
+ case OPCODE_DDX: return R500_ALPHA_OP_MDH;
+ case OPCODE_DDY: return R500_ALPHA_OP_MDV;
+ case OPCODE_DP3: return R500_ALPHA_OP_DP;
+ case OPCODE_DP4: return R500_ALPHA_OP_DP;
+ case OPCODE_EX2: return R500_ALPHA_OP_EX2;
+ case OPCODE_FRC: return R500_ALPHA_OP_FRC;
+ case OPCODE_LG2: return R500_ALPHA_OP_LN2;
+ default:
+ error("translate_alpha_op(%d): unknown opcode\n", opcode);
+ /* fall through */
+ case OPCODE_NOP:
+ /* fall through */
+ case OPCODE_MAD: return R500_ALPHA_OP_MAD;
+ case OPCODE_MAX: return R500_ALPHA_OP_MAX;
+ case OPCODE_MIN: return R500_ALPHA_OP_MIN;
+ case OPCODE_RCP: return R500_ALPHA_OP_RCP;
+ case OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
+ case OPCODE_SIN: return R500_ALPHA_OP_SIN;
+ }
+}
+
+static GLuint fix_hw_swizzle(GLuint swz)
+{
+ if (swz == 5) swz = 6;
+ if (swz == SWIZZLE_NIL) swz = 4;
+ return swz;
+}
+
+static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg)
+{
+ GLuint t = inst->RGB.Arg[arg].Source;
+ int comp;
+ t |= inst->RGB.Arg[arg].Negate << 11;
+ t |= inst->RGB.Arg[arg].Abs << 12;
+
+ for(comp = 0; comp < 3; ++comp)
+ t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
+
+ return t;
+}
+
+static GLuint translate_arg_alpha(struct radeon_pair_instruction *inst, int i)
+{
+ GLuint t = inst->Alpha.Arg[i].Source;
+ t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2;
+ t |= inst->Alpha.Arg[i].Negate << 5;
+ t |= inst->Alpha.Arg[i].Abs << 6;
+ return t;
+}
+
+static void use_temporary(struct r500_fragment_program_code* code, GLuint index)
+{
+ if (index > code->max_temp_idx)
+ code->max_temp_idx = index;
+}
+
+static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)
+{
+ if (!src.Constant)
+ use_temporary(code, src.Index);
+ return src.Index | src.Constant << 8;
+}
+
+
+/**
+ * Emit a paired ALU instruction.
+ */
+static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
+{
+ PROG_CODE;
+
+ if (code->inst_end >= 511) {
+ error("emit_alu: Too many instructions");
+ return GL_FALSE;
+ }
+
+ int ip = ++code->inst_end;
+
+ code->inst[ip].inst5 = translate_rgb_op(inst->RGB.Opcode);
+ code->inst[ip].inst4 = translate_alpha_op(inst->Alpha.Opcode);
+
+ if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask)
+ code->inst[ip].inst0 = R500_INST_TYPE_OUT;
+ else
+ code->inst[ip].inst0 = R500_INST_TYPE_ALU;
+ code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
+
+ code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
+ code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
+ if (inst->Alpha.DepthWriteMask) {
+ code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
+ c->code->writes_depth = GL_TRUE;
+ }
+
+ code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
+ code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
+ use_temporary(code, inst->Alpha.DestIndex);
+ use_temporary(code, inst->RGB.DestIndex);
+
+ if (inst->RGB.Saturate)
+ code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
+ if (inst->Alpha.Saturate)
+ code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
+
+ code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
+ code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
+ code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
+
+ code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
+ code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
+ code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
+
+ code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
+ code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
+ code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
+
+ code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
+ code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
+ code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
+
+ return GL_TRUE;
+}
+
+static GLuint translate_strq_swizzle(struct prog_src_register src)
+{
+ GLuint swiz = 0;
+ int i;
+ for (i = 0; i < 4; i++)
+ swiz |= (GET_SWZ(src.Swizzle, i) & 0x3) << i*2;
+ return swiz;
+}
+
+/**
+ * Emit a single TEX instruction
+ */
+static GLboolean emit_tex(void *data, struct prog_instruction *inst)
+{
+ PROG_CODE;
+
+ if (code->inst_end >= 511) {
+ error("emit_tex: Too many instructions");
+ return GL_FALSE;
+ }
+
+ int ip = ++code->inst_end;
+
+ code->inst[ip].inst0 = R500_INST_TYPE_TEX
+ | (inst->DstReg.WriteMask << 11)
+ | R500_INST_TEX_SEM_WAIT;
+ code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
+ | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
+
+ if (inst->TexSrcTarget == TEXTURE_RECT_INDEX)
+ code->inst[ip].inst1 |= R500_TEX_UNSCALED;
+
+ switch (inst->Opcode) {
+ case OPCODE_KIL:
+ code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
+ break;
+ case OPCODE_TEX:
+ code->inst[ip].inst1 |= R500_TEX_INST_LD;
+ break;
+ case OPCODE_TXB:
+ code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
+ break;
+ case OPCODE_TXP:
+ code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
+ break;
+ default:
+ error("emit_tex can't handle opcode %x\n", inst->Opcode);
+ }
+
+ code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
+ | (translate_strq_swizzle(inst->SrcReg[0]) << 8)
+ | R500_TEX_DST_ADDR(inst->DstReg.Index)
+ | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
+ | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
+
+ return GL_TRUE;
+}
+
+static const struct radeon_pair_handler pair_handler = {
+ .EmitConst = emit_const,
+ .EmitPaired = emit_paired,
+ .EmitTex = emit_tex,
+ .MaxHwTemps = 128
+};
+
+GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
+{
+ struct r500_fragment_program_code *code = &compiler->code->code.r500;
+
+ _mesa_bzero(code, sizeof(*code));
+ code->max_temp_idx = 1;
+ code->inst_offset = 0;
+ code->inst_end = -1;
+
+ if (!radeonPairProgram(compiler->ctx, compiler->program, &pair_handler, compiler))
+ return GL_FALSE;
+
+ if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
+ /* This may happen when dead-code elimination is disabled or
+ * when most of the fragment program logic is leading to a KIL */
+ if (code->inst_end >= 511) {
+ error("Introducing fake OUT: Too many instructions");
+ return GL_FALSE;
+ }
+
+ int ip = ++code->inst_end;
+ code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
+ }
+
+ return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
new file mode 100644
index 00000000000..e1a691db4fb
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2009 Nicolai Hähnle
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_COMPILER_H
+#define RADEON_COMPILER_H
+
+#include "main/mtypes.h"
+#include "shader/prog_instruction.h"
+
+#define R300_PFS_MAX_ALU_INST 64
+#define R300_PFS_MAX_TEX_INST 32
+#define R300_PFS_MAX_TEX_INDIRECT 4
+#define R300_PFS_NUM_TEMP_REGS 32
+#define R300_PFS_NUM_CONST_REGS 32
+
+#define R500_PFS_MAX_INST 512
+#define R500_PFS_NUM_TEMP_REGS 128
+#define R500_PFS_NUM_CONST_REGS 256
+
+
+#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
+#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1)
+
+
+/**
+ * Stores state that influences the compilation of a fragment program.
+ */
+struct r300_fragment_program_external_state {
+ struct {
+ /**
+ * If the sampler is used as a shadow sampler,
+ * this field is:
+ * 0 - GL_LUMINANCE
+ * 1 - GL_INTENSITY
+ * 2 - GL_ALPHA
+ * depending on the depth texture mode.
+ */
+ GLuint depth_texture_mode : 2;
+
+ /**
+ * If the sampler is used as a shadow sampler,
+ * this field is (texture_compare_func - GL_NEVER).
+ * [e.g. if compare function is GL_LEQUAL, this field is 3]
+ *
+ * Otherwise, this field is 0.
+ */
+ GLuint texture_compare_func : 3;
+ } unit[16];
+};
+
+
+
+struct r300_fragment_program_node {
+ int tex_offset; /**< first tex instruction */
+ int tex_end; /**< last tex instruction, relative to tex_offset */
+ int alu_offset; /**< first ALU instruction */
+ int alu_end; /**< last ALU instruction, relative to alu_offset */
+ int flags;
+};
+
+/**
+ * Stores an R300 fragment program in its compiled-to-hardware form.
+ */
+struct r300_fragment_program_code {
+ struct {
+ int length; /**< total # of texture instructions used */
+ GLuint inst[R300_PFS_MAX_TEX_INST];
+ } tex;
+
+ struct {
+ int length; /**< total # of ALU instructions used */
+ struct {
+ GLuint inst0;
+ GLuint inst1;
+ GLuint inst2;
+ GLuint inst3;
+ } inst[R300_PFS_MAX_ALU_INST];
+ } alu;
+
+ struct r300_fragment_program_node node[4];
+ int cur_node;
+ int first_node_has_tex;
+
+ /**
+ * Remember which program register a given hardware constant
+ * belongs to.
+ */
+ struct prog_src_register constant[R300_PFS_NUM_CONST_REGS];
+ int const_nr;
+
+ int max_temp_idx;
+};
+
+
+struct r500_fragment_program_code {
+ struct {
+ GLuint inst0;
+ GLuint inst1;
+ GLuint inst2;
+ GLuint inst3;
+ GLuint inst4;
+ GLuint inst5;
+ } inst[R500_PFS_MAX_INST];
+
+ int inst_offset;
+ int inst_end;
+
+ /**
+ * Remember which program register a given hardware constant
+ * belongs to.
+ */
+ struct prog_src_register constant[R500_PFS_NUM_CONST_REGS];
+ int const_nr;
+
+ int max_temp_idx;
+};
+
+struct rX00_fragment_program_code {
+ union {
+ struct r300_fragment_program_code r300;
+ struct r500_fragment_program_code r500;
+ } code;
+
+ GLboolean writes_depth;
+
+ /* attribute that we are sending the WPOS in */
+ gl_frag_attrib wpos_attr;
+ /* attribute that we are sending the fog coordinate in */
+ gl_frag_attrib fog_attr;
+};
+
+struct r300_fragment_program_compiler {
+ GLcontext * ctx;
+ struct rX00_fragment_program_code *code;
+ struct gl_program *program;
+ struct r300_fragment_program_external_state state;
+ GLboolean is_r500;
+ GLboolean debug;
+};
+
+GLboolean r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
+
+#endif /* RADEON_COMPILER_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
new file mode 100644
index 00000000000..202a8532b6d
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ *
+ * "Not-quite SSA" and Dead-Code Elimination.
+ *
+ * @note This code uses SWIZZLE_NIL in a source register to indicate that
+ * the corresponding component is ignored by the corresponding instruction.
+ */
+
+#include "radeon_nqssadce.h"
+
+
+/**
+ * Return the @ref register_state for the given register (or 0 for untracked
+ * registers, i.e. constants).
+ */
+static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index)
+{
+ switch(file) {
+ case PROGRAM_TEMPORARY: return &s->Temps[index];
+ case PROGRAM_OUTPUT: return &s->Outputs[index];
+ case PROGRAM_ADDRESS: return &s->Address;
+ default: return 0;
+ }
+}
+
+
+/**
+ * Left multiplication of a register with a swizzle
+ *
+ * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles.
+ */
+struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
+{
+ struct prog_src_register tmp = srcreg;
+ int i;
+ tmp.Swizzle = 0;
+ tmp.Negate = NEGATE_NONE;
+ for(i = 0; i < 4; ++i) {
+ GLuint swz = GET_SWZ(swizzle, i);
+ if (swz < 4) {
+ tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
+ tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
+ } else {
+ tmp.Swizzle |= swz << (i*3);
+ }
+ }
+ return tmp;
+}
+
+
+static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
+ struct prog_instruction *inst, GLint src, GLuint sourced)
+{
+ int i;
+ GLuint deswz_source = 0;
+
+ for(i = 0; i < 4; ++i) {
+ if (GET_BIT(sourced, i)) {
+ GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i);
+ deswz_source |= 1 << swz;
+ } else {
+ inst->SrcReg[src].Swizzle &= ~(7 << (3*i));
+ inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
+ }
+ }
+
+ if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
+ struct prog_dst_register dstreg = inst->DstReg;
+ dstreg.File = PROGRAM_TEMPORARY;
+ dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
+ dstreg.WriteMask = sourced;
+
+ s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]);
+
+ inst = s->Program->Instructions + s->IP;
+ inst->SrcReg[src].File = PROGRAM_TEMPORARY;
+ inst->SrcReg[src].Index = dstreg.Index;
+ inst->SrcReg[src].Swizzle = 0;
+ inst->SrcReg[src].Negate = NEGATE_NONE;
+ inst->SrcReg[src].Abs = 0;
+ for(i = 0; i < 4; ++i) {
+ if (GET_BIT(sourced, i))
+ inst->SrcReg[src].Swizzle |= i << (3*i);
+ else
+ inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
+ }
+ deswz_source = sourced;
+ }
+
+ struct register_state *regstate;
+
+ if (inst->SrcReg[src].RelAddr) {
+ regstate = get_reg_state(s, PROGRAM_ADDRESS, 0);
+ if (regstate)
+ regstate->Sourced |= WRITEMASK_X;
+ } else {
+ regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
+ if (regstate)
+ regstate->Sourced |= deswz_source & 0xf;
+ }
+
+ return inst;
+}
+
+static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex)
+{
+ int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
+ int i;
+ for(i = 0; i < nsrc; ++i)
+ if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex)
+ inst->SrcReg[i].Index = newindex;
+}
+
+static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
+{
+ GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
+ int ip;
+ for(ip = 0; ip < s->IP; ++ip) {
+ struct prog_instruction* inst = s->Program->Instructions + ip;
+ if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex)
+ inst->DstReg.Index = newindex;
+ unalias_srcregs(inst, oldindex, newindex);
+ }
+ unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex);
+}
+
+
+/**
+ * Handle one instruction.
+ */
+static void process_instruction(struct nqssadce_state* s)
+{
+ struct prog_instruction *inst = s->Program->Instructions + s->IP;
+
+ if (inst->Opcode == OPCODE_END)
+ return;
+
+ if (inst->Opcode != OPCODE_KIL) {
+ struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
+ if (!regstate) {
+ _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n",
+ inst->DstReg.File, inst->DstReg.Index);
+ return;
+ }
+
+ inst->DstReg.WriteMask &= regstate->Sourced;
+ regstate->Sourced &= ~inst->DstReg.WriteMask;
+
+ if (inst->DstReg.WriteMask == 0) {
+ _mesa_delete_instructions(s->Program, s->IP, 1);
+ return;
+ }
+
+ if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced)
+ unalias_temporary(s, inst->DstReg.Index);
+ }
+
+ /* Attention: Due to swizzle emulation code, the following
+ * might change the instruction stream under us, so we have
+ * to be careful with the inst pointer. */
+ switch (inst->Opcode) {
+ case OPCODE_ARL:
+ case OPCODE_DDX:
+ case OPCODE_DDY:
+ case OPCODE_FRC:
+ case OPCODE_MOV:
+ inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
+ break;
+ case OPCODE_ADD:
+ case OPCODE_MAX:
+ case OPCODE_MIN:
+ case OPCODE_MUL:
+ case OPCODE_SGE:
+ case OPCODE_SLT:
+ inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
+ inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
+ break;
+ case OPCODE_CMP:
+ case OPCODE_MAD:
+ inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
+ inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
+ inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask);
+ break;
+ case OPCODE_COS:
+ case OPCODE_EX2:
+ case OPCODE_LG2:
+ case OPCODE_RCP:
+ case OPCODE_RSQ:
+ case OPCODE_SIN:
+ inst = track_used_srcreg(s, inst, 0, 0x1);
+ break;
+ case OPCODE_DP3:
+ inst = track_used_srcreg(s, inst, 0, 0x7);
+ inst = track_used_srcreg(s, inst, 1, 0x7);
+ break;
+ case OPCODE_DP4:
+ inst = track_used_srcreg(s, inst, 0, 0xf);
+ inst = track_used_srcreg(s, inst, 1, 0xf);
+ break;
+ case OPCODE_KIL:
+ case OPCODE_TEX:
+ case OPCODE_TXB:
+ case OPCODE_TXP:
+ inst = track_used_srcreg(s, inst, 0, 0xf);
+ break;
+ case OPCODE_DST:
+ inst = track_used_srcreg(s, inst, 0, 0x6);
+ inst = track_used_srcreg(s, inst, 1, 0xa);
+ break;
+ case OPCODE_EXP:
+ case OPCODE_LOG:
+ case OPCODE_POW:
+ inst = track_used_srcreg(s, inst, 0, 0x3);
+ break;
+ case OPCODE_LIT:
+ inst = track_used_srcreg(s, inst, 0, 0xb);
+ break;
+ default:
+ _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
+ return;
+ }
+}
+
+static void calculateInputsOutputs(struct gl_program *p)
+{
+ struct prog_instruction *inst;
+ GLuint InputsRead, OutputsWritten;
+
+ inst = p->Instructions;
+ InputsRead = 0;
+ OutputsWritten = 0;
+ while (inst->Opcode != OPCODE_END)
+ {
+ int i, num_src_regs;
+
+ num_src_regs = _mesa_num_inst_src_regs(inst->Opcode);
+ for (i = 0; i < num_src_regs; ++i) {
+ if (inst->SrcReg[i].File == PROGRAM_INPUT)
+ InputsRead |= 1 << inst->SrcReg[i].Index;
+ }
+
+ if (inst->DstReg.File == PROGRAM_OUTPUT)
+ OutputsWritten |= 1 << inst->DstReg.Index;
+
+ ++inst;
+ }
+
+ p->InputsRead = InputsRead;
+ p->OutputsWritten = OutputsWritten;
+}
+
+void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr)
+{
+ struct nqssadce_state s;
+
+ _mesa_bzero(&s, sizeof(s));
+ s.Ctx = ctx;
+ s.Program = p;
+ s.Descr = descr;
+ s.Descr->Init(&s);
+ s.IP = p->NumInstructions;
+
+ while(s.IP > 0) {
+ s.IP--;
+ process_instruction(&s);
+ }
+
+ calculateInputsOutputs(p);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h
new file mode 100644
index 00000000000..8626f21c25e
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_NQSSADCE_H_
+#define __RADEON_PROGRAM_NQSSADCE_H_
+
+#include "radeon_program.h"
+
+
+struct register_state {
+ /**
+ * Bitmask indicating which components of the register are sourced
+ * by later instructions.
+ */
+ GLuint Sourced : 4;
+};
+
+/**
+ * Maintain state such as which registers are used, which registers are
+ * read from, etc.
+ */
+struct nqssadce_state {
+ GLcontext *Ctx;
+ struct gl_program *Program;
+ struct radeon_nqssadce_descr *Descr;
+
+ /**
+ * All instructions after this instruction pointer have been dealt with.
+ */
+ int IP;
+
+ /**
+ * Which registers are read by subsequent instructions?
+ */
+ struct register_state Temps[MAX_PROGRAM_TEMPS];
+ struct register_state Outputs[VERT_RESULT_MAX];
+ struct register_state Address;
+};
+
+
+/**
+ * This structure contains a description of the hardware in-so-far as
+ * it is required for the NqSSA-DCE pass.
+ */
+struct radeon_nqssadce_descr {
+ /**
+ * Fill in which outputs
+ */
+ void (*Init)(struct nqssadce_state *);
+
+ /**
+ * Check whether the given swizzle, absolute and negate combination
+ * can be implemented natively by the hardware for this opcode.
+ */
+ GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg);
+
+ /**
+ * Emit (at the current IP) the instruction MOV dst, src;
+ * The transformation will work recursively on the emitted instruction(s).
+ */
+ void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
+
+ void *Data;
+};
+
+void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr);
+struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg);
+
+#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
new file mode 100644
index 00000000000..da5e7aefce5
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program.h"
+
+#include "shader/prog_print.h"
+
+
+/**
+ * Transform the given clause in the following way:
+ * 1. Replace it with an empty clause
+ * 2. For every instruction in the original clause, try the given
+ * transformations in order.
+ * 3. If one of the transformations returns GL_TRUE, assume that it
+ * has emitted the appropriate instruction(s) into the new clause;
+ * otherwise, copy the instruction verbatim.
+ *
+ * \note The transformation is currently not recursive; in other words,
+ * instructions emitted by transformations are not transformed.
+ *
+ * \note The transform is called 'local' because it can only look at
+ * one instruction at a time.
+ */
+void radeonLocalTransform(
+ GLcontext *Ctx,
+ struct gl_program *program,
+ int num_transformations,
+ struct radeon_program_transformation* transformations)
+{
+ struct radeon_transform_context ctx;
+ int ip;
+
+ ctx.Ctx = Ctx;
+ ctx.Program = program;
+ ctx.OldInstructions = program->Instructions;
+ ctx.OldNumInstructions = program->NumInstructions;
+
+ program->Instructions = 0;
+ program->NumInstructions = 0;
+
+ for(ip = 0; ip < ctx.OldNumInstructions; ++ip) {
+ struct prog_instruction *instr = ctx.OldInstructions + ip;
+ int i;
+
+ for(i = 0; i < num_transformations; ++i) {
+ struct radeon_program_transformation* t = transformations + i;
+
+ if (t->function(&ctx, instr, t->userData))
+ break;
+ }
+
+ if (i >= num_transformations) {
+ struct prog_instruction* dest = radeonAppendInstructions(program, 1);
+ _mesa_copy_instructions(dest, instr, 1);
+ }
+ }
+
+ _mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions);
+}
+
+
+static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count)
+{
+ GLuint i;
+ for (i = 0; i < count; i++) {
+ const struct prog_instruction *inst = insts + i;
+ const GLuint n = _mesa_num_inst_src_regs(inst->Opcode);
+ GLuint k;
+
+ for (k = 0; k < n; k++) {
+ if (inst->SrcReg[k].File == PROGRAM_TEMPORARY)
+ used[inst->SrcReg[k].Index] = GL_TRUE;
+ }
+ }
+}
+
+GLint radeonFindFreeTemporary(struct radeon_transform_context *t)
+{
+ GLboolean used[MAX_PROGRAM_TEMPS];
+ GLuint i;
+
+ _mesa_memset(used, 0, sizeof(used));
+ scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions);
+ scan_instructions(used, t->OldInstructions, t->OldNumInstructions);
+
+ for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
+ if (!used[i])
+ return i;
+ }
+
+ return -1;
+}
+
+
+/**
+ * Append the given number of instructions to the program and return a
+ * pointer to the first new instruction.
+ */
+struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count)
+{
+ int oldnum = program->NumInstructions;
+ _mesa_insert_instructions(program, oldnum, count);
+ return program->Instructions + oldnum;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
new file mode 100644
index 00000000000..88474d43a22
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_H_
+#define __RADEON_PROGRAM_H_
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+#include "shader/prog_instruction.h"
+
+
+enum {
+ CLAUSE_MIXED = 0,
+ CLAUSE_ALU,
+ CLAUSE_TEX
+};
+
+enum {
+ PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */
+};
+
+enum {
+ OPCODE_REPL_ALPHA = MAX_OPCODE /**< used in paired instructions */
+};
+
+#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO)
+#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE)
+
+static inline GLuint get_swz(GLuint swz, GLuint idx)
+{
+ if (idx & 0x4)
+ return idx;
+ return GET_SWZ(swz, idx);
+}
+
+static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w)
+{
+ GLuint ret = 0;
+
+ ret |= get_swz(src, swz_x);
+ ret |= get_swz(src, swz_y) << 3;
+ ret |= get_swz(src, swz_z) << 6;
+ ret |= get_swz(src, swz_w) << 9;
+
+ return ret;
+}
+
+static inline GLuint combine_swizzles(GLuint src, GLuint swz)
+{
+ GLuint ret = 0;
+
+ ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_X));
+ ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Y)) << 3;
+ ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Z)) << 6;
+ ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_W)) << 9;
+
+ return ret;
+}
+
+
+/**
+ * Transformation context that is passed to local transformations.
+ *
+ * Care must be taken with some operations during transformation,
+ * e.g. finding new temporary registers must use @ref radeonFindFreeTemporary
+ */
+struct radeon_transform_context {
+ GLcontext *Ctx;
+ struct gl_program *Program;
+ struct prog_instruction *OldInstructions;
+ GLuint OldNumInstructions;
+};
+
+/**
+ * A transformation that can be passed to \ref radeonLocalTransform.
+ *
+ * The function will be called once for each instruction.
+ * It has to either emit the appropriate transformed code for the instruction
+ * and return GL_TRUE, or return GL_FALSE if it doesn't understand the
+ * instruction.
+ *
+ * The function gets passed the userData as last parameter.
+ */
+struct radeon_program_transformation {
+ GLboolean (*function)(
+ struct radeon_transform_context*,
+ struct prog_instruction*,
+ void*);
+ void *userData;
+};
+
+void radeonLocalTransform(
+ GLcontext* ctx,
+ struct gl_program *program,
+ int num_transformations,
+ struct radeon_program_transformation* transformations);
+
+/**
+ * Find a usable free temporary register during program transformation
+ */
+GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx);
+
+struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
new file mode 100644
index 00000000000..8283723bad7
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -0,0 +1,635 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ *
+ * Shareable transformations that transform "special" ALU instructions
+ * into ALU instructions that are supported by hardware.
+ *
+ */
+
+#include "radeon_program_alu.h"
+
+#include "shader/prog_parameter.h"
+
+
+static struct prog_instruction *emit1(struct gl_program* p,
+ gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
+ struct prog_src_register SrcReg)
+{
+ struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
+
+ fpi->Opcode = Opcode;
+ fpi->SaturateMode = Saturate;
+ fpi->DstReg = DstReg;
+ fpi->SrcReg[0] = SrcReg;
+ return fpi;
+}
+
+static struct prog_instruction *emit2(struct gl_program* p,
+ gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
+ struct prog_src_register SrcReg0, struct prog_src_register SrcReg1)
+{
+ struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
+
+ fpi->Opcode = Opcode;
+ fpi->SaturateMode = Saturate;
+ fpi->DstReg = DstReg;
+ fpi->SrcReg[0] = SrcReg0;
+ fpi->SrcReg[1] = SrcReg1;
+ return fpi;
+}
+
+static struct prog_instruction *emit3(struct gl_program* p,
+ gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
+ struct prog_src_register SrcReg0, struct prog_src_register SrcReg1,
+ struct prog_src_register SrcReg2)
+{
+ struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
+
+ fpi->Opcode = Opcode;
+ fpi->SaturateMode = Saturate;
+ fpi->DstReg = DstReg;
+ fpi->SrcReg[0] = SrcReg0;
+ fpi->SrcReg[1] = SrcReg1;
+ fpi->SrcReg[2] = SrcReg2;
+ return fpi;
+}
+
+static struct prog_dst_register dstreg(int file, int index)
+{
+ struct prog_dst_register dst;
+ dst.File = file;
+ dst.Index = index;
+ dst.WriteMask = WRITEMASK_XYZW;
+ dst.CondMask = COND_TR;
+ dst.CondSwizzle = SWIZZLE_NOOP;
+ dst.CondSrc = 0;
+ dst.pad = 0;
+ return dst;
+}
+
+static struct prog_dst_register dstregtmpmask(int index, int mask)
+{
+ struct prog_dst_register dst;
+ dst.File = PROGRAM_TEMPORARY;
+ dst.Index = index;
+ dst.WriteMask = mask;
+ dst.CondMask = COND_TR;
+ dst.CondSwizzle = SWIZZLE_NOOP;
+ dst.CondSrc = 0;
+ dst.pad = 0;
+ return dst;
+}
+
+static const struct prog_src_register builtin_zero = {
+ .File = PROGRAM_BUILTIN,
+ .Index = 0,
+ .Swizzle = SWIZZLE_0000
+};
+static const struct prog_src_register builtin_one = {
+ .File = PROGRAM_BUILTIN,
+ .Index = 0,
+ .Swizzle = SWIZZLE_1111
+};
+static const struct prog_src_register srcreg_undefined = {
+ .File = PROGRAM_UNDEFINED,
+ .Index = 0,
+ .Swizzle = SWIZZLE_NOOP
+};
+
+static struct prog_src_register srcreg(int file, int index)
+{
+ struct prog_src_register src = srcreg_undefined;
+ src.File = file;
+ src.Index = index;
+ return src;
+}
+
+static struct prog_src_register srcregswz(int file, int index, int swz)
+{
+ struct prog_src_register src = srcreg_undefined;
+ src.File = file;
+ src.Index = index;
+ src.Swizzle = swz;
+ return src;
+}
+
+static struct prog_src_register absolute(struct prog_src_register reg)
+{
+ struct prog_src_register newreg = reg;
+ newreg.Abs = 1;
+ newreg.Negate = NEGATE_NONE;
+ return newreg;
+}
+
+static struct prog_src_register negate(struct prog_src_register reg)
+{
+ struct prog_src_register newreg = reg;
+ newreg.Negate = newreg.Negate ^ NEGATE_XYZW;
+ return newreg;
+}
+
+static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w)
+{
+ struct prog_src_register swizzled = reg;
+ swizzled.Swizzle = MAKE_SWIZZLE4(
+ x >= 4 ? x : GET_SWZ(reg.Swizzle, x),
+ y >= 4 ? y : GET_SWZ(reg.Swizzle, y),
+ z >= 4 ? z : GET_SWZ(reg.Swizzle, z),
+ w >= 4 ? w : GET_SWZ(reg.Swizzle, w));
+ return swizzled;
+}
+
+static struct prog_src_register scalar(struct prog_src_register reg)
+{
+ return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
+}
+
+static void transform_ABS(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ struct prog_src_register src = inst->SrcReg[0];
+ src.Abs = 1;
+ src.Negate = NEGATE_NONE;
+ emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src);
+}
+
+static void transform_DPH(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ struct prog_src_register src0 = inst->SrcReg[0];
+ src0.Negate &= ~NEGATE_W;
+ src0.Swizzle &= ~(7 << (3 * 3));
+ src0.Swizzle |= SWIZZLE_ONE << (3 * 3);
+ emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]);
+}
+
+/**
+ * [1, src0.y*src1.y, src0.z, src1.w]
+ * So basically MUL with lotsa swizzling.
+ */
+static void transform_DST(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg,
+ swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE),
+ swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W));
+}
+
+static void transform_FLR(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ int tempreg = radeonFindFreeTemporary(t);
+ emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]);
+ emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg,
+ inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
+}
+
+/**
+ * Definition of LIT (from ARB_fragment_program):
+ *
+ * tmp = VectorLoad(op0);
+ * if (tmp.x < 0) tmp.x = 0;
+ * if (tmp.y < 0) tmp.y = 0;
+ * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
+ * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
+ * result.x = 1.0;
+ * result.y = tmp.x;
+ * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
+ * result.w = 1.0;
+ *
+ * The longest path of computation is the one leading to result.z,
+ * consisting of 5 operations. This implementation of LIT takes
+ * 5 slots, if the subsequent optimization passes are clever enough
+ * to pair instructions correctly.
+ */
+static void transform_LIT(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ static const GLfloat LitConst[4] = { -127.999999 };
+
+ GLuint constant;
+ GLuint constant_swizzle;
+ GLuint temp;
+ int needTemporary = 0;
+ struct prog_src_register srctemp;
+
+ constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle);
+
+ if (inst->DstReg.WriteMask != WRITEMASK_XYZW) {
+ needTemporary = 1;
+ } else if (inst->DstReg.File != PROGRAM_TEMPORARY) {
+ // LIT is typically followed by DP3/DP4, so there's no point
+ // in creating special code for this case
+ needTemporary = 1;
+ }
+
+ if (needTemporary) {
+ temp = radeonFindFreeTemporary(t);
+ } else {
+ temp = inst->DstReg.Index;
+ }
+ srctemp = srcreg(PROGRAM_TEMPORARY, temp);
+
+ // tmp.x = max(0.0, Src.x);
+ // tmp.y = max(0.0, Src.y);
+ // tmp.w = clamp(Src.z, -128+eps, 128-eps);
+ emit2(t->Program, OPCODE_MAX, 0,
+ dstregtmpmask(temp, WRITEMASK_XYW),
+ inst->SrcReg[0],
+ swizzle(srcreg(PROGRAM_CONSTANT, constant),
+ SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3));
+ emit2(t->Program, OPCODE_MIN, 0,
+ dstregtmpmask(temp, WRITEMASK_Z),
+ swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)));
+
+ // tmp.w = Pow(tmp.y, tmp.w)
+ emit1(t->Program, OPCODE_LG2, 0,
+ dstregtmpmask(temp, WRITEMASK_W),
+ swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
+ emit2(t->Program, OPCODE_MUL, 0,
+ dstregtmpmask(temp, WRITEMASK_W),
+ swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z));
+ emit1(t->Program, OPCODE_EX2, 0,
+ dstregtmpmask(temp, WRITEMASK_W),
+ swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
+
+ // tmp.z = (tmp.x > 0) ? tmp.w : 0.0
+ emit3(t->Program, OPCODE_CMP, inst->SaturateMode,
+ dstregtmpmask(temp, WRITEMASK_Z),
+ negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
+ swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ builtin_zero);
+
+ // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0
+ emit1(t->Program, OPCODE_MOV, inst->SaturateMode,
+ dstregtmpmask(temp, WRITEMASK_XYW),
+ swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE));
+
+ if (needTemporary)
+ emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp);
+}
+
+static void transform_LRP(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ int tempreg = radeonFindFreeTemporary(t);
+
+ emit2(t->Program, OPCODE_ADD, 0,
+ dstreg(PROGRAM_TEMPORARY, tempreg),
+ inst->SrcReg[1], negate(inst->SrcReg[2]));
+ emit3(t->Program, OPCODE_MAD, inst->SaturateMode,
+ inst->DstReg,
+ inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]);
+}
+
+static void transform_POW(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ int tempreg = radeonFindFreeTemporary(t);
+ struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg);
+ struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg);
+ tempdst.WriteMask = WRITEMASK_W;
+ tempsrc.Swizzle = SWIZZLE_WWWW;
+
+ emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0]));
+ emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1]));
+ emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc);
+}
+
+static void transform_RSQ(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0]));
+}
+
+static void transform_SGE(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ int tempreg = radeonFindFreeTemporary(t);
+
+ emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
+ emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
+ srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one);
+}
+
+static void transform_SLT(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ int tempreg = radeonFindFreeTemporary(t);
+
+ emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
+ emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
+ srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero);
+}
+
+static void transform_SUB(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1]));
+}
+
+static void transform_SWZ(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]);
+}
+
+static void transform_XPD(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ int tempreg = radeonFindFreeTemporary(t);
+
+ emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg),
+ swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
+ swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
+ emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg,
+ swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
+ swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
+ negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
+}
+
+
+/**
+ * Can be used as a transformation for @ref radeonClauseLocalTransform,
+ * no userData necessary.
+ *
+ * Eliminates the following ALU instructions:
+ * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD
+ * using:
+ * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
+ *
+ * Transforms RSQ to Radeon's native RSQ by explicitly setting
+ * absolute value.
+ *
+ * @note should be applicable to R300 and R500 fragment programs.
+ */
+GLboolean radeonTransformALU(struct radeon_transform_context* t,
+ struct prog_instruction* inst,
+ void* unused)
+{
+ switch(inst->Opcode) {
+ case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE;
+ case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;
+ case OPCODE_DST: transform_DST(t, inst); return GL_TRUE;
+ case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;
+ case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE;
+ case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE;
+ case OPCODE_POW: transform_POW(t, inst); return GL_TRUE;
+ case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE;
+ case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE;
+ case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE;
+ case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE;
+ case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE;
+ case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE;
+ default:
+ return GL_FALSE;
+ }
+}
+
+
+static void sincos_constants(struct radeon_transform_context* t, GLuint *constants)
+{
+ static const GLfloat SinCosConsts[2][4] = {
+ {
+ 1.273239545, // 4/PI
+ -0.405284735, // -4/(PI*PI)
+ 3.141592654, // PI
+ 0.2225 // weight
+ },
+ {
+ 0.75,
+ 0.5,
+ 0.159154943, // 1/(2*PI)
+ 6.283185307 // 2*PI
+ }
+ };
+ int i;
+
+ for(i = 0; i < 2; ++i) {
+ GLuint swz;
+ constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz);
+ ASSERT(swz == SWIZZLE_NOOP);
+ }
+}
+
+/**
+ * Approximate sin(x), where x is clamped to (-pi/2, pi/2).
+ *
+ * MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
+ * MAD tmp.x, tmp.y, |src|, tmp.x
+ * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
+ * MAD dest, tmp.y, weight, tmp.x
+ */
+static void sin_approx(struct radeon_transform_context* t,
+ struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants)
+{
+ GLuint tempreg = radeonFindFreeTemporary(t);
+
+ emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ srcreg(PROGRAM_CONSTANT, constants[0]));
+ emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+ absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
+ emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
+ negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)));
+ emit3(t->Program, OPCODE_MAD, 0, dst,
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
+}
+
+/**
+ * Translate the trigonometric functions COS, SIN, and SCS
+ * using only the basic instructions
+ * MOV, ADD, MUL, MAD, FRC
+ */
+GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
+ struct prog_instruction* inst,
+ void* unused)
+{
+ if (inst->Opcode != OPCODE_COS &&
+ inst->Opcode != OPCODE_SIN &&
+ inst->Opcode != OPCODE_SCS)
+ return GL_FALSE;
+
+ GLuint constants[2];
+ GLuint tempreg = radeonFindFreeTemporary(t);
+
+ sincos_constants(t, constants);
+
+ if (inst->Opcode == OPCODE_COS) {
+ // MAD tmp.x, src, 1/(2*PI), 0.75
+ // FRC tmp.x, tmp.x
+ // MAD tmp.z, tmp.x, 2*PI, -PI
+ emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
+ emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
+ emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
+
+ sin_approx(t, inst->DstReg,
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ constants);
+ } else if (inst->Opcode == OPCODE_SIN) {
+ emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
+ emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
+ emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
+
+ sin_approx(t, inst->DstReg,
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ constants);
+ } else {
+ emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W));
+ emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ srcreg(PROGRAM_TEMPORARY, tempreg));
+ emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ srcreg(PROGRAM_TEMPORARY, tempreg),
+ swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
+
+ struct prog_dst_register dst = inst->DstReg;
+
+ dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X;
+ sin_approx(t, dst,
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ constants);
+
+ dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y;
+ sin_approx(t, dst,
+ swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+ constants);
+ }
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Transform the trigonometric functions COS, SIN, and SCS
+ * to include pre-scaling by 1/(2*PI) and taking the fractional
+ * part, so that the input to COS and SIN is always in the range [0,1).
+ * SCS is replaced by one COS and one SIN instruction.
+ *
+ * @warning This transformation implicitly changes the semantics of SIN and COS!
+ */
+GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
+ struct prog_instruction* inst,
+ void* unused)
+{
+ if (inst->Opcode != OPCODE_COS &&
+ inst->Opcode != OPCODE_SIN &&
+ inst->Opcode != OPCODE_SCS)
+ return GL_FALSE;
+
+ static const GLfloat RCP_2PI[] = { 0.15915494309189535 };
+ GLuint temp;
+ GLuint constant;
+ GLuint constant_swizzle;
+
+ temp = radeonFindFreeTemporary(t);
+ constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle);
+
+ emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W),
+ swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle));
+ emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W),
+ srcreg(PROGRAM_TEMPORARY, temp));
+
+ if (inst->Opcode == OPCODE_COS) {
+ emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg,
+ srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+ } else if (inst->Opcode == OPCODE_SIN) {
+ emit1(t->Program, OPCODE_SIN, inst->SaturateMode,
+ inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+ } else if (inst->Opcode == OPCODE_SCS) {
+ struct prog_dst_register moddst = inst->DstReg;
+
+ if (inst->DstReg.WriteMask & WRITEMASK_X) {
+ moddst.WriteMask = WRITEMASK_X;
+ emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst,
+ srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+ }
+ if (inst->DstReg.WriteMask & WRITEMASK_Y) {
+ moddst.WriteMask = WRITEMASK_Y;
+ emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst,
+ srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+ }
+ }
+
+ return GL_TRUE;
+}
+
+/**
+ * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
+ * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
+ * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
+ *
+ * @warning This explicitly changes the form of DDX and DDY!
+ */
+
+GLboolean radeonTransformDeriv(struct radeon_transform_context* t,
+ struct prog_instruction* inst,
+ void* unused)
+{
+ if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY)
+ return GL_FALSE;
+
+ struct prog_src_register B = inst->SrcReg[1];
+
+ B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE,
+ SWIZZLE_ONE, SWIZZLE_ONE);
+ B.Negate = NEGATE_XYZW;
+
+ emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg,
+ inst->SrcReg[0], B);
+
+ return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
new file mode 100644
index 00000000000..b45958115cf
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_ALU_H_
+#define __RADEON_PROGRAM_ALU_H_
+
+#include "radeon_program.h"
+
+GLboolean radeonTransformALU(
+ struct radeon_transform_context *t,
+ struct prog_instruction*,
+ void*);
+
+GLboolean radeonTransformTrigSimple(
+ struct radeon_transform_context *t,
+ struct prog_instruction*,
+ void*);
+
+GLboolean radeonTransformTrigScale(
+ struct radeon_transform_context *t,
+ struct prog_instruction*,
+ void*);
+
+GLboolean radeonTransformDeriv(
+ struct radeon_transform_context *t,
+ struct prog_instruction*,
+ void*);
+
+#endif /* __RADEON_PROGRAM_ALU_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
new file mode 100644
index 00000000000..d6fb474cf23
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -0,0 +1,1004 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ *
+ * Perform temporary register allocation and attempt to pair off instructions
+ * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction
+ * vs. ALU instruction scheduling.
+ */
+
+#include "radeon_program_pair.h"
+
+#include "radeon_common.h"
+
+#include "shader/prog_print.h"
+
+#define error(fmt, args...) do { \
+ _mesa_problem(s->Ctx, "%s::%s(): " fmt "\n", \
+ __FILE__, __FUNCTION__, ##args); \
+ s->Error = GL_TRUE; \
+} while(0)
+
+struct pair_state_instruction {
+ GLuint IsTex:1; /**< Is a texture instruction */
+ GLuint NeedRGB:1; /**< Needs the RGB ALU */
+ GLuint NeedAlpha:1; /**< Needs the Alpha ALU */
+ GLuint IsTranscendent:1; /**< Is a special transcendent instruction */
+
+ /**
+ * Number of (read and write) dependencies that must be resolved before
+ * this instruction can be scheduled.
+ */
+ GLuint NumDependencies:5;
+
+ /**
+ * Next instruction in the linked list of ready instructions.
+ */
+ struct pair_state_instruction *NextReady;
+
+ /**
+ * Values that this instruction writes
+ */
+ struct reg_value *Values[4];
+};
+
+
+/**
+ * Used to keep track of which instructions read a value.
+ */
+struct reg_value_reader {
+ GLuint IP; /**< IP of the instruction that performs this access */
+ struct reg_value_reader *Next;
+};
+
+/**
+ * Used to keep track which values are stored in each component of a
+ * PROGRAM_TEMPORARY.
+ */
+struct reg_value {
+ GLuint IP; /**< IP of the instruction that writes this value */
+ struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */
+
+ /**
+ * Unordered linked list of instructions that read from this value.
+ */
+ struct reg_value_reader *Readers;
+
+ /**
+ * Number of readers of this value. This is calculated during @ref scan_instructions
+ * and continually decremented during code emission.
+ * When this count reaches zero, the instruction that writes the @ref Next value
+ * can be scheduled.
+ */
+ GLuint NumReaders;
+};
+
+/**
+ * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register
+ * to the proper hardware temporary.
+ */
+struct pair_register_translation {
+ GLuint Allocated:1;
+ GLuint HwIndex:8;
+ GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */
+
+ /**
+ * Notes the value that is currently contained in each component
+ * (only used for PROGRAM_TEMPORARY registers).
+ */
+ struct reg_value *Value[4];
+};
+
+struct pair_state {
+ GLcontext *Ctx;
+ struct gl_program *Program;
+ const struct radeon_pair_handler *Handler;
+ GLboolean Error;
+ GLboolean Debug;
+ GLboolean Verbose;
+ void *UserData;
+
+ /**
+ * Translate Mesa registers to hardware registers
+ */
+ struct pair_register_translation Inputs[FRAG_ATTRIB_MAX];
+ struct pair_register_translation Temps[MAX_PROGRAM_TEMPS];
+
+ /**
+ * Derived information about program instructions.
+ */
+ struct pair_state_instruction *Instructions;
+
+ struct {
+ GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */
+ } HwTemps[128];
+
+ /**
+ * Linked list of instructions that can be scheduled right now,
+ * based on which ALU/TEX resources they require.
+ */
+ struct pair_state_instruction *ReadyFullALU;
+ struct pair_state_instruction *ReadyRGB;
+ struct pair_state_instruction *ReadyAlpha;
+ struct pair_state_instruction *ReadyTEX;
+
+ /**
+ * Pool of @ref reg_value structures for fast allocation.
+ */
+ struct reg_value *ValuePool;
+ GLuint ValuePoolUsed;
+ struct reg_value_reader *ReaderPool;
+ GLuint ReaderPoolUsed;
+};
+
+
+static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index)
+{
+ switch(file) {
+ case PROGRAM_TEMPORARY: return &s->Temps[index];
+ case PROGRAM_INPUT: return &s->Inputs[index];
+ default: return 0;
+ }
+}
+
+static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex)
+{
+ struct pair_register_translation *t = get_register(s, file, index);
+ ASSERT(!s->HwTemps[hwindex].RefCount);
+ ASSERT(!t->Allocated);
+ s->HwTemps[hwindex].RefCount = t->RefCount;
+ t->Allocated = 1;
+ t->HwIndex = hwindex;
+}
+
+static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index)
+{
+ GLuint hwindex;
+
+ struct pair_register_translation *t = get_register(s, file, index);
+ if (!t) {
+ _mesa_problem(s->Ctx, "get_hw_reg: %i[%i]\n", file, index);
+ return 0;
+ }
+
+ if (t->Allocated)
+ return t->HwIndex;
+
+ for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex)
+ if (!s->HwTemps[hwindex].RefCount)
+ break;
+
+ if (hwindex >= s->Handler->MaxHwTemps) {
+ error("Ran out of hardware temporaries");
+ return 0;
+ }
+
+ alloc_hw_reg(s, file, index, hwindex);
+ return hwindex;
+}
+
+
+static void deref_hw_reg(struct pair_state *s, GLuint hwindex)
+{
+ if (!s->HwTemps[hwindex].RefCount) {
+ error("Hwindex %i refcount error", hwindex);
+ return;
+ }
+
+ s->HwTemps[hwindex].RefCount--;
+}
+
+static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst)
+{
+ pairinst->NextReady = *list;
+ *list = pairinst;
+}
+
+/**
+ * The instruction at the given IP has become ready. Link it into the ready
+ * instructions.
+ */
+static void instruction_ready(struct pair_state *s, int ip)
+{
+ struct pair_state_instruction *pairinst = s->Instructions + ip;
+
+ if (s->Verbose)
+ _mesa_printf("instruction_ready(%i)\n", ip);
+
+ if (pairinst->IsTex)
+ add_pairinst_to_list(&s->ReadyTEX, pairinst);
+ else if (!pairinst->NeedAlpha)
+ add_pairinst_to_list(&s->ReadyRGB, pairinst);
+ else if (!pairinst->NeedRGB)
+ add_pairinst_to_list(&s->ReadyAlpha, pairinst);
+ else
+ add_pairinst_to_list(&s->ReadyFullALU, pairinst);
+}
+
+
+/**
+ * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
+ * and reverse the order of arguments for CMP.
+ */
+static void final_rewrite(struct pair_state *s, struct prog_instruction *inst)
+{
+ struct prog_src_register tmp;
+
+ switch(inst->Opcode) {
+ case OPCODE_ADD:
+ inst->SrcReg[2] = inst->SrcReg[1];
+ inst->SrcReg[1].File = PROGRAM_BUILTIN;
+ inst->SrcReg[1].Swizzle = SWIZZLE_1111;
+ inst->SrcReg[1].Negate = NEGATE_NONE;
+ inst->Opcode = OPCODE_MAD;
+ break;
+ case OPCODE_CMP:
+ tmp = inst->SrcReg[2];
+ inst->SrcReg[2] = inst->SrcReg[0];
+ inst->SrcReg[0] = tmp;
+ break;
+ case OPCODE_MOV:
+ /* AMD say we should use CMP.
+ * However, when we transform
+ * KIL -r0;
+ * into
+ * CMP tmp, -r0, -r0, 0;
+ * KIL tmp;
+ * we get incorrect behaviour on R500 when r0 == 0.0.
+ * It appears that the R500 KIL hardware treats -0.0 as less
+ * than zero.
+ */
+ inst->SrcReg[1].File = PROGRAM_BUILTIN;
+ inst->SrcReg[1].Swizzle = SWIZZLE_1111;
+ inst->SrcReg[2].File = PROGRAM_BUILTIN;
+ inst->SrcReg[2].Swizzle = SWIZZLE_0000;
+ inst->Opcode = OPCODE_MAD;
+ break;
+ case OPCODE_MUL:
+ inst->SrcReg[2].File = PROGRAM_BUILTIN;
+ inst->SrcReg[2].Swizzle = SWIZZLE_0000;
+ inst->Opcode = OPCODE_MAD;
+ break;
+ default:
+ /* nothing to do */
+ break;
+ }
+}
+
+
+/**
+ * Classify an instruction according to which ALUs etc. it needs
+ */
+static void classify_instruction(struct pair_state *s,
+ struct prog_instruction *inst, struct pair_state_instruction *pairinst)
+{
+ pairinst->NeedRGB = (inst->DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0;
+ pairinst->NeedAlpha = (inst->DstReg.WriteMask & WRITEMASK_W) ? 1 : 0;
+
+ switch(inst->Opcode) {
+ case OPCODE_ADD:
+ case OPCODE_CMP:
+ case OPCODE_DDX:
+ case OPCODE_DDY:
+ case OPCODE_FRC:
+ case OPCODE_MAD:
+ case OPCODE_MAX:
+ case OPCODE_MIN:
+ case OPCODE_MOV:
+ case OPCODE_MUL:
+ break;
+ case OPCODE_COS:
+ case OPCODE_EX2:
+ case OPCODE_LG2:
+ case OPCODE_RCP:
+ case OPCODE_RSQ:
+ case OPCODE_SIN:
+ pairinst->IsTranscendent = 1;
+ pairinst->NeedAlpha = 1;
+ break;
+ case OPCODE_DP4:
+ pairinst->NeedAlpha = 1;
+ /* fall through */
+ case OPCODE_DP3:
+ pairinst->NeedRGB = 1;
+ break;
+ case OPCODE_KIL:
+ case OPCODE_TEX:
+ case OPCODE_TXB:
+ case OPCODE_TXP:
+ case OPCODE_END:
+ pairinst->IsTex = 1;
+ break;
+ default:
+ error("Unknown opcode %d\n", inst->Opcode);
+ break;
+ }
+}
+
+
+/**
+ * Count which (input, temporary) register is read and written how often,
+ * and scan the instruction stream to find dependencies.
+ */
+static void scan_instructions(struct pair_state *s)
+{
+ struct prog_instruction *inst;
+ struct pair_state_instruction *pairinst;
+ GLuint ip;
+
+ for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0;
+ inst->Opcode != OPCODE_END;
+ ++inst, ++pairinst, ++ip) {
+ final_rewrite(s, inst);
+ classify_instruction(s, inst, pairinst);
+
+ int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
+ int j;
+ for(j = 0; j < nsrc; j++) {
+ struct pair_register_translation *t =
+ get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index);
+ if (!t)
+ continue;
+
+ t->RefCount++;
+
+ if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
+ int i;
+ for(i = 0; i < 4; ++i) {
+ GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i);
+ if (swz >= 4)
+ continue; /* constant or NIL swizzle */
+ if (!t->Value[swz])
+ continue; /* this is an undefined read */
+
+ /* Do not add a dependency if this instruction
+ * also rewrites the value. The code below adds
+ * a dependency for the DstReg, which is a superset
+ * of the SrcReg dependency. */
+ if (inst->DstReg.File == PROGRAM_TEMPORARY &&
+ inst->DstReg.Index == inst->SrcReg[j].Index &&
+ GET_BIT(inst->DstReg.WriteMask, swz))
+ continue;
+
+ struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++];
+ pairinst->NumDependencies++;
+ t->Value[swz]->NumReaders++;
+ r->IP = ip;
+ r->Next = t->Value[swz]->Readers;
+ t->Value[swz]->Readers = r;
+ }
+ }
+ }
+
+ int ndst = _mesa_num_inst_dst_regs(inst->Opcode);
+ if (ndst) {
+ struct pair_register_translation *t =
+ get_register(s, inst->DstReg.File, inst->DstReg.Index);
+ if (t) {
+ t->RefCount++;
+
+ if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ int j;
+ for(j = 0; j < 4; ++j) {
+ if (!GET_BIT(inst->DstReg.WriteMask, j))
+ continue;
+
+ struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++];
+ v->IP = ip;
+ if (t->Value[j]) {
+ pairinst->NumDependencies++;
+ t->Value[j]->Next = v;
+ }
+ t->Value[j] = v;
+ pairinst->Values[j] = v;
+ }
+ }
+ }
+ }
+
+ if (s->Verbose)
+ _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies);
+
+ if (!pairinst->NumDependencies)
+ instruction_ready(s, ip);
+ }
+
+ /* Clear the PROGRAM_TEMPORARY state */
+ int i, j;
+ for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) {
+ for(j = 0; j < 4; ++j)
+ s->Temps[i].Value[j] = 0;
+ }
+}
+
+
+/**
+ * Reserve hardware temporary registers for the program inputs.
+ *
+ * @note This allocation is performed explicitly, because the order of inputs
+ * is determined by the RS hardware.
+ */
+static void allocate_input_registers(struct pair_state *s)
+{
+ GLuint InputsRead = s->Program->InputsRead;
+ int i;
+ GLuint hwindex = 0;
+
+ /* Primary colour */
+ if (InputsRead & FRAG_BIT_COL0)
+ alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++);
+ InputsRead &= ~FRAG_BIT_COL0;
+
+ /* Secondary color */
+ if (InputsRead & FRAG_BIT_COL1)
+ alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++);
+ InputsRead &= ~FRAG_BIT_COL1;
+
+ /* Texcoords */
+ for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) {
+ if (InputsRead & (FRAG_BIT_TEX0 << i))
+ alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++);
+ }
+ InputsRead &= ~FRAG_BITS_TEX_ANY;
+
+ /* Fogcoords treated as a texcoord */
+ if (InputsRead & FRAG_BIT_FOGC)
+ alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++);
+ InputsRead &= ~FRAG_BIT_FOGC;
+
+ /* fragment position treated as a texcoord */
+ if (InputsRead & FRAG_BIT_WPOS)
+ alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++);
+ InputsRead &= ~FRAG_BIT_WPOS;
+
+ /* Anything else */
+ if (InputsRead)
+ error("Don't know how to handle inputs 0x%x\n", InputsRead);
+}
+
+
+static void decrement_dependencies(struct pair_state *s, int ip)
+{
+ struct pair_state_instruction *pairinst = s->Instructions + ip;
+ ASSERT(pairinst->NumDependencies > 0);
+ if (!--pairinst->NumDependencies)
+ instruction_ready(s, ip);
+}
+
+/**
+ * Update the dependency tracking state based on what the instruction
+ * at the given IP does.
+ */
+static void commit_instruction(struct pair_state *s, int ip)
+{
+ struct prog_instruction *inst = s->Program->Instructions + ip;
+ struct pair_state_instruction *pairinst = s->Instructions + ip;
+
+ if (s->Verbose)
+ _mesa_printf("commit_instruction(%i)\n", ip);
+
+ if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ struct pair_register_translation *t = &s->Temps[inst->DstReg.Index];
+ deref_hw_reg(s, t->HwIndex);
+
+ int i;
+ for(i = 0; i < 4; ++i) {
+ if (!GET_BIT(inst->DstReg.WriteMask, i))
+ continue;
+
+ t->Value[i] = pairinst->Values[i];
+ if (t->Value[i]->NumReaders) {
+ struct reg_value_reader *r;
+ for(r = pairinst->Values[i]->Readers; r; r = r->Next)
+ decrement_dependencies(s, r->IP);
+ } else if (t->Value[i]->Next) {
+ /* This happens when the only reader writes
+ * the register at the same time */
+ decrement_dependencies(s, t->Value[i]->Next->IP);
+ }
+ }
+ }
+
+ int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
+ int i;
+ for(i = 0; i < nsrc; i++) {
+ struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index);
+ if (!t)
+ continue;
+
+ deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index));
+
+ if (inst->SrcReg[i].File != PROGRAM_TEMPORARY)
+ continue;
+
+ int j;
+ for(j = 0; j < 4; ++j) {
+ GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
+ if (swz >= 4)
+ continue;
+ if (!t->Value[swz])
+ continue;
+
+ /* Do not free a dependency if this instruction
+ * also rewrites the value. See scan_instructions. */
+ if (inst->DstReg.File == PROGRAM_TEMPORARY &&
+ inst->DstReg.Index == inst->SrcReg[i].Index &&
+ GET_BIT(inst->DstReg.WriteMask, swz))
+ continue;
+
+ if (!--t->Value[swz]->NumReaders) {
+ if (t->Value[swz]->Next)
+ decrement_dependencies(s, t->Value[swz]->Next->IP);
+ }
+ }
+ }
+}
+
+
+/**
+ * Emit all ready texture instructions in a single block.
+ *
+ * Emit as a single block to (hopefully) sample many textures in parallel,
+ * and to avoid hardware indirections on R300.
+ *
+ * In R500, we don't really know when the result of a texture instruction
+ * arrives. So allocate all destinations first, to make sure they do not
+ * arrive early and overwrite a texture coordinate we're going to use later
+ * in the block.
+ */
+static void emit_all_tex(struct pair_state *s)
+{
+ struct pair_state_instruction *readytex;
+ struct pair_state_instruction *pairinst;
+
+ ASSERT(s->ReadyTEX);
+
+ // Don't let the ready list change under us!
+ readytex = s->ReadyTEX;
+ s->ReadyTEX = 0;
+
+ // Allocate destination hardware registers in one block to avoid conflicts.
+ for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
+ int ip = pairinst - s->Instructions;
+ struct prog_instruction *inst = s->Program->Instructions + ip;
+ if (inst->Opcode != OPCODE_KIL)
+ get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
+ }
+
+ if (s->Debug)
+ _mesa_printf(" BEGIN_TEX\n");
+
+ if (s->Handler->BeginTexBlock)
+ s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData);
+
+ for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
+ int ip = pairinst - s->Instructions;
+ struct prog_instruction *inst = s->Program->Instructions + ip;
+ commit_instruction(s, ip);
+
+ if (inst->Opcode != OPCODE_KIL)
+ inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
+ inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index);
+
+ if (s->Debug) {
+ _mesa_printf(" ");
+ _mesa_print_instruction(inst);
+ fflush(stdout);
+ }
+ s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst);
+ }
+
+ if (s->Debug)
+ _mesa_printf(" END_TEX\n");
+}
+
+
+static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair,
+ struct prog_src_register src, GLboolean rgb, GLboolean alpha)
+{
+ int candidate = -1;
+ int candidate_quality = -1;
+ int i;
+
+ if (!rgb && !alpha)
+ return 0;
+
+ GLuint constant;
+ GLuint index;
+
+ if (src.File == PROGRAM_TEMPORARY || src.File == PROGRAM_INPUT) {
+ constant = 0;
+ index = get_hw_reg(s, src.File, src.Index);
+ } else {
+ constant = 1;
+ s->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index);
+ }
+
+ for(i = 0; i < 3; ++i) {
+ int q = 0;
+ if (rgb) {
+ if (pair->RGB.Src[i].Used) {
+ if (pair->RGB.Src[i].Constant != constant ||
+ pair->RGB.Src[i].Index != index)
+ continue;
+ q++;
+ }
+ }
+ if (alpha) {
+ if (pair->Alpha.Src[i].Used) {
+ if (pair->Alpha.Src[i].Constant != constant ||
+ pair->Alpha.Src[i].Index != index)
+ continue;
+ q++;
+ }
+ }
+ if (q > candidate_quality) {
+ candidate_quality = q;
+ candidate = i;
+ }
+ }
+
+ if (candidate >= 0) {
+ if (rgb) {
+ pair->RGB.Src[candidate].Used = 1;
+ pair->RGB.Src[candidate].Constant = constant;
+ pair->RGB.Src[candidate].Index = index;
+ }
+ if (alpha) {
+ pair->Alpha.Src[candidate].Used = 1;
+ pair->Alpha.Src[candidate].Constant = constant;
+ pair->Alpha.Src[candidate].Index = index;
+ }
+ }
+
+ return candidate;
+}
+
+/**
+ * Fill the given ALU instruction's opcodes and source operands into the given pair,
+ * if possible.
+ */
+static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip)
+{
+ struct pair_state_instruction *pairinst = s->Instructions + ip;
+ struct prog_instruction *inst = s->Program->Instructions + ip;
+
+ ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP);
+ ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP);
+
+ if (pairinst->NeedRGB) {
+ if (pairinst->IsTranscendent)
+ pair->RGB.Opcode = OPCODE_REPL_ALPHA;
+ else
+ pair->RGB.Opcode = inst->Opcode;
+ if (inst->SaturateMode == SATURATE_ZERO_ONE)
+ pair->RGB.Saturate = 1;
+ }
+ if (pairinst->NeedAlpha) {
+ pair->Alpha.Opcode = inst->Opcode;
+ if (inst->SaturateMode == SATURATE_ZERO_ONE)
+ pair->Alpha.Saturate = 1;
+ }
+
+ int nargs = _mesa_num_inst_src_regs(inst->Opcode);
+ int i;
+
+ /* Special case for DDX/DDY (MDH/MDV). */
+ if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) {
+ if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used)
+ return GL_FALSE;
+ else
+ nargs++;
+ }
+
+ for(i = 0; i < nargs; ++i) {
+ int source;
+ if (pairinst->NeedRGB && !pairinst->IsTranscendent) {
+ GLboolean srcrgb = GL_FALSE;
+ GLboolean srcalpha = GL_FALSE;
+ int j;
+ for(j = 0; j < 3; ++j) {
+ GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
+ if (swz < 3)
+ srcrgb = GL_TRUE;
+ else if (swz < 4)
+ srcalpha = GL_TRUE;
+ }
+ source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha);
+ if (source < 0)
+ return GL_FALSE;
+ pair->RGB.Arg[i].Source = source;
+ pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff;
+ pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
+ pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (NEGATE_X | NEGATE_Y | NEGATE_Z));
+ }
+ if (pairinst->NeedAlpha) {
+ GLboolean srcrgb = GL_FALSE;
+ GLboolean srcalpha = GL_FALSE;
+ GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3);
+ if (swz < 3)
+ srcrgb = GL_TRUE;
+ else if (swz < 4)
+ srcalpha = GL_TRUE;
+ source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha);
+ if (source < 0)
+ return GL_FALSE;
+ pair->Alpha.Arg[i].Source = source;
+ pair->Alpha.Arg[i].Swizzle = swz;
+ pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
+ pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & NEGATE_W);
+ }
+ }
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Fill in the destination register information.
+ *
+ * This is split from filling in source registers because we want
+ * to avoid allocating hardware temporaries for destinations until
+ * we are absolutely certain that we're going to emit a certain
+ * instruction pairing.
+ */
+static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip)
+{
+ struct pair_state_instruction *pairinst = s->Instructions + ip;
+ struct prog_instruction *inst = s->Program->Instructions + ip;
+
+ if (inst->DstReg.File == PROGRAM_OUTPUT) {
+ if (inst->DstReg.Index == FRAG_RESULT_COLOR) {
+ pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ;
+ pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+ } else if (inst->DstReg.Index == FRAG_RESULT_DEPTH) {
+ pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+ }
+ } else {
+ GLuint hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
+ if (pairinst->NeedRGB) {
+ pair->RGB.DestIndex = hwindex;
+ pair->RGB.WriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ;
+ }
+ if (pairinst->NeedAlpha) {
+ pair->Alpha.DestIndex = hwindex;
+ pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+ }
+ }
+}
+
+
+/**
+ * Find a good ALU instruction or pair of ALU instruction and emit it.
+ *
+ * Prefer emitting full ALU instructions, so that when we reach a point
+ * where no full ALU instruction can be emitted, we have more candidates
+ * for RGB/Alpha pairing.
+ */
+static void emit_alu(struct pair_state *s)
+{
+ struct radeon_pair_instruction pair;
+
+ if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) {
+ int ip;
+ if (s->ReadyFullALU) {
+ ip = s->ReadyFullALU - s->Instructions;
+ s->ReadyFullALU = s->ReadyFullALU->NextReady;
+ } else if (s->ReadyRGB) {
+ ip = s->ReadyRGB - s->Instructions;
+ s->ReadyRGB = s->ReadyRGB->NextReady;
+ } else {
+ ip = s->ReadyAlpha - s->Instructions;
+ s->ReadyAlpha = s->ReadyAlpha->NextReady;
+ }
+
+ _mesa_bzero(&pair, sizeof(pair));
+ fill_instruction_into_pair(s, &pair, ip);
+ fill_dest_into_pair(s, &pair, ip);
+ commit_instruction(s, ip);
+ } else {
+ struct pair_state_instruction **prgb;
+ struct pair_state_instruction **palpha;
+
+ /* Some pairings might fail because they require too
+ * many source slots; try all possible pairings if necessary */
+ for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
+ for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
+ int rgbip = *prgb - s->Instructions;
+ int alphaip = *palpha - s->Instructions;
+ _mesa_bzero(&pair, sizeof(pair));
+ fill_instruction_into_pair(s, &pair, rgbip);
+ if (!fill_instruction_into_pair(s, &pair, alphaip))
+ continue;
+ *prgb = (*prgb)->NextReady;
+ *palpha = (*palpha)->NextReady;
+ fill_dest_into_pair(s, &pair, rgbip);
+ fill_dest_into_pair(s, &pair, alphaip);
+ commit_instruction(s, rgbip);
+ commit_instruction(s, alphaip);
+ goto success;
+ }
+ }
+
+ /* No success in pairing; just take the first RGB instruction */
+ int ip = s->ReadyRGB - s->Instructions;
+ s->ReadyRGB = s->ReadyRGB->NextReady;
+ _mesa_bzero(&pair, sizeof(pair));
+ fill_instruction_into_pair(s, &pair, ip);
+ fill_dest_into_pair(s, &pair, ip);
+ commit_instruction(s, ip);
+ success: ;
+ }
+
+ if (s->Debug)
+ radeonPrintPairInstruction(&pair);
+
+ s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair);
+}
+
+
+GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
+ const struct radeon_pair_handler* handler, void *userdata)
+{
+ struct pair_state s;
+
+ _mesa_bzero(&s, sizeof(s));
+ s.Ctx = ctx;
+ s.Program = _mesa_clone_program(ctx, program);
+ s.Handler = handler;
+ s.UserData = userdata;
+ s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE;
+ s.Verbose = GL_FALSE && s.Debug;
+
+ s.Instructions = (struct pair_state_instruction*)_mesa_calloc(
+ sizeof(struct pair_state_instruction)*s.Program->NumInstructions);
+ s.ValuePool = (struct reg_value*)_mesa_calloc(sizeof(struct reg_value)*s.Program->NumInstructions*4);
+ s.ReaderPool = (struct reg_value_reader*)_mesa_calloc(
+ sizeof(struct reg_value_reader)*s.Program->NumInstructions*12);
+
+ if (s.Debug)
+ _mesa_printf("Emit paired program\n");
+
+ scan_instructions(&s);
+ allocate_input_registers(&s);
+
+ while(!s.Error &&
+ (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
+ if (s.ReadyTEX)
+ emit_all_tex(&s);
+
+ while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)
+ emit_alu(&s);
+ }
+
+ if (s.Debug)
+ _mesa_printf(" END\n");
+
+ _mesa_free(s.Instructions);
+ _mesa_free(s.ValuePool);
+ _mesa_free(s.ReaderPool);
+
+ _mesa_reference_program(ctx, &s.Program, NULL);
+
+ return !s.Error;
+}
+
+
+static void print_pair_src(int i, struct radeon_pair_instruction_source* src)
+{
+ _mesa_printf(" Src%i = %s[%i]", i, src->Constant ? "CNST" : "TEMP", src->Index);
+}
+
+static const char* opcode_string(GLuint opcode)
+{
+ if (opcode == OPCODE_REPL_ALPHA)
+ return "SOP";
+ else
+ return _mesa_opcode_string(opcode);
+}
+
+static int num_pairinst_args(GLuint opcode)
+{
+ if (opcode == OPCODE_REPL_ALPHA)
+ return 0;
+ else
+ return _mesa_num_inst_src_regs(opcode);
+}
+
+static char swizzle_char(GLuint swz)
+{
+ switch(swz) {
+ case SWIZZLE_X: return 'x';
+ case SWIZZLE_Y: return 'y';
+ case SWIZZLE_Z: return 'z';
+ case SWIZZLE_W: return 'w';
+ case SWIZZLE_ZERO: return '0';
+ case SWIZZLE_ONE: return '1';
+ case SWIZZLE_NIL: return '_';
+ default: return '?';
+ }
+}
+
+void radeonPrintPairInstruction(struct radeon_pair_instruction *inst)
+{
+ int nargs;
+ int i;
+
+ _mesa_printf(" RGB: ");
+ for(i = 0; i < 3; ++i) {
+ if (inst->RGB.Src[i].Used)
+ print_pair_src(i, inst->RGB.Src + i);
+ }
+ _mesa_printf("\n");
+ _mesa_printf(" Alpha:");
+ for(i = 0; i < 3; ++i) {
+ if (inst->Alpha.Src[i].Used)
+ print_pair_src(i, inst->Alpha.Src + i);
+ }
+ _mesa_printf("\n");
+
+ _mesa_printf(" %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : "");
+ if (inst->RGB.WriteMask)
+ _mesa_printf(" TEMP[%i].%s%s%s", inst->RGB.DestIndex,
+ (inst->RGB.WriteMask & 1) ? "x" : "",
+ (inst->RGB.WriteMask & 2) ? "y" : "",
+ (inst->RGB.WriteMask & 4) ? "z" : "");
+ if (inst->RGB.OutputWriteMask)
+ _mesa_printf(" COLOR.%s%s%s",
+ (inst->RGB.OutputWriteMask & 1) ? "x" : "",
+ (inst->RGB.OutputWriteMask & 2) ? "y" : "",
+ (inst->RGB.OutputWriteMask & 4) ? "z" : "");
+ nargs = num_pairinst_args(inst->RGB.Opcode);
+ for(i = 0; i < nargs; ++i) {
+ const char* abs = inst->RGB.Arg[i].Abs ? "|" : "";
+ const char* neg = inst->RGB.Arg[i].Negate ? "-" : "";
+ _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source,
+ swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)),
+ swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)),
+ swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)),
+ abs);
+ }
+ _mesa_printf("\n");
+
+ _mesa_printf(" %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : "");
+ if (inst->Alpha.WriteMask)
+ _mesa_printf(" TEMP[%i].w", inst->Alpha.DestIndex);
+ if (inst->Alpha.OutputWriteMask)
+ _mesa_printf(" COLOR.w");
+ if (inst->Alpha.DepthWriteMask)
+ _mesa_printf(" DEPTH.w");
+ nargs = num_pairinst_args(inst->Alpha.Opcode);
+ for(i = 0; i < nargs; ++i) {
+ const char* abs = inst->Alpha.Arg[i].Abs ? "|" : "";
+ const char* neg = inst->Alpha.Arg[i].Negate ? "-" : "";
+ _mesa_printf(", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source,
+ swizzle_char(inst->Alpha.Arg[i].Swizzle), abs);
+ }
+ _mesa_printf("\n");
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
new file mode 100644
index 00000000000..4624a246298
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_PAIR_H_
+#define __RADEON_PROGRAM_PAIR_H_
+
+#include "radeon_program.h"
+
+
+/**
+ * Represents a paired instruction, as found in R300 and R500
+ * fragment programs.
+ */
+struct radeon_pair_instruction_source {
+ GLuint Index:8;
+ GLuint Constant:1;
+ GLuint Used:1;
+};
+
+struct radeon_pair_instruction_rgb {
+ GLuint Opcode:8;
+ GLuint DestIndex:8;
+ GLuint WriteMask:3;
+ GLuint OutputWriteMask:3;
+ GLuint Saturate:1;
+
+ struct radeon_pair_instruction_source Src[3];
+
+ struct {
+ GLuint Source:2;
+ GLuint Swizzle:9;
+ GLuint Abs:1;
+ GLuint Negate:1;
+ } Arg[3];
+};
+
+struct radeon_pair_instruction_alpha {
+ GLuint Opcode:8;
+ GLuint DestIndex:8;
+ GLuint WriteMask:1;
+ GLuint OutputWriteMask:1;
+ GLuint DepthWriteMask:1;
+ GLuint Saturate:1;
+
+ struct radeon_pair_instruction_source Src[3];
+
+ struct {
+ GLuint Source:2;
+ GLuint Swizzle:3;
+ GLuint Abs:1;
+ GLuint Negate:1;
+ } Arg[3];
+};
+
+struct radeon_pair_instruction {
+ struct radeon_pair_instruction_rgb RGB;
+ struct radeon_pair_instruction_alpha Alpha;
+};
+
+
+/**
+ *
+ */
+struct radeon_pair_handler {
+ /**
+ * Fill in the proper hardware index for the given constant register.
+ *
+ * @return GL_FALSE on error.
+ */
+ GLboolean (*EmitConst)(void*, GLuint file, GLuint index, GLuint *hwindex);
+
+ /**
+ * Write a paired instruction to the hardware.
+ *
+ * @return GL_FALSE on error.
+ */
+ GLboolean (*EmitPaired)(void*, struct radeon_pair_instruction*);
+
+ /**
+ * Write a texture instruction to the hardware.
+ * Register indices have already been rewritten to the allocated
+ * hardware register numbers.
+ *
+ * @return GL_FALSE on error.
+ */
+ GLboolean (*EmitTex)(void*, struct prog_instruction*);
+
+ /**
+ * Called before a block of contiguous, independent texture
+ * instructions is emitted.
+ */
+ GLboolean (*BeginTexBlock)(void*);
+
+ GLuint MaxHwTemps;
+};
+
+GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
+ const struct radeon_pair_handler*, void *userdata);
+
+void radeonPrintPairInstruction(struct radeon_pair_instruction *inst);
+
+#endif /* __RADEON_PROGRAM_PAIR_H_ */
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index f7af7d4e574..b692f8bf809 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/mtypes.h"
#include "shader/prog_instruction.h"
+#include "compiler/radeon_compiler.h"
struct r300_context;
typedef struct r300_context r300ContextRec;
@@ -396,9 +397,6 @@ struct r300_hw_state {
/* Can be tested with colormat currently. */
#define VSF_MAX_FRAGMENT_TEMPS (14)
-#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
-#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1)
-
#define COLOR_IS_RGBA
#define TAG(x) r300##x
#include "tnl_dd/t_dd_vertex.h"
@@ -413,7 +411,7 @@ struct r300_vertex_program {
GLuint FogAttr;
GLuint WPosAttr;
} key;
-
+
struct r300_vertex_shader_hw_code {
int length;
union {
@@ -441,110 +439,6 @@ struct r300_vertex_program_cont {
struct r300_vertex_program *progs;
};
-#define R300_PFS_MAX_ALU_INST 64
-#define R300_PFS_MAX_TEX_INST 32
-#define R300_PFS_MAX_TEX_INDIRECT 4
-#define R300_PFS_NUM_TEMP_REGS 32
-#define R300_PFS_NUM_CONST_REGS 32
-
-#define R500_PFS_MAX_INST 512
-#define R500_PFS_NUM_TEMP_REGS 128
-#define R500_PFS_NUM_CONST_REGS 256
-
-struct r300_pfs_compile_state;
-struct r500_pfs_compile_state;
-
-/**
- * Stores state that influences the compilation of a fragment program.
- */
-struct r300_fragment_program_external_state {
- struct {
- /**
- * If the sampler is used as a shadow sampler,
- * this field is:
- * 0 - GL_LUMINANCE
- * 1 - GL_INTENSITY
- * 2 - GL_ALPHA
- * depending on the depth texture mode.
- */
- GLuint depth_texture_mode : 2;
-
- /**
- * If the sampler is used as a shadow sampler,
- * this field is (texture_compare_func - GL_NEVER).
- * [e.g. if compare function is GL_LEQUAL, this field is 3]
- *
- * Otherwise, this field is 0.
- */
- GLuint texture_compare_func : 3;
- } unit[16];
-};
-
-
-struct r300_fragment_program_node {
- int tex_offset; /**< first tex instruction */
- int tex_end; /**< last tex instruction, relative to tex_offset */
- int alu_offset; /**< first ALU instruction */
- int alu_end; /**< last ALU instruction, relative to alu_offset */
- int flags;
-};
-
-/**
- * Stores an R300 fragment program in its compiled-to-hardware form.
- */
-struct r300_fragment_program_code {
- struct {
- int length; /**< total # of texture instructions used */
- GLuint inst[R300_PFS_MAX_TEX_INST];
- } tex;
-
- struct {
- int length; /**< total # of ALU instructions used */
- struct {
- GLuint inst0;
- GLuint inst1;
- GLuint inst2;
- GLuint inst3;
- } inst[R300_PFS_MAX_ALU_INST];
- } alu;
-
- struct r300_fragment_program_node node[4];
- int cur_node;
- int first_node_has_tex;
-
- /**
- * Remember which program register a given hardware constant
- * belongs to.
- */
- struct prog_src_register constant[R300_PFS_NUM_CONST_REGS];
- int const_nr;
-
- int max_temp_idx;
-};
-
-
-struct r500_fragment_program_code {
- struct {
- GLuint inst0;
- GLuint inst1;
- GLuint inst2;
- GLuint inst3;
- GLuint inst4;
- GLuint inst5;
- } inst[R500_PFS_MAX_INST];
-
- int inst_offset;
- int inst_end;
-
- /**
- * Remember which program register a given hardware constant
- * belongs to.
- */
- struct prog_src_register constant[R500_PFS_NUM_CONST_REGS];
- int const_nr;
-
- int max_temp_idx;
-};
/**
* Store everything about a fragment program that is needed
@@ -555,22 +449,10 @@ struct r300_fragment_program {
GLboolean translated;
GLboolean error;
-
- struct r300_fragment_program_external_state state;
- union rX00_fragment_program_code {
- struct r300_fragment_program_code r300;
- struct r500_fragment_program_code r500;
- } code;
-
- GLboolean writes_depth;
- GLuint optimization;
-
struct r300_fragment_program *next;
+ struct r300_fragment_program_external_state state;
- /* attribute that we are sending the WPOS in */
- gl_frag_attrib wpos_attr;
- /* attribute that we are sending the fog coordinate in */
- gl_frag_attrib fog_attr;
+ struct rX00_fragment_program_code code;
};
struct r300_fragment_program_cont {
@@ -583,12 +465,6 @@ struct r300_fragment_program_cont {
struct r300_fragment_program *progs;
};
-struct r300_fragment_program_compiler {
- r300ContextPtr r300;
- struct r300_fragment_program *fp;
- union rX00_fragment_program_code *code;
- struct gl_program *program;
-};
#define R300_MAX_AOS_ARRAYS 16
@@ -610,8 +486,6 @@ struct r300_swtcl_info {
struct r300_vtable {
void (* SetupRSUnit)(GLcontext *ctx);
void (* SetupFragmentShaderTextures)(GLcontext *ctx, int *tmu_mappings);
- GLboolean (* BuildFragmentProgramHwCode)(struct r300_fragment_program_compiler *compiler);
- void (* FragmentProgramDump)(union rX00_fragment_program_code *code);
void (* SetupPixelShader)(GLcontext *ctx);
};
@@ -669,7 +543,7 @@ struct r300_context {
uint32_t s3tc_force_disabled:1;
uint32_t stencil_two_side_disabled:1;
} options;
-
+
struct r300_swtcl_info swtcl;
struct r300_vertex_buffer vbuf;
struct r300_index_buffer ind_buf;
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
deleted file mode 100644
index 55c1cfe6317..00000000000
--- a/src/mesa/drivers/dri/r300/r300_fragprog.c
+++ /dev/null
@@ -1,451 +0,0 @@
-/*
- * Copyright (C) 2005 Ben Skeggs.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "r300_fragprog.h"
-
-#include "shader/prog_parameter.h"
-
-#include "r300_context.h"
-#include "r300_fragprog_swizzle.h"
-
-static void reset_srcreg(struct prog_src_register* reg)
-{
- _mesa_bzero(reg, sizeof(*reg));
- reg->Swizzle = SWIZZLE_NOOP;
-}
-
-static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
-{
- gl_state_index fail_value_tokens[STATE_LENGTH] = {
- STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
- };
- struct prog_src_register reg = { 0, };
-
- fail_value_tokens[2] = tmu;
- reg.File = PROGRAM_STATE_VAR;
- reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
- reg.Swizzle = SWIZZLE_WWWW;
- return reg;
-}
-
-/**
- * Transform TEX, TXP, TXB, and KIL instructions in the following way:
- * - premultiply texture coordinates for RECT
- * - extract operand swizzles
- * - introduce a temporary register when write masks are needed
- *
- * \todo If/when r5xx uses the radeon_program architecture, this can probably
- * be reused.
- */
-GLboolean r300_transform_TEX(
- struct radeon_transform_context *t,
- struct prog_instruction* orig_inst, void* data)
-{
- struct r300_fragment_program_compiler *compiler =
- (struct r300_fragment_program_compiler*)data;
- struct prog_instruction inst = *orig_inst;
- struct prog_instruction* tgt;
- GLboolean destredirect = GL_FALSE;
-
- if (inst.Opcode != OPCODE_TEX &&
- inst.Opcode != OPCODE_TXB &&
- inst.Opcode != OPCODE_TXP &&
- inst.Opcode != OPCODE_KIL)
- return GL_FALSE;
-
- if (inst.Opcode != OPCODE_KIL &&
- t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
-
- if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
- tgt = radeonAppendInstructions(t->Program, 1);
-
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg = inst.DstReg;
- if (comparefunc == GL_ALWAYS) {
- tgt->SrcReg[0].File = PROGRAM_BUILTIN;
- tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
- } else {
- tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
- }
- return GL_TRUE;
- }
-
- inst.DstReg.File = PROGRAM_TEMPORARY;
- inst.DstReg.Index = radeonFindFreeTemporary(t);
- inst.DstReg.WriteMask = WRITEMASK_XYZW;
- }
-
-
- /* Hardware uses [0..1]x[0..1] range for rectangle textures
- * instead of [0..Width]x[0..Height].
- * Add a scaling instruction.
- */
- if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) {
- gl_state_index tokens[STATE_LENGTH] = {
- STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
- 0
- };
-
- int tempreg = radeonFindFreeTemporary(t);
- int factor_index;
-
- tokens[2] = inst.TexSrcUnit;
- factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens);
-
- tgt = radeonAppendInstructions(t->Program, 1);
-
- tgt->Opcode = OPCODE_MUL;
- tgt->DstReg.File = PROGRAM_TEMPORARY;
- tgt->DstReg.Index = tempreg;
- tgt->SrcReg[0] = inst.SrcReg[0];
- tgt->SrcReg[1].File = PROGRAM_STATE_VAR;
- tgt->SrcReg[1].Index = factor_index;
-
- reset_srcreg(&inst.SrcReg[0]);
- inst.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst.SrcReg[0].Index = tempreg;
- }
-
- if (inst.Opcode != OPCODE_KIL) {
- if (inst.DstReg.File != PROGRAM_TEMPORARY ||
- inst.DstReg.WriteMask != WRITEMASK_XYZW) {
- int tempreg = radeonFindFreeTemporary(t);
-
- inst.DstReg.File = PROGRAM_TEMPORARY;
- inst.DstReg.Index = tempreg;
- inst.DstReg.WriteMask = WRITEMASK_XYZW;
- destredirect = GL_TRUE;
- } else if (inst.SaturateMode) {
- destredirect = GL_TRUE;
- }
- }
-
- if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
- int tmpreg = radeonFindFreeTemporary(t);
- tgt = radeonAppendInstructions(t->Program, 1);
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg.File = PROGRAM_TEMPORARY;
- tgt->DstReg.Index = tmpreg;
- tgt->SrcReg[0] = inst.SrcReg[0];
-
- reset_srcreg(&inst.SrcReg[0]);
- inst.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst.SrcReg[0].Index = tmpreg;
- }
-
- tgt = radeonAppendInstructions(t->Program, 1);
- _mesa_copy_instructions(tgt, &inst, 1);
-
- if (inst.Opcode != OPCODE_KIL &&
- t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
- GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode;
- int rcptemp = radeonFindFreeTemporary(t);
- int pass, fail;
-
- tgt = radeonAppendInstructions(t->Program, 3);
-
- tgt[0].Opcode = OPCODE_RCP;
- tgt[0].DstReg.File = PROGRAM_TEMPORARY;
- tgt[0].DstReg.Index = rcptemp;
- tgt[0].DstReg.WriteMask = WRITEMASK_W;
- tgt[0].SrcReg[0] = inst.SrcReg[0];
- tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
-
- tgt[1].Opcode = OPCODE_MAD;
- tgt[1].DstReg = inst.DstReg;
- tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
- tgt[1].SrcReg[0] = inst.SrcReg[0];
- tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
- tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
- tgt[1].SrcReg[1].Index = rcptemp;
- tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
- tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
- tgt[1].SrcReg[2].Index = inst.DstReg.Index;
- if (depthmode == 0) /* GL_LUMINANCE */
- tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
- else if (depthmode == 2) /* GL_ALPHA */
- tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
-
- /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
- * r < tex <=> -tex+r < 0
- * r >= tex <=> not (-tex+r < 0 */
- if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
- tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW;
- else
- tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW;
-
- tgt[2].Opcode = OPCODE_CMP;
- tgt[2].DstReg = orig_inst->DstReg;
- tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
- tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
-
- if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
- pass = 1;
- fail = 2;
- } else {
- pass = 2;
- fail = 1;
- }
-
- tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
- tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
- tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
- } else if (destredirect) {
- tgt = radeonAppendInstructions(t->Program, 1);
-
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg = orig_inst->DstReg;
- tgt->SaturateMode = inst.SaturateMode;
- tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
- tgt->SrcReg[0].Index = inst.DstReg.Index;
- }
-
- return GL_TRUE;
-}
-
-/* just some random things... */
-void r300FragmentProgramDump(union rX00_fragment_program_code *c)
-{
- struct r300_fragment_program_code *code = &c->r300;
- int n, i, j;
- static int pc = 0;
-
- fprintf(stderr, "pc=%d*************************************\n", pc++);
-
- fprintf(stderr, "Hardware program\n");
- fprintf(stderr, "----------------\n");
-
- for (n = 0; n < (code->cur_node + 1); n++) {
- fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
- "alu_end: %d, tex_end: %d, flags: %08x\n", n,
- code->node[n].alu_offset,
- code->node[n].tex_offset,
- code->node[n].alu_end, code->node[n].tex_end,
- code->node[n].flags);
-
- if (n > 0 || code->first_node_has_tex) {
- fprintf(stderr, " TEX:\n");
- for (i = code->node[n].tex_offset;
- i <= code->node[n].tex_offset + code->node[n].tex_end;
- ++i) {
- const char *instr;
-
- switch ((code->tex.
- inst[i] >> R300_TEX_INST_SHIFT) &
- 15) {
- case R300_TEX_OP_LD:
- instr = "TEX";
- break;
- case R300_TEX_OP_KIL:
- instr = "KIL";
- break;
- case R300_TEX_OP_TXP:
- instr = "TXP";
- break;
- case R300_TEX_OP_TXB:
- instr = "TXB";
- break;
- default:
- instr = "UNKNOWN";
- }
-
- fprintf(stderr,
- " %s t%i, %c%i, texture[%i] (%08x)\n",
- instr,
- (code->tex.
- inst[i] >> R300_DST_ADDR_SHIFT) & 31,
- 't',
- (code->tex.
- inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
- (code->tex.
- inst[i] & R300_TEX_ID_MASK) >>
- R300_TEX_ID_SHIFT,
- code->tex.inst[i]);
- }
- }
-
- for (i = code->node[n].alu_offset;
- i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) {
- char srcc[3][10], dstc[20];
- char srca[3][10], dsta[20];
- char argc[3][20];
- char arga[3][20];
- char flags[5], tmp[10];
-
- for (j = 0; j < 3; ++j) {
- int regc = code->alu.inst[i].inst1 >> (j * 6);
- int rega = code->alu.inst[i].inst3 >> (j * 6);
-
- sprintf(srcc[j], "%c%i",
- (regc & 32) ? 'c' : 't', regc & 31);
- sprintf(srca[j], "%c%i",
- (rega & 32) ? 'c' : 't', rega & 31);
- }
-
- dstc[0] = 0;
- sprintf(flags, "%s%s%s",
- (code->alu.inst[i].
- inst1 & R300_ALU_DSTC_REG_X) ? "x" : "",
- (code->alu.inst[i].
- inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "",
- (code->alu.inst[i].
- inst1 & R300_ALU_DSTC_REG_Z) ? "z" : "");
- if (flags[0] != 0) {
- sprintf(dstc, "t%i.%s ",
- (code->alu.inst[i].
- inst1 >> R300_ALU_DSTC_SHIFT) & 31,
- flags);
- }
- sprintf(flags, "%s%s%s",
- (code->alu.inst[i].
- inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
- (code->alu.inst[i].
- inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
- (code->alu.inst[i].
- inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
- if (flags[0] != 0) {
- sprintf(tmp, "o%i.%s",
- (code->alu.inst[i].
- inst1 >> R300_ALU_DSTC_SHIFT) & 31,
- flags);
- strcat(dstc, tmp);
- }
-
- dsta[0] = 0;
- if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) {
- sprintf(dsta, "t%i.w ",
- (code->alu.inst[i].
- inst3 >> R300_ALU_DSTA_SHIFT) & 31);
- }
- if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) {
- sprintf(tmp, "o%i.w ",
- (code->alu.inst[i].
- inst3 >> R300_ALU_DSTA_SHIFT) & 31);
- strcat(dsta, tmp);
- }
- if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) {
- strcat(dsta, "Z");
- }
-
- fprintf(stderr,
- "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
- " w: %3s %3s %3s -> %-20s (%08x)\n", i,
- srcc[0], srcc[1], srcc[2], dstc,
- code->alu.inst[i].inst1, srca[0], srca[1],
- srca[2], dsta, code->alu.inst[i].inst3);
-
- for (j = 0; j < 3; ++j) {
- int regc = code->alu.inst[i].inst0 >> (j * 7);
- int rega = code->alu.inst[i].inst2 >> (j * 7);
- int d;
- char buf[20];
-
- d = regc & 31;
- if (d < 12) {
- switch (d % 4) {
- case R300_ALU_ARGC_SRC0C_XYZ:
- sprintf(buf, "%s.xyz",
- srcc[d / 4]);
- break;
- case R300_ALU_ARGC_SRC0C_XXX:
- sprintf(buf, "%s.xxx",
- srcc[d / 4]);
- break;
- case R300_ALU_ARGC_SRC0C_YYY:
- sprintf(buf, "%s.yyy",
- srcc[d / 4]);
- break;
- case R300_ALU_ARGC_SRC0C_ZZZ:
- sprintf(buf, "%s.zzz",
- srcc[d / 4]);
- break;
- }
- } else if (d < 15) {
- sprintf(buf, "%s.www", srca[d - 12]);
- } else if (d == 20) {
- sprintf(buf, "0.0");
- } else if (d == 21) {
- sprintf(buf, "1.0");
- } else if (d == 22) {
- sprintf(buf, "0.5");
- } else if (d >= 23 && d < 32) {
- d -= 23;
- switch (d / 3) {
- case 0:
- sprintf(buf, "%s.yzx",
- srcc[d % 3]);
- break;
- case 1:
- sprintf(buf, "%s.zxy",
- srcc[d % 3]);
- break;
- case 2:
- sprintf(buf, "%s.Wzy",
- srcc[d % 3]);
- break;
- }
- } else {
- sprintf(buf, "%i", d);
- }
-
- sprintf(argc[j], "%s%s%s%s",
- (regc & 32) ? "-" : "",
- (regc & 64) ? "|" : "",
- buf, (regc & 64) ? "|" : "");
-
- d = rega & 31;
- if (d < 9) {
- sprintf(buf, "%s.%c", srcc[d / 3],
- 'x' + (char)(d % 3));
- } else if (d < 12) {
- sprintf(buf, "%s.w", srca[d - 9]);
- } else if (d == 16) {
- sprintf(buf, "0.0");
- } else if (d == 17) {
- sprintf(buf, "1.0");
- } else if (d == 18) {
- sprintf(buf, "0.5");
- } else {
- sprintf(buf, "%i", d);
- }
-
- sprintf(arga[j], "%s%s%s%s",
- (rega & 32) ? "-" : "",
- (rega & 64) ? "|" : "",
- buf, (rega & 64) ? "|" : "");
- }
-
- fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n"
- " w: %8s %8s %8s op: %08x\n",
- argc[0], argc[1], argc[2],
- code->alu.inst[i].inst0, arga[0], arga[1],
- arga[2], code->alu.inst[i].inst2);
- }
- }
-}
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h
deleted file mode 100644
index 5ce6f33cee7..00000000000
--- a/src/mesa/drivers/dri/r300/r300_fragprog.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (C) 2005 Ben Skeggs.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-/*
- * Authors:
- * Ben Skeggs
- * Jerome Glisse
- */
-#ifndef __R300_FRAGPROG_H_
-#define __R300_FRAGPROG_H_
-
-#include "shader/program.h"
-#include "shader/prog_instruction.h"
-
-#include "r300_context.h"
-#include "radeon_program.h"
-
-#define DRI_CONF_FP_OPTIMIZATION_SPEED 0
-#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1
-
-#if 1
-
-/**
- * Fragment program helper macros
- */
-
-/* Produce unshifted source selectors */
-#define FP_TMP(idx) (idx)
-#define FP_CONST(idx) ((idx) | (1 << 5))
-
-/* Produce source/dest selector dword */
-#define FP_SELC_MASK_NO 0
-#define FP_SELC_MASK_X 1
-#define FP_SELC_MASK_Y 2
-#define FP_SELC_MASK_XY 3
-#define FP_SELC_MASK_Z 4
-#define FP_SELC_MASK_XZ 5
-#define FP_SELC_MASK_YZ 6
-#define FP_SELC_MASK_XYZ 7
-
-#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \
- (((destidx) << R300_ALU_DSTC_SHIFT) | \
- (FP_SELC_MASK_##regmask << 23) | \
- (FP_SELC_MASK_##outmask << 26) | \
- ((src0) << R300_ALU_SRC0C_SHIFT) | \
- ((src1) << R300_ALU_SRC1C_SHIFT) | \
- ((src2) << R300_ALU_SRC2C_SHIFT))
-
-#define FP_SELA_MASK_NO 0
-#define FP_SELA_MASK_W 1
-
-#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \
- (((destidx) << R300_ALU_DSTA_SHIFT) | \
- (FP_SELA_MASK_##regmask << 23) | \
- (FP_SELA_MASK_##outmask << 24) | \
- ((src0) << R300_ALU_SRC0A_SHIFT) | \
- ((src1) << R300_ALU_SRC1A_SHIFT) | \
- ((src2) << R300_ALU_SRC2A_SHIFT))
-
-/* Produce unshifted argument selectors */
-#define FP_ARGC(source) R300_ALU_ARGC_##source
-#define FP_ARGA(source) R300_ALU_ARGA_##source
-#define FP_ABS(arg) ((arg) | (1 << 6))
-#define FP_NEG(arg) ((arg) ^ (1 << 5))
-
-/* Produce instruction dword */
-#define FP_INSTRC(opcode,arg0,arg1,arg2) \
- (R300_ALU_OUTC_##opcode | \
- ((arg0) << R300_ALU_ARG0C_SHIFT) | \
- ((arg1) << R300_ALU_ARG1C_SHIFT) | \
- ((arg2) << R300_ALU_ARG2C_SHIFT))
-
-#define FP_INSTRA(opcode,arg0,arg1,arg2) \
- (R300_ALU_OUTA_##opcode | \
- ((arg0) << R300_ALU_ARG0A_SHIFT) | \
- ((arg1) << R300_ALU_ARG1A_SHIFT) | \
- ((arg2) << R300_ALU_ARG2A_SHIFT))
-
-#endif
-
-extern GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
-
-extern void r300FragmentProgramDump(union rX00_fragment_program_code *c);
-
-extern GLboolean r300_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data);
-
-#endif
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index f5c4c0f4a0e..f28162a2b6a 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -43,175 +43,13 @@
#include "shader/prog_print.h"
#include "r300_state.h"
-#include "r300_fragprog.h"
-#include "r300_fragprog_swizzle.h"
-#include "r500_fragprog.h"
-#include "radeon_program.h"
-#include "radeon_program_alu.h"
+#include "compiler/radeon_program.h"
+#include "compiler/radeon_program_alu.h"
+#include "compiler/r300_fragprog_swizzle.h"
+#include "compiler/r300_fragprog.h"
+#include "compiler/r500_fragprog.h"
-static void nqssadce_init(struct nqssadce_state* s)
-{
- s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW;
- s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W;
-}
-
-/**
- * Transform the program to support fragment.position.
- *
- * Introduce a small fragment at the start of the program that will be
- * the only code that directly reads the FRAG_ATTRIB_WPOS input.
- * All other code pieces that reference that input will be rewritten
- * to read from a newly allocated temporary.
- *
- */
-static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
-{
- GLuint InputsRead = compiler->fp->Base->InputsRead;
-
- if (!(InputsRead & FRAG_BIT_WPOS)) {
- compiler->fp->wpos_attr = FRAG_ATTRIB_MAX;
- return;
- }
-
- static gl_state_index tokens[STATE_LENGTH] = {
- STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
- };
- struct prog_instruction *fpi;
- GLuint window_index;
- int i = 0;
-
- for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
- {
- if (!(InputsRead & (1 << i))) {
- InputsRead &= ~(1 << FRAG_ATTRIB_WPOS);
- InputsRead |= 1 << i;
- compiler->fp->Base->InputsRead = InputsRead;
- compiler->fp->wpos_attr = i;
- break;
- }
- }
-
- GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY);
-
- _mesa_insert_instructions(compiler->program, 0, 3);
- fpi = compiler->program->Instructions;
- i = 0;
-
- /* perspective divide */
- fpi[i].Opcode = OPCODE_RCP;
-
- fpi[i].DstReg.File = PROGRAM_TEMPORARY;
- fpi[i].DstReg.Index = tempregi;
- fpi[i].DstReg.WriteMask = WRITEMASK_W;
- fpi[i].DstReg.CondMask = COND_TR;
-
- fpi[i].SrcReg[0].File = PROGRAM_INPUT;
- fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr;
- fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
- i++;
-
- fpi[i].Opcode = OPCODE_MUL;
-
- fpi[i].DstReg.File = PROGRAM_TEMPORARY;
- fpi[i].DstReg.Index = tempregi;
- fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
- fpi[i].DstReg.CondMask = COND_TR;
-
- fpi[i].SrcReg[0].File = PROGRAM_INPUT;
- fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr;
- fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
- fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
- fpi[i].SrcReg[1].Index = tempregi;
- fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW;
- i++;
-
- /* viewport transformation */
- window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);
-
- fpi[i].Opcode = OPCODE_MAD;
-
- fpi[i].DstReg.File = PROGRAM_TEMPORARY;
- fpi[i].DstReg.Index = tempregi;
- fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
- fpi[i].DstReg.CondMask = COND_TR;
-
- fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY;
- fpi[i].SrcReg[0].Index = tempregi;
- fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-
- fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR;
- fpi[i].SrcReg[1].Index = window_index;
- fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-
- fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR;
- fpi[i].SrcReg[2].Index = window_index;
- fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
- i++;
-
- for (; i < compiler->program->NumInstructions; ++i) {
- int reg;
- for (reg = 0; reg < 3; reg++) {
- if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT &&
- fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) {
- fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY;
- fpi[i].SrcReg[reg].Index = tempregi;
- }
- }
- }
-}
-
-
-/**
- * Rewrite fragment.fogcoord to use a texture coordinate slot.
- * Note that fogcoord is forced into an X001 pattern, and this enforcement
- * is done here.
- *
- * See also the counterpart rewriting for vertex programs.
- */
-static void rewriteFog(struct r300_fragment_program_compiler *compiler)
-{
- struct r300_fragment_program *fp = compiler->fp;
- GLuint InputsRead;
- int i;
-
- InputsRead = fp->Base->InputsRead;
-
- if (!(InputsRead & FRAG_BIT_FOGC)) {
- fp->fog_attr = FRAG_ATTRIB_MAX;
- return;
- }
-
- for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
- {
- if (!(InputsRead & (1 << i))) {
- InputsRead &= ~(1 << FRAG_ATTRIB_FOGC);
- InputsRead |= 1 << i;
- fp->Base->InputsRead = InputsRead;
- fp->fog_attr = i;
- break;
- }
- }
-
- {
- struct prog_instruction *inst;
-
- inst = compiler->program->Instructions;
- while (inst->Opcode != OPCODE_END) {
- const int src_regs = _mesa_num_inst_src_regs(inst->Opcode);
- for (i = 0; i < src_regs; ++i) {
- if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == FRAG_ATTRIB_FOGC) {
- inst->SrcReg[i].Index = fp->fog_attr;
- inst->SrcReg[i].Swizzle = combine_swizzles(
- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE),
- inst->SrcReg[i].Swizzle);
- }
- }
- ++inst;
- }
- }
-}
static GLuint build_dtm(GLuint depthmode)
{
@@ -251,121 +89,23 @@ static void build_state(
}
}
-static void rewrite_depth_out(struct gl_program *prog)
-{
- struct prog_instruction *inst;
-
- for (inst = prog->Instructions; inst->Opcode != OPCODE_END; ++inst) {
- if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != FRAG_RESULT_DEPTH)
- continue;
-
- if (inst->DstReg.WriteMask & WRITEMASK_Z) {
- inst->DstReg.WriteMask = WRITEMASK_W;
- } else {
- inst->DstReg.WriteMask = 0;
- continue;
- }
-
- switch (inst->Opcode) {
- case OPCODE_FRC:
- case OPCODE_MOV:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
- break;
- case OPCODE_ADD:
- case OPCODE_MAX:
- case OPCODE_MIN:
- case OPCODE_MUL:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
- break;
- case OPCODE_CMP:
- case OPCODE_MAD:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
- inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]);
- break;
- default:
- // Scalar instructions needn't be reswizzled
- break;
- }
- }
-}
void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
struct r300_fragment_program_compiler compiler;
- compiler.r300 = r300;
- compiler.fp = fp;
+ compiler.ctx = ctx;
compiler.code = &fp->code;
+ compiler.state = fp->state;
compiler.program = fp->Base;
+ compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE;
+ compiler.debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE;
- if (RADEON_DEBUG & DEBUG_PIXEL) {
- fflush(stdout);
- _mesa_printf("Fragment Program: Initial program:\n");
- _mesa_print_program(compiler.program);
- fflush(stdout);
- }
-
- insert_WPOS_trailer(&compiler);
-
- rewriteFog(&compiler);
-
- rewrite_depth_out(compiler.program);
-
- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
- struct radeon_program_transformation transformations[] = {
- { &r500_transform_TEX, &compiler },
- { &radeonTransformALU, 0 },
- { &radeonTransformDeriv, 0 },
- { &radeonTransformTrigScale, 0 }
- };
- radeonLocalTransform(ctx, compiler.program, 4, transformations);
- } else {
- struct radeon_program_transformation transformations[] = {
- { &r300_transform_TEX, &compiler },
- { &radeonTransformALU, 0 },
- { &radeonTransformTrigSimple, 0 }
- };
- radeonLocalTransform(ctx, compiler.program, 3, transformations);
- }
-
- if (RADEON_DEBUG & DEBUG_PIXEL) {
- _mesa_printf("Fragment Program: After native rewrite:\n");
- _mesa_print_program(compiler.program);
- fflush(stdout);
- }
-
- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadce_init,
- .IsNativeSwizzle = &r500FPIsNativeSwizzle,
- .BuildSwizzle = &r500FPBuildSwizzle
- };
- radeonNqssaDce(ctx, compiler.program, &nqssadce);
- } else {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadce_init,
- .IsNativeSwizzle = &r300FPIsNativeSwizzle,
- .BuildSwizzle = &r300FPBuildSwizzle
- };
- radeonNqssaDce(ctx, compiler.program, &nqssadce);
- }
-
- if (RADEON_DEBUG & DEBUG_PIXEL) {
- _mesa_printf("Compiler: after NqSSA-DCE:\n");
- _mesa_print_program(compiler.program);
- fflush(stdout);
- }
-
- if (!r300->vtbl.BuildFragmentProgramHwCode(&compiler))
+ if (!r3xx_compile_fragment_program(&compiler))
fp->error = GL_TRUE;
fp->translated = GL_TRUE;
-
- if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL))
- r300->vtbl.FragmentProgramDump(&fp->code);
}
struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx)
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
deleted file mode 100644
index b75656e7ee1..00000000000
--- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- * Copyright (C) 2005 Ben Skeggs.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-/**
- * \file
- *
- * Emit the r300_fragment_program_code that can be understood by the hardware.
- * Input is a pre-transformed radeon_program.
- *
- * \author Ben Skeggs
- *
- * \author Jerome Glisse
- *
- * \todo FogOption
- */
-
-#include "r300_fragprog.h"
-
-#include "radeon_program_pair.h"
-#include "r300_fragprog_swizzle.h"
-#include "r300_reg.h"
-
-
-#define PROG_CODE \
- struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
- struct r300_fragment_program_code *code = &c->code->r300
-
-#define error(fmt, args...) do { \
- fprintf(stderr, "%s::%s(): " fmt "\n", \
- __FILE__, __FUNCTION__, ##args); \
- } while(0)
-
-
-static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwindex)
-{
- PROG_CODE;
-
- for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) {
- if (code->constant[*hwindex].File == file &&
- code->constant[*hwindex].Index == index)
- break;
- }
-
- if (*hwindex >= code->const_nr) {
- if (*hwindex >= R300_PFS_NUM_CONST_REGS) {
- error("Out of hw constants!\n");
- return GL_FALSE;
- }
-
- code->const_nr++;
- code->constant[*hwindex].File = file;
- code->constant[*hwindex].Index = index;
- }
-
- return GL_TRUE;
-}
-
-
-/**
- * Mark a temporary register as used.
- */
-static void use_temporary(struct r300_fragment_program_code *code, GLuint index)
-{
- if (index > code->max_temp_idx)
- code->max_temp_idx = index;
-}
-
-
-static GLuint translate_rgb_opcode(GLuint opcode)
-{
- switch(opcode) {
- case OPCODE_CMP: return R300_ALU_OUTC_CMP;
- case OPCODE_DP3: return R300_ALU_OUTC_DP3;
- case OPCODE_DP4: return R300_ALU_OUTC_DP4;
- case OPCODE_FRC: return R300_ALU_OUTC_FRC;
- default:
- error("translate_rgb_opcode(%i): Unknown opcode", opcode);
- /* fall through */
- case OPCODE_NOP:
- /* fall through */
- case OPCODE_MAD: return R300_ALU_OUTC_MAD;
- case OPCODE_MAX: return R300_ALU_OUTC_MAX;
- case OPCODE_MIN: return R300_ALU_OUTC_MIN;
- case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
- }
-}
-
-static GLuint translate_alpha_opcode(GLuint opcode)
-{
- switch(opcode) {
- case OPCODE_CMP: return R300_ALU_OUTA_CMP;
- case OPCODE_DP3: return R300_ALU_OUTA_DP4;
- case OPCODE_DP4: return R300_ALU_OUTA_DP4;
- case OPCODE_EX2: return R300_ALU_OUTA_EX2;
- case OPCODE_FRC: return R300_ALU_OUTA_FRC;
- case OPCODE_LG2: return R300_ALU_OUTA_LG2;
- default:
- error("translate_rgb_opcode(%i): Unknown opcode", opcode);
- /* fall through */
- case OPCODE_NOP:
- /* fall through */
- case OPCODE_MAD: return R300_ALU_OUTA_MAD;
- case OPCODE_MAX: return R300_ALU_OUTA_MAX;
- case OPCODE_MIN: return R300_ALU_OUTA_MIN;
- case OPCODE_RCP: return R300_ALU_OUTA_RCP;
- case OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
- }
-}
-
-/**
- * Emit one paired ALU instruction.
- */
-static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
-{
- PROG_CODE;
-
- if (code->alu.length >= R300_PFS_MAX_ALU_INST) {
- error("Too many ALU instructions");
- return GL_FALSE;
- }
-
- int ip = code->alu.length++;
- int j;
- code->node[code->cur_node].alu_end++;
-
- code->alu.inst[ip].inst0 = translate_rgb_opcode(inst->RGB.Opcode);
- code->alu.inst[ip].inst2 = translate_alpha_opcode(inst->Alpha.Opcode);
-
- for(j = 0; j < 3; ++j) {
- GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5);
- if (!inst->RGB.Src[j].Constant)
- use_temporary(code, inst->RGB.Src[j].Index);
- code->alu.inst[ip].inst1 |= src << (6*j);
-
- src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5);
- if (!inst->Alpha.Src[j].Constant)
- use_temporary(code, inst->Alpha.Src[j].Index);
- code->alu.inst[ip].inst3 |= src << (6*j);
-
- GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
- arg |= inst->RGB.Arg[j].Abs << 6;
- arg |= inst->RGB.Arg[j].Negate << 5;
- code->alu.inst[ip].inst0 |= arg << (7*j);
-
- arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
- arg |= inst->Alpha.Arg[j].Abs << 6;
- arg |= inst->Alpha.Arg[j].Negate << 5;
- code->alu.inst[ip].inst2 |= arg << (7*j);
- }
-
- if (inst->RGB.Saturate)
- code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP;
- if (inst->Alpha.Saturate)
- code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP;
-
- if (inst->RGB.WriteMask) {
- use_temporary(code, inst->RGB.DestIndex);
- code->alu.inst[ip].inst1 |=
- (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) |
- (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
- }
- if (inst->RGB.OutputWriteMask) {
- code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT);
- code->node[code->cur_node].flags |= R300_RGBA_OUT;
- }
-
- if (inst->Alpha.WriteMask) {
- use_temporary(code, inst->Alpha.DestIndex);
- code->alu.inst[ip].inst3 |=
- (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) |
- R300_ALU_DSTA_REG;
- }
- if (inst->Alpha.OutputWriteMask) {
- code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT;
- code->node[code->cur_node].flags |= R300_RGBA_OUT;
- }
- if (inst->Alpha.DepthWriteMask) {
- code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH;
- code->node[code->cur_node].flags |= R300_W_OUT;
- c->fp->writes_depth = GL_TRUE;
- }
-
- return GL_TRUE;
-}
-
-
-/**
- * Finish the current node without advancing to the next one.
- */
-static GLboolean finish_node(struct r300_fragment_program_compiler *c)
-{
- struct r300_fragment_program_code *code = &c->code->r300;
- struct r300_fragment_program_node *node = &code->node[code->cur_node];
-
- if (node->alu_end < 0) {
- /* Generate a single NOP for this node */
- struct radeon_pair_instruction inst;
- _mesa_bzero(&inst, sizeof(inst));
- if (!emit_alu(c, &inst))
- return GL_FALSE;
- }
-
- if (node->tex_end < 0) {
- if (code->cur_node == 0) {
- node->tex_end = 0;
- } else {
- error("Node %i has no TEX instructions", code->cur_node);
- return GL_FALSE;
- }
- } else {
- if (code->cur_node == 0)
- code->first_node_has_tex = 1;
- }
-
- return GL_TRUE;
-}
-
-
-/**
- * Begin a block of texture instructions.
- * Create the necessary indirection.
- */
-static GLboolean begin_tex(void* data)
-{
- PROG_CODE;
-
- if (code->cur_node == 0) {
- if (code->node[0].alu_end < 0 &&
- code->node[0].tex_end < 0)
- return GL_TRUE;
- }
-
- if (code->cur_node == 3) {
- error("Too many texture indirections");
- return GL_FALSE;
- }
-
- if (!finish_node(c))
- return GL_FALSE;
-
- struct r300_fragment_program_node *node = &code->node[++code->cur_node];
- node->alu_offset = code->alu.length;
- node->alu_end = -1;
- node->tex_offset = code->tex.length;
- node->tex_end = -1;
- return GL_TRUE;
-}
-
-
-static GLboolean emit_tex(void* data, struct prog_instruction* inst)
-{
- PROG_CODE;
-
- if (code->tex.length >= R300_PFS_MAX_TEX_INST) {
- error("Too many TEX instructions");
- return GL_FALSE;
- }
-
- GLuint unit = inst->TexSrcUnit;
- GLuint dest = inst->DstReg.Index;
- GLuint opcode;
-
- switch(inst->Opcode) {
- case OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
- case OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
- case OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
- case OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
- default:
- error("Unknown texture opcode %i", inst->Opcode);
- return GL_FALSE;
- }
-
- if (inst->Opcode == OPCODE_KIL) {
- unit = 0;
- dest = 0;
- } else {
- use_temporary(code, dest);
- }
-
- use_temporary(code, inst->SrcReg[0].Index);
-
- code->node[code->cur_node].tex_end++;
- code->tex.inst[code->tex.length++] =
- (inst->SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
- (dest << R300_DST_ADDR_SHIFT) |
- (unit << R300_TEX_ID_SHIFT) |
- (opcode << R300_TEX_INST_SHIFT);
- return GL_TRUE;
-}
-
-
-static const struct radeon_pair_handler pair_handler = {
- .EmitConst = &emit_const,
- .EmitPaired = &emit_alu,
- .EmitTex = &emit_tex,
- .BeginTexBlock = &begin_tex,
- .MaxHwTemps = R300_PFS_NUM_TEMP_REGS
-};
-
-/**
- * Final compilation step: Turn the intermediate radeon_program into
- * machine-readable instructions.
- */
-GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
-{
- struct r300_fragment_program_code *code = &compiler->code->r300;
-
- _mesa_bzero(code, sizeof(struct r300_fragment_program_code));
- code->node[0].alu_end = -1;
- code->node[0].tex_end = -1;
-
- if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler))
- return GL_FALSE;
-
- if (!finish_node(compiler))
- return GL_FALSE;
-
- return GL_TRUE;
-}
-
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c
deleted file mode 100644
index fc9d855bce6..00000000000
--- a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * Copyright (C) 2008 Nicolai Haehnle.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-/**
- * @file
- * Utilities to deal with the somewhat odd restriction on R300 fragment
- * program swizzles.
- */
-
-#include "r300_fragprog_swizzle.h"
-
-#include "r300_reg.h"
-#include "radeon_nqssadce.h"
-
-#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO))
-
-struct swizzle_data {
- GLuint hash; /**< swizzle value this matches */
- GLuint base; /**< base value for hw swizzle */
- GLuint stride; /**< difference in base between arg0/1/2 */
-};
-
-static const struct swizzle_data native_swizzles[] = {
- {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4},
- {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4},
- {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4},
- {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4},
- {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1},
- {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1},
- {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1},
- {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1},
- {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0},
- {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0}
-};
-
-static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]);
-
-
-/**
- * Find a native RGB swizzle that matches the given swizzle.
- * Returns 0 if none found.
- */
-static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle)
-{
- int i, comp;
-
- for(i = 0; i < num_native_swizzles; ++i) {
- const struct swizzle_data* sd = &native_swizzles[i];
- for(comp = 0; comp < 3; ++comp) {
- GLuint swz = GET_SWZ(swizzle, comp);
- if (swz == SWIZZLE_NIL)
- continue;
- if (swz != GET_SWZ(sd->hash, comp))
- break;
- }
- if (comp == 3)
- return sd;
- }
-
- return 0;
-}
-
-
-/**
- * Check whether the given instruction supports the swizzle and negate
- * combinations in the given source register.
- */
-GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg)
-{
- if (reg.Abs)
- reg.Negate = NEGATE_NONE;
-
- if (opcode == OPCODE_KIL ||
- opcode == OPCODE_TEX ||
- opcode == OPCODE_TXB ||
- opcode == OPCODE_TXP) {
- int j;
-
- if (reg.Abs || reg.Negate)
- return GL_FALSE;
-
- for(j = 0; j < 4; ++j) {
- GLuint swz = GET_SWZ(reg.Swizzle, j);
- if (swz == SWIZZLE_NIL)
- continue;
- if (swz != j)
- return GL_FALSE;
- }
-
- return GL_TRUE;
- }
-
- GLuint relevant = 0;
- int j;
-
- for(j = 0; j < 3; ++j)
- if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL)
- relevant |= 1 << j;
-
- if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
- return GL_FALSE;
-
- if (!lookup_native_swizzle(reg.Swizzle))
- return GL_FALSE;
-
- return GL_TRUE;
-}
-
-
-/**
- * Generate MOV dst, src using only native swizzles.
- */
-void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src)
-{
- if (src.Abs)
- src.Negate = NEGATE_NONE;
-
- while(dst.WriteMask) {
- const struct swizzle_data *best_swizzle = 0;
- GLuint best_matchcount = 0;
- GLuint best_matchmask = 0;
- int i, comp;
-
- for(i = 0; i < num_native_swizzles; ++i) {
- const struct swizzle_data *sd = &native_swizzles[i];
- GLuint matchcount = 0;
- GLuint matchmask = 0;
- for(comp = 0; comp < 3; ++comp) {
- if (!GET_BIT(dst.WriteMask, comp))
- continue;
- GLuint swz = GET_SWZ(src.Swizzle, comp);
- if (swz == SWIZZLE_NIL)
- continue;
- if (swz == GET_SWZ(sd->hash, comp)) {
- /* check if the negate bit of current component
- * is the same for already matched components */
- if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp))))
- continue;
-
- matchcount++;
- matchmask |= 1 << comp;
- }
- }
- if (matchcount > best_matchcount) {
- best_swizzle = sd;
- best_matchcount = matchcount;
- best_matchmask = matchmask;
- if (matchmask == (dst.WriteMask & WRITEMASK_XYZ))
- break;
- }
- }
-
- struct prog_instruction *inst;
-
- _mesa_insert_instructions(s->Program, s->IP, 1);
- inst = s->Program->Instructions + s->IP++;
- inst->Opcode = OPCODE_MOV;
- inst->DstReg = dst;
- inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W);
- inst->SrcReg[0] = src;
- inst->SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE;
- /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */
-
- dst.WriteMask &= ~inst->DstReg.WriteMask;
- }
-}
-
-
-/**
- * Translate an RGB (XYZ) swizzle into the hardware code for the given
- * instruction source.
- */
-GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle)
-{
- const struct swizzle_data* sd = lookup_native_swizzle(swizzle);
-
- if (!sd) {
- _mesa_printf("Not a native swizzle: %08x\n", swizzle);
- return 0;
- }
-
- return sd->base + src*sd->stride;
-}
-
-
-/**
- * Translate an Alpha (W) swizzle into the hardware code for the given
- * instruction source.
- */
-GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle)
-{
- if (swizzle < 3)
- return swizzle + 3*src;
-
- switch(swizzle) {
- case SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src;
- case SWIZZLE_ONE: return R300_ALU_ARGA_ONE;
- case SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO;
- default: return R300_ALU_ARGA_ONE;
- }
-}
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h
deleted file mode 100644
index 231bf4eef5f..00000000000
--- a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2008 Nicolai Haehnle.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __R300_FRAGPROG_SWIZZLE_H_
-#define __R300_FRAGPROG_SWIZZLE_H_
-
-#include "main/glheader.h"
-#include "shader/prog_instruction.h"
-
-struct nqssadce_state;
-
-GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg);
-void r300FPBuildSwizzle(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
-
-GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle);
-GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle);
-
-#endif /* __R300_FRAGPROG_SWIZZLE_H_ */
diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c
index ddabd539925..2fa626bab24 100644
--- a/src/mesa/drivers/dri/r300/r300_ioctl.c
+++ b/src/mesa/drivers/dri/r300/r300_ioctl.c
@@ -55,7 +55,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_vertprog.h"
#include "radeon_reg.h"
#include "r300_emit.h"
-#include "r300_fragprog.h"
#include "r300_context.h"
#include "vblank.h"
@@ -66,6 +65,66 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define CLEARBUFFER_DEPTH 0x2
#define CLEARBUFFER_STENCIL 0x4
+#if 1
+
+/**
+ * Fragment program helper macros
+ */
+
+/* Produce unshifted source selectors */
+#define FP_TMP(idx) (idx)
+#define FP_CONST(idx) ((idx) | (1 << 5))
+
+/* Produce source/dest selector dword */
+#define FP_SELC_MASK_NO 0
+#define FP_SELC_MASK_X 1
+#define FP_SELC_MASK_Y 2
+#define FP_SELC_MASK_XY 3
+#define FP_SELC_MASK_Z 4
+#define FP_SELC_MASK_XZ 5
+#define FP_SELC_MASK_YZ 6
+#define FP_SELC_MASK_XYZ 7
+
+#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \
+ (((destidx) << R300_ALU_DSTC_SHIFT) | \
+ (FP_SELC_MASK_##regmask << 23) | \
+ (FP_SELC_MASK_##outmask << 26) | \
+ ((src0) << R300_ALU_SRC0C_SHIFT) | \
+ ((src1) << R300_ALU_SRC1C_SHIFT) | \
+ ((src2) << R300_ALU_SRC2C_SHIFT))
+
+#define FP_SELA_MASK_NO 0
+#define FP_SELA_MASK_W 1
+
+#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \
+ (((destidx) << R300_ALU_DSTA_SHIFT) | \
+ (FP_SELA_MASK_##regmask << 23) | \
+ (FP_SELA_MASK_##outmask << 24) | \
+ ((src0) << R300_ALU_SRC0A_SHIFT) | \
+ ((src1) << R300_ALU_SRC1A_SHIFT) | \
+ ((src2) << R300_ALU_SRC2A_SHIFT))
+
+/* Produce unshifted argument selectors */
+#define FP_ARGC(source) R300_ALU_ARGC_##source
+#define FP_ARGA(source) R300_ALU_ARGA_##source
+#define FP_ABS(arg) ((arg) | (1 << 6))
+#define FP_NEG(arg) ((arg) ^ (1 << 5))
+
+/* Produce instruction dword */
+#define FP_INSTRC(opcode,arg0,arg1,arg2) \
+ (R300_ALU_OUTC_##opcode | \
+ ((arg0) << R300_ALU_ARG0C_SHIFT) | \
+ ((arg1) << R300_ALU_ARG1C_SHIFT) | \
+ ((arg2) << R300_ALU_ARG2C_SHIFT))
+
+#define FP_INSTRA(opcode,arg0,arg1,arg2) \
+ (R300_ALU_OUTA_##opcode | \
+ ((arg0) << R300_ALU_ARG0A_SHIFT) | \
+ ((arg1) << R300_ALU_ARG1A_SHIFT) | \
+ ((arg2) << R300_ALU_ARG2A_SHIFT))
+
+#endif
+
static void r300EmitClearState(GLcontext * ctx);
static void r300ClearBuffer(r300ContextPtr r300, int flags,
@@ -546,7 +605,7 @@ static int r300KernelClear(GLcontext *ctx, GLuint flags)
/* Make sure it fits there. */
radeon_cs_space_reset_bos(r300->radeon.cmdbuf.cs);
-
+
if (flags & BUFFER_BIT_COLOR0) {
rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_COLOR0);
radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs,
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index 12fbf281d99..62a03281ca8 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -62,8 +62,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_emit.h"
#include "r300_tex.h"
#include "r300_fragprog_common.h"
-#include "r300_fragprog.h"
-#include "r500_fragprog.h"
#include "r300_render.h"
#include "r300_vertprog.h"
@@ -458,7 +456,7 @@ static GLboolean current_fragment_program_writes_depth(GLcontext* ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
- return ctx->FragmentProgram._Current && r300->selected_fp->writes_depth;
+ return ctx->FragmentProgram._Current && r300->selected_fp->code.writes_depth;
}
static void r300SetEarlyZState(GLcontext * ctx)
@@ -1230,7 +1228,7 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
int i;
- struct r300_fragment_program_code *code = &r300->selected_fp->code.r300;
+ struct r300_fragment_program_code *code = &r300->selected_fp->code.code.r300;
R300_STATECHANGE(r300, fpt);
@@ -1272,7 +1270,7 @@ static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
int i;
- struct r500_fragment_program_code *code = &r300->selected_fp->code.r500;
+ struct r500_fragment_program_code *code = &r300->selected_fp->code.code.r500;
/* find all the texture instructions and relocate the texture units */
for (i = 0; i < code->inst_end + 1; i++) {
@@ -2063,7 +2061,7 @@ static void r300SetupPixelShader(GLcontext *ctx)
struct r300_fragment_program_code *code;
int i, k;
- code = &fp->code.r300;
+ code = &fp->code.code.r300;
R300_STATECHANGE(rmesa, fpi[0]);
R300_STATECHANGE(rmesa, fpi[1]);
@@ -2137,7 +2135,7 @@ static void r500SetupPixelShader(GLcontext *ctx)
((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0;
((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0;
- code = &fp->code.r500;
+ code = &fp->code.code.r500;
R300_STATECHANGE(rmesa, fp);
rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = code->max_temp_idx;
@@ -2345,13 +2343,9 @@ void r300InitShaderFunctions(r300ContextPtr r300)
r300->vtbl.SetupRSUnit = r500SetupRSUnit;
r300->vtbl.SetupPixelShader = r500SetupPixelShader;
r300->vtbl.SetupFragmentShaderTextures = r500SetupFragmentShaderTextures;
- r300->vtbl.BuildFragmentProgramHwCode = r500BuildFragmentProgramHwCode;
- r300->vtbl.FragmentProgramDump = r500FragmentProgramDump;
} else {
r300->vtbl.SetupRSUnit = r300SetupRSUnit;
r300->vtbl.SetupPixelShader = r300SetupPixelShader;
r300->vtbl.SetupFragmentShaderTextures = r300SetupFragmentShaderTextures;
- r300->vtbl.BuildFragmentProgramHwCode = r300BuildFragmentProgramHwCode;
- r300->vtbl.FragmentProgramDump = r300FragmentProgramDump;
}
}
diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c
index 56ed519cf41..a7e8e711499 100644
--- a/src/mesa/drivers/dri/r300/r300_swtcl.c
+++ b/src/mesa/drivers/dri/r300/r300_swtcl.c
@@ -150,16 +150,16 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_
ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0);
}
- if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) {
- int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0;
+ if (rmesa->selected_fp->code.wpos_attr != FRAG_ATTRIB_MAX) {
+ int tex_id = rmesa->selected_fp->code.wpos_attr - FRAG_ATTRIB_TEX0;
VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
}
- if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) {
- int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0;
+ if (rmesa->selected_fp->code.fog_attr != FRAG_ATTRIB_MAX) {
+ int tex_id = rmesa->selected_fp->code.fog_attr - FRAG_ATTRIB_TEX0;
VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index de32013032f..ab5ca4322e2 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -40,7 +40,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "shader/prog_statevars.h"
#include "tnl/tnl.h"
-#include "radeon_nqssadce.h"
+#include "compiler/radeon_nqssadce.h"
#include "r300_context.h"
#include "r300_state.h"
@@ -1558,12 +1558,12 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
_mesa_insert_mvp_code(ctx, vp->Base);
}
- if (r300->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) {
- pos_as_texcoord(&vp->Base->Base, r300->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0);
+ if (r300->selected_fp->code.wpos_attr != FRAG_ATTRIB_MAX) {
+ pos_as_texcoord(&vp->Base->Base, r300->selected_fp->code.wpos_attr - FRAG_ATTRIB_TEX0);
}
- if (r300->selected_fp->fog_attr != FRAG_ATTRIB_MAX) {
- fog_as_texcoord(&vp->Base->Base, r300->selected_fp->fog_attr - FRAG_ATTRIB_TEX0);
+ if (r300->selected_fp->code.fog_attr != FRAG_ATTRIB_MAX) {
+ fog_as_texcoord(&vp->Base->Base, r300->selected_fp->code.fog_attr - FRAG_ATTRIB_TEX0);
}
addArtificialOutputs(ctx, prog);
@@ -1640,8 +1640,8 @@ struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx)
vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
wanted_key.FpReads = r300->selected_fp->Base->InputsRead;
- wanted_key.FogAttr = r300->selected_fp->fog_attr;
- wanted_key.WPosAttr = r300->selected_fp->wpos_attr;
+ wanted_key.FogAttr = r300->selected_fp->code.fog_attr;
+ wanted_key.WPosAttr = r300->selected_fp->code.wpos_attr;
for (vp = vpc->progs; vp; vp = vp->next) {
if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c
deleted file mode 100644
index 4d58cf21622..00000000000
--- a/src/mesa/drivers/dri/r300/r500_fragprog.c
+++ /dev/null
@@ -1,480 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "r500_fragprog.h"
-
-static void reset_srcreg(struct prog_src_register* reg)
-{
- _mesa_bzero(reg, sizeof(*reg));
- reg->Swizzle = SWIZZLE_NOOP;
-}
-
-static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
-{
- gl_state_index fail_value_tokens[STATE_LENGTH] = {
- STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
- };
- struct prog_src_register reg = { 0, };
-
- fail_value_tokens[2] = tmu;
- reg.File = PROGRAM_STATE_VAR;
- reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
- reg.Swizzle = SWIZZLE_WWWW;
- return reg;
-}
-
-/**
- * Transform TEX, TXP, TXB, and KIL instructions in the following way:
- * - premultiply texture coordinates for RECT
- * - extract operand swizzles
- * - introduce a temporary register when write masks are needed
- *
- */
-GLboolean r500_transform_TEX(
- struct radeon_transform_context *t,
- struct prog_instruction* orig_inst, void* data)
-{
- struct r300_fragment_program_compiler *compiler =
- (struct r300_fragment_program_compiler*)data;
- struct prog_instruction inst = *orig_inst;
- struct prog_instruction* tgt;
- GLboolean destredirect = GL_FALSE;
-
- if (inst.Opcode != OPCODE_TEX &&
- inst.Opcode != OPCODE_TXB &&
- inst.Opcode != OPCODE_TXP &&
- inst.Opcode != OPCODE_KIL)
- return GL_FALSE;
-
- /* ARB_shadow & EXT_shadow_funcs */
- if (inst.Opcode != OPCODE_KIL &&
- t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
-
- if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
- tgt = radeonAppendInstructions(t->Program, 1);
-
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg = inst.DstReg;
- if (comparefunc == GL_ALWAYS) {
- tgt->SrcReg[0].File = PROGRAM_BUILTIN;
- tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
- } else {
- tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
- }
- return GL_TRUE;
- }
-
- inst.DstReg.File = PROGRAM_TEMPORARY;
- inst.DstReg.Index = radeonFindFreeTemporary(t);
- inst.DstReg.WriteMask = WRITEMASK_XYZW;
- } else if (inst.Opcode != OPCODE_KIL && inst.DstReg.File != PROGRAM_TEMPORARY) {
- int tempreg = radeonFindFreeTemporary(t);
-
- inst.DstReg.File = PROGRAM_TEMPORARY;
- inst.DstReg.Index = tempreg;
- inst.DstReg.WriteMask = WRITEMASK_XYZW;
- destredirect = GL_TRUE;
- }
-
- if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
- int tmpreg = radeonFindFreeTemporary(t);
- tgt = radeonAppendInstructions(t->Program, 1);
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg.File = PROGRAM_TEMPORARY;
- tgt->DstReg.Index = tmpreg;
- tgt->SrcReg[0] = inst.SrcReg[0];
-
- reset_srcreg(&inst.SrcReg[0]);
- inst.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst.SrcReg[0].Index = tmpreg;
- }
-
- tgt = radeonAppendInstructions(t->Program, 1);
- _mesa_copy_instructions(tgt, &inst, 1);
-
- if (inst.Opcode != OPCODE_KIL &&
- t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
- GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode;
- int rcptemp = radeonFindFreeTemporary(t);
- int pass, fail;
-
- tgt = radeonAppendInstructions(t->Program, 3);
-
- tgt[0].Opcode = OPCODE_RCP;
- tgt[0].DstReg.File = PROGRAM_TEMPORARY;
- tgt[0].DstReg.Index = rcptemp;
- tgt[0].DstReg.WriteMask = WRITEMASK_W;
- tgt[0].SrcReg[0] = inst.SrcReg[0];
- tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
-
- tgt[1].Opcode = OPCODE_MAD;
- tgt[1].DstReg = inst.DstReg;
- tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
- tgt[1].SrcReg[0] = inst.SrcReg[0];
- tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
- tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
- tgt[1].SrcReg[1].Index = rcptemp;
- tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
- tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
- tgt[1].SrcReg[2].Index = inst.DstReg.Index;
- if (depthmode == 0) /* GL_LUMINANCE */
- tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
- else if (depthmode == 2) /* GL_ALPHA */
- tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
-
- /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
- * r < tex <=> -tex+r < 0
- * r >= tex <=> not (-tex+r < 0 */
- if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
- tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW;
- else
- tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW;
-
- tgt[2].Opcode = OPCODE_CMP;
- tgt[2].DstReg = orig_inst->DstReg;
- tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
- tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
-
- if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
- pass = 1;
- fail = 2;
- } else {
- pass = 2;
- fail = 1;
- }
-
- tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
- tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
- tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
- } else if (destredirect) {
- tgt = radeonAppendInstructions(t->Program, 1);
-
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg = orig_inst->DstReg;
- tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
- tgt->SrcReg[0].Index = inst.DstReg.Index;
- }
-
- return GL_TRUE;
-}
-
-GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg)
-{
- GLuint relevant;
- int i;
-
- if (opcode == OPCODE_TEX ||
- opcode == OPCODE_TXB ||
- opcode == OPCODE_TXP ||
- opcode == OPCODE_KIL) {
- if (reg.Abs)
- return GL_FALSE;
-
- if (opcode == OPCODE_KIL && (reg.Swizzle != SWIZZLE_NOOP || reg.Negate != NEGATE_NONE))
- return GL_FALSE;
-
- if (reg.Negate)
- reg.Negate ^= NEGATE_XYZW;
-
- for(i = 0; i < 4; ++i) {
- GLuint swz = GET_SWZ(reg.Swizzle, i);
- if (swz == SWIZZLE_NIL) {
- reg.Negate &= ~(1 << i);
- continue;
- }
- if (swz >= 4)
- return GL_FALSE;
- }
-
- if (reg.Negate)
- return GL_FALSE;
-
- return GL_TRUE;
- } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) {
- /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
- * if it doesn't fit perfectly into a .xyzw case... */
- if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate)
- return GL_TRUE;
-
- return GL_FALSE;
- } else {
- /* ALU instructions support almost everything */
- if (reg.Abs)
- return GL_TRUE;
-
- relevant = 0;
- for(i = 0; i < 3; ++i) {
- GLuint swz = GET_SWZ(reg.Swizzle, i);
- if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
- relevant |= 1 << i;
- }
- if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
- return GL_FALSE;
-
- return GL_TRUE;
- }
-}
-
-/**
- * Implement a MOV with a potentially non-native swizzle.
- *
- * The only thing we *cannot* do in an ALU instruction is per-component
- * negation. Therefore, we split the MOV into two instructions when necessary.
- */
-void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src)
-{
- struct prog_instruction *inst;
- GLuint negatebase[2] = { 0, 0 };
- int i;
-
- for(i = 0; i < 4; ++i) {
- GLuint swz = GET_SWZ(src.Swizzle, i);
- if (swz == SWIZZLE_NIL)
- continue;
- negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
- }
-
- _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0));
- inst = s->Program->Instructions + s->IP;
-
- for(i = 0; i <= 1; ++i) {
- if (!negatebase[i])
- continue;
-
- inst->Opcode = OPCODE_MOV;
- inst->DstReg = dst;
- inst->DstReg.WriteMask = negatebase[i];
- inst->SrcReg[0] = src;
- inst->SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW;
- inst++;
- s->IP++;
- }
-}
-
-
-static char *toswiz(int swiz_val) {
- switch(swiz_val) {
- case 0: return "R";
- case 1: return "G";
- case 2: return "B";
- case 3: return "A";
- case 4: return "0";
- case 5: return "1/2";
- case 6: return "1";
- case 7: return "U";
- }
- return NULL;
-}
-
-static char *toop(int op_val)
-{
- char *str = NULL;
- switch (op_val) {
- case 0: str = "MAD"; break;
- case 1: str = "DP3"; break;
- case 2: str = "DP4"; break;
- case 3: str = "D2A"; break;
- case 4: str = "MIN"; break;
- case 5: str = "MAX"; break;
- case 6: str = "Reserved"; break;
- case 7: str = "CND"; break;
- case 8: str = "CMP"; break;
- case 9: str = "FRC"; break;
- case 10: str = "SOP"; break;
- case 11: str = "MDH"; break;
- case 12: str = "MDV"; break;
- }
- return str;
-}
-
-static char *to_alpha_op(int op_val)
-{
- char *str = NULL;
- switch (op_val) {
- case 0: str = "MAD"; break;
- case 1: str = "DP"; break;
- case 2: str = "MIN"; break;
- case 3: str = "MAX"; break;
- case 4: str = "Reserved"; break;
- case 5: str = "CND"; break;
- case 6: str = "CMP"; break;
- case 7: str = "FRC"; break;
- case 8: str = "EX2"; break;
- case 9: str = "LN2"; break;
- case 10: str = "RCP"; break;
- case 11: str = "RSQ"; break;
- case 12: str = "SIN"; break;
- case 13: str = "COS"; break;
- case 14: str = "MDH"; break;
- case 15: str = "MDV"; break;
- }
- return str;
-}
-
-static char *to_mask(int val)
-{
- char *str = NULL;
- switch(val) {
- case 0: str = "NONE"; break;
- case 1: str = "R"; break;
- case 2: str = "G"; break;
- case 3: str = "RG"; break;
- case 4: str = "B"; break;
- case 5: str = "RB"; break;
- case 6: str = "GB"; break;
- case 7: str = "RGB"; break;
- case 8: str = "A"; break;
- case 9: str = "AR"; break;
- case 10: str = "AG"; break;
- case 11: str = "ARG"; break;
- case 12: str = "AB"; break;
- case 13: str = "ARB"; break;
- case 14: str = "AGB"; break;
- case 15: str = "ARGB"; break;
- }
- return str;
-}
-
-static char *to_texop(int val)
-{
- switch(val) {
- case 0: return "NOP";
- case 1: return "LD";
- case 2: return "TEXKILL";
- case 3: return "PROJ";
- case 4: return "LODBIAS";
- case 5: return "LOD";
- case 6: return "DXDY";
- }
- return NULL;
-}
-
-void r500FragmentProgramDump(union rX00_fragment_program_code *c)
-{
- struct r500_fragment_program_code *code = &c->r500;
- fprintf(stderr, "R500 Fragment Program:\n--------\n");
-
- int n;
- uint32_t inst;
- uint32_t inst0;
- char *str = NULL;
-
- if (code->const_nr) {
- fprintf(stderr, "--------\nConstants:\n");
- for (n = 0; n < code->const_nr; n++) {
- fprintf(stderr, "Constant %d: %i[%i]\n", n,
- code->constant[n].File, code->constant[n].Index);
- }
- fprintf(stderr, "--------\n");
- }
-
- for (n = 0; n < code->inst_end+1; n++) {
- inst0 = inst = code->inst[n].inst0;
- fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
- switch(inst & 0x3) {
- case R500_INST_TYPE_ALU: str = "ALU"; break;
- case R500_INST_TYPE_OUT: str = "OUT"; break;
- case R500_INST_TYPE_FC: str = "FC"; break;
- case R500_INST_TYPE_TEX: str = "TEX"; break;
- };
- fprintf(stderr,"%s %s %s %s %s ", str,
- inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
- inst & R500_INST_LAST ? "LAST" : "",
- inst & R500_INST_NOP ? "NOP" : "",
- inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
- fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
- to_mask((inst >> 15) & 0xf));
-
- switch(inst0 & 0x3) {
- case 0:
- case 1:
- fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1);
- inst = code->inst[n].inst1;
-
- fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
- inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
- (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
- (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
- (inst >> 30));
-
- fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2);
- inst = code->inst[n].inst2;
- fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
- inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
- (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
- (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
- (inst >> 30));
- fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3);
- inst = code->inst[n].inst3;
- fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
- (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
- (inst >> 11) & 0x3,
- (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
- (inst >> 24) & 0x3);
-
-
- fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);
- inst = code->inst[n].inst4;
- fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf),
- (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
- (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
- (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
- (inst >> 31) & 0x1);
-
- fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);
- inst = code->inst[n].inst5;
- fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
- (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
- (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
- (inst >> 23) & 0x3,
- (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
- break;
- case 2:
- break;
- case 3:
- inst = code->inst[n].inst1;
- fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
- to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
- (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
- inst = code->inst[n].inst2;
- fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
- inst & 127, inst & (1<<7) ? "(rel)" : "",
- toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
- toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
- (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
- toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
- toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
-
- fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3);
- break;
- }
- fprintf(stderr,"\n");
- }
-
-}
diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h
deleted file mode 100644
index 1179bf66073..00000000000
--- a/src/mesa/drivers/dri/r300/r500_fragprog.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2005 Ben Skeggs.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-/*
- * Authors:
- * Ben Skeggs
- * Jerome Glisse
- */
-#ifndef __R500_FRAGPROG_H_
-#define __R500_FRAGPROG_H_
-
-#include "shader/prog_parameter.h"
-#include "shader/prog_instruction.h"
-
-#include "r300_context.h"
-#include "radeon_nqssadce.h"
-
-extern GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
-
-extern void r500FragmentProgramDump(union rX00_fragment_program_code *c);
-
-extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg);
-
-extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src);
-
-extern GLboolean r500_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data);
-
-#endif
diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c
deleted file mode 100644
index 30f4514897e..00000000000
--- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- * Copyright (C) 2005 Ben Skeggs.
- *
- * Copyright 2008 Corbin Simpson
- * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-/**
- * \file
- *
- * \author Ben Skeggs
- *
- * \author Jerome Glisse
- *
- * \author Corbin Simpson
- *
- * \todo Depth write, WPOS/FOGC inputs
- *
- * \todo FogOption
- *
- */
-
-#include "r500_fragprog.h"
-
-#include "radeon_program_pair.h"
-
-
-#define PROG_CODE \
- struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
- struct r500_fragment_program_code *code = &c->code->r500
-
-#define error(fmt, args...) do { \
- fprintf(stderr, "%s::%s(): " fmt "\n", \
- __FILE__, __FUNCTION__, ##args); \
- } while(0)
-
-
-/**
- * Callback to register hardware constants.
- */
-static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex)
-{
- PROG_CODE;
-
- for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) {
- if (code->constant[*hwindex].File == file &&
- code->constant[*hwindex].Index == idx)
- break;
- }
-
- if (*hwindex >= code->const_nr) {
- if (*hwindex >= R500_PFS_NUM_CONST_REGS) {
- error("Out of hw constants!\n");
- return GL_FALSE;
- }
-
- code->const_nr++;
- code->constant[*hwindex].File = file;
- code->constant[*hwindex].Index = idx;
- }
-
- return GL_TRUE;
-}
-
-static GLuint translate_rgb_op(GLuint opcode)
-{
- switch(opcode) {
- case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
- case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
- case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
- case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
- case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
- case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
- default:
- error("translate_rgb_op(%d): unknown opcode\n", opcode);
- /* fall through */
- case OPCODE_NOP:
- /* fall through */
- case OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
- case OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
- case OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
- case OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
- }
-}
-
-static GLuint translate_alpha_op(GLuint opcode)
-{
- switch(opcode) {
- case OPCODE_CMP: return R500_ALPHA_OP_CMP;
- case OPCODE_COS: return R500_ALPHA_OP_COS;
- case OPCODE_DDX: return R500_ALPHA_OP_MDH;
- case OPCODE_DDY: return R500_ALPHA_OP_MDV;
- case OPCODE_DP3: return R500_ALPHA_OP_DP;
- case OPCODE_DP4: return R500_ALPHA_OP_DP;
- case OPCODE_EX2: return R500_ALPHA_OP_EX2;
- case OPCODE_FRC: return R500_ALPHA_OP_FRC;
- case OPCODE_LG2: return R500_ALPHA_OP_LN2;
- default:
- error("translate_alpha_op(%d): unknown opcode\n", opcode);
- /* fall through */
- case OPCODE_NOP:
- /* fall through */
- case OPCODE_MAD: return R500_ALPHA_OP_MAD;
- case OPCODE_MAX: return R500_ALPHA_OP_MAX;
- case OPCODE_MIN: return R500_ALPHA_OP_MIN;
- case OPCODE_RCP: return R500_ALPHA_OP_RCP;
- case OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
- case OPCODE_SIN: return R500_ALPHA_OP_SIN;
- }
-}
-
-static GLuint fix_hw_swizzle(GLuint swz)
-{
- if (swz == 5) swz = 6;
- if (swz == SWIZZLE_NIL) swz = 4;
- return swz;
-}
-
-static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg)
-{
- GLuint t = inst->RGB.Arg[arg].Source;
- int comp;
- t |= inst->RGB.Arg[arg].Negate << 11;
- t |= inst->RGB.Arg[arg].Abs << 12;
-
- for(comp = 0; comp < 3; ++comp)
- t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
-
- return t;
-}
-
-static GLuint translate_arg_alpha(struct radeon_pair_instruction *inst, int i)
-{
- GLuint t = inst->Alpha.Arg[i].Source;
- t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2;
- t |= inst->Alpha.Arg[i].Negate << 5;
- t |= inst->Alpha.Arg[i].Abs << 6;
- return t;
-}
-
-static void use_temporary(struct r500_fragment_program_code* code, GLuint index)
-{
- if (index > code->max_temp_idx)
- code->max_temp_idx = index;
-}
-
-static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)
-{
- if (!src.Constant)
- use_temporary(code, src.Index);
- return src.Index | src.Constant << 8;
-}
-
-
-/**
- * Emit a paired ALU instruction.
- */
-static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
-{
- PROG_CODE;
-
- if (code->inst_end >= 511) {
- error("emit_alu: Too many instructions");
- return GL_FALSE;
- }
-
- int ip = ++code->inst_end;
-
- code->inst[ip].inst5 = translate_rgb_op(inst->RGB.Opcode);
- code->inst[ip].inst4 = translate_alpha_op(inst->Alpha.Opcode);
-
- if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask)
- code->inst[ip].inst0 = R500_INST_TYPE_OUT;
- else
- code->inst[ip].inst0 = R500_INST_TYPE_ALU;
- code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
-
- code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
- code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
- if (inst->Alpha.DepthWriteMask) {
- code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
- c->fp->writes_depth = GL_TRUE;
- }
-
- code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
- code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
- use_temporary(code, inst->Alpha.DestIndex);
- use_temporary(code, inst->RGB.DestIndex);
-
- if (inst->RGB.Saturate)
- code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
- if (inst->Alpha.Saturate)
- code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
-
- code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
- code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
- code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
-
- code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
- code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
- code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
-
- code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
- code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
- code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
-
- code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
- code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
- code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
-
- return GL_TRUE;
-}
-
-static GLuint translate_strq_swizzle(struct prog_src_register src)
-{
- GLuint swiz = 0;
- int i;
- for (i = 0; i < 4; i++)
- swiz |= (GET_SWZ(src.Swizzle, i) & 0x3) << i*2;
- return swiz;
-}
-
-/**
- * Emit a single TEX instruction
- */
-static GLboolean emit_tex(void *data, struct prog_instruction *inst)
-{
- PROG_CODE;
-
- if (code->inst_end >= 511) {
- error("emit_tex: Too many instructions");
- return GL_FALSE;
- }
-
- int ip = ++code->inst_end;
-
- code->inst[ip].inst0 = R500_INST_TYPE_TEX
- | (inst->DstReg.WriteMask << 11)
- | R500_INST_TEX_SEM_WAIT;
- code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
- | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
-
- if (inst->TexSrcTarget == TEXTURE_RECT_INDEX)
- code->inst[ip].inst1 |= R500_TEX_UNSCALED;
-
- switch (inst->Opcode) {
- case OPCODE_KIL:
- code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
- break;
- case OPCODE_TEX:
- code->inst[ip].inst1 |= R500_TEX_INST_LD;
- break;
- case OPCODE_TXB:
- code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
- break;
- case OPCODE_TXP:
- code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
- break;
- default:
- error("emit_tex can't handle opcode %x\n", inst->Opcode);
- }
-
- code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
- | (translate_strq_swizzle(inst->SrcReg[0]) << 8)
- | R500_TEX_DST_ADDR(inst->DstReg.Index)
- | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
- | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
-
- return GL_TRUE;
-}
-
-static const struct radeon_pair_handler pair_handler = {
- .EmitConst = emit_const,
- .EmitPaired = emit_paired,
- .EmitTex = emit_tex,
- .MaxHwTemps = 128
-};
-
-GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
-{
- struct r500_fragment_program_code *code = &compiler->code->r500;
-
- _mesa_bzero(code, sizeof(*code));
- code->max_temp_idx = 1;
- code->inst_offset = 0;
- code->inst_end = -1;
-
- if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler))
- return GL_FALSE;
-
- if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
- /* This may happen when dead-code elimination is disabled or
- * when most of the fragment program logic is leading to a KIL */
- if (code->inst_end >= 511) {
- error("Introducing fake OUT: Too many instructions");
- return GL_FALSE;
- }
-
- int ip = ++code->inst_end;
- code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
- }
-
- return GL_TRUE;
-}
diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c
deleted file mode 100644
index 202a8532b6d..00000000000
--- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * Copyright (C) 2008 Nicolai Haehnle.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-/**
- * @file
- *
- * "Not-quite SSA" and Dead-Code Elimination.
- *
- * @note This code uses SWIZZLE_NIL in a source register to indicate that
- * the corresponding component is ignored by the corresponding instruction.
- */
-
-#include "radeon_nqssadce.h"
-
-
-/**
- * Return the @ref register_state for the given register (or 0 for untracked
- * registers, i.e. constants).
- */
-static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index)
-{
- switch(file) {
- case PROGRAM_TEMPORARY: return &s->Temps[index];
- case PROGRAM_OUTPUT: return &s->Outputs[index];
- case PROGRAM_ADDRESS: return &s->Address;
- default: return 0;
- }
-}
-
-
-/**
- * Left multiplication of a register with a swizzle
- *
- * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles.
- */
-struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
-{
- struct prog_src_register tmp = srcreg;
- int i;
- tmp.Swizzle = 0;
- tmp.Negate = NEGATE_NONE;
- for(i = 0; i < 4; ++i) {
- GLuint swz = GET_SWZ(swizzle, i);
- if (swz < 4) {
- tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
- tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
- } else {
- tmp.Swizzle |= swz << (i*3);
- }
- }
- return tmp;
-}
-
-
-static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
- struct prog_instruction *inst, GLint src, GLuint sourced)
-{
- int i;
- GLuint deswz_source = 0;
-
- for(i = 0; i < 4; ++i) {
- if (GET_BIT(sourced, i)) {
- GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i);
- deswz_source |= 1 << swz;
- } else {
- inst->SrcReg[src].Swizzle &= ~(7 << (3*i));
- inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
- }
- }
-
- if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
- struct prog_dst_register dstreg = inst->DstReg;
- dstreg.File = PROGRAM_TEMPORARY;
- dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
- dstreg.WriteMask = sourced;
-
- s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]);
-
- inst = s->Program->Instructions + s->IP;
- inst->SrcReg[src].File = PROGRAM_TEMPORARY;
- inst->SrcReg[src].Index = dstreg.Index;
- inst->SrcReg[src].Swizzle = 0;
- inst->SrcReg[src].Negate = NEGATE_NONE;
- inst->SrcReg[src].Abs = 0;
- for(i = 0; i < 4; ++i) {
- if (GET_BIT(sourced, i))
- inst->SrcReg[src].Swizzle |= i << (3*i);
- else
- inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
- }
- deswz_source = sourced;
- }
-
- struct register_state *regstate;
-
- if (inst->SrcReg[src].RelAddr) {
- regstate = get_reg_state(s, PROGRAM_ADDRESS, 0);
- if (regstate)
- regstate->Sourced |= WRITEMASK_X;
- } else {
- regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
- if (regstate)
- regstate->Sourced |= deswz_source & 0xf;
- }
-
- return inst;
-}
-
-static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex)
-{
- int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
- int i;
- for(i = 0; i < nsrc; ++i)
- if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex)
- inst->SrcReg[i].Index = newindex;
-}
-
-static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
-{
- GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
- int ip;
- for(ip = 0; ip < s->IP; ++ip) {
- struct prog_instruction* inst = s->Program->Instructions + ip;
- if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex)
- inst->DstReg.Index = newindex;
- unalias_srcregs(inst, oldindex, newindex);
- }
- unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex);
-}
-
-
-/**
- * Handle one instruction.
- */
-static void process_instruction(struct nqssadce_state* s)
-{
- struct prog_instruction *inst = s->Program->Instructions + s->IP;
-
- if (inst->Opcode == OPCODE_END)
- return;
-
- if (inst->Opcode != OPCODE_KIL) {
- struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
- if (!regstate) {
- _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n",
- inst->DstReg.File, inst->DstReg.Index);
- return;
- }
-
- inst->DstReg.WriteMask &= regstate->Sourced;
- regstate->Sourced &= ~inst->DstReg.WriteMask;
-
- if (inst->DstReg.WriteMask == 0) {
- _mesa_delete_instructions(s->Program, s->IP, 1);
- return;
- }
-
- if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced)
- unalias_temporary(s, inst->DstReg.Index);
- }
-
- /* Attention: Due to swizzle emulation code, the following
- * might change the instruction stream under us, so we have
- * to be careful with the inst pointer. */
- switch (inst->Opcode) {
- case OPCODE_ARL:
- case OPCODE_DDX:
- case OPCODE_DDY:
- case OPCODE_FRC:
- case OPCODE_MOV:
- inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
- break;
- case OPCODE_ADD:
- case OPCODE_MAX:
- case OPCODE_MIN:
- case OPCODE_MUL:
- case OPCODE_SGE:
- case OPCODE_SLT:
- inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
- inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
- break;
- case OPCODE_CMP:
- case OPCODE_MAD:
- inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
- inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
- inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask);
- break;
- case OPCODE_COS:
- case OPCODE_EX2:
- case OPCODE_LG2:
- case OPCODE_RCP:
- case OPCODE_RSQ:
- case OPCODE_SIN:
- inst = track_used_srcreg(s, inst, 0, 0x1);
- break;
- case OPCODE_DP3:
- inst = track_used_srcreg(s, inst, 0, 0x7);
- inst = track_used_srcreg(s, inst, 1, 0x7);
- break;
- case OPCODE_DP4:
- inst = track_used_srcreg(s, inst, 0, 0xf);
- inst = track_used_srcreg(s, inst, 1, 0xf);
- break;
- case OPCODE_KIL:
- case OPCODE_TEX:
- case OPCODE_TXB:
- case OPCODE_TXP:
- inst = track_used_srcreg(s, inst, 0, 0xf);
- break;
- case OPCODE_DST:
- inst = track_used_srcreg(s, inst, 0, 0x6);
- inst = track_used_srcreg(s, inst, 1, 0xa);
- break;
- case OPCODE_EXP:
- case OPCODE_LOG:
- case OPCODE_POW:
- inst = track_used_srcreg(s, inst, 0, 0x3);
- break;
- case OPCODE_LIT:
- inst = track_used_srcreg(s, inst, 0, 0xb);
- break;
- default:
- _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
- return;
- }
-}
-
-static void calculateInputsOutputs(struct gl_program *p)
-{
- struct prog_instruction *inst;
- GLuint InputsRead, OutputsWritten;
-
- inst = p->Instructions;
- InputsRead = 0;
- OutputsWritten = 0;
- while (inst->Opcode != OPCODE_END)
- {
- int i, num_src_regs;
-
- num_src_regs = _mesa_num_inst_src_regs(inst->Opcode);
- for (i = 0; i < num_src_regs; ++i) {
- if (inst->SrcReg[i].File == PROGRAM_INPUT)
- InputsRead |= 1 << inst->SrcReg[i].Index;
- }
-
- if (inst->DstReg.File == PROGRAM_OUTPUT)
- OutputsWritten |= 1 << inst->DstReg.Index;
-
- ++inst;
- }
-
- p->InputsRead = InputsRead;
- p->OutputsWritten = OutputsWritten;
-}
-
-void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr)
-{
- struct nqssadce_state s;
-
- _mesa_bzero(&s, sizeof(s));
- s.Ctx = ctx;
- s.Program = p;
- s.Descr = descr;
- s.Descr->Init(&s);
- s.IP = p->NumInstructions;
-
- while(s.IP > 0) {
- s.IP--;
- process_instruction(&s);
- }
-
- calculateInputsOutputs(p);
-}
diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/radeon_nqssadce.h
deleted file mode 100644
index 8626f21c25e..00000000000
--- a/src/mesa/drivers/dri/r300/radeon_nqssadce.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (C) 2008 Nicolai Haehnle.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __RADEON_PROGRAM_NQSSADCE_H_
-#define __RADEON_PROGRAM_NQSSADCE_H_
-
-#include "radeon_program.h"
-
-
-struct register_state {
- /**
- * Bitmask indicating which components of the register are sourced
- * by later instructions.
- */
- GLuint Sourced : 4;
-};
-
-/**
- * Maintain state such as which registers are used, which registers are
- * read from, etc.
- */
-struct nqssadce_state {
- GLcontext *Ctx;
- struct gl_program *Program;
- struct radeon_nqssadce_descr *Descr;
-
- /**
- * All instructions after this instruction pointer have been dealt with.
- */
- int IP;
-
- /**
- * Which registers are read by subsequent instructions?
- */
- struct register_state Temps[MAX_PROGRAM_TEMPS];
- struct register_state Outputs[VERT_RESULT_MAX];
- struct register_state Address;
-};
-
-
-/**
- * This structure contains a description of the hardware in-so-far as
- * it is required for the NqSSA-DCE pass.
- */
-struct radeon_nqssadce_descr {
- /**
- * Fill in which outputs
- */
- void (*Init)(struct nqssadce_state *);
-
- /**
- * Check whether the given swizzle, absolute and negate combination
- * can be implemented natively by the hardware for this opcode.
- */
- GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg);
-
- /**
- * Emit (at the current IP) the instruction MOV dst, src;
- * The transformation will work recursively on the emitted instruction(s).
- */
- void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
-
- void *Data;
-};
-
-void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr);
-struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg);
-
-#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */
diff --git a/src/mesa/drivers/dri/r300/radeon_program.c b/src/mesa/drivers/dri/r300/radeon_program.c
deleted file mode 100644
index da5e7aefce5..00000000000
--- a/src/mesa/drivers/dri/r300/radeon_program.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright (C) 2008 Nicolai Haehnle.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "radeon_program.h"
-
-#include "shader/prog_print.h"
-
-
-/**
- * Transform the given clause in the following way:
- * 1. Replace it with an empty clause
- * 2. For every instruction in the original clause, try the given
- * transformations in order.
- * 3. If one of the transformations returns GL_TRUE, assume that it
- * has emitted the appropriate instruction(s) into the new clause;
- * otherwise, copy the instruction verbatim.
- *
- * \note The transformation is currently not recursive; in other words,
- * instructions emitted by transformations are not transformed.
- *
- * \note The transform is called 'local' because it can only look at
- * one instruction at a time.
- */
-void radeonLocalTransform(
- GLcontext *Ctx,
- struct gl_program *program,
- int num_transformations,
- struct radeon_program_transformation* transformations)
-{
- struct radeon_transform_context ctx;
- int ip;
-
- ctx.Ctx = Ctx;
- ctx.Program = program;
- ctx.OldInstructions = program->Instructions;
- ctx.OldNumInstructions = program->NumInstructions;
-
- program->Instructions = 0;
- program->NumInstructions = 0;
-
- for(ip = 0; ip < ctx.OldNumInstructions; ++ip) {
- struct prog_instruction *instr = ctx.OldInstructions + ip;
- int i;
-
- for(i = 0; i < num_transformations; ++i) {
- struct radeon_program_transformation* t = transformations + i;
-
- if (t->function(&ctx, instr, t->userData))
- break;
- }
-
- if (i >= num_transformations) {
- struct prog_instruction* dest = radeonAppendInstructions(program, 1);
- _mesa_copy_instructions(dest, instr, 1);
- }
- }
-
- _mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions);
-}
-
-
-static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count)
-{
- GLuint i;
- for (i = 0; i < count; i++) {
- const struct prog_instruction *inst = insts + i;
- const GLuint n = _mesa_num_inst_src_regs(inst->Opcode);
- GLuint k;
-
- for (k = 0; k < n; k++) {
- if (inst->SrcReg[k].File == PROGRAM_TEMPORARY)
- used[inst->SrcReg[k].Index] = GL_TRUE;
- }
- }
-}
-
-GLint radeonFindFreeTemporary(struct radeon_transform_context *t)
-{
- GLboolean used[MAX_PROGRAM_TEMPS];
- GLuint i;
-
- _mesa_memset(used, 0, sizeof(used));
- scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions);
- scan_instructions(used, t->OldInstructions, t->OldNumInstructions);
-
- for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
- if (!used[i])
- return i;
- }
-
- return -1;
-}
-
-
-/**
- * Append the given number of instructions to the program and return a
- * pointer to the first new instruction.
- */
-struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count)
-{
- int oldnum = program->NumInstructions;
- _mesa_insert_instructions(program, oldnum, count);
- return program->Instructions + oldnum;
-}
diff --git a/src/mesa/drivers/dri/r300/radeon_program.h b/src/mesa/drivers/dri/r300/radeon_program.h
deleted file mode 100644
index 88474d43a22..00000000000
--- a/src/mesa/drivers/dri/r300/radeon_program.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (C) 2008 Nicolai Haehnle.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __RADEON_PROGRAM_H_
-#define __RADEON_PROGRAM_H_
-
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "shader/program.h"
-#include "shader/prog_instruction.h"
-
-
-enum {
- CLAUSE_MIXED = 0,
- CLAUSE_ALU,
- CLAUSE_TEX
-};
-
-enum {
- PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */
-};
-
-enum {
- OPCODE_REPL_ALPHA = MAX_OPCODE /**< used in paired instructions */
-};
-
-#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO)
-#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE)
-
-static inline GLuint get_swz(GLuint swz, GLuint idx)
-{
- if (idx & 0x4)
- return idx;
- return GET_SWZ(swz, idx);
-}
-
-static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w)
-{
- GLuint ret = 0;
-
- ret |= get_swz(src, swz_x);
- ret |= get_swz(src, swz_y) << 3;
- ret |= get_swz(src, swz_z) << 6;
- ret |= get_swz(src, swz_w) << 9;
-
- return ret;
-}
-
-static inline GLuint combine_swizzles(GLuint src, GLuint swz)
-{
- GLuint ret = 0;
-
- ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_X));
- ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Y)) << 3;
- ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Z)) << 6;
- ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_W)) << 9;
-
- return ret;
-}
-
-
-/**
- * Transformation context that is passed to local transformations.
- *
- * Care must be taken with some operations during transformation,
- * e.g. finding new temporary registers must use @ref radeonFindFreeTemporary
- */
-struct radeon_transform_context {
- GLcontext *Ctx;
- struct gl_program *Program;
- struct prog_instruction *OldInstructions;
- GLuint OldNumInstructions;
-};
-
-/**
- * A transformation that can be passed to \ref radeonLocalTransform.
- *
- * The function will be called once for each instruction.
- * It has to either emit the appropriate transformed code for the instruction
- * and return GL_TRUE, or return GL_FALSE if it doesn't understand the
- * instruction.
- *
- * The function gets passed the userData as last parameter.
- */
-struct radeon_program_transformation {
- GLboolean (*function)(
- struct radeon_transform_context*,
- struct prog_instruction*,
- void*);
- void *userData;
-};
-
-void radeonLocalTransform(
- GLcontext* ctx,
- struct gl_program *program,
- int num_transformations,
- struct radeon_program_transformation* transformations);
-
-/**
- * Find a usable free temporary register during program transformation
- */
-GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx);
-
-struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count);
-
-#endif
diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/radeon_program_alu.c
deleted file mode 100644
index 8283723bad7..00000000000
--- a/src/mesa/drivers/dri/r300/radeon_program_alu.c
+++ /dev/null
@@ -1,635 +0,0 @@
-/*
- * Copyright (C) 2008 Nicolai Haehnle.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-/**
- * @file
- *
- * Shareable transformations that transform "special" ALU instructions
- * into ALU instructions that are supported by hardware.
- *
- */
-
-#include "radeon_program_alu.h"
-
-#include "shader/prog_parameter.h"
-
-
-static struct prog_instruction *emit1(struct gl_program* p,
- gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
- struct prog_src_register SrcReg)
-{
- struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
-
- fpi->Opcode = Opcode;
- fpi->SaturateMode = Saturate;
- fpi->DstReg = DstReg;
- fpi->SrcReg[0] = SrcReg;
- return fpi;
-}
-
-static struct prog_instruction *emit2(struct gl_program* p,
- gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
- struct prog_src_register SrcReg0, struct prog_src_register SrcReg1)
-{
- struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
-
- fpi->Opcode = Opcode;
- fpi->SaturateMode = Saturate;
- fpi->DstReg = DstReg;
- fpi->SrcReg[0] = SrcReg0;
- fpi->SrcReg[1] = SrcReg1;
- return fpi;
-}
-
-static struct prog_instruction *emit3(struct gl_program* p,
- gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
- struct prog_src_register SrcReg0, struct prog_src_register SrcReg1,
- struct prog_src_register SrcReg2)
-{
- struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
-
- fpi->Opcode = Opcode;
- fpi->SaturateMode = Saturate;
- fpi->DstReg = DstReg;
- fpi->SrcReg[0] = SrcReg0;
- fpi->SrcReg[1] = SrcReg1;
- fpi->SrcReg[2] = SrcReg2;
- return fpi;
-}
-
-static struct prog_dst_register dstreg(int file, int index)
-{
- struct prog_dst_register dst;
- dst.File = file;
- dst.Index = index;
- dst.WriteMask = WRITEMASK_XYZW;
- dst.CondMask = COND_TR;
- dst.CondSwizzle = SWIZZLE_NOOP;
- dst.CondSrc = 0;
- dst.pad = 0;
- return dst;
-}
-
-static struct prog_dst_register dstregtmpmask(int index, int mask)
-{
- struct prog_dst_register dst;
- dst.File = PROGRAM_TEMPORARY;
- dst.Index = index;
- dst.WriteMask = mask;
- dst.CondMask = COND_TR;
- dst.CondSwizzle = SWIZZLE_NOOP;
- dst.CondSrc = 0;
- dst.pad = 0;
- return dst;
-}
-
-static const struct prog_src_register builtin_zero = {
- .File = PROGRAM_BUILTIN,
- .Index = 0,
- .Swizzle = SWIZZLE_0000
-};
-static const struct prog_src_register builtin_one = {
- .File = PROGRAM_BUILTIN,
- .Index = 0,
- .Swizzle = SWIZZLE_1111
-};
-static const struct prog_src_register srcreg_undefined = {
- .File = PROGRAM_UNDEFINED,
- .Index = 0,
- .Swizzle = SWIZZLE_NOOP
-};
-
-static struct prog_src_register srcreg(int file, int index)
-{
- struct prog_src_register src = srcreg_undefined;
- src.File = file;
- src.Index = index;
- return src;
-}
-
-static struct prog_src_register srcregswz(int file, int index, int swz)
-{
- struct prog_src_register src = srcreg_undefined;
- src.File = file;
- src.Index = index;
- src.Swizzle = swz;
- return src;
-}
-
-static struct prog_src_register absolute(struct prog_src_register reg)
-{
- struct prog_src_register newreg = reg;
- newreg.Abs = 1;
- newreg.Negate = NEGATE_NONE;
- return newreg;
-}
-
-static struct prog_src_register negate(struct prog_src_register reg)
-{
- struct prog_src_register newreg = reg;
- newreg.Negate = newreg.Negate ^ NEGATE_XYZW;
- return newreg;
-}
-
-static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w)
-{
- struct prog_src_register swizzled = reg;
- swizzled.Swizzle = MAKE_SWIZZLE4(
- x >= 4 ? x : GET_SWZ(reg.Swizzle, x),
- y >= 4 ? y : GET_SWZ(reg.Swizzle, y),
- z >= 4 ? z : GET_SWZ(reg.Swizzle, z),
- w >= 4 ? w : GET_SWZ(reg.Swizzle, w));
- return swizzled;
-}
-
-static struct prog_src_register scalar(struct prog_src_register reg)
-{
- return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
-}
-
-static void transform_ABS(struct radeon_transform_context* t,
- struct prog_instruction* inst)
-{
- struct prog_src_register src = inst->SrcReg[0];
- src.Abs = 1;
- src.Negate = NEGATE_NONE;
- emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src);
-}
-
-static void transform_DPH(struct radeon_transform_context* t,
- struct prog_instruction* inst)
-{
- struct prog_src_register src0 = inst->SrcReg[0];
- src0.Negate &= ~NEGATE_W;
- src0.Swizzle &= ~(7 << (3 * 3));
- src0.Swizzle |= SWIZZLE_ONE << (3 * 3);
- emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]);
-}
-
-/**
- * [1, src0.y*src1.y, src0.z, src1.w]
- * So basically MUL with lotsa swizzling.
- */
-static void transform_DST(struct radeon_transform_context* t,
- struct prog_instruction* inst)
-{
- emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg,
- swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE),
- swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W));
-}
-
-static void transform_FLR(struct radeon_transform_context* t,
- struct prog_instruction* inst)
-{
- int tempreg = radeonFindFreeTemporary(t);
- emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]);
- emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg,
- inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
-}
-
-/**
- * Definition of LIT (from ARB_fragment_program):
- *
- * tmp = VectorLoad(op0);
- * if (tmp.x < 0) tmp.x = 0;
- * if (tmp.y < 0) tmp.y = 0;
- * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
- * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
- * result.x = 1.0;
- * result.y = tmp.x;
- * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
- * result.w = 1.0;
- *
- * The longest path of computation is the one leading to result.z,
- * consisting of 5 operations. This implementation of LIT takes
- * 5 slots, if the subsequent optimization passes are clever enough
- * to pair instructions correctly.
- */
-static void transform_LIT(struct radeon_transform_context* t,
- struct prog_instruction* inst)
-{
- static const GLfloat LitConst[4] = { -127.999999 };
-
- GLuint constant;
- GLuint constant_swizzle;
- GLuint temp;
- int needTemporary = 0;
- struct prog_src_register srctemp;
-
- constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle);
-
- if (inst->DstReg.WriteMask != WRITEMASK_XYZW) {
- needTemporary = 1;
- } else if (inst->DstReg.File != PROGRAM_TEMPORARY) {
- // LIT is typically followed by DP3/DP4, so there's no point
- // in creating special code for this case
- needTemporary = 1;
- }
-
- if (needTemporary) {
- temp = radeonFindFreeTemporary(t);
- } else {
- temp = inst->DstReg.Index;
- }
- srctemp = srcreg(PROGRAM_TEMPORARY, temp);
-
- // tmp.x = max(0.0, Src.x);
- // tmp.y = max(0.0, Src.y);
- // tmp.w = clamp(Src.z, -128+eps, 128-eps);
- emit2(t->Program, OPCODE_MAX, 0,
- dstregtmpmask(temp, WRITEMASK_XYW),
- inst->SrcReg[0],
- swizzle(srcreg(PROGRAM_CONSTANT, constant),
- SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3));
- emit2(t->Program, OPCODE_MIN, 0,
- dstregtmpmask(temp, WRITEMASK_Z),
- swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)));
-
- // tmp.w = Pow(tmp.y, tmp.w)
- emit1(t->Program, OPCODE_LG2, 0,
- dstregtmpmask(temp, WRITEMASK_W),
- swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
- emit2(t->Program, OPCODE_MUL, 0,
- dstregtmpmask(temp, WRITEMASK_W),
- swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z));
- emit1(t->Program, OPCODE_EX2, 0,
- dstregtmpmask(temp, WRITEMASK_W),
- swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
-
- // tmp.z = (tmp.x > 0) ? tmp.w : 0.0
- emit3(t->Program, OPCODE_CMP, inst->SaturateMode,
- dstregtmpmask(temp, WRITEMASK_Z),
- negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
- swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- builtin_zero);
-
- // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0
- emit1(t->Program, OPCODE_MOV, inst->SaturateMode,
- dstregtmpmask(temp, WRITEMASK_XYW),
- swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE));
-
- if (needTemporary)
- emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp);
-}
-
-static void transform_LRP(struct radeon_transform_context* t,
- struct prog_instruction* inst)
-{
- int tempreg = radeonFindFreeTemporary(t);
-
- emit2(t->Program, OPCODE_ADD, 0,
- dstreg(PROGRAM_TEMPORARY, tempreg),
- inst->SrcReg[1], negate(inst->SrcReg[2]));
- emit3(t->Program, OPCODE_MAD, inst->SaturateMode,
- inst->DstReg,
- inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]);
-}
-
-static void transform_POW(struct radeon_transform_context* t,
- struct prog_instruction* inst)
-{
- int tempreg = radeonFindFreeTemporary(t);
- struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg);
- struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg);
- tempdst.WriteMask = WRITEMASK_W;
- tempsrc.Swizzle = SWIZZLE_WWWW;
-
- emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0]));
- emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1]));
- emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc);
-}
-
-static void transform_RSQ(struct radeon_transform_context* t,
- struct prog_instruction* inst)
-{
- emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0]));
-}
-
-static void transform_SGE(struct radeon_transform_context* t,
- struct prog_instruction* inst)
-{
- int tempreg = radeonFindFreeTemporary(t);
-
- emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
- emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
- srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one);
-}
-
-static void transform_SLT(struct radeon_transform_context* t,
- struct prog_instruction* inst)
-{
- int tempreg = radeonFindFreeTemporary(t);
-
- emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
- emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
- srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero);
-}
-
-static void transform_SUB(struct radeon_transform_context* t,
- struct prog_instruction* inst)
-{
- emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1]));
-}
-
-static void transform_SWZ(struct radeon_transform_context* t,
- struct prog_instruction* inst)
-{
- emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]);
-}
-
-static void transform_XPD(struct radeon_transform_context* t,
- struct prog_instruction* inst)
-{
- int tempreg = radeonFindFreeTemporary(t);
-
- emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg),
- swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
- swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
- emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg,
- swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
- swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
- negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
-}
-
-
-/**
- * Can be used as a transformation for @ref radeonClauseLocalTransform,
- * no userData necessary.
- *
- * Eliminates the following ALU instructions:
- * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD
- * using:
- * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
- *
- * Transforms RSQ to Radeon's native RSQ by explicitly setting
- * absolute value.
- *
- * @note should be applicable to R300 and R500 fragment programs.
- */
-GLboolean radeonTransformALU(struct radeon_transform_context* t,
- struct prog_instruction* inst,
- void* unused)
-{
- switch(inst->Opcode) {
- case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE;
- case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;
- case OPCODE_DST: transform_DST(t, inst); return GL_TRUE;
- case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;
- case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE;
- case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE;
- case OPCODE_POW: transform_POW(t, inst); return GL_TRUE;
- case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE;
- case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE;
- case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE;
- case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE;
- case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE;
- case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE;
- default:
- return GL_FALSE;
- }
-}
-
-
-static void sincos_constants(struct radeon_transform_context* t, GLuint *constants)
-{
- static const GLfloat SinCosConsts[2][4] = {
- {
- 1.273239545, // 4/PI
- -0.405284735, // -4/(PI*PI)
- 3.141592654, // PI
- 0.2225 // weight
- },
- {
- 0.75,
- 0.5,
- 0.159154943, // 1/(2*PI)
- 6.283185307 // 2*PI
- }
- };
- int i;
-
- for(i = 0; i < 2; ++i) {
- GLuint swz;
- constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz);
- ASSERT(swz == SWIZZLE_NOOP);
- }
-}
-
-/**
- * Approximate sin(x), where x is clamped to (-pi/2, pi/2).
- *
- * MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
- * MAD tmp.x, tmp.y, |src|, tmp.x
- * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
- * MAD dest, tmp.y, weight, tmp.x
- */
-static void sin_approx(struct radeon_transform_context* t,
- struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants)
-{
- GLuint tempreg = radeonFindFreeTemporary(t);
-
- emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
- swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
- srcreg(PROGRAM_CONSTANT, constants[0]));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
- absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
- absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
- negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)));
- emit3(t->Program, OPCODE_MAD, 0, dst,
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
-}
-
-/**
- * Translate the trigonometric functions COS, SIN, and SCS
- * using only the basic instructions
- * MOV, ADD, MUL, MAD, FRC
- */
-GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
- struct prog_instruction* inst,
- void* unused)
-{
- if (inst->Opcode != OPCODE_COS &&
- inst->Opcode != OPCODE_SIN &&
- inst->Opcode != OPCODE_SCS)
- return GL_FALSE;
-
- GLuint constants[2];
- GLuint tempreg = radeonFindFreeTemporary(t);
-
- sincos_constants(t, constants);
-
- if (inst->Opcode == OPCODE_COS) {
- // MAD tmp.x, src, 1/(2*PI), 0.75
- // FRC tmp.x, tmp.x
- // MAD tmp.z, tmp.x, 2*PI, -PI
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
- emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
-
- sin_approx(t, inst->DstReg,
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- constants);
- } else if (inst->Opcode == OPCODE_SIN) {
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
- emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
-
- sin_approx(t, inst->DstReg,
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- constants);
- } else {
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
- swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W));
- emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
- srcreg(PROGRAM_TEMPORARY, tempreg));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
- srcreg(PROGRAM_TEMPORARY, tempreg),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
-
- struct prog_dst_register dst = inst->DstReg;
-
- dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X;
- sin_approx(t, dst,
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
- constants);
-
- dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y;
- sin_approx(t, dst,
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
- constants);
- }
-
- return GL_TRUE;
-}
-
-
-/**
- * Transform the trigonometric functions COS, SIN, and SCS
- * to include pre-scaling by 1/(2*PI) and taking the fractional
- * part, so that the input to COS and SIN is always in the range [0,1).
- * SCS is replaced by one COS and one SIN instruction.
- *
- * @warning This transformation implicitly changes the semantics of SIN and COS!
- */
-GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
- struct prog_instruction* inst,
- void* unused)
-{
- if (inst->Opcode != OPCODE_COS &&
- inst->Opcode != OPCODE_SIN &&
- inst->Opcode != OPCODE_SCS)
- return GL_FALSE;
-
- static const GLfloat RCP_2PI[] = { 0.15915494309189535 };
- GLuint temp;
- GLuint constant;
- GLuint constant_swizzle;
-
- temp = radeonFindFreeTemporary(t);
- constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle);
-
- emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W),
- swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
- srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle));
- emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W),
- srcreg(PROGRAM_TEMPORARY, temp));
-
- if (inst->Opcode == OPCODE_COS) {
- emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg,
- srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
- } else if (inst->Opcode == OPCODE_SIN) {
- emit1(t->Program, OPCODE_SIN, inst->SaturateMode,
- inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
- } else if (inst->Opcode == OPCODE_SCS) {
- struct prog_dst_register moddst = inst->DstReg;
-
- if (inst->DstReg.WriteMask & WRITEMASK_X) {
- moddst.WriteMask = WRITEMASK_X;
- emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst,
- srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
- }
- if (inst->DstReg.WriteMask & WRITEMASK_Y) {
- moddst.WriteMask = WRITEMASK_Y;
- emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst,
- srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
- }
- }
-
- return GL_TRUE;
-}
-
-/**
- * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
- * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
- * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
- *
- * @warning This explicitly changes the form of DDX and DDY!
- */
-
-GLboolean radeonTransformDeriv(struct radeon_transform_context* t,
- struct prog_instruction* inst,
- void* unused)
-{
- if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY)
- return GL_FALSE;
-
- struct prog_src_register B = inst->SrcReg[1];
-
- B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE,
- SWIZZLE_ONE, SWIZZLE_ONE);
- B.Negate = NEGATE_XYZW;
-
- emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg,
- inst->SrcReg[0], B);
-
- return GL_TRUE;
-}
diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.h b/src/mesa/drivers/dri/r300/radeon_program_alu.h
deleted file mode 100644
index b45958115cf..00000000000
--- a/src/mesa/drivers/dri/r300/radeon_program_alu.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (C) 2008 Nicolai Haehnle.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __RADEON_PROGRAM_ALU_H_
-#define __RADEON_PROGRAM_ALU_H_
-
-#include "radeon_program.h"
-
-GLboolean radeonTransformALU(
- struct radeon_transform_context *t,
- struct prog_instruction*,
- void*);
-
-GLboolean radeonTransformTrigSimple(
- struct radeon_transform_context *t,
- struct prog_instruction*,
- void*);
-
-GLboolean radeonTransformTrigScale(
- struct radeon_transform_context *t,
- struct prog_instruction*,
- void*);
-
-GLboolean radeonTransformDeriv(
- struct radeon_transform_context *t,
- struct prog_instruction*,
- void*);
-
-#endif /* __RADEON_PROGRAM_ALU_H_ */
diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c
deleted file mode 100644
index d6fb474cf23..00000000000
--- a/src/mesa/drivers/dri/r300/radeon_program_pair.c
+++ /dev/null
@@ -1,1004 +0,0 @@
-/*
- * Copyright (C) 2008 Nicolai Haehnle.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-/**
- * @file
- *
- * Perform temporary register allocation and attempt to pair off instructions
- * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction
- * vs. ALU instruction scheduling.
- */
-
-#include "radeon_program_pair.h"
-
-#include "radeon_common.h"
-
-#include "shader/prog_print.h"
-
-#define error(fmt, args...) do { \
- _mesa_problem(s->Ctx, "%s::%s(): " fmt "\n", \
- __FILE__, __FUNCTION__, ##args); \
- s->Error = GL_TRUE; \
-} while(0)
-
-struct pair_state_instruction {
- GLuint IsTex:1; /**< Is a texture instruction */
- GLuint NeedRGB:1; /**< Needs the RGB ALU */
- GLuint NeedAlpha:1; /**< Needs the Alpha ALU */
- GLuint IsTranscendent:1; /**< Is a special transcendent instruction */
-
- /**
- * Number of (read and write) dependencies that must be resolved before
- * this instruction can be scheduled.
- */
- GLuint NumDependencies:5;
-
- /**
- * Next instruction in the linked list of ready instructions.
- */
- struct pair_state_instruction *NextReady;
-
- /**
- * Values that this instruction writes
- */
- struct reg_value *Values[4];
-};
-
-
-/**
- * Used to keep track of which instructions read a value.
- */
-struct reg_value_reader {
- GLuint IP; /**< IP of the instruction that performs this access */
- struct reg_value_reader *Next;
-};
-
-/**
- * Used to keep track which values are stored in each component of a
- * PROGRAM_TEMPORARY.
- */
-struct reg_value {
- GLuint IP; /**< IP of the instruction that writes this value */
- struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */
-
- /**
- * Unordered linked list of instructions that read from this value.
- */
- struct reg_value_reader *Readers;
-
- /**
- * Number of readers of this value. This is calculated during @ref scan_instructions
- * and continually decremented during code emission.
- * When this count reaches zero, the instruction that writes the @ref Next value
- * can be scheduled.
- */
- GLuint NumReaders;
-};
-
-/**
- * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register
- * to the proper hardware temporary.
- */
-struct pair_register_translation {
- GLuint Allocated:1;
- GLuint HwIndex:8;
- GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */
-
- /**
- * Notes the value that is currently contained in each component
- * (only used for PROGRAM_TEMPORARY registers).
- */
- struct reg_value *Value[4];
-};
-
-struct pair_state {
- GLcontext *Ctx;
- struct gl_program *Program;
- const struct radeon_pair_handler *Handler;
- GLboolean Error;
- GLboolean Debug;
- GLboolean Verbose;
- void *UserData;
-
- /**
- * Translate Mesa registers to hardware registers
- */
- struct pair_register_translation Inputs[FRAG_ATTRIB_MAX];
- struct pair_register_translation Temps[MAX_PROGRAM_TEMPS];
-
- /**
- * Derived information about program instructions.
- */
- struct pair_state_instruction *Instructions;
-
- struct {
- GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */
- } HwTemps[128];
-
- /**
- * Linked list of instructions that can be scheduled right now,
- * based on which ALU/TEX resources they require.
- */
- struct pair_state_instruction *ReadyFullALU;
- struct pair_state_instruction *ReadyRGB;
- struct pair_state_instruction *ReadyAlpha;
- struct pair_state_instruction *ReadyTEX;
-
- /**
- * Pool of @ref reg_value structures for fast allocation.
- */
- struct reg_value *ValuePool;
- GLuint ValuePoolUsed;
- struct reg_value_reader *ReaderPool;
- GLuint ReaderPoolUsed;
-};
-
-
-static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index)
-{
- switch(file) {
- case PROGRAM_TEMPORARY: return &s->Temps[index];
- case PROGRAM_INPUT: return &s->Inputs[index];
- default: return 0;
- }
-}
-
-static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex)
-{
- struct pair_register_translation *t = get_register(s, file, index);
- ASSERT(!s->HwTemps[hwindex].RefCount);
- ASSERT(!t->Allocated);
- s->HwTemps[hwindex].RefCount = t->RefCount;
- t->Allocated = 1;
- t->HwIndex = hwindex;
-}
-
-static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index)
-{
- GLuint hwindex;
-
- struct pair_register_translation *t = get_register(s, file, index);
- if (!t) {
- _mesa_problem(s->Ctx, "get_hw_reg: %i[%i]\n", file, index);
- return 0;
- }
-
- if (t->Allocated)
- return t->HwIndex;
-
- for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex)
- if (!s->HwTemps[hwindex].RefCount)
- break;
-
- if (hwindex >= s->Handler->MaxHwTemps) {
- error("Ran out of hardware temporaries");
- return 0;
- }
-
- alloc_hw_reg(s, file, index, hwindex);
- return hwindex;
-}
-
-
-static void deref_hw_reg(struct pair_state *s, GLuint hwindex)
-{
- if (!s->HwTemps[hwindex].RefCount) {
- error("Hwindex %i refcount error", hwindex);
- return;
- }
-
- s->HwTemps[hwindex].RefCount--;
-}
-
-static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst)
-{
- pairinst->NextReady = *list;
- *list = pairinst;
-}
-
-/**
- * The instruction at the given IP has become ready. Link it into the ready
- * instructions.
- */
-static void instruction_ready(struct pair_state *s, int ip)
-{
- struct pair_state_instruction *pairinst = s->Instructions + ip;
-
- if (s->Verbose)
- _mesa_printf("instruction_ready(%i)\n", ip);
-
- if (pairinst->IsTex)
- add_pairinst_to_list(&s->ReadyTEX, pairinst);
- else if (!pairinst->NeedAlpha)
- add_pairinst_to_list(&s->ReadyRGB, pairinst);
- else if (!pairinst->NeedRGB)
- add_pairinst_to_list(&s->ReadyAlpha, pairinst);
- else
- add_pairinst_to_list(&s->ReadyFullALU, pairinst);
-}
-
-
-/**
- * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
- * and reverse the order of arguments for CMP.
- */
-static void final_rewrite(struct pair_state *s, struct prog_instruction *inst)
-{
- struct prog_src_register tmp;
-
- switch(inst->Opcode) {
- case OPCODE_ADD:
- inst->SrcReg[2] = inst->SrcReg[1];
- inst->SrcReg[1].File = PROGRAM_BUILTIN;
- inst->SrcReg[1].Swizzle = SWIZZLE_1111;
- inst->SrcReg[1].Negate = NEGATE_NONE;
- inst->Opcode = OPCODE_MAD;
- break;
- case OPCODE_CMP:
- tmp = inst->SrcReg[2];
- inst->SrcReg[2] = inst->SrcReg[0];
- inst->SrcReg[0] = tmp;
- break;
- case OPCODE_MOV:
- /* AMD say we should use CMP.
- * However, when we transform
- * KIL -r0;
- * into
- * CMP tmp, -r0, -r0, 0;
- * KIL tmp;
- * we get incorrect behaviour on R500 when r0 == 0.0.
- * It appears that the R500 KIL hardware treats -0.0 as less
- * than zero.
- */
- inst->SrcReg[1].File = PROGRAM_BUILTIN;
- inst->SrcReg[1].Swizzle = SWIZZLE_1111;
- inst->SrcReg[2].File = PROGRAM_BUILTIN;
- inst->SrcReg[2].Swizzle = SWIZZLE_0000;
- inst->Opcode = OPCODE_MAD;
- break;
- case OPCODE_MUL:
- inst->SrcReg[2].File = PROGRAM_BUILTIN;
- inst->SrcReg[2].Swizzle = SWIZZLE_0000;
- inst->Opcode = OPCODE_MAD;
- break;
- default:
- /* nothing to do */
- break;
- }
-}
-
-
-/**
- * Classify an instruction according to which ALUs etc. it needs
- */
-static void classify_instruction(struct pair_state *s,
- struct prog_instruction *inst, struct pair_state_instruction *pairinst)
-{
- pairinst->NeedRGB = (inst->DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0;
- pairinst->NeedAlpha = (inst->DstReg.WriteMask & WRITEMASK_W) ? 1 : 0;
-
- switch(inst->Opcode) {
- case OPCODE_ADD:
- case OPCODE_CMP:
- case OPCODE_DDX:
- case OPCODE_DDY:
- case OPCODE_FRC:
- case OPCODE_MAD:
- case OPCODE_MAX:
- case OPCODE_MIN:
- case OPCODE_MOV:
- case OPCODE_MUL:
- break;
- case OPCODE_COS:
- case OPCODE_EX2:
- case OPCODE_LG2:
- case OPCODE_RCP:
- case OPCODE_RSQ:
- case OPCODE_SIN:
- pairinst->IsTranscendent = 1;
- pairinst->NeedAlpha = 1;
- break;
- case OPCODE_DP4:
- pairinst->NeedAlpha = 1;
- /* fall through */
- case OPCODE_DP3:
- pairinst->NeedRGB = 1;
- break;
- case OPCODE_KIL:
- case OPCODE_TEX:
- case OPCODE_TXB:
- case OPCODE_TXP:
- case OPCODE_END:
- pairinst->IsTex = 1;
- break;
- default:
- error("Unknown opcode %d\n", inst->Opcode);
- break;
- }
-}
-
-
-/**
- * Count which (input, temporary) register is read and written how often,
- * and scan the instruction stream to find dependencies.
- */
-static void scan_instructions(struct pair_state *s)
-{
- struct prog_instruction *inst;
- struct pair_state_instruction *pairinst;
- GLuint ip;
-
- for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0;
- inst->Opcode != OPCODE_END;
- ++inst, ++pairinst, ++ip) {
- final_rewrite(s, inst);
- classify_instruction(s, inst, pairinst);
-
- int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
- int j;
- for(j = 0; j < nsrc; j++) {
- struct pair_register_translation *t =
- get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index);
- if (!t)
- continue;
-
- t->RefCount++;
-
- if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
- int i;
- for(i = 0; i < 4; ++i) {
- GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i);
- if (swz >= 4)
- continue; /* constant or NIL swizzle */
- if (!t->Value[swz])
- continue; /* this is an undefined read */
-
- /* Do not add a dependency if this instruction
- * also rewrites the value. The code below adds
- * a dependency for the DstReg, which is a superset
- * of the SrcReg dependency. */
- if (inst->DstReg.File == PROGRAM_TEMPORARY &&
- inst->DstReg.Index == inst->SrcReg[j].Index &&
- GET_BIT(inst->DstReg.WriteMask, swz))
- continue;
-
- struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++];
- pairinst->NumDependencies++;
- t->Value[swz]->NumReaders++;
- r->IP = ip;
- r->Next = t->Value[swz]->Readers;
- t->Value[swz]->Readers = r;
- }
- }
- }
-
- int ndst = _mesa_num_inst_dst_regs(inst->Opcode);
- if (ndst) {
- struct pair_register_translation *t =
- get_register(s, inst->DstReg.File, inst->DstReg.Index);
- if (t) {
- t->RefCount++;
-
- if (inst->DstReg.File == PROGRAM_TEMPORARY) {
- int j;
- for(j = 0; j < 4; ++j) {
- if (!GET_BIT(inst->DstReg.WriteMask, j))
- continue;
-
- struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++];
- v->IP = ip;
- if (t->Value[j]) {
- pairinst->NumDependencies++;
- t->Value[j]->Next = v;
- }
- t->Value[j] = v;
- pairinst->Values[j] = v;
- }
- }
- }
- }
-
- if (s->Verbose)
- _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies);
-
- if (!pairinst->NumDependencies)
- instruction_ready(s, ip);
- }
-
- /* Clear the PROGRAM_TEMPORARY state */
- int i, j;
- for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) {
- for(j = 0; j < 4; ++j)
- s->Temps[i].Value[j] = 0;
- }
-}
-
-
-/**
- * Reserve hardware temporary registers for the program inputs.
- *
- * @note This allocation is performed explicitly, because the order of inputs
- * is determined by the RS hardware.
- */
-static void allocate_input_registers(struct pair_state *s)
-{
- GLuint InputsRead = s->Program->InputsRead;
- int i;
- GLuint hwindex = 0;
-
- /* Primary colour */
- if (InputsRead & FRAG_BIT_COL0)
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++);
- InputsRead &= ~FRAG_BIT_COL0;
-
- /* Secondary color */
- if (InputsRead & FRAG_BIT_COL1)
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++);
- InputsRead &= ~FRAG_BIT_COL1;
-
- /* Texcoords */
- for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) {
- if (InputsRead & (FRAG_BIT_TEX0 << i))
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++);
- }
- InputsRead &= ~FRAG_BITS_TEX_ANY;
-
- /* Fogcoords treated as a texcoord */
- if (InputsRead & FRAG_BIT_FOGC)
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++);
- InputsRead &= ~FRAG_BIT_FOGC;
-
- /* fragment position treated as a texcoord */
- if (InputsRead & FRAG_BIT_WPOS)
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++);
- InputsRead &= ~FRAG_BIT_WPOS;
-
- /* Anything else */
- if (InputsRead)
- error("Don't know how to handle inputs 0x%x\n", InputsRead);
-}
-
-
-static void decrement_dependencies(struct pair_state *s, int ip)
-{
- struct pair_state_instruction *pairinst = s->Instructions + ip;
- ASSERT(pairinst->NumDependencies > 0);
- if (!--pairinst->NumDependencies)
- instruction_ready(s, ip);
-}
-
-/**
- * Update the dependency tracking state based on what the instruction
- * at the given IP does.
- */
-static void commit_instruction(struct pair_state *s, int ip)
-{
- struct prog_instruction *inst = s->Program->Instructions + ip;
- struct pair_state_instruction *pairinst = s->Instructions + ip;
-
- if (s->Verbose)
- _mesa_printf("commit_instruction(%i)\n", ip);
-
- if (inst->DstReg.File == PROGRAM_TEMPORARY) {
- struct pair_register_translation *t = &s->Temps[inst->DstReg.Index];
- deref_hw_reg(s, t->HwIndex);
-
- int i;
- for(i = 0; i < 4; ++i) {
- if (!GET_BIT(inst->DstReg.WriteMask, i))
- continue;
-
- t->Value[i] = pairinst->Values[i];
- if (t->Value[i]->NumReaders) {
- struct reg_value_reader *r;
- for(r = pairinst->Values[i]->Readers; r; r = r->Next)
- decrement_dependencies(s, r->IP);
- } else if (t->Value[i]->Next) {
- /* This happens when the only reader writes
- * the register at the same time */
- decrement_dependencies(s, t->Value[i]->Next->IP);
- }
- }
- }
-
- int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
- int i;
- for(i = 0; i < nsrc; i++) {
- struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index);
- if (!t)
- continue;
-
- deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index));
-
- if (inst->SrcReg[i].File != PROGRAM_TEMPORARY)
- continue;
-
- int j;
- for(j = 0; j < 4; ++j) {
- GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
- if (swz >= 4)
- continue;
- if (!t->Value[swz])
- continue;
-
- /* Do not free a dependency if this instruction
- * also rewrites the value. See scan_instructions. */
- if (inst->DstReg.File == PROGRAM_TEMPORARY &&
- inst->DstReg.Index == inst->SrcReg[i].Index &&
- GET_BIT(inst->DstReg.WriteMask, swz))
- continue;
-
- if (!--t->Value[swz]->NumReaders) {
- if (t->Value[swz]->Next)
- decrement_dependencies(s, t->Value[swz]->Next->IP);
- }
- }
- }
-}
-
-
-/**
- * Emit all ready texture instructions in a single block.
- *
- * Emit as a single block to (hopefully) sample many textures in parallel,
- * and to avoid hardware indirections on R300.
- *
- * In R500, we don't really know when the result of a texture instruction
- * arrives. So allocate all destinations first, to make sure they do not
- * arrive early and overwrite a texture coordinate we're going to use later
- * in the block.
- */
-static void emit_all_tex(struct pair_state *s)
-{
- struct pair_state_instruction *readytex;
- struct pair_state_instruction *pairinst;
-
- ASSERT(s->ReadyTEX);
-
- // Don't let the ready list change under us!
- readytex = s->ReadyTEX;
- s->ReadyTEX = 0;
-
- // Allocate destination hardware registers in one block to avoid conflicts.
- for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
- int ip = pairinst - s->Instructions;
- struct prog_instruction *inst = s->Program->Instructions + ip;
- if (inst->Opcode != OPCODE_KIL)
- get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
- }
-
- if (s->Debug)
- _mesa_printf(" BEGIN_TEX\n");
-
- if (s->Handler->BeginTexBlock)
- s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData);
-
- for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
- int ip = pairinst - s->Instructions;
- struct prog_instruction *inst = s->Program->Instructions + ip;
- commit_instruction(s, ip);
-
- if (inst->Opcode != OPCODE_KIL)
- inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
- inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index);
-
- if (s->Debug) {
- _mesa_printf(" ");
- _mesa_print_instruction(inst);
- fflush(stdout);
- }
- s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst);
- }
-
- if (s->Debug)
- _mesa_printf(" END_TEX\n");
-}
-
-
-static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair,
- struct prog_src_register src, GLboolean rgb, GLboolean alpha)
-{
- int candidate = -1;
- int candidate_quality = -1;
- int i;
-
- if (!rgb && !alpha)
- return 0;
-
- GLuint constant;
- GLuint index;
-
- if (src.File == PROGRAM_TEMPORARY || src.File == PROGRAM_INPUT) {
- constant = 0;
- index = get_hw_reg(s, src.File, src.Index);
- } else {
- constant = 1;
- s->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index);
- }
-
- for(i = 0; i < 3; ++i) {
- int q = 0;
- if (rgb) {
- if (pair->RGB.Src[i].Used) {
- if (pair->RGB.Src[i].Constant != constant ||
- pair->RGB.Src[i].Index != index)
- continue;
- q++;
- }
- }
- if (alpha) {
- if (pair->Alpha.Src[i].Used) {
- if (pair->Alpha.Src[i].Constant != constant ||
- pair->Alpha.Src[i].Index != index)
- continue;
- q++;
- }
- }
- if (q > candidate_quality) {
- candidate_quality = q;
- candidate = i;
- }
- }
-
- if (candidate >= 0) {
- if (rgb) {
- pair->RGB.Src[candidate].Used = 1;
- pair->RGB.Src[candidate].Constant = constant;
- pair->RGB.Src[candidate].Index = index;
- }
- if (alpha) {
- pair->Alpha.Src[candidate].Used = 1;
- pair->Alpha.Src[candidate].Constant = constant;
- pair->Alpha.Src[candidate].Index = index;
- }
- }
-
- return candidate;
-}
-
-/**
- * Fill the given ALU instruction's opcodes and source operands into the given pair,
- * if possible.
- */
-static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip)
-{
- struct pair_state_instruction *pairinst = s->Instructions + ip;
- struct prog_instruction *inst = s->Program->Instructions + ip;
-
- ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP);
- ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP);
-
- if (pairinst->NeedRGB) {
- if (pairinst->IsTranscendent)
- pair->RGB.Opcode = OPCODE_REPL_ALPHA;
- else
- pair->RGB.Opcode = inst->Opcode;
- if (inst->SaturateMode == SATURATE_ZERO_ONE)
- pair->RGB.Saturate = 1;
- }
- if (pairinst->NeedAlpha) {
- pair->Alpha.Opcode = inst->Opcode;
- if (inst->SaturateMode == SATURATE_ZERO_ONE)
- pair->Alpha.Saturate = 1;
- }
-
- int nargs = _mesa_num_inst_src_regs(inst->Opcode);
- int i;
-
- /* Special case for DDX/DDY (MDH/MDV). */
- if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) {
- if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used)
- return GL_FALSE;
- else
- nargs++;
- }
-
- for(i = 0; i < nargs; ++i) {
- int source;
- if (pairinst->NeedRGB && !pairinst->IsTranscendent) {
- GLboolean srcrgb = GL_FALSE;
- GLboolean srcalpha = GL_FALSE;
- int j;
- for(j = 0; j < 3; ++j) {
- GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
- if (swz < 3)
- srcrgb = GL_TRUE;
- else if (swz < 4)
- srcalpha = GL_TRUE;
- }
- source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha);
- if (source < 0)
- return GL_FALSE;
- pair->RGB.Arg[i].Source = source;
- pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff;
- pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
- pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (NEGATE_X | NEGATE_Y | NEGATE_Z));
- }
- if (pairinst->NeedAlpha) {
- GLboolean srcrgb = GL_FALSE;
- GLboolean srcalpha = GL_FALSE;
- GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3);
- if (swz < 3)
- srcrgb = GL_TRUE;
- else if (swz < 4)
- srcalpha = GL_TRUE;
- source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha);
- if (source < 0)
- return GL_FALSE;
- pair->Alpha.Arg[i].Source = source;
- pair->Alpha.Arg[i].Swizzle = swz;
- pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
- pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & NEGATE_W);
- }
- }
-
- return GL_TRUE;
-}
-
-
-/**
- * Fill in the destination register information.
- *
- * This is split from filling in source registers because we want
- * to avoid allocating hardware temporaries for destinations until
- * we are absolutely certain that we're going to emit a certain
- * instruction pairing.
- */
-static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip)
-{
- struct pair_state_instruction *pairinst = s->Instructions + ip;
- struct prog_instruction *inst = s->Program->Instructions + ip;
-
- if (inst->DstReg.File == PROGRAM_OUTPUT) {
- if (inst->DstReg.Index == FRAG_RESULT_COLOR) {
- pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ;
- pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
- } else if (inst->DstReg.Index == FRAG_RESULT_DEPTH) {
- pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
- }
- } else {
- GLuint hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
- if (pairinst->NeedRGB) {
- pair->RGB.DestIndex = hwindex;
- pair->RGB.WriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ;
- }
- if (pairinst->NeedAlpha) {
- pair->Alpha.DestIndex = hwindex;
- pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
- }
- }
-}
-
-
-/**
- * Find a good ALU instruction or pair of ALU instruction and emit it.
- *
- * Prefer emitting full ALU instructions, so that when we reach a point
- * where no full ALU instruction can be emitted, we have more candidates
- * for RGB/Alpha pairing.
- */
-static void emit_alu(struct pair_state *s)
-{
- struct radeon_pair_instruction pair;
-
- if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) {
- int ip;
- if (s->ReadyFullALU) {
- ip = s->ReadyFullALU - s->Instructions;
- s->ReadyFullALU = s->ReadyFullALU->NextReady;
- } else if (s->ReadyRGB) {
- ip = s->ReadyRGB - s->Instructions;
- s->ReadyRGB = s->ReadyRGB->NextReady;
- } else {
- ip = s->ReadyAlpha - s->Instructions;
- s->ReadyAlpha = s->ReadyAlpha->NextReady;
- }
-
- _mesa_bzero(&pair, sizeof(pair));
- fill_instruction_into_pair(s, &pair, ip);
- fill_dest_into_pair(s, &pair, ip);
- commit_instruction(s, ip);
- } else {
- struct pair_state_instruction **prgb;
- struct pair_state_instruction **palpha;
-
- /* Some pairings might fail because they require too
- * many source slots; try all possible pairings if necessary */
- for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
- for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
- int rgbip = *prgb - s->Instructions;
- int alphaip = *palpha - s->Instructions;
- _mesa_bzero(&pair, sizeof(pair));
- fill_instruction_into_pair(s, &pair, rgbip);
- if (!fill_instruction_into_pair(s, &pair, alphaip))
- continue;
- *prgb = (*prgb)->NextReady;
- *palpha = (*palpha)->NextReady;
- fill_dest_into_pair(s, &pair, rgbip);
- fill_dest_into_pair(s, &pair, alphaip);
- commit_instruction(s, rgbip);
- commit_instruction(s, alphaip);
- goto success;
- }
- }
-
- /* No success in pairing; just take the first RGB instruction */
- int ip = s->ReadyRGB - s->Instructions;
- s->ReadyRGB = s->ReadyRGB->NextReady;
- _mesa_bzero(&pair, sizeof(pair));
- fill_instruction_into_pair(s, &pair, ip);
- fill_dest_into_pair(s, &pair, ip);
- commit_instruction(s, ip);
- success: ;
- }
-
- if (s->Debug)
- radeonPrintPairInstruction(&pair);
-
- s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair);
-}
-
-
-GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
- const struct radeon_pair_handler* handler, void *userdata)
-{
- struct pair_state s;
-
- _mesa_bzero(&s, sizeof(s));
- s.Ctx = ctx;
- s.Program = _mesa_clone_program(ctx, program);
- s.Handler = handler;
- s.UserData = userdata;
- s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE;
- s.Verbose = GL_FALSE && s.Debug;
-
- s.Instructions = (struct pair_state_instruction*)_mesa_calloc(
- sizeof(struct pair_state_instruction)*s.Program->NumInstructions);
- s.ValuePool = (struct reg_value*)_mesa_calloc(sizeof(struct reg_value)*s.Program->NumInstructions*4);
- s.ReaderPool = (struct reg_value_reader*)_mesa_calloc(
- sizeof(struct reg_value_reader)*s.Program->NumInstructions*12);
-
- if (s.Debug)
- _mesa_printf("Emit paired program\n");
-
- scan_instructions(&s);
- allocate_input_registers(&s);
-
- while(!s.Error &&
- (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
- if (s.ReadyTEX)
- emit_all_tex(&s);
-
- while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)
- emit_alu(&s);
- }
-
- if (s.Debug)
- _mesa_printf(" END\n");
-
- _mesa_free(s.Instructions);
- _mesa_free(s.ValuePool);
- _mesa_free(s.ReaderPool);
-
- _mesa_reference_program(ctx, &s.Program, NULL);
-
- return !s.Error;
-}
-
-
-static void print_pair_src(int i, struct radeon_pair_instruction_source* src)
-{
- _mesa_printf(" Src%i = %s[%i]", i, src->Constant ? "CNST" : "TEMP", src->Index);
-}
-
-static const char* opcode_string(GLuint opcode)
-{
- if (opcode == OPCODE_REPL_ALPHA)
- return "SOP";
- else
- return _mesa_opcode_string(opcode);
-}
-
-static int num_pairinst_args(GLuint opcode)
-{
- if (opcode == OPCODE_REPL_ALPHA)
- return 0;
- else
- return _mesa_num_inst_src_regs(opcode);
-}
-
-static char swizzle_char(GLuint swz)
-{
- switch(swz) {
- case SWIZZLE_X: return 'x';
- case SWIZZLE_Y: return 'y';
- case SWIZZLE_Z: return 'z';
- case SWIZZLE_W: return 'w';
- case SWIZZLE_ZERO: return '0';
- case SWIZZLE_ONE: return '1';
- case SWIZZLE_NIL: return '_';
- default: return '?';
- }
-}
-
-void radeonPrintPairInstruction(struct radeon_pair_instruction *inst)
-{
- int nargs;
- int i;
-
- _mesa_printf(" RGB: ");
- for(i = 0; i < 3; ++i) {
- if (inst->RGB.Src[i].Used)
- print_pair_src(i, inst->RGB.Src + i);
- }
- _mesa_printf("\n");
- _mesa_printf(" Alpha:");
- for(i = 0; i < 3; ++i) {
- if (inst->Alpha.Src[i].Used)
- print_pair_src(i, inst->Alpha.Src + i);
- }
- _mesa_printf("\n");
-
- _mesa_printf(" %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : "");
- if (inst->RGB.WriteMask)
- _mesa_printf(" TEMP[%i].%s%s%s", inst->RGB.DestIndex,
- (inst->RGB.WriteMask & 1) ? "x" : "",
- (inst->RGB.WriteMask & 2) ? "y" : "",
- (inst->RGB.WriteMask & 4) ? "z" : "");
- if (inst->RGB.OutputWriteMask)
- _mesa_printf(" COLOR.%s%s%s",
- (inst->RGB.OutputWriteMask & 1) ? "x" : "",
- (inst->RGB.OutputWriteMask & 2) ? "y" : "",
- (inst->RGB.OutputWriteMask & 4) ? "z" : "");
- nargs = num_pairinst_args(inst->RGB.Opcode);
- for(i = 0; i < nargs; ++i) {
- const char* abs = inst->RGB.Arg[i].Abs ? "|" : "";
- const char* neg = inst->RGB.Arg[i].Negate ? "-" : "";
- _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source,
- swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)),
- swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)),
- swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)),
- abs);
- }
- _mesa_printf("\n");
-
- _mesa_printf(" %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : "");
- if (inst->Alpha.WriteMask)
- _mesa_printf(" TEMP[%i].w", inst->Alpha.DestIndex);
- if (inst->Alpha.OutputWriteMask)
- _mesa_printf(" COLOR.w");
- if (inst->Alpha.DepthWriteMask)
- _mesa_printf(" DEPTH.w");
- nargs = num_pairinst_args(inst->Alpha.Opcode);
- for(i = 0; i < nargs; ++i) {
- const char* abs = inst->Alpha.Arg[i].Abs ? "|" : "";
- const char* neg = inst->Alpha.Arg[i].Negate ? "-" : "";
- _mesa_printf(", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source,
- swizzle_char(inst->Alpha.Arg[i].Swizzle), abs);
- }
- _mesa_printf("\n");
-}
diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.h b/src/mesa/drivers/dri/r300/radeon_program_pair.h
deleted file mode 100644
index 4624a246298..00000000000
--- a/src/mesa/drivers/dri/r300/radeon_program_pair.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (C) 2008 Nicolai Haehnle.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __RADEON_PROGRAM_PAIR_H_
-#define __RADEON_PROGRAM_PAIR_H_
-
-#include "radeon_program.h"
-
-
-/**
- * Represents a paired instruction, as found in R300 and R500
- * fragment programs.
- */
-struct radeon_pair_instruction_source {
- GLuint Index:8;
- GLuint Constant:1;
- GLuint Used:1;
-};
-
-struct radeon_pair_instruction_rgb {
- GLuint Opcode:8;
- GLuint DestIndex:8;
- GLuint WriteMask:3;
- GLuint OutputWriteMask:3;
- GLuint Saturate:1;
-
- struct radeon_pair_instruction_source Src[3];
-
- struct {
- GLuint Source:2;
- GLuint Swizzle:9;
- GLuint Abs:1;
- GLuint Negate:1;
- } Arg[3];
-};
-
-struct radeon_pair_instruction_alpha {
- GLuint Opcode:8;
- GLuint DestIndex:8;
- GLuint WriteMask:1;
- GLuint OutputWriteMask:1;
- GLuint DepthWriteMask:1;
- GLuint Saturate:1;
-
- struct radeon_pair_instruction_source Src[3];
-
- struct {
- GLuint Source:2;
- GLuint Swizzle:3;
- GLuint Abs:1;
- GLuint Negate:1;
- } Arg[3];
-};
-
-struct radeon_pair_instruction {
- struct radeon_pair_instruction_rgb RGB;
- struct radeon_pair_instruction_alpha Alpha;
-};
-
-
-/**
- *
- */
-struct radeon_pair_handler {
- /**
- * Fill in the proper hardware index for the given constant register.
- *
- * @return GL_FALSE on error.
- */
- GLboolean (*EmitConst)(void*, GLuint file, GLuint index, GLuint *hwindex);
-
- /**
- * Write a paired instruction to the hardware.
- *
- * @return GL_FALSE on error.
- */
- GLboolean (*EmitPaired)(void*, struct radeon_pair_instruction*);
-
- /**
- * Write a texture instruction to the hardware.
- * Register indices have already been rewritten to the allocated
- * hardware register numbers.
- *
- * @return GL_FALSE on error.
- */
- GLboolean (*EmitTex)(void*, struct prog_instruction*);
-
- /**
- * Called before a block of contiguous, independent texture
- * instructions is emitted.
- */
- GLboolean (*BeginTexBlock)(void*);
-
- GLuint MaxHwTemps;
-};
-
-GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
- const struct radeon_pair_handler*, void *userdata);
-
-void radeonPrintPairInstruction(struct radeon_pair_instruction *inst);
-
-#endif /* __RADEON_PROGRAM_PAIR_H_ */
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index 118e74008fe..5f1af5b0da5 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -57,7 +57,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r200_tex.h"
#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
#include "r300_context.h"
-#include "r300_fragprog.h"
#include "r300_tex.h"
#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
#include "r600_context.h"
@@ -150,6 +149,9 @@ extern const struct dri_extension point_extensions[];
#elif RADEON_COMMON && (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600))
+#define DRI_CONF_FP_OPTIMIZATION_SPEED 0
+#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1
+
/* TODO: integrate these into xmlpool.h! */
#define DRI_CONF_MAX_TEXTURE_IMAGE_UNITS(def,min,max) \
DRI_CONF_OPT_BEGIN_V(texture_image_units,int,def, # min ":" # max ) \
--
cgit v1.2.3
From 78f88d8b8799f013e4ab3abc87666b25071ed917 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Wed, 15 Jul 2009 22:25:28 +0200
Subject: r300/program_pair: Dynamically allocate instructions temporarily
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
In preparation of using TGSI, where we cannot easily predict the number
of instructions.
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/Makefile | 1 +
src/mesa/drivers/dri/r300/compiler/memory_pool.c | 95 ++++++++++
src/mesa/drivers/dri/r300/compiler/memory_pool.h | 49 +++++
.../dri/r300/compiler/radeon_program_pair.c | 197 ++++++++++-----------
4 files changed, 237 insertions(+), 105 deletions(-)
create mode 100644 src/mesa/drivers/dri/r300/compiler/memory_pool.c
create mode 100644 src/mesa/drivers/dri/r300/compiler/memory_pool.h
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
index 3a7de6d5bec..8a85293756f 100644
--- a/src/mesa/drivers/dri/r300/Makefile
+++ b/src/mesa/drivers/dri/r300/Makefile
@@ -49,6 +49,7 @@ RADEON_COMPILER_SOURCES = \
compiler/r300_fragprog_emit.c \
compiler/r500_fragprog.c \
compiler/r500_fragprog_emit.c \
+ compiler/memory_pool.c
DRIVER_SOURCES = \
radeon_screen.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.c b/src/mesa/drivers/dri/r300/compiler/memory_pool.c
new file mode 100644
index 00000000000..37aa2b65798
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2009 Nicolai Hähnle
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "memory_pool.h"
+
+#include
+#include
+#include
+
+
+#define POOL_LARGE_ALLOC 4096
+#define POOL_ALIGN 4
+
+
+struct memory_block {
+ struct memory_block * next;
+};
+
+void memory_pool_init(struct memory_pool * pool)
+{
+ memset(pool, 0, sizeof(struct memory_pool));
+}
+
+
+void memory_pool_destroy(struct memory_pool * pool)
+{
+ while(pool->blocks) {
+ struct memory_block * block = pool->blocks;
+ pool->blocks = block->next;
+ free(block);
+ }
+}
+
+static void refill_pool(struct memory_pool * pool)
+{
+ unsigned int blocksize = pool->total_allocated;
+ struct memory_block * newblock;
+
+ if (!blocksize)
+ blocksize = 2*POOL_LARGE_ALLOC;
+
+ newblock = (struct memory_block*)malloc(blocksize);
+ newblock->next = pool->blocks;
+ pool->blocks = newblock;
+
+ pool->head = (unsigned char*)(newblock + 1);
+ pool->end = ((unsigned char*)newblock) + blocksize;
+ pool->total_allocated += blocksize;
+}
+
+
+void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes)
+{
+ if (bytes < POOL_LARGE_ALLOC) {
+ if (pool->head + bytes > pool->end)
+ refill_pool(pool);
+
+ assert(pool->head + bytes <= pool->end);
+
+ void * ptr = pool->head;
+
+ pool->head += bytes;
+ pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1));
+
+ return ptr;
+ } else {
+ struct memory_block * block = (struct memory_block*)malloc(bytes + sizeof(struct memory_block));
+
+ block->next = pool->blocks;
+ pool->blocks = block;
+
+ return (block + 1);
+ }
+}
+
+
diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.h b/src/mesa/drivers/dri/r300/compiler/memory_pool.h
new file mode 100644
index 00000000000..ce23c319ad3
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2009 Nicolai Hähnle
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef MEMORY_POOL_H
+#define MEMORY_POOL_H
+
+struct memory_block;
+
+/**
+ * Provides a pool of memory that can quickly be allocated from, at the
+ * cost of being unable to explicitly free one of the allocated blocks.
+ * Instead, the entire pool can be freed at once.
+ *
+ * The idea is to allow one to quickly allocate a flexible amount of
+ * memory during operations like shader compilation while avoiding
+ * reference counting headaches.
+ */
+struct memory_pool {
+ unsigned char * head;
+ unsigned char * end;
+ unsigned int total_allocated;
+ struct memory_block * blocks;
+};
+
+
+void memory_pool_init(struct memory_pool * pool);
+void memory_pool_destroy(struct memory_pool * pool);
+void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes);
+
+#endif /* MEMORY_POOL_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index d6fb474cf23..6ce6b4d8ffd 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -37,6 +37,7 @@
#include "radeon_common.h"
+#include "memory_pool.h"
#include "shader/prog_print.h"
#define error(fmt, args...) do { \
@@ -46,6 +47,9 @@
} while(0)
struct pair_state_instruction {
+ struct prog_instruction Instruction;
+ GLuint IP; /**< Position of this instruction in original program */
+
GLuint IsTex:1; /**< Is a texture instruction */
GLuint NeedRGB:1; /**< Needs the RGB ALU */
GLuint NeedAlpha:1; /**< Needs the Alpha ALU */
@@ -73,7 +77,7 @@ struct pair_state_instruction {
* Used to keep track of which instructions read a value.
*/
struct reg_value_reader {
- GLuint IP; /**< IP of the instruction that performs this access */
+ struct pair_state_instruction *Reader;
struct reg_value_reader *Next;
};
@@ -82,7 +86,7 @@ struct reg_value_reader {
* PROGRAM_TEMPORARY.
*/
struct reg_value {
- GLuint IP; /**< IP of the instruction that writes this value */
+ struct pair_state_instruction *Writer;
struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */
/**
@@ -117,6 +121,7 @@ struct pair_register_translation {
struct pair_state {
GLcontext *Ctx;
+ struct memory_pool Pool;
struct gl_program *Program;
const struct radeon_pair_handler *Handler;
GLboolean Error;
@@ -130,11 +135,6 @@ struct pair_state {
struct pair_register_translation Inputs[FRAG_ATTRIB_MAX];
struct pair_register_translation Temps[MAX_PROGRAM_TEMPS];
- /**
- * Derived information about program instructions.
- */
- struct pair_state_instruction *Instructions;
-
struct {
GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */
} HwTemps[128];
@@ -147,14 +147,6 @@ struct pair_state {
struct pair_state_instruction *ReadyRGB;
struct pair_state_instruction *ReadyAlpha;
struct pair_state_instruction *ReadyTEX;
-
- /**
- * Pool of @ref reg_value structures for fast allocation.
- */
- struct reg_value *ValuePool;
- GLuint ValuePoolUsed;
- struct reg_value_reader *ReaderPool;
- GLuint ReaderPoolUsed;
};
@@ -221,15 +213,13 @@ static void add_pairinst_to_list(struct pair_state_instruction **list, struct pa
}
/**
- * The instruction at the given IP has become ready. Link it into the ready
+ * The given instruction has become ready. Link it into the ready
* instructions.
*/
-static void instruction_ready(struct pair_state *s, int ip)
+static void instruction_ready(struct pair_state *s, struct pair_state_instruction *pairinst)
{
- struct pair_state_instruction *pairinst = s->Instructions + ip;
-
if (s->Verbose)
- _mesa_printf("instruction_ready(%i)\n", ip);
+ _mesa_printf("instruction_ready(%i)\n", pairinst->IP);
if (pairinst->IsTex)
add_pairinst_to_list(&s->ReadyTEX, pairinst);
@@ -296,12 +286,12 @@ static void final_rewrite(struct pair_state *s, struct prog_instruction *inst)
* Classify an instruction according to which ALUs etc. it needs
*/
static void classify_instruction(struct pair_state *s,
- struct prog_instruction *inst, struct pair_state_instruction *pairinst)
+ struct pair_state_instruction *psi)
{
- pairinst->NeedRGB = (inst->DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0;
- pairinst->NeedAlpha = (inst->DstReg.WriteMask & WRITEMASK_W) ? 1 : 0;
+ psi->NeedRGB = (psi->Instruction.DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0;
+ psi->NeedAlpha = (psi->Instruction.DstReg.WriteMask & WRITEMASK_W) ? 1 : 0;
- switch(inst->Opcode) {
+ switch(psi->Instruction.Opcode) {
case OPCODE_ADD:
case OPCODE_CMP:
case OPCODE_DDX:
@@ -319,24 +309,24 @@ static void classify_instruction(struct pair_state *s,
case OPCODE_RCP:
case OPCODE_RSQ:
case OPCODE_SIN:
- pairinst->IsTranscendent = 1;
- pairinst->NeedAlpha = 1;
+ psi->IsTranscendent = 1;
+ psi->NeedAlpha = 1;
break;
case OPCODE_DP4:
- pairinst->NeedAlpha = 1;
+ psi->NeedAlpha = 1;
/* fall through */
case OPCODE_DP3:
- pairinst->NeedRGB = 1;
+ psi->NeedRGB = 1;
break;
case OPCODE_KIL:
case OPCODE_TEX:
case OPCODE_TXB:
case OPCODE_TXP:
case OPCODE_END:
- pairinst->IsTex = 1;
+ psi->IsTex = 1;
break;
default:
- error("Unknown opcode %d\n", inst->Opcode);
+ error("Unknown opcode %d\n", psi->Instruction.Opcode);
break;
}
}
@@ -348,30 +338,34 @@ static void classify_instruction(struct pair_state *s,
*/
static void scan_instructions(struct pair_state *s)
{
- struct prog_instruction *inst;
- struct pair_state_instruction *pairinst;
+ struct prog_instruction *source;
GLuint ip;
- for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0;
- inst->Opcode != OPCODE_END;
- ++inst, ++pairinst, ++ip) {
- final_rewrite(s, inst);
- classify_instruction(s, inst, pairinst);
+ for(source = s->Program->Instructions, ip = 0;
+ source->Opcode != OPCODE_END;
+ ++source, ++ip) {
+ struct pair_state_instruction *pairinst = memory_pool_malloc(&s->Pool, sizeof(*pairinst));
+ memset(pairinst, 0, sizeof(struct pair_state_instruction));
+
+ pairinst->Instruction = *source;
+ pairinst->IP = ip;
+ final_rewrite(s, &pairinst->Instruction);
+ classify_instruction(s, pairinst);
- int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
+ int nsrc = _mesa_num_inst_src_regs(pairinst->Instruction.Opcode);
int j;
for(j = 0; j < nsrc; j++) {
struct pair_register_translation *t =
- get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index);
+ get_register(s, pairinst->Instruction.SrcReg[j].File, pairinst->Instruction.SrcReg[j].Index);
if (!t)
continue;
t->RefCount++;
- if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
+ if (pairinst->Instruction.SrcReg[j].File == PROGRAM_TEMPORARY) {
int i;
for(i = 0; i < 4; ++i) {
- GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i);
+ GLuint swz = GET_SWZ(pairinst->Instruction.SrcReg[j].Swizzle, i);
if (swz >= 4)
continue; /* constant or NIL swizzle */
if (!t->Value[swz])
@@ -381,36 +375,37 @@ static void scan_instructions(struct pair_state *s)
* also rewrites the value. The code below adds
* a dependency for the DstReg, which is a superset
* of the SrcReg dependency. */
- if (inst->DstReg.File == PROGRAM_TEMPORARY &&
- inst->DstReg.Index == inst->SrcReg[j].Index &&
- GET_BIT(inst->DstReg.WriteMask, swz))
+ if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY &&
+ pairinst->Instruction.DstReg.Index == pairinst->Instruction.SrcReg[j].Index &&
+ GET_BIT(pairinst->Instruction.DstReg.WriteMask, swz))
continue;
- struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++];
+ struct reg_value_reader* r = memory_pool_malloc(&s->Pool, sizeof(*r));
pairinst->NumDependencies++;
t->Value[swz]->NumReaders++;
- r->IP = ip;
+ r->Reader = pairinst;
r->Next = t->Value[swz]->Readers;
t->Value[swz]->Readers = r;
}
}
}
- int ndst = _mesa_num_inst_dst_regs(inst->Opcode);
+ int ndst = _mesa_num_inst_dst_regs(pairinst->Instruction.Opcode);
if (ndst) {
struct pair_register_translation *t =
- get_register(s, inst->DstReg.File, inst->DstReg.Index);
+ get_register(s, pairinst->Instruction.DstReg.File, pairinst->Instruction.DstReg.Index);
if (t) {
t->RefCount++;
- if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY) {
int j;
for(j = 0; j < 4; ++j) {
- if (!GET_BIT(inst->DstReg.WriteMask, j))
+ if (!GET_BIT(pairinst->Instruction.DstReg.WriteMask, j))
continue;
- struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++];
- v->IP = ip;
+ struct reg_value* v = memory_pool_malloc(&s->Pool, sizeof(*v));
+ memset(v, 0, sizeof(struct reg_value));
+ v->Writer = pairinst;
if (t->Value[j]) {
pairinst->NumDependencies++;
t->Value[j]->Next = v;
@@ -426,7 +421,7 @@ static void scan_instructions(struct pair_state *s)
_mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies);
if (!pairinst->NumDependencies)
- instruction_ready(s, ip);
+ instruction_ready(s, pairinst);
}
/* Clear the PROGRAM_TEMPORARY state */
@@ -483,25 +478,23 @@ static void allocate_input_registers(struct pair_state *s)
}
-static void decrement_dependencies(struct pair_state *s, int ip)
+static void decrement_dependencies(struct pair_state *s, struct pair_state_instruction *pairinst)
{
- struct pair_state_instruction *pairinst = s->Instructions + ip;
ASSERT(pairinst->NumDependencies > 0);
if (!--pairinst->NumDependencies)
- instruction_ready(s, ip);
+ instruction_ready(s, pairinst);
}
/**
* Update the dependency tracking state based on what the instruction
* at the given IP does.
*/
-static void commit_instruction(struct pair_state *s, int ip)
+static void commit_instruction(struct pair_state *s, struct pair_state_instruction *pairinst)
{
- struct prog_instruction *inst = s->Program->Instructions + ip;
- struct pair_state_instruction *pairinst = s->Instructions + ip;
+ struct prog_instruction *inst = &pairinst->Instruction;
if (s->Verbose)
- _mesa_printf("commit_instruction(%i)\n", ip);
+ _mesa_printf("commit_instruction(%i)\n", pairinst->IP);
if (inst->DstReg.File == PROGRAM_TEMPORARY) {
struct pair_register_translation *t = &s->Temps[inst->DstReg.Index];
@@ -516,11 +509,11 @@ static void commit_instruction(struct pair_state *s, int ip)
if (t->Value[i]->NumReaders) {
struct reg_value_reader *r;
for(r = pairinst->Values[i]->Readers; r; r = r->Next)
- decrement_dependencies(s, r->IP);
+ decrement_dependencies(s, r->Reader);
} else if (t->Value[i]->Next) {
/* This happens when the only reader writes
* the register at the same time */
- decrement_dependencies(s, t->Value[i]->Next->IP);
+ decrement_dependencies(s, t->Value[i]->Next->Writer);
}
}
}
@@ -554,7 +547,7 @@ static void commit_instruction(struct pair_state *s, int ip)
if (!--t->Value[swz]->NumReaders) {
if (t->Value[swz]->Next)
- decrement_dependencies(s, t->Value[swz]->Next->IP);
+ decrement_dependencies(s, t->Value[swz]->Next->Writer);
}
}
}
@@ -585,8 +578,7 @@ static void emit_all_tex(struct pair_state *s)
// Allocate destination hardware registers in one block to avoid conflicts.
for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
- int ip = pairinst - s->Instructions;
- struct prog_instruction *inst = s->Program->Instructions + ip;
+ struct prog_instruction *inst = &pairinst->Instruction;
if (inst->Opcode != OPCODE_KIL)
get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
}
@@ -598,9 +590,8 @@ static void emit_all_tex(struct pair_state *s)
s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData);
for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
- int ip = pairinst - s->Instructions;
- struct prog_instruction *inst = s->Program->Instructions + ip;
- commit_instruction(s, ip);
+ struct prog_instruction *inst = &pairinst->Instruction;
+ commit_instruction(s, pairinst);
if (inst->Opcode != OPCODE_KIL)
inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
@@ -684,10 +675,12 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio
* Fill the given ALU instruction's opcodes and source operands into the given pair,
* if possible.
*/
-static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip)
+static GLboolean fill_instruction_into_pair(
+ struct pair_state *s,
+ struct radeon_pair_instruction *pair,
+ struct pair_state_instruction *pairinst)
{
- struct pair_state_instruction *pairinst = s->Instructions + ip;
- struct prog_instruction *inst = s->Program->Instructions + ip;
+ struct prog_instruction *inst = &pairinst->Instruction;
ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP);
ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP);
@@ -768,10 +761,12 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_
* we are absolutely certain that we're going to emit a certain
* instruction pairing.
*/
-static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip)
+static void fill_dest_into_pair(
+ struct pair_state *s,
+ struct radeon_pair_instruction *pair,
+ struct pair_state_instruction *pairinst)
{
- struct pair_state_instruction *pairinst = s->Instructions + ip;
- struct prog_instruction *inst = s->Program->Instructions + ip;
+ struct prog_instruction *inst = &pairinst->Instruction;
if (inst->DstReg.File == PROGRAM_OUTPUT) {
if (inst->DstReg.Index == FRAG_RESULT_COLOR) {
@@ -804,24 +799,24 @@ static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruc
static void emit_alu(struct pair_state *s)
{
struct radeon_pair_instruction pair;
+ struct pair_state_instruction *psi;
if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) {
- int ip;
if (s->ReadyFullALU) {
- ip = s->ReadyFullALU - s->Instructions;
+ psi = s->ReadyFullALU;
s->ReadyFullALU = s->ReadyFullALU->NextReady;
} else if (s->ReadyRGB) {
- ip = s->ReadyRGB - s->Instructions;
+ psi = s->ReadyRGB;
s->ReadyRGB = s->ReadyRGB->NextReady;
} else {
- ip = s->ReadyAlpha - s->Instructions;
+ psi = s->ReadyAlpha;
s->ReadyAlpha = s->ReadyAlpha->NextReady;
}
_mesa_bzero(&pair, sizeof(pair));
- fill_instruction_into_pair(s, &pair, ip);
- fill_dest_into_pair(s, &pair, ip);
- commit_instruction(s, ip);
+ fill_instruction_into_pair(s, &pair, psi);
+ fill_dest_into_pair(s, &pair, psi);
+ commit_instruction(s, psi);
} else {
struct pair_state_instruction **prgb;
struct pair_state_instruction **palpha;
@@ -830,29 +825,30 @@ static void emit_alu(struct pair_state *s)
* many source slots; try all possible pairings if necessary */
for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
- int rgbip = *prgb - s->Instructions;
- int alphaip = *palpha - s->Instructions;
+ struct pair_state_instruction * psirgb = *prgb;
+ struct pair_state_instruction * psialpha = *palpha;
_mesa_bzero(&pair, sizeof(pair));
- fill_instruction_into_pair(s, &pair, rgbip);
- if (!fill_instruction_into_pair(s, &pair, alphaip))
+ fill_instruction_into_pair(s, &pair, psirgb);
+ if (!fill_instruction_into_pair(s, &pair, psialpha))
continue;
*prgb = (*prgb)->NextReady;
*palpha = (*palpha)->NextReady;
- fill_dest_into_pair(s, &pair, rgbip);
- fill_dest_into_pair(s, &pair, alphaip);
- commit_instruction(s, rgbip);
- commit_instruction(s, alphaip);
+ fill_dest_into_pair(s, &pair, psirgb);
+ fill_dest_into_pair(s, &pair, psialpha);
+ commit_instruction(s, psirgb);
+ commit_instruction(s, psialpha);
goto success;
}
}
/* No success in pairing; just take the first RGB instruction */
- int ip = s->ReadyRGB - s->Instructions;
+ psi = s->ReadyRGB;
s->ReadyRGB = s->ReadyRGB->NextReady;
+
_mesa_bzero(&pair, sizeof(pair));
- fill_instruction_into_pair(s, &pair, ip);
- fill_dest_into_pair(s, &pair, ip);
- commit_instruction(s, ip);
+ fill_instruction_into_pair(s, &pair, psi);
+ fill_dest_into_pair(s, &pair, psi);
+ commit_instruction(s, psi);
success: ;
}
@@ -870,18 +866,13 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
_mesa_bzero(&s, sizeof(s));
s.Ctx = ctx;
- s.Program = _mesa_clone_program(ctx, program);
+ memory_pool_init(&s.Pool);
+ s.Program = program;
s.Handler = handler;
s.UserData = userdata;
s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE;
s.Verbose = GL_FALSE && s.Debug;
- s.Instructions = (struct pair_state_instruction*)_mesa_calloc(
- sizeof(struct pair_state_instruction)*s.Program->NumInstructions);
- s.ValuePool = (struct reg_value*)_mesa_calloc(sizeof(struct reg_value)*s.Program->NumInstructions*4);
- s.ReaderPool = (struct reg_value_reader*)_mesa_calloc(
- sizeof(struct reg_value_reader)*s.Program->NumInstructions*12);
-
if (s.Debug)
_mesa_printf("Emit paired program\n");
@@ -900,11 +891,7 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
if (s.Debug)
_mesa_printf(" END\n");
- _mesa_free(s.Instructions);
- _mesa_free(s.ValuePool);
- _mesa_free(s.ReaderPool);
-
- _mesa_reference_program(ctx, &s.Program, NULL);
+ memory_pool_destroy(&s.Pool);
return !s.Error;
}
--
cgit v1.2.3
From cb8c694adb8e0a9e1d357cac9cf7a7ce263df3ae Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Thu, 16 Jul 2009 19:57:43 +0200
Subject: r300/program_pair: Introduce driver-specific texture instruction
structure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This is to prepare more experimentation and possible internal changes in the
compiler.
Signed-off-by: Nicolai Hähnle
---
.../drivers/dri/r300/compiler/r300_fragprog_emit.c | 18 ++++++++---------
.../drivers/dri/r300/compiler/r500_fragprog_emit.c | 22 ++++++++++-----------
.../dri/r300/compiler/radeon_program_pair.c | 20 ++++++++++++++++++-
.../dri/r300/compiler/radeon_program_pair.h | 23 +++++++++++++++++++++-
4 files changed, 61 insertions(+), 22 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index 1cfb565b6e0..1aeba8f4984 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -272,7 +272,7 @@ static GLboolean begin_tex(void* data)
}
-static GLboolean emit_tex(void* data, struct prog_instruction* inst)
+static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* inst)
{
PROG_CODE;
@@ -282,31 +282,31 @@ static GLboolean emit_tex(void* data, struct prog_instruction* inst)
}
GLuint unit = inst->TexSrcUnit;
- GLuint dest = inst->DstReg.Index;
+ GLuint dest = inst->DestIndex;
GLuint opcode;
switch(inst->Opcode) {
- case OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
- case OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
- case OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
- case OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
+ case RADEON_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
+ case RADEON_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
+ case RADEON_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
+ case RADEON_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
default:
error("Unknown texture opcode %i", inst->Opcode);
return GL_FALSE;
}
- if (inst->Opcode == OPCODE_KIL) {
+ if (inst->Opcode == RADEON_OPCODE_KIL) {
unit = 0;
dest = 0;
} else {
use_temporary(code, dest);
}
- use_temporary(code, inst->SrcReg[0].Index);
+ use_temporary(code, inst->SrcIndex);
code->node[code->cur_node].tex_end++;
code->tex.inst[code->tex.length++] =
- (inst->SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
+ (inst->SrcIndex << R300_SRC_ADDR_SHIFT) |
(dest << R300_DST_ADDR_SHIFT) |
(unit << R300_TEX_ID_SHIFT) |
(opcode << R300_TEX_INST_SHIFT);
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 237489e1199..b5f665b66c3 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -236,19 +236,19 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
return GL_TRUE;
}
-static GLuint translate_strq_swizzle(struct prog_src_register src)
+static GLuint translate_strq_swizzle(GLuint swizzle)
{
GLuint swiz = 0;
int i;
for (i = 0; i < 4; i++)
- swiz |= (GET_SWZ(src.Swizzle, i) & 0x3) << i*2;
+ swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2;
return swiz;
}
/**
* Emit a single TEX instruction
*/
-static GLboolean emit_tex(void *data, struct prog_instruction *inst)
+static GLboolean emit_tex(void *data, struct radeon_pair_texture_instruction *inst)
{
PROG_CODE;
@@ -260,7 +260,7 @@ static GLboolean emit_tex(void *data, struct prog_instruction *inst)
int ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_TEX
- | (inst->DstReg.WriteMask << 11)
+ | (inst->WriteMask << 11)
| R500_INST_TEX_SEM_WAIT;
code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
| R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
@@ -269,25 +269,25 @@ static GLboolean emit_tex(void *data, struct prog_instruction *inst)
code->inst[ip].inst1 |= R500_TEX_UNSCALED;
switch (inst->Opcode) {
- case OPCODE_KIL:
+ case RADEON_OPCODE_KIL:
code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
break;
- case OPCODE_TEX:
+ case RADEON_OPCODE_TEX:
code->inst[ip].inst1 |= R500_TEX_INST_LD;
break;
- case OPCODE_TXB:
+ case RADEON_OPCODE_TXB:
code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
break;
- case OPCODE_TXP:
+ case RADEON_OPCODE_TXP:
code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
break;
default:
error("emit_tex can't handle opcode %x\n", inst->Opcode);
}
- code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
- | (translate_strq_swizzle(inst->SrcReg[0]) << 8)
- | R500_TEX_DST_ADDR(inst->DstReg.Index)
+ code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcIndex)
+ | (translate_strq_swizzle(inst->SrcSwizzle) << 8)
+ | R500_TEX_DST_ADDR(inst->DestIndex)
| R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
| R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index 6ce6b4d8ffd..3274c83d4da 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -602,7 +602,25 @@ static void emit_all_tex(struct pair_state *s)
_mesa_print_instruction(inst);
fflush(stdout);
}
- s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst);
+
+ struct radeon_pair_texture_instruction rpti;
+
+ switch(inst->Opcode) {
+ case OPCODE_TEX: rpti.Opcode = RADEON_OPCODE_TEX; break;
+ case OPCODE_TXB: rpti.Opcode = RADEON_OPCODE_TXB; break;
+ case OPCODE_TXP: rpti.Opcode = RADEON_OPCODE_TXP; break;
+ default:
+ case OPCODE_KIL: rpti.Opcode = RADEON_OPCODE_KIL; break;
+ }
+
+ rpti.DestIndex = inst->DstReg.Index;
+ rpti.WriteMask = inst->DstReg.WriteMask;
+ rpti.TexSrcUnit = inst->TexSrcUnit;
+ rpti.TexSrcTarget = inst->TexSrcTarget;
+ rpti.SrcIndex = inst->SrcReg[0].Index;
+ rpti.SrcSwizzle = inst->SrcReg[0].Swizzle;
+
+ s->Error = s->Error || !s->Handler->EmitTex(s->UserData, &rpti);
}
if (s->Debug)
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index 4624a246298..f203d4886f2 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -82,6 +82,27 @@ struct radeon_pair_instruction {
};
+enum {
+ RADEON_OPCODE_TEX = 0,
+ RADEON_OPCODE_TXB,
+ RADEON_OPCODE_TXP,
+ RADEON_OPCODE_KIL
+};
+
+struct radeon_pair_texture_instruction {
+ GLuint Opcode:2; /**< one of RADEON_OPCODE_xxx */
+
+ GLuint DestIndex:8;
+ GLuint WriteMask:4;
+
+ GLuint TexSrcUnit:5;
+ GLuint TexSrcTarget:3;
+
+ GLuint SrcIndex:8;
+ GLuint SrcSwizzle:12;
+};
+
+
/**
*
*/
@@ -107,7 +128,7 @@ struct radeon_pair_handler {
*
* @return GL_FALSE on error.
*/
- GLboolean (*EmitTex)(void*, struct prog_instruction*);
+ GLboolean (*EmitTex)(void*, struct radeon_pair_texture_instruction*);
/**
* Called before a block of contiguous, independent texture
--
cgit v1.2.3
From d29cdde569cc685beb791a6693f8ae28e2ef5115 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Thu, 16 Jul 2009 22:23:48 +0200
Subject: r300: Remove GLcontext requirement from radeon_program_pair
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c | 2 +-
src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c | 2 +-
src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c | 10 ++++------
src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h | 2 +-
4 files changed, 7 insertions(+), 9 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index 1aeba8f4984..d6784d1c22a 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -334,7 +334,7 @@ GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *
code->node[0].alu_end = -1;
code->node[0].tex_end = -1;
- if (!radeonPairProgram(compiler->ctx, compiler->program, &pair_handler, compiler))
+ if (!radeonPairProgram(compiler->program, &pair_handler, compiler))
return GL_FALSE;
if (!finish_node(compiler))
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index b5f665b66c3..3d5ddbb981b 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -310,7 +310,7 @@ GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *
code->inst_offset = 0;
code->inst_end = -1;
- if (!radeonPairProgram(compiler->ctx, compiler->program, &pair_handler, compiler))
+ if (!radeonPairProgram(compiler->program, &pair_handler, compiler))
return GL_FALSE;
if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index 3274c83d4da..4be948b0c11 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -41,7 +41,7 @@
#include "shader/prog_print.h"
#define error(fmt, args...) do { \
- _mesa_problem(s->Ctx, "%s::%s(): " fmt "\n", \
+ fprintf(stderr, "r300 driver problem: %s::%s(): " fmt "\n", \
__FILE__, __FUNCTION__, ##args); \
s->Error = GL_TRUE; \
} while(0)
@@ -120,7 +120,6 @@ struct pair_register_translation {
};
struct pair_state {
- GLcontext *Ctx;
struct memory_pool Pool;
struct gl_program *Program;
const struct radeon_pair_handler *Handler;
@@ -175,7 +174,7 @@ static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index)
struct pair_register_translation *t = get_register(s, file, index);
if (!t) {
- _mesa_problem(s->Ctx, "get_hw_reg: %i[%i]\n", file, index);
+ error("get_hw_reg: %i[%i]\n", file, index);
return 0;
}
@@ -456,7 +455,7 @@ static void allocate_input_registers(struct pair_state *s)
InputsRead &= ~FRAG_BIT_COL1;
/* Texcoords */
- for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) {
+ for (i = 0; i < 8; i++) {
if (InputsRead & (FRAG_BIT_TEX0 << i))
alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++);
}
@@ -877,13 +876,12 @@ static void emit_alu(struct pair_state *s)
}
-GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
+GLboolean radeonPairProgram(struct gl_program *program,
const struct radeon_pair_handler* handler, void *userdata)
{
struct pair_state s;
_mesa_bzero(&s, sizeof(s));
- s.Ctx = ctx;
memory_pool_init(&s.Pool);
s.Program = program;
s.Handler = handler;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index f203d4886f2..9f479766332 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -139,7 +139,7 @@ struct radeon_pair_handler {
GLuint MaxHwTemps;
};
-GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
+GLboolean radeonPairProgram(struct gl_program *program,
const struct radeon_pair_handler*, void *userdata);
void radeonPrintPairInstruction(struct radeon_pair_instruction *inst);
--
cgit v1.2.3
From b4b286b9804203568f71a010ce1c1f163f0f8d6f Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Thu, 16 Jul 2009 22:32:46 +0200
Subject: r300: Remove GLcontext requirement from radeon_nqssadce
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 4 ++--
src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c | 8 ++++----
src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h | 7 +++----
src/mesa/drivers/dri/r300/r300_vertprog.c | 4 ++--
4 files changed, 11 insertions(+), 12 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 75abdcfc42a..8b8c957e5c4 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -281,14 +281,14 @@ GLboolean r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c
.IsNativeSwizzle = &r500FPIsNativeSwizzle,
.BuildSwizzle = &r500FPBuildSwizzle
};
- radeonNqssaDce(c->ctx, c->program, &nqssadce);
+ radeonNqssaDce(c->program, &nqssadce, 0);
} else {
struct radeon_nqssadce_descr nqssadce = {
.Init = &nqssadce_init,
.IsNativeSwizzle = &r300FPIsNativeSwizzle,
.BuildSwizzle = &r300FPBuildSwizzle
};
- radeonNqssaDce(c->ctx, c->program, &nqssadce);
+ radeonNqssaDce(c->program, &nqssadce, 0);
}
if (c->debug) {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
index 202a8532b6d..ee2e1cbc54b 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
@@ -166,7 +166,7 @@ static void process_instruction(struct nqssadce_state* s)
if (inst->Opcode != OPCODE_KIL) {
struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
if (!regstate) {
- _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n",
+ fprintf(stderr, "r300 driver: NqssaDce: bad destination register (%i[%i])\n",
inst->DstReg.File, inst->DstReg.Index);
return;
}
@@ -244,7 +244,7 @@ static void process_instruction(struct nqssadce_state* s)
inst = track_used_srcreg(s, inst, 0, 0xb);
break;
default:
- _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
+ fprintf(stderr, "r300 driver: NqssaDce: Unknown opcode %d\n", inst->Opcode);
return;
}
}
@@ -277,14 +277,14 @@ static void calculateInputsOutputs(struct gl_program *p)
p->OutputsWritten = OutputsWritten;
}
-void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr)
+void radeonNqssaDce(struct gl_program *p, struct radeon_nqssadce_descr* descr, void * data)
{
struct nqssadce_state s;
_mesa_bzero(&s, sizeof(s));
- s.Ctx = ctx;
s.Program = p;
s.Descr = descr;
+ s.UserData = data;
s.Descr->Init(&s);
s.IP = p->NumInstructions;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h
index 8626f21c25e..8059b3b66d6 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h
@@ -44,7 +44,6 @@ struct register_state {
* read from, etc.
*/
struct nqssadce_state {
- GLcontext *Ctx;
struct gl_program *Program;
struct radeon_nqssadce_descr *Descr;
@@ -59,6 +58,8 @@ struct nqssadce_state {
struct register_state Temps[MAX_PROGRAM_TEMPS];
struct register_state Outputs[VERT_RESULT_MAX];
struct register_state Address;
+
+ void * UserData;
};
@@ -83,11 +84,9 @@ struct radeon_nqssadce_descr {
* The transformation will work recursively on the emitted instruction(s).
*/
void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
-
- void *Data;
};
-void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr);
+void radeonNqssaDce(struct gl_program *p, struct radeon_nqssadce_descr* descr, void * data);
struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg);
#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index ab5ca4322e2..f98de34e93c 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -1496,7 +1496,7 @@ static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog)
static void nqssadceInit(struct nqssadce_state* s)
{
- r300ContextPtr r300 = R300_CONTEXT(s->Ctx);
+ r300ContextPtr r300 = (r300ContextPtr)(s->UserData);
GLuint fp_reads;
fp_reads = r300->selected_fp->Base->InputsRead;
@@ -1582,7 +1582,7 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
.IsNativeSwizzle = &swizzleIsNative,
.BuildSwizzle = NULL
};
- radeonNqssaDce(ctx, prog, &nqssadce);
+ radeonNqssaDce(prog, &nqssadce, r300);
/* We need this step for reusing temporary registers */
_mesa_optimize_program(ctx, prog);
--
cgit v1.2.3
From 9ceee4d3e45ab65d9d0b9d0eb1d062883f241669 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Thu, 16 Jul 2009 22:36:54 +0200
Subject: r300: Remove unused enums
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/radeon_program.h | 6 ------
1 file changed, 6 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index 88474d43a22..3d4dbfecf72 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -35,12 +35,6 @@
#include "shader/prog_instruction.h"
-enum {
- CLAUSE_MIXED = 0,
- CLAUSE_ALU,
- CLAUSE_TEX
-};
-
enum {
PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */
};
--
cgit v1.2.3
From e93d70e3e93df956e89c46678020de1a352f9ecf Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Thu, 16 Jul 2009 22:39:16 +0200
Subject: r300: Remove GLcontext requirement from radeonLocalTransform
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 4 ++--
src/mesa/drivers/dri/r300/compiler/radeon_program.c | 2 --
src/mesa/drivers/dri/r300/compiler/radeon_program.h | 2 --
3 files changed, 2 insertions(+), 6 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 8b8c957e5c4..daef20fe675 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -259,14 +259,14 @@ GLboolean r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c
{ &radeonTransformDeriv, 0 },
{ &radeonTransformTrigScale, 0 }
};
- radeonLocalTransform(c->ctx, c->program, 4, transformations);
+ radeonLocalTransform(c->program, 4, transformations);
} else {
struct radeon_program_transformation transformations[] = {
{ &r300_transform_TEX, c },
{ &radeonTransformALU, 0 },
{ &radeonTransformTrigSimple, 0 }
};
- radeonLocalTransform(c->ctx, c->program, 3, transformations);
+ radeonLocalTransform(c->program, 3, transformations);
}
if (c->debug) {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
index da5e7aefce5..0022d0a76ce 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
@@ -46,7 +46,6 @@
* one instruction at a time.
*/
void radeonLocalTransform(
- GLcontext *Ctx,
struct gl_program *program,
int num_transformations,
struct radeon_program_transformation* transformations)
@@ -54,7 +53,6 @@ void radeonLocalTransform(
struct radeon_transform_context ctx;
int ip;
- ctx.Ctx = Ctx;
ctx.Program = program;
ctx.OldInstructions = program->Instructions;
ctx.OldNumInstructions = program->NumInstructions;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index 3d4dbfecf72..5b42883812a 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -85,7 +85,6 @@ static inline GLuint combine_swizzles(GLuint src, GLuint swz)
* e.g. finding new temporary registers must use @ref radeonFindFreeTemporary
*/
struct radeon_transform_context {
- GLcontext *Ctx;
struct gl_program *Program;
struct prog_instruction *OldInstructions;
GLuint OldNumInstructions;
@@ -110,7 +109,6 @@ struct radeon_program_transformation {
};
void radeonLocalTransform(
- GLcontext* ctx,
struct gl_program *program,
int num_transformations,
struct radeon_program_transformation* transformations);
--
cgit v1.2.3
From b54e0832012e6793b9c381d64aafbb8185b7144d Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Thu, 16 Jul 2009 22:40:53 +0200
Subject: r300: Remove GLcontext from r300_fragment_program_compiler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/radeon_compiler.h | 1 -
src/mesa/drivers/dri/r300/r300_fragprog_common.c | 1 -
2 files changed, 2 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index e1a691db4fb..92560b5d8ae 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -149,7 +149,6 @@ struct rX00_fragment_program_code {
};
struct r300_fragment_program_compiler {
- GLcontext * ctx;
struct rX00_fragment_program_code *code;
struct gl_program *program;
struct r300_fragment_program_external_state state;
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index f28162a2b6a..b37f2969126 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -95,7 +95,6 @@ void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *f
r300ContextPtr r300 = R300_CONTEXT(ctx);
struct r300_fragment_program_compiler compiler;
- compiler.ctx = ctx;
compiler.code = &fp->code;
compiler.state = fp->state;
compiler.program = fp->Base;
--
cgit v1.2.3
From f70d3ee3710a3453289aabf637f6818e198c67a5 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Thu, 16 Jul 2009 22:58:13 +0200
Subject: r300: Remove some dependencies on additional fragment program copies
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The copy is still needed because some program transforms add state variables
or constants.
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/r300_context.h | 5 ++---
src/mesa/drivers/dri/r300/r300_fragprog_common.c | 14 ++++++++------
src/mesa/drivers/dri/r300/r300_fragprog_common.h | 4 +---
src/mesa/drivers/dri/r300/r300_shader.c | 4 +---
src/mesa/drivers/dri/r300/r300_state.c | 16 ++++++++--------
src/mesa/drivers/dri/r300/r300_swtcl.c | 2 +-
src/mesa/drivers/dri/r300/r300_vertprog.c | 6 +++---
7 files changed, 24 insertions(+), 27 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index b692f8bf809..ea30d3e12ff 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -445,14 +445,13 @@ struct r300_vertex_program_cont {
* to render with that program.
*/
struct r300_fragment_program {
- struct gl_program *Base;
-
- GLboolean translated;
GLboolean error;
+ struct gl_program *Base;
struct r300_fragment_program *next;
struct r300_fragment_program_external_state state;
struct rX00_fragment_program_code code;
+ GLbitfield InputsRead;
};
struct r300_fragment_program_cont {
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index b37f2969126..1c57ba49e5b 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -90,24 +90,26 @@ static void build_state(
}
-void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp)
+static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_program_cont *cont, struct r300_fragment_program *fp)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
struct r300_fragment_program_compiler compiler;
compiler.code = &fp->code;
compiler.state = fp->state;
- compiler.program = fp->Base;
+ compiler.program = _mesa_clone_program(ctx, &cont->Base.Base);
compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE;
compiler.debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE;
if (!r3xx_compile_fragment_program(&compiler))
fp->error = GL_TRUE;
- fp->translated = GL_TRUE;
+ fp->InputsRead = compiler.program->InputsRead;
+
+ fp->Base = compiler.program;
}
-struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx)
+struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
struct r300_fragment_program_cont *fp_list;
@@ -128,11 +130,11 @@ struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx)
fp = _mesa_calloc(sizeof(struct r300_fragment_program));
fp->state = state;
- fp->translated = GL_FALSE;
- fp->Base = _mesa_clone_program(ctx, &ctx->FragmentProgram._Current->Base);
fp->next = fp_list->progs;
fp_list->progs = fp;
+ translate_fragment_program(ctx, fp_list, fp);
+
return r300->selected_fp = fp;
}
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.h b/src/mesa/drivers/dri/r300/r300_fragprog_common.h
index 5e103ee4086..3d64c08cee9 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.h
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.h
@@ -32,8 +32,6 @@
#include "r300_context.h"
-extern void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp);
-
-struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx);
+struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ctx);
#endif
diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c
index 62228a3786e..06c893881e5 100644
--- a/src/mesa/drivers/dri/r300/r300_shader.c
+++ b/src/mesa/drivers/dri/r300/r300_shader.c
@@ -122,9 +122,7 @@ static GLboolean
r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
{
if (target == GL_FRAGMENT_PROGRAM_ARB) {
- struct r300_fragment_program *fp = r300SelectFragmentShader(ctx);
- if (!fp->translated)
- r300TranslateFragmentShader(ctx, fp);
+ struct r300_fragment_program *fp = r300SelectAndTranslateFragmentShader(ctx);
return !fp->error;
} else {
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index 62a03281ca8..66d9a69622a 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -1462,7 +1462,7 @@ static void r300SetupRSUnit(GLcontext * ctx)
else
RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
- InputsRead = r300->selected_fp->Base->InputsRead;
+ InputsRead = r300->selected_fp->InputsRead;
R300_STATECHANGE(r300, ri);
R300_STATECHANGE(r300, rc);
@@ -1556,7 +1556,7 @@ static void r500SetupRSUnit(GLcontext * ctx)
else
RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
- InputsRead = r300->selected_fp->Base->InputsRead;
+ InputsRead = r300->selected_fp->InputsRead;
R300_STATECHANGE(r300, ri);
R300_STATECHANGE(r300, rc);
@@ -1995,9 +1995,7 @@ void r300UpdateShaders(r300ContextPtr rmesa)
{
struct r300_fragment_program *fp;
- fp = r300SelectFragmentShader(ctx);
- if (!fp->translated)
- r300TranslateFragmentShader(ctx, fp);
+ fp = r300SelectAndTranslateFragmentShader(ctx);
r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error);
}
@@ -2034,9 +2032,11 @@ void r300UpdateShaders(r300ContextPtr rmesa)
}
static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx,
- struct gl_program *program, struct prog_src_register srcreg)
+ struct prog_src_register srcreg)
{
static const GLfloat dummy[4] = { 0, 0, 0, 0 };
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ struct gl_program * program = rmesa->selected_fp->Base;
switch(srcreg.File) {
case PROGRAM_LOCAL_PARAM:
@@ -2103,7 +2103,7 @@ static void r300SetupPixelShader(GLcontext *ctx)
R300_STATECHANGE(rmesa, fpp);
rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4);
for (i = 0; i < code->const_nr; i++) {
- const GLfloat *constant = get_fragmentprogram_constant(ctx, fp->Base, code->constant[i]);
+ const GLfloat *constant = get_fragmentprogram_constant(ctx, code->constant[i]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]);
@@ -2164,7 +2164,7 @@ static void r500SetupPixelShader(GLcontext *ctx)
R300_STATECHANGE(rmesa, r500fp_const);
for (i = 0; i < code->const_nr; i++) {
- const GLfloat *constant = get_fragmentprogram_constant(ctx, fp->Base, code->constant[i]);
+ const GLfloat *constant = get_fragmentprogram_constant(ctx, code->constant[i]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]);
diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c
index a7e8e711499..1e4ea2c5775 100644
--- a/src/mesa/drivers/dri/r300/r300_swtcl.c
+++ b/src/mesa/drivers/dri/r300/r300_swtcl.c
@@ -76,7 +76,7 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_
GLuint InputsRead = 0;
GLuint OutputsWritten = 0;
int num_attrs = 0;
- GLuint fp_reads = rmesa->selected_fp->Base->InputsRead;
+ GLuint fp_reads = rmesa->selected_fp->InputsRead;
struct vertex_attribute *attrs = rmesa->vbuf.attribs;
rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0;
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index f98de34e93c..cf4788411fe 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -1452,7 +1452,7 @@ static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog)
OutputsAdded = 0;
count = 0;
- FpReads = r300->selected_fp->Base->InputsRead;
+ FpReads = r300->selected_fp->InputsRead;
ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
@@ -1499,7 +1499,7 @@ static void nqssadceInit(struct nqssadce_state* s)
r300ContextPtr r300 = (r300ContextPtr)(s->UserData);
GLuint fp_reads;
- fp_reads = r300->selected_fp->Base->InputsRead;
+ fp_reads = r300->selected_fp->InputsRead;
{
if (fp_reads & FRAG_BIT_COL0) {
s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW;
@@ -1639,7 +1639,7 @@ struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx)
struct r300_vertex_program *vp;
vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
- wanted_key.FpReads = r300->selected_fp->Base->InputsRead;
+ wanted_key.FpReads = r300->selected_fp->InputsRead;
wanted_key.FogAttr = r300->selected_fp->code.fog_attr;
wanted_key.WPosAttr = r300->selected_fp->code.wpos_attr;
--
cgit v1.2.3
From d6275ccf79667094de496d06aba35222be9935fc Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Tue, 21 Jul 2009 18:28:30 +0200
Subject: r300: Further reduce dependency between compiler and classic driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r300_fragprog.c | 4 ++--
src/mesa/drivers/dri/r300/compiler/r300_fragprog.h | 4 ++--
src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c | 10 +++++-----
src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c | 2 +-
src/mesa/drivers/dri/r300/compiler/r500_fragprog.c | 4 ++--
src/mesa/drivers/dri/r300/compiler/r500_fragprog.h | 4 ++--
src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c | 8 ++++----
src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c | 7 +++----
src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h | 3 ++-
9 files changed, 23 insertions(+), 23 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
index 00ef9645711..932f9d97716 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
@@ -25,11 +25,11 @@
*
*/
-#include "compiler/r300_fragprog.h"
+#include "r300_fragprog.h"
#include "shader/prog_parameter.h"
-#include "r300_reg.h"
+#include "../r300_reg.h"
static void reset_srcreg(struct prog_src_register* reg)
{
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
index 186fad6490f..21507bd8e06 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
@@ -36,8 +36,8 @@
#include "shader/program.h"
#include "shader/prog_instruction.h"
-#include "compiler/radeon_compiler.h"
-#include "compiler/radeon_program.h"
+#include "radeon_compiler.h"
+#include "radeon_program.h"
extern GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index d6784d1c22a..520d81d3b4f 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -38,12 +38,12 @@
* \todo FogOption
*/
-#include "compiler/r300_fragprog.h"
+#include "r300_fragprog.h"
-#include "r300_reg.h"
+#include "../r300_reg.h"
-#include "compiler/radeon_program_pair.h"
-#include "compiler/r300_fragprog_swizzle.h"
+#include "radeon_program_pair.h"
+#include "r300_fragprog_swizzle.h"
#define PROG_CODE \
@@ -334,7 +334,7 @@ GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *
code->node[0].alu_end = -1;
code->node[0].tex_end = -1;
- if (!radeonPairProgram(compiler->program, &pair_handler, compiler))
+ if (!radeonPairProgram(compiler->program, &pair_handler, compiler, compiler->debug))
return GL_FALSE;
if (!finish_node(compiler))
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
index fc9d855bce6..9e88d14ad54 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -33,7 +33,7 @@
#include "r300_fragprog_swizzle.h"
-#include "r300_reg.h"
+#include "../r300_reg.h"
#include "radeon_nqssadce.h"
#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO))
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index fdc18caacb7..4545982bf16 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -25,9 +25,9 @@
*
*/
-#include "compiler/r500_fragprog.h"
+#include "r500_fragprog.h"
-#include "r300_reg.h"
+#include "../r300_reg.h"
static void reset_srcreg(struct prog_src_register* reg)
{
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
index 232993f5816..a1ffde1e838 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -36,8 +36,8 @@
#include "shader/prog_parameter.h"
#include "shader/prog_instruction.h"
-#include "compiler/radeon_compiler.h"
-#include "compiler/radeon_nqssadce.h"
+#include "radeon_compiler.h"
+#include "radeon_nqssadce.h"
extern GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 3d5ddbb981b..5640ed0bcaa 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -43,11 +43,11 @@
*
*/
-#include "compiler/r500_fragprog.h"
+#include "r500_fragprog.h"
-#include "r300_reg.h"
+#include "../r300_reg.h"
-#include "compiler/radeon_program_pair.h"
+#include "radeon_program_pair.h"
#define PROG_CODE \
@@ -310,7 +310,7 @@ GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *
code->inst_offset = 0;
code->inst_end = -1;
- if (!radeonPairProgram(compiler->program, &pair_handler, compiler))
+ if (!radeonPairProgram(compiler->program, &pair_handler, compiler, compiler->debug))
return GL_FALSE;
if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index 4be948b0c11..254431731b3 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -35,8 +35,6 @@
#include "radeon_program_pair.h"
-#include "radeon_common.h"
-
#include "memory_pool.h"
#include "shader/prog_print.h"
@@ -877,7 +875,8 @@ static void emit_alu(struct pair_state *s)
GLboolean radeonPairProgram(struct gl_program *program,
- const struct radeon_pair_handler* handler, void *userdata)
+ const struct radeon_pair_handler* handler, void *userdata,
+ GLboolean debug)
{
struct pair_state s;
@@ -886,7 +885,7 @@ GLboolean radeonPairProgram(struct gl_program *program,
s.Program = program;
s.Handler = handler;
s.UserData = userdata;
- s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE;
+ s.Debug = debug;
s.Verbose = GL_FALSE && s.Debug;
if (s.Debug)
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index 9f479766332..86e3ec4537d 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -140,7 +140,8 @@ struct radeon_pair_handler {
};
GLboolean radeonPairProgram(struct gl_program *program,
- const struct radeon_pair_handler*, void *userdata);
+ const struct radeon_pair_handler*, void *userdata,
+ GLboolean debug);
void radeonPrintPairInstruction(struct radeon_pair_instruction *inst);
--
cgit v1.2.3
From 77a6ae64b6287c0f6ed3b03e908ab3ce397ff02f Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Thu, 16 Jul 2009 23:56:15 +0200
Subject: r300/compiler: Compile the compiler seperately into an archive
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This is all part of untangling the compiler from the classic driver, so that
it may be used in Gallium without depending on Mesa stuff if possible
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/Makefile | 22 ++++-----
src/mesa/drivers/dri/r300/compiler/Makefile | 71 +++++++++++++++++++++++++++++
2 files changed, 79 insertions(+), 14 deletions(-)
create mode 100644 src/mesa/drivers/dri/r300/compiler/Makefile
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
index 8a85293756f..95c6765bc4f 100644
--- a/src/mesa/drivers/dri/r300/Makefile
+++ b/src/mesa/drivers/dri/r300/Makefile
@@ -38,19 +38,6 @@ RADEON_COMMON_SOURCES = \
radeon_span.c \
radeon_fbo.c
-RADEON_COMPILER_SOURCES = \
- compiler/radeon_nqssadce.c \
- compiler/radeon_program.c \
- compiler/radeon_program_alu.c \
- compiler/radeon_program_pair.c \
- compiler/r3xx_fragprog.c \
- compiler/r300_fragprog.c \
- compiler/r300_fragprog_swizzle.c \
- compiler/r300_fragprog_emit.c \
- compiler/r500_fragprog.c \
- compiler/r500_fragprog_emit.c \
- compiler/memory_pool.c
-
DRIVER_SOURCES = \
radeon_screen.c \
r300_context.c \
@@ -67,7 +54,6 @@ DRIVER_SOURCES = \
r300_emit.c \
r300_swtcl.c \
$(RADEON_COMMON_SOURCES) \
- $(RADEON_COMPILER_SOURCES) \
$(EGL_SOURCES) \
$(CS_SOURCES)
@@ -80,8 +66,16 @@ DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \
DRI_LIB_DEPS += $(RADEON_LDFLAGS)
+PIPE_DRIVERS = compiler/libr300compiler.a
+
##### TARGETS #####
include ../Makefile.template
symlinks:
+
+# Mark the archive phony so that we always check for recompilation
+.PHONY : compiler/libr300compiler.a
+
+compiler/libr300compiler.a:
+ cd compiler && $(MAKE)
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
new file mode 100644
index 00000000000..4da173cb587
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -0,0 +1,71 @@
+# src/mesa/drivers/dri/r300/compiler/Makefile
+
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = r300compiler
+
+C_SOURCES = \
+ radeon_nqssadce.c \
+ radeon_program.c \
+ radeon_program_alu.c \
+ radeon_program_pair.c \
+ r3xx_fragprog.c \
+ r300_fragprog.c \
+ r300_fragprog_swizzle.c \
+ r300_fragprog_emit.c \
+ r500_fragprog.c \
+ r500_fragprog_emit.c \
+ \
+ memory_pool.c
+
+
+### Basic defines ###
+
+OBJECTS = $(C_SOURCES:.c=.o) \
+ $(CPP_SOURCES:.cpp=.o) \
+ $(ASM_SOURCES:.S=.o)
+
+INCLUDES = \
+ -I. \
+ -I$(TOP)/include \
+ -I$(TOP)/src/mesa \
+
+
+##### TARGETS #####
+
+default: depend lib$(LIBNAME).a
+
+lib$(LIBNAME).a: $(OBJECTS) Makefile $(TOP)/configs/current
+ $(MKLIB) -o $(LIBNAME) -static $(OBJECTS)
+
+depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS)
+ rm -f depend
+ touch depend
+ $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) 2> /dev/null
+
+# Emacs tags
+tags:
+ etags `find . -name \*.[ch]` `find ../include`
+
+# Remove .o and backup files
+clean:
+ rm -f $(OBJECTS) lib$(LIBNAME).a depend depend.bak
+
+# Dummy target
+install:
+ @echo -n ""
+
+##### RULES #####
+
+.c.o:
+ $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@
+
+.cpp.o:
+ $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@
+
+.S.o:
+ $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@
+
+
+sinclude depend
--
cgit v1.2.3
From 647766494f657965c4ac7129d498918e89c9e912 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Tue, 21 Jul 2009 20:25:33 +0200
Subject: r300: Remove some unnecessary includes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/r300_fragprog_common.c | 6 ------
1 file changed, 6 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index 1c57ba49e5b..c7ffbad475b 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -44,12 +44,6 @@
#include "r300_state.h"
-#include "compiler/radeon_program.h"
-#include "compiler/radeon_program_alu.h"
-#include "compiler/r300_fragprog_swizzle.h"
-#include "compiler/r300_fragprog.h"
-#include "compiler/r500_fragprog.h"
-
static GLuint build_dtm(GLuint depthmode)
{
--
cgit v1.2.3
From 9cd5e3e13a1ed2415aa116e35bc9f550b07281c9 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Wed, 22 Jul 2009 21:29:35 +0200
Subject: r300: Add radeon_compiler as a base for compilation-related tasks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/Makefile | 1 +
.../drivers/dri/r300/compiler/r300_fragprog_emit.c | 2 +-
src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 8 ++---
.../drivers/dri/r300/compiler/r500_fragprog_emit.c | 2 +-
.../drivers/dri/r300/compiler/radeon_compiler.c | 36 ++++++++++++++++++++++
.../drivers/dri/r300/compiler/radeon_compiler.h | 12 +++++++-
.../dri/r300/compiler/radeon_program_pair.c | 36 ++++++++++------------
.../dri/r300/compiler/radeon_program_pair.h | 9 ++++--
src/mesa/drivers/dri/r300/r300_fragprog_common.c | 7 +++--
9 files changed, 82 insertions(+), 31 deletions(-)
create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index 4da173cb587..c0fd85c1810 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -6,6 +6,7 @@ include $(TOP)/configs/current
LIBNAME = r300compiler
C_SOURCES = \
+ radeon_compiler.c \
radeon_nqssadce.c \
radeon_program.c \
radeon_program_alu.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index 520d81d3b4f..861d532d072 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -334,7 +334,7 @@ GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *
code->node[0].alu_end = -1;
code->node[0].tex_end = -1;
- if (!radeonPairProgram(compiler->program, &pair_handler, compiler, compiler->debug))
+ if (!radeonPairProgram(&compiler->Base, compiler->program, &pair_handler, compiler))
return GL_FALSE;
if (!finish_node(compiler))
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index daef20fe675..d4a6205e700 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -239,7 +239,7 @@ GLboolean r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c
{
GLboolean success = GL_FALSE;
- if (c->debug) {
+ if (c->Base.Debug) {
fflush(stdout);
_mesa_printf("Fragment Program: Initial program:\n");
_mesa_print_program(c->program);
@@ -269,7 +269,7 @@ GLboolean r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c
radeonLocalTransform(c->program, 3, transformations);
}
- if (c->debug) {
+ if (c->Base.Debug) {
_mesa_printf("Fragment Program: After native rewrite:\n");
_mesa_print_program(c->program);
fflush(stdout);
@@ -291,7 +291,7 @@ GLboolean r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c
radeonNqssaDce(c->program, &nqssadce, 0);
}
- if (c->debug) {
+ if (c->Base.Debug) {
_mesa_printf("Compiler: after NqSSA-DCE:\n");
_mesa_print_program(c->program);
fflush(stdout);
@@ -303,7 +303,7 @@ GLboolean r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c
success = r300BuildFragmentProgramHwCode(c);
}
- if (!success || c->debug) {
+ if (!success || c->Base.Debug) {
if (c->is_r500) {
r500FragmentProgramDump(c->code);
} else {
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 5640ed0bcaa..a0cc88da9ca 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -310,7 +310,7 @@ GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *
code->inst_offset = 0;
code->inst_end = -1;
- if (!radeonPairProgram(compiler->program, &pair_handler, compiler, compiler->debug))
+ if (!radeonPairProgram(&compiler->Base, compiler->program, &pair_handler, compiler))
return GL_FALSE;
if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
new file mode 100644
index 00000000000..20af4a651aa
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2009 Nicolai Hähnle
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+
+void rc_init(struct radeon_compiler * c)
+{
+ memset(c, 0, sizeof(*c));
+
+ memory_pool_init(&c->Pool);
+}
+
+void rc_destroy(struct radeon_compiler * c)
+{
+ memory_pool_destroy(&c->Pool);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 92560b5d8ae..6c5a2e5c8c3 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -26,6 +26,8 @@
#include "main/mtypes.h"
#include "shader/prog_instruction.h"
+#include "memory_pool.h"
+
#define R300_PFS_MAX_ALU_INST 64
#define R300_PFS_MAX_TEX_INST 32
#define R300_PFS_MAX_TEX_INDIRECT 4
@@ -148,12 +150,20 @@ struct rX00_fragment_program_code {
gl_frag_attrib fog_attr;
};
+struct radeon_compiler {
+ struct memory_pool Pool;
+ GLboolean Debug;
+};
+
+void rc_init(struct radeon_compiler * c);
+void rc_destroy(struct radeon_compiler * c);
+
struct r300_fragment_program_compiler {
+ struct radeon_compiler Base;
struct rX00_fragment_program_code *code;
struct gl_program *program;
struct r300_fragment_program_external_state state;
GLboolean is_r500;
- GLboolean debug;
};
GLboolean r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index 254431731b3..5e0484f2960 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -36,6 +36,7 @@
#include "radeon_program_pair.h"
#include "memory_pool.h"
+#include "radeon_compiler.h"
#include "shader/prog_print.h"
#define error(fmt, args...) do { \
@@ -118,11 +119,10 @@ struct pair_register_translation {
};
struct pair_state {
- struct memory_pool Pool;
+ struct radeon_compiler * Compiler;
struct gl_program *Program;
const struct radeon_pair_handler *Handler;
GLboolean Error;
- GLboolean Debug;
GLboolean Verbose;
void *UserData;
@@ -341,7 +341,7 @@ static void scan_instructions(struct pair_state *s)
for(source = s->Program->Instructions, ip = 0;
source->Opcode != OPCODE_END;
++source, ++ip) {
- struct pair_state_instruction *pairinst = memory_pool_malloc(&s->Pool, sizeof(*pairinst));
+ struct pair_state_instruction *pairinst = memory_pool_malloc(&s->Compiler->Pool, sizeof(*pairinst));
memset(pairinst, 0, sizeof(struct pair_state_instruction));
pairinst->Instruction = *source;
@@ -377,7 +377,7 @@ static void scan_instructions(struct pair_state *s)
GET_BIT(pairinst->Instruction.DstReg.WriteMask, swz))
continue;
- struct reg_value_reader* r = memory_pool_malloc(&s->Pool, sizeof(*r));
+ struct reg_value_reader* r = memory_pool_malloc(&s->Compiler->Pool, sizeof(*r));
pairinst->NumDependencies++;
t->Value[swz]->NumReaders++;
r->Reader = pairinst;
@@ -400,7 +400,7 @@ static void scan_instructions(struct pair_state *s)
if (!GET_BIT(pairinst->Instruction.DstReg.WriteMask, j))
continue;
- struct reg_value* v = memory_pool_malloc(&s->Pool, sizeof(*v));
+ struct reg_value* v = memory_pool_malloc(&s->Compiler->Pool, sizeof(*v));
memset(v, 0, sizeof(struct reg_value));
v->Writer = pairinst;
if (t->Value[j]) {
@@ -580,7 +580,7 @@ static void emit_all_tex(struct pair_state *s)
get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
}
- if (s->Debug)
+ if (s->Compiler->Debug)
_mesa_printf(" BEGIN_TEX\n");
if (s->Handler->BeginTexBlock)
@@ -594,7 +594,7 @@ static void emit_all_tex(struct pair_state *s)
inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index);
- if (s->Debug) {
+ if (s->Compiler->Debug) {
_mesa_printf(" ");
_mesa_print_instruction(inst);
fflush(stdout);
@@ -620,7 +620,7 @@ static void emit_all_tex(struct pair_state *s)
s->Error = s->Error || !s->Handler->EmitTex(s->UserData, &rpti);
}
- if (s->Debug)
+ if (s->Compiler->Debug)
_mesa_printf(" END_TEX\n");
}
@@ -867,28 +867,28 @@ static void emit_alu(struct pair_state *s)
success: ;
}
- if (s->Debug)
+ if (s->Compiler->Debug)
radeonPrintPairInstruction(&pair);
s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair);
}
-GLboolean radeonPairProgram(struct gl_program *program,
- const struct radeon_pair_handler* handler, void *userdata,
- GLboolean debug)
+GLboolean radeonPairProgram(
+ struct radeon_compiler * compiler,
+ struct gl_program *program,
+ const struct radeon_pair_handler* handler, void *userdata)
{
struct pair_state s;
_mesa_bzero(&s, sizeof(s));
- memory_pool_init(&s.Pool);
+ s.Compiler = compiler;
s.Program = program;
s.Handler = handler;
s.UserData = userdata;
- s.Debug = debug;
- s.Verbose = GL_FALSE && s.Debug;
+ s.Verbose = GL_FALSE && s.Compiler->Debug;
- if (s.Debug)
+ if (s.Compiler->Debug)
_mesa_printf("Emit paired program\n");
scan_instructions(&s);
@@ -903,11 +903,9 @@ GLboolean radeonPairProgram(struct gl_program *program,
emit_alu(&s);
}
- if (s.Debug)
+ if (s.Compiler->Debug)
_mesa_printf(" END\n");
- memory_pool_destroy(&s.Pool);
-
return !s.Error;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index 86e3ec4537d..2e6bdf90390 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -30,6 +30,8 @@
#include "radeon_program.h"
+struct radeon_compiler;
+
/**
* Represents a paired instruction, as found in R300 and R500
@@ -139,9 +141,10 @@ struct radeon_pair_handler {
GLuint MaxHwTemps;
};
-GLboolean radeonPairProgram(struct gl_program *program,
- const struct radeon_pair_handler*, void *userdata,
- GLboolean debug);
+GLboolean radeonPairProgram(
+ struct radeon_compiler * compiler,
+ struct gl_program *program,
+ const struct radeon_pair_handler*, void *userdata);
void radeonPrintPairInstruction(struct radeon_pair_instruction *inst);
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index c7ffbad475b..5216f5904ad 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -89,18 +89,21 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog
r300ContextPtr r300 = R300_CONTEXT(ctx);
struct r300_fragment_program_compiler compiler;
+ rc_init(&compiler.Base);
+ compiler.Base.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE;
+
compiler.code = &fp->code;
compiler.state = fp->state;
compiler.program = _mesa_clone_program(ctx, &cont->Base.Base);
compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE;
- compiler.debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE;
if (!r3xx_compile_fragment_program(&compiler))
fp->error = GL_TRUE;
fp->InputsRead = compiler.program->InputsRead;
-
fp->Base = compiler.program;
+
+ rc_destroy(&compiler.Base);
}
struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ctx)
--
cgit v1.2.3
From 9198ab8bfca465a327ea1f2429b6ddfeb0a2b258 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Wed, 22 Jul 2009 22:10:13 +0200
Subject: r300: Introduce rc_program and use it in radeon_pair
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The goal is to convert both Mesa and TGSI programs into an intermediate format
that happens to be convenient for us.
Signed-off-by: Nicolai Hähnle
---
.../drivers/dri/r300/compiler/r300_fragprog_emit.c | 2 +-
src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 2 +
.../drivers/dri/r300/compiler/r500_fragprog_emit.c | 2 +-
.../drivers/dri/r300/compiler/radeon_compiler.c | 3 +
.../drivers/dri/r300/compiler/radeon_compiler.h | 18 ++++++
.../drivers/dri/r300/compiler/radeon_program.c | 70 ++++++++++++++++++++++
.../drivers/dri/r300/compiler/radeon_program.h | 9 +++
.../dri/r300/compiler/radeon_program_pair.c | 15 ++---
.../dri/r300/compiler/radeon_program_pair.h | 1 -
src/mesa/drivers/dri/r300/r300_fragprog_common.c | 2 +-
10 files changed, 111 insertions(+), 13 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index 861d532d072..672b36532c9 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -334,7 +334,7 @@ GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *
code->node[0].alu_end = -1;
code->node[0].tex_end = -1;
- if (!radeonPairProgram(&compiler->Base, compiler->program, &pair_handler, compiler))
+ if (!radeonPairProgram(&compiler->Base, &pair_handler, compiler))
return GL_FALSE;
if (!finish_node(compiler))
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index d4a6205e700..30fedb42118 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -297,6 +297,8 @@ GLboolean r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c
fflush(stdout);
}
+ rc_mesa_to_rc_program(&c->Base, c->program);
+
if (c->is_r500) {
success = r500BuildFragmentProgramHwCode(c);
} else {
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index a0cc88da9ca..f8a1dc5fbeb 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -310,7 +310,7 @@ GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *
code->inst_offset = 0;
code->inst_end = -1;
- if (!radeonPairProgram(&compiler->Base, compiler->program, &pair_handler, compiler))
+ if (!radeonPairProgram(&compiler->Base, &pair_handler, compiler))
return GL_FALSE;
if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index 20af4a651aa..17c9b17682c 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -28,6 +28,9 @@ void rc_init(struct radeon_compiler * c)
memset(c, 0, sizeof(*c));
memory_pool_init(&c->Pool);
+ c->Program.Instructions.Prev = &c->Program.Instructions;
+ c->Program.Instructions.Next = &c->Program.Instructions;
+ c->Program.Instructions.I.Opcode = OPCODE_END;
}
void rc_destroy(struct radeon_compiler * c)
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 6c5a2e5c8c3..9b9b9c5c654 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -150,8 +150,26 @@ struct rX00_fragment_program_code {
gl_frag_attrib fog_attr;
};
+struct rc_instruction {
+ struct rc_instruction * Prev;
+ struct rc_instruction * Next;
+ struct prog_instruction I;
+};
+
+struct rc_program {
+ /**
+ * Instructions.Next points to the first instruction,
+ * Instructions.Prev points to the last instruction.
+ */
+ struct rc_instruction Instructions;
+
+ GLbitfield InputsRead;
+ GLbitfield ShadowSamplers; /**< Texture units used for shadow sampling. */
+};
+
struct radeon_compiler {
struct memory_pool Pool;
+ struct rc_program Program;
GLboolean Debug;
};
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
index 0022d0a76ce..d6cc62ff8bd 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
@@ -27,6 +27,7 @@
#include "radeon_program.h"
+#include "radeon_compiler.h"
#include "shader/prog_print.h"
@@ -124,3 +125,72 @@ struct prog_instruction *radeonAppendInstructions(struct gl_program *program, in
_mesa_insert_instructions(program, oldnum, count);
return program->Instructions + oldnum;
}
+
+
+GLint rc_find_free_temporary(struct radeon_compiler * c)
+{
+ GLboolean used[MAX_PROGRAM_TEMPS];
+ GLuint i;
+
+ memset(used, 0, sizeof(used));
+
+ for (struct rc_instruction * rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) {
+ const struct prog_instruction *inst = &rcinst->I;
+ const GLuint n = _mesa_num_inst_src_regs(inst->Opcode);
+ GLuint k;
+
+ for (k = 0; k < n; k++) {
+ if (inst->SrcReg[k].File == PROGRAM_TEMPORARY)
+ used[inst->SrcReg[k].Index] = GL_TRUE;
+ }
+ }
+
+ for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
+ if (!used[i])
+ return i;
+ }
+
+ return -1;
+}
+
+
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c)
+{
+ struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction));
+
+ inst->Prev = 0;
+ inst->Next = 0;
+
+ _mesa_init_instructions(&inst->I, 1);
+
+ return inst;
+}
+
+
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after)
+{
+ struct rc_instruction * inst = rc_alloc_instruction(c);
+
+ inst->Prev = after;
+ inst->Next = after->Next;
+
+ inst->Prev->Next = inst;
+ inst->Next->Prev = inst;
+
+ return inst;
+}
+
+
+void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program)
+{
+ struct prog_instruction *source;
+
+ for(source = program->Instructions; source->Opcode != OPCODE_END; ++source) {
+ struct rc_instruction * dest = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+ dest->I = *source;
+ }
+
+ c->Program.ShadowSamplers = program->ShadowSamplers;
+ c->Program.InputsRead = program->InputsRead;
+}
+
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index 5b42883812a..7e0f2544837 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -34,6 +34,8 @@
#include "shader/program.h"
#include "shader/prog_instruction.h"
+struct radeon_compiler;
+struct rc_instruction;
enum {
PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */
@@ -120,4 +122,11 @@ GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx);
struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count);
+GLint rc_find_free_temporary(struct radeon_compiler * c);
+
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
+
+void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program);
+
#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index 5e0484f2960..ffc218b5ecc 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -120,7 +120,6 @@ struct pair_register_translation {
struct pair_state {
struct radeon_compiler * Compiler;
- struct gl_program *Program;
const struct radeon_pair_handler *Handler;
GLboolean Error;
GLboolean Verbose;
@@ -335,16 +334,16 @@ static void classify_instruction(struct pair_state *s,
*/
static void scan_instructions(struct pair_state *s)
{
- struct prog_instruction *source;
+ struct rc_instruction *source;
GLuint ip;
- for(source = s->Program->Instructions, ip = 0;
- source->Opcode != OPCODE_END;
- ++source, ++ip) {
+ for(source = s->Compiler->Program.Instructions.Next, ip = 0;
+ source != &s->Compiler->Program.Instructions;
+ source = source->Next, ++ip) {
struct pair_state_instruction *pairinst = memory_pool_malloc(&s->Compiler->Pool, sizeof(*pairinst));
memset(pairinst, 0, sizeof(struct pair_state_instruction));
- pairinst->Instruction = *source;
+ pairinst->Instruction = source->I;
pairinst->IP = ip;
final_rewrite(s, &pairinst->Instruction);
classify_instruction(s, pairinst);
@@ -438,7 +437,7 @@ static void scan_instructions(struct pair_state *s)
*/
static void allocate_input_registers(struct pair_state *s)
{
- GLuint InputsRead = s->Program->InputsRead;
+ GLuint InputsRead = s->Compiler->Program.InputsRead;
int i;
GLuint hwindex = 0;
@@ -876,14 +875,12 @@ static void emit_alu(struct pair_state *s)
GLboolean radeonPairProgram(
struct radeon_compiler * compiler,
- struct gl_program *program,
const struct radeon_pair_handler* handler, void *userdata)
{
struct pair_state s;
_mesa_bzero(&s, sizeof(s));
s.Compiler = compiler;
- s.Program = program;
s.Handler = handler;
s.UserData = userdata;
s.Verbose = GL_FALSE && s.Compiler->Debug;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index 2e6bdf90390..3992082662b 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -143,7 +143,6 @@ struct radeon_pair_handler {
GLboolean radeonPairProgram(
struct radeon_compiler * compiler,
- struct gl_program *program,
const struct radeon_pair_handler*, void *userdata);
void radeonPrintPairInstruction(struct radeon_pair_instruction *inst);
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index 5216f5904ad..a89dbb00499 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -100,7 +100,7 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog
if (!r3xx_compile_fragment_program(&compiler))
fp->error = GL_TRUE;
- fp->InputsRead = compiler.program->InputsRead;
+ fp->InputsRead = compiler.Base.Program.InputsRead;
fp->Base = compiler.program;
rc_destroy(&compiler.Base);
--
cgit v1.2.3
From a808b10ce82692ff91ab69a1afda24dda56cba6b Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Wed, 22 Jul 2009 22:13:06 +0200
Subject: r300: Reduce include dependencies
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/radeon_code.h | 150 +++++++++++++++++++++
.../drivers/dri/r300/compiler/radeon_compiler.h | 122 +----------------
src/mesa/drivers/dri/r300/r300_context.h | 2 +-
src/mesa/drivers/dri/r300/r300_fragprog_common.c | 2 +
4 files changed, 154 insertions(+), 122 deletions(-)
create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_code.h
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
new file mode 100644
index 00000000000..7d8bf483e79
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2009 Nicolai Hähnle
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_CODE_H
+#define RADEON_CODE_H
+
+
+#define R300_PFS_MAX_ALU_INST 64
+#define R300_PFS_MAX_TEX_INST 32
+#define R300_PFS_MAX_TEX_INDIRECT 4
+#define R300_PFS_NUM_TEMP_REGS 32
+#define R300_PFS_NUM_CONST_REGS 32
+
+#define R500_PFS_MAX_INST 512
+#define R500_PFS_NUM_TEMP_REGS 128
+#define R500_PFS_NUM_CONST_REGS 256
+
+
+#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
+#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1)
+
+
+/**
+ * Stores state that influences the compilation of a fragment program.
+ */
+struct r300_fragment_program_external_state {
+ struct {
+ /**
+ * If the sampler is used as a shadow sampler,
+ * this field is:
+ * 0 - GL_LUMINANCE
+ * 1 - GL_INTENSITY
+ * 2 - GL_ALPHA
+ * depending on the depth texture mode.
+ */
+ GLuint depth_texture_mode : 2;
+
+ /**
+ * If the sampler is used as a shadow sampler,
+ * this field is (texture_compare_func - GL_NEVER).
+ * [e.g. if compare function is GL_LEQUAL, this field is 3]
+ *
+ * Otherwise, this field is 0.
+ */
+ GLuint texture_compare_func : 3;
+ } unit[16];
+};
+
+
+
+struct r300_fragment_program_node {
+ int tex_offset; /**< first tex instruction */
+ int tex_end; /**< last tex instruction, relative to tex_offset */
+ int alu_offset; /**< first ALU instruction */
+ int alu_end; /**< last ALU instruction, relative to alu_offset */
+ int flags;
+};
+
+/**
+ * Stores an R300 fragment program in its compiled-to-hardware form.
+ */
+struct r300_fragment_program_code {
+ struct {
+ int length; /**< total # of texture instructions used */
+ GLuint inst[R300_PFS_MAX_TEX_INST];
+ } tex;
+
+ struct {
+ int length; /**< total # of ALU instructions used */
+ struct {
+ GLuint inst0;
+ GLuint inst1;
+ GLuint inst2;
+ GLuint inst3;
+ } inst[R300_PFS_MAX_ALU_INST];
+ } alu;
+
+ struct r300_fragment_program_node node[4];
+ int cur_node;
+ int first_node_has_tex;
+
+ /**
+ * Remember which program register a given hardware constant
+ * belongs to.
+ */
+ struct prog_src_register constant[R300_PFS_NUM_CONST_REGS];
+ int const_nr;
+
+ int max_temp_idx;
+};
+
+
+struct r500_fragment_program_code {
+ struct {
+ GLuint inst0;
+ GLuint inst1;
+ GLuint inst2;
+ GLuint inst3;
+ GLuint inst4;
+ GLuint inst5;
+ } inst[R500_PFS_MAX_INST];
+
+ int inst_offset;
+ int inst_end;
+
+ /**
+ * Remember which program register a given hardware constant
+ * belongs to.
+ */
+ struct prog_src_register constant[R500_PFS_NUM_CONST_REGS];
+ int const_nr;
+
+ int max_temp_idx;
+};
+
+struct rX00_fragment_program_code {
+ union {
+ struct r300_fragment_program_code r300;
+ struct r500_fragment_program_code r500;
+ } code;
+
+ GLboolean writes_depth;
+
+ /* attribute that we are sending the WPOS in */
+ gl_frag_attrib wpos_attr;
+ /* attribute that we are sending the fog coordinate in */
+ gl_frag_attrib fog_attr;
+};
+
+
+#endif /* RADEON_CODE_H */
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 9b9b9c5c654..a5f70173b7d 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -27,128 +27,8 @@
#include "shader/prog_instruction.h"
#include "memory_pool.h"
+#include "radeon_code.h"
-#define R300_PFS_MAX_ALU_INST 64
-#define R300_PFS_MAX_TEX_INST 32
-#define R300_PFS_MAX_TEX_INDIRECT 4
-#define R300_PFS_NUM_TEMP_REGS 32
-#define R300_PFS_NUM_CONST_REGS 32
-
-#define R500_PFS_MAX_INST 512
-#define R500_PFS_NUM_TEMP_REGS 128
-#define R500_PFS_NUM_CONST_REGS 256
-
-
-#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
-#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1)
-
-
-/**
- * Stores state that influences the compilation of a fragment program.
- */
-struct r300_fragment_program_external_state {
- struct {
- /**
- * If the sampler is used as a shadow sampler,
- * this field is:
- * 0 - GL_LUMINANCE
- * 1 - GL_INTENSITY
- * 2 - GL_ALPHA
- * depending on the depth texture mode.
- */
- GLuint depth_texture_mode : 2;
-
- /**
- * If the sampler is used as a shadow sampler,
- * this field is (texture_compare_func - GL_NEVER).
- * [e.g. if compare function is GL_LEQUAL, this field is 3]
- *
- * Otherwise, this field is 0.
- */
- GLuint texture_compare_func : 3;
- } unit[16];
-};
-
-
-
-struct r300_fragment_program_node {
- int tex_offset; /**< first tex instruction */
- int tex_end; /**< last tex instruction, relative to tex_offset */
- int alu_offset; /**< first ALU instruction */
- int alu_end; /**< last ALU instruction, relative to alu_offset */
- int flags;
-};
-
-/**
- * Stores an R300 fragment program in its compiled-to-hardware form.
- */
-struct r300_fragment_program_code {
- struct {
- int length; /**< total # of texture instructions used */
- GLuint inst[R300_PFS_MAX_TEX_INST];
- } tex;
-
- struct {
- int length; /**< total # of ALU instructions used */
- struct {
- GLuint inst0;
- GLuint inst1;
- GLuint inst2;
- GLuint inst3;
- } inst[R300_PFS_MAX_ALU_INST];
- } alu;
-
- struct r300_fragment_program_node node[4];
- int cur_node;
- int first_node_has_tex;
-
- /**
- * Remember which program register a given hardware constant
- * belongs to.
- */
- struct prog_src_register constant[R300_PFS_NUM_CONST_REGS];
- int const_nr;
-
- int max_temp_idx;
-};
-
-
-struct r500_fragment_program_code {
- struct {
- GLuint inst0;
- GLuint inst1;
- GLuint inst2;
- GLuint inst3;
- GLuint inst4;
- GLuint inst5;
- } inst[R500_PFS_MAX_INST];
-
- int inst_offset;
- int inst_end;
-
- /**
- * Remember which program register a given hardware constant
- * belongs to.
- */
- struct prog_src_register constant[R500_PFS_NUM_CONST_REGS];
- int const_nr;
-
- int max_temp_idx;
-};
-
-struct rX00_fragment_program_code {
- union {
- struct r300_fragment_program_code r300;
- struct r500_fragment_program_code r500;
- } code;
-
- GLboolean writes_depth;
-
- /* attribute that we are sending the WPOS in */
- gl_frag_attrib wpos_attr;
- /* attribute that we are sending the fog coordinate in */
- gl_frag_attrib fog_attr;
-};
struct rc_instruction {
struct rc_instruction * Prev;
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index ea30d3e12ff..ce742641a43 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -44,7 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/mtypes.h"
#include "shader/prog_instruction.h"
-#include "compiler/radeon_compiler.h"
+#include "compiler/radeon_code.h"
struct r300_context;
typedef struct r300_context r300ContextRec;
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index a89dbb00499..27aec645759 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -42,6 +42,8 @@
#include "shader/prog_parameter.h"
#include "shader/prog_print.h"
+#include "compiler/radeon_compiler.h"
+
#include "r300_state.h"
--
cgit v1.2.3
From 927f5f16826a95cf665219c4b0039eeafb936057 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Wed, 22 Jul 2009 22:47:31 +0200
Subject: r300: Remove faux lazy translation of vertex programs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
De facto, vertex programs were translated immediately in all situations,
so let's just stop pretending that we do lazy translation.
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/r300_context.h | 1 -
src/mesa/drivers/dri/r300/r300_shader.c | 4 +---
src/mesa/drivers/dri/r300/r300_state.c | 4 +---
src/mesa/drivers/dri/r300/r300_vertprog.c | 7 ++++---
src/mesa/drivers/dri/r300/r300_vertprog.h | 4 +---
5 files changed, 7 insertions(+), 13 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index ce742641a43..bd363f6b3a2 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -420,7 +420,6 @@ struct r300_vertex_program {
} body;
} hw_code;
- GLboolean translated;
GLboolean error;
int pos_end;
diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c
index 06c893881e5..3704c101558 100644
--- a/src/mesa/drivers/dri/r300/r300_shader.c
+++ b/src/mesa/drivers/dri/r300/r300_shader.c
@@ -126,9 +126,7 @@ r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
return !fp->error;
} else {
- struct r300_vertex_program *vp = r300SelectVertexShader(ctx);
- if (!vp->translated)
- r300TranslateVertexShader(vp);
+ struct r300_vertex_program *vp = r300SelectAndTranslateVertexShader(ctx);
return !vp->error;
}
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index 66d9a69622a..fdc3362dddd 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -2020,9 +2020,7 @@ void r300UpdateShaders(r300ContextPtr rmesa)
}
}
- vp = r300SelectVertexShader(ctx);
- if (!vp->translated)
- r300TranslateVertexShader(vp);
+ vp = r300SelectAndTranslateVertexShader(ctx);
r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, vp->error);
}
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index cf4788411fe..1df50c15091 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -1006,7 +1006,7 @@ static void t_inputs_outputs(struct r300_vertex_program *vp)
}
}
-void r300TranslateVertexShader(struct r300_vertex_program *vp)
+static void translate_vertex_program(struct r300_vertex_program *vp)
{
struct prog_instruction *vpi = vp->Base->Base.Instructions;
int i;
@@ -1020,7 +1020,6 @@ void r300TranslateVertexShader(struct r300_vertex_program *vp)
vp->pos_end = 0; /* Not supported yet */
vp->hw_code.length = 0;
- vp->translated = GL_TRUE;
vp->error = GL_FALSE;
t_inputs_outputs(vp);
@@ -1628,10 +1627,12 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
vp->num_temporaries = max + 1;
}
+ translate_vertex_program(vp);
+
return vp;
}
-struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx)
+struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
struct r300_vertex_program_key wanted_key = { 0 };
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.h b/src/mesa/drivers/dri/r300/r300_vertprog.h
index 2dab11c3378..896699ffe2e 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.h
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.h
@@ -34,8 +34,6 @@
void r300SetupVertexProgram(r300ContextPtr rmesa);
-struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx);
-
-void r300TranslateVertexShader(struct r300_vertex_program *vp);
+struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx);
#endif
--
cgit v1.2.3
From 11cd795940723e79f99e7887a2e2dd8410352572 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Fri, 24 Jul 2009 00:32:41 +0200
Subject: r300: Cleanup vertex_program structure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/r300_context.h | 14 ++++++++-----
src/mesa/drivers/dri/r300/r300_draw.c | 4 ++--
src/mesa/drivers/dri/r300/r300_state.c | 4 ++--
src/mesa/drivers/dri/r300/r300_vertprog.c | 34 +++++++++++++++++--------------
4 files changed, 32 insertions(+), 24 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index bd363f6b3a2..d14d992366b 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -402,15 +402,19 @@ struct r300_hw_state {
#include "tnl_dd/t_dd_vertex.h"
#undef TAG
+struct r300_vertex_program_key {
+ GLuint FpReads;
+ GLuint FogAttr;
+ GLuint WPosAttr;
+};
+
struct r300_vertex_program {
struct gl_vertex_program *Base;
struct r300_vertex_program *next;
- struct r300_vertex_program_key {
- GLuint FpReads;
- GLuint FogAttr;
- GLuint WPosAttr;
- } key;
+ struct r300_vertex_program_key key;
+ GLbitfield InputsRead;
+ GLbitfield OutputsWritten;
struct r300_vertex_shader_hw_code {
int length;
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
index 9769ff53994..e2e92fde482 100644
--- a/src/mesa/drivers/dri/r300/r300_draw.c
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -341,7 +341,7 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar
{
int i, tmp;
- tmp = r300->selected_vp->Base->Base.InputsRead;
+ tmp = r300->selected_vp->InputsRead;
i = 0;
vbuf->num_attribs = 0;
while (tmp) {
@@ -437,7 +437,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx,
if (r300->fallback)
return GL_FALSE;
- r300SetupVAP(ctx, r300->selected_vp->Base->Base.InputsRead, r300->selected_vp->Base->Base.OutputsWritten);
+ r300SetupVAP(ctx, r300->selected_vp->InputsRead, r300->selected_vp->OutputsWritten);
r300UpdateShaderStates(r300);
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index fdc3362dddd..ad57b7e2f11 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -1458,7 +1458,7 @@ static void r300SetupRSUnit(GLcontext * ctx)
hw_tcl_on = r300->options.hw_tcl_enabled;
if (hw_tcl_on)
- OutputsWritten.vp_outputs = r300->selected_vp->Base->Base.OutputsWritten;
+ OutputsWritten.vp_outputs = r300->selected_vp->OutputsWritten;
else
RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
@@ -1552,7 +1552,7 @@ static void r500SetupRSUnit(GLcontext * ctx)
hw_tcl_on = r300->options.hw_tcl_enabled;
if (hw_tcl_on)
- OutputsWritten.vp_outputs = r300->selected_vp->Base->Base.OutputsWritten;
+ OutputsWritten.vp_outputs = r300->selected_vp->OutputsWritten;
else
RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index 1df50c15091..95cedd9d919 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -932,14 +932,14 @@ static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp,
return inst;
}
-static void t_inputs_outputs(struct r300_vertex_program *vp)
+static void t_inputs_outputs(struct r300_vertex_program *vp, struct gl_program * glvp)
{
int i;
int cur_reg;
GLuint OutputsWritten, InputsRead;
- OutputsWritten = vp->Base->Base.OutputsWritten;
- InputsRead = vp->Base->Base.InputsRead;
+ OutputsWritten = glvp->OutputsWritten;
+ InputsRead = glvp->InputsRead;
cur_reg = -1;
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
@@ -1006,9 +1006,9 @@ static void t_inputs_outputs(struct r300_vertex_program *vp)
}
}
-static void translate_vertex_program(struct r300_vertex_program *vp)
+static void translate_vertex_program(struct r300_vertex_program *vp, struct gl_program * glvp)
{
- struct prog_instruction *vpi = vp->Base->Base.Instructions;
+ struct prog_instruction *vpi = glvp->Instructions;
int i;
GLuint *inst;
unsigned long num_operands;
@@ -1022,7 +1022,7 @@ static void translate_vertex_program(struct r300_vertex_program *vp)
vp->hw_code.length = 0;
vp->error = GL_FALSE;
- t_inputs_outputs(vp);
+ t_inputs_outputs(vp, glvp);
for (inst = vp->hw_code.body.d; vpi->Opcode != OPCODE_END;
vpi++, inst += 4) {
@@ -1539,13 +1539,14 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
struct r300_vertex_program *vp;
+ struct gl_vertex_program * glvp = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base);
struct gl_program *prog;
vp = _mesa_calloc(sizeof(*vp));
- vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base);
+ vp->Base = glvp;
_mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
- prog = &vp->Base->Base;
+ prog = &glvp->Base;
if (RADEON_DEBUG & DEBUG_VERTS) {
fprintf(stderr, "Initial vertex program:\n");
@@ -1553,16 +1554,16 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
fflush(stdout);
}
- if (vp->Base->IsPositionInvariant) {
- _mesa_insert_mvp_code(ctx, vp->Base);
+ if (glvp->IsPositionInvariant) {
+ _mesa_insert_mvp_code(ctx, glvp);
}
if (r300->selected_fp->code.wpos_attr != FRAG_ATTRIB_MAX) {
- pos_as_texcoord(&vp->Base->Base, r300->selected_fp->code.wpos_attr - FRAG_ATTRIB_TEX0);
+ pos_as_texcoord(&glvp->Base, r300->selected_fp->code.wpos_attr - FRAG_ATTRIB_TEX0);
}
if (r300->selected_fp->code.fog_attr != FRAG_ATTRIB_MAX) {
- fog_as_texcoord(&vp->Base->Base, r300->selected_fp->code.fog_attr - FRAG_ATTRIB_TEX0);
+ fog_as_texcoord(&glvp->Base, r300->selected_fp->code.fog_attr - FRAG_ATTRIB_TEX0);
}
addArtificialOutputs(ctx, prog);
@@ -1627,7 +1628,10 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
vp->num_temporaries = max + 1;
}
- translate_vertex_program(vp);
+ translate_vertex_program(vp, &glvp->Base);
+
+ vp->InputsRead = glvp->Base.InputsRead;
+ vp->OutputsWritten = glvp->Base.OutputsWritten;
return vp;
}
@@ -1716,8 +1720,8 @@ void r300SetupVertexProgram(r300ContextPtr rmesa)
r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code));
inst_count = (prog->hw_code.length / 4) - 1;
- r300VapCntl(rmesa, _mesa_bitcount(prog->Base->Base.InputsRead),
- _mesa_bitcount(prog->Base->Base.OutputsWritten), prog->num_temporaries);
+ r300VapCntl(rmesa, _mesa_bitcount(prog->InputsRead),
+ _mesa_bitcount(prog->OutputsWritten), prog->num_temporaries);
R300_STATECHANGE(rmesa, pvs);
rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
--
cgit v1.2.3
From 84445273ed554ea6fa65c894bbe098eb3f3d1230 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Thu, 23 Jul 2009 18:40:41 +0200
Subject: r300: Move vertex program compilation to compiler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This is just the first step of refactoring. The separation is not yet
clean enough with this commit.
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/Makefile | 1 +
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 1533 +++++++++++++++++++
src/mesa/drivers/dri/r300/compiler/radeon_code.h | 25 +
.../drivers/dri/r300/compiler/radeon_compiler.h | 10 +
src/mesa/drivers/dri/r300/r300_context.h | 31 +-
src/mesa/drivers/dri/r300/r300_draw.c | 4 +-
src/mesa/drivers/dri/r300/r300_ioctl.c | 9 +-
src/mesa/drivers/dri/r300/r300_reg.h | 18 +
src/mesa/drivers/dri/r300/r300_state.c | 4 +-
src/mesa/drivers/dri/r300/r300_vertprog.c | 1549 +-------------------
src/mesa/drivers/dri/r300/r300_vertprog.h | 28 -
src/mesa/shader/prog_instruction.h | 9 +-
12 files changed, 1625 insertions(+), 1596 deletions(-)
create mode 100644 src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index c0fd85c1810..4e2ff50c69d 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -17,6 +17,7 @@ C_SOURCES = \
r300_fragprog_emit.c \
r500_fragprog.c \
r500_fragprog_emit.c \
+ r3xx_vertprog.c \
\
memory_pool.c
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
new file mode 100644
index 00000000000..b074c98ee9d
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -0,0 +1,1533 @@
+/*
+ * Copyright 2009 Nicolai Hähnle
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include "../r300_reg.h"
+
+#include "radeon_nqssadce.h"
+
+#include "shader/prog_optimize.h"
+#include "shader/prog_print.h"
+
+
+/* TODO: Get rid of t_src_class call */
+#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \
+ ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \
+ t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \
+ (t_src_class(a.File) == PVS_SRC_REG_INPUT && \
+ t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \
+
+/*
+ * Take an already-setup and valid source then swizzle it appropriately to
+ * obtain a constant ZERO or ONE source.
+ */
+#define __CONST(x, y) \
+ (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \
+ t_swizzle(y), \
+ t_swizzle(y), \
+ t_swizzle(y), \
+ t_swizzle(y), \
+ t_src_class(src[x].File), \
+ NEGATE_NONE) | (src[x].RelAddr << 4))
+
+
+
+
+static unsigned long t_dst_mask(GLuint mask)
+{
+ /* WRITEMASK_* is equivalent to VSF_FLAG_* */
+ return mask & WRITEMASK_XYZW;
+}
+
+static unsigned long t_dst_class(gl_register_file file)
+{
+
+ switch (file) {
+ case PROGRAM_TEMPORARY:
+ return PVS_DST_REG_TEMPORARY;
+ case PROGRAM_OUTPUT:
+ return PVS_DST_REG_OUT;
+ case PROGRAM_ADDRESS:
+ return PVS_DST_REG_A0;
+ /*
+ case PROGRAM_INPUT:
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_NAMED_PARAM:
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_WRITE_ONLY:
+ case PROGRAM_ADDRESS:
+ */
+ default:
+ fprintf(stderr, "problem in %s", __FUNCTION__);
+ _mesa_exit(-1);
+ return -1;
+ }
+}
+
+static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
+ struct prog_dst_register *dst)
+{
+ if (dst->File == PROGRAM_OUTPUT)
+ return vp->outputs[dst->Index];
+
+ return dst->Index;
+}
+
+static unsigned long t_src_class(gl_register_file file)
+{
+ switch (file) {
+ case PROGRAM_TEMPORARY:
+ return PVS_SRC_REG_TEMPORARY;
+ case PROGRAM_INPUT:
+ return PVS_SRC_REG_INPUT;
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_NAMED_PARAM:
+ case PROGRAM_CONSTANT:
+ case PROGRAM_STATE_VAR:
+ return PVS_SRC_REG_CONSTANT;
+ /*
+ case PROGRAM_OUTPUT:
+ case PROGRAM_WRITE_ONLY:
+ case PROGRAM_ADDRESS:
+ */
+ default:
+ fprintf(stderr, "problem in %s", __FUNCTION__);
+ _mesa_exit(-1);
+ return -1;
+ }
+}
+
+static INLINE unsigned long t_swizzle(GLubyte swizzle)
+{
+ /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
+ return swizzle;
+}
+
+static unsigned long t_src_index(struct r300_vertex_program_code *vp,
+ struct prog_src_register *src)
+{
+ if (src->File == PROGRAM_INPUT) {
+ assert(vp->inputs[src->Index] != -1);
+ return vp->inputs[src->Index];
+ } else {
+ if (src->Index < 0) {
+ fprintf(stderr,
+ "negative offsets for indirect addressing do not work.\n");
+ return 0;
+ }
+ return src->Index;
+ }
+}
+
+/* these two functions should probably be merged... */
+
+static unsigned long t_src(struct r300_vertex_program_code *vp,
+ struct prog_src_register *src)
+{
+ /* src->Negate uses the NEGATE_ flags from program_instruction.h,
+ * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+ */
+ return PVS_SRC_OPERAND(t_src_index(vp, src),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 1)),
+ t_swizzle(GET_SWZ(src->Swizzle, 2)),
+ t_swizzle(GET_SWZ(src->Swizzle, 3)),
+ t_src_class(src->File),
+ src->Negate) | (src->RelAddr << 4);
+}
+
+static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
+ struct prog_src_register *src)
+{
+ /* src->Negate uses the NEGATE_ flags from program_instruction.h,
+ * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+ */
+ return PVS_SRC_OPERAND(t_src_index(vp, src),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_src_class(src->File),
+ src->Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (src->RelAddr << 4);
+}
+
+static GLboolean valid_dst(struct r300_vertex_program_code *vp,
+ struct prog_dst_register *dst)
+{
+ if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
+ return GL_FALSE;
+ } else if (dst->File == PROGRAM_ADDRESS) {
+ assert(dst->Index == 0);
+ }
+
+ return GL_TRUE;
+}
+
+static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
+
+ inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
+ t_src_class(src[0].File),
+ (!src[0].
+ Negate) ? NEGATE_XYZW : NEGATE_NONE) |
+ (src[0].RelAddr << 4);
+ inst[3] = 0;
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
+
+ inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+ SWIZZLE_ZERO,
+ t_src_class(src[0].File),
+ src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (src[0].RelAddr << 4);
+ inst[2] =
+ PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO,
+ t_src_class(src[1].File),
+ src[1].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (src[1].RelAddr << 4);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
+ inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+ PVS_SRC_SELECT_FORCE_1,
+ t_src_class(src[0].File),
+ src[0].Negate ? NEGATE_XYZ : NEGATE_NONE) |
+ (src[0].RelAddr << 4);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3],
+ int *u_temp_i)
+{
+ /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
+ ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
+
+ inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
+ GL_FALSE,
+ GL_FALSE,
+ *u_temp_i,
+ t_dst_mask(vpi->DstReg.WriteMask),
+ PVS_DST_REG_TEMPORARY);
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+ inst += 4;
+
+ inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = PVS_SRC_OPERAND(*u_temp_i,
+ PVS_SRC_SELECT_X,
+ PVS_SRC_SELECT_Y,
+ PVS_SRC_SELECT_Z,
+ PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY,
+ /* Not 100% sure about this */
+ (!src[0].
+ Negate) ? NEGATE_XYZW : NEGATE_NONE);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+ (*u_temp_i)--;
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
+
+ inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+ t_src_class(src[0].File),
+ src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (src[0].RelAddr << 4);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
+
+ inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ /* NOTE: Users swizzling might not work. */
+ inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
+ t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
+ PVS_SRC_SELECT_FORCE_0, // Z
+ t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
+ t_src_class(src[0].File),
+ src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (src[0].RelAddr << 4);
+ inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
+ PVS_SRC_SELECT_FORCE_0, // Z
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
+ t_src_class(src[0].File),
+ src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (src[0].RelAddr << 4);
+ inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
+ PVS_SRC_SELECT_FORCE_0, // Z
+ t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
+ t_src_class(src[0].File),
+ src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (src[0].RelAddr << 4);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
+ GL_FALSE,
+ GL_TRUE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = t_src(vp, &src[2]);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
+
+ inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = t_src_scalar(vp, &src[1]);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = t_src(vp, &src[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
+
+#if 0
+ inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
+ t_src_class(src[1].File),
+ (!src[1].
+ Negate) ? NEGATE_XYZW : NEGATE_NONE) |
+ (src[1].RelAddr << 4);
+ inst[3] = 0;
+#else
+ inst[0] =
+ PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ONE);
+ inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
+ t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
+ t_src_class(src[1].File),
+ (!src[1].
+ Negate) ? NEGATE_XYZW : NEGATE_NONE) |
+ (src[1].RelAddr << 4);
+#endif
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
+{
+ //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
+
+ inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &src[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+ return inst;
+}
+
+static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3],
+ int *u_temp_i)
+{
+ /* mul r0, r1.yzxw, r2.zxyw
+ mad r0, -r2.yzxw, r1.zxyw, r0
+ */
+
+ inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ *u_temp_i,
+ t_dst_mask(vpi->DstReg.WriteMask),
+ PVS_DST_REG_TEMPORARY);
+ inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
+ t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
+ t_src_class(src[0].File),
+ src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (src[0].RelAddr << 4);
+ inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
+ t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
+ t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
+ t_src_class(src[1].File),
+ src[1].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (src[1].RelAddr << 4);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+ inst += 4;
+
+ inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
+ t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
+ t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
+ t_src_class(src[1].File),
+ (!src[1].
+ Negate) ? NEGATE_XYZW : NEGATE_NONE) |
+ (src[1].RelAddr << 4);
+ inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
+ t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
+ t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
+ t_src_class(src[0].File),
+ src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (src[0].RelAddr << 4);
+ inst[3] =
+ PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
+ PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
+ PVS_SRC_REG_TEMPORARY, NEGATE_NONE);
+
+ (*u_temp_i)--;
+
+ return inst;
+}
+
+static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_program * glvp)
+{
+ int i;
+ int cur_reg;
+ GLuint OutputsWritten, InputsRead;
+
+ OutputsWritten = glvp->OutputsWritten;
+ InputsRead = glvp->InputsRead;
+
+ cur_reg = -1;
+ for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+ if (InputsRead & (1 << i))
+ vp->inputs[i] = ++cur_reg;
+ else
+ vp->inputs[i] = -1;
+ }
+
+ cur_reg = 0;
+ for (i = 0; i < VERT_RESULT_MAX; i++)
+ vp->outputs[i] = -1;
+
+ assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
+
+ if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
+ vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
+ }
+
+ if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
+ vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
+ }
+
+ /* If we're writing back facing colors we need to send
+ * four colors to make front/back face colors selection work.
+ * If the vertex program doesn't write all 4 colors, lets
+ * pretend it does by skipping output index reg so the colors
+ * get written into appropriate output vectors.
+ */
+ if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
+ vp->outputs[VERT_RESULT_COL0] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+ OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ cur_reg++;
+ }
+
+ if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
+ vp->outputs[VERT_RESULT_COL1] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+ OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ cur_reg++;
+ }
+
+ if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+ vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ cur_reg++;
+ }
+
+ if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+ cur_reg++;
+ }
+
+ for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
+ if (OutputsWritten & (1 << i)) {
+ vp->outputs[i] = cur_reg++;
+ }
+ }
+
+ if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
+ vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
+ }
+}
+
+static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * compiler)
+{
+ struct prog_instruction *vpi = compiler->program->Instructions;
+ int i;
+ GLuint *inst;
+ unsigned long num_operands;
+ /* Initial value should be last tmp reg that hw supports.
+ Strangely enough r300 doesnt mind even though these would be out of range.
+ Smart enough to realize that it doesnt need it? */
+ int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
+ struct prog_src_register src[3];
+ struct r300_vertex_program_code * vp = compiler->code;
+
+ compiler->code->pos_end = 0; /* Not supported yet */
+ compiler->code->length = 0;
+
+ t_inputs_outputs(compiler->code, compiler->program);
+
+ for (inst = compiler->code->body.d; vpi->Opcode != OPCODE_END;
+ vpi++, inst += 4) {
+
+ {
+ int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i;
+ if((compiler->code->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) {
+ fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", compiler->code->num_temporaries, u_temp_used);
+ return GL_FALSE;
+ }
+ u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1;
+ }
+
+ if (!valid_dst(compiler->code, &vpi->DstReg)) {
+ /* redirect result to unused temp */
+ vpi->DstReg.File = PROGRAM_TEMPORARY;
+ vpi->DstReg.Index = u_temp_i;
+ }
+
+ num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
+
+ /* copy the sources (src) from mesa into a local variable... is this needed? */
+ for (i = 0; i < num_operands; i++) {
+ src[i] = vpi->SrcReg[i];
+ }
+
+ if (num_operands == 3) { /* TODO: scalars */
+ if (CMP_SRCS(src[1], src[2])
+ || CMP_SRCS(src[0], src[2])) {
+ inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ u_temp_i,
+ WRITEMASK_XYZW,
+ PVS_DST_REG_TEMPORARY);
+ inst[1] =
+ PVS_SRC_OPERAND(t_src_index(compiler->code, &src[2]),
+ SWIZZLE_X,
+ SWIZZLE_Y,
+ SWIZZLE_Z,
+ SWIZZLE_W,
+ t_src_class(src[2].File),
+ NEGATE_NONE) | (src[2].
+ RelAddr <<
+ 4);
+ inst[2] = __CONST(2, SWIZZLE_ZERO);
+ inst[3] = __CONST(2, SWIZZLE_ZERO);
+ inst += 4;
+
+ src[2].File = PROGRAM_TEMPORARY;
+ src[2].Index = u_temp_i;
+ src[2].RelAddr = 0;
+ u_temp_i--;
+ }
+ }
+
+ if (num_operands >= 2) {
+ if (CMP_SRCS(src[1], src[0])) {
+ inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+ GL_FALSE,
+ GL_FALSE,
+ u_temp_i,
+ WRITEMASK_XYZW,
+ PVS_DST_REG_TEMPORARY);
+ inst[1] =
+ PVS_SRC_OPERAND(t_src_index(compiler->code, &src[0]),
+ SWIZZLE_X,
+ SWIZZLE_Y,
+ SWIZZLE_Z,
+ SWIZZLE_W,
+ t_src_class(src[0].File),
+ NEGATE_NONE) | (src[0].
+ RelAddr <<
+ 4);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+ inst += 4;
+
+ src[0].File = PROGRAM_TEMPORARY;
+ src[0].Index = u_temp_i;
+ src[0].RelAddr = 0;
+ u_temp_i--;
+ }
+ }
+
+ switch (vpi->Opcode) {
+ case OPCODE_ABS:
+ inst = r300TranslateOpcodeABS(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_ADD:
+ inst = r300TranslateOpcodeADD(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_ARL:
+ inst = r300TranslateOpcodeARL(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_DP3:
+ inst = r300TranslateOpcodeDP3(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_DP4:
+ inst = r300TranslateOpcodeDP4(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_DPH:
+ inst = r300TranslateOpcodeDPH(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_DST:
+ inst = r300TranslateOpcodeDST(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_EX2:
+ inst = r300TranslateOpcodeEX2(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_EXP:
+ inst = r300TranslateOpcodeEXP(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_FLR:
+ inst = r300TranslateOpcodeFLR(compiler->code, vpi, inst, src, /* FIXME */
+ &u_temp_i);
+ break;
+ case OPCODE_FRC:
+ inst = r300TranslateOpcodeFRC(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_LG2:
+ inst = r300TranslateOpcodeLG2(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_LIT:
+ inst = r300TranslateOpcodeLIT(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_LOG:
+ inst = r300TranslateOpcodeLOG(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_MAD:
+ inst = r300TranslateOpcodeMAD(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_MAX:
+ inst = r300TranslateOpcodeMAX(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_MIN:
+ inst = r300TranslateOpcodeMIN(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_MOV:
+ inst = r300TranslateOpcodeMOV(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_MUL:
+ inst = r300TranslateOpcodeMUL(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_POW:
+ inst = r300TranslateOpcodePOW(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_RCP:
+ inst = r300TranslateOpcodeRCP(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_RSQ:
+ inst = r300TranslateOpcodeRSQ(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_SGE:
+ inst = r300TranslateOpcodeSGE(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_SLT:
+ inst = r300TranslateOpcodeSLT(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_SUB:
+ inst = r300TranslateOpcodeSUB(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_SWZ:
+ inst = r300TranslateOpcodeSWZ(compiler->code, vpi, inst, src);
+ break;
+ case OPCODE_XPD:
+ inst = r300TranslateOpcodeXPD(compiler->code, vpi, inst, src, /* FIXME */
+ &u_temp_i);
+ break;
+ default:
+ return GL_FALSE;
+ }
+ }
+
+ compiler->code->length = (inst - compiler->code->body.d);
+ if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id)
+{
+ struct prog_instruction *vpi;
+
+ _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2);
+
+ vpi = &prog->Instructions[prog->NumInstructions - 3];
+
+ vpi->Opcode = OPCODE_MOV;
+
+ vpi->DstReg.File = PROGRAM_OUTPUT;
+ vpi->DstReg.Index = VERT_RESULT_HPOS;
+ vpi->DstReg.WriteMask = WRITEMASK_XYZW;
+ vpi->DstReg.CondMask = COND_TR;
+
+ vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
+ vpi->SrcReg[0].Index = temp_index;
+ vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ ++vpi;
+
+ vpi->Opcode = OPCODE_MOV;
+
+ vpi->DstReg.File = PROGRAM_OUTPUT;
+ vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
+ vpi->DstReg.WriteMask = WRITEMASK_XYZW;
+ vpi->DstReg.CondMask = COND_TR;
+
+ vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
+ vpi->SrcReg[0].Index = temp_index;
+ vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ ++vpi;
+
+ vpi->Opcode = OPCODE_END;
+}
+
+static void pos_as_texcoord(struct gl_program *prog, int tex_id)
+{
+ struct prog_instruction *vpi;
+ GLuint tempregi = prog->NumTemporaries;
+
+ prog->NumTemporaries++;
+
+ for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
+ if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) {
+ vpi->DstReg.File = PROGRAM_TEMPORARY;
+ vpi->DstReg.Index = tempregi;
+ }
+ }
+
+ insert_wpos(prog, tempregi, tex_id);
+
+ prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
+}
+
+/**
+ * The fogcoord attribute is special in that only the first component
+ * is relevant, and the remaining components are always fixed (when read
+ * from by the fragment program) to yield an X001 pattern.
+ *
+ * We need to enforce this either in the vertex program or in the fragment
+ * program, and this code chooses not to enforce it in the vertex program.
+ * This is slightly cheaper, as long as the fragment program does not use
+ * weird swizzles.
+ *
+ * And it seems that usually, weird swizzles are not used, so...
+ *
+ * See also the counterpart rewriting for fragment programs.
+ */
+static void fog_as_texcoord(struct gl_program *prog, int tex_id)
+{
+ struct prog_instruction *vpi;
+
+ vpi = prog->Instructions;
+ while (vpi->Opcode != OPCODE_END) {
+ if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) {
+ vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
+ vpi->DstReg.WriteMask = WRITEMASK_X;
+ }
+
+ ++vpi;
+ }
+
+ prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC);
+ prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
+}
+
+static int translateABS(struct gl_program *prog, int pos)
+{
+ struct prog_instruction *inst;
+
+ inst = &prog->Instructions[pos];
+
+ inst->Opcode = OPCODE_MAX;
+ inst->SrcReg[1] = inst->SrcReg[0];
+ inst->SrcReg[1].Negate ^= NEGATE_XYZW;
+
+ return 0;
+}
+
+static int translateDP3(struct gl_program *prog, int pos)
+{
+ struct prog_instruction *inst;
+
+ inst = &prog->Instructions[pos];
+
+ inst->Opcode = OPCODE_DP4;
+ inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+
+ return 0;
+}
+
+static int translateDPH(struct gl_program *prog, int pos)
+{
+ struct prog_instruction *inst;
+
+ inst = &prog->Instructions[pos];
+
+ inst->Opcode = OPCODE_DP4;
+ inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
+
+ return 0;
+}
+
+static int translateFLR(struct gl_program *prog, int pos)
+{
+ struct prog_instruction *inst;
+ struct prog_dst_register dst;
+ int tmp_idx;
+
+ tmp_idx = prog->NumTemporaries++;
+
+ _mesa_insert_instructions(prog, pos + 1, 1);
+
+ inst = &prog->Instructions[pos];
+ dst = inst->DstReg;
+
+ inst->Opcode = OPCODE_FRC;
+ inst->DstReg.File = PROGRAM_TEMPORARY;
+ inst->DstReg.Index = tmp_idx;
+ ++inst;
+
+ inst->Opcode = OPCODE_ADD;
+ inst->DstReg = dst;
+ inst->SrcReg[0] = (inst-1)->SrcReg[0];
+ inst->SrcReg[1].File = PROGRAM_TEMPORARY;
+ inst->SrcReg[1].Index = tmp_idx;
+ inst->SrcReg[1].Negate = NEGATE_XYZW;
+
+ return 1;
+}
+
+static int translateSUB(struct gl_program *prog, int pos)
+{
+ struct prog_instruction *inst;
+
+ inst = &prog->Instructions[pos];
+
+ inst->Opcode = OPCODE_ADD;
+ inst->SrcReg[1].Negate ^= NEGATE_XYZW;
+
+ return 0;
+}
+
+static int translateSWZ(struct gl_program *prog, int pos)
+{
+ prog->Instructions[pos].Opcode = OPCODE_MOV;
+
+ return 0;
+}
+
+static int translateXPD(struct gl_program *prog, int pos)
+{
+ struct prog_instruction *inst;
+ int tmp_idx;
+
+ tmp_idx = prog->NumTemporaries++;
+
+ _mesa_insert_instructions(prog, pos + 1, 1);
+
+ inst = &prog->Instructions[pos];
+
+ *(inst+1) = *inst;
+
+ inst->Opcode = OPCODE_MUL;
+ inst->DstReg.File = PROGRAM_TEMPORARY;
+ inst->DstReg.Index = tmp_idx;
+ inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W);
+ inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W);
+ ++inst;
+
+ inst->Opcode = OPCODE_MAD;
+ inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W);
+ inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W);
+ inst->SrcReg[1].Negate ^= NEGATE_XYZW;
+ inst->SrcReg[2].File = PROGRAM_TEMPORARY;
+ inst->SrcReg[2].Index = tmp_idx;
+
+ return 1;
+}
+
+static void translateInsts(struct gl_program *prog)
+{
+ struct prog_instruction *inst;
+ int i;
+
+ for (i = 0; i < prog->NumInstructions; ++i) {
+ inst = &prog->Instructions[i];
+
+ switch (inst->Opcode) {
+ case OPCODE_ABS:
+ i += translateABS(prog, i);
+ break;
+ case OPCODE_DP3:
+ i += translateDP3(prog, i);
+ break;
+ case OPCODE_DPH:
+ i += translateDPH(prog, i);
+ break;
+ case OPCODE_FLR:
+ i += translateFLR(prog, i);
+ break;
+ case OPCODE_SUB:
+ i += translateSUB(prog, i);
+ break;
+ case OPCODE_SWZ:
+ i += translateSWZ(prog, i);
+ break;
+ case OPCODE_XPD:
+ i += translateXPD(prog, i);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+#define ADD_OUTPUT(fp_attr, vp_result) \
+ do { \
+ if ((FpReads & (1 << (fp_attr))) && !(compiler->program->OutputsWritten & (1 << (vp_result)))) { \
+ OutputsAdded |= 1 << (vp_result); \
+ count++; \
+ } \
+ } while (0)
+
+static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
+{
+ GLuint OutputsAdded, FpReads;
+ int i, count;
+
+ OutputsAdded = 0;
+ count = 0;
+ FpReads = compiler->state.FpReads;
+
+ ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
+ ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
+
+ for (i = 0; i < 7; ++i) {
+ ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
+ }
+
+ /* Some outputs may be artificially added, to match the inputs of the fragment program.
+ * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
+ * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
+ */
+ if (count > 0) {
+ struct prog_instruction *inst;
+
+ _mesa_insert_instructions(compiler->program, compiler->program->NumInstructions - 1, count);
+ inst = &compiler->program->Instructions[compiler->program->NumInstructions - 1 - count];
+
+ for (i = 0; i < VERT_RESULT_MAX; ++i) {
+ if (OutputsAdded & (1 << i)) {
+ inst->Opcode = OPCODE_MOV;
+
+ inst->DstReg.File = PROGRAM_OUTPUT;
+ inst->DstReg.Index = i;
+ inst->DstReg.WriteMask = WRITEMASK_XYZW;
+ inst->DstReg.CondMask = COND_TR;
+
+ inst->SrcReg[0].File = PROGRAM_CONSTANT;
+ inst->SrcReg[0].Index = 0;
+ inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ ++inst;
+ }
+ }
+
+ compiler->program->OutputsWritten |= OutputsAdded;
+ }
+}
+
+#undef ADD_OUTPUT
+
+static void nqssadceInit(struct nqssadce_state* s)
+{
+ struct r300_vertex_program_compiler * compiler = s->UserData;
+ GLuint fp_reads;
+
+ fp_reads = compiler->state.FpReads;
+ {
+ if (fp_reads & FRAG_BIT_COL0) {
+ s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW;
+ s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW;
+ }
+
+ if (fp_reads & FRAG_BIT_COL1) {
+ s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW;
+ s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW;
+ }
+ }
+
+ {
+ int i;
+ for (i = 0; i < 8; ++i) {
+ if (fp_reads & FRAG_BIT_TEX(i)) {
+ s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW;
+ }
+ }
+ }
+
+ s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW;
+ if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ))
+ s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X;
+}
+
+static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
+{
+ (void) opcode;
+ (void) reg;
+
+ return GL_TRUE;
+}
+
+
+
+GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler, GLcontext * ctx)
+{
+ GLboolean success;
+
+ if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) {
+ pos_as_texcoord(compiler->program, compiler->state.WPosAttr - FRAG_ATTRIB_TEX0);
+ }
+
+ if (compiler->state.FogAttr != FRAG_ATTRIB_MAX) {
+ fog_as_texcoord(compiler->program, compiler->state.FogAttr - FRAG_ATTRIB_TEX0);
+ }
+
+ addArtificialOutputs(compiler);
+
+ translateInsts(compiler->program);
+
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Vertex program after native rewrite:\n");
+ _mesa_print_program(compiler->program);
+ fflush(stdout);
+ }
+
+ {
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadceInit,
+ .IsNativeSwizzle = &swizzleIsNative,
+ .BuildSwizzle = NULL
+ };
+ radeonNqssaDce(compiler->program, &nqssadce, compiler);
+
+ /* We need this step for reusing temporary registers */
+ _mesa_optimize_program(ctx, compiler->program);
+
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Vertex program after NQSSADCE:\n");
+ _mesa_print_program(compiler->program);
+ fflush(stdout);
+ }
+ }
+
+ assert(compiler->program->NumInstructions);
+ {
+ struct prog_instruction *inst;
+ int max, i, tmp;
+
+ inst = compiler->program->Instructions;
+ max = -1;
+ while (inst->Opcode != OPCODE_END) {
+ tmp = _mesa_num_inst_src_regs(inst->Opcode);
+ for (i = 0; i < tmp; ++i) {
+ if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) {
+ if ((int) inst->SrcReg[i].Index > max) {
+ max = inst->SrcReg[i].Index;
+ }
+ }
+ }
+
+ if (_mesa_num_inst_dst_regs(inst->Opcode)) {
+ if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ if ((int) inst->DstReg.Index > max) {
+ max = inst->DstReg.Index;
+ }
+ }
+ }
+ ++inst;
+ }
+
+ /* We actually want highest index of used temporary register,
+ * not the number of temporaries used.
+ * These values aren't always the same.
+ */
+ compiler->code->num_temporaries = max + 1;
+ }
+
+ success = translate_vertex_program(compiler);
+
+ compiler->code->InputsRead = compiler->program->InputsRead;
+ compiler->code->OutputsWritten = compiler->program->OutputsWritten;
+
+ return success;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index 7d8bf483e79..e89e7bc17b2 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -147,4 +147,29 @@ struct rX00_fragment_program_code {
};
+#define VSF_MAX_FRAGMENT_LENGTH (255*4)
+#define VSF_MAX_FRAGMENT_TEMPS (14)
+
+struct r300_vertex_program_external_state {
+ GLuint FpReads;
+ GLuint FogAttr;
+ GLuint WPosAttr;
+};
+
+struct r300_vertex_program_code {
+ int length;
+ union {
+ GLuint d[VSF_MAX_FRAGMENT_LENGTH];
+ float f[VSF_MAX_FRAGMENT_LENGTH];
+ } body;
+
+ int pos_end;
+ int num_temporaries; /* Number of temp vars used by program */
+ int inputs[VERT_ATTRIB_MAX];
+ int outputs[VERT_RESULT_MAX];
+
+ GLbitfield InputsRead;
+ GLbitfield OutputsWritten;
+};
+
#endif /* RADEON_CODE_H */
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index a5f70173b7d..f8e4b3c681a 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -66,4 +66,14 @@ struct r300_fragment_program_compiler {
GLboolean r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
+
+struct r300_vertex_program_compiler {
+ struct radeon_compiler Base;
+ struct r300_vertex_program_code *code;
+ struct r300_vertex_program_external_state state;
+ struct gl_program *program;
+};
+
+GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c, GLcontext * ctx);
+
#endif /* RADEON_COMPILER_H */
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index d14d992366b..5c575441d7b 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -390,46 +390,19 @@ struct r300_hw_state {
/* Vertex shader state */
-/* Perhaps more if we store programs in vmem? */
-/* drm_r300_cmd_header_t->vpu->count is unsigned char */
-#define VSF_MAX_FRAGMENT_LENGTH (255*4)
-
-/* Can be tested with colormat currently. */
-#define VSF_MAX_FRAGMENT_TEMPS (14)
-
#define COLOR_IS_RGBA
#define TAG(x) r300##x
#include "tnl_dd/t_dd_vertex.h"
#undef TAG
-struct r300_vertex_program_key {
- GLuint FpReads;
- GLuint FogAttr;
- GLuint WPosAttr;
-};
-
struct r300_vertex_program {
struct gl_vertex_program *Base;
struct r300_vertex_program *next;
- struct r300_vertex_program_key key;
- GLbitfield InputsRead;
- GLbitfield OutputsWritten;
-
- struct r300_vertex_shader_hw_code {
- int length;
- union {
- GLuint d[VSF_MAX_FRAGMENT_LENGTH];
- float f[VSF_MAX_FRAGMENT_LENGTH];
- } body;
- } hw_code;
+ struct r300_vertex_program_external_state key;
+ struct r300_vertex_program_code code;
GLboolean error;
-
- int pos_end;
- int num_temporaries; /* Number of temp vars used by program */
- int inputs[VERT_ATTRIB_MAX];
- int outputs[VERT_RESULT_MAX];
};
struct r300_vertex_program_cont {
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
index e2e92fde482..fcfd3099332 100644
--- a/src/mesa/drivers/dri/r300/r300_draw.c
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -341,7 +341,7 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar
{
int i, tmp;
- tmp = r300->selected_vp->InputsRead;
+ tmp = r300->selected_vp->code.InputsRead;
i = 0;
vbuf->num_attribs = 0;
while (tmp) {
@@ -437,7 +437,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx,
if (r300->fallback)
return GL_FALSE;
- r300SetupVAP(ctx, r300->selected_vp->InputsRead, r300->selected_vp->OutputsWritten);
+ r300SetupVAP(ctx, r300->selected_vp->code.InputsRead, r300->selected_vp->code.OutputsWritten);
r300UpdateShaderStates(r300);
diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c
index 2fa626bab24..5bded642ef8 100644
--- a/src/mesa/drivers/dri/r300/r300_ioctl.c
+++ b/src/mesa/drivers/dri/r300/r300_ioctl.c
@@ -567,12 +567,12 @@ static void r300EmitClearState(GLcontext * ctx)
0, 0xf, PVS_DST_REG_OUT);
vpu.cmd[2] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
- PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
+ PVS_SRC_REG_INPUT, NEGATE_NONE);
vpu.cmd[3] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0,
PVS_SRC_SELECT_FORCE_0,
PVS_SRC_SELECT_FORCE_0,
PVS_SRC_SELECT_FORCE_0,
- PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
+ PVS_SRC_REG_INPUT, NEGATE_NONE);
vpu.cmd[4] = 0x0;
vpu.cmd[5] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf,
@@ -580,13 +580,12 @@ static void r300EmitClearState(GLcontext * ctx)
vpu.cmd[6] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X,
PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z,
PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT,
-
- VSF_FLAG_NONE);
+ NEGATE_NONE);
vpu.cmd[7] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0,
PVS_SRC_SELECT_FORCE_0,
PVS_SRC_SELECT_FORCE_0,
PVS_SRC_SELECT_FORCE_0,
- PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
+ PVS_SRC_REG_INPUT, NEGATE_NONE);
vpu.cmd[8] = 0x0;
r300->vap_flush_needed = GL_TRUE;
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
index 357c600af97..dd32e6c730a 100644
--- a/src/mesa/drivers/dri/r300/r300_reg.h
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -2667,6 +2667,24 @@ enum {
PVS_SRC_ADDR_MODE_1_SHIFT = 32,
};
+
+#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \
+ (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \
+ | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \
+ | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \
+ | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \
+ | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \
+ | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT))
+
+#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \
+ (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \
+ | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \
+ | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \
+ | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \
+ | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \
+ | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \
+ | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT))
+
/*\}*/
/* BEGIN: Packet 3 commands */
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index ad57b7e2f11..e3e8a6fb3df 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -1458,7 +1458,7 @@ static void r300SetupRSUnit(GLcontext * ctx)
hw_tcl_on = r300->options.hw_tcl_enabled;
if (hw_tcl_on)
- OutputsWritten.vp_outputs = r300->selected_vp->OutputsWritten;
+ OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten;
else
RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
@@ -1552,7 +1552,7 @@ static void r500SetupRSUnit(GLcontext * ctx)
hw_tcl_on = r300->options.hw_tcl_enabled;
if (hw_tcl_on)
- OutputsWritten.vp_outputs = r300->selected_vp->OutputsWritten;
+ OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten;
else
RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index 95cedd9d919..ec4ba9ca7da 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -40,39 +40,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "shader/prog_statevars.h"
#include "tnl/tnl.h"
+#include "compiler/radeon_compiler.h"
#include "compiler/radeon_nqssadce.h"
#include "r300_context.h"
#include "r300_state.h"
-/* TODO: Get rid of t_src_class call */
-#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \
- ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \
- t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \
- (t_src_class(a.File) == PVS_SRC_REG_INPUT && \
- t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \
-
-/*
- * Take an already-setup and valid source then swizzle it appropriately to
- * obtain a constant ZERO or ONE source.
- */
-#define __CONST(x, y) \
- (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \
- t_swizzle(y), \
- t_swizzle(y), \
- t_swizzle(y), \
- t_swizzle(y), \
- t_src_class(src[x].File), \
- VSF_FLAG_NONE) | (src[x].RelAddr << 4))
-
-#define FREE_TEMPS() \
- do { \
- int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \
- if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \
- WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \
- vp->error = GL_TRUE; \
- } \
- u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
- } while (0)
static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program *vp, float *dst)
{
@@ -125,1513 +97,38 @@ static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program
return dst - dst_o;
}
-static unsigned long t_dst_mask(GLuint mask)
-{
- /* WRITEMASK_* is equivalent to VSF_FLAG_* */
- return mask & VSF_FLAG_ALL;
-}
-
-static unsigned long t_dst_class(gl_register_file file)
-{
-
- switch (file) {
- case PROGRAM_TEMPORARY:
- return PVS_DST_REG_TEMPORARY;
- case PROGRAM_OUTPUT:
- return PVS_DST_REG_OUT;
- case PROGRAM_ADDRESS:
- return PVS_DST_REG_A0;
- /*
- case PROGRAM_INPUT:
- case PROGRAM_LOCAL_PARAM:
- case PROGRAM_ENV_PARAM:
- case PROGRAM_NAMED_PARAM:
- case PROGRAM_STATE_VAR:
- case PROGRAM_WRITE_ONLY:
- case PROGRAM_ADDRESS:
- */
- default:
- fprintf(stderr, "problem in %s", __FUNCTION__);
- _mesa_exit(-1);
- return -1;
- }
-}
-
-static unsigned long t_dst_index(struct r300_vertex_program *vp,
- struct prog_dst_register *dst)
-{
- if (dst->File == PROGRAM_OUTPUT)
- return vp->outputs[dst->Index];
-
- return dst->Index;
-}
-
-static unsigned long t_src_class(gl_register_file file)
-{
- switch (file) {
- case PROGRAM_TEMPORARY:
- return PVS_SRC_REG_TEMPORARY;
- case PROGRAM_INPUT:
- return PVS_SRC_REG_INPUT;
- case PROGRAM_LOCAL_PARAM:
- case PROGRAM_ENV_PARAM:
- case PROGRAM_NAMED_PARAM:
- case PROGRAM_CONSTANT:
- case PROGRAM_STATE_VAR:
- return PVS_SRC_REG_CONSTANT;
- /*
- case PROGRAM_OUTPUT:
- case PROGRAM_WRITE_ONLY:
- case PROGRAM_ADDRESS:
- */
- default:
- fprintf(stderr, "problem in %s", __FUNCTION__);
- _mesa_exit(-1);
- return -1;
- }
-}
-
-static INLINE unsigned long t_swizzle(GLubyte swizzle)
-{
-/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
- return swizzle;
-}
-
-#if 0
-static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
-{
- int i;
-
- if (vp == NULL) {
- fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__,
- caller);
- return;
- }
-
- fprintf(stderr, "%s:<", caller);
- for (i = 0; i < VERT_ATTRIB_MAX; i++)
- fprintf(stderr, "%d ", vp->inputs[i]);
- fprintf(stderr, ">\n");
-
-}
-#endif
-
-static unsigned long t_src_index(struct r300_vertex_program *vp,
- struct prog_src_register *src)
-{
- if (src->File == PROGRAM_INPUT) {
- assert(vp->inputs[src->Index] != -1);
- return vp->inputs[src->Index];
- } else {
- if (src->Index < 0) {
- fprintf(stderr,
- "negative offsets for indirect addressing do not work.\n");
- return 0;
- }
- return src->Index;
- }
-}
-
-/* these two functions should probably be merged... */
-
-static unsigned long t_src(struct r300_vertex_program *vp,
- struct prog_src_register *src)
-{
- /* src->Negate uses the NEGATE_ flags from program_instruction.h,
- * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
- */
- return PVS_SRC_OPERAND(t_src_index(vp, src),
- t_swizzle(GET_SWZ(src->Swizzle, 0)),
- t_swizzle(GET_SWZ(src->Swizzle, 1)),
- t_swizzle(GET_SWZ(src->Swizzle, 2)),
- t_swizzle(GET_SWZ(src->Swizzle, 3)),
- t_src_class(src->File),
- src->Negate) | (src->RelAddr << 4);
-}
-
-static unsigned long t_src_scalar(struct r300_vertex_program *vp,
- struct prog_src_register *src)
-{
- /* src->Negate uses the NEGATE_ flags from program_instruction.h,
- * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
- */
- return PVS_SRC_OPERAND(t_src_index(vp, src),
- t_swizzle(GET_SWZ(src->Swizzle, 0)),
- t_swizzle(GET_SWZ(src->Swizzle, 0)),
- t_swizzle(GET_SWZ(src->Swizzle, 0)),
- t_swizzle(GET_SWZ(src->Swizzle, 0)),
- t_src_class(src->File),
- src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src->RelAddr << 4);
-}
-
-static GLboolean valid_dst(struct r300_vertex_program *vp,
- struct prog_dst_register *dst)
-{
- if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
- return GL_FALSE;
- } else if (dst->File == PROGRAM_ADDRESS) {
- assert(dst->Index == 0);
- }
-
- return GL_TRUE;
-}
-
-static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
-
- inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
- t_src_class(src[0].File),
- (!src[0].
- Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
- inst[3] = 0;
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
-
- inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
- SWIZZLE_ZERO,
- t_src_class(src[0].File),
- src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
- inst[2] =
- PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
- t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO,
- t_src_class(src[1].File),
- src[1].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
- (src[1].RelAddr << 4);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
- inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
- PVS_SRC_SELECT_FORCE_1,
- t_src_class(src[0].File),
- src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src_scalar(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src_scalar(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3],
- int *u_temp_i)
-{
- /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
- ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
-
- inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
- GL_FALSE,
- GL_FALSE,
- *u_temp_i,
- t_dst_mask(vpi->DstReg.WriteMask),
- PVS_DST_REG_TEMPORARY);
- inst[1] = t_src(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
- inst += 4;
-
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = PVS_SRC_OPERAND(*u_temp_i,
- PVS_SRC_SELECT_X,
- PVS_SRC_SELECT_Y,
- PVS_SRC_SELECT_Z,
- PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY,
- /* Not 100% sure about this */
- (!src[0].
- Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE
- /*VSF_FLAG_ALL */ );
- inst[3] = __CONST(0, SWIZZLE_ZERO);
- (*u_temp_i)--;
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
-
- inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_src_class(src[0].File),
- src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
-
- inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- /* NOTE: Users swizzling might not work. */
- inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
- PVS_SRC_SELECT_FORCE_0, // Z
- t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
- t_src_class(src[0].File),
- src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
- inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
- PVS_SRC_SELECT_FORCE_0, // Z
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
- t_src_class(src[0].File),
- src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
- inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
- PVS_SRC_SELECT_FORCE_0, // Z
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
- t_src_class(src[0].File),
- src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src_scalar(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
- GL_FALSE,
- GL_TRUE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = t_src(vp, &src[2]);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
-
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src_scalar(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = t_src_scalar(vp, &src[1]);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src_scalar(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src_scalar(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
-
-#if 0
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
- t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
- t_src_class(src[1].File),
- (!src[1].
- Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[1].RelAddr << 4);
- inst[3] = 0;
-#else
- inst[0] =
- PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ONE);
- inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
- t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
- t_src_class(src[1].File),
- (!src[1].
- Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[1].RelAddr << 4);
-#endif
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
-
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3],
- int *u_temp_i)
-{
- /* mul r0, r1.yzxw, r2.zxyw
- mad r0, -r2.yzxw, r1.zxyw, r0
- */
-
- inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
- GL_FALSE,
- GL_FALSE,
- *u_temp_i,
- t_dst_mask(vpi->DstReg.WriteMask),
- PVS_DST_REG_TEMPORARY);
- inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
- t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
- t_src_class(src[0].File),
- src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
- inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
- t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
- t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
- t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
- t_src_class(src[1].File),
- src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[1].RelAddr << 4);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
- inst += 4;
-
- inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
- t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
- t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
- t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
- t_src_class(src[1].File),
- (!src[1].
- Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[1].RelAddr << 4);
- inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
- t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
- t_src_class(src[0].File),
- src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
- (src[0].RelAddr << 4);
- inst[3] =
- PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
- PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
- PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE);
-
- (*u_temp_i)--;
-
- return inst;
-}
-
-static void t_inputs_outputs(struct r300_vertex_program *vp, struct gl_program * glvp)
-{
- int i;
- int cur_reg;
- GLuint OutputsWritten, InputsRead;
-
- OutputsWritten = glvp->OutputsWritten;
- InputsRead = glvp->InputsRead;
-
- cur_reg = -1;
- for (i = 0; i < VERT_ATTRIB_MAX; i++) {
- if (InputsRead & (1 << i))
- vp->inputs[i] = ++cur_reg;
- else
- vp->inputs[i] = -1;
- }
-
- cur_reg = 0;
- for (i = 0; i < VERT_RESULT_MAX; i++)
- vp->outputs[i] = -1;
-
- assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
-
- if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
- vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
- }
-
- if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
- vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
- }
-
- /* If we're writing back facing colors we need to send
- * four colors to make front/back face colors selection work.
- * If the vertex program doesn't write all 4 colors, lets
- * pretend it does by skipping output index reg so the colors
- * get written into appropriate output vectors.
- */
- if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
- vp->outputs[VERT_RESULT_COL0] = cur_reg++;
- } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
- OutputsWritten & (1 << VERT_RESULT_BFC1)) {
- cur_reg++;
- }
-
- if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
- vp->outputs[VERT_RESULT_COL1] = cur_reg++;
- } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
- OutputsWritten & (1 << VERT_RESULT_BFC1)) {
- cur_reg++;
- }
-
- if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
- vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
- } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
- cur_reg++;
- }
-
- if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
- vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
- } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
- cur_reg++;
- }
-
- for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
- if (OutputsWritten & (1 << i)) {
- vp->outputs[i] = cur_reg++;
- }
- }
-
- if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
- vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
- }
-}
-
-static void translate_vertex_program(struct r300_vertex_program *vp, struct gl_program * glvp)
-{
- struct prog_instruction *vpi = glvp->Instructions;
- int i;
- GLuint *inst;
- unsigned long num_operands;
- /* Initial value should be last tmp reg that hw supports.
- Strangely enough r300 doesnt mind even though these would be out of range.
- Smart enough to realize that it doesnt need it? */
- int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
- struct prog_src_register src[3];
-
- vp->pos_end = 0; /* Not supported yet */
- vp->hw_code.length = 0;
- vp->error = GL_FALSE;
-
- t_inputs_outputs(vp, glvp);
-
- for (inst = vp->hw_code.body.d; vpi->Opcode != OPCODE_END;
- vpi++, inst += 4) {
-
- FREE_TEMPS();
-
- if (!valid_dst(vp, &vpi->DstReg)) {
- /* redirect result to unused temp */
- vpi->DstReg.File = PROGRAM_TEMPORARY;
- vpi->DstReg.Index = u_temp_i;
- }
-
- num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
-
- /* copy the sources (src) from mesa into a local variable... is this needed? */
- for (i = 0; i < num_operands; i++) {
- src[i] = vpi->SrcReg[i];
- }
-
- if (num_operands == 3) { /* TODO: scalars */
- if (CMP_SRCS(src[1], src[2])
- || CMP_SRCS(src[0], src[2])) {
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- u_temp_i,
- VSF_FLAG_ALL,
- PVS_DST_REG_TEMPORARY);
- inst[1] =
- PVS_SRC_OPERAND(t_src_index(vp, &src[2]),
- SWIZZLE_X,
- SWIZZLE_Y,
- SWIZZLE_Z,
- SWIZZLE_W,
- t_src_class(src[2].File),
- VSF_FLAG_NONE) | (src[2].
- RelAddr <<
- 4);
- inst[2] = __CONST(2, SWIZZLE_ZERO);
- inst[3] = __CONST(2, SWIZZLE_ZERO);
- inst += 4;
-
- src[2].File = PROGRAM_TEMPORARY;
- src[2].Index = u_temp_i;
- src[2].RelAddr = 0;
- u_temp_i--;
- }
- }
-
- if (num_operands >= 2) {
- if (CMP_SRCS(src[1], src[0])) {
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- u_temp_i,
- VSF_FLAG_ALL,
- PVS_DST_REG_TEMPORARY);
- inst[1] =
- PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
- SWIZZLE_X,
- SWIZZLE_Y,
- SWIZZLE_Z,
- SWIZZLE_W,
- t_src_class(src[0].File),
- VSF_FLAG_NONE) | (src[0].
- RelAddr <<
- 4);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
- inst += 4;
-
- src[0].File = PROGRAM_TEMPORARY;
- src[0].Index = u_temp_i;
- src[0].RelAddr = 0;
- u_temp_i--;
- }
- }
-
- switch (vpi->Opcode) {
- case OPCODE_ABS:
- inst = r300TranslateOpcodeABS(vp, vpi, inst, src);
- break;
- case OPCODE_ADD:
- inst = r300TranslateOpcodeADD(vp, vpi, inst, src);
- break;
- case OPCODE_ARL:
- inst = r300TranslateOpcodeARL(vp, vpi, inst, src);
- break;
- case OPCODE_DP3:
- inst = r300TranslateOpcodeDP3(vp, vpi, inst, src);
- break;
- case OPCODE_DP4:
- inst = r300TranslateOpcodeDP4(vp, vpi, inst, src);
- break;
- case OPCODE_DPH:
- inst = r300TranslateOpcodeDPH(vp, vpi, inst, src);
- break;
- case OPCODE_DST:
- inst = r300TranslateOpcodeDST(vp, vpi, inst, src);
- break;
- case OPCODE_EX2:
- inst = r300TranslateOpcodeEX2(vp, vpi, inst, src);
- break;
- case OPCODE_EXP:
- inst = r300TranslateOpcodeEXP(vp, vpi, inst, src);
- break;
- case OPCODE_FLR:
- inst = r300TranslateOpcodeFLR(vp, vpi, inst, src, /* FIXME */
- &u_temp_i);
- break;
- case OPCODE_FRC:
- inst = r300TranslateOpcodeFRC(vp, vpi, inst, src);
- break;
- case OPCODE_LG2:
- inst = r300TranslateOpcodeLG2(vp, vpi, inst, src);
- break;
- case OPCODE_LIT:
- inst = r300TranslateOpcodeLIT(vp, vpi, inst, src);
- break;
- case OPCODE_LOG:
- inst = r300TranslateOpcodeLOG(vp, vpi, inst, src);
- break;
- case OPCODE_MAD:
- inst = r300TranslateOpcodeMAD(vp, vpi, inst, src);
- break;
- case OPCODE_MAX:
- inst = r300TranslateOpcodeMAX(vp, vpi, inst, src);
- break;
- case OPCODE_MIN:
- inst = r300TranslateOpcodeMIN(vp, vpi, inst, src);
- break;
- case OPCODE_MOV:
- inst = r300TranslateOpcodeMOV(vp, vpi, inst, src);
- break;
- case OPCODE_MUL:
- inst = r300TranslateOpcodeMUL(vp, vpi, inst, src);
- break;
- case OPCODE_POW:
- inst = r300TranslateOpcodePOW(vp, vpi, inst, src);
- break;
- case OPCODE_RCP:
- inst = r300TranslateOpcodeRCP(vp, vpi, inst, src);
- break;
- case OPCODE_RSQ:
- inst = r300TranslateOpcodeRSQ(vp, vpi, inst, src);
- break;
- case OPCODE_SGE:
- inst = r300TranslateOpcodeSGE(vp, vpi, inst, src);
- break;
- case OPCODE_SLT:
- inst = r300TranslateOpcodeSLT(vp, vpi, inst, src);
- break;
- case OPCODE_SUB:
- inst = r300TranslateOpcodeSUB(vp, vpi, inst, src);
- break;
- case OPCODE_SWZ:
- inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src);
- break;
- case OPCODE_XPD:
- inst = r300TranslateOpcodeXPD(vp, vpi, inst, src, /* FIXME */
- &u_temp_i);
- break;
- default:
- vp->error = GL_TRUE;
- break;
- }
- }
-
- vp->hw_code.length = (inst - vp->hw_code.body.d);
- if (vp->hw_code.length >= VSF_MAX_FRAGMENT_LENGTH) {
- vp->error = GL_TRUE;
- }
-}
-
-static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id)
-{
- struct prog_instruction *vpi;
-
- _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2);
-
- vpi = &prog->Instructions[prog->NumInstructions - 3];
-
- vpi->Opcode = OPCODE_MOV;
-
- vpi->DstReg.File = PROGRAM_OUTPUT;
- vpi->DstReg.Index = VERT_RESULT_HPOS;
- vpi->DstReg.WriteMask = WRITEMASK_XYZW;
- vpi->DstReg.CondMask = COND_TR;
-
- vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
- vpi->SrcReg[0].Index = temp_index;
- vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
- ++vpi;
-
- vpi->Opcode = OPCODE_MOV;
-
- vpi->DstReg.File = PROGRAM_OUTPUT;
- vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
- vpi->DstReg.WriteMask = WRITEMASK_XYZW;
- vpi->DstReg.CondMask = COND_TR;
-
- vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
- vpi->SrcReg[0].Index = temp_index;
- vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
- ++vpi;
-
- vpi->Opcode = OPCODE_END;
-}
-
-static void pos_as_texcoord(struct gl_program *prog, int tex_id)
-{
- struct prog_instruction *vpi;
- GLuint tempregi = prog->NumTemporaries;
-
- prog->NumTemporaries++;
-
- for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
- if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) {
- vpi->DstReg.File = PROGRAM_TEMPORARY;
- vpi->DstReg.Index = tempregi;
- }
- }
-
- insert_wpos(prog, tempregi, tex_id);
-
- prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
-}
-
-/**
- * The fogcoord attribute is special in that only the first component
- * is relevant, and the remaining components are always fixed (when read
- * from by the fragment program) to yield an X001 pattern.
- *
- * We need to enforce this either in the vertex program or in the fragment
- * program, and this code chooses not to enforce it in the vertex program.
- * This is slightly cheaper, as long as the fragment program does not use
- * weird swizzles.
- *
- * And it seems that usually, weird swizzles are not used, so...
- *
- * See also the counterpart rewriting for fragment programs.
- */
-static void fog_as_texcoord(struct gl_program *prog, int tex_id)
-{
- struct prog_instruction *vpi;
-
- vpi = prog->Instructions;
- while (vpi->Opcode != OPCODE_END) {
- if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) {
- vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
- vpi->DstReg.WriteMask = WRITEMASK_X;
- }
-
- ++vpi;
- }
-
- prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC);
- prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
-}
-
-static int translateABS(struct gl_program *prog, int pos)
-{
- struct prog_instruction *inst;
-
- inst = &prog->Instructions[pos];
-
- inst->Opcode = OPCODE_MAX;
- inst->SrcReg[1] = inst->SrcReg[0];
- inst->SrcReg[1].Negate ^= NEGATE_XYZW;
-
- return 0;
-}
-
-static int translateDP3(struct gl_program *prog, int pos)
-{
- struct prog_instruction *inst;
-
- inst = &prog->Instructions[pos];
-
- inst->Opcode = OPCODE_DP4;
- inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-
- return 0;
-}
-
-static int translateDPH(struct gl_program *prog, int pos)
-{
- struct prog_instruction *inst;
-
- inst = &prog->Instructions[pos];
-
- inst->Opcode = OPCODE_DP4;
- inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
-
- return 0;
-}
-
-static int translateFLR(struct gl_program *prog, int pos)
-{
- struct prog_instruction *inst;
- struct prog_dst_register dst;
- int tmp_idx;
-
- tmp_idx = prog->NumTemporaries++;
-
- _mesa_insert_instructions(prog, pos + 1, 1);
-
- inst = &prog->Instructions[pos];
- dst = inst->DstReg;
-
- inst->Opcode = OPCODE_FRC;
- inst->DstReg.File = PROGRAM_TEMPORARY;
- inst->DstReg.Index = tmp_idx;
- ++inst;
-
- inst->Opcode = OPCODE_ADD;
- inst->DstReg = dst;
- inst->SrcReg[0] = (inst-1)->SrcReg[0];
- inst->SrcReg[1].File = PROGRAM_TEMPORARY;
- inst->SrcReg[1].Index = tmp_idx;
- inst->SrcReg[1].Negate = NEGATE_XYZW;
-
- return 1;
-}
-
-static int translateSUB(struct gl_program *prog, int pos)
-{
- struct prog_instruction *inst;
-
- inst = &prog->Instructions[pos];
-
- inst->Opcode = OPCODE_ADD;
- inst->SrcReg[1].Negate ^= NEGATE_XYZW;
-
- return 0;
-}
-
-static int translateSWZ(struct gl_program *prog, int pos)
-{
- prog->Instructions[pos].Opcode = OPCODE_MOV;
-
- return 0;
-}
-
-static int translateXPD(struct gl_program *prog, int pos)
-{
- struct prog_instruction *inst;
- int tmp_idx;
-
- tmp_idx = prog->NumTemporaries++;
-
- _mesa_insert_instructions(prog, pos + 1, 1);
-
- inst = &prog->Instructions[pos];
-
- *(inst+1) = *inst;
-
- inst->Opcode = OPCODE_MUL;
- inst->DstReg.File = PROGRAM_TEMPORARY;
- inst->DstReg.Index = tmp_idx;
- inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W);
- inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W);
- ++inst;
-
- inst->Opcode = OPCODE_MAD;
- inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W);
- inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W);
- inst->SrcReg[1].Negate ^= NEGATE_XYZW;
- inst->SrcReg[2].File = PROGRAM_TEMPORARY;
- inst->SrcReg[2].Index = tmp_idx;
-
- return 1;
-}
-
-static void translateInsts(struct gl_program *prog)
-{
- struct prog_instruction *inst;
- int i;
-
- for (i = 0; i < prog->NumInstructions; ++i) {
- inst = &prog->Instructions[i];
-
- switch (inst->Opcode) {
- case OPCODE_ABS:
- i += translateABS(prog, i);
- break;
- case OPCODE_DP3:
- i += translateDP3(prog, i);
- break;
- case OPCODE_DPH:
- i += translateDPH(prog, i);
- break;
- case OPCODE_FLR:
- i += translateFLR(prog, i);
- break;
- case OPCODE_SUB:
- i += translateSUB(prog, i);
- break;
- case OPCODE_SWZ:
- i += translateSWZ(prog, i);
- break;
- case OPCODE_XPD:
- i += translateXPD(prog, i);
- break;
- default:
- break;
- }
- }
-}
-
-#define ADD_OUTPUT(fp_attr, vp_result) \
- do { \
- if ((FpReads & (1 << (fp_attr))) && !(prog->OutputsWritten & (1 << (vp_result)))) { \
- OutputsAdded |= 1 << (vp_result); \
- count++; \
- } \
- } while (0)
-
-static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog)
-{
- r300ContextPtr r300 = R300_CONTEXT(ctx);
- GLuint OutputsAdded, FpReads;
- int i, count;
-
- OutputsAdded = 0;
- count = 0;
- FpReads = r300->selected_fp->InputsRead;
-
- ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
- ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
-
- for (i = 0; i < 7; ++i) {
- ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
- }
-
- /* Some outputs may be artificially added, to match the inputs of the fragment program.
- * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
- * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
- */
- if (count > 0) {
- struct prog_instruction *inst;
-
- _mesa_insert_instructions(prog, prog->NumInstructions - 1, count);
- inst = &prog->Instructions[prog->NumInstructions - 1 - count];
-
- for (i = 0; i < VERT_RESULT_MAX; ++i) {
- if (OutputsAdded & (1 << i)) {
- inst->Opcode = OPCODE_MOV;
-
- inst->DstReg.File = PROGRAM_OUTPUT;
- inst->DstReg.Index = i;
- inst->DstReg.WriteMask = WRITEMASK_XYZW;
- inst->DstReg.CondMask = COND_TR;
-
- inst->SrcReg[0].File = PROGRAM_CONSTANT;
- inst->SrcReg[0].Index = 0;
- inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
- ++inst;
- }
- }
-
- prog->OutputsWritten |= OutputsAdded;
- }
-}
-
-#undef ADD_OUTPUT
-
-static void nqssadceInit(struct nqssadce_state* s)
-{
- r300ContextPtr r300 = (r300ContextPtr)(s->UserData);
- GLuint fp_reads;
-
- fp_reads = r300->selected_fp->InputsRead;
- {
- if (fp_reads & FRAG_BIT_COL0) {
- s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW;
- s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW;
- }
-
- if (fp_reads & FRAG_BIT_COL1) {
- s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW;
- s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW;
- }
- }
-
- {
- int i;
- for (i = 0; i < 8; ++i) {
- if (fp_reads & FRAG_BIT_TEX(i)) {
- s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW;
- }
- }
- }
-
- s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW;
- if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ))
- s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X;
-}
-
-static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
-{
- (void) opcode;
- (void) reg;
-
- return GL_TRUE;
-}
-
static struct r300_vertex_program *build_program(GLcontext *ctx,
- struct r300_vertex_program_key *wanted_key,
+ struct r300_vertex_program_external_state *wanted_key,
const struct gl_vertex_program *mesa_vp)
{
- r300ContextPtr r300 = R300_CONTEXT(ctx);
struct r300_vertex_program *vp;
- struct gl_vertex_program * glvp = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base);
- struct gl_program *prog;
+ struct r300_vertex_program_compiler compiler;
vp = _mesa_calloc(sizeof(*vp));
- vp->Base = glvp;
+ vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base);
_mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
- prog = &glvp->Base;
-
- if (RADEON_DEBUG & DEBUG_VERTS) {
- fprintf(stderr, "Initial vertex program:\n");
- _mesa_print_program(prog);
- fflush(stdout);
- }
-
- if (glvp->IsPositionInvariant) {
- _mesa_insert_mvp_code(ctx, glvp);
- }
-
- if (r300->selected_fp->code.wpos_attr != FRAG_ATTRIB_MAX) {
- pos_as_texcoord(&glvp->Base, r300->selected_fp->code.wpos_attr - FRAG_ATTRIB_TEX0);
- }
-
- if (r300->selected_fp->code.fog_attr != FRAG_ATTRIB_MAX) {
- fog_as_texcoord(&glvp->Base, r300->selected_fp->code.fog_attr - FRAG_ATTRIB_TEX0);
- }
-
- addArtificialOutputs(ctx, prog);
+ rc_init(&compiler.Base);
+ compiler.Base.Debug = (RADEON_DEBUG & DEBUG_VERTS) ? GL_TRUE : GL_FALSE;
- translateInsts(prog);
+ compiler.code = &vp->code;
+ compiler.state = vp->key;
+ compiler.program = vp->Base;
- if (RADEON_DEBUG & DEBUG_VERTS) {
- fprintf(stderr, "Vertex program after native rewrite:\n");
- _mesa_print_program(prog);
+ if (compiler.Base.Debug) {
+ fprintf(stderr, "Initial vertex program:\n");
+ _mesa_print_program(compiler.program);
fflush(stdout);
}
- {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadceInit,
- .IsNativeSwizzle = &swizzleIsNative,
- .BuildSwizzle = NULL
- };
- radeonNqssaDce(prog, &nqssadce, r300);
-
- /* We need this step for reusing temporary registers */
- _mesa_optimize_program(ctx, prog);
-
- if (RADEON_DEBUG & DEBUG_VERTS) {
- fprintf(stderr, "Vertex program after NQSSADCE:\n");
- _mesa_print_program(prog);
- fflush(stdout);
- }
- }
-
- assert(prog->NumInstructions);
- {
- struct prog_instruction *inst;
- int max, i, tmp;
-
- inst = prog->Instructions;
- max = -1;
- while (inst->Opcode != OPCODE_END) {
- tmp = _mesa_num_inst_src_regs(inst->Opcode);
- for (i = 0; i < tmp; ++i) {
- if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) {
- if ((int) inst->SrcReg[i].Index > max) {
- max = inst->SrcReg[i].Index;
- }
- }
- }
-
- if (_mesa_num_inst_dst_regs(inst->Opcode)) {
- if (inst->DstReg.File == PROGRAM_TEMPORARY) {
- if ((int) inst->DstReg.Index > max) {
- max = inst->DstReg.Index;
- }
- }
- }
- ++inst;
- }
-
- /* We actually want highest index of used temporary register,
- * not the number of temporaries used.
- * These values aren't always the same.
- */
- vp->num_temporaries = max + 1;
+ if (mesa_vp->IsPositionInvariant) {
+ _mesa_insert_mvp_code(ctx, (struct gl_vertex_program *)compiler.program);
}
- translate_vertex_program(vp, &glvp->Base);
+ if (!r3xx_compile_vertex_program(&compiler, ctx))
+ vp->error = GL_TRUE;
- vp->InputsRead = glvp->Base.InputsRead;
- vp->OutputsWritten = glvp->Base.OutputsWritten;
+ rc_destroy(&compiler.Base);
return vp;
}
@@ -1639,7 +136,7 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
- struct r300_vertex_program_key wanted_key = { 0 };
+ struct r300_vertex_program_external_state wanted_key = { 0 };
struct r300_vertex_program_cont *vpc;
struct r300_vertex_program *vp;
@@ -1669,7 +166,7 @@ struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx)
if(_nc>_p->vpu.count)_p->vpu.count=_nc; \
} while(0)
-static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_shader_hw_code *code)
+static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code)
{
int i;
@@ -1717,11 +214,11 @@ void r300SetupVertexProgram(r300ContextPtr rmesa)
bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
param_count /= 4;
- r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code));
- inst_count = (prog->hw_code.length / 4) - 1;
+ r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code));
+ inst_count = (prog->code.length / 4) - 1;
- r300VapCntl(rmesa, _mesa_bitcount(prog->InputsRead),
- _mesa_bitcount(prog->OutputsWritten), prog->num_temporaries);
+ r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead),
+ _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries);
R300_STATECHANGE(rmesa, pvs);
rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.h b/src/mesa/drivers/dri/r300/r300_vertprog.h
index 896699ffe2e..ccec896be40 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.h
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.h
@@ -3,34 +3,6 @@
#include "r300_reg.h"
-#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \
- (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \
- | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \
- | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \
- | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \
- | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \
- | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT))
-
-#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \
- (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \
- | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \
- | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \
- | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \
- | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \
- | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \
- | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT))
-
-#if 1
-
-#define VSF_FLAG_X 1
-#define VSF_FLAG_Y 2
-#define VSF_FLAG_Z 4
-#define VSF_FLAG_W 8
-#define VSF_FLAG_XYZ (VSF_FLAG_X | VSF_FLAG_Y | VSF_FLAG_Z)
-#define VSF_FLAG_ALL 0xf
-#define VSF_FLAG_NONE 0
-
-#endif
void r300SetupVertexProgram(r300ContextPtr rmesa);
diff --git a/src/mesa/shader/prog_instruction.h b/src/mesa/shader/prog_instruction.h
index 40ad998f79d..39a221eeaba 100644
--- a/src/mesa/shader/prog_instruction.h
+++ b/src/mesa/shader/prog_instruction.h
@@ -133,6 +133,7 @@
#define NEGATE_Y 0x2
#define NEGATE_Z 0x4
#define NEGATE_W 0x8
+#define NEGATE_XYZ 0x7
#define NEGATE_XYZW 0xf
#define NEGATE_NONE 0x0
/*@}*/
@@ -303,11 +304,11 @@ struct prog_dst_register
* Condition code swizzle value.
*/
GLuint CondSwizzle:12;
-
+
/**
* Selects the condition code register to use for conditional destination
* update masking. In NV_fragmnet_program or NV_vertex_program2 mode, only
- * condition code register 0 is available. In NV_vertex_program3 mode,
+ * condition code register 0 is available. In NV_vertex_program3 mode,
* condition code registers 0 and 1 are available.
*/
GLuint CondSrc:1;
@@ -359,7 +360,7 @@ struct prog_instruction
* NV_fragment_program, NV_fragment_program_option, NV_vertex_program3.
*/
GLuint SaturateMode:2;
-
+
/**
* Per-instruction selectable precision: FLOAT32, FLOAT16, FIXED12.
*
@@ -374,7 +375,7 @@ struct prog_instruction
/*@{*/
/** Source texture unit. */
GLuint TexSrcUnit:5;
-
+
/** Source texture target, one of TEXTURE_{1D,2D,3D,CUBE,RECT}_INDEX */
GLuint TexSrcTarget:3;
--
cgit v1.2.3
From 127ca61fa34497e69149360201ae97f87cb9f38e Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Thu, 23 Jul 2009 19:25:06 +0200
Subject: r300/vertprog: Use generic transforms and throw away unneeded code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 443 +--------------------
.../drivers/dri/r300/compiler/radeon_program_alu.c | 46 +++
.../drivers/dri/r300/compiler/radeon_program_alu.h | 5 +
3 files changed, 59 insertions(+), 435 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index b074c98ee9d..c7fc2617de1 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -25,6 +25,8 @@
#include "../r300_reg.h"
#include "radeon_nqssadce.h"
+#include "radeon_program.h"
+#include "radeon_program_alu.h"
#include "shader/prog_optimize.h"
#include "shader/prog_print.h"
@@ -186,34 +188,6 @@ static GLboolean valid_dst(struct r300_vertex_program_code *vp,
return GL_TRUE;
}
-static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
-
- inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
- t_src_class(src[0].File),
- (!src[0].
- Negate) ? NEGATE_XYZW : NEGATE_NONE) |
- (src[0].RelAddr << 4);
- inst[3] = 0;
-
- return inst;
-}
-
static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program_code *vp,
struct prog_instruction *vpi,
GLuint * inst,
@@ -250,40 +224,6 @@ static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program_code *vp,
return inst;
}
-static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
-
- inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
- SWIZZLE_ZERO,
- t_src_class(src[0].File),
- src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
- (src[0].RelAddr << 4);
- inst[2] =
- PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
- t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO,
- t_src_class(src[1].File),
- src[1].Negate ? NEGATE_XYZW : NEGATE_NONE) |
- (src[1].RelAddr << 4);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program_code *vp,
struct prog_instruction *vpi,
GLuint * inst,
@@ -302,32 +242,6 @@ static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program_code *vp,
return inst;
}
-static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
- inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
- PVS_SRC_SELECT_FORCE_1,
- t_src_class(src[0].File),
- src[0].Negate ? NEGATE_XYZ : NEGATE_NONE) |
- (src[0].RelAddr << 4);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program_code *vp,
struct prog_instruction *vpi,
GLuint * inst,
@@ -382,47 +296,6 @@ static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program_code *vp,
return inst;
}
-static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3],
- int *u_temp_i)
-{
- /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
- ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
-
- inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
- GL_FALSE,
- GL_FALSE,
- *u_temp_i,
- t_dst_mask(vpi->DstReg.WriteMask),
- PVS_DST_REG_TEMPORARY);
- inst[1] = t_src(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
- inst += 4;
-
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = PVS_SRC_OPERAND(*u_temp_i,
- PVS_SRC_SELECT_X,
- PVS_SRC_SELECT_Y,
- PVS_SRC_SELECT_Z,
- PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY,
- /* Not 100% sure about this */
- (!src[0].
- Negate) ? NEGATE_XYZW : NEGATE_NONE);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
- (*u_temp_i)--;
-
- return inst;
-}
-
static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program_code *vp,
struct prog_instruction *vpi,
GLuint * inst,
@@ -707,139 +580,6 @@ static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program_code *vp,
return inst;
}
-static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
-
-#if 0
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
- t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
- t_src_class(src[1].File),
- (!src[1].
- Negate) ? NEGATE_XYZW : NEGATE_NONE) |
- (src[1].RelAddr << 4);
- inst[3] = 0;
-#else
- inst[0] =
- PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ONE);
- inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
- t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
- t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
- t_src_class(src[1].File),
- (!src[1].
- Negate) ? NEGATE_XYZW : NEGATE_NONE) |
- (src[1].RelAddr << 4);
-#endif
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
-
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3],
- int *u_temp_i)
-{
- /* mul r0, r1.yzxw, r2.zxyw
- mad r0, -r2.yzxw, r1.zxyw, r0
- */
-
- inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
- GL_FALSE,
- GL_FALSE,
- *u_temp_i,
- t_dst_mask(vpi->DstReg.WriteMask),
- PVS_DST_REG_TEMPORARY);
- inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
- t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
- t_src_class(src[0].File),
- src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
- (src[0].RelAddr << 4);
- inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
- t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
- t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
- t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
- t_src_class(src[1].File),
- src[1].Negate ? NEGATE_XYZW : NEGATE_NONE) |
- (src[1].RelAddr << 4);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
- inst += 4;
-
- inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
- t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
- t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
- t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
- t_src_class(src[1].File),
- (!src[1].
- Negate) ? NEGATE_XYZW : NEGATE_NONE) |
- (src[1].RelAddr << 4);
- inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
- t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
- t_src_class(src[0].File),
- src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
- (src[0].RelAddr << 4);
- inst[3] =
- PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
- PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
- PVS_SRC_REG_TEMPORARY, NEGATE_NONE);
-
- (*u_temp_i)--;
-
- return inst;
-}
-
static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_program * glvp)
{
int i;
@@ -1017,24 +757,15 @@ static GLboolean translate_vertex_program(struct r300_vertex_program_compiler *
}
switch (vpi->Opcode) {
- case OPCODE_ABS:
- inst = r300TranslateOpcodeABS(compiler->code, vpi, inst, src);
- break;
case OPCODE_ADD:
inst = r300TranslateOpcodeADD(compiler->code, vpi, inst, src);
break;
case OPCODE_ARL:
inst = r300TranslateOpcodeARL(compiler->code, vpi, inst, src);
break;
- case OPCODE_DP3:
- inst = r300TranslateOpcodeDP3(compiler->code, vpi, inst, src);
- break;
case OPCODE_DP4:
inst = r300TranslateOpcodeDP4(compiler->code, vpi, inst, src);
break;
- case OPCODE_DPH:
- inst = r300TranslateOpcodeDPH(compiler->code, vpi, inst, src);
- break;
case OPCODE_DST:
inst = r300TranslateOpcodeDST(compiler->code, vpi, inst, src);
break;
@@ -1044,10 +775,6 @@ static GLboolean translate_vertex_program(struct r300_vertex_program_compiler *
case OPCODE_EXP:
inst = r300TranslateOpcodeEXP(compiler->code, vpi, inst, src);
break;
- case OPCODE_FLR:
- inst = r300TranslateOpcodeFLR(compiler->code, vpi, inst, src, /* FIXME */
- &u_temp_i);
- break;
case OPCODE_FRC:
inst = r300TranslateOpcodeFRC(compiler->code, vpi, inst, src);
break;
@@ -1090,16 +817,6 @@ static GLboolean translate_vertex_program(struct r300_vertex_program_compiler *
case OPCODE_SLT:
inst = r300TranslateOpcodeSLT(compiler->code, vpi, inst, src);
break;
- case OPCODE_SUB:
- inst = r300TranslateOpcodeSUB(compiler->code, vpi, inst, src);
- break;
- case OPCODE_SWZ:
- inst = r300TranslateOpcodeSWZ(compiler->code, vpi, inst, src);
- break;
- case OPCODE_XPD:
- inst = r300TranslateOpcodeXPD(compiler->code, vpi, inst, src, /* FIXME */
- &u_temp_i);
- break;
default:
return GL_FALSE;
}
@@ -1201,155 +918,6 @@ static void fog_as_texcoord(struct gl_program *prog, int tex_id)
prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
}
-static int translateABS(struct gl_program *prog, int pos)
-{
- struct prog_instruction *inst;
-
- inst = &prog->Instructions[pos];
-
- inst->Opcode = OPCODE_MAX;
- inst->SrcReg[1] = inst->SrcReg[0];
- inst->SrcReg[1].Negate ^= NEGATE_XYZW;
-
- return 0;
-}
-
-static int translateDP3(struct gl_program *prog, int pos)
-{
- struct prog_instruction *inst;
-
- inst = &prog->Instructions[pos];
-
- inst->Opcode = OPCODE_DP4;
- inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-
- return 0;
-}
-
-static int translateDPH(struct gl_program *prog, int pos)
-{
- struct prog_instruction *inst;
-
- inst = &prog->Instructions[pos];
-
- inst->Opcode = OPCODE_DP4;
- inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
-
- return 0;
-}
-
-static int translateFLR(struct gl_program *prog, int pos)
-{
- struct prog_instruction *inst;
- struct prog_dst_register dst;
- int tmp_idx;
-
- tmp_idx = prog->NumTemporaries++;
-
- _mesa_insert_instructions(prog, pos + 1, 1);
-
- inst = &prog->Instructions[pos];
- dst = inst->DstReg;
-
- inst->Opcode = OPCODE_FRC;
- inst->DstReg.File = PROGRAM_TEMPORARY;
- inst->DstReg.Index = tmp_idx;
- ++inst;
-
- inst->Opcode = OPCODE_ADD;
- inst->DstReg = dst;
- inst->SrcReg[0] = (inst-1)->SrcReg[0];
- inst->SrcReg[1].File = PROGRAM_TEMPORARY;
- inst->SrcReg[1].Index = tmp_idx;
- inst->SrcReg[1].Negate = NEGATE_XYZW;
-
- return 1;
-}
-
-static int translateSUB(struct gl_program *prog, int pos)
-{
- struct prog_instruction *inst;
-
- inst = &prog->Instructions[pos];
-
- inst->Opcode = OPCODE_ADD;
- inst->SrcReg[1].Negate ^= NEGATE_XYZW;
-
- return 0;
-}
-
-static int translateSWZ(struct gl_program *prog, int pos)
-{
- prog->Instructions[pos].Opcode = OPCODE_MOV;
-
- return 0;
-}
-
-static int translateXPD(struct gl_program *prog, int pos)
-{
- struct prog_instruction *inst;
- int tmp_idx;
-
- tmp_idx = prog->NumTemporaries++;
-
- _mesa_insert_instructions(prog, pos + 1, 1);
-
- inst = &prog->Instructions[pos];
-
- *(inst+1) = *inst;
-
- inst->Opcode = OPCODE_MUL;
- inst->DstReg.File = PROGRAM_TEMPORARY;
- inst->DstReg.Index = tmp_idx;
- inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W);
- inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W);
- ++inst;
-
- inst->Opcode = OPCODE_MAD;
- inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W);
- inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W);
- inst->SrcReg[1].Negate ^= NEGATE_XYZW;
- inst->SrcReg[2].File = PROGRAM_TEMPORARY;
- inst->SrcReg[2].Index = tmp_idx;
-
- return 1;
-}
-
-static void translateInsts(struct gl_program *prog)
-{
- struct prog_instruction *inst;
- int i;
-
- for (i = 0; i < prog->NumInstructions; ++i) {
- inst = &prog->Instructions[i];
-
- switch (inst->Opcode) {
- case OPCODE_ABS:
- i += translateABS(prog, i);
- break;
- case OPCODE_DP3:
- i += translateDP3(prog, i);
- break;
- case OPCODE_DPH:
- i += translateDPH(prog, i);
- break;
- case OPCODE_FLR:
- i += translateFLR(prog, i);
- break;
- case OPCODE_SUB:
- i += translateSUB(prog, i);
- break;
- case OPCODE_SWZ:
- i += translateSWZ(prog, i);
- break;
- case OPCODE_XPD:
- i += translateXPD(prog, i);
- break;
- default:
- break;
- }
- }
-}
#define ADD_OUTPUT(fp_attr, vp_result) \
do { \
@@ -1464,7 +1032,12 @@ GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compi
addArtificialOutputs(compiler);
- translateInsts(compiler->program);
+ {
+ struct radeon_program_transformation transformations[] = {
+ { &r300_transform_vertex_alu, 0 },
+ };
+ radeonLocalTransform(compiler->program, 1, transformations);
+ }
if (compiler->Base.Debug) {
fprintf(stderr, "Vertex program after native rewrite:\n");
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index 8283723bad7..dd20b4603e5 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -180,6 +180,20 @@ static void transform_ABS(struct radeon_transform_context* t,
emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src);
}
+static void transform_DP3(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ struct prog_src_register src0 = inst->SrcReg[0];
+ struct prog_src_register src1 = inst->SrcReg[1];
+ src0.Negate &= ~NEGATE_W;
+ src0.Swizzle &= ~(7 << (3 * 3));
+ src0.Swizzle |= SWIZZLE_ZERO << (3 * 3);
+ src1.Negate &= ~NEGATE_W;
+ src1.Swizzle &= ~(7 << (3 * 3));
+ src1.Swizzle |= SWIZZLE_ZERO << (3 * 3);
+ emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, src1);
+}
+
static void transform_DPH(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
@@ -416,6 +430,38 @@ GLboolean radeonTransformALU(struct radeon_transform_context* t,
}
+static void transform_r300_vertex_ABS(struct radeon_transform_context* t,
+ struct prog_instruction* inst)
+{
+ /* Note: r500 can take absolute values, but r300 cannot. */
+ struct prog_src_register src1 = inst->SrcReg[0];
+ src1.Negate ^= NEGATE_XYZW;
+
+ emit2(t->Program, OPCODE_MAX, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], src1);
+}
+
+/**
+ * For use with radeonLocalTransform, this transforms non-native ALU
+ * instructions of the r300 up to r500 vertex engine.
+ */
+GLboolean r300_transform_vertex_alu(struct radeon_transform_context* t,
+ struct prog_instruction* inst,
+ void* unused)
+{
+ switch(inst->Opcode) {
+ case OPCODE_ABS: transform_r300_vertex_ABS(t, inst); return GL_TRUE;
+ case OPCODE_DP3: transform_DP3(t, inst); return GL_TRUE;
+ case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;
+ case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;
+ case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE;
+ case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE;
+ case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE;
+ case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE;
+ default:
+ return GL_FALSE;
+ }
+}
+
static void sincos_constants(struct radeon_transform_context* t, GLuint *constants)
{
static const GLfloat SinCosConsts[2][4] = {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
index b45958115cf..7d94a089eb0 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
@@ -35,6 +35,11 @@ GLboolean radeonTransformALU(
struct prog_instruction*,
void*);
+GLboolean r300_transform_vertex_alu(
+ struct radeon_transform_context *t,
+ struct prog_instruction*,
+ void*);
+
GLboolean radeonTransformTrigSimple(
struct radeon_transform_context *t,
struct prog_instruction*,
--
cgit v1.2.3
From 86e3334333d1de7fd723221155de9c8c1d0ce1c6 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Thu, 23 Jul 2009 19:52:00 +0200
Subject: r300/vertprog: Massively reduce code duplication
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 383 +++------------------
1 file changed, 42 insertions(+), 341 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index c7fc2617de1..ff6575b303c 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -188,30 +188,13 @@ static GLboolean valid_dst(struct r300_vertex_program_code *vp,
return GL_TRUE;
}
-static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
+static GLuint * ei_vector1(struct r300_vertex_program_code *vp,
+ GLuint hw_opcode,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
{
- inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX,
+ inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
GL_FALSE,
GL_FALSE,
t_dst_index(vp, &vpi->DstReg),
@@ -224,30 +207,13 @@ static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program_code *vp,
return inst;
}
-static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
+static GLuint * ei_vector2(struct r300_vertex_program_code *vp,
+ GLuint hw_opcode,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
{
- inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR,
+ inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
GL_FALSE,
GL_FALSE,
t_dst_index(vp, &vpi->DstReg),
@@ -260,30 +226,13 @@ static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program_code *vp,
return inst;
}
-static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src_scalar(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
+static GLuint *ei_math1(struct r300_vertex_program_code *vp,
+ GLuint hw_opcode,
+ struct prog_instruction *vpi,
+ GLuint * inst,
+ struct prog_src_register src[3])
{
- inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX,
+ inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
GL_TRUE,
GL_FALSE,
t_dst_index(vp, &vpi->DstReg),
@@ -296,52 +245,7 @@ static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program_code *vp,
return inst;
}
-static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
-
- inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
- t_src_class(src[0].File),
- src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
- (src[0].RelAddr << 4);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program_code *vp,
+static GLuint *ei_lit(struct r300_vertex_program_code *vp,
struct prog_instruction *vpi,
GLuint * inst,
struct prog_src_register src[3])
@@ -380,25 +284,7 @@ static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program_code *vp,
return inst;
}
-static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src_scalar(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program_code *vp,
+static GLuint *ei_mad(struct r300_vertex_program_code *vp,
struct prog_instruction *vpi,
GLuint * inst,
struct prog_src_register src[3])
@@ -416,81 +302,7 @@ static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program_code *vp,
return inst;
}
-static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
-
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program_code *vp,
+static GLuint *ei_pow(struct r300_vertex_program_code *vp,
struct prog_instruction *vpi,
GLuint * inst,
struct prog_src_register src[3])
@@ -508,78 +320,6 @@ static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program_code *vp,
return inst;
}
-static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src_scalar(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX,
- GL_TRUE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src_scalar(vp, &src[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
-static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
-{
- inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN,
- GL_FALSE,
- GL_FALSE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
-}
-
static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_program * glvp)
{
int i;
@@ -757,67 +497,28 @@ static GLboolean translate_vertex_program(struct r300_vertex_program_compiler *
}
switch (vpi->Opcode) {
- case OPCODE_ADD:
- inst = r300TranslateOpcodeADD(compiler->code, vpi, inst, src);
- break;
- case OPCODE_ARL:
- inst = r300TranslateOpcodeARL(compiler->code, vpi, inst, src);
- break;
- case OPCODE_DP4:
- inst = r300TranslateOpcodeDP4(compiler->code, vpi, inst, src);
- break;
- case OPCODE_DST:
- inst = r300TranslateOpcodeDST(compiler->code, vpi, inst, src);
- break;
- case OPCODE_EX2:
- inst = r300TranslateOpcodeEX2(compiler->code, vpi, inst, src);
- break;
- case OPCODE_EXP:
- inst = r300TranslateOpcodeEXP(compiler->code, vpi, inst, src);
- break;
- case OPCODE_FRC:
- inst = r300TranslateOpcodeFRC(compiler->code, vpi, inst, src);
- break;
- case OPCODE_LG2:
- inst = r300TranslateOpcodeLG2(compiler->code, vpi, inst, src);
- break;
- case OPCODE_LIT:
- inst = r300TranslateOpcodeLIT(compiler->code, vpi, inst, src);
- break;
- case OPCODE_LOG:
- inst = r300TranslateOpcodeLOG(compiler->code, vpi, inst, src);
- break;
- case OPCODE_MAD:
- inst = r300TranslateOpcodeMAD(compiler->code, vpi, inst, src);
- break;
- case OPCODE_MAX:
- inst = r300TranslateOpcodeMAX(compiler->code, vpi, inst, src);
- break;
- case OPCODE_MIN:
- inst = r300TranslateOpcodeMIN(compiler->code, vpi, inst, src);
- break;
- case OPCODE_MOV:
- inst = r300TranslateOpcodeMOV(compiler->code, vpi, inst, src);
- break;
- case OPCODE_MUL:
- inst = r300TranslateOpcodeMUL(compiler->code, vpi, inst, src);
- break;
- case OPCODE_POW:
- inst = r300TranslateOpcodePOW(compiler->code, vpi, inst, src);
- break;
- case OPCODE_RCP:
- inst = r300TranslateOpcodeRCP(compiler->code, vpi, inst, src);
- break;
- case OPCODE_RSQ:
- inst = r300TranslateOpcodeRSQ(compiler->code, vpi, inst, src);
- break;
- case OPCODE_SGE:
- inst = r300TranslateOpcodeSGE(compiler->code, vpi, inst, src);
- break;
- case OPCODE_SLT:
- inst = r300TranslateOpcodeSLT(compiler->code, vpi, inst, src);
- break;
+ case OPCODE_ADD: inst = ei_vector2(compiler->code, VE_ADD, vpi, inst, src); break;
+ case OPCODE_ARL: inst = ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst, src); break;
+ case OPCODE_DP4: inst = ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst, src); break;
+ case OPCODE_DST: inst = ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst, src); break;
+ case OPCODE_EX2: inst = ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst, src); break;
+ case OPCODE_EXP: inst = ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst, src); break;
+ case OPCODE_FRC: inst = ei_vector1(compiler->code, VE_FRACTION, vpi, inst, src); break;
+ case OPCODE_LG2: inst = ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst, src); break;
+ case OPCODE_LIT: inst = ei_lit(compiler->code, vpi, inst, src); break;
+ case OPCODE_LOG: inst = ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst, src); break;
+ case OPCODE_MAD: inst = ei_mad(compiler->code, vpi, inst, src); break;
+ case OPCODE_MAX: inst = ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst, src); break;
+ case OPCODE_MIN: inst = ei_vector2(compiler->code, VE_MINIMUM, vpi, inst, src); break;
+ case OPCODE_MOV: inst = ei_vector1(compiler->code, VE_ADD, vpi, inst, src); break;
+ case OPCODE_MUL: inst = ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst, src); break;
+ case OPCODE_POW: inst = ei_pow(compiler->code, vpi, inst, src); break;
+ case OPCODE_RCP: inst = ei_math1(compiler->code, ME_RECIP_DX, vpi, inst, src); break;
+ case OPCODE_RSQ: inst = ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst, src); break;
+ case OPCODE_SGE: inst = ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst, src); break;
+ case OPCODE_SLT: inst = ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst, src); break;
default:
+ fprintf(stderr, "Unknown opcode %i\n", vpi->Opcode);
return GL_FALSE;
}
}
--
cgit v1.2.3
From d65404225d8ba2c16eaffac833cb7dcfd2722a38 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Thu, 23 Jul 2009 20:24:22 +0200
Subject: r300/vertprog: Cleanup source conflict handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r300_fragprog.c | 6 -
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 325 ++++++++++-----------
src/mesa/drivers/dri/r300/compiler/r500_fragprog.c | 6 -
.../drivers/dri/r300/compiler/radeon_program.h | 6 +
4 files changed, 155 insertions(+), 188 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
index 932f9d97716..f838179f79b 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
@@ -31,12 +31,6 @@
#include "../r300_reg.h"
-static void reset_srcreg(struct prog_src_register* reg)
-{
- _mesa_bzero(reg, sizeof(*reg));
- reg->Swizzle = SWIZZLE_NOOP;
-}
-
static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
{
gl_state_index fail_value_tokens[STATE_LENGTH] = {
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index ff6575b303c..400408620ea 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -32,27 +32,18 @@
#include "shader/prog_print.h"
-/* TODO: Get rid of t_src_class call */
-#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \
- ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \
- t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \
- (t_src_class(a.File) == PVS_SRC_REG_INPUT && \
- t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \
-
/*
* Take an already-setup and valid source then swizzle it appropriately to
* obtain a constant ZERO or ONE source.
*/
#define __CONST(x, y) \
- (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \
+ (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \
t_swizzle(y), \
t_swizzle(y), \
t_swizzle(y), \
t_swizzle(y), \
- t_src_class(src[x].File), \
- NEGATE_NONE) | (src[x].RelAddr << 4))
-
-
+ t_src_class(vpi->SrcReg[x].File), \
+ NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4))
static unsigned long t_dst_mask(GLuint mask)
@@ -121,6 +112,24 @@ static unsigned long t_src_class(gl_register_file file)
}
}
+static GLboolean t_src_conflict(struct prog_src_register a, struct prog_src_register b)
+{
+ unsigned long aclass = t_src_class(a.File);
+ unsigned long bclass = t_src_class(b.File);
+
+ if (aclass != bclass)
+ return GL_FALSE;
+ if (aclass == PVS_SRC_REG_TEMPORARY)
+ return GL_FALSE;
+
+ if (a.RelAddr || b.RelAddr)
+ return GL_TRUE;
+ if (a.Index != b.Index)
+ return GL_TRUE;
+
+ return GL_FALSE;
+}
+
static INLINE unsigned long t_swizzle(GLubyte swizzle)
{
/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
@@ -188,11 +197,10 @@ static GLboolean valid_dst(struct r300_vertex_program_code *vp,
return GL_TRUE;
}
-static GLuint * ei_vector1(struct r300_vertex_program_code *vp,
+static void ei_vector1(struct r300_vertex_program_code *vp,
GLuint hw_opcode,
struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
+ GLuint * inst)
{
inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
GL_FALSE,
@@ -200,18 +208,15 @@ static GLuint * ei_vector1(struct r300_vertex_program_code *vp,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
+ inst[1] = t_src(vp, &vpi->SrcReg[0]);
inst[2] = __CONST(0, SWIZZLE_ZERO);
inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
}
-static GLuint * ei_vector2(struct r300_vertex_program_code *vp,
+static void ei_vector2(struct r300_vertex_program_code *vp,
GLuint hw_opcode,
struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
+ GLuint * inst)
{
inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
GL_FALSE,
@@ -219,18 +224,15 @@ static GLuint * ei_vector2(struct r300_vertex_program_code *vp,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
+ inst[1] = t_src(vp, &vpi->SrcReg[0]);
+ inst[2] = t_src(vp, &vpi->SrcReg[1]);
inst[3] = __CONST(1, SWIZZLE_ZERO);
-
- return inst;
}
-static GLuint *ei_math1(struct r300_vertex_program_code *vp,
+static void ei_math1(struct r300_vertex_program_code *vp,
GLuint hw_opcode,
struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
+ GLuint * inst)
{
inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
GL_TRUE,
@@ -238,17 +240,14 @@ static GLuint *ei_math1(struct r300_vertex_program_code *vp,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
- inst[1] = t_src_scalar(vp, &src[0]);
+ inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
inst[2] = __CONST(0, SWIZZLE_ZERO);
inst[3] = __CONST(0, SWIZZLE_ZERO);
-
- return inst;
}
-static GLuint *ei_lit(struct r300_vertex_program_code *vp,
+static void ei_lit(struct r300_vertex_program_code *vp,
struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
+ GLuint * inst)
{
//LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
@@ -259,35 +258,32 @@ static GLuint *ei_lit(struct r300_vertex_program_code *vp,
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
/* NOTE: Users swizzling might not work. */
- inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
+ inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
PVS_SRC_SELECT_FORCE_0, // Z
- t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
- t_src_class(src[0].File),
- src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
- (src[0].RelAddr << 4);
- inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
+ t_src_class(vpi->SrcReg[0].File),
+ vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (vpi->SrcReg[0].RelAddr << 4);
+ inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
PVS_SRC_SELECT_FORCE_0, // Z
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
- t_src_class(src[0].File),
- src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
- (src[0].RelAddr << 4);
- inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
- t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
+ t_src_class(vpi->SrcReg[0].File),
+ vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (vpi->SrcReg[0].RelAddr << 4);
+ inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
PVS_SRC_SELECT_FORCE_0, // Z
- t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
- t_src_class(src[0].File),
- src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
- (src[0].RelAddr << 4);
-
- return inst;
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
+ t_src_class(vpi->SrcReg[0].File),
+ vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (vpi->SrcReg[0].RelAddr << 4);
}
-static GLuint *ei_mad(struct r300_vertex_program_code *vp,
+static void ei_mad(struct r300_vertex_program_code *vp,
struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
+ GLuint * inst)
{
inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
GL_FALSE,
@@ -295,17 +291,14 @@ static GLuint *ei_mad(struct r300_vertex_program_code *vp,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
- inst[1] = t_src(vp, &src[0]);
- inst[2] = t_src(vp, &src[1]);
- inst[3] = t_src(vp, &src[2]);
-
- return inst;
+ inst[1] = t_src(vp, &vpi->SrcReg[0]);
+ inst[2] = t_src(vp, &vpi->SrcReg[1]);
+ inst[3] = t_src(vp, &vpi->SrcReg[2]);
}
-static GLuint *ei_pow(struct r300_vertex_program_code *vp,
+static void ei_pow(struct r300_vertex_program_code *vp,
struct prog_instruction *vpi,
- GLuint * inst,
- struct prog_src_register src[3])
+ GLuint * inst)
{
inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
GL_TRUE,
@@ -313,11 +306,9 @@ static GLuint *ei_pow(struct r300_vertex_program_code *vp,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
- inst[1] = t_src_scalar(vp, &src[0]);
+ inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = t_src_scalar(vp, &src[1]);
-
- return inst;
+ inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
}
static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_program * glvp)
@@ -397,15 +388,7 @@ static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_prog
static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * compiler)
{
struct prog_instruction *vpi = compiler->program->Instructions;
- int i;
GLuint *inst;
- unsigned long num_operands;
- /* Initial value should be last tmp reg that hw supports.
- Strangely enough r300 doesnt mind even though these would be out of range.
- Smart enough to realize that it doesnt need it? */
- int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
- struct prog_src_register src[3];
- struct r300_vertex_program_code * vp = compiler->code;
compiler->code->pos_end = 0; /* Not supported yet */
compiler->code->length = 0;
@@ -414,109 +397,33 @@ static GLboolean translate_vertex_program(struct r300_vertex_program_compiler *
for (inst = compiler->code->body.d; vpi->Opcode != OPCODE_END;
vpi++, inst += 4) {
-
- {
- int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i;
- if((compiler->code->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) {
- fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", compiler->code->num_temporaries, u_temp_used);
- return GL_FALSE;
- }
- u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1;
- }
-
+ /* Skip instructions writing to non-existing destination */
if (!valid_dst(compiler->code, &vpi->DstReg)) {
- /* redirect result to unused temp */
- vpi->DstReg.File = PROGRAM_TEMPORARY;
- vpi->DstReg.Index = u_temp_i;
- }
-
- num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
-
- /* copy the sources (src) from mesa into a local variable... is this needed? */
- for (i = 0; i < num_operands; i++) {
- src[i] = vpi->SrcReg[i];
- }
-
- if (num_operands == 3) { /* TODO: scalars */
- if (CMP_SRCS(src[1], src[2])
- || CMP_SRCS(src[0], src[2])) {
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- u_temp_i,
- WRITEMASK_XYZW,
- PVS_DST_REG_TEMPORARY);
- inst[1] =
- PVS_SRC_OPERAND(t_src_index(compiler->code, &src[2]),
- SWIZZLE_X,
- SWIZZLE_Y,
- SWIZZLE_Z,
- SWIZZLE_W,
- t_src_class(src[2].File),
- NEGATE_NONE) | (src[2].
- RelAddr <<
- 4);
- inst[2] = __CONST(2, SWIZZLE_ZERO);
- inst[3] = __CONST(2, SWIZZLE_ZERO);
- inst += 4;
-
- src[2].File = PROGRAM_TEMPORARY;
- src[2].Index = u_temp_i;
- src[2].RelAddr = 0;
- u_temp_i--;
- }
- }
-
- if (num_operands >= 2) {
- if (CMP_SRCS(src[1], src[0])) {
- inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
- GL_FALSE,
- GL_FALSE,
- u_temp_i,
- WRITEMASK_XYZW,
- PVS_DST_REG_TEMPORARY);
- inst[1] =
- PVS_SRC_OPERAND(t_src_index(compiler->code, &src[0]),
- SWIZZLE_X,
- SWIZZLE_Y,
- SWIZZLE_Z,
- SWIZZLE_W,
- t_src_class(src[0].File),
- NEGATE_NONE) | (src[0].
- RelAddr <<
- 4);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
- inst += 4;
-
- src[0].File = PROGRAM_TEMPORARY;
- src[0].Index = u_temp_i;
- src[0].RelAddr = 0;
- u_temp_i--;
- }
+ inst -= 4;
+ continue;
}
switch (vpi->Opcode) {
- case OPCODE_ADD: inst = ei_vector2(compiler->code, VE_ADD, vpi, inst, src); break;
- case OPCODE_ARL: inst = ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst, src); break;
- case OPCODE_DP4: inst = ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst, src); break;
- case OPCODE_DST: inst = ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst, src); break;
- case OPCODE_EX2: inst = ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst, src); break;
- case OPCODE_EXP: inst = ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst, src); break;
- case OPCODE_FRC: inst = ei_vector1(compiler->code, VE_FRACTION, vpi, inst, src); break;
- case OPCODE_LG2: inst = ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst, src); break;
- case OPCODE_LIT: inst = ei_lit(compiler->code, vpi, inst, src); break;
- case OPCODE_LOG: inst = ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst, src); break;
- case OPCODE_MAD: inst = ei_mad(compiler->code, vpi, inst, src); break;
- case OPCODE_MAX: inst = ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst, src); break;
- case OPCODE_MIN: inst = ei_vector2(compiler->code, VE_MINIMUM, vpi, inst, src); break;
- case OPCODE_MOV: inst = ei_vector1(compiler->code, VE_ADD, vpi, inst, src); break;
- case OPCODE_MUL: inst = ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst, src); break;
- case OPCODE_POW: inst = ei_pow(compiler->code, vpi, inst, src); break;
- case OPCODE_RCP: inst = ei_math1(compiler->code, ME_RECIP_DX, vpi, inst, src); break;
- case OPCODE_RSQ: inst = ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst, src); break;
- case OPCODE_SGE: inst = ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst, src); break;
- case OPCODE_SLT: inst = ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst, src); break;
+ case OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
+ case OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
+ case OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
+ case OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
+ case OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
+ case OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
+ case OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
+ case OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
+ case OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
+ case OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
+ case OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
+ case OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
+ case OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
+ case OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
+ case OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
+ case OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
+ case OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
+ case OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
+ case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
+ case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
default:
fprintf(stderr, "Unknown opcode %i\n", vpi->Opcode);
return GL_FALSE;
@@ -531,6 +438,55 @@ static GLboolean translate_vertex_program(struct r300_vertex_program_compiler *
return GL_TRUE;
}
+/**
+ * Vertex engine cannot read two inputs or two constants at the same time.
+ * Introduce intermediate MOVs to temporary registers to account for this.
+ */
+static GLboolean transform_source_conflicts(
+ struct radeon_transform_context *t,
+ struct prog_instruction* orig_inst,
+ void* unused)
+{
+ struct prog_instruction inst = *orig_inst;
+ struct prog_instruction * dst;
+ GLuint num_operands = _mesa_num_inst_src_regs(inst.Opcode);
+
+ if (num_operands == 3) {
+ if (t_src_conflict(inst.SrcReg[1], inst.SrcReg[2])
+ || t_src_conflict(inst.SrcReg[0], inst.SrcReg[2])) {
+ int tmpreg = radeonFindFreeTemporary(t);
+ struct prog_instruction * inst_mov = radeonAppendInstructions(t->Program, 1);
+ inst_mov->Opcode = OPCODE_MOV;
+ inst_mov->DstReg.File = PROGRAM_TEMPORARY;
+ inst_mov->DstReg.Index = tmpreg;
+ inst_mov->SrcReg[0] = inst.SrcReg[2];
+
+ reset_srcreg(&inst.SrcReg[2]);
+ inst.SrcReg[2].File = PROGRAM_TEMPORARY;
+ inst.SrcReg[2].Index = tmpreg;
+ }
+ }
+
+ if (num_operands >= 2) {
+ if (t_src_conflict(inst.SrcReg[1], inst.SrcReg[0])) {
+ int tmpreg = radeonFindFreeTemporary(t);
+ struct prog_instruction * inst_mov = radeonAppendInstructions(t->Program, 1);
+ inst_mov->Opcode = OPCODE_MOV;
+ inst_mov->DstReg.File = PROGRAM_TEMPORARY;
+ inst_mov->DstReg.Index = tmpreg;
+ inst_mov->SrcReg[0] = inst.SrcReg[1];
+
+ reset_srcreg(&inst.SrcReg[1]);
+ inst.SrcReg[1].File = PROGRAM_TEMPORARY;
+ inst.SrcReg[1].Index = tmpreg;
+ }
+ }
+
+ dst = radeonAppendInstructions(t->Program, 1);
+ *dst = inst;
+ return GL_TRUE;
+}
+
static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id)
{
struct prog_instruction *vpi;
@@ -746,6 +702,23 @@ GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compi
fflush(stdout);
}
+ {
+ /* Note: This pass has to be done seperately from ALU rewrite,
+ * otherwise non-native ALU instructions with source conflits
+ * will not be treated properly.
+ */
+ struct radeon_program_transformation transformations[] = {
+ { &transform_source_conflicts, 0 },
+ };
+ radeonLocalTransform(compiler->program, 1, transformations);
+ }
+
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Vertex program after source conflict resolve:\n");
+ _mesa_print_program(compiler->program);
+ fflush(stdout);
+ }
+
{
struct radeon_nqssadce_descr nqssadce = {
.Init = &nqssadceInit,
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index 4545982bf16..cc9e3aeebb9 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -29,12 +29,6 @@
#include "../r300_reg.h"
-static void reset_srcreg(struct prog_src_register* reg)
-{
- _mesa_bzero(reg, sizeof(*reg));
- reg->Swizzle = SWIZZLE_NOOP;
-}
-
static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
{
gl_state_index fail_value_tokens[STATE_LENGTH] = {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index 7e0f2544837..0fcedfcd9dd 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -79,6 +79,12 @@ static inline GLuint combine_swizzles(GLuint src, GLuint swz)
return ret;
}
+static INLINE void reset_srcreg(struct prog_src_register* reg)
+{
+ _mesa_bzero(reg, sizeof(*reg));
+ reg->Swizzle = SWIZZLE_NOOP;
+}
+
/**
* Transformation context that is passed to local transformations.
--
cgit v1.2.3
From c5cb9a337881229f1db462632fa1d64e2677f316 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Thu, 23 Jul 2009 21:10:37 +0200
Subject: r300: Remove dependency on GLcontext from compiler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Unfortunately, this does cause some code duplication (which we can hopefully
eliminate eventually).
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 131 +++++++++++++++------
.../drivers/dri/r300/compiler/radeon_compiler.h | 2 +-
src/mesa/drivers/dri/r300/r300_vertprog.c | 2 +-
3 files changed, 98 insertions(+), 37 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 400408620ea..a0e081fe6f8 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -28,7 +28,6 @@
#include "radeon_program.h"
#include "radeon_program_alu.h"
-#include "shader/prog_optimize.h"
#include "shader/prog_print.h"
@@ -438,6 +437,100 @@ static GLboolean translate_vertex_program(struct r300_vertex_program_compiler *
return GL_TRUE;
}
+struct temporary_allocation {
+ GLuint Allocated:1;
+ GLuint HwTemp:15;
+ struct prog_instruction * LastRead;
+};
+
+static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
+{
+ struct prog_instruction *inst;
+ GLuint num_orig_temps = 0;
+ GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS];
+ struct temporary_allocation * ta;
+ GLuint i, j;
+
+ compiler->code->num_temporaries = 0;
+ memset(hwtemps, 0, sizeof(hwtemps));
+
+ /* Pass 1: Count original temporaries and allocate structures */
+ for(inst = compiler->program->Instructions; inst->Opcode != OPCODE_END; inst++) {
+ GLuint numsrcs = _mesa_num_inst_src_regs(inst->Opcode);
+ GLuint numdsts = _mesa_num_inst_dst_regs(inst->Opcode);
+
+ for (i = 0; i < numsrcs; ++i) {
+ if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) {
+ if (inst->SrcReg[i].Index >= num_orig_temps)
+ num_orig_temps = inst->SrcReg[i].Index + 1;
+ }
+ }
+
+ if (numdsts) {
+ if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ if (inst->DstReg.Index >= num_orig_temps)
+ num_orig_temps = inst->DstReg.Index + 1;
+ }
+ }
+ }
+
+ ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
+ sizeof(struct temporary_allocation) * num_orig_temps);
+ memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
+
+ /* Pass 2: Determine original temporary lifetimes */
+ for(inst = compiler->program->Instructions; inst->Opcode != OPCODE_END; inst++) {
+ GLuint numsrcs = _mesa_num_inst_src_regs(inst->Opcode);
+
+ for (i = 0; i < numsrcs; ++i) {
+ if (inst->SrcReg[i].File == PROGRAM_TEMPORARY)
+ ta[inst->SrcReg[i].Index].LastRead = inst;
+ }
+ }
+
+ /* Pass 3: Register allocation */
+ for(inst = compiler->program->Instructions; inst->Opcode != OPCODE_END; inst++) {
+ GLuint numsrcs = _mesa_num_inst_src_regs(inst->Opcode);
+ GLuint numdsts = _mesa_num_inst_dst_regs(inst->Opcode);
+
+ for (i = 0; i < numsrcs; ++i) {
+ if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) {
+ GLuint orig = inst->SrcReg[i].Index;
+ inst->SrcReg[i].Index = ta[orig].HwTemp;
+
+ if (ta[orig].Allocated && inst == ta[orig].LastRead)
+ hwtemps[ta[orig].HwTemp] = GL_FALSE;
+ }
+ }
+
+ if (numdsts) {
+ if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ GLuint orig = inst->DstReg.Index;
+
+ if (!ta[orig].Allocated) {
+ for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
+ if (!hwtemps[j])
+ break;
+ }
+ if (j >= VSF_MAX_FRAGMENT_TEMPS) {
+ fprintf(stderr, "Out of hw temporaries\n");
+ } else {
+ ta[orig].Allocated = GL_TRUE;
+ ta[orig].HwTemp = j;
+ hwtemps[j] = GL_TRUE;
+
+ if (j >= compiler->code->num_temporaries)
+ compiler->code->num_temporaries = j + 1;
+ }
+ }
+
+ inst->DstReg.Index = ta[orig].HwTemp;
+ }
+ }
+ }
+}
+
+
/**
* Vertex engine cannot read two inputs or two constants at the same time.
* Introduce intermediate MOVs to temporary registers to account for this.
@@ -675,7 +768,7 @@ static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
-GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler, GLcontext * ctx)
+GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
{
GLboolean success;
@@ -728,7 +821,7 @@ GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compi
radeonNqssaDce(compiler->program, &nqssadce, compiler);
/* We need this step for reusing temporary registers */
- _mesa_optimize_program(ctx, compiler->program);
+ allocate_temporary_registers(compiler);
if (compiler->Base.Debug) {
fprintf(stderr, "Vertex program after NQSSADCE:\n");
@@ -738,38 +831,6 @@ GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compi
}
assert(compiler->program->NumInstructions);
- {
- struct prog_instruction *inst;
- int max, i, tmp;
-
- inst = compiler->program->Instructions;
- max = -1;
- while (inst->Opcode != OPCODE_END) {
- tmp = _mesa_num_inst_src_regs(inst->Opcode);
- for (i = 0; i < tmp; ++i) {
- if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) {
- if ((int) inst->SrcReg[i].Index > max) {
- max = inst->SrcReg[i].Index;
- }
- }
- }
-
- if (_mesa_num_inst_dst_regs(inst->Opcode)) {
- if (inst->DstReg.File == PROGRAM_TEMPORARY) {
- if ((int) inst->DstReg.Index > max) {
- max = inst->DstReg.Index;
- }
- }
- }
- ++inst;
- }
-
- /* We actually want highest index of used temporary register,
- * not the number of temporaries used.
- * These values aren't always the same.
- */
- compiler->code->num_temporaries = max + 1;
- }
success = translate_vertex_program(compiler);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index f8e4b3c681a..b98b1c9e6b5 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -74,6 +74,6 @@ struct r300_vertex_program_compiler {
struct gl_program *program;
};
-GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c, GLcontext * ctx);
+GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
#endif /* RADEON_COMPILER_H */
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index ec4ba9ca7da..dfb2a9e3d85 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -125,7 +125,7 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
_mesa_insert_mvp_code(ctx, (struct gl_vertex_program *)compiler.program);
}
- if (!r3xx_compile_vertex_program(&compiler, ctx))
+ if (!r3xx_compile_vertex_program(&compiler))
vp->error = GL_TRUE;
rc_destroy(&compiler.Base);
--
cgit v1.2.3
From 8bcb6ef786dc41049b56e6efeca0f5788cfefe5a Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Thu, 23 Jul 2009 21:38:28 +0200
Subject: r300/compiler: Lay groundwork for better error handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r300_fragprog.h | 2 +-
.../drivers/dri/r300/compiler/r300_fragprog_emit.c | 22 +++++-----
src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 12 ++----
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 37 ++++++++--------
src/mesa/drivers/dri/r300/compiler/r500_fragprog.h | 2 +-
.../drivers/dri/r300/compiler/r500_fragprog_emit.c | 23 +++++-----
.../drivers/dri/r300/compiler/radeon_compiler.c | 50 ++++++++++++++++++++++
.../drivers/dri/r300/compiler/radeon_compiler.h | 9 +++-
.../dri/r300/compiler/radeon_program_pair.c | 18 +++-----
.../dri/r300/compiler/radeon_program_pair.h | 2 +-
src/mesa/drivers/dri/r300/r300_fragprog_common.c | 4 +-
src/mesa/drivers/dri/r300/r300_vertprog.c | 4 +-
12 files changed, 113 insertions(+), 72 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
index 21507bd8e06..2d094c36186 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
@@ -40,7 +40,7 @@
#include "radeon_program.h"
-extern GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
+extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c);
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index 672b36532c9..f2472d6ce1c 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -51,7 +51,7 @@
struct r300_fragment_program_code *code = &c->code->code.r300
#define error(fmt, args...) do { \
- fprintf(stderr, "%s::%s(): " fmt "\n", \
+ rc_error(&c->Base, "%s::%s(): " fmt "\n", \
__FILE__, __FUNCTION__, ##args); \
} while(0)
@@ -91,7 +91,7 @@ static void use_temporary(struct r300_fragment_program_code *code, GLuint index)
}
-static GLuint translate_rgb_opcode(GLuint opcode)
+static GLuint translate_rgb_opcode(struct r300_fragment_program_compiler * c, GLuint opcode)
{
switch(opcode) {
case OPCODE_CMP: return R300_ALU_OUTC_CMP;
@@ -110,7 +110,7 @@ static GLuint translate_rgb_opcode(GLuint opcode)
}
}
-static GLuint translate_alpha_opcode(GLuint opcode)
+static GLuint translate_alpha_opcode(struct r300_fragment_program_compiler * c, GLuint opcode)
{
switch(opcode) {
case OPCODE_CMP: return R300_ALU_OUTA_CMP;
@@ -148,8 +148,8 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
int j;
code->node[code->cur_node].alu_end++;
- code->alu.inst[ip].inst0 = translate_rgb_opcode(inst->RGB.Opcode);
- code->alu.inst[ip].inst2 = translate_alpha_opcode(inst->Alpha.Opcode);
+ code->alu.inst[ip].inst0 = translate_rgb_opcode(c, inst->RGB.Opcode);
+ code->alu.inst[ip].inst2 = translate_alpha_opcode(c, inst->Alpha.Opcode);
for(j = 0; j < 3; ++j) {
GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5);
@@ -326,7 +326,7 @@ static const struct radeon_pair_handler pair_handler = {
* Final compilation step: Turn the intermediate radeon_program into
* machine-readable instructions.
*/
-GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
+void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
{
struct r300_fragment_program_code *code = &compiler->code->code.r300;
@@ -334,12 +334,10 @@ GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *
code->node[0].alu_end = -1;
code->node[0].tex_end = -1;
- if (!radeonPairProgram(&compiler->Base, &pair_handler, compiler))
- return GL_FALSE;
-
- if (!finish_node(compiler))
- return GL_FALSE;
+ radeonPairProgram(&compiler->Base, &pair_handler, compiler);
+ if (compiler->Base.Error)
+ return;
- return GL_TRUE;
+ finish_node(compiler);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 30fedb42118..dae77a575cc 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -235,10 +235,8 @@ static void rewrite_depth_out(struct gl_program *prog)
}
}
-GLboolean r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
+void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{
- GLboolean success = GL_FALSE;
-
if (c->Base.Debug) {
fflush(stdout);
_mesa_printf("Fragment Program: Initial program:\n");
@@ -300,18 +298,16 @@ GLboolean r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c
rc_mesa_to_rc_program(&c->Base, c->program);
if (c->is_r500) {
- success = r500BuildFragmentProgramHwCode(c);
+ r500BuildFragmentProgramHwCode(c);
} else {
- success = r300BuildFragmentProgramHwCode(c);
+ r300BuildFragmentProgramHwCode(c);
}
- if (!success || c->Base.Debug) {
+ if (c->Base.Debug) {
if (c->is_r500) {
r500FragmentProgramDump(c->code);
} else {
r300FragmentProgramDump(c->code);
}
}
-
- return success;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index a0e081fe6f8..9edff6b0393 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -384,22 +384,25 @@ static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_prog
}
}
-static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * compiler)
+static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
{
struct prog_instruction *vpi = compiler->program->Instructions;
- GLuint *inst;
compiler->code->pos_end = 0; /* Not supported yet */
compiler->code->length = 0;
t_inputs_outputs(compiler->code, compiler->program);
- for (inst = compiler->code->body.d; vpi->Opcode != OPCODE_END;
- vpi++, inst += 4) {
+ for (; vpi->Opcode != OPCODE_END; vpi++) {
+ GLuint *inst = compiler->code->body.d + compiler->code->length;
+
/* Skip instructions writing to non-existing destination */
- if (!valid_dst(compiler->code, &vpi->DstReg)) {
- inst -= 4;
+ if (!valid_dst(compiler->code, &vpi->DstReg))
continue;
+
+ if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
+ rc_error(&compiler->Base, "Vertex program has too many instructions\n");
+ return;
}
switch (vpi->Opcode) {
@@ -424,17 +427,15 @@ static GLboolean translate_vertex_program(struct r300_vertex_program_compiler *
case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
default:
- fprintf(stderr, "Unknown opcode %i\n", vpi->Opcode);
- return GL_FALSE;
+ rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode);
+ return;
}
- }
- compiler->code->length = (inst - compiler->code->body.d);
- if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
- return GL_FALSE;
- }
+ compiler->code->length += 4;
- return GL_TRUE;
+ if (compiler->Base.Error)
+ return;
+ }
}
struct temporary_allocation {
@@ -768,10 +769,8 @@ static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
-GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
{
- GLboolean success;
-
if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) {
pos_as_texcoord(compiler->program, compiler->state.WPosAttr - FRAG_ATTRIB_TEX0);
}
@@ -832,10 +831,8 @@ GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compi
assert(compiler->program->NumInstructions);
- success = translate_vertex_program(compiler);
+ translate_vertex_program(compiler);
compiler->code->InputsRead = compiler->program->InputsRead;
compiler->code->OutputsWritten = compiler->program->OutputsWritten;
-
- return success;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
index a1ffde1e838..e405267bb35 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -39,7 +39,7 @@
#include "radeon_compiler.h"
#include "radeon_nqssadce.h"
-extern GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
+extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c);
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index f8a1dc5fbeb..5b0b306b9cf 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -55,7 +55,7 @@
struct r500_fragment_program_code *code = &c->code->code.r500
#define error(fmt, args...) do { \
- fprintf(stderr, "%s::%s(): " fmt "\n", \
+ rc_error(&c->Base, "%s::%s(): " fmt "\n", \
__FILE__, __FUNCTION__, ##args); \
} while(0)
@@ -87,7 +87,7 @@ static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex
return GL_TRUE;
}
-static GLuint translate_rgb_op(GLuint opcode)
+static GLuint translate_rgb_op(struct r300_fragment_program_compiler *c, GLuint opcode)
{
switch(opcode) {
case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
@@ -108,7 +108,7 @@ static GLuint translate_rgb_op(GLuint opcode)
}
}
-static GLuint translate_alpha_op(GLuint opcode)
+static GLuint translate_alpha_op(struct r300_fragment_program_compiler *c, GLuint opcode)
{
switch(opcode) {
case OPCODE_CMP: return R500_ALPHA_OP_CMP;
@@ -191,8 +191,8 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
int ip = ++code->inst_end;
- code->inst[ip].inst5 = translate_rgb_op(inst->RGB.Opcode);
- code->inst[ip].inst4 = translate_alpha_op(inst->Alpha.Opcode);
+ code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
+ code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask)
code->inst[ip].inst0 = R500_INST_TYPE_OUT;
@@ -301,7 +301,7 @@ static const struct radeon_pair_handler pair_handler = {
.MaxHwTemps = 128
};
-GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
+void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
{
struct r500_fragment_program_code *code = &compiler->code->code.r500;
@@ -310,20 +310,19 @@ GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *
code->inst_offset = 0;
code->inst_end = -1;
- if (!radeonPairProgram(&compiler->Base, &pair_handler, compiler))
- return GL_FALSE;
+ radeonPairProgram(&compiler->Base, &pair_handler, compiler);
+ if (compiler->Base.Error)
+ return;
if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
/* This may happen when dead-code elimination is disabled or
* when most of the fragment program logic is leading to a KIL */
if (code->inst_end >= 511) {
- error("Introducing fake OUT: Too many instructions");
- return GL_FALSE;
+ rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
+ return;
}
int ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
}
-
- return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index 17c9b17682c..7b8322c201d 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -22,6 +22,8 @@
#include "radeon_compiler.h"
+#include
+
void rc_init(struct radeon_compiler * c)
{
@@ -36,4 +38,52 @@ void rc_init(struct radeon_compiler * c)
void rc_destroy(struct radeon_compiler * c)
{
memory_pool_destroy(&c->Pool);
+ free(c->ErrorMsg);
+}
+
+void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
+{
+ va_list ap;
+
+ if (!c->Debug)
+ return;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+}
+
+void rc_error(struct radeon_compiler * c, const char * fmt, ...)
+{
+ va_list ap;
+
+ c->Error = GL_TRUE;
+
+ if (!c->ErrorMsg) {
+ /* Only remember the first error */
+ char buf[1024];
+ int written;
+
+ va_start(ap, fmt);
+ written = vsnprintf(buf, sizeof(buf), fmt, ap);
+ va_end(ap);
+
+ if (written < sizeof(buf)) {
+ c->ErrorMsg = strdup(buf);
+ } else {
+ c->ErrorMsg = malloc(written + 1);
+
+ va_start(ap, fmt);
+ vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
+ va_end(ap);
+ }
+ }
+
+ if (c->Debug) {
+ fprintf(stderr, "r300compiler error: ");
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ }
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index b98b1c9e6b5..f0ed78a11fc 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -51,11 +51,16 @@ struct radeon_compiler {
struct memory_pool Pool;
struct rc_program Program;
GLboolean Debug;
+ GLboolean Error;
+ char * ErrorMsg;
};
void rc_init(struct radeon_compiler * c);
void rc_destroy(struct radeon_compiler * c);
+void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
+void rc_error(struct radeon_compiler * c, const char * fmt, ...);
+
struct r300_fragment_program_compiler {
struct radeon_compiler Base;
struct rX00_fragment_program_code *code;
@@ -64,7 +69,7 @@ struct r300_fragment_program_compiler {
GLboolean is_r500;
};
-GLboolean r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
+void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
struct r300_vertex_program_compiler {
@@ -74,6 +79,6 @@ struct r300_vertex_program_compiler {
struct gl_program *program;
};
-GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
#endif /* RADEON_COMPILER_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index ffc218b5ecc..ca63b906964 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -40,9 +40,8 @@
#include "shader/prog_print.h"
#define error(fmt, args...) do { \
- fprintf(stderr, "r300 driver problem: %s::%s(): " fmt "\n", \
+ rc_error(s->Compiler, "%s::%s(): " fmt "\n", \
__FILE__, __FUNCTION__, ##args); \
- s->Error = GL_TRUE; \
} while(0)
struct pair_state_instruction {
@@ -121,7 +120,6 @@ struct pair_register_translation {
struct pair_state {
struct radeon_compiler * Compiler;
const struct radeon_pair_handler *Handler;
- GLboolean Error;
GLboolean Verbose;
void *UserData;
@@ -583,7 +581,7 @@ static void emit_all_tex(struct pair_state *s)
_mesa_printf(" BEGIN_TEX\n");
if (s->Handler->BeginTexBlock)
- s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData);
+ s->Compiler->Error = s->Compiler->Error || !s->Handler->BeginTexBlock(s->UserData);
for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
struct prog_instruction *inst = &pairinst->Instruction;
@@ -616,7 +614,7 @@ static void emit_all_tex(struct pair_state *s)
rpti.SrcIndex = inst->SrcReg[0].Index;
rpti.SrcSwizzle = inst->SrcReg[0].Swizzle;
- s->Error = s->Error || !s->Handler->EmitTex(s->UserData, &rpti);
+ s->Compiler->Error = s->Compiler->Error || !s->Handler->EmitTex(s->UserData, &rpti);
}
if (s->Compiler->Debug)
@@ -642,7 +640,7 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio
index = get_hw_reg(s, src.File, src.Index);
} else {
constant = 1;
- s->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index);
+ s->Compiler->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index);
}
for(i = 0; i < 3; ++i) {
@@ -869,11 +867,11 @@ static void emit_alu(struct pair_state *s)
if (s->Compiler->Debug)
radeonPrintPairInstruction(&pair);
- s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair);
+ s->Compiler->Error = s->Compiler->Error || !s->Handler->EmitPaired(s->UserData, &pair);
}
-GLboolean radeonPairProgram(
+void radeonPairProgram(
struct radeon_compiler * compiler,
const struct radeon_pair_handler* handler, void *userdata)
{
@@ -891,7 +889,7 @@ GLboolean radeonPairProgram(
scan_instructions(&s);
allocate_input_registers(&s);
- while(!s.Error &&
+ while(!s.Compiler->Error &&
(s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
if (s.ReadyTEX)
emit_all_tex(&s);
@@ -902,8 +900,6 @@ GLboolean radeonPairProgram(
if (s.Compiler->Debug)
_mesa_printf(" END\n");
-
- return !s.Error;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index 3992082662b..46196fb1c87 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -141,7 +141,7 @@ struct radeon_pair_handler {
GLuint MaxHwTemps;
};
-GLboolean radeonPairProgram(
+void radeonPairProgram(
struct radeon_compiler * compiler,
const struct radeon_pair_handler*, void *userdata);
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index 27aec645759..0ce57e834b4 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -99,8 +99,8 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog
compiler.program = _mesa_clone_program(ctx, &cont->Base.Base);
compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE;
- if (!r3xx_compile_fragment_program(&compiler))
- fp->error = GL_TRUE;
+ r3xx_compile_fragment_program(&compiler);
+ fp->error = compiler.Base.Error;
fp->InputsRead = compiler.Base.Program.InputsRead;
fp->Base = compiler.program;
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index dfb2a9e3d85..91d9d8ae949 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -125,8 +125,8 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
_mesa_insert_mvp_code(ctx, (struct gl_vertex_program *)compiler.program);
}
- if (!r3xx_compile_vertex_program(&compiler))
- vp->error = GL_TRUE;
+ r3xx_compile_vertex_program(&compiler);
+ vp->error = compiler.Base.Error;
rc_destroy(&compiler.Base);
--
cgit v1.2.3
From 2237d136cd8f964048a4ccdc87e0ffb48af0f73d Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Thu, 23 Jul 2009 22:09:11 +0200
Subject: r300/compiler: Add rc_print_program
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
.../drivers/dri/r300/compiler/radeon_program.c | 24 ++++++++++++++++++++++
.../drivers/dri/r300/compiler/radeon_program.h | 2 ++
src/mesa/shader/prog_print.c | 2 +-
src/mesa/shader/prog_print.h | 7 +++++++
4 files changed, 34 insertions(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
index d6cc62ff8bd..fea7f646070 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
@@ -194,3 +194,27 @@ void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * progr
c->Program.InputsRead = program->InputsRead;
}
+
+/**
+ * Print program to stderr, default options.
+ */
+void rc_print_program(const struct rc_program *prog)
+{
+ GLuint indent = 0;
+ GLuint linenum = 1;
+ struct rc_instruction *inst;
+
+ fprintf(stderr, "# Radeon Compiler Program\n");
+
+ for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
+ fprintf(stderr, "%3d: ", linenum);
+
+ /* Massive hack: We rely on the fact that the printers do not actually
+ * use the gl_program argument (last argument) in debug mode */
+ indent = _mesa_fprint_instruction_opt(
+ stderr, &inst->I,
+ indent, PROG_PRINT_DEBUG, 0);
+
+ linenum++;
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index 0fcedfcd9dd..9987a3d3bc9 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -36,6 +36,7 @@
struct radeon_compiler;
struct rc_instruction;
+struct rc_program;
enum {
PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */
@@ -134,5 +135,6 @@ struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program);
+void rc_print_program(const struct rc_program *prog);
#endif
diff --git a/src/mesa/shader/prog_print.c b/src/mesa/shader/prog_print.c
index de7fef1f861..cef0288a9c9 100644
--- a/src/mesa/shader/prog_print.c
+++ b/src/mesa/shader/prog_print.c
@@ -541,7 +541,7 @@ _mesa_print_alu_instruction(const struct prog_instruction *inst,
/**
* Print a single vertex/fragment program instruction.
*/
-static GLint
+GLint
_mesa_fprint_instruction_opt(FILE *f,
const struct prog_instruction *inst,
GLint indent,
diff --git a/src/mesa/shader/prog_print.h b/src/mesa/shader/prog_print.h
index d55661cebb8..3da3e767cd0 100644
--- a/src/mesa/shader/prog_print.h
+++ b/src/mesa/shader/prog_print.h
@@ -56,6 +56,13 @@ _mesa_print_alu_instruction(const struct prog_instruction *inst,
extern void
_mesa_print_instruction(const struct prog_instruction *inst);
+extern GLint
+_mesa_fprint_instruction_opt(FILE *f,
+ const struct prog_instruction *inst,
+ GLint indent,
+ gl_prog_print_mode mode,
+ const struct gl_program *prog);
+
extern GLint
_mesa_print_instruction_opt(const struct prog_instruction *inst, GLint indent,
gl_prog_print_mode mode,
--
cgit v1.2.3
From a898e7d66c834be6b6e964e85cbbdf73e93300e0 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Thu, 23 Jul 2009 22:09:48 +0200
Subject: r300/compiler: Refactor for rc_program usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 59 +++++++++++-----------
1 file changed, 30 insertions(+), 29 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 9edff6b0393..743fc205978 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -386,14 +386,15 @@ static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_prog
static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
{
- struct prog_instruction *vpi = compiler->program->Instructions;
+ struct rc_instruction *rci;
compiler->code->pos_end = 0; /* Not supported yet */
compiler->code->length = 0;
t_inputs_outputs(compiler->code, compiler->program);
- for (; vpi->Opcode != OPCODE_END; vpi++) {
+ for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
+ struct prog_instruction *vpi = &rci->I;
GLuint *inst = compiler->code->body.d + compiler->code->length;
/* Skip instructions writing to non-existing destination */
@@ -441,12 +442,12 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
struct temporary_allocation {
GLuint Allocated:1;
GLuint HwTemp:15;
- struct prog_instruction * LastRead;
+ struct rc_instruction * LastRead;
};
static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
{
- struct prog_instruction *inst;
+ struct rc_instruction *inst;
GLuint num_orig_temps = 0;
GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS];
struct temporary_allocation * ta;
@@ -456,21 +457,21 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
memset(hwtemps, 0, sizeof(hwtemps));
/* Pass 1: Count original temporaries and allocate structures */
- for(inst = compiler->program->Instructions; inst->Opcode != OPCODE_END; inst++) {
- GLuint numsrcs = _mesa_num_inst_src_regs(inst->Opcode);
- GLuint numdsts = _mesa_num_inst_dst_regs(inst->Opcode);
+ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+ GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
for (i = 0; i < numsrcs; ++i) {
- if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) {
- if (inst->SrcReg[i].Index >= num_orig_temps)
- num_orig_temps = inst->SrcReg[i].Index + 1;
+ if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
+ if (inst->I.SrcReg[i].Index >= num_orig_temps)
+ num_orig_temps = inst->I.SrcReg[i].Index + 1;
}
}
if (numdsts) {
- if (inst->DstReg.File == PROGRAM_TEMPORARY) {
- if (inst->DstReg.Index >= num_orig_temps)
- num_orig_temps = inst->DstReg.Index + 1;
+ if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
+ if (inst->I.DstReg.Index >= num_orig_temps)
+ num_orig_temps = inst->I.DstReg.Index + 1;
}
}
}
@@ -480,24 +481,24 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
/* Pass 2: Determine original temporary lifetimes */
- for(inst = compiler->program->Instructions; inst->Opcode != OPCODE_END; inst++) {
- GLuint numsrcs = _mesa_num_inst_src_regs(inst->Opcode);
+ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+ GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
for (i = 0; i < numsrcs; ++i) {
- if (inst->SrcReg[i].File == PROGRAM_TEMPORARY)
- ta[inst->SrcReg[i].Index].LastRead = inst;
+ if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY)
+ ta[inst->I.SrcReg[i].Index].LastRead = inst;
}
}
/* Pass 3: Register allocation */
- for(inst = compiler->program->Instructions; inst->Opcode != OPCODE_END; inst++) {
- GLuint numsrcs = _mesa_num_inst_src_regs(inst->Opcode);
- GLuint numdsts = _mesa_num_inst_dst_regs(inst->Opcode);
+ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+ GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
for (i = 0; i < numsrcs; ++i) {
- if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) {
- GLuint orig = inst->SrcReg[i].Index;
- inst->SrcReg[i].Index = ta[orig].HwTemp;
+ if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
+ GLuint orig = inst->I.SrcReg[i].Index;
+ inst->I.SrcReg[i].Index = ta[orig].HwTemp;
if (ta[orig].Allocated && inst == ta[orig].LastRead)
hwtemps[ta[orig].HwTemp] = GL_FALSE;
@@ -505,8 +506,8 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
}
if (numdsts) {
- if (inst->DstReg.File == PROGRAM_TEMPORARY) {
- GLuint orig = inst->DstReg.Index;
+ if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
+ GLuint orig = inst->I.DstReg.Index;
if (!ta[orig].Allocated) {
for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
@@ -525,7 +526,7 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
}
}
- inst->DstReg.Index = ta[orig].HwTemp;
+ inst->I.DstReg.Index = ta[orig].HwTemp;
}
}
}
@@ -819,18 +820,18 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
};
radeonNqssaDce(compiler->program, &nqssadce, compiler);
+ rc_mesa_to_rc_program(&compiler->Base, compiler->program);
+
/* We need this step for reusing temporary registers */
allocate_temporary_registers(compiler);
if (compiler->Base.Debug) {
fprintf(stderr, "Vertex program after NQSSADCE:\n");
- _mesa_print_program(compiler->program);
+ rc_print_program(&compiler->Base.Program);
fflush(stdout);
}
}
- assert(compiler->program->NumInstructions);
-
translate_vertex_program(compiler);
compiler->code->InputsRead = compiler->program->InputsRead;
--
cgit v1.2.3
From 92f7a599c7e94b0687d02efef1890e1a8ed2f9f3 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Thu, 23 Jul 2009 22:49:31 +0200
Subject: r300/compiler: Refactor nqssadce to use rc_program
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
.../dri/r300/compiler/r300_fragprog_swizzle.c | 18 ++-
src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 10 +-
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 42 +++----
src/mesa/drivers/dri/r300/compiler/r500_fragprog.c | 17 +--
.../drivers/dri/r300/compiler/radeon_compiler.h | 1 +
.../drivers/dri/r300/compiler/radeon_nqssadce.c | 127 ++++++++++-----------
.../drivers/dri/r300/compiler/radeon_nqssadce.h | 7 +-
.../drivers/dri/r300/compiler/radeon_program.c | 7 ++
.../drivers/dri/r300/compiler/radeon_program.h | 1 +
9 files changed, 114 insertions(+), 116 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
index 9e88d14ad54..1b14cc3888b 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -35,6 +35,7 @@
#include "../r300_reg.h"
#include "radeon_nqssadce.h"
+#include "radeon_compiler.h"
#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO))
@@ -174,18 +175,15 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst,
}
}
- struct prog_instruction *inst;
-
- _mesa_insert_instructions(s->Program, s->IP, 1);
- inst = s->Program->Instructions + s->IP++;
- inst->Opcode = OPCODE_MOV;
- inst->DstReg = dst;
- inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W);
- inst->SrcReg[0] = src;
- inst->SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE;
+ struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev);
+ inst->I.Opcode = OPCODE_MOV;
+ inst->I.DstReg = dst;
+ inst->I.DstReg.WriteMask &= (best_matchmask | WRITEMASK_W);
+ inst->I.SrcReg[0] = src;
+ inst->I.SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE;
/* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */
- dst.WriteMask &= ~inst->DstReg.WriteMask;
+ dst.WriteMask &= ~inst->I.DstReg.WriteMask;
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index dae77a575cc..406961bea94 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -273,30 +273,30 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
fflush(stdout);
}
+ rc_mesa_to_rc_program(&c->Base, c->program);
+
if (c->is_r500) {
struct radeon_nqssadce_descr nqssadce = {
.Init = &nqssadce_init,
.IsNativeSwizzle = &r500FPIsNativeSwizzle,
.BuildSwizzle = &r500FPBuildSwizzle
};
- radeonNqssaDce(c->program, &nqssadce, 0);
+ radeonNqssaDce(&c->Base, &nqssadce, 0);
} else {
struct radeon_nqssadce_descr nqssadce = {
.Init = &nqssadce_init,
.IsNativeSwizzle = &r300FPIsNativeSwizzle,
.BuildSwizzle = &r300FPBuildSwizzle
};
- radeonNqssaDce(c->program, &nqssadce, 0);
+ radeonNqssaDce(&c->Base, &nqssadce, 0);
}
if (c->Base.Debug) {
_mesa_printf("Compiler: after NqSSA-DCE:\n");
- _mesa_print_program(c->program);
+ rc_print_program(&c->Base.Program);
fflush(stdout);
}
- rc_mesa_to_rc_program(&c->Base, c->program);
-
if (c->is_r500) {
r500BuildFragmentProgramHwCode(c);
} else {
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 743fc205978..cdcfa1d27ec 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -310,35 +310,35 @@ static void ei_pow(struct r300_vertex_program_code *vp,
inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
}
-static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_program * glvp)
+static void t_inputs_outputs(struct r300_vertex_program_compiler * c)
{
int i;
int cur_reg;
GLuint OutputsWritten, InputsRead;
- OutputsWritten = glvp->OutputsWritten;
- InputsRead = glvp->InputsRead;
+ OutputsWritten = c->Base.Program.OutputsWritten;
+ InputsRead = c->Base.Program.InputsRead;
cur_reg = -1;
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
if (InputsRead & (1 << i))
- vp->inputs[i] = ++cur_reg;
+ c->code->inputs[i] = ++cur_reg;
else
- vp->inputs[i] = -1;
+ c->code->inputs[i] = -1;
}
cur_reg = 0;
for (i = 0; i < VERT_RESULT_MAX; i++)
- vp->outputs[i] = -1;
+ c->code->outputs[i] = -1;
assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
- vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
+ c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
}
if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
- vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
+ c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
}
/* If we're writing back facing colors we need to send
@@ -348,39 +348,39 @@ static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_prog
* get written into appropriate output vectors.
*/
if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
- vp->outputs[VERT_RESULT_COL0] = cur_reg++;
+ c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
} else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
OutputsWritten & (1 << VERT_RESULT_BFC1)) {
cur_reg++;
}
if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
- vp->outputs[VERT_RESULT_COL1] = cur_reg++;
+ c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
} else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
OutputsWritten & (1 << VERT_RESULT_BFC1)) {
cur_reg++;
}
if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
- vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
+ c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
} else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
cur_reg++;
}
if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
- vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
+ c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
} else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
cur_reg++;
}
for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
if (OutputsWritten & (1 << i)) {
- vp->outputs[i] = cur_reg++;
+ c->code->outputs[i] = cur_reg++;
}
}
if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
- vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
+ c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
}
}
@@ -391,7 +391,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
compiler->code->pos_end = 0; /* Not supported yet */
compiler->code->length = 0;
- t_inputs_outputs(compiler->code, compiler->program);
+ t_inputs_outputs(compiler);
for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
struct prog_instruction *vpi = &rci->I;
@@ -756,7 +756,7 @@ static void nqssadceInit(struct nqssadce_state* s)
}
s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW;
- if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ))
+ if (s->Compiler->Program.OutputsWritten & (1 << VERT_RESULT_PSIZ))
s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X;
}
@@ -812,15 +812,15 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
fflush(stdout);
}
+ rc_mesa_to_rc_program(&compiler->Base, compiler->program);
+
{
struct radeon_nqssadce_descr nqssadce = {
.Init = &nqssadceInit,
.IsNativeSwizzle = &swizzleIsNative,
.BuildSwizzle = NULL
};
- radeonNqssaDce(compiler->program, &nqssadce, compiler);
-
- rc_mesa_to_rc_program(&compiler->Base, compiler->program);
+ radeonNqssaDce(&compiler->Base, &nqssadce, compiler);
/* We need this step for reusing temporary registers */
allocate_temporary_registers(compiler);
@@ -834,6 +834,6 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
translate_vertex_program(compiler);
- compiler->code->InputsRead = compiler->program->InputsRead;
- compiler->code->OutputsWritten = compiler->program->OutputsWritten;
+ compiler->code->InputsRead = compiler->Base.Program.InputsRead;
+ compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index cc9e3aeebb9..e8d0e77b68c 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -245,7 +245,6 @@ GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg)
*/
void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src)
{
- struct prog_instruction *inst;
GLuint negatebase[2] = { 0, 0 };
int i;
@@ -256,20 +255,16 @@ void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst,
negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
}
- _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0));
- inst = s->Program->Instructions + s->IP;
-
for(i = 0; i <= 1; ++i) {
if (!negatebase[i])
continue;
- inst->Opcode = OPCODE_MOV;
- inst->DstReg = dst;
- inst->DstReg.WriteMask = negatebase[i];
- inst->SrcReg[0] = src;
- inst->SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW;
- inst++;
- s->IP++;
+ struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev);
+ inst->I.Opcode = OPCODE_MOV;
+ inst->I.DstReg = dst;
+ inst->I.DstReg.WriteMask = negatebase[i];
+ inst->I.SrcReg[0] = src;
+ inst->I.SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW;
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index f0ed78a11fc..abe2c185258 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -44,6 +44,7 @@ struct rc_program {
struct rc_instruction Instructions;
GLbitfield InputsRead;
+ GLbitfield OutputsWritten;
GLbitfield ShadowSamplers; /**< Texture units used for shadow sampling. */
};
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
index ee2e1cbc54b..9d0e265a5d9 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
@@ -36,6 +36,8 @@
#include "radeon_nqssadce.h"
+#include "radeon_compiler.h"
+
/**
* Return the @ref register_state for the given register (or 0 for untracked
@@ -76,9 +78,10 @@ struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register s
}
-static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
- struct prog_instruction *inst, GLint src, GLuint sourced)
+static void track_used_srcreg(struct nqssadce_state* s,
+ GLint src, GLuint sourced)
{
+ struct prog_instruction * inst = &s->IP->I;
int i;
GLuint deswz_source = 0;
@@ -95,12 +98,11 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
struct prog_dst_register dstreg = inst->DstReg;
dstreg.File = PROGRAM_TEMPORARY;
- dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
+ dstreg.Index = rc_find_free_temporary(s->Compiler);
dstreg.WriteMask = sourced;
s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]);
- inst = s->Program->Instructions + s->IP;
inst->SrcReg[src].File = PROGRAM_TEMPORARY;
inst->SrcReg[src].Index = dstreg.Index;
inst->SrcReg[src].Swizzle = 0;
@@ -126,30 +128,27 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
if (regstate)
regstate->Sourced |= deswz_source & 0xf;
}
-
- return inst;
}
-static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex)
+static void unalias_srcregs(struct rc_instruction *inst, GLuint oldindex, GLuint newindex)
{
- int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
+ int nsrc = _mesa_num_inst_src_regs(inst->I.Opcode);
int i;
for(i = 0; i < nsrc; ++i)
- if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex)
- inst->SrcReg[i].Index = newindex;
+ if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY && inst->I.SrcReg[i].Index == oldindex)
+ inst->I.SrcReg[i].Index = newindex;
}
static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
{
- GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
- int ip;
- for(ip = 0; ip < s->IP; ++ip) {
- struct prog_instruction* inst = s->Program->Instructions + ip;
- if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex)
- inst->DstReg.Index = newindex;
+ GLuint newindex = rc_find_free_temporary(s->Compiler);
+ struct rc_instruction * inst;
+ for(inst = s->Compiler->Program.Instructions.Next; inst != s->IP; inst = inst->Next) {
+ if (inst->I.DstReg.File == PROGRAM_TEMPORARY && inst->I.DstReg.Index == oldindex)
+ inst->I.DstReg.Index = newindex;
unalias_srcregs(inst, oldindex, newindex);
}
- unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex);
+ unalias_srcregs(s->IP, oldindex, newindex);
}
@@ -158,7 +157,8 @@ static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
*/
static void process_instruction(struct nqssadce_state* s)
{
- struct prog_instruction *inst = s->Program->Instructions + s->IP;
+ struct prog_instruction *inst = &s->IP->I;
+ GLuint WriteMask;
if (inst->Opcode == OPCODE_END)
return;
@@ -166,7 +166,7 @@ static void process_instruction(struct nqssadce_state* s)
if (inst->Opcode != OPCODE_KIL) {
struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
if (!regstate) {
- fprintf(stderr, "r300 driver: NqssaDce: bad destination register (%i[%i])\n",
+ rc_error(s->Compiler, "NqssaDce: bad destination register (%i[%i])\n",
inst->DstReg.File, inst->DstReg.Index);
return;
}
@@ -175,7 +175,9 @@ static void process_instruction(struct nqssadce_state* s)
regstate->Sourced &= ~inst->DstReg.WriteMask;
if (inst->DstReg.WriteMask == 0) {
- _mesa_delete_instructions(s->Program, s->IP, 1);
+ struct rc_instruction * inst_remove = s->IP;
+ s->IP = s->IP->Prev;
+ rc_remove_instruction(inst_remove);
return;
}
@@ -183,16 +185,15 @@ static void process_instruction(struct nqssadce_state* s)
unalias_temporary(s, inst->DstReg.Index);
}
- /* Attention: Due to swizzle emulation code, the following
- * might change the instruction stream under us, so we have
- * to be careful with the inst pointer. */
+ WriteMask = inst->DstReg.WriteMask;
+
switch (inst->Opcode) {
case OPCODE_ARL:
case OPCODE_DDX:
case OPCODE_DDY:
case OPCODE_FRC:
case OPCODE_MOV:
- inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
+ track_used_srcreg(s, 0, WriteMask);
break;
case OPCODE_ADD:
case OPCODE_MAX:
@@ -200,14 +201,14 @@ static void process_instruction(struct nqssadce_state* s)
case OPCODE_MUL:
case OPCODE_SGE:
case OPCODE_SLT:
- inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
- inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
+ track_used_srcreg(s, 0, WriteMask);
+ track_used_srcreg(s, 1, WriteMask);
break;
case OPCODE_CMP:
case OPCODE_MAD:
- inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
- inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
- inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask);
+ track_used_srcreg(s, 0, WriteMask);
+ track_used_srcreg(s, 1, WriteMask);
+ track_used_srcreg(s, 2, WriteMask);
break;
case OPCODE_COS:
case OPCODE_EX2:
@@ -215,83 +216,79 @@ static void process_instruction(struct nqssadce_state* s)
case OPCODE_RCP:
case OPCODE_RSQ:
case OPCODE_SIN:
- inst = track_used_srcreg(s, inst, 0, 0x1);
+ track_used_srcreg(s, 0, 0x1);
break;
case OPCODE_DP3:
- inst = track_used_srcreg(s, inst, 0, 0x7);
- inst = track_used_srcreg(s, inst, 1, 0x7);
+ track_used_srcreg(s, 0, 0x7);
+ track_used_srcreg(s, 1, 0x7);
break;
case OPCODE_DP4:
- inst = track_used_srcreg(s, inst, 0, 0xf);
- inst = track_used_srcreg(s, inst, 1, 0xf);
+ track_used_srcreg(s, 0, 0xf);
+ track_used_srcreg(s, 1, 0xf);
break;
case OPCODE_KIL:
case OPCODE_TEX:
case OPCODE_TXB:
case OPCODE_TXP:
- inst = track_used_srcreg(s, inst, 0, 0xf);
+ track_used_srcreg(s, 0, 0xf);
break;
case OPCODE_DST:
- inst = track_used_srcreg(s, inst, 0, 0x6);
- inst = track_used_srcreg(s, inst, 1, 0xa);
+ track_used_srcreg(s, 0, 0x6);
+ track_used_srcreg(s, 1, 0xa);
break;
case OPCODE_EXP:
case OPCODE_LOG:
case OPCODE_POW:
- inst = track_used_srcreg(s, inst, 0, 0x3);
+ track_used_srcreg(s, 0, 0x3);
break;
case OPCODE_LIT:
- inst = track_used_srcreg(s, inst, 0, 0xb);
+ track_used_srcreg(s, 0, 0xb);
break;
default:
- fprintf(stderr, "r300 driver: NqssaDce: Unknown opcode %d\n", inst->Opcode);
+ rc_error(s->Compiler, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
return;
}
+
+ s->IP = s->IP->Prev;
}
-static void calculateInputsOutputs(struct gl_program *p)
+static void calculateInputs(struct radeon_compiler * c)
{
- struct prog_instruction *inst;
- GLuint InputsRead, OutputsWritten;
+ struct rc_instruction *inst;
+
+ c->Program.InputsRead = 0;
+ c->Program.OutputsWritten = 0;
- inst = p->Instructions;
- InputsRead = 0;
- OutputsWritten = 0;
- while (inst->Opcode != OPCODE_END)
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
{
- int i, num_src_regs;
+ int i;
+ int num_src_regs = _mesa_num_inst_src_regs(inst->I.Opcode);
- num_src_regs = _mesa_num_inst_src_regs(inst->Opcode);
for (i = 0; i < num_src_regs; ++i) {
- if (inst->SrcReg[i].File == PROGRAM_INPUT)
- InputsRead |= 1 << inst->SrcReg[i].Index;
+ if (inst->I.SrcReg[i].File == PROGRAM_INPUT)
+ c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index;
}
- if (inst->DstReg.File == PROGRAM_OUTPUT)
- OutputsWritten |= 1 << inst->DstReg.Index;
-
- ++inst;
+ if (_mesa_num_inst_dst_regs(inst->I.Opcode)) {
+ if (inst->I.DstReg.File == PROGRAM_OUTPUT)
+ c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index;
+ }
}
-
- p->InputsRead = InputsRead;
- p->OutputsWritten = OutputsWritten;
}
-void radeonNqssaDce(struct gl_program *p, struct radeon_nqssadce_descr* descr, void * data)
+void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data)
{
struct nqssadce_state s;
_mesa_bzero(&s, sizeof(s));
- s.Program = p;
+ s.Compiler = c;
s.Descr = descr;
s.UserData = data;
s.Descr->Init(&s);
- s.IP = p->NumInstructions;
+ s.IP = c->Program.Instructions.Prev;
- while(s.IP > 0) {
- s.IP--;
+ while(s.IP != &c->Program.Instructions && !c->Error)
process_instruction(&s);
- }
- calculateInputsOutputs(p);
+ calculateInputs(c);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h
index 8059b3b66d6..b3fc77a35a6 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h
@@ -30,7 +30,6 @@
#include "radeon_program.h"
-
struct register_state {
/**
* Bitmask indicating which components of the register are sourced
@@ -44,13 +43,13 @@ struct register_state {
* read from, etc.
*/
struct nqssadce_state {
- struct gl_program *Program;
+ struct radeon_compiler *Compiler;
struct radeon_nqssadce_descr *Descr;
/**
* All instructions after this instruction pointer have been dealt with.
*/
- int IP;
+ struct rc_instruction * IP;
/**
* Which registers are read by subsequent instructions?
@@ -86,7 +85,7 @@ struct radeon_nqssadce_descr {
void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
};
-void radeonNqssaDce(struct gl_program *p, struct radeon_nqssadce_descr* descr, void * data);
+void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data);
struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg);
#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
index fea7f646070..5f35f56a239 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
@@ -180,6 +180,12 @@ struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, str
return inst;
}
+void rc_remove_instruction(struct rc_instruction * inst)
+{
+ inst->Prev->Next = inst->Next;
+ inst->Next->Prev = inst->Prev;
+}
+
void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program)
{
@@ -192,6 +198,7 @@ void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * progr
c->Program.ShadowSamplers = program->ShadowSamplers;
c->Program.InputsRead = program->InputsRead;
+ c->Program.OutputsWritten = program->OutputsWritten;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index 9987a3d3bc9..fae8c6babe8 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -133,6 +133,7 @@ GLint rc_find_free_temporary(struct radeon_compiler * c);
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
+void rc_remove_instruction(struct rc_instruction * inst);
void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program);
void rc_print_program(const struct rc_program *prog);
--
cgit v1.2.3
From 800f48258623f8caf25d013f44784edb7caa3f93 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Fri, 24 Jul 2009 22:41:14 +0200
Subject: r300: Allow compiler to add constants in a cleaner way
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Adding constants is used in a number of non-native instruction
rewrites, and it required us to keep copies of modified gl_programs
around. This is a first step towards ending this.
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/Makefile | 1 +
.../drivers/dri/r300/compiler/r300_fragprog_emit.c | 26 --------
src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 2 +
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 2 +
src/mesa/drivers/dri/r300/compiler/r500_fragprog.c | 9 ---
.../drivers/dri/r300/compiler/r500_fragprog_emit.c | 28 --------
src/mesa/drivers/dri/r300/compiler/radeon_code.c | 78 ++++++++++++++++++++++
src/mesa/drivers/dri/r300/compiler/radeon_code.h | 57 +++++++++++-----
.../drivers/dri/r300/compiler/radeon_compiler.c | 1 +
.../drivers/dri/r300/compiler/radeon_compiler.h | 2 +
.../drivers/dri/r300/compiler/radeon_program.c | 11 +++
.../dri/r300/compiler/radeon_program_pair.c | 2 +-
.../dri/r300/compiler/radeon_program_pair.h | 7 --
src/mesa/drivers/dri/r300/r300_shader.c | 2 +
src/mesa/drivers/dri/r300/r300_state.c | 39 +++++------
src/mesa/drivers/dri/r300/r300_vertprog.c | 71 ++++++++++----------
16 files changed, 193 insertions(+), 145 deletions(-)
create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_code.c
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index 4e2ff50c69d..dd04b81a2f3 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -6,6 +6,7 @@ include $(TOP)/configs/current
LIBNAME = r300compiler
C_SOURCES = \
+ radeon_code.c \
radeon_compiler.c \
radeon_nqssadce.c \
radeon_program.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index f2472d6ce1c..674d1f8cd35 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -56,31 +56,6 @@
} while(0)
-static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwindex)
-{
- PROG_CODE;
-
- for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) {
- if (code->constant[*hwindex].File == file &&
- code->constant[*hwindex].Index == index)
- break;
- }
-
- if (*hwindex >= code->const_nr) {
- if (*hwindex >= R300_PFS_NUM_CONST_REGS) {
- error("Out of hw constants!\n");
- return GL_FALSE;
- }
-
- code->const_nr++;
- code->constant[*hwindex].File = file;
- code->constant[*hwindex].Index = index;
- }
-
- return GL_TRUE;
-}
-
-
/**
* Mark a temporary register as used.
*/
@@ -315,7 +290,6 @@ static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* in
static const struct radeon_pair_handler pair_handler = {
- .EmitConst = &emit_const,
.EmitPaired = &emit_alu,
.EmitTex = &emit_tex,
.BeginTexBlock = &begin_tex,
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 406961bea94..7ac57d0d499 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -303,6 +303,8 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
r300BuildFragmentProgramHwCode(c);
}
+ rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
+
if (c->Base.Debug) {
if (c->is_r500) {
r500FragmentProgramDump(c->code);
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index cdcfa1d27ec..c9d0996d447 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -834,6 +834,8 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
translate_vertex_program(compiler);
+ rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants);
+
compiler->code->InputsRead = compiler->Base.Program.InputsRead;
compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index e8d0e77b68c..6d2158dbfc3 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -376,15 +376,6 @@ void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
uint32_t inst0;
char *str = NULL;
- if (code->const_nr) {
- fprintf(stderr, "--------\nConstants:\n");
- for (n = 0; n < code->const_nr; n++) {
- fprintf(stderr, "Constant %d: %i[%i]\n", n,
- code->constant[n].File, code->constant[n].Index);
- }
- fprintf(stderr, "--------\n");
- }
-
for (n = 0; n < code->inst_end+1; n++) {
inst0 = inst = code->inst[n].inst0;
fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 5b0b306b9cf..21d6b9bba71 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -60,33 +60,6 @@
} while(0)
-/**
- * Callback to register hardware constants.
- */
-static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex)
-{
- PROG_CODE;
-
- for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) {
- if (code->constant[*hwindex].File == file &&
- code->constant[*hwindex].Index == idx)
- break;
- }
-
- if (*hwindex >= code->const_nr) {
- if (*hwindex >= R500_PFS_NUM_CONST_REGS) {
- error("Out of hw constants!\n");
- return GL_FALSE;
- }
-
- code->const_nr++;
- code->constant[*hwindex].File = file;
- code->constant[*hwindex].Index = idx;
- }
-
- return GL_TRUE;
-}
-
static GLuint translate_rgb_op(struct r300_fragment_program_compiler *c, GLuint opcode)
{
switch(opcode) {
@@ -295,7 +268,6 @@ static GLboolean emit_tex(void *data, struct radeon_pair_texture_instruction *in
}
static const struct radeon_pair_handler pair_handler = {
- .EmitConst = emit_const,
.EmitPaired = emit_paired,
.EmitTex = emit_tex,
.MaxHwTemps = 128
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.c b/src/mesa/drivers/dri/r300/compiler/radeon_code.c
new file mode 100644
index 00000000000..b94e534d9c7
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "main/mtypes.h"
+#include "shader/prog_instruction.h"
+
+#include "radeon_code.h"
+
+void rc_constants_init(struct rc_constant_list * c)
+{
+ memset(c, 0, sizeof(*c));
+}
+
+/**
+ * Copy a constants structure, assuming that the destination structure
+ * is not initialized.
+ */
+void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src)
+{
+ dst->Constants = malloc(sizeof(struct rc_constant) * src->Count);
+ memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count);
+ dst->Count = src->Count;
+ dst->_Reserved = src->Count;
+}
+
+void rc_constants_destroy(struct rc_constant_list * c)
+{
+ free(c->Constants);
+ memset(c, 0, sizeof(*c));
+}
+
+unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant)
+{
+ unsigned index = c->Count;
+
+ if (c->Count >= c->_Reserved) {
+ struct rc_constant * newlist;
+
+ c->_Reserved = c->_Reserved * 2;
+ if (!c->_Reserved)
+ c->_Reserved = 16;
+
+ newlist = malloc(sizeof(struct rc_constant) * c->_Reserved);
+ memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count);
+
+ free(c->Constants);
+ c->Constants = newlist;
+ }
+
+ c->Constants[index] = *constant;
+ c->Count++;
+
+ return index;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index e89e7bc17b2..3e6eb97b177 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -23,7 +23,6 @@
#ifndef RADEON_CODE_H
#define RADEON_CODE_H
-
#define R300_PFS_MAX_ALU_INST 64
#define R300_PFS_MAX_TEX_INST 32
#define R300_PFS_MAX_TEX_INDIRECT 4
@@ -39,6 +38,44 @@
#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1)
+enum {
+ /**
+ * External constants are constants whose meaning is unknown to this
+ * compiler. For example, a Mesa gl_program's constants are turned
+ * into external constants.
+ */
+ RC_CONSTANT_EXTERNAL = 0,
+
+ RC_CONSTANT_IMMEDIATE,
+
+ /**
+ * Constant referring to state that is known by this compiler,
+ * i.e. *not* arbitrary Mesa (or other) state.
+ */
+ RC_CONSTANT_STATE
+};
+
+struct rc_constant {
+ unsigned Type:2; /**< RC_CONSTANT_xxx */
+ union {
+ unsigned External;
+ float Immediate[4];
+ unsigned State[4];
+ } u;
+};
+
+struct rc_constant_list {
+ struct rc_constant * Constants;
+ unsigned Count;
+
+ unsigned _Reserved;
+};
+
+void rc_constants_init(struct rc_constant_list * c);
+void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src);
+void rc_constants_destroy(struct rc_constant_list * c);
+unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant);
+
/**
* Stores state that influences the compilation of a fragment program.
*/
@@ -98,13 +135,6 @@ struct r300_fragment_program_code {
int cur_node;
int first_node_has_tex;
- /**
- * Remember which program register a given hardware constant
- * belongs to.
- */
- struct prog_src_register constant[R300_PFS_NUM_CONST_REGS];
- int const_nr;
-
int max_temp_idx;
};
@@ -122,13 +152,6 @@ struct r500_fragment_program_code {
int inst_offset;
int inst_end;
- /**
- * Remember which program register a given hardware constant
- * belongs to.
- */
- struct prog_src_register constant[R500_PFS_NUM_CONST_REGS];
- int const_nr;
-
int max_temp_idx;
};
@@ -140,6 +163,8 @@ struct rX00_fragment_program_code {
GLboolean writes_depth;
+ struct rc_constant_list constants;
+
/* attribute that we are sending the WPOS in */
gl_frag_attrib wpos_attr;
/* attribute that we are sending the fog coordinate in */
@@ -168,6 +193,8 @@ struct r300_vertex_program_code {
int inputs[VERT_ATTRIB_MAX];
int outputs[VERT_RESULT_MAX];
+ struct rc_constant_list constants;
+
GLbitfield InputsRead;
GLbitfield OutputsWritten;
};
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index 7b8322c201d..684961021a1 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -37,6 +37,7 @@ void rc_init(struct radeon_compiler * c)
void rc_destroy(struct radeon_compiler * c)
{
+ rc_constants_destroy(&c->Program.Constants);
memory_pool_destroy(&c->Pool);
free(c->ErrorMsg);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index abe2c185258..6630db62795 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -46,6 +46,8 @@ struct rc_program {
GLbitfield InputsRead;
GLbitfield OutputsWritten;
GLbitfield ShadowSamplers; /**< Texture units used for shadow sampling. */
+
+ struct rc_constant_list Constants;
};
struct radeon_compiler {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
index 5f35f56a239..50a0ce8743c 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
@@ -28,6 +28,7 @@
#include "radeon_program.h"
#include "radeon_compiler.h"
+#include "shader/prog_parameter.h"
#include "shader/prog_print.h"
@@ -190,6 +191,7 @@ void rc_remove_instruction(struct rc_instruction * inst)
void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program)
{
struct prog_instruction *source;
+ unsigned int i;
for(source = program->Instructions; source->Opcode != OPCODE_END; ++source) {
struct rc_instruction * dest = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
@@ -199,6 +201,15 @@ void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * progr
c->Program.ShadowSamplers = program->ShadowSamplers;
c->Program.InputsRead = program->InputsRead;
c->Program.OutputsWritten = program->OutputsWritten;
+
+ for(i = 0; i < program->Parameters->NumParameters; ++i) {
+ struct rc_constant constant;
+
+ constant.Type = RC_CONSTANT_EXTERNAL;
+ constant.u.External = i;
+
+ rc_constants_add(&c->Program.Constants, &constant);
+ }
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index ca63b906964..57a364c78b6 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -640,7 +640,7 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio
index = get_hw_reg(s, src.File, src.Index);
} else {
constant = 1;
- s->Compiler->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index);
+ index = src.Index;
}
for(i = 0; i < 3; ++i) {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index 46196fb1c87..9d4d7dd3c93 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -109,13 +109,6 @@ struct radeon_pair_texture_instruction {
*
*/
struct radeon_pair_handler {
- /**
- * Fill in the proper hardware index for the given constant register.
- *
- * @return GL_FALSE on error.
- */
- GLboolean (*EmitConst)(void*, GLuint file, GLuint index, GLuint *hwindex);
-
/**
* Write a paired instruction to the hardware.
*
diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c
index 3704c101558..40b073f2c73 100644
--- a/src/mesa/drivers/dri/r300/r300_shader.c
+++ b/src/mesa/drivers/dri/r300/r300_shader.c
@@ -38,6 +38,7 @@ static void freeFragProgCache(GLcontext *ctx, struct r300_fragment_program_cont
while (fp) {
tmp = fp->next;
+ rc_constants_destroy(&fp->code.constants);
_mesa_reference_program(ctx, &fp->Base, NULL);
_mesa_free(fp);
fp = tmp;
@@ -50,6 +51,7 @@ static void freeVertProgCache(GLcontext *ctx, struct r300_vertex_program_cont *c
while (vp) {
tmp = vp->next;
+ rc_constants_destroy(&vp->code.constants);
_mesa_reference_vertprog(ctx, &vp->Base, NULL);
_mesa_free(vp);
vp = tmp;
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index e3e8a6fb3df..b8fad4a6e77 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -2029,26 +2029,21 @@ void r300UpdateShaders(r300ContextPtr rmesa)
rmesa->radeon.NewGLState = 0;
}
-static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx,
- struct prog_src_register srcreg)
+static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index)
{
static const GLfloat dummy[4] = { 0, 0, 0, 0 };
r300ContextPtr rmesa = R300_CONTEXT(ctx);
- struct gl_program * program = rmesa->selected_fp->Base;
-
- switch(srcreg.File) {
- case PROGRAM_LOCAL_PARAM:
- return program->LocalParams[srcreg.Index];
- case PROGRAM_ENV_PARAM:
- return ctx->FragmentProgram.Parameters[srcreg.Index];
- case PROGRAM_STATE_VAR:
- case PROGRAM_NAMED_PARAM:
- case PROGRAM_CONSTANT:
- return program->Parameters->ParameterValues[srcreg.Index];
- default:
- _mesa_problem(ctx, "get_fragmentprogram_constant: Unknown\n");
- return dummy;
+ struct r300_fragment_program * fp = rmesa->selected_fp;
+ struct rc_constant * rcc = &fp->code.constants.Constants[index];
+
+ switch(rcc->Type) {
+ case RC_CONSTANT_EXTERNAL:
+ return fp->Base->Parameters->ParameterValues[rcc->u.External];
+ case RC_CONSTANT_IMMEDIATE:
+ return rcc->u.Immediate;
}
+
+ return dummy;
}
@@ -2099,9 +2094,9 @@ static void r300SetupPixelShader(GLcontext *ctx)
}
R300_STATECHANGE(rmesa, fpp);
- rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4);
- for (i = 0; i < code->const_nr; i++) {
- const GLfloat *constant = get_fragmentprogram_constant(ctx, code->constant[i]);
+ rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, fp->code.constants.Count * 4);
+ for (i = 0; i < fp->code.constants.Count; i++) {
+ const GLfloat *constant = get_fragmentprogram_constant(ctx, i);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]);
@@ -2161,14 +2156,14 @@ static void r500SetupPixelShader(GLcontext *ctx)
bump_r500fp_count(rmesa->hw.r500fp.cmd, (code->inst_end + 1) * 6);
R300_STATECHANGE(rmesa, r500fp_const);
- for (i = 0; i < code->const_nr; i++) {
- const GLfloat *constant = get_fragmentprogram_constant(ctx, code->constant[i]);
+ for (i = 0; i < fp->code.constants.Count; i++) {
+ const GLfloat *constant = get_fragmentprogram_constant(ctx, i);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(constant[3]);
}
- bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, code->const_nr * 4);
+ bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, fp->code.constants.Count * 4);
}
void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten)
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index 91d9d8ae949..fd2b9fcaf26 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -45,56 +45,53 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_context.h"
#include "r300_state.h"
-
-static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program *vp, float *dst)
+/**
+ * Write parameter array for the given vertex program into dst.
+ * Return the total number of components written.
+ */
+static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst)
{
- int pi;
- float *dst_o = dst;
- struct gl_program_parameter_list *paramList;
+ int i;
- if (vp->IsNVProgram) {
+ if (vp->Base->IsNVProgram) {
_mesa_load_tracked_matrices(ctx);
-
- for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
- *dst++ = ctx->VertexProgram.Parameters[pi][0];
- *dst++ = ctx->VertexProgram.Parameters[pi][1];
- *dst++ = ctx->VertexProgram.Parameters[pi][2];
- *dst++ = ctx->VertexProgram.Parameters[pi][3];
+ } else {
+ if (vp->Base->Base.Parameters) {
+ _mesa_load_state_parameters(ctx, vp->Base->Base.Parameters);
}
- return dst - dst_o;
}
- if (!vp->Base.Parameters)
- return 0;
-
- _mesa_load_state_parameters(ctx, vp->Base.Parameters);
-
- if (vp->Base.Parameters->NumParameters * 4 >
- VSF_MAX_FRAGMENT_LENGTH) {
+ if (vp->code.constants.Count * 4 > VSF_MAX_FRAGMENT_LENGTH) {
+ /* Should have checked this earlier... */
fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
_mesa_exit(-1);
}
- paramList = vp->Base.Parameters;
- for (pi = 0; pi < paramList->NumParameters; pi++) {
- switch (paramList->Parameters[pi].Type) {
- case PROGRAM_STATE_VAR:
- case PROGRAM_NAMED_PARAM:
- //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
- case PROGRAM_CONSTANT:
- *dst++ = paramList->ParameterValues[pi][0];
- *dst++ = paramList->ParameterValues[pi][1];
- *dst++ = paramList->ParameterValues[pi][2];
- *dst++ = paramList->ParameterValues[pi][3];
+ for(i = 0; i < vp->code.constants.Count; ++i) {
+ const float * src = 0;
+ const struct rc_constant * constant = &vp->code.constants.Constants[i];
+
+ switch(constant->Type) {
+ case RC_CONSTANT_EXTERNAL:
+ if (vp->Base->IsNVProgram) {
+ src = ctx->VertexProgram.Parameters[constant->u.External];
+ } else {
+ src = vp->Base->Base.Parameters->ParameterValues[constant->u.External];
+ }
+ break;
+
+ case RC_CONSTANT_IMMEDIATE:
+ src = constant->u.Immediate;
break;
- default:
- _mesa_problem(NULL, "Bad param type in %s",
- __FUNCTION__);
}
+ dst[4*i] = src[0];
+ dst[4*i + 1] = src[1];
+ dst[4*i + 2] = src[2];
+ dst[4*i + 3] = src[3];
}
- return dst - dst_o;
+ return 4 * vp->code.constants.Count;
}
static struct r300_vertex_program *build_program(GLcontext *ctx,
@@ -113,7 +110,7 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
compiler.code = &vp->code;
compiler.state = vp->key;
- compiler.program = vp->Base;
+ compiler.program = &vp->Base->Base;
if (compiler.Base.Debug) {
fprintf(stderr, "Initial vertex program:\n");
@@ -210,7 +207,7 @@ void r300SetupVertexProgram(r300ContextPtr rmesa)
((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
R300_STATECHANGE(rmesa, vpp);
- param_count = r300VertexProgUpdateParams(ctx, prog->Base, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
+ param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
param_count /= 4;
--
cgit v1.2.3
From 25e371fb7bd68ca9aba258a89d0d339b5901e1d3 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Fri, 17 Jul 2009 16:58:27 -0600
Subject: docs: 7.5 tarball md5sums
---
docs/relnotes-7.5.html | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/docs/relnotes-7.5.html b/docs/relnotes-7.5.html
index 8bcacd62e1e..56deca6a86c 100644
--- a/docs/relnotes-7.5.html
+++ b/docs/relnotes-7.5.html
@@ -40,7 +40,15 @@ If you're especially concerned with stability you should probably look for
MD5 checksums
-tbd
+553fd956e544727f30fbe249619b6286 MesaLib-7.5.tar.gz
+459f332551f6ebb86f384d21dd15e1f0 MesaLib-7.5.tar.bz2
+8c02c0e17a9025250d20424ae32f5163 MesaLib-7.5.zip
+a188da2886fa5496ea0c2cda602b2eeb MesaDemos-7.5.tar.gz
+398ee8801814a00e47f6c2314e3dfddc MesaDemos-7.5.tar.bz2
+15a0c8ae013c54335a26335e1a98d609 MesaDemos-7.5.zip
+81010147def5a644ba14f9bbb7a49a2a MesaGLUT-7.5.tar.gz
+baa7a1e850b6e39bae58868fd0684004 MesaGLUT-7.5.tar.bz2
+265228418e4423fa328f2f5b7970cf08 MesaGLUT-7.5.zip
--
cgit v1.2.3
From 09ef339b691036a5db6258aed0d9b91ee9c5b1b0 Mon Sep 17 00:00:00 2001
From: Karl Schultz
Date: Mon, 27 Jul 2009 13:38:35 -0600
Subject: windows: updated VC8 project files
See bug 22882.
---
src/mesa/drivers/windows/gdi/mesa.def | 1 -
windows/VC8/mesa/mesa/mesa.vcproj | 52 ++++++++++++++++++++++++-----------
windows/VC8/mesa/osmesa/osmesa.vcproj | 4 +++
3 files changed, 40 insertions(+), 17 deletions(-)
diff --git a/src/mesa/drivers/windows/gdi/mesa.def b/src/mesa/drivers/windows/gdi/mesa.def
index ede43ef4c0f..bd3e5b21373 100644
--- a/src/mesa/drivers/windows/gdi/mesa.def
+++ b/src/mesa/drivers/windows/gdi/mesa.def
@@ -902,7 +902,6 @@ EXPORTS
_mesa_generate_mipmap
_mesa_get_compressed_teximage
_mesa_get_current_context
- _mesa_get_program_register
_mesa_get_teximage
_mesa_init_driver_functions
_mesa_init_glsl_driver_functions
diff --git a/windows/VC8/mesa/mesa/mesa.vcproj b/windows/VC8/mesa/mesa/mesa.vcproj
index 51a837f0219..068da1612d4 100644
--- a/windows/VC8/mesa/mesa/mesa.vcproj
+++ b/windows/VC8/mesa/mesa/mesa.vcproj
@@ -52,6 +52,7 @@
WarningLevel="3"
SuppressStartupBanner="true"
CompileAs="0"
+ ForcedIncludeFiles="../../../../src/mesa/main/compiler.h"
/>
+
+
@@ -650,10 +658,6 @@
RelativePath="..\..\..\..\src\mesa\shader\prog_cache.c"
>
-
-
@@ -666,6 +670,10 @@
RelativePath="..\..\..\..\src\mesa\shader\prog_noise.c"
>
+
+
@@ -1034,6 +1042,10 @@
RelativePath="..\..\..\..\src\mesa\main\texgen.c"
>
+
+
@@ -1062,10 +1074,6 @@
RelativePath="..\..\..\..\src\mesa\main\varray.c"
>
-
-
@@ -1122,6 +1130,10 @@
RelativePath="..\..\..\..\src\mesa\vbo\vbo_split_inplace.c"
>
+
+
@@ -1263,6 +1275,10 @@
RelativePath="..\..\..\..\src\mesa\main\convolve.h"
>
+
+
@@ -1519,10 +1535,6 @@
RelativePath="..\..\..\..\src\mesa\main\polygon.h"
>
-
-
@@ -1535,6 +1547,10 @@
RelativePath="..\..\..\..\src\mesa\shader\prog_noise.h"
>
+
+
@@ -1879,6 +1895,10 @@
RelativePath="..\..\..\..\src\mesa\main\texgen.h"
>
+
+
@@ -1911,10 +1931,6 @@
RelativePath="..\..\..\..\src\mesa\main\varray.h"
>
-
-
@@ -1947,6 +1963,10 @@
RelativePath="..\..\..\..\src\mesa\main\version.h"
>
+
+
diff --git a/windows/VC8/mesa/osmesa/osmesa.vcproj b/windows/VC8/mesa/osmesa/osmesa.vcproj
index d7cd47a6c14..10f2dc9717a 100644
--- a/windows/VC8/mesa/osmesa/osmesa.vcproj
+++ b/windows/VC8/mesa/osmesa/osmesa.vcproj
@@ -55,6 +55,7 @@
WarningLevel="3"
SuppressStartupBanner="true"
CompileAs="0"
+ ForcedIncludeFiles="../../../../src/mesa/main/compiler.h"
/>
Date: Mon, 27 Jul 2009 14:35:18 -0600
Subject: intel: Use _mesa_warning() to report GEM warnings
---
src/mesa/drivers/dri/intel/intel_screen.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 0f278b3acdd..2c6e2640b08 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -617,10 +617,10 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen)
/* Otherwise, use the classic buffer manager. */
if (intelScreen->bufmgr == NULL) {
if (gem_disable) {
- fprintf(stderr, "GEM disabled. Using classic.\n");
+ _mesa_warning(NULL, "GEM disabled. Using classic.");
} else {
- fprintf(stderr, "Failed to initialize GEM. "
- "Falling back to classic.\n");
+ _mesa_warning(NULL,
+ "Failed to initialize GEM. Falling back to classic.");
}
if (intelScreen->tex.size == 0) {
--
cgit v1.2.3
From 6f4608f53c7ba28b5640974fc1daf6ad860df2f6 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Fri, 24 Jul 2009 22:34:44 +0200
Subject: r300/compiler: Refactor local transforms to use rc_program
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r300_fragprog.c | 261 ++++++------
src/mesa/drivers/dri/r300/compiler/r300_fragprog.h | 2 +-
src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 10 +-
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 68 ++-
src/mesa/drivers/dri/r300/compiler/r500_fragprog.c | 213 +++++-----
src/mesa/drivers/dri/r300/compiler/r500_fragprog.h | 5 +-
src/mesa/drivers/dri/r300/compiler/radeon_code.c | 92 +++++
src/mesa/drivers/dri/r300/compiler/radeon_code.h | 18 +-
.../drivers/dri/r300/compiler/radeon_compiler.h | 2 +-
.../drivers/dri/r300/compiler/radeon_program.c | 73 +---
.../drivers/dri/r300/compiler/radeon_program.h | 25 +-
.../drivers/dri/r300/compiler/radeon_program_alu.c | 454 +++++++++++----------
.../drivers/dri/r300/compiler/radeon_program_alu.h | 20 +-
src/mesa/drivers/dri/r300/r300_state.c | 67 ++-
14 files changed, 664 insertions(+), 646 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
index f838179f79b..fbffd3d7cdd 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
@@ -31,16 +31,12 @@
#include "../r300_reg.h"
-static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
+static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
{
- gl_state_index fail_value_tokens[STATE_LENGTH] = {
- STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
- };
struct prog_src_register reg = { 0, };
- fail_value_tokens[2] = tmu;
reg.File = PROGRAM_STATE_VAR;
- reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
+ reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu);
reg.Swizzle = SWIZZLE_WWWW;
return reg;
}
@@ -50,173 +46,146 @@ static struct prog_src_register shadow_ambient(struct gl_program *program, int t
* - premultiply texture coordinates for RECT
* - extract operand swizzles
* - introduce a temporary register when write masks are needed
- *
- * \todo If/when r5xx uses the radeon_program architecture, this can probably
- * be reused.
*/
GLboolean r300_transform_TEX(
- struct radeon_transform_context *t,
- struct prog_instruction* orig_inst, void* data)
+ struct radeon_compiler * c,
+ struct rc_instruction* inst,
+ void* data)
{
struct r300_fragment_program_compiler *compiler =
(struct r300_fragment_program_compiler*)data;
- struct prog_instruction inst = *orig_inst;
- struct prog_instruction* tgt;
- GLboolean destredirect = GL_FALSE;
-
- if (inst.Opcode != OPCODE_TEX &&
- inst.Opcode != OPCODE_TXB &&
- inst.Opcode != OPCODE_TXP &&
- inst.Opcode != OPCODE_KIL)
+
+ if (inst->I.Opcode != OPCODE_TEX &&
+ inst->I.Opcode != OPCODE_TXB &&
+ inst->I.Opcode != OPCODE_TXP &&
+ inst->I.Opcode != OPCODE_KIL)
return GL_FALSE;
- if (inst.Opcode != OPCODE_KIL &&
- t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func;
+ /* ARB_shadow & EXT_shadow_funcs */
+ if (inst->I.Opcode != OPCODE_KIL &&
+ c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) {
+ GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
- tgt = radeonAppendInstructions(t->Program, 1);
+ inst->I.Opcode = OPCODE_MOV;
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg = inst.DstReg;
if (comparefunc == GL_ALWAYS) {
- tgt->SrcReg[0].File = PROGRAM_BUILTIN;
- tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
+ inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
+ inst->I.SrcReg[0].Swizzle = SWIZZLE_1111;
} else {
- tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
+ inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit);
}
+
return GL_TRUE;
- }
+ } else {
+ GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
+ GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode;
+ struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
+ struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
+ struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
+ int pass, fail;
+
+ inst_rcp->I.Opcode = OPCODE_RCP;
+ inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_rcp->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
+ inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0];
+ inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
+
+ inst_cmp->I.DstReg = inst->I.DstReg;
+ inst->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst->I.DstReg.Index = rc_find_free_temporary(c);
+ inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+
+ inst_mad->I.Opcode = OPCODE_MAD;
+ inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mad->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mad->I.SrcReg[0] = inst->I.SrcReg[0];
+ inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
+ inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY;
+ inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index;
+ inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
+ inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY;
+ inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index;
+ if (depthmode == 0) /* GL_LUMINANCE */
+ inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
+ else if (depthmode == 2) /* GL_ALPHA */
+ inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW;
+
+ /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
+ * r < tex <=> -tex+r < 0
+ * r >= tex <=> not (-tex+r < 0 */
+ if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
+ inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW;
+ else
+ inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW;
+
+ inst_cmp->I.Opcode = OPCODE_CMP;
+ /* DstReg has been filled out above */
+ inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index;
+
+ if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
+ pass = 1;
+ fail = 2;
+ } else {
+ pass = 2;
+ fail = 1;
+ }
- inst.DstReg.File = PROGRAM_TEMPORARY;
- inst.DstReg.Index = radeonFindFreeTemporary(t);
- inst.DstReg.WriteMask = WRITEMASK_XYZW;
+ inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN;
+ inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111;
+ inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit);
+ }
}
-
/* Hardware uses [0..1]x[0..1] range for rectangle textures
* instead of [0..Width]x[0..Height].
* Add a scaling instruction.
*/
- if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) {
- gl_state_index tokens[STATE_LENGTH] = {
- STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
- 0
- };
-
- int tempreg = radeonFindFreeTemporary(t);
- int factor_index;
-
- tokens[2] = inst.TexSrcUnit;
- factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens);
-
- tgt = radeonAppendInstructions(t->Program, 1);
-
- tgt->Opcode = OPCODE_MUL;
- tgt->DstReg.File = PROGRAM_TEMPORARY;
- tgt->DstReg.Index = tempreg;
- tgt->SrcReg[0] = inst.SrcReg[0];
- tgt->SrcReg[1].File = PROGRAM_STATE_VAR;
- tgt->SrcReg[1].Index = factor_index;
-
- reset_srcreg(&inst.SrcReg[0]);
- inst.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst.SrcReg[0].Index = tempreg;
+ if (inst->I.Opcode != OPCODE_KIL && inst->I.TexSrcTarget == TEXTURE_RECT_INDEX) {
+ struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mul->I.Opcode = OPCODE_MUL;
+ inst_mul->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mul->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mul->I.SrcReg[0] = inst->I.SrcReg[0];
+ inst_mul->I.SrcReg[1].File = PROGRAM_STATE_VAR;
+ inst_mul->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->I.TexSrcUnit);
+
+ reset_srcreg(&inst->I.SrcReg[0]);
+ inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[0].Index = inst_mul->I.DstReg.Index;
}
- if (inst.Opcode != OPCODE_KIL) {
- if (inst.DstReg.File != PROGRAM_TEMPORARY ||
- inst.DstReg.WriteMask != WRITEMASK_XYZW) {
- int tempreg = radeonFindFreeTemporary(t);
-
- inst.DstReg.File = PROGRAM_TEMPORARY;
- inst.DstReg.Index = tempreg;
- inst.DstReg.WriteMask = WRITEMASK_XYZW;
- destredirect = GL_TRUE;
- } else if (inst.SaturateMode) {
- destredirect = GL_TRUE;
- }
- }
+ /* Cannot write texture to output registers or with masks */
+ if (inst->I.Opcode != OPCODE_KIL &&
+ (inst->I.DstReg.File != PROGRAM_TEMPORARY || inst->I.DstReg.WriteMask != WRITEMASK_XYZW)) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
+
+ inst_mov->I.Opcode = OPCODE_MOV;
+ inst_mov->I.DstReg = inst->I.DstReg;
+ inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c);
- if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
- int tmpreg = radeonFindFreeTemporary(t);
- tgt = radeonAppendInstructions(t->Program, 1);
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg.File = PROGRAM_TEMPORARY;
- tgt->DstReg.Index = tmpreg;
- tgt->SrcReg[0] = inst.SrcReg[0];
-
- reset_srcreg(&inst.SrcReg[0]);
- inst.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst.SrcReg[0].Index = tmpreg;
+ inst->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
+ inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
}
- tgt = radeonAppendInstructions(t->Program, 1);
- _mesa_copy_instructions(tgt, &inst, 1);
-
- if (inst.Opcode != OPCODE_KIL &&
- t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func;
- GLuint depthmode = compiler->state.unit[inst.TexSrcUnit].depth_texture_mode;
- int rcptemp = radeonFindFreeTemporary(t);
- int pass, fail;
-
- tgt = radeonAppendInstructions(t->Program, 3);
-
- tgt[0].Opcode = OPCODE_RCP;
- tgt[0].DstReg.File = PROGRAM_TEMPORARY;
- tgt[0].DstReg.Index = rcptemp;
- tgt[0].DstReg.WriteMask = WRITEMASK_W;
- tgt[0].SrcReg[0] = inst.SrcReg[0];
- tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
-
- tgt[1].Opcode = OPCODE_MAD;
- tgt[1].DstReg = inst.DstReg;
- tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
- tgt[1].SrcReg[0] = inst.SrcReg[0];
- tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
- tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
- tgt[1].SrcReg[1].Index = rcptemp;
- tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
- tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
- tgt[1].SrcReg[2].Index = inst.DstReg.Index;
- if (depthmode == 0) /* GL_LUMINANCE */
- tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
- else if (depthmode == 2) /* GL_ALPHA */
- tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
-
- /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
- * r < tex <=> -tex+r < 0
- * r >= tex <=> not (-tex+r < 0 */
- if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
- tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW;
- else
- tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW;
-
- tgt[2].Opcode = OPCODE_CMP;
- tgt[2].DstReg = orig_inst->DstReg;
- tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
- tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
-
- if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
- pass = 1;
- fail = 2;
- } else {
- pass = 2;
- fail = 1;
- }
- tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
- tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
- tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
- } else if (destredirect) {
- tgt = radeonAppendInstructions(t->Program, 1);
-
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg = orig_inst->DstReg;
- tgt->SaturateMode = inst.SaturateMode;
- tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
- tgt->SrcReg[0].Index = inst.DstReg.Index;
+ /* Cannot read texture coordinate from constants file */
+ if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mov->I.Opcode = OPCODE_MOV;
+ inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mov->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mov->I.SrcReg[0] = inst->I.SrcReg[0];
+
+ reset_srcreg(&inst->I.SrcReg[0]);
+ inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index;
}
return GL_TRUE;
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
index 2d094c36186..0ac46dbd9cb 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
@@ -44,6 +44,6 @@ extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler
extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c);
-extern GLboolean r300_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data);
+extern GLboolean r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data);
#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 7ac57d0d499..bdab61f32da 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -250,6 +250,8 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
rewrite_depth_out(c->program);
+ rc_mesa_to_rc_program(&c->Base, c->program);
+
if (c->is_r500) {
struct radeon_program_transformation transformations[] = {
{ &r500_transform_TEX, c },
@@ -257,24 +259,22 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{ &radeonTransformDeriv, 0 },
{ &radeonTransformTrigScale, 0 }
};
- radeonLocalTransform(c->program, 4, transformations);
+ radeonLocalTransform(&c->Base, 4, transformations);
} else {
struct radeon_program_transformation transformations[] = {
{ &r300_transform_TEX, c },
{ &radeonTransformALU, 0 },
{ &radeonTransformTrigSimple, 0 }
};
- radeonLocalTransform(c->program, 3, transformations);
+ radeonLocalTransform(&c->Base, 3, transformations);
}
if (c->Base.Debug) {
_mesa_printf("Fragment Program: After native rewrite:\n");
- _mesa_print_program(c->program);
+ rc_print_program(&c->Base.Program);
fflush(stdout);
}
- rc_mesa_to_rc_program(&c->Base, c->program);
-
if (c->is_r500) {
struct radeon_nqssadce_descr nqssadce = {
.Init = &nqssadce_init,
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index c9d0996d447..90910c45c62 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -538,47 +538,43 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
* Introduce intermediate MOVs to temporary registers to account for this.
*/
static GLboolean transform_source_conflicts(
- struct radeon_transform_context *t,
- struct prog_instruction* orig_inst,
+ struct radeon_compiler *c,
+ struct rc_instruction* inst,
void* unused)
{
- struct prog_instruction inst = *orig_inst;
- struct prog_instruction * dst;
- GLuint num_operands = _mesa_num_inst_src_regs(inst.Opcode);
+ GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode);
if (num_operands == 3) {
- if (t_src_conflict(inst.SrcReg[1], inst.SrcReg[2])
- || t_src_conflict(inst.SrcReg[0], inst.SrcReg[2])) {
- int tmpreg = radeonFindFreeTemporary(t);
- struct prog_instruction * inst_mov = radeonAppendInstructions(t->Program, 1);
- inst_mov->Opcode = OPCODE_MOV;
- inst_mov->DstReg.File = PROGRAM_TEMPORARY;
- inst_mov->DstReg.Index = tmpreg;
- inst_mov->SrcReg[0] = inst.SrcReg[2];
-
- reset_srcreg(&inst.SrcReg[2]);
- inst.SrcReg[2].File = PROGRAM_TEMPORARY;
- inst.SrcReg[2].Index = tmpreg;
+ if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2])
+ || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) {
+ int tmpreg = rc_find_free_temporary(c);
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+ inst_mov->I.Opcode = OPCODE_MOV;
+ inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mov->I.DstReg.Index = tmpreg;
+ inst_mov->I.SrcReg[0] = inst->I.SrcReg[2];
+
+ reset_srcreg(&inst->I.SrcReg[2]);
+ inst->I.SrcReg[2].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[2].Index = tmpreg;
}
}
if (num_operands >= 2) {
- if (t_src_conflict(inst.SrcReg[1], inst.SrcReg[0])) {
- int tmpreg = radeonFindFreeTemporary(t);
- struct prog_instruction * inst_mov = radeonAppendInstructions(t->Program, 1);
- inst_mov->Opcode = OPCODE_MOV;
- inst_mov->DstReg.File = PROGRAM_TEMPORARY;
- inst_mov->DstReg.Index = tmpreg;
- inst_mov->SrcReg[0] = inst.SrcReg[1];
-
- reset_srcreg(&inst.SrcReg[1]);
- inst.SrcReg[1].File = PROGRAM_TEMPORARY;
- inst.SrcReg[1].Index = tmpreg;
+ if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) {
+ int tmpreg = rc_find_free_temporary(c);
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+ inst_mov->I.Opcode = OPCODE_MOV;
+ inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mov->I.DstReg.Index = tmpreg;
+ inst_mov->I.SrcReg[0] = inst->I.SrcReg[1];
+
+ reset_srcreg(&inst->I.SrcReg[1]);
+ inst->I.SrcReg[1].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[1].Index = tmpreg;
}
}
- dst = radeonAppendInstructions(t->Program, 1);
- *dst = inst;
return GL_TRUE;
}
@@ -782,16 +778,18 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
addArtificialOutputs(compiler);
+ rc_mesa_to_rc_program(&compiler->Base, compiler->program);
+
{
struct radeon_program_transformation transformations[] = {
{ &r300_transform_vertex_alu, 0 },
};
- radeonLocalTransform(compiler->program, 1, transformations);
+ radeonLocalTransform(&compiler->Base, 1, transformations);
}
if (compiler->Base.Debug) {
fprintf(stderr, "Vertex program after native rewrite:\n");
- _mesa_print_program(compiler->program);
+ rc_print_program(&compiler->Base.Program);
fflush(stdout);
}
@@ -803,17 +801,15 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
struct radeon_program_transformation transformations[] = {
{ &transform_source_conflicts, 0 },
};
- radeonLocalTransform(compiler->program, 1, transformations);
+ radeonLocalTransform(&compiler->Base, 1, transformations);
}
if (compiler->Base.Debug) {
fprintf(stderr, "Vertex program after source conflict resolve:\n");
- _mesa_print_program(compiler->program);
+ rc_print_program(&compiler->Base.Program);
fflush(stdout);
}
- rc_mesa_to_rc_program(&compiler->Base, compiler->program);
-
{
struct radeon_nqssadce_descr nqssadce = {
.Init = &nqssadceInit,
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index 6d2158dbfc3..7e2faed6908 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -29,152 +29,139 @@
#include "../r300_reg.h"
-static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
+static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
{
- gl_state_index fail_value_tokens[STATE_LENGTH] = {
- STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
- };
struct prog_src_register reg = { 0, };
- fail_value_tokens[2] = tmu;
reg.File = PROGRAM_STATE_VAR;
- reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
+ reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu);
reg.Swizzle = SWIZZLE_WWWW;
return reg;
}
/**
* Transform TEX, TXP, TXB, and KIL instructions in the following way:
- * - premultiply texture coordinates for RECT
- * - extract operand swizzles
- * - introduce a temporary register when write masks are needed
- *
+ * - implement texture compare (shadow extensions)
+ * - extract non-native source / destination operands
*/
GLboolean r500_transform_TEX(
- struct radeon_transform_context *t,
- struct prog_instruction* orig_inst, void* data)
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data)
{
struct r300_fragment_program_compiler *compiler =
(struct r300_fragment_program_compiler*)data;
- struct prog_instruction inst = *orig_inst;
- struct prog_instruction* tgt;
- GLboolean destredirect = GL_FALSE;
-
- if (inst.Opcode != OPCODE_TEX &&
- inst.Opcode != OPCODE_TXB &&
- inst.Opcode != OPCODE_TXP &&
- inst.Opcode != OPCODE_KIL)
+
+ if (inst->I.Opcode != OPCODE_TEX &&
+ inst->I.Opcode != OPCODE_TXB &&
+ inst->I.Opcode != OPCODE_TXP &&
+ inst->I.Opcode != OPCODE_KIL)
return GL_FALSE;
/* ARB_shadow & EXT_shadow_funcs */
- if (inst.Opcode != OPCODE_KIL &&
- t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func;
+ if (inst->I.Opcode != OPCODE_KIL &&
+ c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) {
+ GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
- tgt = radeonAppendInstructions(t->Program, 1);
+ inst->I.Opcode = OPCODE_MOV;
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg = inst.DstReg;
if (comparefunc == GL_ALWAYS) {
- tgt->SrcReg[0].File = PROGRAM_BUILTIN;
- tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
+ inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
+ inst->I.SrcReg[0].Swizzle = SWIZZLE_1111;
} else {
- tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
+ inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit);
}
+
return GL_TRUE;
+ } else {
+ GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
+ GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode;
+ struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
+ struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
+ struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
+ int pass, fail;
+
+ inst_rcp->I.Opcode = OPCODE_RCP;
+ inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_rcp->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
+ inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0];
+ inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
+
+ inst_cmp->I.DstReg = inst->I.DstReg;
+ inst->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst->I.DstReg.Index = rc_find_free_temporary(c);
+ inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+
+ inst_mad->I.Opcode = OPCODE_MAD;
+ inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mad->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mad->I.SrcReg[0] = inst->I.SrcReg[0];
+ inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
+ inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY;
+ inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index;
+ inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
+ inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY;
+ inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index;
+ if (depthmode == 0) /* GL_LUMINANCE */
+ inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
+ else if (depthmode == 2) /* GL_ALPHA */
+ inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW;
+
+ /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
+ * r < tex <=> -tex+r < 0
+ * r >= tex <=> not (-tex+r < 0 */
+ if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
+ inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW;
+ else
+ inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW;
+
+ inst_cmp->I.Opcode = OPCODE_CMP;
+ /* DstReg has been filled out above */
+ inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index;
+
+ if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
+ pass = 1;
+ fail = 2;
+ } else {
+ pass = 2;
+ fail = 1;
+ }
+
+ inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN;
+ inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111;
+ inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit);
}
+ }
- inst.DstReg.File = PROGRAM_TEMPORARY;
- inst.DstReg.Index = radeonFindFreeTemporary(t);
- inst.DstReg.WriteMask = WRITEMASK_XYZW;
- } else if (inst.Opcode != OPCODE_KIL && inst.DstReg.File != PROGRAM_TEMPORARY) {
- int tempreg = radeonFindFreeTemporary(t);
+ /* Cannot write texture to output registers */
+ if (inst->I.Opcode != OPCODE_KIL && inst->I.DstReg.File != PROGRAM_TEMPORARY) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
- inst.DstReg.File = PROGRAM_TEMPORARY;
- inst.DstReg.Index = tempreg;
- inst.DstReg.WriteMask = WRITEMASK_XYZW;
- destredirect = GL_TRUE;
- }
+ inst_mov->I.Opcode = OPCODE_MOV;
+ inst_mov->I.DstReg = inst->I.DstReg;
+ inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c);
- if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
- int tmpreg = radeonFindFreeTemporary(t);
- tgt = radeonAppendInstructions(t->Program, 1);
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg.File = PROGRAM_TEMPORARY;
- tgt->DstReg.Index = tmpreg;
- tgt->SrcReg[0] = inst.SrcReg[0];
-
- reset_srcreg(&inst.SrcReg[0]);
- inst.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst.SrcReg[0].Index = tmpreg;
+ inst->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
+ inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
}
- tgt = radeonAppendInstructions(t->Program, 1);
- _mesa_copy_instructions(tgt, &inst, 1);
-
- if (inst.Opcode != OPCODE_KIL &&
- t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func;
- GLuint depthmode = compiler->state.unit[inst.TexSrcUnit].depth_texture_mode;
- int rcptemp = radeonFindFreeTemporary(t);
- int pass, fail;
-
- tgt = radeonAppendInstructions(t->Program, 3);
-
- tgt[0].Opcode = OPCODE_RCP;
- tgt[0].DstReg.File = PROGRAM_TEMPORARY;
- tgt[0].DstReg.Index = rcptemp;
- tgt[0].DstReg.WriteMask = WRITEMASK_W;
- tgt[0].SrcReg[0] = inst.SrcReg[0];
- tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
-
- tgt[1].Opcode = OPCODE_MAD;
- tgt[1].DstReg = inst.DstReg;
- tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
- tgt[1].SrcReg[0] = inst.SrcReg[0];
- tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
- tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
- tgt[1].SrcReg[1].Index = rcptemp;
- tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
- tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
- tgt[1].SrcReg[2].Index = inst.DstReg.Index;
- if (depthmode == 0) /* GL_LUMINANCE */
- tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
- else if (depthmode == 2) /* GL_ALPHA */
- tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
-
- /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
- * r < tex <=> -tex+r < 0
- * r >= tex <=> not (-tex+r < 0 */
- if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
- tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW;
- else
- tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW;
-
- tgt[2].Opcode = OPCODE_CMP;
- tgt[2].DstReg = orig_inst->DstReg;
- tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
- tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
-
- if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
- pass = 1;
- fail = 2;
- } else {
- pass = 2;
- fail = 1;
- }
+ /* Cannot read texture coordinate from constants file */
+ if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
- tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
- tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
- tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
- } else if (destredirect) {
- tgt = radeonAppendInstructions(t->Program, 1);
+ inst_mov->I.Opcode = OPCODE_MOV;
+ inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mov->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mov->I.SrcReg[0] = inst->I.SrcReg[0];
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg = orig_inst->DstReg;
- tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
- tgt->SrcReg[0].Index = inst.DstReg.Index;
+ reset_srcreg(&inst->I.SrcReg[0]);
+ inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index;
}
return GL_TRUE;
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
index e405267bb35..9091f65cd20 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -47,6 +47,9 @@ extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register r
extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src);
-extern GLboolean r500_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data);
+extern GLboolean r500_transform_TEX(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data);
#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.c b/src/mesa/drivers/dri/r300/compiler/radeon_code.c
index b94e534d9c7..c7923004df9 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.c
@@ -76,3 +76,95 @@ unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * cons
return index;
}
+
+
+/**
+ * Add a state vector to the constant list, while trying to avoid duplicates.
+ */
+unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1)
+{
+ unsigned index;
+ struct rc_constant constant;
+
+ for(index = 0; index < c->Count; ++index) {
+ if (c->Constants[index].Type == RC_CONSTANT_STATE) {
+ if (c->Constants[index].u.State[0] == state0 &&
+ c->Constants[index].u.State[1] == state1)
+ return index;
+ }
+ }
+
+ memset(&constant, 0, sizeof(constant));
+ constant.Type = RC_CONSTANT_STATE;
+ constant.Size = 4;
+ constant.u.State[0] = state0;
+ constant.u.State[1] = state1;
+
+ return rc_constants_add(c, &constant);
+}
+
+
+/**
+ * Add an immediate vector to the constant list, while trying to avoid
+ * duplicates.
+ */
+unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data)
+{
+ unsigned index;
+ struct rc_constant constant;
+
+ for(index = 0; index < c->Count; ++index) {
+ if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
+ if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4))
+ return index;
+ }
+ }
+
+ memset(&constant, 0, sizeof(constant));
+ constant.Type = RC_CONSTANT_IMMEDIATE;
+ constant.Size = 4;
+ memcpy(constant.u.Immediate, data, sizeof(float) * 4);
+
+ return rc_constants_add(c, &constant);
+}
+
+
+/**
+ * Add an immediate scalar to the constant list, while trying to avoid
+ * duplicates.
+ */
+unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle)
+{
+ unsigned index;
+ int free_index = -1;
+ struct rc_constant constant;
+
+ for(index = 0; index < c->Count; ++index) {
+ if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
+ for(unsigned comp = 0; comp < c->Constants[index].Size; ++comp) {
+ if (c->Constants[index].u.Immediate[comp] == data) {
+ *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
+ return index;
+ }
+ }
+
+ if (c->Constants[index].Size < 4)
+ free_index = index;
+ }
+ }
+
+ if (free_index >= 0) {
+ unsigned comp = c->Constants[free_index].Size++;
+ c->Constants[free_index].u.Immediate[comp] = data;
+ *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
+ return free_index;
+ }
+
+ memset(&constant, 0, sizeof(constant));
+ constant.Type = RC_CONSTANT_IMMEDIATE;
+ constant.Size = 1;
+ constant.u.Immediate[0] = data;
+ *swizzle = SWIZZLE_XXXX;
+
+ return rc_constants_add(c, &constant);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index 3e6eb97b177..9cf4ed57bb5 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -35,8 +35,6 @@
#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
-#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1)
-
enum {
/**
@@ -50,17 +48,26 @@ enum {
/**
* Constant referring to state that is known by this compiler,
- * i.e. *not* arbitrary Mesa (or other) state.
+ * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state.
*/
RC_CONSTANT_STATE
};
+enum {
+ RC_STATE_SHADOW_AMBIENT = 0,
+
+ RC_STATE_R300_WINDOW_DIMENSION,
+ RC_STATE_R300_TEXRECT_FACTOR
+};
+
struct rc_constant {
unsigned Type:2; /**< RC_CONSTANT_xxx */
+ unsigned Size:3;
+
union {
unsigned External;
float Immediate[4];
- unsigned State[4];
+ unsigned State[2];
} u;
};
@@ -75,6 +82,9 @@ void rc_constants_init(struct rc_constant_list * c);
void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src);
void rc_constants_destroy(struct rc_constant_list * c);
unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant);
+unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2);
+unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data);
+unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
/**
* Stores state that influences the compilation of a fragment program.
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 6630db62795..d9ee43017dc 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -67,7 +67,7 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...);
struct r300_fragment_program_compiler {
struct radeon_compiler Base;
struct rX00_fragment_program_code *code;
- struct gl_program *program;
+ struct gl_program * program;
struct r300_fragment_program_external_state state;
GLboolean is_r500;
};
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
index 50a0ce8743c..208d3b90c83 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
@@ -48,85 +48,27 @@
* one instruction at a time.
*/
void radeonLocalTransform(
- struct gl_program *program,
+ struct radeon_compiler * c,
int num_transformations,
struct radeon_program_transformation* transformations)
{
- struct radeon_transform_context ctx;
- int ip;
+ struct rc_instruction * inst = c->Program.Instructions.Next;
- ctx.Program = program;
- ctx.OldInstructions = program->Instructions;
- ctx.OldNumInstructions = program->NumInstructions;
-
- program->Instructions = 0;
- program->NumInstructions = 0;
-
- for(ip = 0; ip < ctx.OldNumInstructions; ++ip) {
- struct prog_instruction *instr = ctx.OldInstructions + ip;
+ while(inst != &c->Program.Instructions) {
+ struct rc_instruction * current = inst;
int i;
+ inst = inst->Next;
+
for(i = 0; i < num_transformations; ++i) {
struct radeon_program_transformation* t = transformations + i;
- if (t->function(&ctx, instr, t->userData))
+ if (t->function(c, current, t->userData))
break;
}
-
- if (i >= num_transformations) {
- struct prog_instruction* dest = radeonAppendInstructions(program, 1);
- _mesa_copy_instructions(dest, instr, 1);
- }
- }
-
- _mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions);
-}
-
-
-static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count)
-{
- GLuint i;
- for (i = 0; i < count; i++) {
- const struct prog_instruction *inst = insts + i;
- const GLuint n = _mesa_num_inst_src_regs(inst->Opcode);
- GLuint k;
-
- for (k = 0; k < n; k++) {
- if (inst->SrcReg[k].File == PROGRAM_TEMPORARY)
- used[inst->SrcReg[k].Index] = GL_TRUE;
- }
}
}
-GLint radeonFindFreeTemporary(struct radeon_transform_context *t)
-{
- GLboolean used[MAX_PROGRAM_TEMPS];
- GLuint i;
-
- _mesa_memset(used, 0, sizeof(used));
- scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions);
- scan_instructions(used, t->OldInstructions, t->OldNumInstructions);
-
- for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
- if (!used[i])
- return i;
- }
-
- return -1;
-}
-
-
-/**
- * Append the given number of instructions to the program and return a
- * pointer to the first new instruction.
- */
-struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count)
-{
- int oldnum = program->NumInstructions;
- _mesa_insert_instructions(program, oldnum, count);
- return program->Instructions + oldnum;
-}
-
GLint rc_find_free_temporary(struct radeon_compiler * c)
{
@@ -206,6 +148,7 @@ void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * progr
struct rc_constant constant;
constant.Type = RC_CONSTANT_EXTERNAL;
+ constant.Size = 4;
constant.u.External = i;
rc_constants_add(&c->Program.Constants, &constant);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index fae8c6babe8..1a34b50ed72 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -87,18 +87,6 @@ static INLINE void reset_srcreg(struct prog_src_register* reg)
}
-/**
- * Transformation context that is passed to local transformations.
- *
- * Care must be taken with some operations during transformation,
- * e.g. finding new temporary registers must use @ref radeonFindFreeTemporary
- */
-struct radeon_transform_context {
- struct gl_program *Program;
- struct prog_instruction *OldInstructions;
- GLuint OldNumInstructions;
-};
-
/**
* A transformation that can be passed to \ref radeonLocalTransform.
*
@@ -111,24 +99,17 @@ struct radeon_transform_context {
*/
struct radeon_program_transformation {
GLboolean (*function)(
- struct radeon_transform_context*,
- struct prog_instruction*,
+ struct radeon_compiler*,
+ struct rc_instruction*,
void*);
void *userData;
};
void radeonLocalTransform(
- struct gl_program *program,
+ struct radeon_compiler *c,
int num_transformations,
struct radeon_program_transformation* transformations);
-/**
- * Find a usable free temporary register during program transformation
- */
-GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx);
-
-struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count);
-
GLint rc_find_free_temporary(struct radeon_compiler * c);
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index dd20b4603e5..609e510ff2b 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -35,49 +35,52 @@
#include "radeon_program_alu.h"
-#include "shader/prog_parameter.h"
+#include "radeon_compiler.h"
-static struct prog_instruction *emit1(struct gl_program* p,
+static struct rc_instruction *emit1(
+ struct radeon_compiler * c, struct rc_instruction * after,
gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
struct prog_src_register SrcReg)
{
- struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
+ struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
- fpi->Opcode = Opcode;
- fpi->SaturateMode = Saturate;
- fpi->DstReg = DstReg;
- fpi->SrcReg[0] = SrcReg;
+ fpi->I.Opcode = Opcode;
+ fpi->I.SaturateMode = Saturate;
+ fpi->I.DstReg = DstReg;
+ fpi->I.SrcReg[0] = SrcReg;
return fpi;
}
-static struct prog_instruction *emit2(struct gl_program* p,
+static struct rc_instruction *emit2(
+ struct radeon_compiler * c, struct rc_instruction * after,
gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
struct prog_src_register SrcReg0, struct prog_src_register SrcReg1)
{
- struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
+ struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
- fpi->Opcode = Opcode;
- fpi->SaturateMode = Saturate;
- fpi->DstReg = DstReg;
- fpi->SrcReg[0] = SrcReg0;
- fpi->SrcReg[1] = SrcReg1;
+ fpi->I.Opcode = Opcode;
+ fpi->I.SaturateMode = Saturate;
+ fpi->I.DstReg = DstReg;
+ fpi->I.SrcReg[0] = SrcReg0;
+ fpi->I.SrcReg[1] = SrcReg1;
return fpi;
}
-static struct prog_instruction *emit3(struct gl_program* p,
+static struct rc_instruction *emit3(
+ struct radeon_compiler * c, struct rc_instruction * after,
gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
struct prog_src_register SrcReg0, struct prog_src_register SrcReg1,
struct prog_src_register SrcReg2)
{
- struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
+ struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
- fpi->Opcode = Opcode;
- fpi->SaturateMode = Saturate;
- fpi->DstReg = DstReg;
- fpi->SrcReg[0] = SrcReg0;
- fpi->SrcReg[1] = SrcReg1;
- fpi->SrcReg[2] = SrcReg2;
+ fpi->I.Opcode = Opcode;
+ fpi->I.SaturateMode = Saturate;
+ fpi->I.DstReg = DstReg;
+ fpi->I.SrcReg[0] = SrcReg0;
+ fpi->I.SrcReg[1] = SrcReg1;
+ fpi->I.SrcReg[2] = SrcReg2;
return fpi;
}
@@ -171,58 +174,63 @@ static struct prog_src_register scalar(struct prog_src_register reg)
return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
}
-static void transform_ABS(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_ABS(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- struct prog_src_register src = inst->SrcReg[0];
+ struct prog_src_register src = inst->I.SrcReg[0];
src.Abs = 1;
src.Negate = NEGATE_NONE;
- emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src);
+ emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, inst->I.DstReg, src);
+ rc_remove_instruction(inst);
}
-static void transform_DP3(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_DP3(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- struct prog_src_register src0 = inst->SrcReg[0];
- struct prog_src_register src1 = inst->SrcReg[1];
+ struct prog_src_register src0 = inst->I.SrcReg[0];
+ struct prog_src_register src1 = inst->I.SrcReg[1];
src0.Negate &= ~NEGATE_W;
src0.Swizzle &= ~(7 << (3 * 3));
src0.Swizzle |= SWIZZLE_ZERO << (3 * 3);
src1.Negate &= ~NEGATE_W;
src1.Swizzle &= ~(7 << (3 * 3));
src1.Swizzle |= SWIZZLE_ZERO << (3 * 3);
- emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, src1);
+ emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, src1);
+ rc_remove_instruction(inst);
}
-static void transform_DPH(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_DPH(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- struct prog_src_register src0 = inst->SrcReg[0];
+ struct prog_src_register src0 = inst->I.SrcReg[0];
src0.Negate &= ~NEGATE_W;
src0.Swizzle &= ~(7 << (3 * 3));
src0.Swizzle |= SWIZZLE_ONE << (3 * 3);
- emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]);
+ emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, inst->I.SrcReg[1]);
+ rc_remove_instruction(inst);
}
/**
* [1, src0.y*src1.y, src0.z, src1.w]
* So basically MUL with lotsa swizzling.
*/
-static void transform_DST(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_DST(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg,
- swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE),
- swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W));
+ emit2(c, inst->Prev, OPCODE_MUL, inst->I.SaturateMode, inst->I.DstReg,
+ swizzle(inst->I.SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE),
+ swizzle(inst->I.SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W));
+ rc_remove_instruction(inst);
}
-static void transform_FLR(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_FLR(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- int tempreg = radeonFindFreeTemporary(t);
- emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]);
- emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg,
- inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
+ int tempreg = rc_find_free_temporary(c);
+ emit1(c, inst->Prev, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0]);
+ emit2(c, inst->Prev, OPCODE_ADD, inst->I.SaturateMode, inst->I.DstReg,
+ inst->I.SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
+ rc_remove_instruction(inst);
}
/**
@@ -243,152 +251,159 @@ static void transform_FLR(struct radeon_transform_context* t,
* 5 slots, if the subsequent optimization passes are clever enough
* to pair instructions correctly.
*/
-static void transform_LIT(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_LIT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- static const GLfloat LitConst[4] = { -127.999999 };
-
GLuint constant;
GLuint constant_swizzle;
GLuint temp;
- int needTemporary = 0;
struct prog_src_register srctemp;
- constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle);
+ constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle);
- if (inst->DstReg.WriteMask != WRITEMASK_XYZW) {
- needTemporary = 1;
- } else if (inst->DstReg.File != PROGRAM_TEMPORARY) {
- // LIT is typically followed by DP3/DP4, so there's no point
- // in creating special code for this case
- needTemporary = 1;
- }
+ if (inst->I.DstReg.WriteMask != WRITEMASK_XYZW || inst->I.DstReg.File != PROGRAM_TEMPORARY) {
+ struct rc_instruction * inst_mov;
- if (needTemporary) {
- temp = radeonFindFreeTemporary(t);
- } else {
- temp = inst->DstReg.Index;
+ inst_mov = emit1(c, inst,
+ OPCODE_MOV, 0, inst->I.DstReg,
+ srcreg(PROGRAM_TEMPORARY, rc_find_free_temporary(c)));
+
+ inst->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
+ inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
}
+
+ temp = inst->I.DstReg.Index;
srctemp = srcreg(PROGRAM_TEMPORARY, temp);
// tmp.x = max(0.0, Src.x);
// tmp.y = max(0.0, Src.y);
// tmp.w = clamp(Src.z, -128+eps, 128-eps);
- emit2(t->Program, OPCODE_MAX, 0,
+ emit2(c, inst->Prev, OPCODE_MAX, 0,
dstregtmpmask(temp, WRITEMASK_XYW),
- inst->SrcReg[0],
+ inst->I.SrcReg[0],
swizzle(srcreg(PROGRAM_CONSTANT, constant),
SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3));
- emit2(t->Program, OPCODE_MIN, 0,
+ emit2(c, inst->Prev, OPCODE_MIN, 0,
dstregtmpmask(temp, WRITEMASK_Z),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)));
// tmp.w = Pow(tmp.y, tmp.w)
- emit1(t->Program, OPCODE_LG2, 0,
+ emit1(c, inst->Prev, OPCODE_LG2, 0,
dstregtmpmask(temp, WRITEMASK_W),
swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
- emit2(t->Program, OPCODE_MUL, 0,
+ emit2(c, inst->Prev, OPCODE_MUL, 0,
dstregtmpmask(temp, WRITEMASK_W),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z));
- emit1(t->Program, OPCODE_EX2, 0,
+ emit1(c, inst->Prev, OPCODE_EX2, 0,
dstregtmpmask(temp, WRITEMASK_W),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
// tmp.z = (tmp.x > 0) ? tmp.w : 0.0
- emit3(t->Program, OPCODE_CMP, inst->SaturateMode,
+ emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode,
dstregtmpmask(temp, WRITEMASK_Z),
negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
builtin_zero);
// tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0
- emit1(t->Program, OPCODE_MOV, inst->SaturateMode,
+ emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode,
dstregtmpmask(temp, WRITEMASK_XYW),
swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE));
- if (needTemporary)
- emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp);
+ rc_remove_instruction(inst);
}
-static void transform_LRP(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_LRP(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- int tempreg = radeonFindFreeTemporary(t);
+ int tempreg = rc_find_free_temporary(c);
- emit2(t->Program, OPCODE_ADD, 0,
+ emit2(c, inst->Prev, OPCODE_ADD, 0,
dstreg(PROGRAM_TEMPORARY, tempreg),
- inst->SrcReg[1], negate(inst->SrcReg[2]));
- emit3(t->Program, OPCODE_MAD, inst->SaturateMode,
- inst->DstReg,
- inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]);
+ inst->I.SrcReg[1], negate(inst->I.SrcReg[2]));
+ emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode,
+ inst->I.DstReg,
+ inst->I.SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[2]);
+
+ rc_remove_instruction(inst);
}
-static void transform_POW(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_POW(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- int tempreg = radeonFindFreeTemporary(t);
+ int tempreg = rc_find_free_temporary(c);
struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg);
struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg);
tempdst.WriteMask = WRITEMASK_W;
tempsrc.Swizzle = SWIZZLE_WWWW;
- emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0]));
- emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1]));
- emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc);
+ emit1(c, inst->Prev, OPCODE_LG2, 0, tempdst, scalar(inst->I.SrcReg[0]));
+ emit2(c, inst->Prev, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->I.SrcReg[1]));
+ emit1(c, inst->Prev, OPCODE_EX2, inst->I.SaturateMode, inst->I.DstReg, tempsrc);
+
+ rc_remove_instruction(inst);
}
-static void transform_RSQ(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_RSQ(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0]));
+ inst->I.SrcReg[0] = absolute(inst->I.SrcReg[0]);
}
-static void transform_SGE(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_SGE(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- int tempreg = radeonFindFreeTemporary(t);
+ int tempreg = rc_find_free_temporary(c);
- emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
- emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
+ emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1]));
+ emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg,
srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one);
+
+ rc_remove_instruction(inst);
}
-static void transform_SLT(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_SLT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- int tempreg = radeonFindFreeTemporary(t);
+ int tempreg = rc_find_free_temporary(c);
- emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
- emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
+ emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1]));
+ emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg,
srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero);
+
+ rc_remove_instruction(inst);
}
-static void transform_SUB(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_SUB(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1]));
+ inst->I.Opcode = OPCODE_ADD;
+ inst->I.SrcReg[1] = negate(inst->I.SrcReg[1]);
}
-static void transform_SWZ(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_SWZ(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]);
+ inst->I.Opcode = OPCODE_MOV;
}
-static void transform_XPD(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_XPD(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- int tempreg = radeonFindFreeTemporary(t);
+ int tempreg = rc_find_free_temporary(c);
- emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg),
- swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
- swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
- emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg,
- swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
- swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
+ emit2(c, inst->Prev, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg),
+ swizzle(inst->I.SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
+ swizzle(inst->I.SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
+ emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, inst->I.DstReg,
+ swizzle(inst->I.SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
+ swizzle(inst->I.SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
+
+ rc_remove_instruction(inst);
}
@@ -406,63 +421,64 @@ static void transform_XPD(struct radeon_transform_context* t,
*
* @note should be applicable to R300 and R500 fragment programs.
*/
-GLboolean radeonTransformALU(struct radeon_transform_context* t,
- struct prog_instruction* inst,
+GLboolean radeonTransformALU(
+ struct radeon_compiler * c,
+ struct rc_instruction* inst,
void* unused)
{
- switch(inst->Opcode) {
- case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE;
- case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;
- case OPCODE_DST: transform_DST(t, inst); return GL_TRUE;
- case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;
- case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE;
- case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE;
- case OPCODE_POW: transform_POW(t, inst); return GL_TRUE;
- case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE;
- case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE;
- case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE;
- case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE;
- case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE;
- case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE;
+ switch(inst->I.Opcode) {
+ case OPCODE_ABS: transform_ABS(c, inst); return GL_TRUE;
+ case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE;
+ case OPCODE_DST: transform_DST(c, inst); return GL_TRUE;
+ case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE;
+ case OPCODE_LIT: transform_LIT(c, inst); return GL_TRUE;
+ case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE;
+ case OPCODE_POW: transform_POW(c, inst); return GL_TRUE;
+ case OPCODE_RSQ: transform_RSQ(c, inst); return GL_TRUE;
+ case OPCODE_SGE: transform_SGE(c, inst); return GL_TRUE;
+ case OPCODE_SLT: transform_SLT(c, inst); return GL_TRUE;
+ case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE;
+ case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE;
+ case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE;
default:
return GL_FALSE;
}
}
-static void transform_r300_vertex_ABS(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_r300_vertex_ABS(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
/* Note: r500 can take absolute values, but r300 cannot. */
- struct prog_src_register src1 = inst->SrcReg[0];
- src1.Negate ^= NEGATE_XYZW;
-
- emit2(t->Program, OPCODE_MAX, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], src1);
+ inst->I.Opcode = OPCODE_MAX;
+ inst->I.SrcReg[1] = inst->I.SrcReg[0];
+ inst->I.SrcReg[1].Negate ^= NEGATE_XYZW;
}
/**
* For use with radeonLocalTransform, this transforms non-native ALU
* instructions of the r300 up to r500 vertex engine.
*/
-GLboolean r300_transform_vertex_alu(struct radeon_transform_context* t,
- struct prog_instruction* inst,
+GLboolean r300_transform_vertex_alu(
+ struct radeon_compiler * c,
+ struct rc_instruction* inst,
void* unused)
{
- switch(inst->Opcode) {
- case OPCODE_ABS: transform_r300_vertex_ABS(t, inst); return GL_TRUE;
- case OPCODE_DP3: transform_DP3(t, inst); return GL_TRUE;
- case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;
- case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;
- case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE;
- case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE;
- case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE;
- case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE;
+ switch(inst->I.Opcode) {
+ case OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return GL_TRUE;
+ case OPCODE_DP3: transform_DP3(c, inst); return GL_TRUE;
+ case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE;
+ case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE;
+ case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE;
+ case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE;
+ case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE;
+ case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE;
default:
return GL_FALSE;
}
}
-static void sincos_constants(struct radeon_transform_context* t, GLuint *constants)
+static void sincos_constants(struct radeon_compiler* c, GLuint *constants)
{
static const GLfloat SinCosConsts[2][4] = {
{
@@ -480,11 +496,8 @@ static void sincos_constants(struct radeon_transform_context* t, GLuint *constan
};
int i;
- for(i = 0; i < 2; ++i) {
- GLuint swz;
- constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz);
- ASSERT(swz == SWIZZLE_NOOP);
- }
+ for(i = 0; i < 2; ++i)
+ constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]);
}
/**
@@ -495,23 +508,24 @@ static void sincos_constants(struct radeon_transform_context* t, GLuint *constan
* MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
* MAD dest, tmp.y, weight, tmp.x
*/
-static void sin_approx(struct radeon_transform_context* t,
+static void sin_approx(
+ struct radeon_compiler* c, struct rc_instruction * after,
struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants)
{
- GLuint tempreg = radeonFindFreeTemporary(t);
+ GLuint tempreg = rc_find_free_temporary(c);
- emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ emit2(c, after->Prev, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
srcreg(PROGRAM_CONSTANT, constants[0]));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X),
+ emit3(c, after->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y),
+ emit3(c, after->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)));
- emit3(t->Program, OPCODE_MAD, 0, dst,
+ emit3(c, after->Prev, OPCODE_MAD, 0, dst,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
@@ -522,78 +536,80 @@ static void sin_approx(struct radeon_transform_context* t,
* using only the basic instructions
* MOV, ADD, MUL, MAD, FRC
*/
-GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
- struct prog_instruction* inst,
+GLboolean radeonTransformTrigSimple(struct radeon_compiler* c,
+ struct rc_instruction* inst,
void* unused)
{
- if (inst->Opcode != OPCODE_COS &&
- inst->Opcode != OPCODE_SIN &&
- inst->Opcode != OPCODE_SCS)
+ if (inst->I.Opcode != OPCODE_COS &&
+ inst->I.Opcode != OPCODE_SIN &&
+ inst->I.Opcode != OPCODE_SCS)
return GL_FALSE;
GLuint constants[2];
- GLuint tempreg = radeonFindFreeTemporary(t);
+ GLuint tempreg = rc_find_free_temporary(c);
- sincos_constants(t, constants);
+ sincos_constants(c, constants);
- if (inst->Opcode == OPCODE_COS) {
+ if (inst->I.Opcode == OPCODE_COS) {
// MAD tmp.x, src, 1/(2*PI), 0.75
// FRC tmp.x, tmp.x
// MAD tmp.z, tmp.x, 2*PI, -PI
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
- emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
- sin_approx(t, inst->DstReg,
+ sin_approx(c, inst->Prev, inst->I.DstReg,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
constants);
- } else if (inst->Opcode == OPCODE_SIN) {
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ } else if (inst->I.Opcode == OPCODE_SIN) {
+ emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
- emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
- sin_approx(t, inst->DstReg,
+ sin_approx(c, inst->Prev, inst->I.DstReg,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
constants);
} else {
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
- swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W));
- emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
srcreg(PROGRAM_TEMPORARY, tempreg));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
srcreg(PROGRAM_TEMPORARY, tempreg),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
- struct prog_dst_register dst = inst->DstReg;
+ struct prog_dst_register dst = inst->I.DstReg;
- dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X;
- sin_approx(t, dst,
+ dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_X;
+ sin_approx(c, inst->Prev, dst,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
constants);
- dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y;
- sin_approx(t, dst,
+ dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_Y;
+ sin_approx(c, inst->Prev, dst,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
constants);
}
+ rc_remove_instruction(inst);
+
return GL_TRUE;
}
@@ -606,50 +622,52 @@ GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
*
* @warning This transformation implicitly changes the semantics of SIN and COS!
*/
-GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
- struct prog_instruction* inst,
+GLboolean radeonTransformTrigScale(struct radeon_compiler* c,
+ struct rc_instruction* inst,
void* unused)
{
- if (inst->Opcode != OPCODE_COS &&
- inst->Opcode != OPCODE_SIN &&
- inst->Opcode != OPCODE_SCS)
+ if (inst->I.Opcode != OPCODE_COS &&
+ inst->I.Opcode != OPCODE_SIN &&
+ inst->I.Opcode != OPCODE_SCS)
return GL_FALSE;
- static const GLfloat RCP_2PI[] = { 0.15915494309189535 };
+ static const GLfloat RCP_2PI = 0.15915494309189535;
GLuint temp;
GLuint constant;
GLuint constant_swizzle;
- temp = radeonFindFreeTemporary(t);
- constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle);
+ temp = rc_find_free_temporary(c);
+ constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle);
- emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W),
- swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ emit2(c, inst->Prev, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W),
+ swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle));
- emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W),
+ emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W),
srcreg(PROGRAM_TEMPORARY, temp));
- if (inst->Opcode == OPCODE_COS) {
- emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg,
+ if (inst->I.Opcode == OPCODE_COS) {
+ emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, inst->I.DstReg,
srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
- } else if (inst->Opcode == OPCODE_SIN) {
- emit1(t->Program, OPCODE_SIN, inst->SaturateMode,
- inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
- } else if (inst->Opcode == OPCODE_SCS) {
- struct prog_dst_register moddst = inst->DstReg;
+ } else if (inst->I.Opcode == OPCODE_SIN) {
+ emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode,
+ inst->I.DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+ } else if (inst->I.Opcode == OPCODE_SCS) {
+ struct prog_dst_register moddst = inst->I.DstReg;
- if (inst->DstReg.WriteMask & WRITEMASK_X) {
+ if (inst->I.DstReg.WriteMask & WRITEMASK_X) {
moddst.WriteMask = WRITEMASK_X;
- emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst,
+ emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, moddst,
srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
}
- if (inst->DstReg.WriteMask & WRITEMASK_Y) {
+ if (inst->I.DstReg.WriteMask & WRITEMASK_Y) {
moddst.WriteMask = WRITEMASK_Y;
- emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst,
+ emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, moddst,
srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
}
}
+ rc_remove_instruction(inst);
+
return GL_TRUE;
}
@@ -661,21 +679,15 @@ GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
* @warning This explicitly changes the form of DDX and DDY!
*/
-GLboolean radeonTransformDeriv(struct radeon_transform_context* t,
- struct prog_instruction* inst,
+GLboolean radeonTransformDeriv(struct radeon_compiler* c,
+ struct rc_instruction* inst,
void* unused)
{
- if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY)
+ if (inst->I.Opcode != OPCODE_DDX && inst->I.Opcode != OPCODE_DDY)
return GL_FALSE;
- struct prog_src_register B = inst->SrcReg[1];
-
- B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE,
- SWIZZLE_ONE, SWIZZLE_ONE);
- B.Negate = NEGATE_XYZW;
-
- emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg,
- inst->SrcReg[0], B);
+ inst->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE);
+ inst->I.SrcReg[1].Negate = NEGATE_XYZW;
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
index 7d94a089eb0..147efec6fcb 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
@@ -31,28 +31,28 @@
#include "radeon_program.h"
GLboolean radeonTransformALU(
- struct radeon_transform_context *t,
- struct prog_instruction*,
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
void*);
GLboolean r300_transform_vertex_alu(
- struct radeon_transform_context *t,
- struct prog_instruction*,
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
void*);
GLboolean radeonTransformTrigSimple(
- struct radeon_transform_context *t,
- struct prog_instruction*,
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
void*);
GLboolean radeonTransformTrigScale(
- struct radeon_transform_context *t,
- struct prog_instruction*,
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
void*);
GLboolean radeonTransformDeriv(
- struct radeon_transform_context *t,
- struct prog_instruction*,
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
void*);
#endif /* __RADEON_PROGRAM_ALU_H_ */
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index b8fad4a6e77..c79601bcb1e 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -1063,24 +1063,6 @@ r300FetchStateParameter(GLcontext * ctx,
break;
}
- case STATE_R300_TEXRECT_FACTOR:{
- struct gl_texture_object *t =
- ctx->Texture.Unit[state[2]].CurrentTex[TEXTURE_RECT_INDEX];
-
- if (t && t->Image[0][t->BaseLevel]) {
- struct gl_texture_image *image =
- t->Image[0][t->BaseLevel];
- value[0] = 1.0 / image->Width2;
- value[1] = 1.0 / image->Height2;
- } else {
- value[0] = 1.0;
- value[1] = 1.0;
- }
- value[2] = 1.0;
- value[3] = 1.0;
- break;
- }
-
default:
break;
}
@@ -2029,7 +2011,7 @@ void r300UpdateShaders(r300ContextPtr rmesa)
rmesa->radeon.NewGLState = 0;
}
-static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index)
+static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index, GLfloat * buffer)
{
static const GLfloat dummy[4] = { 0, 0, 0, 0 };
r300ContextPtr rmesa = R300_CONTEXT(ctx);
@@ -2041,6 +2023,47 @@ static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index)
return fp->Base->Parameters->ParameterValues[rcc->u.External];
case RC_CONSTANT_IMMEDIATE:
return rcc->u.Immediate;
+ case RC_CONSTANT_STATE:
+ switch(rcc->u.State[0]) {
+ case RC_STATE_SHADOW_AMBIENT: {
+ const int unit = (int) rcc->u.State[1];
+ const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
+ if (texObj) {
+ buffer[0] =
+ buffer[1] =
+ buffer[2] =
+ buffer[3] = texObj->CompareFailValue;
+ }
+ return buffer;
+ }
+
+ case RC_STATE_R300_WINDOW_DIMENSION: {
+ __DRIdrawablePrivate * drawable = radeon_get_drawable(&rmesa->radeon);
+ buffer[0] = drawable->w * 0.5f; /* width*0.5 */
+ buffer[1] = drawable->h * 0.5f; /* height*0.5 */
+ buffer[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */
+ buffer[3] = 1.0F; /* not used */
+ return buffer;
+ }
+
+ case RC_STATE_R300_TEXRECT_FACTOR: {
+ struct gl_texture_object *t =
+ ctx->Texture.Unit[rcc->u.State[1]].CurrentTex[TEXTURE_RECT_INDEX];
+
+ if (t && t->Image[0][t->BaseLevel]) {
+ struct gl_texture_image *image =
+ t->Image[0][t->BaseLevel];
+ buffer[0] = 1.0 / image->Width2;
+ buffer[1] = 1.0 / image->Height2;
+ } else {
+ buffer[0] = 1.0;
+ buffer[1] = 1.0;
+ }
+ buffer[2] = 1.0;
+ buffer[3] = 1.0;
+ return buffer;
+ }
+ }
}
return dummy;
@@ -2096,7 +2119,8 @@ static void r300SetupPixelShader(GLcontext *ctx)
R300_STATECHANGE(rmesa, fpp);
rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, fp->code.constants.Count * 4);
for (i = 0; i < fp->code.constants.Count; i++) {
- const GLfloat *constant = get_fragmentprogram_constant(ctx, i);
+ GLfloat buffer[4];
+ const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]);
@@ -2157,7 +2181,8 @@ static void r500SetupPixelShader(GLcontext *ctx)
R300_STATECHANGE(rmesa, r500fp_const);
for (i = 0; i < fp->code.constants.Count; i++) {
- const GLfloat *constant = get_fragmentprogram_constant(ctx, i);
+ GLfloat buffer[4];
+ const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]);
--
cgit v1.2.3
From aab949cb9d62343303ab0836a84fe034244d1584 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Fri, 24 Jul 2009 22:50:35 +0200
Subject: r300/compiler: Refactor rewrite_depth_out to use rc_program
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index bdab61f32da..08283c81474 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -195,11 +195,13 @@ static void rewriteFog(struct r300_fragment_program_compiler *compiler)
}
-static void rewrite_depth_out(struct gl_program *prog)
+static void rewrite_depth_out(struct r300_fragment_program_compiler * c)
{
- struct prog_instruction *inst;
+ struct rc_instruction *rci;
+
+ for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) {
+ struct prog_instruction * inst = &rci->I;
- for (inst = prog->Instructions; inst->Opcode != OPCODE_END; ++inst) {
if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != FRAG_RESULT_DEPTH)
continue;
@@ -248,10 +250,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
rewriteFog(c);
- rewrite_depth_out(c->program);
-
rc_mesa_to_rc_program(&c->Base, c->program);
+ rewrite_depth_out(c);
+
if (c->is_r500) {
struct radeon_program_transformation transformations[] = {
{ &r500_transform_TEX, c },
--
cgit v1.2.3
From 9dc1be415828962f62d942bf9c362410347d1e75 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Fri, 24 Jul 2009 23:06:54 +0200
Subject: r300/compiler: Refactor fragment program fog rewrite to use
rc_program
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 35 +++++++---------------
.../drivers/dri/r300/compiler/radeon_compiler.c | 32 ++++++++++++++++++++
.../drivers/dri/r300/compiler/radeon_compiler.h | 2 ++
3 files changed, 44 insertions(+), 25 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 08283c81474..2ffee79b4fc 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -156,42 +156,27 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
static void rewriteFog(struct r300_fragment_program_compiler *compiler)
{
struct rX00_fragment_program_code *code = compiler->code;
- GLuint InputsRead = compiler->program->InputsRead;
+ struct prog_src_register src;
int i;
- if (!(InputsRead & FRAG_BIT_FOGC)) {
+ if (!(compiler->Base.Program.InputsRead & FRAG_BIT_FOGC)) {
code->fog_attr = FRAG_ATTRIB_MAX;
return;
}
for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
{
- if (!(InputsRead & (1 << i))) {
- InputsRead &= ~(1 << FRAG_ATTRIB_FOGC);
- InputsRead |= 1 << i;
- compiler->program->InputsRead = InputsRead;
+ if (!(compiler->Base.Program.InputsRead & (1 << i))) {
code->fog_attr = i;
break;
}
}
- {
- struct prog_instruction *inst;
-
- inst = compiler->program->Instructions;
- while (inst->Opcode != OPCODE_END) {
- const int src_regs = _mesa_num_inst_src_regs(inst->Opcode);
- for (i = 0; i < src_regs; ++i) {
- if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == FRAG_ATTRIB_FOGC) {
- inst->SrcReg[i].Index = code->fog_attr;
- inst->SrcReg[i].Swizzle = combine_swizzles(
- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE),
- inst->SrcReg[i].Swizzle);
- }
- }
- ++inst;
- }
- }
+ reset_srcreg(&src);
+ src.File = PROGRAM_INPUT;
+ src.Index = code->fog_attr;
+ src.Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
+ rc_move_input(&compiler->Base, FRAG_ATTRIB_FOGC, src);
}
@@ -248,10 +233,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
insert_WPOS_trailer(c);
- rewriteFog(c);
-
rc_mesa_to_rc_program(&c->Base, c->program);
+ rewriteFog(c);
+
rewrite_depth_out(c);
if (c->is_r500) {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index 684961021a1..15823064f2c 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -24,6 +24,8 @@
#include
+#include "radeon_program.h"
+
void rc_init(struct radeon_compiler * c)
{
@@ -88,3 +90,33 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...)
va_end(ap);
}
}
+
+/**
+ * Rewrite the program such that everything that source the given input
+ * register will source new_input instead.
+ */
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input)
+{
+ struct rc_instruction * inst;
+
+ c->Program.InputsRead &= ~(1 << input);
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ unsigned i;
+
+ for(i = 0; i < numsrcs; ++i) {
+ if (inst->I.SrcReg[i].File == PROGRAM_INPUT && inst->I.SrcReg[i].Index == input) {
+ inst->I.SrcReg[i].File = new_input.File;
+ inst->I.SrcReg[i].Index = new_input.Index;
+ inst->I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->I.SrcReg[i].Swizzle);
+ if (!inst->I.SrcReg[i].Abs) {
+ inst->I.SrcReg[i].Negate ^= new_input.Negate;
+ inst->I.SrcReg[i].Abs = new_input.Abs;
+ }
+
+ c->Program.InputsRead |= 1 << new_input.Index;
+ }
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index d9ee43017dc..1a09522b012 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -64,6 +64,8 @@ void rc_destroy(struct radeon_compiler * c);
void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
void rc_error(struct radeon_compiler * c, const char * fmt, ...);
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input);
+
struct r300_fragment_program_compiler {
struct radeon_compiler Base;
struct rX00_fragment_program_code *code;
--
cgit v1.2.3
From 273af6857084f3a047a781a6c1a163464bdb3da0 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Fri, 24 Jul 2009 23:28:08 +0200
Subject: r300/fragprog: Refactor wpos rewrite to use rc_program
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 90 ++--------------------
.../drivers/dri/r300/compiler/radeon_compiler.c | 73 ++++++++++++++++++
.../drivers/dri/r300/compiler/radeon_compiler.h | 1 +
3 files changed, 80 insertions(+), 84 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 2ffee79b4fc..014c5fbac01 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -50,102 +50,24 @@ static void nqssadce_init(struct nqssadce_state* s)
*/
static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
{
- GLuint InputsRead = compiler->program->InputsRead;
+ int i;
- if (!(InputsRead & FRAG_BIT_WPOS)) {
+ if (!(compiler->Base.Program.InputsRead & FRAG_BIT_WPOS)) {
compiler->code->wpos_attr = FRAG_ATTRIB_MAX;
return;
}
- static gl_state_index tokens[STATE_LENGTH] = {
- STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
- };
- struct prog_instruction *fpi;
- GLuint window_index;
- int i = 0;
-
for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
{
- if (!(InputsRead & (1 << i))) {
- InputsRead &= ~(1 << FRAG_ATTRIB_WPOS);
- InputsRead |= 1 << i;
- compiler->program->InputsRead = InputsRead;
+ if (!(compiler->Base.Program.InputsRead & (1 << i))) {
compiler->code->wpos_attr = i;
break;
}
}
- GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY);
-
- _mesa_insert_instructions(compiler->program, 0, 3);
- fpi = compiler->program->Instructions;
- i = 0;
-
- /* perspective divide */
- fpi[i].Opcode = OPCODE_RCP;
-
- fpi[i].DstReg.File = PROGRAM_TEMPORARY;
- fpi[i].DstReg.Index = tempregi;
- fpi[i].DstReg.WriteMask = WRITEMASK_W;
- fpi[i].DstReg.CondMask = COND_TR;
-
- fpi[i].SrcReg[0].File = PROGRAM_INPUT;
- fpi[i].SrcReg[0].Index = compiler->code->wpos_attr;
- fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
- i++;
-
- fpi[i].Opcode = OPCODE_MUL;
-
- fpi[i].DstReg.File = PROGRAM_TEMPORARY;
- fpi[i].DstReg.Index = tempregi;
- fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
- fpi[i].DstReg.CondMask = COND_TR;
-
- fpi[i].SrcReg[0].File = PROGRAM_INPUT;
- fpi[i].SrcReg[0].Index = compiler->code->wpos_attr;
- fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
- fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
- fpi[i].SrcReg[1].Index = tempregi;
- fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW;
- i++;
-
- /* viewport transformation */
- window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);
-
- fpi[i].Opcode = OPCODE_MAD;
-
- fpi[i].DstReg.File = PROGRAM_TEMPORARY;
- fpi[i].DstReg.Index = tempregi;
- fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
- fpi[i].DstReg.CondMask = COND_TR;
-
- fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY;
- fpi[i].SrcReg[0].Index = tempregi;
- fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-
- fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR;
- fpi[i].SrcReg[1].Index = window_index;
- fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-
- fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR;
- fpi[i].SrcReg[2].Index = window_index;
- fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
- i++;
-
- for (; i < compiler->program->NumInstructions; ++i) {
- int reg;
- for (reg = 0; reg < 3; reg++) {
- if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT &&
- fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) {
- fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY;
- fpi[i].SrcReg[reg].Index = tempregi;
- }
- }
- }
+ rc_transform_fragment_wpos(&compiler->Base, FRAG_ATTRIB_WPOS, compiler->code->wpos_attr);
}
-
/**
* Rewrite fragment.fogcoord to use a texture coordinate slot.
* Note that fogcoord is forced into an X001 pattern, and this enforcement
@@ -231,10 +153,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
fflush(stdout);
}
- insert_WPOS_trailer(c);
-
rc_mesa_to_rc_program(&c->Base, c->program);
+ insert_WPOS_trailer(c);
+
rewriteFog(c);
rewrite_depth_out(c);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index 15823064f2c..adf900a5cb3 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -120,3 +120,76 @@ void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_r
}
}
}
+
+
+/**
+ * Introduce standard code fragment to deal with fragment.position.
+ */
+void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input)
+{
+ unsigned tempregi = rc_find_free_temporary(c);
+
+ c->Program.InputsRead &= ~(1 << wpos);
+ c->Program.InputsRead |= 1 << new_input;
+
+ /* perspective divide */
+ struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
+ inst_rcp->I.Opcode = OPCODE_RCP;
+
+ inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_rcp->I.DstReg.Index = tempregi;
+ inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
+
+ inst_rcp->I.SrcReg[0].File = PROGRAM_INPUT;
+ inst_rcp->I.SrcReg[0].Index = new_input;
+ inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
+
+ struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst_rcp);
+ inst_mul->I.Opcode = OPCODE_MUL;
+
+ inst_mul->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mul->I.DstReg.Index = tempregi;
+ inst_mul->I.DstReg.WriteMask = WRITEMASK_XYZ;
+
+ inst_mul->I.SrcReg[0].File = PROGRAM_INPUT;
+ inst_mul->I.SrcReg[0].Index = new_input;
+
+ inst_mul->I.SrcReg[1].File = PROGRAM_TEMPORARY;
+ inst_mul->I.SrcReg[1].Index = tempregi;
+ inst_mul->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
+
+ /* viewport transformation */
+ struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_mul);
+ inst_mad->I.Opcode = OPCODE_MAD;
+
+ inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mad->I.DstReg.Index = tempregi;
+ inst_mad->I.DstReg.WriteMask = WRITEMASK_XYZ;
+
+ inst_mad->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst_mad->I.SrcReg[0].Index = tempregi;
+ inst_mad->I.SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+
+ inst_mad->I.SrcReg[1].File = PROGRAM_STATE_VAR;
+ inst_mad->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
+ inst_mad->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+
+ inst_mad->I.SrcReg[2].File = PROGRAM_STATE_VAR;
+ inst_mad->I.SrcReg[2].Index = inst_mad->I.SrcReg[1].Index;
+ inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+
+ struct rc_instruction * inst;
+ for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ unsigned i;
+
+ for(i = 0; i < numsrcs; i++) {
+ if (inst->I.SrcReg[i].File == PROGRAM_INPUT &&
+ inst->I.SrcReg[i].Index == wpos) {
+ inst->I.SrcReg[i].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[i].Index = tempregi;
+ }
+ }
+ }
+}
+
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 1a09522b012..37519add054 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -65,6 +65,7 @@ void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
void rc_error(struct radeon_compiler * c, const char * fmt, ...);
void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input);
+void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input);
struct r300_fragment_program_compiler {
struct radeon_compiler Base;
--
cgit v1.2.3
From a1e8992ffa4e7bddb4aaeb567f9e2023ae08540e Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Sat, 25 Jul 2009 00:07:46 +0200
Subject: r300/vertprog: Refactor addArtificialOutputs to use rc_program
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 95 +++++-----------------
src/mesa/drivers/dri/r300/compiler/radeon_code.h | 1 -
.../drivers/dri/r300/compiler/radeon_compiler.h | 1 +
src/mesa/drivers/dri/r300/r300_context.h | 8 +-
src/mesa/drivers/dri/r300/r300_vertprog.c | 42 +++++++++-
5 files changed, 69 insertions(+), 78 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 90910c45c62..53e62ae2f32 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -667,93 +667,42 @@ static void fog_as_texcoord(struct gl_program *prog, int tex_id)
}
-#define ADD_OUTPUT(fp_attr, vp_result) \
- do { \
- if ((FpReads & (1 << (fp_attr))) && !(compiler->program->OutputsWritten & (1 << (vp_result)))) { \
- OutputsAdded |= 1 << (vp_result); \
- count++; \
- } \
- } while (0)
-
static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
{
- GLuint OutputsAdded, FpReads;
- int i, count;
-
- OutputsAdded = 0;
- count = 0;
- FpReads = compiler->state.FpReads;
-
- ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
- ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
-
- for (i = 0; i < 7; ++i) {
- ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
- }
-
- /* Some outputs may be artificially added, to match the inputs of the fragment program.
- * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
- * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
- */
- if (count > 0) {
- struct prog_instruction *inst;
-
- _mesa_insert_instructions(compiler->program, compiler->program->NumInstructions - 1, count);
- inst = &compiler->program->Instructions[compiler->program->NumInstructions - 1 - count];
+ int i;
- for (i = 0; i < VERT_RESULT_MAX; ++i) {
- if (OutputsAdded & (1 << i)) {
- inst->Opcode = OPCODE_MOV;
+ for(i = 0; i < 32; ++i) {
+ if ((compiler->RequiredOutputs & (1 << i)) &&
+ !(compiler->Base.Program.OutputsWritten & (1 << i))) {
+ struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
+ inst->I.Opcode = OPCODE_MOV;
- inst->DstReg.File = PROGRAM_OUTPUT;
- inst->DstReg.Index = i;
- inst->DstReg.WriteMask = WRITEMASK_XYZW;
- inst->DstReg.CondMask = COND_TR;
+ inst->I.DstReg.File = PROGRAM_OUTPUT;
+ inst->I.DstReg.Index = i;
+ inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
- inst->SrcReg[0].File = PROGRAM_CONSTANT;
- inst->SrcReg[0].Index = 0;
- inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+ inst->I.SrcReg[0].File = PROGRAM_CONSTANT;
+ inst->I.SrcReg[0].Index = 0;
+ inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
- ++inst;
- }
+ compiler->Base.Program.OutputsWritten |= 1 << i;
}
-
- compiler->program->OutputsWritten |= OutputsAdded;
}
}
-#undef ADD_OUTPUT
-
static void nqssadceInit(struct nqssadce_state* s)
{
struct r300_vertex_program_compiler * compiler = s->UserData;
- GLuint fp_reads;
-
- fp_reads = compiler->state.FpReads;
- {
- if (fp_reads & FRAG_BIT_COL0) {
- s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW;
- s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW;
- }
+ int i;
- if (fp_reads & FRAG_BIT_COL1) {
- s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW;
- s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW;
+ for(i = 0; i < VERT_RESULT_MAX; ++i) {
+ if (compiler->RequiredOutputs & (1 << i)) {
+ if (i != VERT_RESULT_PSIZ)
+ s->Outputs[i].Sourced = WRITEMASK_XYZW;
+ else
+ s->Outputs[i].Sourced = WRITEMASK_X; /* ugly hack! */
}
}
-
- {
- int i;
- for (i = 0; i < 8; ++i) {
- if (fp_reads & FRAG_BIT_TEX(i)) {
- s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW;
- }
- }
- }
-
- s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW;
- if (s->Compiler->Program.OutputsWritten & (1 << VERT_RESULT_PSIZ))
- s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X;
}
static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
@@ -776,10 +725,10 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
fog_as_texcoord(compiler->program, compiler->state.FogAttr - FRAG_ATTRIB_TEX0);
}
- addArtificialOutputs(compiler);
-
rc_mesa_to_rc_program(&compiler->Base, compiler->program);
+ addArtificialOutputs(compiler);
+
{
struct radeon_program_transformation transformations[] = {
{ &r300_transform_vertex_alu, 0 },
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index 9cf4ed57bb5..5489931434f 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -186,7 +186,6 @@ struct rX00_fragment_program_code {
#define VSF_MAX_FRAGMENT_TEMPS (14)
struct r300_vertex_program_external_state {
- GLuint FpReads;
GLuint FogAttr;
GLuint WPosAttr;
};
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 37519add054..74306994cbd 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -82,6 +82,7 @@ struct r300_vertex_program_compiler {
struct radeon_compiler Base;
struct r300_vertex_program_code *code;
struct r300_vertex_program_external_state state;
+ GLbitfield RequiredOutputs;
struct gl_program *program;
};
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index 5c575441d7b..629fd0af274 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -395,11 +395,17 @@ struct r300_hw_state {
#include "tnl_dd/t_dd_vertex.h"
#undef TAG
+struct r300_vertex_program_key {
+ GLbitfield FpReads;
+ GLuint FogAttr;
+ GLuint WPosAttr;
+};
+
struct r300_vertex_program {
struct gl_vertex_program *Base;
struct r300_vertex_program *next;
- struct r300_vertex_program_external_state key;
+ struct r300_vertex_program_key key;
struct r300_vertex_program_code code;
GLboolean error;
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index fd2b9fcaf26..7dcf7d03837 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -94,8 +94,42 @@ static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_progra
return 4 * vp->code.constants.Count;
}
+static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitfield fpreads)
+{
+ GLbitfield outputs = 0;
+ int i;
+
+#define ADD_OUTPUT(fp_attr, vp_result) \
+ do { \
+ if (fpreads & (1 << (fp_attr))) \
+ outputs |= (1 << (vp_result)); \
+ } while (0)
+
+ ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
+ ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
+
+ for (i = 0; i <= 7; ++i) {
+ ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
+ }
+
+#undef ADD_OUTPUT
+
+ if ((fpreads & (1 << FRAG_ATTRIB_COL0)) &&
+ (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC0)))
+ outputs |= 1 << VERT_RESULT_BFC0;
+ if ((fpreads & (1 << FRAG_ATTRIB_COL1)) &&
+ (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC1)))
+ outputs |= 1 << VERT_RESULT_BFC1;
+
+ outputs |= 1 << VERT_RESULT_HPOS;
+ if (vp->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ))
+ outputs |= 1 << VERT_RESULT_PSIZ;
+
+ return outputs;
+}
+
static struct r300_vertex_program *build_program(GLcontext *ctx,
- struct r300_vertex_program_external_state *wanted_key,
+ struct r300_vertex_program_key *wanted_key,
const struct gl_vertex_program *mesa_vp)
{
struct r300_vertex_program *vp;
@@ -109,7 +143,9 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
compiler.Base.Debug = (RADEON_DEBUG & DEBUG_VERTS) ? GL_TRUE : GL_FALSE;
compiler.code = &vp->code;
- compiler.state = vp->key;
+ compiler.state.FogAttr = vp->key.FogAttr;
+ compiler.state.WPosAttr = vp->key.WPosAttr;
+ compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads);
compiler.program = &vp->Base->Base;
if (compiler.Base.Debug) {
@@ -133,7 +169,7 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
- struct r300_vertex_program_external_state wanted_key = { 0 };
+ struct r300_vertex_program_key wanted_key = { 0 };
struct r300_vertex_program_cont *vpc;
struct r300_vertex_program *vp;
--
cgit v1.2.3
From ce0c32e3d23641214dae9b3fed863dc163b26ea4 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Sat, 25 Jul 2009 00:41:05 +0200
Subject: r300/vertprog: Refactor fog_as_texcoord to use rc_program
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 40 +++-------------------
.../drivers/dri/r300/compiler/radeon_compiler.c | 26 ++++++++++++++
.../drivers/dri/r300/compiler/radeon_compiler.h | 1 +
src/mesa/drivers/dri/r300/r300_vertprog.c | 3 ++
4 files changed, 34 insertions(+), 36 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 53e62ae2f32..38ee9575a32 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -634,39 +634,6 @@ static void pos_as_texcoord(struct gl_program *prog, int tex_id)
prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
}
-/**
- * The fogcoord attribute is special in that only the first component
- * is relevant, and the remaining components are always fixed (when read
- * from by the fragment program) to yield an X001 pattern.
- *
- * We need to enforce this either in the vertex program or in the fragment
- * program, and this code chooses not to enforce it in the vertex program.
- * This is slightly cheaper, as long as the fragment program does not use
- * weird swizzles.
- *
- * And it seems that usually, weird swizzles are not used, so...
- *
- * See also the counterpart rewriting for fragment programs.
- */
-static void fog_as_texcoord(struct gl_program *prog, int tex_id)
-{
- struct prog_instruction *vpi;
-
- vpi = prog->Instructions;
- while (vpi->Opcode != OPCODE_END) {
- if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) {
- vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
- vpi->DstReg.WriteMask = WRITEMASK_X;
- }
-
- ++vpi;
- }
-
- prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC);
- prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
-}
-
-
static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
{
int i;
@@ -721,12 +688,13 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
pos_as_texcoord(compiler->program, compiler->state.WPosAttr - FRAG_ATTRIB_TEX0);
}
+ rc_mesa_to_rc_program(&compiler->Base, compiler->program);
+ compiler->program = 0;
+
if (compiler->state.FogAttr != FRAG_ATTRIB_MAX) {
- fog_as_texcoord(compiler->program, compiler->state.FogAttr - FRAG_ATTRIB_TEX0);
+ rc_move_output(&compiler->Base, VERT_RESULT_FOGC, compiler->state.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X);
}
- rc_mesa_to_rc_program(&compiler->Base, compiler->program);
-
addArtificialOutputs(compiler);
{
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index adf900a5cb3..6e7361d5686 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -122,6 +122,32 @@ void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_r
}
+/**
+ * Rewrite the program such that everything that writes into the given
+ * output register will instead write to new_output. The new_output
+ * writemask is honoured.
+ */
+void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask)
+{
+ struct rc_instruction * inst;
+
+ c->Program.OutputsWritten &= ~(1 << output);
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+
+ if (numdsts) {
+ if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) {
+ inst->I.DstReg.Index = new_output;
+ inst->I.DstReg.WriteMask &= writemask;
+
+ c->Program.OutputsWritten |= 1 << new_output;
+ }
+ }
+ }
+}
+
+
/**
* Introduce standard code fragment to deal with fragment.position.
*/
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 74306994cbd..34f4240d534 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -65,6 +65,7 @@ void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
void rc_error(struct radeon_compiler * c, const char * fmt, ...);
void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input);
+void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask);
void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input);
struct r300_fragment_program_compiler {
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index 7dcf7d03837..27ec23975a4 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -161,6 +161,9 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
r3xx_compile_vertex_program(&compiler);
vp->error = compiler.Base.Error;
+ vp->Base->Base.InputsRead = vp->code.InputsRead;
+ vp->Base->Base.OutputsWritten = vp->code.OutputsWritten;
+
rc_destroy(&compiler.Base);
return vp;
--
cgit v1.2.3
From 05a51f4b3dfa32c73b85b26254bf9ee270eb6be2 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Sat, 25 Jul 2009 00:49:25 +0200
Subject: r300/vertprog: Refactor wpos rewrite using rc_program
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 70 +++-------------------
.../drivers/dri/r300/compiler/radeon_compiler.c | 41 +++++++++++++
.../drivers/dri/r300/compiler/radeon_compiler.h | 1 +
3 files changed, 51 insertions(+), 61 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 38ee9575a32..c05b488645b 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -578,62 +578,6 @@ static GLboolean transform_source_conflicts(
return GL_TRUE;
}
-static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id)
-{
- struct prog_instruction *vpi;
-
- _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2);
-
- vpi = &prog->Instructions[prog->NumInstructions - 3];
-
- vpi->Opcode = OPCODE_MOV;
-
- vpi->DstReg.File = PROGRAM_OUTPUT;
- vpi->DstReg.Index = VERT_RESULT_HPOS;
- vpi->DstReg.WriteMask = WRITEMASK_XYZW;
- vpi->DstReg.CondMask = COND_TR;
-
- vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
- vpi->SrcReg[0].Index = temp_index;
- vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
- ++vpi;
-
- vpi->Opcode = OPCODE_MOV;
-
- vpi->DstReg.File = PROGRAM_OUTPUT;
- vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
- vpi->DstReg.WriteMask = WRITEMASK_XYZW;
- vpi->DstReg.CondMask = COND_TR;
-
- vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
- vpi->SrcReg[0].Index = temp_index;
- vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
- ++vpi;
-
- vpi->Opcode = OPCODE_END;
-}
-
-static void pos_as_texcoord(struct gl_program *prog, int tex_id)
-{
- struct prog_instruction *vpi;
- GLuint tempregi = prog->NumTemporaries;
-
- prog->NumTemporaries++;
-
- for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
- if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) {
- vpi->DstReg.File = PROGRAM_TEMPORARY;
- vpi->DstReg.Index = tempregi;
- }
- }
-
- insert_wpos(prog, tempregi, tex_id);
-
- prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
-}
-
static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
{
int i;
@@ -684,15 +628,19 @@ static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
{
- if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) {
- pos_as_texcoord(compiler->program, compiler->state.WPosAttr - FRAG_ATTRIB_TEX0);
- }
-
rc_mesa_to_rc_program(&compiler->Base, compiler->program);
compiler->program = 0;
+ if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) {
+ rc_copy_output(&compiler->Base,
+ VERT_RESULT_HPOS,
+ compiler->state.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0);
+ }
+
if (compiler->state.FogAttr != FRAG_ATTRIB_MAX) {
- rc_move_output(&compiler->Base, VERT_RESULT_FOGC, compiler->state.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X);
+ rc_move_output(&compiler->Base,
+ VERT_RESULT_FOGC,
+ compiler->state.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X);
}
addArtificialOutputs(compiler);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index 6e7361d5686..da950d52894 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -148,6 +148,47 @@ void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_ou
}
+/**
+ * Rewrite the program such that a given output is duplicated.
+ */
+void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
+{
+ unsigned tempreg = rc_find_free_temporary(c);
+ struct rc_instruction * inst;
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+
+ if (numdsts) {
+ if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) {
+ inst->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst->I.DstReg.Index = tempreg;
+ }
+ }
+ }
+
+ inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+ inst->I.Opcode = OPCODE_MOV;
+ inst->I.DstReg.File = PROGRAM_OUTPUT;
+ inst->I.DstReg.Index = output;
+
+ inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[0].Index = tempreg;
+ inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+ inst->I.Opcode = OPCODE_MOV;
+ inst->I.DstReg.File = PROGRAM_OUTPUT;
+ inst->I.DstReg.Index = dup_output;
+
+ inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[0].Index = tempreg;
+ inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ c->Program.OutputsWritten |= 1 << dup_output;
+}
+
+
/**
* Introduce standard code fragment to deal with fragment.position.
*/
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 34f4240d534..a4ff2ca86ab 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -66,6 +66,7 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...);
void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input);
void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask);
+void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input);
struct r300_fragment_program_compiler {
--
cgit v1.2.3
From d6a304800b2385740f3b90efab45564e1e6203b2 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Sat, 25 Jul 2009 00:50:53 +0200
Subject: r300: Remove ugly PSIZ hack
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Instead of setting Sourced, we simply force writemasks to begin with.
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index c05b488645b..14dd36354d6 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -607,12 +607,8 @@ static void nqssadceInit(struct nqssadce_state* s)
int i;
for(i = 0; i < VERT_RESULT_MAX; ++i) {
- if (compiler->RequiredOutputs & (1 << i)) {
- if (i != VERT_RESULT_PSIZ)
- s->Outputs[i].Sourced = WRITEMASK_XYZW;
- else
- s->Outputs[i].Sourced = WRITEMASK_X; /* ugly hack! */
- }
+ if (compiler->RequiredOutputs & (1 << i))
+ s->Outputs[i].Sourced = WRITEMASK_XYZW;
}
}
@@ -631,6 +627,8 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
rc_mesa_to_rc_program(&compiler->Base, compiler->program);
compiler->program = 0;
+ rc_move_output(&compiler->Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X);
+
if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) {
rc_copy_output(&compiler->Base,
VERT_RESULT_HPOS,
--
cgit v1.2.3
From 3f7838168781b69aea04514a57058d0aa0cc2cbb Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Sat, 25 Jul 2009 00:59:31 +0200
Subject: r300/vertprog: Move Mesa-dependent input/output handling out of
compiler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 92 +------------------
src/mesa/drivers/dri/r300/compiler/radeon_code.h | 5 --
.../drivers/dri/r300/compiler/radeon_compiler.h | 6 +-
.../drivers/dri/r300/compiler/radeon_program.h | 1 -
src/mesa/drivers/dri/r300/r300_vertprog.c | 100 +++++++++++++++++++--
5 files changed, 100 insertions(+), 104 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 14dd36354d6..339be414cfe 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -310,79 +310,6 @@ static void ei_pow(struct r300_vertex_program_code *vp,
inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
}
-static void t_inputs_outputs(struct r300_vertex_program_compiler * c)
-{
- int i;
- int cur_reg;
- GLuint OutputsWritten, InputsRead;
-
- OutputsWritten = c->Base.Program.OutputsWritten;
- InputsRead = c->Base.Program.InputsRead;
-
- cur_reg = -1;
- for (i = 0; i < VERT_ATTRIB_MAX; i++) {
- if (InputsRead & (1 << i))
- c->code->inputs[i] = ++cur_reg;
- else
- c->code->inputs[i] = -1;
- }
-
- cur_reg = 0;
- for (i = 0; i < VERT_RESULT_MAX; i++)
- c->code->outputs[i] = -1;
-
- assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
-
- if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
- c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
- }
-
- if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
- c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
- }
-
- /* If we're writing back facing colors we need to send
- * four colors to make front/back face colors selection work.
- * If the vertex program doesn't write all 4 colors, lets
- * pretend it does by skipping output index reg so the colors
- * get written into appropriate output vectors.
- */
- if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
- c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
- } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
- OutputsWritten & (1 << VERT_RESULT_BFC1)) {
- cur_reg++;
- }
-
- if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
- c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
- } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
- OutputsWritten & (1 << VERT_RESULT_BFC1)) {
- cur_reg++;
- }
-
- if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
- c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
- } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
- cur_reg++;
- }
-
- if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
- c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
- } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
- cur_reg++;
- }
-
- for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
- if (OutputsWritten & (1 << i)) {
- c->code->outputs[i] = cur_reg++;
- }
- }
-
- if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
- c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
- }
-}
static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
{
@@ -391,7 +318,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
compiler->code->pos_end = 0; /* Not supported yet */
compiler->code->length = 0;
- t_inputs_outputs(compiler);
+ compiler->SetHwInputOutput(compiler);
for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
struct prog_instruction *vpi = &rci->I;
@@ -624,23 +551,6 @@ static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
{
- rc_mesa_to_rc_program(&compiler->Base, compiler->program);
- compiler->program = 0;
-
- rc_move_output(&compiler->Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X);
-
- if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) {
- rc_copy_output(&compiler->Base,
- VERT_RESULT_HPOS,
- compiler->state.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0);
- }
-
- if (compiler->state.FogAttr != FRAG_ATTRIB_MAX) {
- rc_move_output(&compiler->Base,
- VERT_RESULT_FOGC,
- compiler->state.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X);
- }
-
addArtificialOutputs(compiler);
{
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index 5489931434f..6f51358f55e 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -185,11 +185,6 @@ struct rX00_fragment_program_code {
#define VSF_MAX_FRAGMENT_LENGTH (255*4)
#define VSF_MAX_FRAGMENT_TEMPS (14)
-struct r300_vertex_program_external_state {
- GLuint FogAttr;
- GLuint WPosAttr;
-};
-
struct r300_vertex_program_code {
int length;
union {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index a4ff2ca86ab..6b251ba7f19 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -64,6 +64,8 @@ void rc_destroy(struct radeon_compiler * c);
void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
void rc_error(struct radeon_compiler * c, const char * fmt, ...);
+void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program);
+
void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input);
void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask);
void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
@@ -83,9 +85,9 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
struct r300_vertex_program_compiler {
struct radeon_compiler Base;
struct r300_vertex_program_code *code;
- struct r300_vertex_program_external_state state;
GLbitfield RequiredOutputs;
- struct gl_program *program;
+
+ void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
};
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index 1a34b50ed72..561958608ca 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -116,7 +116,6 @@ struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
void rc_remove_instruction(struct rc_instruction * inst);
-void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program);
void rc_print_program(const struct rc_program *prog);
#endif
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index 27ec23975a4..69d6b021d5c 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -128,6 +128,82 @@ static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitf
return outputs;
}
+
+static void t_inputs_outputs(struct r300_vertex_program_compiler * c)
+{
+ int i;
+ int cur_reg;
+ GLuint OutputsWritten, InputsRead;
+
+ OutputsWritten = c->Base.Program.OutputsWritten;
+ InputsRead = c->Base.Program.InputsRead;
+
+ cur_reg = -1;
+ for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+ if (InputsRead & (1 << i))
+ c->code->inputs[i] = ++cur_reg;
+ else
+ c->code->inputs[i] = -1;
+ }
+
+ cur_reg = 0;
+ for (i = 0; i < VERT_RESULT_MAX; i++)
+ c->code->outputs[i] = -1;
+
+ assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
+
+ if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
+ c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
+ }
+
+ if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
+ c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
+ }
+
+ /* If we're writing back facing colors we need to send
+ * four colors to make front/back face colors selection work.
+ * If the vertex program doesn't write all 4 colors, lets
+ * pretend it does by skipping output index reg so the colors
+ * get written into appropriate output vectors.
+ */
+ if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
+ c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+ OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ cur_reg++;
+ }
+
+ if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
+ c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+ OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ cur_reg++;
+ }
+
+ if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+ c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ cur_reg++;
+ }
+
+ if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+ cur_reg++;
+ }
+
+ for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
+ if (OutputsWritten & (1 << i)) {
+ c->code->outputs[i] = cur_reg++;
+ }
+ }
+
+ if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
+ c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
+ }
+}
+
+
static struct r300_vertex_program *build_program(GLcontext *ctx,
struct r300_vertex_program_key *wanted_key,
const struct gl_vertex_program *mesa_vp)
@@ -143,19 +219,33 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
compiler.Base.Debug = (RADEON_DEBUG & DEBUG_VERTS) ? GL_TRUE : GL_FALSE;
compiler.code = &vp->code;
- compiler.state.FogAttr = vp->key.FogAttr;
- compiler.state.WPosAttr = vp->key.WPosAttr;
compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads);
- compiler.program = &vp->Base->Base;
+ compiler.SetHwInputOutput = &t_inputs_outputs;
if (compiler.Base.Debug) {
fprintf(stderr, "Initial vertex program:\n");
- _mesa_print_program(compiler.program);
+ _mesa_print_program(&vp->Base->Base);
fflush(stdout);
}
if (mesa_vp->IsPositionInvariant) {
- _mesa_insert_mvp_code(ctx, (struct gl_vertex_program *)compiler.program);
+ _mesa_insert_mvp_code(ctx, vp->Base);
+ }
+
+ rc_mesa_to_rc_program(&compiler.Base, &vp->Base->Base);
+
+ rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X);
+
+ if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) {
+ rc_copy_output(&compiler.Base,
+ VERT_RESULT_HPOS,
+ vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0);
+ }
+
+ if (vp->key.FogAttr != FRAG_ATTRIB_MAX) {
+ rc_move_output(&compiler.Base,
+ VERT_RESULT_FOGC,
+ vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X);
}
r3xx_compile_vertex_program(&compiler);
--
cgit v1.2.3
From 1348a7ebc0524276f2bd53086f13d2c263134db7 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Sat, 25 Jul 2009 01:08:37 +0200
Subject: r300/fragprog: Finally get rid of the duplicate program copy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 9 -----
.../drivers/dri/r300/compiler/radeon_compiler.h | 1 -
src/mesa/drivers/dri/r300/r300_context.h | 1 -
src/mesa/drivers/dri/r300/r300_fragprog_common.c | 11 +++++-
src/mesa/drivers/dri/r300/r300_shader.c | 1 -
src/mesa/drivers/dri/r300/r300_state.c | 46 ++--------------------
6 files changed, 12 insertions(+), 57 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 014c5fbac01..3c63da81766 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -146,15 +146,6 @@ static void rewrite_depth_out(struct r300_fragment_program_compiler * c)
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{
- if (c->Base.Debug) {
- fflush(stdout);
- _mesa_printf("Fragment Program: Initial program:\n");
- _mesa_print_program(c->program);
- fflush(stdout);
- }
-
- rc_mesa_to_rc_program(&c->Base, c->program);
-
insert_WPOS_trailer(c);
rewriteFog(c);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 6b251ba7f19..b9e1a7959af 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -74,7 +74,6 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig
struct r300_fragment_program_compiler {
struct radeon_compiler Base;
struct rX00_fragment_program_code *code;
- struct gl_program * program;
struct r300_fragment_program_external_state state;
GLboolean is_r500;
};
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index 629fd0af274..56f05723b8c 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -428,7 +428,6 @@ struct r300_vertex_program_cont {
*/
struct r300_fragment_program {
GLboolean error;
- struct gl_program *Base;
struct r300_fragment_program *next;
struct r300_fragment_program_external_state state;
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index 0ce57e834b4..05f46ad0607 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -96,14 +96,21 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog
compiler.code = &fp->code;
compiler.state = fp->state;
- compiler.program = _mesa_clone_program(ctx, &cont->Base.Base);
compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE;
+ if (compiler.Base.Debug) {
+ fflush(stdout);
+ _mesa_printf("Fragment Program: Initial program:\n");
+ _mesa_print_program(&cont->Base.Base);
+ fflush(stdout);
+ }
+
+ rc_mesa_to_rc_program(&compiler.Base, &cont->Base.Base);
+
r3xx_compile_fragment_program(&compiler);
fp->error = compiler.Base.Error;
fp->InputsRead = compiler.Base.Program.InputsRead;
- fp->Base = compiler.program;
rc_destroy(&compiler.Base);
}
diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c
index 40b073f2c73..a4f9db13ecf 100644
--- a/src/mesa/drivers/dri/r300/r300_shader.c
+++ b/src/mesa/drivers/dri/r300/r300_shader.c
@@ -39,7 +39,6 @@ static void freeFragProgCache(GLcontext *ctx, struct r300_fragment_program_cont
while (fp) {
tmp = fp->next;
rc_constants_destroy(&fp->code.constants);
- _mesa_reference_program(ctx, &fp->Base, NULL);
_mesa_free(fp);
fp = tmp;
}
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index c79601bcb1e..1f799d5a6ff 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -1044,35 +1044,6 @@ void r300UpdateViewportOffset(GLcontext * ctx)
radeonUpdateScissor(ctx);
}
-static void
-r300FetchStateParameter(GLcontext * ctx,
- const gl_state_index state[STATE_LENGTH],
- GLfloat * value)
-{
- r300ContextPtr r300 = R300_CONTEXT(ctx);
-
- switch (state[0]) {
- case STATE_INTERNAL:
- switch (state[1]) {
- case STATE_R300_WINDOW_DIMENSION: {
- __DRIdrawablePrivate * drawable = radeon_get_drawable(&r300->radeon);
- value[0] = drawable->w * 0.5f; /* width*0.5 */
- value[1] = drawable->h * 0.5f; /* height*0.5 */
- value[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */
- value[3] = 1.0F; /* not used */
- break;
- }
-
- default:
- break;
- }
- break;
-
- default:
- break;
- }
-}
-
/**
* Update R300's own internal state parameters.
* For now just STATE_R300_WINDOW_DIMENSION
@@ -1081,7 +1052,6 @@ static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
struct gl_program_parameter_list *paramList;
- GLuint i;
if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)))
return;
@@ -1089,21 +1059,12 @@ static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
if (!ctx->FragmentProgram._Current || !rmesa->selected_fp)
return;
- paramList = rmesa->selected_fp->Base->Parameters;
+ paramList = ctx->FragmentProgram._Current->Base.Parameters;
if (!paramList)
return;
_mesa_load_state_parameters(ctx, paramList);
-
- for (i = 0; i < paramList->NumParameters; i++) {
- if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) {
- r300FetchStateParameter(ctx,
- paramList->Parameters[i].
- StateIndexes,
- paramList->ParameterValues[i]);
- }
- }
}
/* =============================================================
@@ -2015,12 +1976,11 @@ static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index,
{
static const GLfloat dummy[4] = { 0, 0, 0, 0 };
r300ContextPtr rmesa = R300_CONTEXT(ctx);
- struct r300_fragment_program * fp = rmesa->selected_fp;
- struct rc_constant * rcc = &fp->code.constants.Constants[index];
+ struct rc_constant * rcc = &rmesa->selected_fp->code.constants.Constants[index];
switch(rcc->Type) {
case RC_CONSTANT_EXTERNAL:
- return fp->Base->Parameters->ParameterValues[rcc->u.External];
+ return ctx->FragmentProgram._Current->Base.Parameters->ParameterValues[rcc->u.External];
case RC_CONSTANT_IMMEDIATE:
return rcc->u.Immediate;
case RC_CONSTANT_STATE:
--
cgit v1.2.3
From 836050ba5eb9690e4a64499249eb71af14961deb Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Sat, 25 Jul 2009 01:19:04 +0200
Subject: r300/fragprog: Move some of the attribute handling out of the
compiler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Attribute indices will probably be different in Gallium, so make the compiler
independent of magic values.
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 78 ++--------------------
.../drivers/dri/r300/compiler/radeon_compiler.h | 2 +
src/mesa/drivers/dri/r300/r300_fragprog_common.c | 69 +++++++++++++++++++
3 files changed, 77 insertions(+), 72 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 3c63da81766..d39b82be71c 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -35,73 +35,11 @@
static void nqssadce_init(struct nqssadce_state* s)
{
- s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW;
- s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W;
+ struct r300_fragment_program_compiler * c = s->UserData;
+ s->Outputs[c->OutputColor].Sourced = WRITEMASK_XYZW;
+ s->Outputs[c->OutputDepth].Sourced = WRITEMASK_W;
}
-/**
- * Transform the program to support fragment.position.
- *
- * Introduce a small fragment at the start of the program that will be
- * the only code that directly reads the FRAG_ATTRIB_WPOS input.
- * All other code pieces that reference that input will be rewritten
- * to read from a newly allocated temporary.
- *
- */
-static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
-{
- int i;
-
- if (!(compiler->Base.Program.InputsRead & FRAG_BIT_WPOS)) {
- compiler->code->wpos_attr = FRAG_ATTRIB_MAX;
- return;
- }
-
- for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
- {
- if (!(compiler->Base.Program.InputsRead & (1 << i))) {
- compiler->code->wpos_attr = i;
- break;
- }
- }
-
- rc_transform_fragment_wpos(&compiler->Base, FRAG_ATTRIB_WPOS, compiler->code->wpos_attr);
-}
-
-/**
- * Rewrite fragment.fogcoord to use a texture coordinate slot.
- * Note that fogcoord is forced into an X001 pattern, and this enforcement
- * is done here.
- *
- * See also the counterpart rewriting for vertex programs.
- */
-static void rewriteFog(struct r300_fragment_program_compiler *compiler)
-{
- struct rX00_fragment_program_code *code = compiler->code;
- struct prog_src_register src;
- int i;
-
- if (!(compiler->Base.Program.InputsRead & FRAG_BIT_FOGC)) {
- code->fog_attr = FRAG_ATTRIB_MAX;
- return;
- }
-
- for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
- {
- if (!(compiler->Base.Program.InputsRead & (1 << i))) {
- code->fog_attr = i;
- break;
- }
- }
-
- reset_srcreg(&src);
- src.File = PROGRAM_INPUT;
- src.Index = code->fog_attr;
- src.Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
- rc_move_input(&compiler->Base, FRAG_ATTRIB_FOGC, src);
-}
-
-
static void rewrite_depth_out(struct r300_fragment_program_compiler * c)
{
struct rc_instruction *rci;
@@ -109,7 +47,7 @@ static void rewrite_depth_out(struct r300_fragment_program_compiler * c)
for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) {
struct prog_instruction * inst = &rci->I;
- if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != FRAG_RESULT_DEPTH)
+ if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != c->OutputDepth)
continue;
if (inst->DstReg.WriteMask & WRITEMASK_Z) {
@@ -146,10 +84,6 @@ static void rewrite_depth_out(struct r300_fragment_program_compiler * c)
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{
- insert_WPOS_trailer(c);
-
- rewriteFog(c);
-
rewrite_depth_out(c);
if (c->is_r500) {
@@ -181,14 +115,14 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
.IsNativeSwizzle = &r500FPIsNativeSwizzle,
.BuildSwizzle = &r500FPBuildSwizzle
};
- radeonNqssaDce(&c->Base, &nqssadce, 0);
+ radeonNqssaDce(&c->Base, &nqssadce, c);
} else {
struct radeon_nqssadce_descr nqssadce = {
.Init = &nqssadce_init,
.IsNativeSwizzle = &r300FPIsNativeSwizzle,
.BuildSwizzle = &r300FPBuildSwizzle
};
- radeonNqssaDce(&c->Base, &nqssadce, 0);
+ radeonNqssaDce(&c->Base, &nqssadce, c);
}
if (c->Base.Debug) {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index b9e1a7959af..34f87183169 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -76,6 +76,8 @@ struct r300_fragment_program_compiler {
struct rX00_fragment_program_code *code;
struct r300_fragment_program_external_state state;
GLboolean is_r500;
+ unsigned OutputDepth;
+ unsigned OutputColor;
};
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index 05f46ad0607..00807245752 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -86,6 +86,69 @@ static void build_state(
}
+/**
+ * Transform the program to support fragment.position.
+ *
+ * Introduce a small fragment at the start of the program that will be
+ * the only code that directly reads the FRAG_ATTRIB_WPOS input.
+ * All other code pieces that reference that input will be rewritten
+ * to read from a newly allocated temporary.
+ *
+ */
+static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
+{
+ int i;
+
+ if (!(compiler->Base.Program.InputsRead & FRAG_BIT_WPOS)) {
+ compiler->code->wpos_attr = FRAG_ATTRIB_MAX;
+ return;
+ }
+
+ for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
+ {
+ if (!(compiler->Base.Program.InputsRead & (1 << i))) {
+ compiler->code->wpos_attr = i;
+ break;
+ }
+ }
+
+ rc_transform_fragment_wpos(&compiler->Base, FRAG_ATTRIB_WPOS, compiler->code->wpos_attr);
+}
+
+/**
+ * Rewrite fragment.fogcoord to use a texture coordinate slot.
+ * Note that fogcoord is forced into an X001 pattern, and this enforcement
+ * is done here.
+ *
+ * See also the counterpart rewriting for vertex programs.
+ */
+static void rewriteFog(struct r300_fragment_program_compiler *compiler)
+{
+ struct rX00_fragment_program_code *code = compiler->code;
+ struct prog_src_register src;
+ int i;
+
+ if (!(compiler->Base.Program.InputsRead & FRAG_BIT_FOGC)) {
+ code->fog_attr = FRAG_ATTRIB_MAX;
+ return;
+ }
+
+ for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
+ {
+ if (!(compiler->Base.Program.InputsRead & (1 << i))) {
+ code->fog_attr = i;
+ break;
+ }
+ }
+
+ memset(&src, 0, sizeof(src));
+ src.File = PROGRAM_INPUT;
+ src.Index = code->fog_attr;
+ src.Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
+ rc_move_input(&compiler->Base, FRAG_ATTRIB_FOGC, src);
+}
+
+
static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_program_cont *cont, struct r300_fragment_program *fp)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
@@ -97,6 +160,8 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog
compiler.code = &fp->code;
compiler.state = fp->state;
compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE;
+ compiler.OutputDepth = FRAG_RESULT_DEPTH;
+ compiler.OutputColor = FRAG_RESULT_COLOR;
if (compiler.Base.Debug) {
fflush(stdout);
@@ -107,6 +172,10 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog
rc_mesa_to_rc_program(&compiler.Base, &cont->Base.Base);
+ insert_WPOS_trailer(&compiler);
+
+ rewriteFog(&compiler);
+
r3xx_compile_fragment_program(&compiler);
fp->error = compiler.Base.Error;
--
cgit v1.2.3
From e034683eda7ab694de400f9803f765b22393cb7d Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Sun, 26 Jul 2009 11:52:17 +0200
Subject: r300/fragprog: No longer rely on hardcoded FRAG_RESULT_xxx constants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Also, this makes radeon_program_pair depend on the r300 fragment program
compiler. Since we now know that r600+ no longer use the same pairing
style in their ALU, we can stop pretending that program_pair is useful
for anything but r300-r500 fragment programs.
Signed-off-by: Nicolai Hähnle
---
.../drivers/dri/r300/compiler/r300_fragprog_emit.c | 2 +-
.../drivers/dri/r300/compiler/r500_fragprog_emit.c | 2 +-
.../dri/r300/compiler/radeon_program_pair.c | 44 +++++++++++-----------
.../dri/r300/compiler/radeon_program_pair.h | 6 +--
4 files changed, 27 insertions(+), 27 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index 674d1f8cd35..80b569ade67 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -308,7 +308,7 @@ void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
code->node[0].alu_end = -1;
code->node[0].tex_end = -1;
- radeonPairProgram(&compiler->Base, &pair_handler, compiler);
+ radeonPairProgram(compiler, &pair_handler, compiler);
if (compiler->Base.Error)
return;
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 21d6b9bba71..3a527210c17 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -282,7 +282,7 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
code->inst_offset = 0;
code->inst_end = -1;
- radeonPairProgram(&compiler->Base, &pair_handler, compiler);
+ radeonPairProgram(compiler, &pair_handler, compiler);
if (compiler->Base.Error)
return;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index 57a364c78b6..84d0831cfc2 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -40,7 +40,7 @@
#include "shader/prog_print.h"
#define error(fmt, args...) do { \
- rc_error(s->Compiler, "%s::%s(): " fmt "\n", \
+ rc_error(&s->Compiler->Base, "%s::%s(): " fmt "\n", \
__FILE__, __FUNCTION__, ##args); \
} while(0)
@@ -118,7 +118,7 @@ struct pair_register_translation {
};
struct pair_state {
- struct radeon_compiler * Compiler;
+ struct r300_fragment_program_compiler * Compiler;
const struct radeon_pair_handler *Handler;
GLboolean Verbose;
void *UserData;
@@ -335,10 +335,10 @@ static void scan_instructions(struct pair_state *s)
struct rc_instruction *source;
GLuint ip;
- for(source = s->Compiler->Program.Instructions.Next, ip = 0;
- source != &s->Compiler->Program.Instructions;
+ for(source = s->Compiler->Base.Program.Instructions.Next, ip = 0;
+ source != &s->Compiler->Base.Program.Instructions;
source = source->Next, ++ip) {
- struct pair_state_instruction *pairinst = memory_pool_malloc(&s->Compiler->Pool, sizeof(*pairinst));
+ struct pair_state_instruction *pairinst = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*pairinst));
memset(pairinst, 0, sizeof(struct pair_state_instruction));
pairinst->Instruction = source->I;
@@ -374,7 +374,7 @@ static void scan_instructions(struct pair_state *s)
GET_BIT(pairinst->Instruction.DstReg.WriteMask, swz))
continue;
- struct reg_value_reader* r = memory_pool_malloc(&s->Compiler->Pool, sizeof(*r));
+ struct reg_value_reader* r = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*r));
pairinst->NumDependencies++;
t->Value[swz]->NumReaders++;
r->Reader = pairinst;
@@ -397,7 +397,7 @@ static void scan_instructions(struct pair_state *s)
if (!GET_BIT(pairinst->Instruction.DstReg.WriteMask, j))
continue;
- struct reg_value* v = memory_pool_malloc(&s->Compiler->Pool, sizeof(*v));
+ struct reg_value* v = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*v));
memset(v, 0, sizeof(struct reg_value));
v->Writer = pairinst;
if (t->Value[j]) {
@@ -435,7 +435,7 @@ static void scan_instructions(struct pair_state *s)
*/
static void allocate_input_registers(struct pair_state *s)
{
- GLuint InputsRead = s->Compiler->Program.InputsRead;
+ GLuint InputsRead = s->Compiler->Base.Program.InputsRead;
int i;
GLuint hwindex = 0;
@@ -577,11 +577,11 @@ static void emit_all_tex(struct pair_state *s)
get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
}
- if (s->Compiler->Debug)
+ if (s->Compiler->Base.Debug)
_mesa_printf(" BEGIN_TEX\n");
if (s->Handler->BeginTexBlock)
- s->Compiler->Error = s->Compiler->Error || !s->Handler->BeginTexBlock(s->UserData);
+ s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->BeginTexBlock(s->UserData);
for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
struct prog_instruction *inst = &pairinst->Instruction;
@@ -591,7 +591,7 @@ static void emit_all_tex(struct pair_state *s)
inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index);
- if (s->Compiler->Debug) {
+ if (s->Compiler->Base.Debug) {
_mesa_printf(" ");
_mesa_print_instruction(inst);
fflush(stdout);
@@ -614,10 +614,10 @@ static void emit_all_tex(struct pair_state *s)
rpti.SrcIndex = inst->SrcReg[0].Index;
rpti.SrcSwizzle = inst->SrcReg[0].Swizzle;
- s->Compiler->Error = s->Compiler->Error || !s->Handler->EmitTex(s->UserData, &rpti);
+ s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitTex(s->UserData, &rpti);
}
- if (s->Compiler->Debug)
+ if (s->Compiler->Base.Debug)
_mesa_printf(" END_TEX\n");
}
@@ -781,10 +781,10 @@ static void fill_dest_into_pair(
struct prog_instruction *inst = &pairinst->Instruction;
if (inst->DstReg.File == PROGRAM_OUTPUT) {
- if (inst->DstReg.Index == FRAG_RESULT_COLOR) {
+ if (inst->DstReg.Index == s->Compiler->OutputColor) {
pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ;
pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
- } else if (inst->DstReg.Index == FRAG_RESULT_DEPTH) {
+ } else if (inst->DstReg.Index == s->Compiler->OutputDepth) {
pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
}
} else {
@@ -864,15 +864,15 @@ static void emit_alu(struct pair_state *s)
success: ;
}
- if (s->Compiler->Debug)
+ if (s->Compiler->Base.Debug)
radeonPrintPairInstruction(&pair);
- s->Compiler->Error = s->Compiler->Error || !s->Handler->EmitPaired(s->UserData, &pair);
+ s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitPaired(s->UserData, &pair);
}
void radeonPairProgram(
- struct radeon_compiler * compiler,
+ struct r300_fragment_program_compiler * compiler,
const struct radeon_pair_handler* handler, void *userdata)
{
struct pair_state s;
@@ -881,15 +881,15 @@ void radeonPairProgram(
s.Compiler = compiler;
s.Handler = handler;
s.UserData = userdata;
- s.Verbose = GL_FALSE && s.Compiler->Debug;
+ s.Verbose = GL_FALSE && s.Compiler->Base.Debug;
- if (s.Compiler->Debug)
+ if (s.Compiler->Base.Debug)
_mesa_printf("Emit paired program\n");
scan_instructions(&s);
allocate_input_registers(&s);
- while(!s.Compiler->Error &&
+ while(!s.Compiler->Base.Error &&
(s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
if (s.ReadyTEX)
emit_all_tex(&s);
@@ -898,7 +898,7 @@ void radeonPairProgram(
emit_alu(&s);
}
- if (s.Compiler->Debug)
+ if (s.Compiler->Base.Debug)
_mesa_printf(" END\n");
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index 9d4d7dd3c93..ff761785518 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -30,7 +30,7 @@
#include "radeon_program.h"
-struct radeon_compiler;
+struct r300_fragment_program_compiler;
/**
@@ -131,11 +131,11 @@ struct radeon_pair_handler {
*/
GLboolean (*BeginTexBlock)(void*);
- GLuint MaxHwTemps;
+ unsigned MaxHwTemps;
};
void radeonPairProgram(
- struct radeon_compiler * compiler,
+ struct r300_fragment_program_compiler * compiler,
const struct radeon_pair_handler*, void *userdata);
void radeonPrintPairInstruction(struct radeon_pair_instruction *inst);
--
cgit v1.2.3
From 790334883a80ee1d19cb1bba018ed7dc32299dac Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Sun, 26 Jul 2009 12:05:57 +0200
Subject: r300/fragprog: Remove hardcoded FRAG_ATTRIB_xxx constants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
.../drivers/dri/r300/compiler/radeon_compiler.h | 6 +++
.../dri/r300/compiler/radeon_program_pair.c | 53 +++-------------------
src/mesa/drivers/dri/r300/r300_fragprog_common.c | 48 ++++++++++++++++++++
3 files changed, 61 insertions(+), 46 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 34f87183169..b22da17583e 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -78,6 +78,12 @@ struct r300_fragment_program_compiler {
GLboolean is_r500;
unsigned OutputDepth;
unsigned OutputColor;
+
+ void * UserData;
+ void (*AllocateHwInputs)(
+ void * yourdata,
+ void (*allocate)(void * data, unsigned input, unsigned hwreg),
+ void * mydata);
};
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index 84d0831cfc2..8cf1f1aaac2 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -427,51 +427,6 @@ static void scan_instructions(struct pair_state *s)
}
-/**
- * Reserve hardware temporary registers for the program inputs.
- *
- * @note This allocation is performed explicitly, because the order of inputs
- * is determined by the RS hardware.
- */
-static void allocate_input_registers(struct pair_state *s)
-{
- GLuint InputsRead = s->Compiler->Base.Program.InputsRead;
- int i;
- GLuint hwindex = 0;
-
- /* Primary colour */
- if (InputsRead & FRAG_BIT_COL0)
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++);
- InputsRead &= ~FRAG_BIT_COL0;
-
- /* Secondary color */
- if (InputsRead & FRAG_BIT_COL1)
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++);
- InputsRead &= ~FRAG_BIT_COL1;
-
- /* Texcoords */
- for (i = 0; i < 8; i++) {
- if (InputsRead & (FRAG_BIT_TEX0 << i))
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++);
- }
- InputsRead &= ~FRAG_BITS_TEX_ANY;
-
- /* Fogcoords treated as a texcoord */
- if (InputsRead & FRAG_BIT_FOGC)
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++);
- InputsRead &= ~FRAG_BIT_FOGC;
-
- /* fragment position treated as a texcoord */
- if (InputsRead & FRAG_BIT_WPOS)
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++);
- InputsRead &= ~FRAG_BIT_WPOS;
-
- /* Anything else */
- if (InputsRead)
- error("Don't know how to handle inputs 0x%x\n", InputsRead);
-}
-
-
static void decrement_dependencies(struct pair_state *s, struct pair_state_instruction *pairinst)
{
ASSERT(pairinst->NumDependencies > 0);
@@ -870,6 +825,12 @@ static void emit_alu(struct pair_state *s)
s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitPaired(s->UserData, &pair);
}
+/* Callback function for assigning input registers to hardware registers */
+static void alloc_helper(void * data, unsigned input, unsigned hwreg)
+{
+ struct pair_state * s = data;
+ alloc_hw_reg(s, PROGRAM_INPUT, input, hwreg);
+}
void radeonPairProgram(
struct r300_fragment_program_compiler * compiler,
@@ -887,7 +848,7 @@ void radeonPairProgram(
_mesa_printf("Emit paired program\n");
scan_instructions(&s);
- allocate_input_registers(&s);
+ s.Compiler->AllocateHwInputs(s.Compiler->UserData, &alloc_helper, &s);
while(!s.Compiler->Base.Error &&
(s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index 00807245752..2947f5ef7e0 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -149,6 +149,52 @@ static void rewriteFog(struct r300_fragment_program_compiler *compiler)
}
+/**
+ * Reserve hardware temporary registers for the program inputs.
+ *
+ * @note This allocation is performed explicitly, because the order of inputs
+ * is determined by the RS hardware.
+ */
+static void allocate_hw_inputs(void * yourdata, void (*allocate)(void * data, unsigned input, unsigned hwreg), void * mydata)
+{
+ struct r300_fragment_program_compiler * c = yourdata;
+ GLuint InputsRead = c->Base.Program.InputsRead;
+ int i;
+ GLuint hwindex = 0;
+
+ /* Primary colour */
+ if (InputsRead & FRAG_BIT_COL0)
+ allocate(mydata, FRAG_ATTRIB_COL0, hwindex++);
+ InputsRead &= ~FRAG_BIT_COL0;
+
+ /* Secondary color */
+ if (InputsRead & FRAG_BIT_COL1)
+ allocate(mydata, FRAG_ATTRIB_COL1, hwindex++);
+ InputsRead &= ~FRAG_BIT_COL1;
+
+ /* Texcoords */
+ for (i = 0; i < 8; i++) {
+ if (InputsRead & (FRAG_BIT_TEX0 << i))
+ allocate(mydata, FRAG_ATTRIB_TEX0+i, hwindex++);
+ }
+ InputsRead &= ~FRAG_BITS_TEX_ANY;
+
+ /* Fogcoords treated as a texcoord */
+ if (InputsRead & FRAG_BIT_FOGC)
+ allocate(mydata, FRAG_ATTRIB_FOGC, hwindex++);
+ InputsRead &= ~FRAG_BIT_FOGC;
+
+ /* fragment position treated as a texcoord */
+ if (InputsRead & FRAG_BIT_WPOS)
+ allocate(mydata, FRAG_ATTRIB_WPOS, hwindex++);
+ InputsRead &= ~FRAG_BIT_WPOS;
+
+ /* Anything else */
+ if (InputsRead)
+ rc_error(&c->Base, "Don't know how to handle inputs 0x%x\n", InputsRead);
+}
+
+
static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_program_cont *cont, struct r300_fragment_program *fp)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
@@ -162,6 +208,8 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog
compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE;
compiler.OutputDepth = FRAG_RESULT_DEPTH;
compiler.OutputColor = FRAG_RESULT_COLOR;
+ compiler.AllocateHwInputs = &allocate_hw_inputs;
+ compiler.UserData = &compiler;
if (compiler.Base.Debug) {
fflush(stdout);
--
cgit v1.2.3
From e82a50a6a1abd39aa7153846be07b7c5e9172485 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Sun, 26 Jul 2009 13:50:56 +0200
Subject: r300/fragprog: Move wpos_attr and fog_attr where they belong
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/radeon_code.h | 5 -----
src/mesa/drivers/dri/r300/r300_context.h | 5 +++++
src/mesa/drivers/dri/r300/r300_fragprog_common.c | 21 ++++++++++-----------
src/mesa/drivers/dri/r300/r300_swtcl.c | 8 ++++----
src/mesa/drivers/dri/r300/r300_vertprog.c | 4 ++--
5 files changed, 21 insertions(+), 22 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index 6f51358f55e..9fd37dc8d24 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -174,11 +174,6 @@ struct rX00_fragment_program_code {
GLboolean writes_depth;
struct rc_constant_list constants;
-
- /* attribute that we are sending the WPOS in */
- gl_frag_attrib wpos_attr;
- /* attribute that we are sending the fog coordinate in */
- gl_frag_attrib fog_attr;
};
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index 56f05723b8c..24dc6bc6a34 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -433,6 +433,11 @@ struct r300_fragment_program {
struct rX00_fragment_program_code code;
GLbitfield InputsRead;
+
+ /* attribute that we are sending the WPOS in */
+ gl_frag_attrib wpos_attr;
+ /* attribute that we are sending the fog coordinate in */
+ gl_frag_attrib fog_attr;
};
struct r300_fragment_program_cont {
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index 2947f5ef7e0..3bfe8a9dede 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -95,24 +95,24 @@ static void build_state(
* to read from a newly allocated temporary.
*
*/
-static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
+static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp)
{
int i;
if (!(compiler->Base.Program.InputsRead & FRAG_BIT_WPOS)) {
- compiler->code->wpos_attr = FRAG_ATTRIB_MAX;
+ fp->wpos_attr = FRAG_ATTRIB_MAX;
return;
}
for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
{
if (!(compiler->Base.Program.InputsRead & (1 << i))) {
- compiler->code->wpos_attr = i;
+ fp->wpos_attr = i;
break;
}
}
- rc_transform_fragment_wpos(&compiler->Base, FRAG_ATTRIB_WPOS, compiler->code->wpos_attr);
+ rc_transform_fragment_wpos(&compiler->Base, FRAG_ATTRIB_WPOS, fp->wpos_attr);
}
/**
@@ -122,28 +122,27 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
*
* See also the counterpart rewriting for vertex programs.
*/
-static void rewriteFog(struct r300_fragment_program_compiler *compiler)
+static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp)
{
- struct rX00_fragment_program_code *code = compiler->code;
struct prog_src_register src;
int i;
if (!(compiler->Base.Program.InputsRead & FRAG_BIT_FOGC)) {
- code->fog_attr = FRAG_ATTRIB_MAX;
+ fp->fog_attr = FRAG_ATTRIB_MAX;
return;
}
for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
{
if (!(compiler->Base.Program.InputsRead & (1 << i))) {
- code->fog_attr = i;
+ fp->fog_attr = i;
break;
}
}
memset(&src, 0, sizeof(src));
src.File = PROGRAM_INPUT;
- src.Index = code->fog_attr;
+ src.Index = fp->fog_attr;
src.Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
rc_move_input(&compiler->Base, FRAG_ATTRIB_FOGC, src);
}
@@ -220,9 +219,9 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog
rc_mesa_to_rc_program(&compiler.Base, &cont->Base.Base);
- insert_WPOS_trailer(&compiler);
+ insert_WPOS_trailer(&compiler, fp);
- rewriteFog(&compiler);
+ rewriteFog(&compiler, fp);
r3xx_compile_fragment_program(&compiler);
fp->error = compiler.Base.Error;
diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c
index 1e4ea2c5775..a634cb5192d 100644
--- a/src/mesa/drivers/dri/r300/r300_swtcl.c
+++ b/src/mesa/drivers/dri/r300/r300_swtcl.c
@@ -150,16 +150,16 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_
ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0);
}
- if (rmesa->selected_fp->code.wpos_attr != FRAG_ATTRIB_MAX) {
- int tex_id = rmesa->selected_fp->code.wpos_attr - FRAG_ATTRIB_TEX0;
+ if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) {
+ int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0;
VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
}
- if (rmesa->selected_fp->code.fog_attr != FRAG_ATTRIB_MAX) {
- int tex_id = rmesa->selected_fp->code.fog_attr - FRAG_ATTRIB_TEX0;
+ if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) {
+ int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0;
VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index 69d6b021d5c..c5edbd0052b 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -268,8 +268,8 @@ struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx)
vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
wanted_key.FpReads = r300->selected_fp->InputsRead;
- wanted_key.FogAttr = r300->selected_fp->code.fog_attr;
- wanted_key.WPosAttr = r300->selected_fp->code.wpos_attr;
+ wanted_key.FogAttr = r300->selected_fp->fog_attr;
+ wanted_key.WPosAttr = r300->selected_fp->wpos_attr;
for (vp = vpc->progs; vp; vp = vp->next) {
if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
--
cgit v1.2.3
From 6bc0e1054a212ec80408f685237b0e0c1e4929f0 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Mon, 27 Jul 2009 19:34:08 +0200
Subject: r300/compiler: Prepare for hookup to Gallium
---
src/mesa/drivers/dri/r300/compiler/radeon_code.h | 43 ++++++++++++----------
.../drivers/dri/r300/compiler/radeon_compiler.h | 13 ++++---
2 files changed, 31 insertions(+), 25 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index 9fd37dc8d24..3353617bef3 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -23,6 +23,8 @@
#ifndef RADEON_CODE_H
#define RADEON_CODE_H
+#include
+
#define R300_PFS_MAX_ALU_INST 64
#define R300_PFS_MAX_TEX_INST 32
#define R300_PFS_MAX_TEX_INDIRECT 4
@@ -99,7 +101,7 @@ struct r300_fragment_program_external_state {
* 2 - GL_ALPHA
* depending on the depth texture mode.
*/
- GLuint depth_texture_mode : 2;
+ unsigned depth_texture_mode : 2;
/**
* If the sampler is used as a shadow sampler,
@@ -108,7 +110,7 @@ struct r300_fragment_program_external_state {
*
* Otherwise, this field is 0.
*/
- GLuint texture_compare_func : 3;
+ unsigned texture_compare_func : 3;
} unit[16];
};
@@ -128,16 +130,16 @@ struct r300_fragment_program_node {
struct r300_fragment_program_code {
struct {
int length; /**< total # of texture instructions used */
- GLuint inst[R300_PFS_MAX_TEX_INST];
+ uint32_t inst[R300_PFS_MAX_TEX_INST];
} tex;
struct {
int length; /**< total # of ALU instructions used */
struct {
- GLuint inst0;
- GLuint inst1;
- GLuint inst2;
- GLuint inst3;
+ uint32_t inst0;
+ uint32_t inst1;
+ uint32_t inst2;
+ uint32_t inst3;
} inst[R300_PFS_MAX_ALU_INST];
} alu;
@@ -151,12 +153,12 @@ struct r300_fragment_program_code {
struct r500_fragment_program_code {
struct {
- GLuint inst0;
- GLuint inst1;
- GLuint inst2;
- GLuint inst3;
- GLuint inst4;
- GLuint inst5;
+ uint32_t inst0;
+ uint32_t inst1;
+ uint32_t inst2;
+ uint32_t inst3;
+ uint32_t inst4;
+ uint32_t inst5;
} inst[R500_PFS_MAX_INST];
int inst_offset;
@@ -171,7 +173,7 @@ struct rX00_fragment_program_code {
struct r500_fragment_program_code r500;
} code;
- GLboolean writes_depth;
+ unsigned writes_depth:1;
struct rc_constant_list constants;
};
@@ -180,22 +182,25 @@ struct rX00_fragment_program_code {
#define VSF_MAX_FRAGMENT_LENGTH (255*4)
#define VSF_MAX_FRAGMENT_TEMPS (14)
+#define VSF_MAX_INPUTS 32
+#define VSF_MAX_OUTPUTS 32
+
struct r300_vertex_program_code {
int length;
union {
- GLuint d[VSF_MAX_FRAGMENT_LENGTH];
+ uint32_t d[VSF_MAX_FRAGMENT_LENGTH];
float f[VSF_MAX_FRAGMENT_LENGTH];
} body;
int pos_end;
int num_temporaries; /* Number of temp vars used by program */
- int inputs[VERT_ATTRIB_MAX];
- int outputs[VERT_RESULT_MAX];
+ int inputs[VSF_MAX_INPUTS];
+ int outputs[VSF_MAX_OUTPUTS];
struct rc_constant_list constants;
- GLbitfield InputsRead;
- GLbitfield OutputsWritten;
+ uint32_t InputsRead;
+ uint32_t OutputsWritten;
};
#endif /* RADEON_CODE_H */
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index b22da17583e..15f8b8fd92c 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -43,9 +43,9 @@ struct rc_program {
*/
struct rc_instruction Instructions;
- GLbitfield InputsRead;
- GLbitfield OutputsWritten;
- GLbitfield ShadowSamplers; /**< Texture units used for shadow sampling. */
+ uint32_t InputsRead;
+ uint32_t OutputsWritten;
+ uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */
struct rc_constant_list Constants;
};
@@ -53,8 +53,8 @@ struct rc_program {
struct radeon_compiler {
struct memory_pool Pool;
struct rc_program Program;
- GLboolean Debug;
- GLboolean Error;
+ unsigned Debug:1;
+ unsigned Error:1;
char * ErrorMsg;
};
@@ -75,7 +75,7 @@ struct r300_fragment_program_compiler {
struct radeon_compiler Base;
struct rX00_fragment_program_code *code;
struct r300_fragment_program_external_state state;
- GLboolean is_r500;
+ unsigned is_r500;
unsigned OutputDepth;
unsigned OutputColor;
@@ -94,6 +94,7 @@ struct r300_vertex_program_compiler {
struct r300_vertex_program_code *code;
GLbitfield RequiredOutputs;
+ void * UserData;
void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
};
--
cgit v1.2.3
From 59fff53492fb385f8567c163aed014fdd9c0f8fa Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Mon, 27 Jul 2009 19:29:21 +0200
Subject: r300/compiler: Add vertex program code dumper from Gallium driver
---
src/mesa/drivers/dri/r300/compiler/Makefile | 1 +
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 5 +
.../drivers/dri/r300/compiler/r3xx_vertprog_dump.c | 177 +++++++++++++++++++++
src/mesa/drivers/dri/r300/compiler/radeon_code.h | 2 +
4 files changed, 185 insertions(+)
create mode 100644 src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index dd04b81a2f3..d9738441928 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -19,6 +19,7 @@ C_SOURCES = \
r500_fragprog.c \
r500_fragprog_emit.c \
r3xx_vertprog.c \
+ r3xx_vertprog_dump.c \
\
memory_pool.c
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 339be414cfe..fc9c8f805ae 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -607,4 +607,9 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
compiler->code->InputsRead = compiler->Base.Program.InputsRead;
compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
+
+ if (compiler->Base.Debug) {
+ printf("Final vertex program code:\n");
+ r300_vertex_program_dump(compiler->code);
+ }
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
new file mode 100644
index 00000000000..39cc6953ba5
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright 2009 Nicolai Hähnle
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_code.h"
+
+#include
+
+static char* r300_vs_ve_ops[] = {
+ /* R300 vector ops */
+ " VE_NO_OP",
+ " VE_DOT_PRODUCT",
+ " VE_MULTIPLY",
+ " VE_ADD",
+ " VE_MULTIPLY_ADD",
+ " VE_DISTANCE_FACTOR",
+ " VE_FRACTION",
+ " VE_MAXIMUM",
+ " VE_MINIMUM",
+ "VE_SET_GREATER_THAN_EQUAL",
+ " VE_SET_LESS_THAN",
+ " VE_MULTIPLYX2_ADD",
+ " VE_MULTIPLY_CLAMP",
+ " VE_FLT2FIX_DX",
+ " VE_FLT2FIX_DX_RND",
+ /* R500 vector ops */
+ " VE_PRED_SET_EQ_PUSH",
+ " VE_PRED_SET_GT_PUSH",
+ " VE_PRED_SET_GTE_PUSH",
+ " VE_PRED_SET_NEQ_PUSH",
+ " VE_COND_WRITE_EQ",
+ " VE_COND_WRITE_GT",
+ " VE_COND_WRITE_GTE",
+ " VE_COND_WRITE_NEQ",
+ " VE_SET_GREATER_THAN",
+ " VE_SET_EQUAL",
+ " VE_SET_NOT_EQUAL",
+ " (reserved)",
+ " (reserved)",
+ " (reserved)",
+};
+
+static char* r300_vs_me_ops[] = {
+ /* R300 math ops */
+ " ME_NO_OP",
+ " ME_EXP_BASE2_DX",
+ " ME_LOG_BASE2_DX",
+ " ME_EXP_BASEE_FF",
+ " ME_LIGHT_COEFF_DX",
+ " ME_POWER_FUNC_FF",
+ " ME_RECIP_DX",
+ " ME_RECIP_FF",
+ " ME_RECIP_SQRT_DX",
+ " ME_RECIP_SQRT_FF",
+ " ME_MULTIPLY",
+ " ME_EXP_BASE2_FULL_DX",
+ " ME_LOG_BASE2_FULL_DX",
+ " ME_POWER_FUNC_FF_CLAMP_B",
+ "ME_POWER_FUNC_FF_CLAMP_B1",
+ "ME_POWER_FUNC_FF_CLAMP_01",
+ " ME_SIN",
+ " ME_COS",
+ /* R500 math ops */
+ " ME_LOG_BASE2_IEEE",
+ " ME_RECIP_IEEE",
+ " ME_RECIP_SQRT_IEEE",
+ " ME_PRED_SET_EQ",
+ " ME_PRED_SET_GT",
+ " ME_PRED_SET_GTE",
+ " ME_PRED_SET_NEQ",
+ " ME_PRED_SET_CLR",
+ " ME_PRED_SET_INV",
+ " ME_PRED_SET_POP",
+ " ME_PRED_SET_RESTORE",
+ " (reserved)",
+ " (reserved)",
+ " (reserved)",
+};
+
+/* XXX refactor to avoid clashing symbols */
+static char* r300_vs_src_debug[] = {
+ "t",
+ "i",
+ "c",
+ "a",
+};
+
+static char* r300_vs_dst_debug[] = {
+ "t",
+ "a0",
+ "o",
+ "ox",
+ "a",
+ "i",
+ "u",
+ "u",
+};
+
+static char* r300_vs_swiz_debug[] = {
+ "X",
+ "Y",
+ "Z",
+ "W",
+ "0",
+ "1",
+ "U",
+ "U",
+};
+
+
+static void r300_vs_op_dump(uint32_t op)
+{
+ printf(" dst: %d%s op: ",
+ (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]);
+ if (op & 0x80) {
+ if (op & 0x1) {
+ printf("PVS_MACRO_OP_2CLK_M2X_ADD\n");
+ } else {
+ printf(" PVS_MACRO_OP_2CLK_MADD\n");
+ }
+ } else if (op & 0x40) {
+ printf("%s\n", r300_vs_me_ops[op & 0x1f]);
+ } else {
+ printf("%s\n", r300_vs_ve_ops[op & 0x1f]);
+ }
+}
+
+static void r300_vs_src_dump(uint32_t src)
+{
+ printf(" reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n",
+ (src >> 5) & 0x7f, r300_vs_src_debug[src & 0x3],
+ src & (1 << 25) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 13) & 0x7],
+ src & (1 << 26) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 16) & 0x7],
+ src & (1 << 27) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 19) & 0x7],
+ src & (1 << 28) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 22) & 0x7]);
+}
+
+void r300_vertex_program_dump(struct r300_vertex_program_code * vs)
+{
+ unsigned instrcount = vs->length / 4;
+ unsigned i;
+
+ for(i = 0; i < instrcount; i++) {
+ unsigned offset = i*4;
+ unsigned src;
+
+ printf("%d: op: 0x%08x", i, vs->body.d[offset]);
+ r300_vs_op_dump(vs->body.d[offset]);
+
+ for(src = 0; src < 3; ++src) {
+ printf(" src%i: 0x%08x", src, vs->body.d[offset+1+src]);
+ r300_vs_src_dump(vs->body.d[offset+1+src]);
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index 3353617bef3..77bc19d8ff9 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -203,4 +203,6 @@ struct r300_vertex_program_code {
uint32_t OutputsWritten;
};
+void r300_vertex_program_dump(struct r300_vertex_program_code * vs);
+
#endif /* RADEON_CODE_H */
\ No newline at end of file
--
cgit v1.2.3
From 3ccf89d58479d48b8643b1c67c4b9c34b6b97e4a Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Mon, 27 Jul 2009 20:16:17 +0200
Subject: r300/compiler: Make calculate_inputs_outputs available to external
users
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
In the long run, it's probably better to just get rid of InputsRead and
OutputsWritten.
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/radeon_compiler.h | 5 +++++
src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c | 4 ++--
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 15f8b8fd92c..5bdc0754470 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -43,6 +43,9 @@ struct rc_program {
*/
struct rc_instruction Instructions;
+ /* Long term, we should probably remove InputsRead & OutputsWritten,
+ * since updating dependent state can be fragile, and they aren't
+ * actually used very often. */
uint32_t InputsRead;
uint32_t OutputsWritten;
uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */
@@ -66,6 +69,8 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...);
void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program);
+void rc_calculate_inputs_outputs(struct radeon_compiler * c);
+
void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input);
void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask);
void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
index 9d0e265a5d9..aaaa50ad1f9 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
@@ -252,7 +252,7 @@ static void process_instruction(struct nqssadce_state* s)
s->IP = s->IP->Prev;
}
-static void calculateInputs(struct radeon_compiler * c)
+void rc_calculate_inputs_outputs(struct radeon_compiler * c)
{
struct rc_instruction *inst;
@@ -290,5 +290,5 @@ void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* de
while(s.IP != &c->Program.Instructions && !c->Error)
process_instruction(&s);
- calculateInputs(c);
+ rc_calculate_inputs_outputs(c);
}
--
cgit v1.2.3
From 7e2f26cbbf1142951ae0c0c1b732e8799f8cdbc1 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Mon, 27 Jul 2009 15:08:44 -0600
Subject: softpipe: include sp_winsys.h to silence function prototype warning
---
src/gallium/drivers/softpipe/sp_texture.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index 9e19745889e..7a533dad9f0 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -42,6 +42,7 @@
#include "sp_texture.h"
#include "sp_tile_cache.h"
#include "sp_screen.h"
+#include "sp_winsys.h"
/* Simple, maximally packed layout.
--
cgit v1.2.3
From 0ad9eba333bd80cf83f728390c8cd6c573ed446d Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Mon, 27 Jul 2009 15:09:56 -0600
Subject: mesa: separate some finite/pragma Watcom stuff
---
src/mesa/main/compiler.h | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h
index e79bbc2ac5f..9319505a75d 100644
--- a/src/mesa/main/compiler.h
+++ b/src/mesa/main/compiler.h
@@ -110,10 +110,8 @@ extern "C" {
#if defined(_WIN32) && !defined(__WIN32__) && !defined(__CYGWIN__) && !defined(BUILD_FOR_SNAP)
# define __WIN32__
# define finite _finite
-#endif
-#if defined(__WATCOMC__)
+#elif defined(__WATCOMC__)
# define finite _finite
-# pragma disable_message(201) /* Disable unreachable code warnings */
#endif
@@ -135,6 +133,10 @@ extern "C" {
# endif
# endif
#endif
+#if defined(__WATCOMC__)
+# pragma disable_message(201) /* Disable unreachable code warnings */
+#endif
+
/**
--
cgit v1.2.3
From a7427b0f7b2325b8dcc560d57cb894df25ebef93 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Mon, 27 Jul 2009 15:10:28 -0600
Subject: st/mesa: silence warning
---
src/mesa/state_tracker/st_cb_texture.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 15f84b66382..ee71c012c64 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1047,7 +1047,8 @@ st_TexSubimage(GLcontext *ctx, GLint dims, GLenum target, GLint level,
_mesa_image_image_stride(packing, width, height, format, type);
GLint i;
const GLubyte *src;
- enum pipe_transfer_usage transfer_usage;
+ /* init to silence warning only: */
+ enum pipe_transfer_usage transfer_usage = PIPE_TRANSFER_WRITE;
DBG("%s target %s level %d offset %d,%d %dx%d\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(target),
--
cgit v1.2.3
From 722d136f7bd3390c72bca175831647d93393e92d Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Mon, 27 Jul 2009 15:28:49 -0600
Subject: intel: Clean up leak of driver context structure on context destroy.
(cherry picked from commit ddef7dc87b2001fbe117ee5f24a0c645ee95a03c)
---
src/mesa/drivers/dri/intel/intel_context.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index cfd983d3682..9db5b546433 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -804,6 +804,9 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
/* free the Mesa context */
_mesa_free_context_data(&intel->ctx);
+
+ FREE(intel);
+ driContextPriv->driverPrivate = NULL;
}
}
--
cgit v1.2.3
From 3dbaf68bdc1f7427a60bdcc8da635ae7a27aa3cd Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Mon, 27 Jul 2009 15:32:50 -0600
Subject: intel: Fix leak of DRI option info due to using the wrong free
routine.
(cherry picked from commit 6d66f23c50ebe8f973757b6fd1b81c9b7920c447)
---
src/mesa/drivers/dri/intel/intel_screen.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 2c6e2640b08..f810850ef5b 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -305,7 +305,7 @@ intelDestroyScreen(__DRIscreenPrivate * sPriv)
dri_bufmgr_destroy(intelScreen->bufmgr);
intelUnmapScreenRegions(intelScreen);
- driDestroyOptionCache(&intelScreen->optionCache);
+ driDestroyOptionInfo(&intelScreen->optionCache);
FREE(intelScreen);
sPriv->private = NULL;
--
cgit v1.2.3
From 8f397bffa840d9a14ee2e089728119b65d88bb38 Mon Sep 17 00:00:00 2001
From: "RALOVICH, Kristóf"
Date: Thu, 23 Jul 2009 17:56:52 +0200
Subject: glapi: remove XTHREADS support
---
src/mesa/glapi/gl_x86-64_asm.py | 2 +-
src/mesa/glapi/gl_x86_asm.py | 2 +-
src/mesa/glapi/glthread.c | 51 -----------------------------------------
src/mesa/glapi/glthread.h | 44 +----------------------------------
4 files changed, 3 insertions(+), 96 deletions(-)
diff --git a/src/mesa/glapi/gl_x86-64_asm.py b/src/mesa/glapi/gl_x86-64_asm.py
index fa45406f47b..f36ad3a5d82 100644
--- a/src/mesa/glapi/gl_x86-64_asm.py
+++ b/src/mesa/glapi/gl_x86-64_asm.py
@@ -138,7 +138,7 @@ class PrintGenericStubs(gl_XML.gl_print_base):
print '# define GL_PREFIX(n) GLNAME(CONCAT(gl,n))'
print '# endif'
print ''
- print '#if defined(PTHREADS) || defined(USE_XTHREADS) || defined(SOLARIS_THREADS) || defined(WIN32_THREADS) || defined(BEOS_THREADS)'
+ print '#if defined(PTHREADS) || defined(SOLARIS_THREADS) || defined(WIN32_THREADS) || defined(BEOS_THREADS)'
print '# define THREADS'
print '#endif'
print ''
diff --git a/src/mesa/glapi/gl_x86_asm.py b/src/mesa/glapi/gl_x86_asm.py
index 0dbf3ebe0ab..36f0e31fe23 100644
--- a/src/mesa/glapi/gl_x86_asm.py
+++ b/src/mesa/glapi/gl_x86_asm.py
@@ -79,7 +79,7 @@ class PrintGenericStubs(gl_XML.gl_print_base):
print '#define GLOBL_FN(x) GLOBL x'
print '#endif'
print ''
- print '#if defined(PTHREADS) || defined(USE_XTHREADS) || defined(SOLARIS_THREADS) || defined(WIN32_THREADS) || defined(BEOS_THREADS)'
+ print '#if defined(PTHREADS) || defined(SOLARIS_THREADS) || defined(WIN32_THREADS) || defined(BEOS_THREADS)'
print '# define THREADS'
print '#endif'
print ''
diff --git a/src/mesa/glapi/glthread.c b/src/mesa/glapi/glthread.c
index e3abb0f4aee..737fd4d6a84 100644
--- a/src/mesa/glapi/glthread.c
+++ b/src/mesa/glapi/glthread.c
@@ -246,57 +246,6 @@ _glthread_SetTSD(_glthread_TSD *tsd, void *ptr)
#endif /* WIN32_THREADS */
-
-
-/*
- * XFree86 has its own thread wrapper, Xthreads.h
- * We wrap it again for GL.
- */
-#ifdef USE_XTHREADS
-
-unsigned long
-_glthread_GetID(void)
-{
- return (unsigned long) xthread_self();
-}
-
-
-void
-_glthread_InitTSD(_glthread_TSD *tsd)
-{
- if (xthread_key_create(&tsd->key, NULL) != 0) {
- perror(INIT_TSD_ERROR);
- exit(-1);
- }
- tsd->initMagic = INIT_MAGIC;
-}
-
-
-void *
-_glthread_GetTSD(_glthread_TSD *tsd)
-{
- void *ptr;
- if (tsd->initMagic != INIT_MAGIC) {
- _glthread_InitTSD(tsd);
- }
- xthread_get_specific(tsd->key, &ptr);
- return ptr;
-}
-
-
-void
-_glthread_SetTSD(_glthread_TSD *tsd, void *ptr)
-{
- if (tsd->initMagic != INIT_MAGIC) {
- _glthread_InitTSD(tsd);
- }
- xthread_set_specific(tsd->key, ptr);
-}
-
-#endif /* XTHREAD */
-
-
-
/*
* BeOS threads
*/
diff --git a/src/mesa/glapi/glthread.h b/src/mesa/glapi/glthread.h
index dfe09a9d59f..8ec933a8514 100644
--- a/src/mesa/glapi/glthread.h
+++ b/src/mesa/glapi/glthread.h
@@ -71,7 +71,7 @@
#if (defined(PTHREADS) || defined(SOLARIS_THREADS) ||\
- defined(WIN32_THREADS) || defined(USE_XTHREADS) || defined(BEOS_THREADS)) \
+ defined(WIN32_THREADS) || defined(BEOS_THREADS)) \
&& !defined(THREADS)
# define THREADS
#endif
@@ -218,48 +218,6 @@ typedef CRITICAL_SECTION _glthread_Mutex;
#endif /* WIN32_THREADS */
-
-
-/*
- * XFree86 has its own thread wrapper, Xthreads.h
- * We wrap it again for GL.
- */
-#ifdef USE_XTHREADS
-#include
-
-typedef struct {
- xthread_key_t key;
- int initMagic;
-} _glthread_TSD;
-
-typedef xthread_t _glthread_Thread;
-
-typedef xmutex_rec _glthread_Mutex;
-
-#ifdef XMUTEX_INITIALIZER
-#define _glthread_DECLARE_STATIC_MUTEX(name) \
- static _glthread_Mutex name = XMUTEX_INITIALIZER
-#else
-#define _glthread_DECLARE_STATIC_MUTEX(name) \
- static _glthread_Mutex name
-#endif
-
-#define _glthread_INIT_MUTEX(name) \
- xmutex_init(&(name))
-
-#define _glthread_DESTROY_MUTEX(name) \
- xmutex_clear(&(name))
-
-#define _glthread_LOCK_MUTEX(name) \
- (void) xmutex_lock(&(name))
-
-#define _glthread_UNLOCK_MUTEX(name) \
- (void) xmutex_unlock(&(name))
-
-#endif /* USE_XTHREADS */
-
-
-
/*
* BeOS threads. R5.x required.
*/
--
cgit v1.2.3
From bdb8ee51867b88806ec0e17a637b3ef99258e8c6 Mon Sep 17 00:00:00 2001
From: "RALOVICH, Kristóf"
Date: Thu, 23 Jul 2009 17:57:22 +0200
Subject: glapi: regenerated GL API assembly files
---
src/mesa/x86-64/glapi_x86-64.S | 2 +-
src/mesa/x86/glapi_x86.S | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/mesa/x86-64/glapi_x86-64.S b/src/mesa/x86-64/glapi_x86-64.S
index 90ad36a8f36..44179ab6071 100644
--- a/src/mesa/x86-64/glapi_x86-64.S
+++ b/src/mesa/x86-64/glapi_x86-64.S
@@ -45,7 +45,7 @@
# define GL_PREFIX(n) GLNAME(CONCAT(gl,n))
# endif
-#if defined(PTHREADS) || defined(USE_XTHREADS) || defined(SOLARIS_THREADS) || defined(WIN32_THREADS) || defined(BEOS_THREADS)
+#if defined(PTHREADS) || defined(SOLARIS_THREADS) || defined(WIN32_THREADS) || defined(BEOS_THREADS)
# define THREADS
#endif
diff --git a/src/mesa/x86/glapi_x86.S b/src/mesa/x86/glapi_x86.S
index 40fc6f22297..fa25bf75cd2 100644
--- a/src/mesa/x86/glapi_x86.S
+++ b/src/mesa/x86/glapi_x86.S
@@ -52,7 +52,7 @@
#define GLOBL_FN(x) GLOBL x
#endif
-#if defined(PTHREADS) || defined(USE_XTHREADS) || defined(SOLARIS_THREADS) || defined(WIN32_THREADS) || defined(BEOS_THREADS)
+#if defined(PTHREADS) || defined(SOLARIS_THREADS) || defined(WIN32_THREADS) || defined(BEOS_THREADS)
# define THREADS
#endif
--
cgit v1.2.3
From 27e55588e0f6c8fb570d3ae601319ed001b7e02a Mon Sep 17 00:00:00 2001
From: "RALOVICH, Kristóf"
Date: Thu, 23 Jul 2009 17:57:48 +0200
Subject: docs: do not mentions xthreads any more
---
docs/dispatch.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/dispatch.html b/docs/dispatch.html
index b9ea8822e60..bcab74c7070 100644
--- a/docs/dispatch.html
+++ b/docs/dispatch.html
@@ -198,7 +198,7 @@ few preprocessor defines.
If GLX_USE_TLS is defined, method #4 is used.
If PTHREADS is defined, method #3 is used.
-If any of PTHREADS , USE_XTHREADS ,
+ If any of PTHREADS ,
SOLARIS_THREADS , WIN32_THREADS , or BEOS_THREADS
is defined, method #2 is used.
If none of the preceeding are defined, method #1 is used.
--
cgit v1.2.3
From 8363dff251fc38b044447bcb173d960b03073974 Mon Sep 17 00:00:00 2001
From: "RALOVICH, Kristóf"
Date: Thu, 23 Jul 2009 17:58:08 +0200
Subject: glx: remove XTHREADS support
---
src/glx/x11/glxclient.h | 14 ++++----------
src/glx/x11/glxcurrent.c | 40 +---------------------------------------
src/glx/x11/glxext.c | 11 -----------
3 files changed, 5 insertions(+), 60 deletions(-)
diff --git a/src/glx/x11/glxclient.h b/src/glx/x11/glxclient.h
index bf68d0f8910..aff1c85fba1 100644
--- a/src/glx/x11/glxclient.h
+++ b/src/glx/x11/glxclient.h
@@ -55,9 +55,7 @@
#include "GL/internal/glcore.h"
#include "glapi/glapitable.h"
#include "glxhash.h"
-#if defined( USE_XTHREADS )
-# include
-#elif defined( PTHREADS )
+#if defined( PTHREADS )
# include
#endif
@@ -623,7 +621,7 @@ extern void __glXPreferEGL(int state);
extern int __glXDebug;
/* This is per-thread storage in an MT environment */
-#if defined( USE_XTHREADS ) || defined( PTHREADS )
+#if defined( PTHREADS )
extern void __glXSetCurrentContext(__GLXcontext *c);
@@ -646,7 +644,7 @@ extern __GLXcontext *__glXcurrentContext;
#define __glXGetCurrentContext() __glXcurrentContext
#define __glXSetCurrentContext(gc) __glXcurrentContext = gc
-#endif /* defined( USE_XTHREADS ) || defined( PTHREADS ) */
+#endif /* defined( PTHREADS ) */
extern void __glXSetCurrentContextNull(void);
@@ -657,11 +655,7 @@ extern void __glXFreeContext(__GLXcontext*);
** Global lock for all threads in this address space using the GLX
** extension
*/
-#if defined( USE_XTHREADS )
-extern xmutex_rec __glXmutex;
-#define __glXLock() xmutex_lock(&__glXmutex)
-#define __glXUnlock() xmutex_unlock(&__glXmutex)
-#elif defined( PTHREADS )
+#if defined( PTHREADS )
extern pthread_mutex_t __glXmutex;
#define __glXLock() pthread_mutex_lock(&__glXmutex)
#define __glXUnlock() pthread_mutex_unlock(&__glXmutex)
diff --git a/src/glx/x11/glxcurrent.c b/src/glx/x11/glxcurrent.c
index d44e0dd1fc8..ce037e0ef41 100644
--- a/src/glx/x11/glxcurrent.c
+++ b/src/glx/x11/glxcurrent.c
@@ -73,45 +73,7 @@ static __GLapi *IndirectAPI = NULL;
* Current context management and locking
*/
-#if defined( USE_XTHREADS )
-
-/* thread safe */
-static GLboolean TSDinitialized = GL_FALSE;
-static xthread_key_t ContextTSD;
-
-_X_HIDDEN __GLXcontext *
-__glXGetCurrentContext(void)
-{
- if (!TSDinitialized) {
- xthread_key_create(&ContextTSD, NULL);
- TSDinitialized = GL_TRUE;
- return &dummyContext;
- }
- else {
- void *p;
- xthread_get_specific(ContextTSD, &p);
- if (!p)
- return &dummyContext;
- else
- return (__GLXcontext *) p;
- }
-}
-
-_X_HIDDEN void
-__glXSetCurrentContext(__GLXcontext * c)
-{
- if (!TSDinitialized) {
- xthread_key_create(&ContextTSD, NULL);
- TSDinitialized = GL_TRUE;
- }
- xthread_set_specific(ContextTSD, c);
-}
-
-
-/* Used by the __glXLock() and __glXUnlock() macros */
-_X_HIDDEN xmutex_rec __glXmutex;
-
-#elif defined( PTHREADS )
+#if defined( PTHREADS )
_X_HIDDEN pthread_mutex_t __glXmutex = PTHREAD_MUTEX_INITIALIZER;
diff --git a/src/glx/x11/glxext.c b/src/glx/x11/glxext.c
index b296b7c651c..e07ed29790c 100644
--- a/src/glx/x11/glxext.c
+++ b/src/glx/x11/glxext.c
@@ -646,17 +646,6 @@ __glXInitialize(Display * dpy)
Bool glx_direct, glx_accel;
#endif
-#if defined(USE_XTHREADS)
- {
- static int firstCall = 1;
- if (firstCall) {
- /* initialize the GLX mutexes */
- xmutex_init(&__glXmutex);
- firstCall = 0;
- }
- }
-#endif
-
/* The one and only long long lock */
__glXLock();
--
cgit v1.2.3
From 6c03563af70e924854da2c0a06099616de08a610 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Mon, 27 Jul 2009 15:42:29 -0600
Subject: mesa: regenerated file
---
src/mesa/drivers/dri/common/extension_helper.h | 30 +++++++++++++-------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/src/mesa/drivers/dri/common/extension_helper.h b/src/mesa/drivers/dri/common/extension_helper.h
index e308fd28311..62801494ce3 100644
--- a/src/mesa/drivers/dri/common/extension_helper.h
+++ b/src/mesa/drivers/dri/common/extension_helper.h
@@ -1771,13 +1771,6 @@ static const char DeleteFencesNV_names[] =
"";
#endif
-#if defined(need_GL_SGIX_polynomial_ffd)
-static const char DeformationMap3dSGIX_names[] =
- "iddiiddiiddiip\0" /* Parameter signature */
- "glDeformationMap3dSGIX\0"
- "";
-#endif
-
#if defined(need_GL_VERSION_2_0)
static const char IsShader_names[] =
"i\0" /* Parameter signature */
@@ -2004,6 +1997,13 @@ static const char GetCombinerOutputParameterivNV_names[] =
"";
#endif
+#if defined(need_GL_IBM_multimode_draw_arrays)
+static const char MultiModeDrawArraysIBM_names[] =
+ "pppii\0" /* Parameter signature */
+ "glMultiModeDrawArraysIBM\0"
+ "";
+#endif
+
#if defined(need_GL_SGIS_pixel_texture)
static const char PixelTexGenParameterivSGIS_names[] =
"ip\0" /* Parameter signature */
@@ -3920,13 +3920,6 @@ static const char VertexAttribs4dvNV_names[] =
"";
#endif
-#if defined(need_GL_IBM_multimode_draw_arrays)
-static const char MultiModeDrawArraysIBM_names[] =
- "pppii\0" /* Parameter signature */
- "glMultiModeDrawArraysIBM\0"
- "";
-#endif
-
#if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program)
static const char VertexAttrib4dARB_names[] =
"idddd\0" /* Parameter signature */
@@ -4611,6 +4604,13 @@ static const char Minmax_names[] =
"";
#endif
+#if defined(need_GL_SGIX_polynomial_ffd)
+static const char DeformationMap3dSGIX_names[] =
+ "iddiiddiiddiip\0" /* Parameter signature */
+ "glDeformationMap3dSGIX\0"
+ "";
+#endif
+
#if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_fog_coord)
static const char FogCoorddvEXT_names[] =
"p\0" /* Parameter signature */
@@ -6131,9 +6131,9 @@ static const struct dri_extension_function GL_SGIX_pixel_texture_functions[] = {
#if defined(need_GL_SGIX_polynomial_ffd)
static const struct dri_extension_function GL_SGIX_polynomial_ffd_functions[] = {
{ LoadIdentityDeformationMapSGIX_names, LoadIdentityDeformationMapSGIX_remap_index, -1 },
- { DeformationMap3dSGIX_names, DeformationMap3dSGIX_remap_index, -1 },
{ DeformSGIX_names, DeformSGIX_remap_index, -1 },
{ DeformationMap3fSGIX_names, DeformationMap3fSGIX_remap_index, -1 },
+ { DeformationMap3dSGIX_names, DeformationMap3dSGIX_remap_index, -1 },
{ NULL, 0, 0 }
};
#endif
--
cgit v1.2.3
From fcf317ac16e72cff754640cb6c7490531d5de667 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Mon, 27 Jul 2009 18:12:30 -0400
Subject: r600: fix _REV texture format component swizzles
---
src/mesa/drivers/dri/r600/r600_texstate.c | 60 +++++++++++++++----------------
1 file changed, 30 insertions(+), 30 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index c76292a5f8e..1cf3b484ae6 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -99,13 +99,13 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
- SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
- SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_ARGB8888:
@@ -125,13 +125,13 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_RGB888:
@@ -190,13 +190,13 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
SETfield(t->SQ_TEX_RESOURCE1, FMT_4_4_4_4,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_ARGB1555:
@@ -216,13 +216,13 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
SETfield(t->SQ_TEX_RESOURCE1, FMT_1_5_5_5,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_AL88:
@@ -708,13 +708,13 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
pitch_val /= 4;
break;
@@ -723,11 +723,11 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
@@ -840,11 +840,11 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
@@ -852,13 +852,13 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
}
pitch_val /= 4;
--
cgit v1.2.3
From 506bacb8e40b0a170a4b620113506925d2333735 Mon Sep 17 00:00:00 2001
From: Cooper Yuan
Date: Tue, 28 Jul 2009 13:57:07 +0800
Subject: R6xx/r7xx: enable flat shading, this can fix quadric/accanti/accpersp
---
src/mesa/drivers/dri/r600/r700_state.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index bd0abc06e38..3812b26e136 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -808,9 +808,11 @@ static void r700ShadeModel(GLcontext * ctx, GLenum mode) //--------------------
switch (mode) {
case GL_FLAT:
SETbit(r700->SPI_INTERP_CONTROL_0.u32All, FLAT_SHADE_ENA_bit);
+ SETbit(r700->SPI_PS_INPUT_CNTL_0.u32All, FLAT_SHADE_bit);
break;
case GL_SMOOTH:
CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, FLAT_SHADE_ENA_bit);
+ CLEARbit(r700->SPI_PS_INPUT_CNTL_0.u32All, FLAT_SHADE_bit);
break;
default:
return;
--
cgit v1.2.3
From bc60b884110b9e41ee3082075717587cc38380b5 Mon Sep 17 00:00:00 2001
From: Vinson Lee
Date: Tue, 28 Jul 2009 08:54:14 -0600
Subject: progs/trivial: add missing files to Makefile, .gitignore
---
progs/trivial/.gitignore | 15 +++++++++++++++
progs/trivial/Makefile | 8 ++++++++
2 files changed, 23 insertions(+)
diff --git a/progs/trivial/.gitignore b/progs/trivial/.gitignore
index 8dcb20a68ff..348431478d9 100644
--- a/progs/trivial/.gitignore
+++ b/progs/trivial/.gitignore
@@ -5,10 +5,17 @@ clear-random
clear-repeat
clear-scissor
clear-undefined
+createwin
+dlist-begin-call-end
dlist-dangling
dlist-degenerate
dlist-edgeflag
dlist-edgeflag-dangling
+dlist-flat-tri
+dlist-mat-tri
+dlist-recursive-call
+dlist-tri-flat-tri
+dlist-tri-mat-tri
draw2arrays
drawarrays
drawelements
@@ -30,6 +37,7 @@ lineloop
lineloop-clip
lineloop-elts
linestrip
+linestrip-clip
linestrip-flat-stipple
linestrip-stipple
linestrip-stipple-wide
@@ -70,8 +78,10 @@ quadstrip-cont
quadstrip-flat
readtex.c
readtex.h
+readpixels
tri
tri-alpha
+tri-alpha-tex
tri-array-interleaved
tri-blend
tri-blend-color
@@ -79,6 +89,7 @@ tri-blend-max
tri-blend-min
tri-blend-revsub
tri-blend-sub
+tri-clear
tri-clip
tri-cull
tri-cull-both
@@ -93,6 +104,7 @@ tri-fog
tri-fp
tri-fp-const-imm
tri-lit
+tri-lit-material
tri-logicop-none
tri-logicop-xor
tri-mask-tri
@@ -101,6 +113,7 @@ tri-orig
tri-query
tri-repeat
tri-scissor-tri
+tri-square
tri-stencil
tri-stipple
tri-tex
@@ -110,6 +123,7 @@ tri-unfilled
tri-unfilled-clip
tri-unfilled-edgeflag
tri-unfilled-fog
+tri-unfilled-point
tri-unfilled-smooth
tri-unfilled-tri
tri-unfilled-tri-lit
@@ -118,6 +132,7 @@ tri-unfilled-userclip-stip
tri-userclip
tri-viewport
tri-z
+tri-z-9
tri-z-eq
trifan
trifan-flat
diff --git a/progs/trivial/Makefile b/progs/trivial/Makefile
index 3bd8faff99a..70728616d28 100644
--- a/progs/trivial/Makefile
+++ b/progs/trivial/Makefile
@@ -18,6 +18,7 @@ SOURCES = \
clear-repeat.c \
clear-random.c \
clear.c \
+ createwin.c \
dlist-dangling.c \
dlist-flat-tri.c \
dlist-mat-tri.c \
@@ -48,6 +49,7 @@ SOURCES = \
lineloop-clip.c \
lineloop-elts.c \
lineloop.c \
+ linestrip-clip.c \
linestrip-flat-stipple.c \
linestrip-stipple-wide.c \
linestrip-stipple.c \
@@ -87,7 +89,9 @@ SOURCES = \
quadstrip-cont.c \
quadstrip-flat.c \
quadstrip.c \
+ readpixels.c \
tri-alpha.c \
+ tri-alpha-tex.c \
tri-array-interleaved.c \
tri-blend-color.c \
tri-blend-max.c \
@@ -95,6 +99,7 @@ SOURCES = \
tri-blend-revsub.c \
tri-blend-sub.c \
tri-blend.c \
+ tri-clear.c \
tri-clip.c \
tri-cull-both.c \
tri-cull.c \
@@ -117,6 +122,7 @@ SOURCES = \
tri-query.c \
tri-repeat.c \
tri-scissor-tri.c \
+ tri-square.c \
tri-stencil.c \
tri-stipple.c \
tri-multitex-vbo.c \
@@ -126,6 +132,7 @@ SOURCES = \
tri-unfilled-fog.c \
tri-unfilled-edgeflag.c \
tri-unfilled-clip.c \
+ tri-unfilled-point.c \
tri-unfilled-smooth.c \
tri-unfilled-tri.c \
tri-unfilled-tri-lit.c \
@@ -134,6 +141,7 @@ SOURCES = \
tri-unfilled.c \
tri-userclip.c \
tri-viewport.c \
+ tri-z-9.c \
tri-z-eq.c \
tri-z.c \
tri.c \
--
cgit v1.2.3
From a744a496d5280ebcd3225debdb2f82589486f89a Mon Sep 17 00:00:00 2001
From: Chia-I Wu
Date: Mon, 27 Jul 2009 16:57:36 -0600
Subject: egl: Comment out unused tables in_eglFillInConfigs
This silences a compiler warning.
Signed-off-by: Chia-I Wu
---
src/egl/main/eglconfigutil.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/egl/main/eglconfigutil.c b/src/egl/main/eglconfigutil.c
index 138dc729e74..c9d00e79826 100644
--- a/src/egl/main/eglconfigutil.c
+++ b/src/egl/main/eglconfigutil.c
@@ -156,6 +156,7 @@ _eglFillInConfigs(_EGLConfig * configs,
{0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000}, /* 8_8_8_8_REV */
};
+#if 0
static const uint32_t masks_table_bgr[8][4] = {
{0x00000000, 0x00000000, 0x00000000, 0x00000000},
{0x00000000, 0x00000000, 0x00000000, 0x00000000},
@@ -177,6 +178,7 @@ _eglFillInConfigs(_EGLConfig * configs,
{0x00000000, 0x00000000, 0x00000000, 0x00000000},
{0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000}, /* 8_8_8_8_REV */
};
+#endif
static const uint8_t bytes_per_pixel[8] = {
0, 0, 0, 2, 2, 4, 0, 4
--
cgit v1.2.3
From 94726bc69e5f9dbefb34a38695f2f51d81ff433f Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Mon, 27 Jul 2009 17:18:05 -0600
Subject: gallium: minor code/comments clean-up
---
src/gallium/auxiliary/util/u_mm.c | 22 ++++++++++++----------
src/gallium/auxiliary/util/u_mm.h | 2 +-
2 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/src/gallium/auxiliary/util/u_mm.c b/src/gallium/auxiliary/util/u_mm.c
index 151a480d34d..4b75d4ba1d0 100644
--- a/src/gallium/auxiliary/util/u_mm.c
+++ b/src/gallium/auxiliary/util/u_mm.c
@@ -33,30 +33,32 @@
void
u_mmDumpMemInfo(const struct mem_block *heap)
{
- debug_printf("Memory heap %p:\n", (void *)heap);
+ debug_printf("Memory heap %p:\n", (void *) heap);
if (heap == 0) {
debug_printf(" heap == 0\n");
- } else {
+ }
+ else {
const struct mem_block *p;
- for(p = heap->next; p != heap; p = p->next) {
- debug_printf(" Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size,
- p->free ? 'F':'.',
- p->reserved ? 'R':'.');
+ for (p = heap->next; p != heap; p = p->next) {
+ debug_printf(" Offset:%08x, Size:%08x, %c%c\n", p->ofs, p->size,
+ p->free ? 'F':'.',
+ p->reserved ? 'R':'.');
}
debug_printf("\nFree list:\n");
- for(p = heap->next_free; p != heap; p = p->next_free) {
- debug_printf(" FREE Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size,
- p->free ? 'F':'.',
- p->reserved ? 'R':'.');
+ for (p = heap->next_free; p != heap; p = p->next_free) {
+ debug_printf(" FREE Offset:%08x, Size:%08x, %c%c\n", p->ofs, p->size,
+ p->free ? 'F':'.',
+ p->reserved ? 'R':'.');
}
}
debug_printf("End of memory blocks\n");
}
+
struct mem_block *
u_mmInit(int ofs, int size)
{
diff --git a/src/gallium/auxiliary/util/u_mm.h b/src/gallium/auxiliary/util/u_mm.h
index ce20e487635..6b158aae6e4 100644
--- a/src/gallium/auxiliary/util/u_mm.h
+++ b/src/gallium/auxiliary/util/u_mm.h
@@ -84,7 +84,7 @@ extern struct mem_block *u_mmFindBlock(struct mem_block *heap, int start);
extern void u_mmDestroy(struct mem_block *mmInit);
/**
- * For debuging purpose.
+ * For debugging purposes.
*/
extern void u_mmDumpMemInfo(const struct mem_block *mmInit);
--
cgit v1.2.3
From 4d648523aa01af3c9111f5d6394866396ebfb7a2 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 28 Jul 2009 11:10:38 -0400
Subject: r600: disable flat shade fix in
506bacb8e40b0a170a4b620113506925d2333735
This breaks textures. We need to only set this bit for
attributes that that need flat shading.
---
src/mesa/drivers/dri/r600/r700_state.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index 3812b26e136..5563a63156c 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -808,11 +808,11 @@ static void r700ShadeModel(GLcontext * ctx, GLenum mode) //--------------------
switch (mode) {
case GL_FLAT:
SETbit(r700->SPI_INTERP_CONTROL_0.u32All, FLAT_SHADE_ENA_bit);
- SETbit(r700->SPI_PS_INPUT_CNTL_0.u32All, FLAT_SHADE_bit);
+ //SETbit(r700->SPI_PS_INPUT_CNTL_0.u32All, FLAT_SHADE_bit);
break;
case GL_SMOOTH:
CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, FLAT_SHADE_ENA_bit);
- CLEARbit(r700->SPI_PS_INPUT_CNTL_0.u32All, FLAT_SHADE_bit);
+ //CLEARbit(r700->SPI_PS_INPUT_CNTL_0.u32All, FLAT_SHADE_bit);
break;
default:
return;
--
cgit v1.2.3
From ce0ad53281f236424a72ae021f293a3a5ca69217 Mon Sep 17 00:00:00 2001
From: "RALOVICH, Kristóf"
Date: Thu, 23 Jul 2009 17:03:54 +0200
Subject: glx: cache DRI configs in __GLXscreenConfigsRec
---
src/glx/x11/glxclient.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/glx/x11/glxclient.h b/src/glx/x11/glxclient.h
index bf68d0f8910..2778ad878e7 100644
--- a/src/glx/x11/glxclient.h
+++ b/src/glx/x11/glxclient.h
@@ -501,6 +501,8 @@ struct __GLXscreenConfigsRec {
__GLXDRIscreen *driScreen;
+ const __DRIconfig** driver_configs;
+
#ifdef __DRI_COPY_SUB_BUFFER
const __DRIcopySubBufferExtension *driCopySubBuffer;
#endif
--
cgit v1.2.3
From e32b601e7d79e8fa67b3e9f636125eebc01f3884 Mon Sep 17 00:00:00 2001
From: "RALOVICH, Kristóf"
Date: Thu, 23 Jul 2009 17:04:52 +0200
Subject: glx: properly release DRI configs
Release per screen DRI driver configs during screen destruction.
---
src/glx/x11/glxext.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/src/glx/x11/glxext.c b/src/glx/x11/glxext.c
index b296b7c651c..3078662c9da 100644
--- a/src/glx/x11/glxext.c
+++ b/src/glx/x11/glxext.c
@@ -149,6 +149,12 @@ FreeScreenConfigs(__GLXdisplayPrivate * priv)
Xfree((char *) psc->serverGLXexts);
#ifdef GLX_DIRECT_RENDERING
+ if (psc->driver_configs) {
+ for(unsigned int i = 0; psc->driver_configs[i]; i++)
+ free((__DRIconfig*)psc->driver_configs[i]);
+ free(psc->driver_configs);
+ psc->driver_configs = NULL;
+ }
if (psc->driScreen) {
psc->driScreen->destroyScreen(psc);
__glxHashDestroy(psc->drawHash);
--
cgit v1.2.3
From 82f4dc21cc5bce9e64fd53d158f7162770e9b652 Mon Sep 17 00:00:00 2001
From: "RALOVICH, Kristóf"
Date: Thu, 23 Jul 2009 17:05:43 +0200
Subject: glx: assign per screen driver configs (DRI2)
---
src/glx/x11/dri2_glx.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/glx/x11/dri2_glx.c b/src/glx/x11/dri2_glx.c
index fb31898db2d..c5635a94ab6 100644
--- a/src/glx/x11/dri2_glx.c
+++ b/src/glx/x11/dri2_glx.c
@@ -481,6 +481,8 @@ static __GLXDRIscreen *dri2CreateScreen(__GLXscreenConfigs *psc, int screen,
psc->configs = driConvertConfigs(psc->core, psc->configs, driver_configs);
psc->visuals = driConvertConfigs(psc->core, psc->visuals, driver_configs);
+ psc->driver_configs = driver_configs;
+
psp->destroyScreen = dri2DestroyScreen;
psp->createContext = dri2CreateContext;
psp->createDrawable = dri2CreateDrawable;
--
cgit v1.2.3
From d090ba9e00c7c7893109ae763385c2e0a66eb16f Mon Sep 17 00:00:00 2001
From: "RALOVICH, Kristóf"
Date: Thu, 23 Jul 2009 17:05:50 +0200
Subject: glx: assign per screen driver configs (DRI)
---
src/glx/x11/dri_glx.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/glx/x11/dri_glx.c b/src/glx/x11/dri_glx.c
index ac2eb05341a..d24471c4369 100644
--- a/src/glx/x11/dri_glx.c
+++ b/src/glx/x11/dri_glx.c
@@ -418,6 +418,8 @@ CallCreateNewScreen(Display *dpy, int scrn, __GLXscreenConfigs *psc,
psc->configs = driConvertConfigs(psc->core, psc->configs, driver_configs);
psc->visuals = driConvertConfigs(psc->core, psc->visuals, driver_configs);
+ psc->driver_configs = driver_configs;
+
/* Visuals with depth != screen depth are subject to automatic compositing
* in the X server, so DRI1 can't render to them properly. Mark them as
* non-conformant to prevent apps from picking them up accidentally.
--
cgit v1.2.3
From 45f4e8842e3dae9d8be2a38769a57a524fdc335f Mon Sep 17 00:00:00 2001
From: "RALOVICH, Kristóf"
Date: Thu, 23 Jul 2009 17:05:59 +0200
Subject: glx: assign per screen driver configs (DRISW)
---
src/glx/x11/drisw_glx.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/glx/x11/drisw_glx.c b/src/glx/x11/drisw_glx.c
index 1c229dde900..15e15866582 100644
--- a/src/glx/x11/drisw_glx.c
+++ b/src/glx/x11/drisw_glx.c
@@ -401,7 +401,7 @@ driCreateScreen(__GLXscreenConfigs * psc, int screen,
psc->configs = driConvertConfigs(psc->core, psc->configs, driver_configs);
psc->visuals = driConvertConfigs(psc->core, psc->visuals, driver_configs);
- free(driver_configs);
+ psc->driver_configs = driver_configs;
psp->destroyScreen = driDestroyScreen;
psp->createContext = driCreateContext;
--
cgit v1.2.3
From b1f7c844a38418cbf3fbcef6c2fb0606a1dd6b7e Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 28 Jul 2009 15:53:45 -0400
Subject: r600: move r700TranslateFragmentShader into r700UpdateShaders
---
src/mesa/drivers/dri/r600/r700_render.c | 17 -----------------
src/mesa/drivers/dri/r600/r700_state.c | 22 +++++++++++++++++++---
2 files changed, 19 insertions(+), 20 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 5a2bf84b59e..ea8419d3d2d 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -276,20 +276,12 @@ static GLboolean r700RunRender(GLcontext * ctx,
context_t *context = R700_CONTEXT(ctx);
R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
int lastIndex = 0;
-#if 1
BATCH_LOCALS(&context->radeon);
unsigned int i, j;
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *vb = &tnl->vb;
- struct r700_fragment_program *fp = (struct r700_fragment_program *)
- (ctx->FragmentProgram._Current);
- if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
- {
- fp->r700AsmCode.bR6xx = 1;
- }
-
r700Start3D(context); /* TODO : this is too much. */
r700SendSQConfig(context);
@@ -308,14 +300,6 @@ static GLboolean r700RunRender(GLcontext * ctx,
r600UpdateTextureState(ctx);
r700SendTextureState(context);
- if(GL_FALSE == fp->translated)
- {
- if( GL_FALSE == r700TranslateFragmentShader(fp, &(fp->mesa_program)) )
- {
- return GL_TRUE;
- }
- }
-
r700SetupShaders(ctx);
r700SendFSState(context); // FIXME just a place holder for now
@@ -391,7 +375,6 @@ static GLboolean r700RunRender(GLcontext * ctx,
radeonReleaseArrays(ctx, 0);
-#endif //0
rcommonFlushCmdBuf( &context->radeon, __FUNCTION__ );
return GL_FALSE;
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index 5563a63156c..c24c859ef58 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -72,12 +72,28 @@ void r700UpdateShaders (GLcontext * ctx) //----------------------------------
GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
-
- struct r700_vertex_program *vp;
- int i;
+ int i;
+
+ if (ctx->FragmentProgram._Current) {
+ struct r700_fragment_program *fp = (struct r700_fragment_program *)
+ (ctx->FragmentProgram._Current);
+ if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
+ {
+ fp->r700AsmCode.bR6xx = 1;
+ }
+
+ if(GL_FALSE == fp->translated)
+ {
+ if( GL_FALSE == r700TranslateFragmentShader(fp, &(fp->mesa_program)) )
+ {
+ //return GL_TRUE;
+ }
+ }
+ }
if (context->radeon.NewGLState)
{
+ struct r700_vertex_program *vp;
context->radeon.NewGLState = 0;
for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++)
--
cgit v1.2.3
From dbdb3952c16e13117891a3c52db4e05c472e96b8 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 28 Jul 2009 15:58:01 -0400
Subject: r600: don't call r700UpdateShaders twice for each render
---
src/mesa/drivers/dri/r600/r700_render.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index ea8419d3d2d..f2fec27effa 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -403,10 +403,6 @@ static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/
return GL_TRUE;
}
- context_t *context = R700_CONTEXT(ctx);
-
- r700UpdateShaders(ctx);
-
bRet = r700RunRender(ctx, stage);
return bRet;
--
cgit v1.2.3
From 719abd7fc088c5ebc567e9ea20bdd6fc9fe1af3b Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 28 Jul 2009 16:58:41 -0400
Subject: r600: fix flat shading
Set the flat shading bit on the appropriate PS input
depending on the type of attribute it is. The VS output
and PS input routing should probably be made more dynamic
at some point. We may want to use semantic ids to make
it easier.
---
src/mesa/drivers/dri/r600/r700_chip.c | 44 ++++++-----------------
src/mesa/drivers/dri/r600/r700_chip.h | 33 +-----------------
src/mesa/drivers/dri/r600/r700_fragprog.c | 58 ++++++++++++++++++++++++++-----
src/mesa/drivers/dri/r600/r700_state.c | 6 ----
4 files changed, 60 insertions(+), 81 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index e683c8cf920..c083862f369 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -213,39 +213,6 @@ GLboolean r700InitChipObject(context_t *context)
LINK_STATES(SPI_VS_OUT_ID_8);
LINK_STATES(SPI_VS_OUT_ID_9);
- LINK_STATES(SPI_PS_INPUT_CNTL_0);
- LINK_STATES(SPI_PS_INPUT_CNTL_1);
- LINK_STATES(SPI_PS_INPUT_CNTL_2);
- LINK_STATES(SPI_PS_INPUT_CNTL_3);
- LINK_STATES(SPI_PS_INPUT_CNTL_4);
- LINK_STATES(SPI_PS_INPUT_CNTL_5);
- LINK_STATES(SPI_PS_INPUT_CNTL_6);
- LINK_STATES(SPI_PS_INPUT_CNTL_7);
- LINK_STATES(SPI_PS_INPUT_CNTL_8);
- LINK_STATES(SPI_PS_INPUT_CNTL_9);
- LINK_STATES(SPI_PS_INPUT_CNTL_10);
- LINK_STATES(SPI_PS_INPUT_CNTL_11);
- LINK_STATES(SPI_PS_INPUT_CNTL_12);
- LINK_STATES(SPI_PS_INPUT_CNTL_13);
- LINK_STATES(SPI_PS_INPUT_CNTL_14);
- LINK_STATES(SPI_PS_INPUT_CNTL_15);
- LINK_STATES(SPI_PS_INPUT_CNTL_16);
- LINK_STATES(SPI_PS_INPUT_CNTL_17);
- LINK_STATES(SPI_PS_INPUT_CNTL_18);
- LINK_STATES(SPI_PS_INPUT_CNTL_19);
- LINK_STATES(SPI_PS_INPUT_CNTL_20);
- LINK_STATES(SPI_PS_INPUT_CNTL_21);
- LINK_STATES(SPI_PS_INPUT_CNTL_22);
- LINK_STATES(SPI_PS_INPUT_CNTL_23);
- LINK_STATES(SPI_PS_INPUT_CNTL_24);
- LINK_STATES(SPI_PS_INPUT_CNTL_25);
- LINK_STATES(SPI_PS_INPUT_CNTL_26);
- LINK_STATES(SPI_PS_INPUT_CNTL_27);
- LINK_STATES(SPI_PS_INPUT_CNTL_28);
- LINK_STATES(SPI_PS_INPUT_CNTL_29);
- LINK_STATES(SPI_PS_INPUT_CNTL_30);
- LINK_STATES(SPI_PS_INPUT_CNTL_31);
-
LINK_STATES(SPI_VS_OUT_CONFIG);
LINK_STATES(SPI_THREAD_GROUPING);
LINK_STATES(SPI_PS_IN_CONTROL_0);
@@ -435,12 +402,21 @@ GLboolean r700SendContextStates(context_t *context)
};
END_BATCH();
};
+
+ /* todo:
+ * - split this into a separate function?
+ * - only emit the ones we use
+ */
+ BEGIN_BATCH_NO_AUTOSTATE(2 + R700_MAX_SHADER_EXPORTS);
+ R600_OUT_BATCH_REGSEQ(SPI_PS_INPUT_CNTL_0, R700_MAX_SHADER_EXPORTS);
+ for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++)
+ R600_OUT_BATCH(r700->SPI_PS_INPUT_CNTL[ui].u32All);
+ END_BATCH();
COMMIT_BATCH();
return GL_TRUE;
}
-
GLboolean r700SendDepthTargetState(context_t *context, int id)
{
R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
diff --git a/src/mesa/drivers/dri/r600/r700_chip.h b/src/mesa/drivers/dri/r600/r700_chip.h
index ca3364bb489..4e89c75f2f9 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.h
+++ b/src/mesa/drivers/dri/r600/r700_chip.h
@@ -455,38 +455,7 @@ typedef struct _R700_CHIP_CONTEXT
union UINT_FLOAT SQ_VTX_SEMANTIC_30 ; /* 0xA0FE */
union UINT_FLOAT SQ_VTX_SEMANTIC_31 ; /* 0xA0FF */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_0 ; /* 0xA191 */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_1 ; /* 0xA192 */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_2 ; /* 0xA193 */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_3 ; /* 0xA194 */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_4 ; /* 0xA195 */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_5 ; /* 0xA196 */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_6 ; /* 0xA197 */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_7 ; /* 0xA198 */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_8 ; /* 0xA199 */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_9 ; /* 0xA19A */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_10 ; /* 0xA19B */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_11 ; /* 0xA19C */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_12 ; /* 0xA19D */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_13 ; /* 0xA19E */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_14 ; /* 0xA19F */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_15 ; /* 0xA1A0 */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_16 ; /* 0xA1A1 */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_17 ; /* 0xA1A2 */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_18 ; /* 0xA1A3 */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_19 ; /* 0xA1A4 */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_20 ; /* 0xA1A5 */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_21 ; /* 0xA1A6 */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_22 ; /* 0xA1A7 */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_23 ; /* 0xA1A8 */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_24 ; /* 0xA1A9 */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_25 ; /* 0xA1AA */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_26 ; /* 0xA1AB */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_27 ; /* 0xA1AC */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_28 ; /* 0xA1AD */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_29 ; /* 0xA1AE */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_30 ; /* 0xA1AF */
- union UINT_FLOAT SPI_PS_INPUT_CNTL_31 ; /* 0xA1B0 */
+ union UINT_FLOAT SPI_PS_INPUT_CNTL[R700_MAX_SHADER_EXPORTS];
// shaders
PS_STATE_STRUCT ps;
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index 3afd0b05288..88e66491ba8 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -255,20 +255,20 @@ void * r700GetActiveFpShaderBo(GLcontext * ctx)
GLboolean r700SetupFragmentProgram(GLcontext * ctx)
{
- context_t *context = R700_CONTEXT(ctx);
+ context_t *context = R700_CONTEXT(ctx);
BATCH_LOCALS(&context->radeon);
-
R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
struct r700_fragment_program *fp = (struct r700_fragment_program *)
(ctx->FragmentProgram._Current);
-
+ r700_AssemblerBase *pAsm = &(fp->r700AsmCode);
+ struct gl_fragment_program *mesa_fp = &(fp->mesa_program);
struct gl_program_parameter_list *paramList;
unsigned int unNumParamData;
- unsigned int ui;
-
+ unsigned int ui, i;
unsigned int unNumOfReg;
-
+ unsigned int unBit;
+
if(GL_FALSE == fp->loaded)
{
if(fp->r700Shader.bNeedsAssembly == GL_TRUE)
@@ -277,11 +277,11 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
}
/* Load fp to gpu */
- r600EmitShader(ctx,
- &(fp->shaderbo),
+ r600EmitShader(ctx,
+ &(fp->shaderbo),
(GLvoid *)(fp->r700Shader.pProgram),
fp->r700Shader.uShaderBinaryDWORDSize,
- "FS");
+ "FS");
fp->loaded = GL_TRUE;
}
@@ -362,6 +362,46 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
COMMIT_BATCH();
}
+ // emit ps input map
+ unBit = 1 << FRAG_ATTRIB_COL0;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0];
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ }
+
+ unBit = 1 << FRAG_ATTRIB_COL1;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1];
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ }
+
+ for(i=0; i<8; i++)
+ {
+ unBit = 1 << (FRAG_ATTRIB_TEX0 + i);
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i];
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ }
+ }
+
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index c24c859ef58..87ea1719c49 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -824,11 +824,9 @@ static void r700ShadeModel(GLcontext * ctx, GLenum mode) //--------------------
switch (mode) {
case GL_FLAT:
SETbit(r700->SPI_INTERP_CONTROL_0.u32All, FLAT_SHADE_ENA_bit);
- //SETbit(r700->SPI_PS_INPUT_CNTL_0.u32All, FLAT_SHADE_bit);
break;
case GL_SMOOTH:
CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, FLAT_SHADE_ENA_bit);
- //CLEARbit(r700->SPI_PS_INPUT_CNTL_0.u32All, FLAT_SHADE_bit);
break;
default:
return;
@@ -1675,10 +1673,6 @@ void r700InitState(GLcontext * ctx) //-------------------
r700->SPI_VS_OUT_ID_0.u32All = 0x03020100;
r700->SPI_VS_OUT_ID_1.u32All = 0x07060504;
- r700->SPI_PS_INPUT_CNTL_0.u32All = 0x00000800;
- r700->SPI_PS_INPUT_CNTL_1.u32All = 0x00000801;
- r700->SPI_PS_INPUT_CNTL_2.u32All = 0x00000802;
-
r700->SPI_THREAD_GROUPING.u32All = 0;
if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770)
SETfield(r700->SPI_THREAD_GROUPING.u32All, 1, PS_GROUPING_shift, PS_GROUPING_mask);
--
cgit v1.2.3
From e629c50e2be03144e8aeef3c51624ae8db9957b8 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 28 Jul 2009 17:59:54 -0400
Subject: r600: implement texture border color
---
src/mesa/drivers/dri/r600/r600_tex.c | 14 ++++++--------
src/mesa/drivers/dri/r600/r700_render.c | 9 +++++++++
src/mesa/drivers/dri/radeon/radeon_common_context.h | 5 +++++
3 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c
index a1e89453900..444024ee7e0 100644
--- a/src/mesa/drivers/dri/r600/r600_tex.c
+++ b/src/mesa/drivers/dri/r600/r600_tex.c
@@ -269,14 +269,12 @@ static void r600SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloa
static void r600SetTexBorderColor(radeonTexObjPtr t, const GLfloat color[4])
{
-#if 0
- GLubyte c[4];
- CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]);
- CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]);
- CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]);
- CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]);
- t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]);
-#endif
+ t->TD_PS_SAMPLER0_BORDER_ALPHA = *((uint32_t*)&(color[3]));
+ t->TD_PS_SAMPLER0_BORDER_RED = *((uint32_t*)&(color[2]));
+ t->TD_PS_SAMPLER0_BORDER_GREEN = *((uint32_t*)&(color[1]));
+ t->TD_PS_SAMPLER0_BORDER_BLUE = *((uint32_t*)&(color[0]));
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_BORDER_COLOR_REGISTER,
+ BORDER_COLOR_TYPE_shift, BORDER_COLOR_TYPE_mask);
}
/**
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index f2fec27effa..4e0d5391d0a 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -187,6 +187,15 @@ GLboolean r700SendTextureState(context_t *context)
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1);
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2);
END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(2 + 4);
+ R600_OUT_BATCH_REGSEQ((TD_PS_SAMPLER0_BORDER_RED + (i * 16)), 4);
+ R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_RED);
+ R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_GREEN);
+ R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_BLUE);
+ R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_ALPHA);
+ END_BATCH();
+
COMMIT_BATCH();
}
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h
index e4a8da05965..0cdacb1c361 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h
@@ -239,6 +239,11 @@ struct radeon_tex_obj {
GLuint SQ_TEX_SAMPLER1;
GLuint SQ_TEX_SAMPLER2;
+ GLuint TD_PS_SAMPLER0_BORDER_RED;
+ GLuint TD_PS_SAMPLER0_BORDER_GREEN;
+ GLuint TD_PS_SAMPLER0_BORDER_BLUE;
+ GLuint TD_PS_SAMPLER0_BORDER_ALPHA;
+
GLboolean border_fallback;
--
cgit v1.2.3
From 8c56029b1eb71de56b676b91ddfce06c8d3881f0 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 28 Jul 2009 18:09:38 -0400
Subject: r600: fix tex clamp modes
This makes texwrap look better.
---
src/mesa/drivers/dri/r600/r600_tex.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c
index 444024ee7e0..853f824b2c0 100644
--- a/src/mesa/drivers/dri/r600/r600_tex.c
+++ b/src/mesa/drivers/dri/r600/r600_tex.c
@@ -62,10 +62,10 @@ static unsigned int translate_wrap_mode(GLenum wrapmode)
case GL_CLAMP: return SQ_TEX_CLAMP_HALF_BORDER;
case GL_CLAMP_TO_EDGE: return SQ_TEX_CLAMP_LAST_TEXEL;
case GL_CLAMP_TO_BORDER: return SQ_TEX_CLAMP_BORDER;
- case GL_MIRRORED_REPEAT: return SQ_TEX_MIRROR_ONCE_HALF_BORDER;
- case GL_MIRROR_CLAMP_EXT: return SQ_TEX_MIRROR;
- case GL_MIRROR_CLAMP_TO_EDGE_EXT: return SQ_TEX_MIRROR_ONCE_BORDER;
- case GL_MIRROR_CLAMP_TO_BORDER_EXT: return SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
+ case GL_MIRRORED_REPEAT: return SQ_TEX_MIRROR;
+ case GL_MIRROR_CLAMP_EXT: return SQ_TEX_MIRROR_ONCE_HALF_BORDER;
+ case GL_MIRROR_CLAMP_TO_EDGE_EXT: return SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
+ case GL_MIRROR_CLAMP_TO_BORDER_EXT: return SQ_TEX_MIRROR_ONCE_BORDER;
default:
_mesa_problem(NULL, "bad wrap mode in %s", __FUNCTION__);
return 0;
--
cgit v1.2.3
From b0341f994feb3ea724bdbbfde5d79ec2c88c34a1 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller
Date: Wed, 29 Jul 2009 00:35:12 +0200
Subject: nv50: use correct scissor reg
---
src/gallium/drivers/nv50/nv50_screen.c | 4 ++++
src/gallium/drivers/nv50/nv50_state_validate.c | 23 ++++++++++++++++-------
2 files changed, 20 insertions(+), 7 deletions(-)
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index ce8f906b15f..349619db213 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -417,6 +417,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_method(so, screen->tesla, 0x1234, 1);
so_data (so, 1);
+ /* activate first scissor rectangle */
+ so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE, 1);
+ so_data (so, 1);
+
so_emit(chan, so);
so_ref (so, &screen->static_init);
so_ref (NULL, &so);
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index d313e9de4fe..03aed81fed7 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -119,12 +119,18 @@ nv50_state_validate_fb(struct nv50_context *nv50)
so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2);
so_data (so, w << 16);
so_data (so, h << 16);
- so_method(so, tesla, 0x0e04, 2);
+ /* set window scissor rectangle to window extents */
+ so_method(so, tesla, NV50TCL_SCISSOR_HORIZ, 2);
so_data (so, w << 16);
so_data (so, h << 16);
- so_method(so, tesla, 0xdf8, 2);
+ /* set window lower left corner */
+ so_method(so, tesla, NV50TCL_WINDOW_LEFT, 2);
so_data (so, 0);
so_data (so, h);
+ /* set screen scissor rectangle */
+ so_method(so, tesla, NV50TCL_SCREEN_SCISSOR_HORIZ, 2);
+ so_data (so, w << 16);
+ so_data (so, h << 16);
so_ref(so, &nv50->state.fb);
so_ref(NULL, &so);
@@ -233,13 +239,16 @@ nv50_state_validate(struct nv50_context *nv50)
nv50->state.scissor_enabled = rast->scissor;
so = so_new(3, 0);
- so_method(so, tesla, 0x0ff4, 2);
+ so_method(so, tesla, NV50TCL_SCISSOR_HORIZ, 2);
if (nv50->state.scissor_enabled) {
- so_data(so, ((s->maxx - s->minx) << 16) | s->minx);
- so_data(so, ((s->maxy - s->miny) << 16) | s->miny);
+ /* the hw has y = 0 = bottom here */
+ unsigned top = nv50->framebuffer.height - s->miny;
+ unsigned bottom = nv50->framebuffer.height - s->maxy;
+ so_data(so, (s->maxx << 16) | s->minx);
+ so_data(so, (top << 16) | bottom);
} else {
- so_data(so, (8192 << 16));
- so_data(so, (8192 << 16));
+ so_data(so, (nv50->framebuffer.width << 16));
+ so_data(so, (nv50->framebuffer.height << 16));
}
so_ref(so, &nv50->state.scissor);
so_ref(NULL, &so);
--
cgit v1.2.3
From d1b9183e64e819d389688074c3db338bd0d2d80e Mon Sep 17 00:00:00 2001
From: Christoph Bumiller
Date: Wed, 29 Jul 2009 01:21:41 +0200
Subject: nv50: fix viewport transform
The translation also needs to be inverted, and in bypass mode
the state tracker incorrectly assumes that Y = 0 = TOP, so we
need inversion there to; NDC clipping has to be deactivated
explicitly.
---
src/gallium/drivers/nv50/nv50_state_validate.c | 31 +++++++++++++++++---------
1 file changed, 20 insertions(+), 11 deletions(-)
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 03aed81fed7..ce8e44fb006 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -258,6 +258,7 @@ scissor_uptodate:
if (nv50->dirty & (NV50_NEW_VIEWPORT | NV50_NEW_RASTERIZER)) {
unsigned bypass;
+ float y_translate = (float)nv50->framebuffer.height;
if (!nv50->rasterizer->pipe.bypass_vs_clip_and_viewport)
bypass = 0;
@@ -271,25 +272,33 @@ scissor_uptodate:
nv50->state.viewport_bypass = bypass;
so = so_new(12, 0);
+ so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1);
if (!bypass) {
- so_method(so, tesla, NV50TCL_VIEWPORT_UNK1(0), 3);
+ so_data(so, 0x0000);
+ y_translate -= nv50->viewport.translate[1];
+ so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE(0), 3);
so_data (so, fui(nv50->viewport.translate[0]));
- so_data (so, fui(nv50->viewport.translate[1]));
+ so_data (so, fui(y_translate));
so_data (so, fui(nv50->viewport.translate[2]));
- so_method(so, tesla, NV50TCL_VIEWPORT_UNK0(0), 3);
+ so_method(so, tesla, NV50TCL_VIEWPORT_SCALE(0), 3);
so_data (so, fui(nv50->viewport.scale[0]));
so_data (so, fui(-nv50->viewport.scale[1]));
so_data (so, fui(nv50->viewport.scale[2]));
- so_method(so, tesla, 0x192c, 1);
- so_data (so, 1);
- so_method(so, tesla, 0x0f90, 1);
- so_data (so, 0);
} else {
- so_method(so, tesla, 0x192c, 1);
- so_data (so, 0);
- so_method(so, tesla, 0x0f90, 1);
- so_data (so, 1);
+ /* don't do xy-clipping in NDC space */
+ so_data(so, 0x0800);
+ /* in bypass mode, y = 0 would be bottom */
+ so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE(0), 3);
+ so_data (so, fui(0.0f));
+ so_data (so, fui(y_translate));
+ so_data (so, fui(0.0f));
+ so_method(so, tesla, NV50TCL_VIEWPORT_SCALE(0), 3);
+ so_data (so, fui(1.0f));
+ so_data (so, fui(-1.0f));
+ so_data (so, fui(1.0f));
}
+ so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1);
+ so_data (so, 1);
so_ref(so, &nv50->state.viewport);
so_ref(NULL, &so);
--
cgit v1.2.3
From f498ccd654bf04d401eafc3690635fcbf707e1f0 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller
Date: Wed, 29 Jul 2009 00:51:35 +0200
Subject: nv50: fix sx/dx typo in transfer_rect_m2mf
---
src/gallium/drivers/nv50/nv50_transfer.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index d0b7f0bef43..6ff375951e4 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -76,13 +76,13 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct nouveau_bo *src_bo,
OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc);
if (src_bo->tile_flags) {
BEGIN_RING(chan, m2mf, 0x0218, 1);
- OUT_RING (chan, (dy << 16) | sx);
+ OUT_RING (chan, (sy << 16) | sx);
} else {
src_offset += (line_count * src_pitch);
}
if (dst_bo->tile_flags) {
BEGIN_RING(chan, m2mf, 0x0234, 1);
- OUT_RING (chan, (sy << 16) | dx);
+ OUT_RING (chan, (dy << 16) | dx);
} else {
dst_offset += (line_count * dst_pitch);
}
--
cgit v1.2.3
From df189c9efc0fbcdce816af483f0147ab635280d1 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller
Date: Wed, 29 Jul 2009 00:55:03 +0200
Subject: nv50: TIC/TSC fixes and additions
Red and blue were interchanged in TIC.
Add border color and some formats.
---
src/gallium/drivers/nv50/nv50_state.c | 7 +++-
src/gallium/drivers/nv50/nv50_tex.c | 16 ++++----
src/gallium/drivers/nv50/nv50_texture.h | 71 +++++++++++++++++++++------------
3 files changed, 60 insertions(+), 34 deletions(-)
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 116866a8e78..c93694c60f5 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -205,11 +205,16 @@ nv50_sampler_state_create(struct pipe_context *pipe,
}
limit = CLAMP(cso->lod_bias, -16.0, 15.0);
- tsc[1] |= ((int)(limit * 256.0) & 0x1fff) << 11;
+ tsc[1] |= ((int)(limit * 256.0) & 0x1fff) << 12;
tsc[2] |= ((int)CLAMP(cso->max_lod, 0.0, 15.0) << 20) |
((int)CLAMP(cso->min_lod, 0.0, 15.0) << 8);
+ tsc[4] = fui(cso->border_color[0]);
+ tsc[5] = fui(cso->border_color[1]);
+ tsc[6] = fui(cso->border_color[2]);
+ tsc[7] = fui(cso->border_color[3]);
+
sso->normalized = cso->normalized_coords;
return (void *)sso;
}
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index ff40c2ad81b..46c3073defb 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -32,30 +32,30 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
switch (mt->base.format) {
case PIPE_FORMAT_A8R8G8B8_UNORM:
so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
NV50TIC_0_0_FMT_8_8_8_8);
break;
case PIPE_FORMAT_A1R5G5B5_UNORM:
so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
NV50TIC_0_0_FMT_1_5_5_5);
break;
case PIPE_FORMAT_A4R4G4B4_UNORM:
so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
NV50TIC_0_0_FMT_4_4_4_4);
break;
case PIPE_FORMAT_R5G6B5_UNORM:
so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
+ NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
+ NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
NV50TIC_0_0_FMT_5_6_5);
break;
case PIPE_FORMAT_L8_UNORM:
diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h
index aca622c73b1..207fb039f70 100644
--- a/src/gallium/drivers/nv50/nv50_texture.h
+++ b/src/gallium/drivers/nv50/nv50_texture.h
@@ -14,13 +14,13 @@
#define NV50TIC_0_0_MAPA_C2 0x20000000
#define NV50TIC_0_0_MAPA_C3 0x28000000
#define NV50TIC_0_0_MAPA_ONE 0x38000000
-#define NV50TIC_0_0_MAPR_MASK 0x07000000
-#define NV50TIC_0_0_MAPR_ZERO 0x00000000
-#define NV50TIC_0_0_MAPR_C0 0x02000000
-#define NV50TIC_0_0_MAPR_C1 0x03000000
-#define NV50TIC_0_0_MAPR_C2 0x04000000
-#define NV50TIC_0_0_MAPR_C3 0x05000000
-#define NV50TIC_0_0_MAPR_ONE 0x07000000
+#define NV50TIC_0_0_MAPB_MASK 0x07000000
+#define NV50TIC_0_0_MAPB_ZERO 0x00000000
+#define NV50TIC_0_0_MAPB_C0 0x02000000
+#define NV50TIC_0_0_MAPB_C1 0x03000000
+#define NV50TIC_0_0_MAPB_C2 0x04000000
+#define NV50TIC_0_0_MAPB_C3 0x05000000
+#define NV50TIC_0_0_MAPB_ONE 0x07000000
#define NV50TIC_0_0_MAPG_MASK 0x00e00000
#define NV50TIC_0_0_MAPG_ZERO 0x00000000
#define NV50TIC_0_0_MAPG_C0 0x00400000
@@ -28,31 +28,49 @@
#define NV50TIC_0_0_MAPG_C2 0x00800000
#define NV50TIC_0_0_MAPG_C3 0x00a00000
#define NV50TIC_0_0_MAPG_ONE 0x00e00000
-#define NV50TIC_0_0_MAPB_MASK 0x001c0000
-#define NV50TIC_0_0_MAPB_ZERO 0x00000000
-#define NV50TIC_0_0_MAPB_C0 0x00080000
-#define NV50TIC_0_0_MAPB_C1 0x000c0000
-#define NV50TIC_0_0_MAPB_C2 0x00100000
-#define NV50TIC_0_0_MAPB_C3 0x00140000
-#define NV50TIC_0_0_MAPB_ONE 0x001c0000
+#define NV50TIC_0_0_MAPR_MASK 0x001c0000
+#define NV50TIC_0_0_MAPR_ZERO 0x00000000
+#define NV50TIC_0_0_MAPR_C0 0x00080000
+#define NV50TIC_0_0_MAPR_C1 0x000c0000
+#define NV50TIC_0_0_MAPR_C2 0x00100000
+#define NV50TIC_0_0_MAPR_C3 0x00140000
+#define NV50TIC_0_0_MAPR_ONE 0x001c0000
#define NV50TIC_0_0_TYPEA_MASK 0x00038000
#define NV50TIC_0_0_TYPEA_UNORM 0x00010000
-#define NV50TIC_0_0_TYPER_MASK 0x00007000
-#define NV50TIC_0_0_TYPER_UNORM 0x00002000
+#define NV50TIC_0_0_TYPEA_SNORM 0x00008000
+#define NV50TIC_0_0_TYPEA_FLOAT 0x00038000
+#define NV50TIC_0_0_TYPEB_MASK 0x00007000
+#define NV50TIC_0_0_TYPEB_UNORM 0x00002000
+#define NV50TIC_0_0_TYPEB_SNORM 0x00001000
+#define NV50TIC_0_0_TYPEB_FLOAT 0x00007000
#define NV50TIC_0_0_TYPEG_MASK 0x00000e00
#define NV50TIC_0_0_TYPEG_UNORM 0x00000400
-#define NV50TIC_0_0_TYPEB_MASK 0x000001c0
-#define NV50TIC_0_0_TYPEB_UNORM 0x00000080
-#define NV50TIC_0_0_FMT_MASK 0x0000003c
+#define NV50TIC_0_0_TYPEG_SNORM 0x00000200
+#define NV50TIC_0_0_TYPEG_FLOAT 0x00000e00
+#define NV50TIC_0_0_TYPER_MASK 0x000001c0
+#define NV50TIC_0_0_TYPER_UNORM 0x00000080
+#define NV50TIC_0_0_TYPER_SNORM 0x00000040
+#define NV50TIC_0_0_TYPER_FLOAT 0x000001c0
+#define NV50TIC_0_0_FMT_MASK 0x0000003f
+#define NV50TIC_0_0_FMT_32_32_32_32 0x00000001
+#define NV50TIC_0_0_FMT_16_16_16_16 0x00000003
+#define NV50TIC_0_0_FMT_32_32 0x00000004
#define NV50TIC_0_0_FMT_8_8_8_8 0x00000008
+#define NV50TIC_0_0_FMT_2_10_10_10 0x00000009
+#define NV50TIC_0_0_FMT_32 0x0000000f
#define NV50TIC_0_0_FMT_4_4_4_4 0x00000012
-#define NV50TIC_0_0_FMT_1_5_5_5 0x00000013
+/* #define NV50TIC_0_0_FMT_1_5_5_5 0x00000013 */
+#define NV50TIC_0_0_FMT_1_5_5_5 0x00000014
#define NV50TIC_0_0_FMT_5_6_5 0x00000015
#define NV50TIC_0_0_FMT_8_8 0x00000018
+#define NV50TIC_0_0_FMT_16 0x0000001b
#define NV50TIC_0_0_FMT_8 0x0000001d
+#define NV50TIC_0_0_FMT_10_11_11 0x00000021
#define NV50TIC_0_0_FMT_DXT1 0x00000024
#define NV50TIC_0_0_FMT_DXT3 0x00000025
#define NV50TIC_0_0_FMT_DXT5 0x00000026
+#define NV50TIC_0_0_FMT_RGTC1 0x00000027
+#define NV50TIC_0_0_FMT_RGTC2 0x00000028
#define NV50TIC_0_1_OFFSET_LOW_MASK 0xffffffff
#define NV50TIC_0_1_OFFSET_LOW_SHIFT 0
@@ -102,6 +120,7 @@
#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_EDGE 0x00000140
#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_BORDER 0x00000180
#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP 0x000001c0
+#define NV50TSC_1_0_MAX_ANISOTROPY_MASK 0x00700000
#define NV50TSC_1_1_MAGF_MASK 0x00000003
#define NV50TSC_1_1_MAGF_NEAREST 0x00000001
@@ -113,17 +132,19 @@
#define NV50TSC_1_1_MIPF_NONE 0x00000040
#define NV50TSC_1_1_MIPF_NEAREST 0x00000080
#define NV50TSC_1_1_MIPF_LINEAR 0x000000c0
+#define NV50TSC_1_1_LOD_BIAS_MASK 0x01fff000
-#define NV50TSC_1_2_UNKNOWN_MASK 0xffffffff
+#define NV50TSC_1_2_MIN_LOD_MASK 0x00000f00
+#define NV50TSC_1_2_MAX_LOD_MASK 0x00f00000
#define NV50TSC_1_3_UNKNOWN_MASK 0xffffffff
-#define NV50TSC_1_4_UNKNOWN_MASK 0xffffffff
+#define NV50TSC_1_4_BORDER_COLOR_RED_MASK 0xffffffff
-#define NV50TSC_1_5_UNKNOWN_MASK 0xffffffff
+#define NV50TSC_1_5_BORDER_COLOR_GREEN_MASK 0xffffffff
-#define NV50TSC_1_6_UNKNOWN_MASK 0xffffffff
+#define NV50TSC_1_6_BORDER_COLOR_BLUE_MASK 0xffffffff
-#define NV50TSC_1_7_UNKNOWN_MASK 0xffffffff
+#define NV50TSC_1_7_BORDER_COLOR_ALPHA_MASK 0xffffffff
#endif
--
cgit v1.2.3
From 72813ba5b6ac60519957a96afbefb62e3be91c19 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller
Date: Tue, 28 Jul 2009 17:21:31 +0200
Subject: nv50: should use uint32_t ptr in draw_elements_inline_u32
---
src/gallium/drivers/nv50/nv50_vbo.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index f81929f2387..cbd9d6ab8d4 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -139,7 +139,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
}
static INLINE void
-nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint8_t *map,
+nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
unsigned start, unsigned count)
{
struct nouveau_channel *chan = nv50->screen->tesla->channel;
--
cgit v1.2.3
From 1f17b8ff5903b72117caaa49ac786f2f6dfbe401 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller
Date: Tue, 28 Jul 2009 17:34:07 +0200
Subject: nv50: support more vtxelt formats
NOTE: we must not try to emit buffer relocations when
vtxbuf_nr is 0 but vtxelt_nr is not
---
src/gallium/drivers/nv50/nv50_vbo.c | 80 ++++++++++++++++++++++++++-----------
1 file changed, 56 insertions(+), 24 deletions(-)
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index cbd9d6ab8d4..422c8c6b5b0 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -49,6 +49,57 @@ nv50_prim(unsigned mode)
return NV50TCL_VERTEX_BEGIN_POINTS;
}
+static INLINE unsigned
+nv50_vtxeltfmt(unsigned pf)
+{
+ static const uint8_t vtxelt_32[4] = { 0x90, 0x20, 0x10, 0x08 };
+ static const uint8_t vtxelt_16[4] = { 0xd8, 0x78, 0x28, 0x18 };
+ static const uint8_t vtxelt_08[4] = { 0xe8, 0xc0, 0x98, 0x50 };
+
+ unsigned nf, c = 0;
+
+ switch (pf_type(pf)) {
+ case PIPE_FORMAT_TYPE_FLOAT:
+ nf = NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT; break;
+ case PIPE_FORMAT_TYPE_UNORM:
+ nf = NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM; break;
+ case PIPE_FORMAT_TYPE_SNORM:
+ nf = NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM; break;
+ case PIPE_FORMAT_TYPE_USCALED:
+ nf = NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED; break;
+ case PIPE_FORMAT_TYPE_SSCALED:
+ nf = NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED; break;
+ default:
+ NOUVEAU_ERR("invalid vbo type %d\n",pf_type(pf));
+ assert(0);
+ nf = NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT;
+ break;
+ }
+
+ if (pf_size_y(pf)) c++;
+ if (pf_size_z(pf)) c++;
+ if (pf_size_w(pf)) c++;
+
+ if (pf_exp2(pf) == 3) {
+ switch (pf_size_x(pf)) {
+ case 1: return (nf | (vtxelt_08[c] << 16));
+ case 2: return (nf | (vtxelt_16[c] << 16));
+ case 4: return (nf | (vtxelt_32[c] << 16));
+ default:
+ break;
+ }
+ } else
+ if (pf_exp2(pf) == 6 && pf_size_x(pf) == 1) {
+ NOUVEAU_ERR("unsupported vbo component size 64\n");
+ assert(0);
+ return (nf | 0x08000000);
+ }
+
+ NOUVEAU_ERR("invalid vbo format %s\n",pf_name(pf));
+ assert(0);
+ return (nf | 0x08000000);
+}
+
boolean
nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
unsigned count)
@@ -208,6 +259,10 @@ nv50_vbo_validate(struct nv50_context *nv50)
struct nouveau_stateobj *vtxbuf, *vtxfmt;
int i;
+ /* don't validate if Gallium took away our buffers */
+ if (nv50->vtxbuf_nr == 0)
+ return;
+
vtxbuf = so_new(nv50->vtxelt_nr * 4, nv50->vtxelt_nr * 2);
vtxfmt = so_new(nv50->vtxelt_nr + 1, 0);
so_method(vtxfmt, tesla, 0x1ac0, nv50->vtxelt_nr);
@@ -218,30 +273,7 @@ nv50_vbo_validate(struct nv50_context *nv50)
&nv50->vtxbuf[ve->vertex_buffer_index];
struct nouveau_bo *bo = nouveau_bo(vb->buffer);
- switch (ve->src_format) {
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- so_data(vtxfmt, 0x7e080000 | i);
- break;
- case PIPE_FORMAT_R32G32B32_FLOAT:
- so_data(vtxfmt, 0x7e100000 | i);
- break;
- case PIPE_FORMAT_R32G32_FLOAT:
- so_data(vtxfmt, 0x7e200000 | i);
- break;
- case PIPE_FORMAT_R32_FLOAT:
- so_data(vtxfmt, 0x7e900000 | i);
- break;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- so_data(vtxfmt, 0x24500000 | i);
- break;
- default:
- {
- NOUVEAU_ERR("invalid vbo format %s\n",
- pf_name(ve->src_format));
- assert(0);
- return;
- }
- }
+ so_data(vtxfmt, nv50_vtxeltfmt(ve->src_format) | i);
so_method(vtxbuf, tesla, 0x900 + (i * 16), 3);
so_data (vtxbuf, 0x20000000 | vb->stride);
--
cgit v1.2.3
From 987c59c486500780a9315d2a850e6121319f4e93 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller
Date: Tue, 28 Jul 2009 17:38:28 +0200
Subject: nv50: use new 2D surface format names
---
src/gallium/drivers/nv50/nv50_surface.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index 3da9d6e7285..31c36a12b73 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -35,13 +35,13 @@ nv50_format(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_A8R8G8B8_UNORM:
- return NV50_2D_DST_FORMAT_32BPP;
+ return NV50_2D_DST_FORMAT_A8R8G8B8_UNORM;
case PIPE_FORMAT_X8R8G8B8_UNORM:
- return NV50_2D_DST_FORMAT_24BPP;
+ return NV50_2D_DST_FORMAT_X8R8G8B8_UNORM;
case PIPE_FORMAT_R5G6B5_UNORM:
- return NV50_2D_DST_FORMAT_16BPP;
+ return NV50_2D_DST_FORMAT_R5G6B5_UNORM;
case PIPE_FORMAT_A8_UNORM:
- return NV50_2D_DST_FORMAT_8BPP;
+ return NV50_2D_DST_FORMAT_R8_UNORM;
default:
return -1;
}
--
cgit v1.2.3
From 84166a021fe7b4a05f03d0119d2954998f7f12c6 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller
Date: Wed, 29 Jul 2009 01:07:52 +0200
Subject: nv50: correct zeta formats
What was Z24S8 before is actually S8Z24, and what we had for Z16
is actually X8Z24. Now, we also have the REAL Z24S8 and I added
Z32_FLOAT as well; most of the formats need different tile_flags.
---
src/gallium/drivers/nv50/nv50_miptree.c | 9 +++++++--
src/gallium/drivers/nv50/nv50_screen.c | 5 +++--
src/gallium/drivers/nv50/nv50_state_validate.c | 15 ++++++++++-----
3 files changed, 20 insertions(+), 9 deletions(-)
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 22465e02274..c8392799ed8 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -42,9 +42,14 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
mt->base.screen = pscreen;
switch (pt->format) {
- case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z32_FLOAT:
+ tile_flags = 0x4800;
+ break;
case PIPE_FORMAT_Z24S8_UNORM:
- case PIPE_FORMAT_Z16_UNORM:
+ tile_flags = 0x1800;
+ break;
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
tile_flags = 0x2800;
break;
default:
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 349619db213..0f6b1aed96c 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -44,9 +44,10 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
} else
if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) {
switch (format) {
+ case PIPE_FORMAT_Z32_FLOAT:
case PIPE_FORMAT_Z24S8_UNORM:
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z16_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
return TRUE;
default:
break;
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index ce8e44fb006..4a49b107a5c 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -92,17 +92,22 @@ nv50_state_validate_fb(struct nv50_context *nv50)
so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM |
NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0);
switch (fb->zsbuf->format) {
+ case PIPE_FORMAT_Z32_FLOAT:
+ so_data(so, NV50TCL_ZETA_FORMAT_Z32_FLOAT);
+ break;
case PIPE_FORMAT_Z24S8_UNORM:
- case PIPE_FORMAT_Z24X8_UNORM:
- so_data(so, 0x16);
+ so_data(so, NV50TCL_ZETA_FORMAT_Z24S8_UNORM);
+ break;
+ case PIPE_FORMAT_X8Z24_UNORM:
+ so_data(so, NV50TCL_ZETA_FORMAT_X8Z24_UNORM);
break;
- case PIPE_FORMAT_Z16_UNORM:
- so_data(so, 0x15);
+ case PIPE_FORMAT_S8Z24_UNORM:
+ so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM);
break;
default:
NOUVEAU_ERR("AIIII unknown format %s\n",
pf_name(fb->zsbuf->format));
- so_data(so, 0x16);
+ so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM);
break;
}
so_data(so, bo->tile_mode << 4);
--
cgit v1.2.3
From 625bc0cfa2ffb67b797672f7fb3c083a863199d3 Mon Sep 17 00:00:00 2001
From: Ben Skeggs
Date: Wed, 29 Jul 2009 10:58:05 +1000
Subject: nouveau: map_range returning -EBUSY isn't necessarily an error
---
src/gallium/drivers/nouveau/nouveau_screen.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index 832366e6462..e4cf91c005c 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -4,6 +4,8 @@
#include
+#include
+
#include "nouveau/nouveau_bo.h"
#include "nouveau_winsys.h"
#include "nouveau_screen.h"
@@ -141,12 +143,13 @@ nouveau_screen_bo_map_range(struct pipe_screen *pscreen, struct pipe_buffer *pb,
unsigned offset, unsigned length, unsigned usage)
{
struct nouveau_bo *bo = nouveau_bo(pb);
+ uint32_t flags = nouveau_screen_map_flags(usage);
int ret;
- ret = nouveau_bo_map_range(bo, offset, length,
- nouveau_screen_map_flags(usage));
+ ret = nouveau_bo_map_range(bo, offset, length, flags);
if (ret) {
- debug_printf("map_range failed: %d\n", ret);
+ if (!(flags & NOUVEAU_BO_NOWAIT) || ret != -EBUSY)
+ debug_printf("map_range failed: %d\n", ret);
return NULL;
}
--
cgit v1.2.3
From 693f900b163f89952ed9b4c30d093f8ead461657 Mon Sep 17 00:00:00 2001
From: Ben Skeggs
Date: Wed, 29 Jul 2009 11:19:52 +1000
Subject: nv50: support non-blocking query_result()
---
src/gallium/drivers/nv50/nv50_query.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c
index 940e04365f2..c354e10b0f8 100644
--- a/src/gallium/drivers/nv50/nv50_query.c
+++ b/src/gallium/drivers/nv50/nv50_query.c
@@ -107,13 +107,13 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
boolean wait, uint64_t *result)
{
struct nv50_query *q = nv50_query(pq);
-
- /*XXX: Want to be able to return FALSE here instead of blocking
- * until the result is available..
- */
+ int ret;
if (!q->ready) {
- nouveau_bo_map(q->bo, NOUVEAU_BO_RD);
+ ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD |
+ wait ? 0 : NOUVEAU_BO_NOWAIT);
+ if (ret)
+ return false;
q->result = ((uint32_t *)q->bo->map)[1];
q->ready = TRUE;
nouveau_bo_unmap(q->bo);
--
cgit v1.2.3
From 0f6d3aece7b193dcacbd94f87ac734ee3a44b366 Mon Sep 17 00:00:00 2001
From: Cooper Yuan
Date: Wed, 29 Jul 2009 15:23:56 +0800
Subject: R6xx/r7xx: VS export fog color as parameter
---
src/mesa/drivers/dri/r600/r700_assembler.c | 16 ++++++++++++++++
src/mesa/drivers/dri/r600/r700_fragprog.c | 6 ++++++
2 files changed, 22 insertions(+)
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index 1d41c5cf785..ebd5ff106be 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -4014,6 +4014,22 @@ GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
export_starting_index++;
}
+ unBit = 1 << VERT_RESULT_FOGC;
+ if(OutputsWritten & unBit)
+ {
+ if( GL_FALSE == Process_Export(pR700AsmCode,
+ SQ_EXPORT_PARAM,
+ export_starting_index,
+ 1,
+ pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC],
+ GL_FALSE) )
+ {
+ return GL_FALSE;
+ }
+
+ export_starting_index++;
+ }
+
for(i=0; i<8; i++)
{
unBit = 1 << (VERT_RESULT_TEX0 + i);
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index 88e66491ba8..a473dfe8889 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -67,6 +67,12 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm,
pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++;
}
+ unBit = 1 << FRAG_ATTRIB_FOGC;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++;
+ }
+
for(i=0; i<8; i++)
{
unBit = 1 << (FRAG_ATTRIB_TEX0 + i);
--
cgit v1.2.3
From 03607708b0499816291f0fb0d1c331fbf034f0ba Mon Sep 17 00:00:00 2001
From: Cooper Yuan
Date: Wed, 29 Jul 2009 15:31:41 +0800
Subject: r600: emit fog color in PS input map, fix fog related applications
---
src/mesa/drivers/dri/r600/r700_fragprog.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index a473dfe8889..180d980442b 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -395,6 +395,19 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
}
+ unBit = 1 << FRAG_ATTRIB_FOGC;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC];
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ }
+
for(i=0; i<8; i++)
{
unBit = 1 << (FRAG_ATTRIB_TEX0 + i);
--
cgit v1.2.3
From 4d99e1453552847c2a67f723adc2764443c995d7 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Thu, 16 Jul 2009 17:51:02 +0100
Subject: util: _debug_printf should print even when DEBUG is not defined
The leading underscore is meaningful... This function is used by
_warning and _error functions as well as the more common
debug_printf().
debug_printf (without underscore) gets turned off when DEBUG is
disabled, but warning/error messages still use this function to get
their message out.
(cherry picked from commit 0ac879dca797360570543d5bd0fd64f8fb8e566e)
---
src/gallium/auxiliary/util/u_debug.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index a5ca0b72bd7..96d400c839b 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -143,11 +143,9 @@ void _debug_vprintf(const char *format, va_list ap)
#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT)
/* TODO */
#else /* !PIPE_SUBSYSTEM_WINDOWS */
-#ifdef DEBUG
fflush(stdout);
vfprintf(stderr, format, ap);
#endif
-#endif
}
--
cgit v1.2.3
From 2420b283b783751d4def3a3a2a0ed8bf7bb7b6a8 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Tue, 28 Jul 2009 18:45:22 +0100
Subject: mesa/st: recognize no-op scissor state when checking clear_with_quads
Some apps enable scissor but set the rectangle to the dimensions of
the window. Don't let this force us onto a slower clear path.
---
src/mesa/state_tracker/st_cb_clear.c | 24 ++++++++++++++++++++----
1 file changed, 20 insertions(+), 4 deletions(-)
diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c
index 668acbccb88..8a8c99f7e17 100644
--- a/src/mesa/state_tracker/st_cb_clear.c
+++ b/src/mesa/state_tracker/st_cb_clear.c
@@ -280,7 +280,11 @@ clear_with_quad(GLcontext *ctx,
static INLINE GLboolean
check_clear_color_with_quad(GLcontext *ctx, struct gl_renderbuffer *rb)
{
- if (ctx->Scissor.Enabled)
+ if (ctx->Scissor.Enabled &&
+ (ctx->Scissor.X != 0 ||
+ ctx->Scissor.Y != 0 ||
+ ctx->Scissor.Width < rb->Width ||
+ ctx->Scissor.Height < rb->Height))
return TRUE;
if (!ctx->Color.ColorMask[0] ||
@@ -300,7 +304,11 @@ check_clear_depth_stencil_with_quad(GLcontext *ctx, struct gl_renderbuffer *rb)
GLboolean maskStencil
= (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax;
- if (ctx->Scissor.Enabled)
+ if (ctx->Scissor.Enabled &&
+ (ctx->Scissor.X != 0 ||
+ ctx->Scissor.Y != 0 ||
+ ctx->Scissor.Width < rb->Width ||
+ ctx->Scissor.Height < rb->Height))
return TRUE;
if (maskStencil)
@@ -319,7 +327,11 @@ check_clear_depth_with_quad(GLcontext *ctx, struct gl_renderbuffer *rb)
const struct st_renderbuffer *strb = st_renderbuffer(rb);
const GLboolean isDS = pf_is_depth_and_stencil(strb->surface->format);
- if (ctx->Scissor.Enabled)
+ if (ctx->Scissor.Enabled &&
+ (ctx->Scissor.X != 0 ||
+ ctx->Scissor.Y != 0 ||
+ ctx->Scissor.Width < rb->Width ||
+ ctx->Scissor.Height < rb->Height))
return TRUE;
if (isDS &&
@@ -345,7 +357,11 @@ check_clear_stencil_with_quad(GLcontext *ctx, struct gl_renderbuffer *rb)
if (maskStencil)
return TRUE;
- if (ctx->Scissor.Enabled)
+ if (ctx->Scissor.Enabled &&
+ (ctx->Scissor.X != 0 ||
+ ctx->Scissor.Y != 0 ||
+ ctx->Scissor.Width < rb->Width ||
+ ctx->Scissor.Height < rb->Height))
return TRUE;
/* This is correct, but it is necessary to look at the depth clear
--
cgit v1.2.3
From 684282953937a37541f26c6e51ceec4134c62dfb Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Wed, 29 Jul 2009 12:47:23 +0100
Subject: mesa/st: short-circuit glFinish calls on WIN32 only
Windows opengl32.dll calls glFinish prior to every swapbuffers, which
makes it pretty hard to get decent performance...
Work around by mapping finish to flush on PIPE_OS_WINDOWS. This is
conformant, though it might confuse poorly-written benchmarks which
attempt to measure a single event rather than figuring out the rate of
continuous processing.
---
src/mesa/state_tracker/st_cb_flush.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/src/mesa/state_tracker/st_cb_flush.c b/src/mesa/state_tracker/st_cb_flush.c
index fbaffd154f9..f7de4e7a4d6 100644
--- a/src/mesa/state_tracker/st_cb_flush.c
+++ b/src/mesa/state_tracker/st_cb_flush.c
@@ -152,4 +152,16 @@ void st_init_flush_functions(struct dd_function_table *functions)
{
functions->Flush = st_glFlush;
functions->Finish = st_glFinish;
+
+ /* Windows opengl32.dll calls glFinish prior to every swapbuffers.
+ * This is unnecessary and degrades performance. Luckily we have some
+ * scope to work around this, as the externally-visible behaviour of
+ * Finish() is identical to Flush() in all cases - no differences in
+ * rendering or ReadPixels are visible if we opt not to wait here.
+ *
+ * Only set this up on windows to avoid suprise elsewhere.
+ */
+#ifdef PIPE_OS_WINDOWS
+ functions->Finish = st_glFlush;
+#endif
}
--
cgit v1.2.3
From 88ebf514a41ae460dad08a4585a61541864a4432 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Tue, 28 Jul 2009 13:51:29 -0700
Subject: swrast: enable ARB_vertex_array_object.
It was getting enabled anyway but without the entrypoints installed. Whoops.
---
src/mesa/drivers/dri/swrast/swrast.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c
index fbfa49c99d0..3aa7843b1bc 100644
--- a/src/mesa/drivers/dri/swrast/swrast.c
+++ b/src/mesa/drivers/dri/swrast/swrast.c
@@ -63,6 +63,7 @@
/* sw extensions not associated with some GL version */
#define need_GL_ARB_shader_objects
+#define need_GL_ARB_vertex_array_object
#define need_GL_ARB_vertex_program
#define need_GL_APPLE_vertex_array_object
#define need_GL_ATI_fragment_shader
@@ -94,6 +95,7 @@ const struct dri_extension card_extensions[] =
{ "GL_SGI_color_table", GL_SGI_color_table_functions },
{ "GL_ARB_shader_objects", GL_ARB_shader_objects_functions },
+ { "GL_ARB_vertex_array_object", GL_ARB_vertex_array_object_functions },
{ "GL_ARB_vertex_program", GL_ARB_vertex_program_functions },
{ "GL_APPLE_vertex_array_object", GL_APPLE_vertex_array_object_functions },
{ "GL_ATI_fragment_shader", GL_ATI_fragment_shader_functions },
--
cgit v1.2.3
From d3a1fc62f47f1a7f4422585c8c60bf8dcb0dfe4b Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Wed, 29 Jul 2009 19:41:07 +0200
Subject: r300/compiler: Adapt AllocateHwInputs interface to common usage
pattern
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/radeon_compiler.h | 2 +-
src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c | 2 +-
src/mesa/drivers/dri/r300/r300_fragprog_common.c | 7 ++++---
3 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 5bdc0754470..e63ab8840ab 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -86,7 +86,7 @@ struct r300_fragment_program_compiler {
void * UserData;
void (*AllocateHwInputs)(
- void * yourdata,
+ struct r300_fragment_program_compiler * c,
void (*allocate)(void * data, unsigned input, unsigned hwreg),
void * mydata);
};
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index 8cf1f1aaac2..48616ac461c 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -848,7 +848,7 @@ void radeonPairProgram(
_mesa_printf("Emit paired program\n");
scan_instructions(&s);
- s.Compiler->AllocateHwInputs(s.Compiler->UserData, &alloc_helper, &s);
+ s.Compiler->AllocateHwInputs(s.Compiler, &alloc_helper, &s);
while(!s.Compiler->Base.Error &&
(s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index 3bfe8a9dede..6674efc5bca 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -154,9 +154,11 @@ static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r
* @note This allocation is performed explicitly, because the order of inputs
* is determined by the RS hardware.
*/
-static void allocate_hw_inputs(void * yourdata, void (*allocate)(void * data, unsigned input, unsigned hwreg), void * mydata)
+static void allocate_hw_inputs(
+ struct r300_fragment_program_compiler * c,
+ void (*allocate)(void * data, unsigned input, unsigned hwreg),
+ void * mydata)
{
- struct r300_fragment_program_compiler * c = yourdata;
GLuint InputsRead = c->Base.Program.InputsRead;
int i;
GLuint hwindex = 0;
@@ -208,7 +210,6 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog
compiler.OutputDepth = FRAG_RESULT_DEPTH;
compiler.OutputColor = FRAG_RESULT_COLOR;
compiler.AllocateHwInputs = &allocate_hw_inputs;
- compiler.UserData = &compiler;
if (compiler.Base.Debug) {
fflush(stdout);
--
cgit v1.2.3
From 0723cd1b0a8a76808844a2216d709f56fbad88e2 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle
Date: Wed, 29 Jul 2009 20:59:56 +0200
Subject: r300: Cleanup r300_fragment_program_code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Configuration register values are now stored directly in that structure.
Signed-off-by: Nicolai Hähnle
---
src/mesa/drivers/dri/r300/compiler/r300_fragprog.c | 67 +++++-----
.../drivers/dri/r300/compiler/r300_fragprog_emit.c | 141 ++++++++++++++-------
src/mesa/drivers/dri/r300/compiler/radeon_code.h | 17 ++-
src/mesa/drivers/dri/r300/r300_state.c | 35 ++---
4 files changed, 145 insertions(+), 115 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
index fbffd3d7cdd..6c9fba49146 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
@@ -203,18 +203,21 @@ void r300FragmentProgramDump(struct rX00_fragment_program_code *c)
fprintf(stderr, "Hardware program\n");
fprintf(stderr, "----------------\n");
- for (n = 0; n < (code->cur_node + 1); n++) {
+ for (n = 0; n <= (code->config & 3); n++) {
+ uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n];
+ int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT;
+ int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT;
+ int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;
+ int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT;
+
fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
- "alu_end: %d, tex_end: %d, flags: %08x\n", n,
- code->node[n].alu_offset,
- code->node[n].tex_offset,
- code->node[n].alu_end, code->node[n].tex_end,
- code->node[n].flags);
+ "alu_end: %d, tex_end: %d (code_addr: %08x)\n", n,
+ alu_offset, tex_offset, alu_end, tex_end, code_addr);
- if (n > 0 || code->first_node_has_tex) {
+ if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) {
fprintf(stderr, " TEX:\n");
- for (i = code->node[n].tex_offset;
- i <= code->node[n].tex_offset + code->node[n].tex_end;
+ for (i = tex_offset;
+ i <= tex_offset + tex_end;
++i) {
const char *instr;
@@ -252,8 +255,8 @@ void r300FragmentProgramDump(struct rX00_fragment_program_code *c)
}
}
- for (i = code->node[n].alu_offset;
- i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) {
+ for (i = alu_offset;
+ i <= alu_offset + alu_end; ++i) {
char srcc[3][10], dstc[20];
char srca[3][10], dsta[20];
char argc[3][20];
@@ -261,8 +264,8 @@ void r300FragmentProgramDump(struct rX00_fragment_program_code *c)
char flags[5], tmp[10];
for (j = 0; j < 3; ++j) {
- int regc = code->alu.inst[i].inst1 >> (j * 6);
- int rega = code->alu.inst[i].inst3 >> (j * 6);
+ int regc = code->alu.inst[i].rgb_addr >> (j * 6);
+ int rega = code->alu.inst[i].alpha_addr >> (j * 6);
sprintf(srcc[j], "%c%i",
(regc & 32) ? 'c' : 't', regc & 31);
@@ -273,45 +276,45 @@ void r300FragmentProgramDump(struct rX00_fragment_program_code *c)
dstc[0] = 0;
sprintf(flags, "%s%s%s",
(code->alu.inst[i].
- inst1 & R300_ALU_DSTC_REG_X) ? "x" : "",
+ rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "",
(code->alu.inst[i].
- inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "",
+ rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "",
(code->alu.inst[i].
- inst1 & R300_ALU_DSTC_REG_Z) ? "z" : "");
+ rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");
if (flags[0] != 0) {
sprintf(dstc, "t%i.%s ",
(code->alu.inst[i].
- inst1 >> R300_ALU_DSTC_SHIFT) & 31,
+ rgb_addr >> R300_ALU_DSTC_SHIFT) & 31,
flags);
}
sprintf(flags, "%s%s%s",
(code->alu.inst[i].
- inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
+ rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
(code->alu.inst[i].
- inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
+ rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
(code->alu.inst[i].
- inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
+ rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
if (flags[0] != 0) {
sprintf(tmp, "o%i.%s",
(code->alu.inst[i].
- inst1 >> R300_ALU_DSTC_SHIFT) & 31,
+ rgb_addr >> R300_ALU_DSTC_SHIFT) & 31,
flags);
strcat(dstc, tmp);
}
dsta[0] = 0;
- if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) {
+ if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
sprintf(dsta, "t%i.w ",
(code->alu.inst[i].
- inst3 >> R300_ALU_DSTA_SHIFT) & 31);
+ alpha_addr >> R300_ALU_DSTA_SHIFT) & 31);
}
- if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) {
+ if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {
sprintf(tmp, "o%i.w ",
(code->alu.inst[i].
- inst3 >> R300_ALU_DSTA_SHIFT) & 31);
+ alpha_addr >> R300_ALU_DSTA_SHIFT) & 31);
strcat(dsta, tmp);
}
- if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) {
+ if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) {
strcat(dsta, "Z");
}
@@ -319,12 +322,12 @@ void r300FragmentProgramDump(struct rX00_fragment_program_code *c)
"%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
" w: %3s %3s %3s -> %-20s (%08x)\n", i,
srcc[0], srcc[1], srcc[2], dstc,
- code->alu.inst[i].inst1, srca[0], srca[1],
- srca[2], dsta, code->alu.inst[i].inst3);
+ code->alu.inst[i].rgb_addr, srca[0], srca[1],
+ srca[2], dsta, code->alu.inst[i].alpha_addr);
for (j = 0; j < 3; ++j) {
- int regc = code->alu.inst[i].inst0 >> (j * 7);
- int rega = code->alu.inst[i].inst2 >> (j * 7);
+ int regc = code->alu.inst[i].rgb_inst >> (j * 7);
+ int rega = code->alu.inst[i].alpha_inst >> (j * 7);
int d;
char buf[20];
@@ -406,8 +409,8 @@ void r300FragmentProgramDump(struct rX00_fragment_program_code *c)
fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n"
" w: %8s %8s %8s op: %08x\n",
argc[0], argc[1], argc[2],
- code->alu.inst[i].inst0, arga[0], arga[1],
- arga[2], code->alu.inst[i].inst2);
+ code->alu.inst[i].rgb_inst, arga[0], arga[1],
+ arga[2], code->alu.inst[i].alpha_inst);
}
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index 80b569ade67..305dc074ee8 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -46,8 +46,18 @@
#include "r300_fragprog_swizzle.h"
+struct r300_emit_state {
+ struct r300_fragment_program_compiler * compiler;
+
+ unsigned current_node : 2;
+ unsigned node_first_tex : 8;
+ unsigned node_first_alu : 8;
+ uint32_t node_flags;
+};
+
#define PROG_CODE \
- struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
+ struct r300_emit_state * emit = (struct r300_emit_state*)data; \
+ struct r300_fragment_program_compiler *c = emit->compiler; \
struct r300_fragment_program_code *code = &c->code->code.r300
#define error(fmt, args...) do { \
@@ -61,8 +71,8 @@
*/
static void use_temporary(struct r300_fragment_program_code *code, GLuint index)
{
- if (index > code->max_temp_idx)
- code->max_temp_idx = index;
+ if (index > code->pixsize)
+ code->pixsize = index;
}
@@ -121,62 +131,61 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
int ip = code->alu.length++;
int j;
- code->node[code->cur_node].alu_end++;
- code->alu.inst[ip].inst0 = translate_rgb_opcode(c, inst->RGB.Opcode);
- code->alu.inst[ip].inst2 = translate_alpha_opcode(c, inst->Alpha.Opcode);
+ code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
+ code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
for(j = 0; j < 3; ++j) {
GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5);
if (!inst->RGB.Src[j].Constant)
use_temporary(code, inst->RGB.Src[j].Index);
- code->alu.inst[ip].inst1 |= src << (6*j);
+ code->alu.inst[ip].rgb_addr |= src << (6*j);
src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5);
if (!inst->Alpha.Src[j].Constant)
use_temporary(code, inst->Alpha.Src[j].Index);
- code->alu.inst[ip].inst3 |= src << (6*j);
+ code->alu.inst[ip].alpha_addr |= src << (6*j);
GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
arg |= inst->RGB.Arg[j].Abs << 6;
arg |= inst->RGB.Arg[j].Negate << 5;
- code->alu.inst[ip].inst0 |= arg << (7*j);
+ code->alu.inst[ip].rgb_inst |= arg << (7*j);
arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
arg |= inst->Alpha.Arg[j].Abs << 6;
arg |= inst->Alpha.Arg[j].Negate << 5;
- code->alu.inst[ip].inst2 |= arg << (7*j);
+ code->alu.inst[ip].alpha_inst |= arg << (7*j);
}
if (inst->RGB.Saturate)
- code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP;
+ code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
if (inst->Alpha.Saturate)
- code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP;
+ code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
if (inst->RGB.WriteMask) {
use_temporary(code, inst->RGB.DestIndex);
- code->alu.inst[ip].inst1 |=
+ code->alu.inst[ip].rgb_addr |=
(inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) |
(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
}
if (inst->RGB.OutputWriteMask) {
- code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT);
- code->node[code->cur_node].flags |= R300_RGBA_OUT;
+ code->alu.inst[ip].rgb_addr |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT);
+ emit->node_flags |= R300_RGBA_OUT;
}
if (inst->Alpha.WriteMask) {
use_temporary(code, inst->Alpha.DestIndex);
- code->alu.inst[ip].inst3 |=
+ code->alu.inst[ip].alpha_addr |=
(inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) |
R300_ALU_DSTA_REG;
}
if (inst->Alpha.OutputWriteMask) {
- code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT;
- code->node[code->cur_node].flags |= R300_RGBA_OUT;
+ code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT;
+ emit->node_flags |= R300_RGBA_OUT;
}
if (inst->Alpha.DepthWriteMask) {
- code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH;
- code->node[code->cur_node].flags |= R300_W_OUT;
+ code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
+ emit->node_flags |= R300_W_OUT;
c->code->writes_depth = GL_TRUE;
}
@@ -187,31 +196,50 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
/**
* Finish the current node without advancing to the next one.
*/
-static GLboolean finish_node(struct r300_fragment_program_compiler *c)
+static GLboolean finish_node(struct r300_emit_state * emit)
{
- struct r300_fragment_program_code *code = &c->code->code.r300;
- struct r300_fragment_program_node *node = &code->node[code->cur_node];
+ struct r300_fragment_program_compiler * c = emit->compiler;
+ struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
- if (node->alu_end < 0) {
+ if (code->alu.length == emit->node_first_alu) {
/* Generate a single NOP for this node */
struct radeon_pair_instruction inst;
_mesa_bzero(&inst, sizeof(inst));
- if (!emit_alu(c, &inst))
+ if (!emit_alu(emit, &inst))
return GL_FALSE;
}
- if (node->tex_end < 0) {
- if (code->cur_node == 0) {
- node->tex_end = 0;
- } else {
- error("Node %i has no TEX instructions", code->cur_node);
+ unsigned alu_offset = emit->node_first_alu;
+ unsigned alu_end = code->alu.length - alu_offset - 1;
+ unsigned tex_offset = emit->node_first_tex;
+ unsigned tex_end = code->tex.length - tex_offset - 1;
+
+ if (code->tex.length == emit->node_first_tex) {
+ if (emit->current_node > 0) {
+ error("Node %i has no TEX instructions", emit->current_node);
return GL_FALSE;
}
+
+ tex_end = 0;
} else {
- if (code->cur_node == 0)
- code->first_node_has_tex = 1;
+ if (emit->current_node == 0)
+ code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
}
+ /* Write the config register.
+ * Note: The order in which the words for each node are written
+ * is not correct here and needs to be fixed up once we're entirely
+ * done
+ *
+ * Also note that the register specification from AMD is slightly
+ * incorrect in its description of this register. */
+ code->code_addr[emit->current_node] =
+ (alu_offset << R300_ALU_START_SHIFT) |
+ (alu_end << R300_ALU_SIZE_SHIFT) |
+ (tex_offset << R300_TEX_START_SHIFT) |
+ (tex_end << R300_TEX_SIZE_SHIFT) |
+ emit->node_flags;
+
return GL_TRUE;
}
@@ -224,25 +252,23 @@ static GLboolean begin_tex(void* data)
{
PROG_CODE;
- if (code->cur_node == 0) {
- if (code->node[0].alu_end < 0 &&
- code->node[0].tex_end < 0)
- return GL_TRUE;
+ if (code->alu.length == emit->node_first_alu &&
+ code->tex.length == emit->node_first_tex) {
+ return GL_TRUE;
}
- if (code->cur_node == 3) {
+ if (emit->current_node == 3) {
error("Too many texture indirections");
return GL_FALSE;
}
- if (!finish_node(c))
+ if (!finish_node(emit))
return GL_FALSE;
- struct r300_fragment_program_node *node = &code->node[++code->cur_node];
- node->alu_offset = code->alu.length;
- node->alu_end = -1;
- node->tex_offset = code->tex.length;
- node->tex_end = -1;
+ emit->current_node++;
+ emit->node_first_tex = code->tex.length;
+ emit->node_first_alu = code->alu.length;
+ emit->node_flags = 0;
return GL_TRUE;
}
@@ -279,7 +305,6 @@ static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* in
use_temporary(code, inst->SrcIndex);
- code->node[code->cur_node].tex_end++;
code->tex.inst[code->tex.length++] =
(inst->SrcIndex << R300_SRC_ADDR_SHIFT) |
(dest << R300_DST_ADDR_SHIFT) |
@@ -302,16 +327,34 @@ static const struct radeon_pair_handler pair_handler = {
*/
void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
{
+ struct r300_emit_state emit;
struct r300_fragment_program_code *code = &compiler->code->code.r300;
+ memset(&emit, 0, sizeof(emit));
+ emit.compiler = compiler;
+
_mesa_bzero(code, sizeof(struct r300_fragment_program_code));
- code->node[0].alu_end = -1;
- code->node[0].tex_end = -1;
- radeonPairProgram(compiler, &pair_handler, compiler);
+ radeonPairProgram(compiler, &pair_handler, &emit);
if (compiler->Base.Error)
return;
- finish_node(compiler);
+ /* Finish the program */
+ finish_node(&emit);
+
+ code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
+ code->code_offset =
+ (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) |
+ ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) |
+ (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) |
+ ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT);
+
+ if (emit.current_node < 3) {
+ int shift = 3 - emit.current_node;
+ int i;
+ for(i = 0; i <= emit.current_node; ++i)
+ code->code_addr[shift + i] = code->code_addr[i];
+ for(i = 0; i < shift; ++i)
+ code->code_addr[i] = 0;
+ }
}
-
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index 77bc19d8ff9..6f5bc288316 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -136,18 +136,17 @@ struct r300_fragment_program_code {
struct {
int length; /**< total # of ALU instructions used */
struct {
- uint32_t inst0;
- uint32_t inst1;
- uint32_t inst2;
- uint32_t inst3;
+ uint32_t rgb_inst;
+ uint32_t rgb_addr;
+ uint32_t alpha_inst;
+ uint32_t alpha_addr;
} inst[R300_PFS_MAX_ALU_INST];
} alu;
- struct r300_fragment_program_node node[4];
- int cur_node;
- int first_node_has_tex;
-
- int max_temp_idx;
+ uint32_t config; /* US_CONFIG */
+ uint32_t pixsize; /* US_PIXSIZE */
+ uint32_t code_offset; /* US_CODE_OFFSET */
+ uint32_t code_addr[4]; /* US_CODE_ADDR */
};
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index 1f799d5a6ff..1ac14267d5e 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -2035,7 +2035,7 @@ static void r300SetupPixelShader(GLcontext *ctx)
r300ContextPtr rmesa = R300_CONTEXT(ctx);
struct r300_fragment_program *fp = rmesa->selected_fp;
struct r300_fragment_program_code *code;
- int i, k;
+ int i;
code = &fp->code.code.r300;
@@ -2048,33 +2048,18 @@ static void r300SetupPixelShader(GLcontext *ctx)
rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, code->alu.length);
rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
for (i = 0; i < code->alu.length; i++) {
- rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0;
- rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1;
- rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst2;
- rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst3;
+ rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_inst;
+ rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_addr;
+ rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_inst;
+ rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_addr;
}
R300_STATECHANGE(rmesa, fp);
- rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->cur_node | (code->first_node_has_tex << 3);
- rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->max_temp_idx;
- rmesa->hw.fp.cmd[R300_FP_CNTL2] =
- (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) |
- ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) |
- (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) |
- ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT);
- /* I just want to say, the way these nodes are stored.. weird.. */
- for (i = 0, k = (4 - (code->cur_node + 1)); i < 4; i++, k++) {
- if (i < (code->cur_node + 1)) {
- rmesa->hw.fp.cmd[R300_FP_NODE0 + k] =
- (code->node[i].alu_offset << R300_ALU_START_SHIFT) |
- (code->node[i].alu_end << R300_ALU_SIZE_SHIFT) |
- (code->node[i].tex_offset << R300_TEX_START_SHIFT) |
- (code->node[i].tex_end << R300_TEX_SIZE_SHIFT) |
- code->node[i].flags;
- } else {
- rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0;
- }
- }
+ rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->config;
+ rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->pixsize;
+ rmesa->hw.fp.cmd[R300_FP_CNTL2] = code->code_offset;
+ for (i = 0; i < 4; i++)
+ rmesa->hw.fp.cmd[R300_FP_NODE0 + i] = code->code_addr[i];
R300_STATECHANGE(rmesa, fpp);
rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, fp->code.constants.Count * 4);
--
cgit v1.2.3
From 1e207ba9c127d12feff3e1c2e8e29da26182e0bb Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Wed, 29 Jul 2009 15:15:36 -0400
Subject: r600: minor fixes
- set MAX_LOD properly
- min texel pitch is 8 texels
- emit old command buffer when re-initing base state
---
src/mesa/drivers/dri/r600/r600_tex.c | 2 +-
src/mesa/drivers/dri/r600/r600_texstate.c | 13 +++++++++++++
src/mesa/drivers/dri/r600/r700_state.c | 3 ++-
3 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c
index 853f824b2c0..6d531bf0f93 100644
--- a/src/mesa/drivers/dri/r600/r600_tex.c
+++ b/src/mesa/drivers/dri/r600/r600_tex.c
@@ -160,7 +160,7 @@ static void r600SetTexDefaultState(radeonTexObjPtr t)
SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_BORDER_COLOR_TRANS_BLACK, BORDER_COLOR_TYPE_shift, BORDER_COLOR_TYPE_mask);
t->SQ_TEX_SAMPLER1 = 0;
- SETfield(t->SQ_TEX_SAMPLER1, MAX_LOD_mask, MAX_LOD_shift, MAX_LOD_mask);
+ SETfield(t->SQ_TEX_SAMPLER1, 0x3ff, MAX_LOD_shift, MAX_LOD_mask);
t->SQ_TEX_SAMPLER2 = 0;
SETbit(t->SQ_TEX_SAMPLER2, SQ_TEX_SAMPLER_WORD2_0__TYPE_bit);
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index 1cf3b484ae6..70dd5404811 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -598,6 +598,10 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex
uTexelPitch = (firstImage->Width + R700_TEXEL_PITCH_ALIGNMENT_MASK)
& ~R700_TEXEL_PITCH_ALIGNMENT_MASK;
+ /* min pitch is 8 */
+ if (uTexelPitch < 8)
+ uTexelPitch = 8;
+
SETfield(t->SQ_TEX_RESOURCE0, (uTexelPitch/8)-1, PITCH_shift, PITCH_mask);
SETfield(t->SQ_TEX_RESOURCE0, firstImage->Width - 1,
TEX_WIDTH_shift, TEX_WIDTH_mask);
@@ -751,6 +755,11 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
pitch_val = (pitch_val + R700_TEXEL_PITCH_ALIGNMENT_MASK)
& ~R700_TEXEL_PITCH_ALIGNMENT_MASK;
+
+ /* min pitch is 8 */
+ if (pitch_val < 8)
+ pitch_val = 8;
+
SETfield(t->SQ_TEX_RESOURCE0, (pitch_val/8)-1, PITCH_shift, PITCH_mask);
}
@@ -898,6 +907,10 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
pitch_val = (pitch_val + R700_TEXEL_PITCH_ALIGNMENT_MASK)
& ~R700_TEXEL_PITCH_ALIGNMENT_MASK;
+ /* min pitch is 8 */
+ if (pitch_val < 8)
+ pitch_val = 8;
+
SETfield(t->SQ_TEX_RESOURCE0, (pitch_val/8)-1, PITCH_shift, PITCH_mask);
SETfield(t->SQ_TEX_RESOURCE0, rb->base.Width - 1,
TEX_WIDTH_shift, TEX_WIDTH_mask);
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index 87ea1719c49..e0a57425917 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -1638,9 +1638,10 @@ static void r700InitSQConfig(GLcontext * ctx)
void r700InitState(GLcontext * ctx) //-------------------
{
context_t *context = R700_CONTEXT(ctx);
-
R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ radeon_firevertices(&context->radeon);
+
r700->TA_CNTL_AUX.u32All = 0;
SETfield(r700->TA_CNTL_AUX.u32All, 28, TD_FIFO_CREDIT_shift, TD_FIFO_CREDIT_mask);
r700->VC_ENHANCE.u32All = 0;
--
cgit v1.2.3
From b116f57bacb79205a1f80c7055964c60b402a19d Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Wed, 29 Jul 2009 18:06:20 -0400
Subject: r600: fix texture pitch alignment
fixes texwrap
---
src/mesa/drivers/dri/r600/r600_texstate.c | 6 ++++--
src/mesa/drivers/dri/radeon/radeon_common_context.c | 10 +++++++++-
src/mesa/drivers/dri/radeon/radeon_common_context.h | 2 ++
src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c | 9 ++++++---
4 files changed, 21 insertions(+), 6 deletions(-)
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index 70dd5404811..082bfd75f69 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -556,7 +556,7 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex
radeonTexObj *t = radeon_tex_obj(texObj);
const struct gl_texture_image *firstImage;
int firstlevel = t->mt ? t->mt->firstLevel : 0;
- GLuint uTexelPitch;
+ GLuint uTexelPitch, row_align;;
firstImage = t->base.Image[0][firstlevel];
@@ -595,7 +595,9 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex
return;
}
- uTexelPitch = (firstImage->Width + R700_TEXEL_PITCH_ALIGNMENT_MASK)
+ row_align = rmesa->radeon.texture_row_align - 1;
+ uTexelPitch = ((firstImage->Width * t->mt->bpp + row_align) & ~row_align) / t->mt->bpp;
+ uTexelPitch = (uTexelPitch + R700_TEXEL_PITCH_ALIGNMENT_MASK)
& ~R700_TEXEL_PITCH_ALIGNMENT_MASK;
/* min pitch is 8 */
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index a50cd056e1c..4e4eba5d94c 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -241,7 +241,15 @@ GLboolean radeonInitContext(radeonContextPtr radeon,
radeon->texture_depth = ( glVisual->rgbBits > 16 ) ?
DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
- radeon->texture_row_align = 32;
+ if (IS_R600_CLASS(radeon->radeonScreen)) {
+ radeon->texture_row_align = 256;
+ radeon->texture_rect_row_align = 256;
+ radeon->texture_compressed_row_align = 256;
+ } else {
+ radeon->texture_row_align = 32;
+ radeon->texture_rect_row_align = 64;
+ radeon->texture_compressed_row_align = 64;
+ }
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h
index 0cdacb1c361..cd1986e1fc3 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h
@@ -429,6 +429,8 @@ struct radeon_context {
int texture_depth;
float initialMaxAnisotropy;
uint32_t texture_row_align;
+ uint32_t texture_rect_row_align;
+ uint32_t texture_compressed_row_align;
struct radeon_dma dma;
struct radeon_hw_state hw;
diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
index f04a07fecd2..071a18e7d86 100644
--- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
@@ -90,16 +90,18 @@ static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree
GLuint face, GLuint level, GLuint* curOffset)
{
radeon_mipmap_level *lvl = &mt->levels[level];
- uint32_t row_align = rmesa->texture_row_align - 1;
+ uint32_t row_align;
/* Find image size in bytes */
if (mt->compressed) {
/* TODO: Is this correct? Need test cases for compressed textures! */
- lvl->rowstride = (lvl->width * mt->bpp + 63) & ~63;
+ row_align = rmesa->texture_compressed_row_align - 1;
+ lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align;
lvl->size = radeon_compressed_texture_size(mt->radeon->glCtx,
lvl->width, lvl->height, lvl->depth, mt->compressed);
} else if (mt->target == GL_TEXTURE_RECTANGLE_NV) {
- lvl->rowstride = (lvl->width * mt->bpp + 63) & ~63;
+ row_align = rmesa->texture_rect_row_align - 1;
+ lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align;
lvl->size = lvl->rowstride * lvl->height;
} else if (mt->tilebits & RADEON_TXO_MICRO_TILE) {
/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
@@ -108,6 +110,7 @@ static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree
lvl->rowstride = (lvl->width * mt->bpp * 2 + 31) & ~31;
lvl->size = lvl->rowstride * ((lvl->height + 1) / 2) * lvl->depth;
} else {
+ row_align = rmesa->texture_row_align - 1;
lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align;
lvl->size = lvl->rowstride * lvl->height * lvl->depth;
}
--
cgit v1.2.3
From 9c4c9f2837e08c5f69bc0d12bce7a656b2291837 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Wed, 29 Jul 2009 18:12:33 -0400
Subject: r600: remove extraneous semicolon
---
src/mesa/drivers/dri/r600/r600_texstate.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index 082bfd75f69..6918b3b8a12 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -556,7 +556,7 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex
radeonTexObj *t = radeon_tex_obj(texObj);
const struct gl_texture_image *firstImage;
int firstlevel = t->mt ? t->mt->firstLevel : 0;
- GLuint uTexelPitch, row_align;;
+ GLuint uTexelPitch, row_align;
firstImage = t->base.Image[0][firstlevel];
--
cgit v1.2.3
From b9889517f5e7eceb14bd103bbdff9ff47ce6ed25 Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Wed, 29 Jul 2009 16:27:03 -0600
Subject: gallium: fix SSE shadow texture instructions
When sampling a 2D shadow map we need 3 texcoord components, not 2.
The third component (distance from light source) is compared against
the texture sample to return the result (visible vs. occluded).
Also, enable proper handling of TGSI_TEXTURE_SHADOW targets in Mesa->TGSI
translation. There's a possibility for breakage in gallium drivers if
they fail to handle the TGSI_TEXTURE_SHADOW1D / TGSI_TEXTURE_SHADOW2D /
TGSI_TEXTURE_SHADOWRECT texture targets for TGSI_OPCODE_TEX/TXP instructions,
but that should be easy to fix.
With these changes, progs/demos/shadowtex.c renders properly again with
softpipe.
---
src/gallium/auxiliary/tgsi/tgsi_sse2.c | 6 +++---
src/mesa/state_tracker/st_mesa_to_tgsi.c | 6 ------
2 files changed, 3 insertions(+), 9 deletions(-)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 781ea1e75cf..16848f7cc5e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1467,15 +1467,15 @@ emit_tex( struct x86_function *func,
switch (inst->InstructionExtTexture.Texture) {
case TGSI_TEXTURE_1D:
- case TGSI_TEXTURE_SHADOW1D:
count = 1;
break;
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
- case TGSI_TEXTURE_SHADOW2D:
- case TGSI_TEXTURE_SHADOWRECT:
count = 2;
break;
+ case TGSI_TEXTURE_SHADOW1D:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
case TGSI_TEXTURE_3D:
case TGSI_TEXTURE_CUBE:
count = 3;
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 6380cd6b2a8..c8fb39d558f 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -158,12 +158,6 @@ map_texture_target(
GLuint textarget,
GLboolean shadow )
{
-#if 1
- /* XXX remove this line after we've checked that the rest of gallium
- * can handle the TGSI_TEXTURE_SHADOWx tokens.
- */
- shadow = GL_FALSE;
-#endif
switch( textarget ) {
case TEXTURE_1D_INDEX:
if (shadow)
--
cgit v1.2.3
From 9d0b8d72d8d704ff4d8e10448b60cbb42f07eecb Mon Sep 17 00:00:00 2001
From: Brian Paul
Date: Wed, 29 Jul 2009 20:07:41 -0600
Subject: mesa: add new FRAG_ATTRIB_FACE and FRAG_ATTRIB_PNTC fragment program
inputs
Previously, the FOGC attribute contained the fragment fog coord, front/back-
face flag and the gl_PointCoord.xy values. Now each of those things are
separate fragment program attributes. This simplifies quite a few things in
Mesa and gallium.
Need to test i965 driver and fix up point coord handling in the gallium/draw
module...
---
src/mesa/drivers/dri/i965/brw_wm_fp.c | 61 ++++++++++++++++++--------------
src/mesa/main/mtypes.h | 9 ++---
src/mesa/shader/arbprogparse.c | 7 ----
src/mesa/shader/programopt.c | 1 -
src/mesa/shader/slang/slang_codegen.c | 4 +--
src/mesa/shader/slang/slang_link.c | 14 --------
src/mesa/state_tracker/st_atom_shader.c | 17 ---------
src/mesa/state_tracker/st_mesa_to_tgsi.c | 21 -----------
src/mesa/state_tracker/st_program.c | 42 ++++++++--------------
src/mesa/swrast/s_fragprog.c | 5 ++-
src/mesa/swrast/s_points.c | 26 +++++++-------
11 files changed, 69 insertions(+), 138 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index b9e8dd2e96e..606baab9675 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -376,14 +376,6 @@ static void emit_interp( struct brw_wm_compile *c,
}
break;
case FRAG_ATTRIB_FOGC:
- /* The FOGC input is really special. When a program uses glFogFragCoord,
- * the results returned are supposed to be (f,0,0,1). But for Mesa GLSL,
- * the glFrontFacing and glPointCoord values are also stashed in FOGC.
- * So, write the interpolated fog value to X, then either 0, 1, or the
- * stashed values to Y, Z, W. Note that this means that
- * glFogFragCoord.yzw can be wrong in those cases!
- */
-
/* Interpolate the fog coordinate */
emit_op(c,
WM_PINTERP,
@@ -393,26 +385,40 @@ static void emit_interp( struct brw_wm_compile *c,
deltas,
get_pixel_w(c));
- /* Move the front facing value into FOGC.y if it's needed. */
- if (c->fp->program.UsesFrontFacing) {
- emit_op(c,
- WM_FRONTFACING,
- dst_mask(dst, WRITEMASK_Y),
- 0,
- src_undef(),
- src_undef(),
- src_undef());
- } else {
- emit_op(c,
- OPCODE_MOV,
- dst_mask(dst, WRITEMASK_Y),
- 0,
- src_swizzle1(interp, SWIZZLE_ZERO),
- src_undef(),
- src_undef());
- }
+ emit_op(c,
+ OPCODE_MOV,
+ dst_mask(dst, WRITEMASK_YZW),
+ 0,
+ src_swizzle(interp,
+ SWIZZLE_ZERO,
+ SWIZZLE_ZERO,
+ SWIZZLE_ZERO,
+ SWIZZLE_ONE),
+ src_undef(),
+ src_undef());
+ break;
+
+ case FRAG_ATTRIB_FACE:
+ /* XXX review/test this case */
+ emit_op(c,
+ WM_FRONTFACING,
+ dst_mask(dst, WRITEMASK_X),
+ 0,
+ src_undef(),
+ src_undef(),
+ src_undef());
+ break;
+
+ case FRAG_ATTRIB_PNTC:
+ /* XXX review/test this case */
+ emit_op(c,
+ WM_PINTERP,
+ dst_mask(dst, WRITEMASK_XY),
+ 0,
+ interp,
+ deltas,
+ get_pixel_w(c));
- /* Should do the PointCoord thing here. */
emit_op(c,
OPCODE_MOV,
dst_mask(dst, WRITEMASK_ZW),
@@ -425,6 +431,7 @@ static void emit_interp( struct brw_wm_compile *c,
src_undef(),
src_undef());
break;
+
default:
emit_op(c,
WM_PINTERP,
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index d0309f5e90d..da01c58c2d8 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -227,7 +227,9 @@ typedef enum
FRAG_ATTRIB_TEX5 = 9,
FRAG_ATTRIB_TEX6 = 10,
FRAG_ATTRIB_TEX7 = 11,
- FRAG_ATTRIB_VAR0 = 12, /**< shader varying */
+ FRAG_ATTRIB_FACE = 12, /**< front/back face */
+ FRAG_ATTRIB_PNTC = 13, /**< sprite/point coord */
+ FRAG_ATTRIB_VAR0 = 14, /**< shader varying */
FRAG_ATTRIB_MAX = (FRAG_ATTRIB_VAR0 + MAX_VARYING)
} gl_frag_attrib;
@@ -239,6 +241,8 @@ typedef enum
#define FRAG_BIT_COL0 (1 << FRAG_ATTRIB_COL0)
#define FRAG_BIT_COL1 (1 << FRAG_ATTRIB_COL1)
#define FRAG_BIT_FOGC (1 << FRAG_ATTRIB_FOGC)
+#define FRAG_BIT_FACE (1 << FRAG_ATTRIB_FACE)
+#define FRAG_BIT_PNTC (1 << FRAG_ATTRIB_PNTC)
#define FRAG_BIT_TEX0 (1 << FRAG_ATTRIB_TEX0)
#define FRAG_BIT_TEX1 (1 << FRAG_ATTRIB_TEX1)
#define FRAG_BIT_TEX2 (1 << FRAG_ATTRIB_TEX2)
@@ -1834,9 +1838,6 @@ struct gl_fragment_program
struct gl_program Base; /**< base class */
GLenum FogOption;
GLboolean UsesKill; /**< shader uses KIL instruction */
- GLboolean UsesPointCoord; /**< shader uses gl_PointCoord */
- GLboolean UsesFrontFacing; /**< shader used gl_FrontFacing */
- GLboolean UsesFogFragCoord; /**< shader used gl_FogFragCoord */
};
diff --git a/src/mesa/shader/arbprogparse.c b/src/mesa/shader/arbprogparse.c
index bc65aba39a1..f428ee541b5 100644
--- a/src/mesa/shader/arbprogparse.c
+++ b/src/mesa/shader/arbprogparse.c
@@ -3974,13 +3974,6 @@ _mesa_parse_arb_fragment_program(GLcontext* ctx, GLenum target,
if (program->FogOption)
program->Base.InputsRead |= FRAG_BIT_FOGC;
- /* XXX: assume that ARB fragment programs don't have access to the
- * FrontFacing and PointCoord values stuffed into the fog
- * coordinate in GLSL shaders.
- */
- if (program->Base.InputsRead & FRAG_BIT_FOGC)
- program->UsesFogFragCoord = GL_TRUE;
-
if (program->Base.Instructions)
_mesa_free(program->Base.Instructions);
program->Base.Instructions = ap.Base.Instructions;
diff --git a/src/mesa/shader/programopt.c b/src/mesa/shader/programopt.c
index ac5fe0f691f..f70c75cec8e 100644
--- a/src/mesa/shader/programopt.c
+++ b/src/mesa/shader/programopt.c
@@ -396,7 +396,6 @@ _mesa_append_fog_code(GLcontext *ctx, struct gl_fragment_program *fprog)
fprog->Base.Instructions = newInst;
fprog->Base.NumInstructions = inst - newInst;
fprog->Base.InputsRead |= FRAG_BIT_FOGC;
- fprog->UsesFogFragCoord = GL_TRUE;
/* XXX do this? fprog->FogOption = GL_NONE; */
}
diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c
index 2b7e781f984..4fe68eafa7f 100644
--- a/src/mesa/shader/slang/slang_codegen.c
+++ b/src/mesa/shader/slang/slang_codegen.c
@@ -381,8 +381,8 @@ _slang_input_index(const char *name, GLenum target, GLuint *swizzleOut)
{ "gl_TexCoord", FRAG_ATTRIB_TEX0, SWIZZLE_NOOP },
/* note: we're packing several quantities into the fogcoord vector */
{ "gl_FogFragCoord", FRAG_ATTRIB_FOGC, SWIZZLE_XXXX },
- { "gl_FrontFacing", FRAG_ATTRIB_FOGC, SWIZZLE_YYYY }, /*XXX*/
- { "gl_PointCoord", FRAG_ATTRIB_FOGC, SWIZZLE_ZWWW },
+ { "gl_PointCoord", FRAG_ATTRIB_PNTC, SWIZZLE_XYZW },
+ { "gl_FrontFacing", FRAG_ATTRIB_FACE, SWIZZLE_XXXX },
{ NULL, 0, SWIZZLE_NOOP }
};
GLuint i;
diff --git a/src/mesa/shader/slang/slang_link.c b/src/mesa/shader/slang/slang_link.c
index f6032d1e9ad..6c19fc895b3 100644
--- a/src/mesa/shader/slang/slang_link.c
+++ b/src/mesa/shader/slang/slang_link.c
@@ -484,20 +484,6 @@ _slang_update_inputs_outputs(struct gl_program *prog)
for (j = 0; j < numSrc; j++) {
if (inst->SrcReg[j].File == PROGRAM_INPUT) {
prog->InputsRead |= 1 << inst->SrcReg[j].Index;
- if (prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
- inst->SrcReg[j].Index == FRAG_ATTRIB_FOGC) {
- /* The fragment shader FOGC input is used for fog,
- * front-facing and sprite/point coord.
- */
- struct gl_fragment_program *fp = fragment_program(prog);
- const GLint swz = GET_SWZ(inst->SrcReg[j].Swizzle, 0);
- if (swz == SWIZZLE_X)
- fp->UsesFogFragCoord = GL_TRUE;
- else if (swz == SWIZZLE_Y)
- fp->UsesFrontFacing = GL_TRUE;
- else if (swz == SWIZZLE_Z || swz == SWIZZLE_W)
- fp->UsesPointCoord = GL_TRUE;
- }
}
else if (inst->SrcReg[j].File == PROGRAM_ADDRESS) {
maxAddrReg = MAX2(maxAddrReg, (GLuint) (inst->SrcReg[j].Index + 1));
diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c
index 8b3bb5cc033..ee649be885e 100644
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -139,23 +139,6 @@ find_translated_vp(struct st_context *st,
if (fragInputsRead & (1 << inAttr)) {
stfp->input_to_slot[inAttr] = numIn;
numIn++;
- if (((1 << inAttr) & FRAG_BIT_FOGC)) {
- /* leave placeholders for the
- * extra registers we extract from fog */
- if (stfp->Base.UsesFrontFacing) {
- if (!stfp->Base.UsesFogFragCoord)
- --stfp->input_to_slot[inAttr];
- else
- ++numIn;
- }
- if (stfp->Base.UsesPointCoord) {
- if (!stfp->Base.UsesFrontFacing &&
- !stfp->Base.UsesFogFragCoord)
- stfp->input_to_slot[inAttr] -= 2;
- else
- ++numIn;
- }
- }
}
else {
stfp->input_to_slot[inAttr] = UNUSED;
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 6380cd6b2a8..210ca82c42e 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -112,27 +112,6 @@ map_register_file_index(
{
switch( file ) {
case TGSI_FILE_INPUT:
- if (procType == TGSI_PROCESSOR_FRAGMENT &&
- index == FRAG_ATTRIB_FOGC) {
- if (GET_SWZ(*swizzle, 0) == SWIZZLE_X) {
- /* do nothing we're, ok */
- } else if (GET_SWZ(*swizzle, 0) == SWIZZLE_Y) {
- /* replace the swizzle with xxxx */
- *swizzle = MAKE_SWIZZLE4(SWIZZLE_X,
- SWIZZLE_X,
- SWIZZLE_X,
- SWIZZLE_X);
- /* register after fog */
- return inputMapping[index] + 1;
- } else {
- *swizzle = MAKE_SWIZZLE4(SWIZZLE_Z,
- SWIZZLE_W,
- SWIZZLE_Z,
- SWIZZLE_W);
- /* register after frontface */
- return inputMapping[index] + 2;
- }
- }
/* inputs are mapped according to the user-defined map */
return inputMapping[index];
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 806e0ca8f65..d2da20ae424 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -458,34 +458,20 @@ st_translate_fragment_program(struct st_context *st,
stfp->input_semantic_index[slot] = 1;
interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
break;
- case FRAG_ATTRIB_FOGC: {
- int extra_decls = 0;
- if (stfp->Base.UsesFogFragCoord) {
- stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
- stfp->input_semantic_index[slot] = 0;
- interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
- input_flags[slot] = stfp->Base.Base.InputFlags[attr];
- ++extra_decls;
- }
- if (stfp->Base.UsesFrontFacing) {
- GLint idx = slot + extra_decls;
- stfp->input_semantic_name[idx] = TGSI_SEMANTIC_FACE;
- stfp->input_semantic_index[idx] = 0;
- interpMode[idx] = TGSI_INTERPOLATE_CONSTANT;
- input_flags[idx] = stfp->Base.Base.InputFlags[attr];
- ++extra_decls;
- }
- if (stfp->Base.UsesPointCoord) {
- GLint idx = slot + extra_decls;
- stfp->input_semantic_name[idx] = TGSI_SEMANTIC_GENERIC;
- stfp->input_semantic_index[idx] = num_generic++;
- interpMode[idx] = TGSI_INTERPOLATE_PERSPECTIVE;
- input_flags[idx] = stfp->Base.Base.InputFlags[attr];
- ++extra_decls;
- }
- fs_num_inputs += extra_decls - 1;
- continue;
- }
+ case FRAG_ATTRIB_FOGC:
+ stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
+ stfp->input_semantic_index[slot] = 0;
+ interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
+ break;
+ case FRAG_ATTRIB_FACE:
+ stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
+ stfp->input_semantic_index[slot] = num_generic++;
+ interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
+ break;
+ case FRAG_ATTRIB_PNTC:
+ stfp->input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
+ stfp->input_semantic_index[slot] = num_generic++;
+ interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
break;
case FRAG_ATTRIB_TEX0:
case FRAG_ATTRIB_TEX1:
diff --git a/src/mesa/swrast/s_fragprog.c b/src/mesa/swrast/s_fragprog.c
index b71fb9eae97..613a91b0ecd 100644
--- a/src/mesa/swrast/s_fragprog.c
+++ b/src/mesa/swrast/s_fragprog.c
@@ -157,9 +157,8 @@ init_machine(GLcontext *ctx, struct gl_program_machine *machine,
/* if running a GLSL program (not ARB_fragment_program) */
if (ctx->Shader.CurrentProgram) {
- /* Store front/back facing value in register FOGC.Y */
- machine->Attribs[FRAG_ATTRIB_FOGC][col][1] = 1.0 - span->facing;
- /* Note FOGC.ZW is gl_PointCoord if drawing a sprite */
+ /* Store front/back facing value */
+ machine->Attribs[FRAG_ATTRIB_FACE][col][0] = 1.0 - span->facing;
}
machine->CurElement = col;
diff --git a/src/mesa/swrast/s_points.c b/src/mesa/swrast/s_points.c
index 0a3ad97a71b..64c9cda5165 100644
--- a/src/mesa/swrast/s_points.c
+++ b/src/mesa/swrast/s_points.c
@@ -139,7 +139,8 @@ sprite_point(GLcontext *ctx, const SWvertex *vert)
}
ATTRIB_LOOP_BEGIN
- if (attr >= FRAG_ATTRIB_TEX0 && attr < FRAG_ATTRIB_VAR0) {
+ if ((attr >= FRAG_ATTRIB_TEX0 && attr <= FRAG_ATTRIB_TEX7) ||
+ attr >= FRAG_ATTRIB_VAR0) {
const GLuint u = attr - FRAG_ATTRIB_TEX0;
/* a texcoord */
if (ctx->Point.CoordReplace[u]) {
@@ -170,15 +171,15 @@ sprite_point(GLcontext *ctx, const SWvertex *vert)
continue;
}
}
- else if (attr == FRAG_ATTRIB_FOGC) {
- /* GLSL gl_PointCoord is stored in fog.zw */
- span.attrStart[FRAG_ATTRIB_FOGC][2] = 0.0;
- span.attrStart[FRAG_ATTRIB_FOGC][3] = 0.0; /* t0 set below */
- span.attrStepX[FRAG_ATTRIB_FOGC][2] = dsdx;
- span.attrStepX[FRAG_ATTRIB_FOGC][3] = 0.0;
- span.attrStepY[FRAG_ATTRIB_FOGC][2] = 0.0;
- span.attrStepY[FRAG_ATTRIB_FOGC][3] = dtdy;
- tCoords[numTcoords++] = FRAG_ATTRIB_FOGC;
+ else if (attr == FRAG_ATTRIB_PNTC) {
+ /* GLSL gl_PointCoord.xy (.zw undefined) */
+ span.attrStart[FRAG_ATTRIB_PNTC][0] = 0.0;
+ span.attrStart[FRAG_ATTRIB_PNTC][1] = 0.0; /* t0 set below */
+ span.attrStepX[FRAG_ATTRIB_PNTC][0] = dsdx;
+ span.attrStepX[FRAG_ATTRIB_PNTC][1] = 0.0;
+ span.attrStepY[FRAG_ATTRIB_PNTC][0] = 0.0;
+ span.attrStepY[FRAG_ATTRIB_PNTC][1] = dtdy;
+ tCoords[numTcoords++] = FRAG_ATTRIB_PNTC;
continue;
}
/* use vertex's texcoord/attrib */
@@ -221,10 +222,7 @@ sprite_point(GLcontext *ctx, const SWvertex *vert)
GLuint i;
/* setup texcoord T for this row */
for (i = 0; i < numTcoords; i++) {
- if (tCoords[i] == FRAG_ATTRIB_FOGC)
- span.attrStart[FRAG_ATTRIB_FOGC][3] = tcoord;
- else
- span.attrStart[tCoords[i]][1] = tcoord;
+ span.attrStart[tCoords[i]][1] = tcoord;
}
/* these might get changed by span clipping */
--
cgit v1.2.3
From 92b9aa1646daa7d9e6470e9d1dbb3460eeae8941 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Wed, 29 Jul 2009 13:37:41 -0700
Subject: i915: Add ARB_point_sprite since we already expose NV_point_sprite.
It's all fallbacks anyway due to the DD_POINT_ATTEN fallback.
---
src/mesa/drivers/dri/intel/intel_extensions.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c
index 7742609d242..3a09a53a16a 100644
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -71,6 +71,7 @@ static const struct dri_extension card_extensions[] = {
{ "GL_ARB_half_float_pixel", NULL },
{ "GL_ARB_multitexture", NULL },
{ "GL_ARB_point_parameters", GL_ARB_point_parameters_functions },
+ { "GL_ARB_point_sprite", NULL },
{ "GL_ARB_texture_border_clamp", NULL },
{ "GL_ARB_texture_cube_map", NULL },
{ "GL_ARB_texture_env_add", NULL },
--
cgit v1.2.3
From 246729162ccc7e2672aa6cc957053ce3a8975a2c Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Wed, 29 Jul 2009 14:06:05 -0700
Subject: i915: Add support for EXT_stencil_two_side and ATI_separate_stencil.
Passes tests/stencil_twoside and glean/stencil2.
---
src/mesa/drivers/dri/i915/i915_context.c | 2 +
src/mesa/drivers/dri/i915/i915_context.h | 5 +-
src/mesa/drivers/dri/i915/i915_reg.h | 2 +
src/mesa/drivers/dri/i915/i915_state.c | 151 ++++++++++++++++++--------
src/mesa/drivers/dri/i915/i915_vtbl.c | 7 +-
src/mesa/drivers/dri/intel/intel_extensions.c | 2 +
6 files changed, 115 insertions(+), 54 deletions(-)
diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c
index 367d2a3b648..5aa41334b0b 100644
--- a/src/mesa/drivers/dri/i915/i915_context.c
+++ b/src/mesa/drivers/dri/i915/i915_context.c
@@ -75,6 +75,8 @@ i915InvalidateState(GLcontext * ctx, GLuint new_state)
if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))
i915_update_fog(ctx);
+ if (new_state & (_NEW_STENCIL | _NEW_BUFFERS | _NEW_POLYGON))
+ i915_update_stencil(ctx);
}
diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h
index 87bbf5f9271..c6b7377da80 100644
--- a/src/mesa/drivers/dri/i915/i915_context.h
+++ b/src/mesa/drivers/dri/i915/i915_context.h
@@ -82,7 +82,9 @@
#define I915_CTXREG_IAB 6
#define I915_CTXREG_BLENDCOLOR0 7
#define I915_CTXREG_BLENDCOLOR1 8
-#define I915_CTX_SETUP_SIZE 9
+#define I915_CTXREG_BF_STENCIL_OPS 9
+#define I915_CTXREG_BF_STENCIL_MASKS 10
+#define I915_CTX_SETUP_SIZE 11
#define I915_FOGREG_COLOR 0
#define I915_FOGREG_MODE0 1
@@ -321,6 +323,7 @@ extern void i915_print_ureg(const char *msg, GLuint ureg);
extern void i915InitStateFunctions(struct dd_function_table *functions);
extern void i915InitState(struct i915_context *i915);
extern void i915_update_fog(GLcontext * ctx);
+extern void i915_update_stencil(GLcontext * ctx);
/*======================================================================
diff --git a/src/mesa/drivers/dri/i915/i915_reg.h b/src/mesa/drivers/dri/i915/i915_reg.h
index 84db58ea950..80ec46190d7 100644
--- a/src/mesa/drivers/dri/i915/i915_reg.h
+++ b/src/mesa/drivers/dri/i915/i915_reg.h
@@ -86,8 +86,10 @@
#define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16)
#define BFM_STENCIL_TEST_MASK_SHIFT 8
#define BFM_STENCIL_TEST_MASK_MASK (0xff<<8)
+#define BFM_STENCIL_TEST_MASK(x) (((x)&0xff) << 8)
#define BFM_STENCIL_WRITE_MASK_SHIFT 0
#define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0)
+#define BFM_STENCIL_WRITE_MASK(x) ((x)&0xff)
diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c
index 814fb59fd34..670451bc838 100644
--- a/src/mesa/drivers/dri/i915/i915_state.c
+++ b/src/mesa/drivers/dri/i915/i915_state.c
@@ -48,73 +48,119 @@
#define FILE_DEBUG_FLAG DEBUG_STATE
-static void
-i915StencilFuncSeparate(GLcontext * ctx, GLenum face, GLenum func, GLint ref,
- GLuint mask)
+void
+i915_update_stencil(GLcontext * ctx)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
- int test = intel_translate_compare_func(func);
-
- mask = mask & 0xff;
+ GLuint front_ref, front_writemask, front_mask;
+ GLenum front_func, front_fail, front_pass_z_fail, front_pass_z_pass;
+ GLuint back_ref, back_writemask, back_mask;
+ GLenum back_func, back_fail, back_pass_z_fail, back_pass_z_pass;
- DBG("%s : func: %s, ref : 0x%x, mask: 0x%x\n", __FUNCTION__,
- _mesa_lookup_enum_by_nr(func), ref, mask);
+ I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+ /* The 915 considers CW to be "front" for two-sided stencil, so choose
+ * appropriately.
+ */
+ /* _NEW_POLYGON | _NEW_STENCIL */
+ if (ctx->Polygon.FrontFace == GL_CW) {
+ front_ref = ctx->Stencil.Ref[0];
+ front_mask = ctx->Stencil.ValueMask[0];
+ front_writemask = ctx->Stencil.WriteMask[0];
+ front_func = ctx->Stencil.Function[0];
+ front_fail = ctx->Stencil.FailFunc[0];
+ front_pass_z_fail = ctx->Stencil.ZFailFunc[0];
+ front_pass_z_pass = ctx->Stencil.ZPassFunc[0];
+ back_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace];
+ back_mask = ctx->Stencil.ValueMask[ctx->Stencil._BackFace];
+ back_writemask = ctx->Stencil.WriteMask[ctx->Stencil._BackFace];
+ back_func = ctx->Stencil.Function[ctx->Stencil._BackFace];
+ back_fail = ctx->Stencil.FailFunc[ctx->Stencil._BackFace];
+ back_pass_z_fail = ctx->Stencil.ZFailFunc[ctx->Stencil._BackFace];
+ back_pass_z_pass = ctx->Stencil.ZPassFunc[ctx->Stencil._BackFace];
+ } else {
+ front_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace];
+ front_mask = ctx->Stencil.ValueMask[ctx->Stencil._BackFace];
+ front_writemask = ctx->Stencil.WriteMask[ctx->Stencil._BackFace];
+ front_func = ctx->Stencil.Function[ctx->Stencil._BackFace];
+ front_fail = ctx->Stencil.FailFunc[ctx->Stencil._BackFace];
+ front_pass_z_fail = ctx->Stencil.ZFailFunc[ctx->Stencil._BackFace];
+ front_pass_z_pass = ctx->Stencil.ZPassFunc[ctx->Stencil._BackFace];
+ back_ref = ctx->Stencil.Ref[0];
+ back_mask = ctx->Stencil.ValueMask[0];
+ back_writemask = ctx->Stencil.WriteMask[0];
+ back_func = ctx->Stencil.Function[0];
+ back_fail = ctx->Stencil.FailFunc[0];
+ back_pass_z_fail = ctx->Stencil.ZFailFunc[0];
+ back_pass_z_pass = ctx->Stencil.ZPassFunc[0];
+ }
- I915_STATECHANGE(i915, I915_UPLOAD_CTX);
- i915->state.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK;
+ /* Set front state. */
+ i915->state.Ctx[I915_CTXREG_STATE4] &= ~(MODE4_ENABLE_STENCIL_TEST_MASK |
+ MODE4_ENABLE_STENCIL_WRITE_MASK);
i915->state.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK |
- STENCIL_TEST_MASK(mask));
+ ENABLE_STENCIL_WRITE_MASK |
+ STENCIL_TEST_MASK(front_mask) |
+ STENCIL_WRITE_MASK(front_writemask));
i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_REF_MASK |
- S5_STENCIL_TEST_FUNC_MASK);
+ S5_STENCIL_TEST_FUNC_MASK |
+ S5_STENCIL_FAIL_MASK |
+ S5_STENCIL_PASS_Z_FAIL_MASK |
+ S5_STENCIL_PASS_Z_PASS_MASK);
+
+ i915->state.Ctx[I915_CTXREG_LIS5] |=
+ (front_ref << S5_STENCIL_REF_SHIFT) |
+ (intel_translate_compare_func(front_func) << S5_STENCIL_TEST_FUNC_SHIFT) |
+ (intel_translate_stencil_op(front_fail) << S5_STENCIL_FAIL_SHIFT) |
+ (intel_translate_stencil_op(front_pass_z_fail) <<
+ S5_STENCIL_PASS_Z_FAIL_SHIFT) |
+ (intel_translate_stencil_op(front_pass_z_pass) <<
+ S5_STENCIL_PASS_Z_PASS_SHIFT);
+
+ /* Set back state if different from front. */
+ if (ctx->Stencil._TestTwoSide) {
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] &=
+ ~(BFO_STENCIL_REF_MASK |
+ BFO_STENCIL_TEST_MASK |
+ BFO_STENCIL_FAIL_MASK |
+ BFO_STENCIL_PASS_Z_FAIL_MASK |
+ BFO_STENCIL_PASS_Z_PASS_MASK);
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] |= BFO_STENCIL_TWO_SIDE |
+ (back_ref << BFO_STENCIL_REF_SHIFT) |
+ (intel_translate_compare_func(back_func) << BFO_STENCIL_TEST_SHIFT) |
+ (intel_translate_stencil_op(back_fail) << BFO_STENCIL_FAIL_SHIFT) |
+ (intel_translate_stencil_op(back_pass_z_fail) <<
+ BFO_STENCIL_PASS_Z_FAIL_SHIFT) |
+ (intel_translate_stencil_op(back_pass_z_pass) <<
+ BFO_STENCIL_PASS_Z_PASS_SHIFT);
+
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] &=
+ ~(BFM_STENCIL_TEST_MASK_MASK |
+ BFM_STENCIL_WRITE_MASK_MASK);
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] |=
+ BFM_STENCIL_TEST_MASK(back_mask) |
+ BFM_STENCIL_WRITE_MASK(back_writemask);
+ } else {
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] &= ~BFO_STENCIL_TWO_SIDE;
+ }
+}
- i915->state.Ctx[I915_CTXREG_LIS5] |= ((ref << S5_STENCIL_REF_SHIFT) |
- (test <<
- S5_STENCIL_TEST_FUNC_SHIFT));
+static void
+i915StencilFuncSeparate(GLcontext * ctx, GLenum face, GLenum func, GLint ref,
+ GLuint mask)
+{
}
static void
i915StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask)
{
- struct i915_context *i915 = I915_CONTEXT(ctx);
-
- DBG("%s : mask 0x%x\n", __FUNCTION__, mask);
-
- mask = mask & 0xff;
-
- I915_STATECHANGE(i915, I915_UPLOAD_CTX);
- i915->state.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK;
- i915->state.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_WRITE_MASK |
- STENCIL_WRITE_MASK(mask));
}
-
static void
i915StencilOpSeparate(GLcontext * ctx, GLenum face, GLenum fail, GLenum zfail,
GLenum zpass)
{
- struct i915_context *i915 = I915_CONTEXT(ctx);
- int fop = intel_translate_stencil_op(fail);
- int dfop = intel_translate_stencil_op(zfail);
- int dpop = intel_translate_stencil_op(zpass);
-
-
- DBG("%s: fail : %s, zfail: %s, zpass : %s\n", __FUNCTION__,
- _mesa_lookup_enum_by_nr(fail),
- _mesa_lookup_enum_by_nr(zfail), _mesa_lookup_enum_by_nr(zpass));
-
- I915_STATECHANGE(i915, I915_UPLOAD_CTX);
-
- i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_FAIL_MASK |
- S5_STENCIL_PASS_Z_FAIL_MASK |
- S5_STENCIL_PASS_Z_PASS_MASK);
-
- i915->state.Ctx[I915_CTXREG_LIS5] |= ((fop << S5_STENCIL_FAIL_SHIFT) |
- (dfop <<
- S5_STENCIL_PASS_Z_FAIL_SHIFT) |
- (dpop <<
- S5_STENCIL_PASS_Z_PASS_SHIFT));
}
static void
@@ -945,6 +991,17 @@ i915_init_packets(struct i915_context *i915)
_3DSTATE_CONST_BLEND_COLOR_CMD;
i915->state.Ctx[I915_CTXREG_BLENDCOLOR1] = 0;
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] =
+ _3DSTATE_BACKFACE_STENCIL_MASKS |
+ BFM_ENABLE_STENCIL_TEST_MASK |
+ BFM_ENABLE_STENCIL_WRITE_MASK |
+ (0xff << BFM_STENCIL_WRITE_MASK_SHIFT) |
+ (0xff << BFM_STENCIL_TEST_MASK_SHIFT);
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] =
+ _3DSTATE_BACKFACE_STENCIL_OPS |
+ BFO_ENABLE_STENCIL_REF |
+ BFO_ENABLE_STENCIL_FUNCS |
+ BFO_ENABLE_STENCIL_TWO_SIDE;
}
{
diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c
index fe1be93a6d0..707864ebfdb 100644
--- a/src/mesa/drivers/dri/i915/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i915_vtbl.c
@@ -176,7 +176,7 @@ i915_emit_invarient_state(struct intel_context *intel)
{
BATCH_LOCALS;
- BEGIN_BATCH(20, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(18, IGNORE_CLIPRECTS);
OUT_BATCH(_3DSTATE_AA_CMD |
AA_LINE_ECAAR_WIDTH_ENABLE |
@@ -225,11 +225,6 @@ i915_emit_invarient_state(struct intel_context *intel)
OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */
OUT_BATCH(0);
-
- /* Don't support twosided stencil yet */
- OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0);
- OUT_BATCH(0);
-
ADVANCE_BATCH();
}
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c
index 3a09a53a16a..6a68021c691 100644
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -120,8 +120,10 @@ static const struct dri_extension i915_extensions[] = {
{ "GL_ARB_fragment_program", NULL },
{ "GL_ARB_shadow", NULL },
{ "GL_ARB_texture_non_power_of_two", NULL },
+ { "GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions },
{ "GL_ATI_texture_env_combine3", NULL },
{ "GL_EXT_shadow_funcs", NULL },
+ { "GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions },
{ "GL_NV_texture_env_combine4", NULL },
{ NULL, NULL }
};
--
cgit v1.2.3
From 0fdac3529c8a7f7ce41420b79e817407d19a12a2 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Thu, 30 Jul 2009 03:30:46 -0400
Subject: r600: fix mipmaps
redbook mipmap works
---
src/mesa/drivers/dri/r600/r600_texstate.c | 5 +++++
src/mesa/drivers/dri/r600/r700_render.c | 2 +-
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index 6918b3b8a12..4840586858d 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -610,6 +610,11 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex
SETfield(t->SQ_TEX_RESOURCE1, firstImage->Height - 1,
TEX_HEIGHT_shift, TEX_HEIGHT_mask);
+ if ((t->mt->lastLevel - t->mt->firstLevel) > 0) {
+ t->SQ_TEX_RESOURCE3 = t->mt->levels[0].size / 256;
+ SETfield(t->SQ_TEX_RESOURCE4, t->mt->firstLevel, BASE_LEVEL_shift, BASE_LEVEL_mask);
+ SETfield(t->SQ_TEX_RESOURCE5, t->mt->lastLevel, LAST_LEVEL_shift, LAST_LEVEL_mask);
+ }
}
/**
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 4e0d5391d0a..1810f4be0ee 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -173,7 +173,7 @@ GLboolean r700SendTextureState(context_t *context)
RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0, &offset_mod);
R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3,
bo,
- 0,
+ r700->textures[i]->SQ_TEX_RESOURCE3,
RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0, &offset_mod);
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4);
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5);
--
cgit v1.2.3
From 3e2b6a204966b962c9881e90fe3f0b74cf84d8c4 Mon Sep 17 00:00:00 2001
From: "Xiang, Haihao"
Date: Thu, 30 Jul 2009 14:45:11 +0800
Subject: i965: Postpone ff_sync message in CLIP kernel on IGDNG
In addition, it guarantees ff_sync message is issued
---
src/mesa/drivers/dri/i965/brw_clip.h | 3 ++
src/mesa/drivers/dri/i965/brw_clip_line.c | 7 ++--
src/mesa/drivers/dri/i965/brw_clip_point.c | 4 +--
src/mesa/drivers/dri/i965/brw_clip_tri.c | 8 +++--
src/mesa/drivers/dri/i965/brw_clip_unfilled.c | 3 +-
src/mesa/drivers/dri/i965/brw_clip_util.c | 48 ++++++++++++++++++++-------
6 files changed, 53 insertions(+), 20 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h
index 12e8548df1f..957df441ab0 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.h
+++ b/src/mesa/drivers/dri/i965/brw_clip.h
@@ -100,6 +100,8 @@ struct brw_clip_compile {
struct brw_reg fixed_planes;
struct brw_reg plane_equation;
+
+ struct brw_reg ff_sync;
} reg;
/* 3 different ways of expressing vertex size:
@@ -173,4 +175,5 @@ struct brw_reg get_tmp( struct brw_clip_compile *c );
void brw_clip_project_position(struct brw_clip_compile *c,
struct brw_reg pos );
void brw_clip_ff_sync(struct brw_clip_compile *c);
+void brw_clip_init_ff_sync(struct brw_clip_compile *c);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c
index 9abd0642aa2..048ca620fab 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_line.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_line.c
@@ -85,6 +85,10 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
i++;
}
+ if (c->need_ff_sync) {
+ c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+ i++;
+ }
c->first_tmp = i;
c->last_tmp = i;
@@ -246,8 +250,6 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
- if (c->need_ff_sync)
- brw_clip_ff_sync(c);
not_culled = brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, GL_FALSE);
@@ -265,6 +267,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
void brw_emit_line_clip( struct brw_clip_compile *c )
{
brw_clip_line_alloc_regs(c);
+ brw_clip_init_ff_sync(c);
if (c->key.do_flat_shading)
brw_clip_copy_colors(c, 0, 1);
diff --git a/src/mesa/drivers/dri/i965/brw_clip_point.c b/src/mesa/drivers/dri/i965/brw_clip_point.c
index 97382991686..8458f61c5a0 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_point.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_point.c
@@ -50,7 +50,7 @@ void brw_emit_point_clip( struct brw_clip_compile *c )
/* Send an empty message to kill the thread:
*/
brw_clip_tri_alloc_regs(c, 0);
- if (c->need_ff_sync)
- brw_clip_ff_sync(c);
+ brw_clip_init_ff_sync(c);
+
brw_clip_kill_thread(c);
}
diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c
index 4c2d655fb10..0efd77225e2 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_tri.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c
@@ -119,6 +119,11 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
i++;
}
+ if (c->need_ff_sync) {
+ c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+ i++;
+ }
+
c->first_tmp = i;
c->last_tmp = i;
@@ -563,6 +568,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
brw_clip_tri_init_vertices(c);
brw_clip_init_clipmask(c);
+ brw_clip_init_ff_sync(c);
/* if -ve rhw workaround bit is set,
do cliptest */
@@ -589,8 +595,6 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
else
maybe_do_clip_tri(c);
- if (c->need_ff_sync)
- brw_clip_ff_sync(c);
brw_clip_tri_emit_polygon(c);
/* Send an empty message to kill the thread:
diff --git a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
index 26950383c1b..ad1bfa435fb 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
@@ -453,6 +453,7 @@ void brw_emit_unfilled_clip( struct brw_clip_compile *c )
brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
brw_clip_tri_init_vertices(c);
+ brw_clip_init_ff_sync(c);
assert(c->offset[VERT_RESULT_EDGE]);
@@ -496,8 +497,6 @@ void brw_emit_unfilled_clip( struct brw_clip_compile *c )
}
brw_ENDIF(p, do_clip);
- if (c->need_ff_sync)
- brw_clip_ff_sync(c);
emit_unfilled_primitives(c);
brw_clip_kill_thread(c);
}
diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c
index e09efc07ed4..5a73abdfee9 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_util.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_util.c
@@ -213,6 +213,8 @@ void brw_clip_emit_vue(struct brw_clip_compile *c,
struct brw_compile *p = &c->func;
GLuint start = c->last_mrf;
+ brw_clip_ff_sync(c);
+
assert(!(allocate && eot));
/* Cycle through mrf regs - probably futile as we have to wait for
@@ -263,6 +265,7 @@ void brw_clip_kill_thread(struct brw_clip_compile *c)
{
struct brw_compile *p = &c->func;
+ brw_clip_ff_sync(c);
/* Send an empty message to kill the thread and release any
* allocated urb entry:
*/
@@ -356,17 +359,38 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c )
void brw_clip_ff_sync(struct brw_clip_compile *c)
{
+ if (c->need_ff_sync) {
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *need_ff_sync;
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
+ need_ff_sync = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
+ brw_ff_sync(p,
+ c->reg.R0,
+ 0,
+ c->reg.R0,
+ 1,
+ 1, /* used */
+ 1, /* msg length */
+ 1, /* response length */
+ 0, /* eot */
+ 1, /* write compelete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+ }
+ brw_ENDIF(p, need_ff_sync);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+}
+
+void brw_clip_init_ff_sync(struct brw_clip_compile *c)
+{
+ if (c->need_ff_sync) {
struct brw_compile *p = &c->func;
- brw_ff_sync(p,
- c->reg.R0,
- 0,
- c->reg.R0,
- 1,
- 1, /* used */
- 1, /* msg length */
- 1, /* response length */
- 0, /* eot */
- 1, /* write compelete */
- 0, /* urb offset */
- BRW_URB_SWIZZLE_NONE);
+
+ brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));
+ }
}
--
cgit v1.2.3
From b724dd28e24ec1c38af1082f5e16cd9a12d1653d Mon Sep 17 00:00:00 2001
From: Michal Krol
Date: Thu, 30 Jul 2009 10:12:09 +0200
Subject: tgsi: Document LOOP/ENDLOOP instruction operation.
---
src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt | 20 ++++++++++++++++++--
1 file changed, 18 insertions(+), 2 deletions(-)
diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
index a3f4947c734..5f88cc2acac 100644
--- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
+++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
@@ -667,7 +667,16 @@ TGSI Instruction Specification
1.9.8 LOOP - Loop
- TBD
+ dst.x = floor(src.x)
+ dst.y = floor(src.y)
+ dst.z = floor(src.z)
+
+ if (dst.y <= 0)
+ pc = [matching ENDLOOP] + 1
+ endif
+
+ Note: The destination must be a loop register.
+ The source must be a constant register.
1.9.9 REP - Repeat
@@ -687,7 +696,14 @@ TGSI Instruction Specification
1.9.12 ENDLOOP - End Loop
- TBD
+ dst.x = dst.x + dst.z
+ dst.y = dst.y - 1.0
+
+ if (dst.y > 0)
+ pc = [matching LOOP instruction] + 1
+ endif
+
+ Note: The destination must be a loop register.
1.9.13 ENDREP - End Repeat
--
cgit v1.2.3
From 1e9d3ad4e1d987e1130e4bcd2ffc35f0e18064c3 Mon Sep 17 00:00:00 2001
From: Michal Krol
Date: Thu, 30 Jul 2009 10:31:57 +0200
Subject: vbo: Fix build on windows.
---
src/mesa/vbo/vbo_split_inplace.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/mesa/vbo/vbo_split_inplace.c b/src/mesa/vbo/vbo_split_inplace.c
index 266bc56c826..9628227e7c4 100644
--- a/src/mesa/vbo/vbo_split_inplace.c
+++ b/src/mesa/vbo/vbo_split_inplace.c
@@ -59,11 +59,11 @@ struct split_context {
static void flush_vertex( struct split_context *split )
{
GLuint min_index, max_index;
+ GLuint i;
if (!split->dstprim_nr)
return;
- GLuint i;
min_index = split->dstprim[0].start;
max_index = min_index + split->dstprim[0].count - 1;
--
cgit v1.2.3
From 65fb2c52f9e86627652cac0d4139b40bef102596 Mon Sep 17 00:00:00 2001
From: Michal Krol
Date: Thu, 30 Jul 2009 10:33:18 +0200
Subject: tgsi: Fix number operands for LOOP/ENDLOOP.
---
src/gallium/auxiliary/tgsi/tgsi_info.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index fedda7bff90..3a47e9b84df 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -106,11 +106,11 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{ 1, 2, 1, 0, "TXL", TGSI_OPCODE_TXL },
{ 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK },
{ 0, 1, 0, 1, "IF", TGSI_OPCODE_IF },
- { 0, 0, 0, 0, "LOOP", TGSI_OPCODE_LOOP },
+ { 1, 1, 0, 0, "LOOP", TGSI_OPCODE_LOOP },
{ 0, 1, 0, 0, "REP", TGSI_OPCODE_REP },
{ 0, 0, 0, 1, "ELSE", TGSI_OPCODE_ELSE },
{ 0, 0, 0, 0, "ENDIF", TGSI_OPCODE_ENDIF },
- { 0, 0, 0, 0, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
+ { 1, 0, 0, 0, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
{ 0, 0, 0, 0, "ENDREP", TGSI_OPCODE_ENDREP },
{ 0, 1, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA },
{ 1, 0, 0, 0, "POPA", TGSI_OPCODE_POPA },
--
cgit v1.2.3
From cf8907018e449580b397f09c267219a612ba5db4 Mon Sep 17 00:00:00 2001
From: Michal Krol
Date: Thu, 30 Jul 2009 10:34:06 +0200
Subject: tgsi: Declare a LOOP register.
The only valid usage for LOOP/ENDLOOP instructions
is LOOP[0] as a destination register.
The only valid usage for the remaining instructions
is LOOP[0].x as an indirect register.
---
src/gallium/auxiliary/tgsi/tgsi_dump.c | 5 +++--
src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 5 +++--
src/gallium/auxiliary/tgsi/tgsi_sanity.c | 5 +++--
src/gallium/auxiliary/tgsi/tgsi_text.c | 3 ++-
src/gallium/include/pipe/p_shader_tokens.h | 1 +
5 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index e1cd8479cb4..4ca3a16b8ae 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -82,7 +82,7 @@ static const char *processor_type_names[] =
"GEOM"
};
-static const char *file_names[] =
+static const char *file_names[TGSI_FILE_COUNT] =
{
"NULL",
"CONST",
@@ -91,7 +91,8 @@ static const char *file_names[] =
"TEMP",
"SAMP",
"ADDR",
- "IMM"
+ "IMM",
+ "LOOP"
};
static const char *interpolate_names[] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
index c944760ca67..4a9c02b1413 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -69,7 +69,7 @@ static const char *TGSI_TOKEN_TYPES[] =
"TOKEN_TYPE_INSTRUCTION"
};
-static const char *TGSI_FILES[] =
+static const char *TGSI_FILES[TGSI_FILE_COUNT] =
{
"FILE_NULL",
"FILE_CONSTANT",
@@ -78,7 +78,8 @@ static const char *TGSI_FILES[] =
"FILE_TEMPORARY",
"FILE_SAMPLER",
"FILE_ADDRESS",
- "FILE_IMMEDIATE"
+ "FILE_IMMEDIATE",
+ "FILE_LOOP"
};
static const char *TGSI_INTERPOLATES[] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 6f1f5c2b4b0..8bb186bae54 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -131,7 +131,7 @@ is_register_used(
return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE;
}
-static const char *file_names[] =
+static const char *file_names[TGSI_FILE_COUNT] =
{
"NULL",
"CONST",
@@ -140,7 +140,8 @@ static const char *file_names[] =
"TEMP",
"SAMP",
"ADDR",
- "IMM"
+ "IMM",
+ "LOOP"
};
static boolean
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index bfcbc40982a..d438450b1e4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -231,7 +231,8 @@ static const char *file_names[TGSI_FILE_COUNT] =
"TEMP",
"SAMP",
"ADDR",
- "IMM"
+ "IMM",
+ "LOOP"
};
static boolean
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index e6e29a04ec0..c4be604e5a0 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -78,6 +78,7 @@ enum tgsi_file_type {
TGSI_FILE_SAMPLER =5,
TGSI_FILE_ADDRESS =6,
TGSI_FILE_IMMEDIATE =7,
+ TGSI_FILE_LOOP =8,
TGSI_FILE_COUNT /**< how many TGSI_FILE_ types */
};
--
cgit v1.2.3
From 6c70285e330bd19db78b7d45e43a01b0255ca15f Mon Sep 17 00:00:00 2001
From: Michal Krol
Date: Thu, 30 Jul 2009 11:39:06 +0200
Subject: tgsi: Add proper constraints to sanity.
---
src/gallium/auxiliary/tgsi/tgsi_sanity.c | 24 ++++++++++++++++++++++--
1 file changed, 22 insertions(+), 2 deletions(-)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 8bb186bae54..cb62a95fc0a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -235,9 +235,29 @@ iter_instruction(
index,
"indirect",
FALSE );
- if (file != TGSI_FILE_ADDRESS || index != 0)
- report_warning( ctx, "Indirect register not ADDR[0]" );
+ if (!(file == TGSI_FILE_ADDRESS || file == TGSI_FILE_LOOP) || index != 0) {
+ report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]");
+ }
+ }
+ }
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_LOOP:
+ case TGSI_OPCODE_ENDLOOP:
+ if (inst->FullDstRegisters[0].DstRegister.File != TGSI_FILE_LOOP ||
+ inst->FullDstRegisters[0].DstRegister.Index != 0) {
+ report_error(ctx, "Destination register must be LOOP[0]");
+ }
+ break;
+ }
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_LOOP:
+ if (inst->FullSrcRegisters[0].SrcRegister.File != TGSI_FILE_CONSTANT &&
+ inst->FullSrcRegisters[0].SrcRegister.File != TGSI_FILE_IMMEDIATE) {
+ report_error(ctx, "Source register file must be either CONST or IMM");
}
+ break;
}
ctx->num_instructions++;
--
cgit v1.2.3
From f583745519b2b99ca637cdfa6201fd653c848fd6 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom