summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/gallium/state_trackers/nine/device9.c85
-rw-r--r--src/gallium/state_trackers/nine/device9.h1
-rw-r--r--src/gallium/state_trackers/nine/nine_shader.c5
-rw-r--r--src/gallium/state_trackers/nine/nine_state.c170
-rw-r--r--src/gallium/state_trackers/nine/nine_state.h13
-rw-r--r--src/gallium/state_trackers/nine/stateblock9.c85
-rw-r--r--src/gallium/state_trackers/nine/vertexshader9.c15
-rw-r--r--src/gallium/state_trackers/nine/vertexshader9.h8
8 files changed, 322 insertions, 60 deletions
diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c
index dca75c53e6e..f1354904344 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -168,12 +168,31 @@ NineDevice9_ctor( struct NineDevice9 *This,
if (This->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) {
DBG("Application asked full Software Vertex Processing.\n");
This->swvp = true;
+ This->may_swvp = true;
} else
This->swvp = false;
- if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING)
+ if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING) {
DBG("Application asked mixed Software Vertex Processing.\n");
+ This->may_swvp = true;
+ }
/* TODO: check if swvp is resetted by device Resets */
+ if (This->may_swvp &&
+ (This->screen->get_shader_param(This->screen, PIPE_SHADER_VERTEX,
+ PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE)
+ < (NINE_MAX_CONST_F_SWVP/2) * sizeof(float[4]) ||
+ This->screen->get_shader_param(This->screen, PIPE_SHADER_VERTEX,
+ PIPE_SHADER_CAP_MAX_CONST_BUFFERS) < 5)) {
+ /* Note: We just go on, some apps never use the abilities of
+ * swvp, and just set more constants than allowed at init.
+ * Only cards we support that are affected are the r500 */
+ WARN("Card unable to handle Software Vertex Processing. Game may fail\n");
+ }
+
+ /* When may_swvp, SetConstant* limits are different */
+ if (This->may_swvp)
+ This->caps.MaxVertexShaderConst = NINE_MAX_CONST_F_SWVP;
+
This->pipe = This->screen->context_create(This->screen, NULL, 0);
if (!This->pipe) { return E_OUTOFMEMORY; } /* guess */
@@ -322,12 +341,22 @@ NineDevice9_ctor( struct NineDevice9 *This,
This->vs_const_size = max_const_vs * sizeof(float[4]);
This->ps_const_size = max_const_ps * sizeof(float[4]);
/* Include space for I,B constants for user constbuf. */
+ if (This->may_swvp) {
+ This->state.vs_const_f_swvp = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1);
+ if (!This->state.vs_const_f_swvp)
+ return E_OUTOFMEMORY;
+ This->state.vs_lconstf_temp = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1);
+ This->state.vs_const_i = CALLOC(NINE_MAX_CONST_I_SWVP * sizeof(int[4]), 1);
+ This->state.vs_const_b = CALLOC(NINE_MAX_CONST_B_SWVP * sizeof(BOOL), 1);
+ } else {
+ This->state.vs_const_f_swvp = NULL;
+ This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1);
+ This->state.vs_const_i = CALLOC(NINE_MAX_CONST_I * sizeof(int[4]), 1);
+ This->state.vs_const_b = CALLOC(NINE_MAX_CONST_B * sizeof(BOOL), 1);
+ }
This->state.vs_const_f = CALLOC(This->vs_const_size, 1);
This->state.ps_const_f = CALLOC(This->ps_const_size, 1);
- This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1);
This->state.ps_lconstf_temp = CALLOC(This->ps_const_size,1);
- This->state.vs_const_i = CALLOC(NINE_MAX_CONST_I * sizeof(int[4]), 1);
- This->state.vs_const_b = CALLOC(NINE_MAX_CONST_B * sizeof(BOOL), 1);
if (!This->state.vs_const_f || !This->state.ps_const_f ||
!This->state.vs_lconstf_temp || !This->state.ps_lconstf_temp ||
!This->state.vs_const_i || !This->state.vs_const_b)
@@ -464,6 +493,7 @@ NineDevice9_dtor( struct NineDevice9 *This )
FREE(This->state.ps_lconstf_temp);
FREE(This->state.vs_const_i);
FREE(This->state.vs_const_b);
+ FREE(This->state.vs_const_f_swvp);
if (This->swapchains) {
for (i = 0; i < This->nswapchains; ++i)
@@ -2490,11 +2520,11 @@ NineDevice9_CreateStateBlock( struct NineDevice9 *This,
/* TODO: texture/sampler state */
memcpy(dst->changed.rs,
nine_render_states_vertex, sizeof(dst->changed.rs));
- nine_ranges_insert(&dst->changed.vs_const_f, 0, This->max_vs_const_f,
+ nine_ranges_insert(&dst->changed.vs_const_f, 0, This->may_swvp ? NINE_MAX_CONST_F_SWVP : This->max_vs_const_f,
&This->range_pool);
- nine_ranges_insert(&dst->changed.vs_const_i, 0, NINE_MAX_CONST_I,
+ nine_ranges_insert(&dst->changed.vs_const_i, 0, This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I,
&This->range_pool);
- nine_ranges_insert(&dst->changed.vs_const_b, 0, NINE_MAX_CONST_B,
+ nine_ranges_insert(&dst->changed.vs_const_b, 0, This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B,
&This->range_pool);
for (s = 0; s < NINE_MAX_SAMPLERS; ++s)
dst->changed.sampler[s] |= 1 << D3DSAMP_DMAPOFFSET;
@@ -2890,6 +2920,7 @@ NineDevice9_SetSoftwareVertexProcessing( struct NineDevice9 *This,
{
if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING) {
This->swvp = bSoftware;
+ This->state.changed.group |= NINE_STATE_SWVP;
return D3D_OK;
} else
return D3DERR_INVALIDCALL; /* msdn. TODO: check in practice */
@@ -3376,6 +3407,7 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
UINT Vector4fCount )
{
struct nine_state *state = This->update;
+ float *vs_const_f = This->may_swvp ? state->vs_const_f_swvp : state->vs_const_f;
DBG("This=%p StartRegister=%u pConstantData=%p Vector4fCount=%u\n",
This, StartRegister, pConstantData, Vector4fCount);
@@ -3388,12 +3420,12 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
user_assert(pConstantData, D3DERR_INVALIDCALL);
if (!This->is_recording) {
- if (!memcmp(&state->vs_const_f[StartRegister * 4], pConstantData,
+ if (!memcmp(&vs_const_f[StartRegister * 4], pConstantData,
Vector4fCount * 4 * sizeof(state->vs_const_f[0])))
return D3D_OK;
}
- memcpy(&state->vs_const_f[StartRegister * 4],
+ memcpy(&vs_const_f[StartRegister * 4],
pConstantData,
Vector4fCount * 4 * sizeof(state->vs_const_f[0]));
@@ -3401,6 +3433,14 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
StartRegister, StartRegister + Vector4fCount,
&This->range_pool);
+ if (This->may_swvp) {
+ Vector4fCount = MIN2(StartRegister + Vector4fCount, NINE_MAX_CONST_F) - StartRegister;
+ if (StartRegister < NINE_MAX_CONST_F)
+ memcpy(&state->vs_const_f[StartRegister * 4],
+ pConstantData,
+ Vector4fCount * 4 * sizeof(state->vs_const_f[0]));
+ }
+
state->changed.group |= NINE_STATE_VS_CONST;
return D3D_OK;
@@ -3413,13 +3453,14 @@ NineDevice9_GetVertexShaderConstantF( struct NineDevice9 *This,
UINT Vector4fCount )
{
const struct nine_state *state = &This->state;
+ float *vs_const_f = This->may_swvp ? state->vs_const_f_swvp : state->vs_const_f;
user_assert(StartRegister < This->caps.MaxVertexShaderConst, D3DERR_INVALIDCALL);
user_assert(StartRegister + Vector4fCount <= This->caps.MaxVertexShaderConst, D3DERR_INVALIDCALL);
user_assert(pConstantData, D3DERR_INVALIDCALL);
memcpy(pConstantData,
- &state->vs_const_f[StartRegister * 4],
+ &vs_const_f[StartRegister * 4],
Vector4fCount * 4 * sizeof(state->vs_const_f[0]));
return D3D_OK;
@@ -3437,8 +3478,10 @@ NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This,
DBG("This=%p StartRegister=%u pConstantData=%p Vector4iCount=%u\n",
This, StartRegister, pConstantData, Vector4iCount);
- user_assert(StartRegister < NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
- user_assert(StartRegister + Vector4iCount <= NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
+ user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I),
+ D3DERR_INVALIDCALL);
+ user_assert(StartRegister + Vector4iCount <= (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I),
+ D3DERR_INVALIDCALL);
user_assert(pConstantData, D3DERR_INVALIDCALL);
if (This->driver_caps.vs_integer) {
@@ -3476,8 +3519,10 @@ NineDevice9_GetVertexShaderConstantI( struct NineDevice9 *This,
const struct nine_state *state = &This->state;
int i;
- user_assert(StartRegister < NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
- user_assert(StartRegister + Vector4iCount <= NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
+ user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I),
+ D3DERR_INVALIDCALL);
+ user_assert(StartRegister + Vector4iCount <= (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I),
+ D3DERR_INVALIDCALL);
user_assert(pConstantData, D3DERR_INVALIDCALL);
if (This->driver_caps.vs_integer) {
@@ -3509,8 +3554,10 @@ NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This,
DBG("This=%p StartRegister=%u pConstantData=%p BoolCount=%u\n",
This, StartRegister, pConstantData, BoolCount);
- user_assert(StartRegister < NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
- user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
+ user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B),
+ D3DERR_INVALIDCALL);
+ user_assert(StartRegister + BoolCount <= (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B),
+ D3DERR_INVALIDCALL);
user_assert(pConstantData, D3DERR_INVALIDCALL);
if (!This->is_recording) {
@@ -3543,8 +3590,10 @@ NineDevice9_GetVertexShaderConstantB( struct NineDevice9 *This,
const struct nine_state *state = &This->state;
int i;
- user_assert(StartRegister < NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
- user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
+ user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B),
+ D3DERR_INVALIDCALL);
+ user_assert(StartRegister + BoolCount <= (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B),
+ D3DERR_INVALIDCALL);
user_assert(pConstantData, D3DERR_INVALIDCALL);
for (i = 0; i < BoolCount; i++)
diff --git a/src/gallium/state_trackers/nine/device9.h b/src/gallium/state_trackers/nine/device9.h
index f2fd164cc12..b6aa5e06531 100644
--- a/src/gallium/state_trackers/nine/device9.h
+++ b/src/gallium/state_trackers/nine/device9.h
@@ -48,6 +48,7 @@ struct NineDevice9
{
struct NineUnknown base;
boolean ex;
+ boolean may_swvp;
/* G3D context */
struct pipe_screen *screen;
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index 2d4e323a4ae..2b573e6879e 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -3501,7 +3501,10 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
tx->parse++; /* for byte_size */
if (tx->failure) {
- ERR("Encountered buggy shader\n");
+ /* For VS shaders, we print the warning later,
+ * we first try with swvp. */
+ if (IS_PS)
+ ERR("Encountered buggy shader\n");
ureg_destroy(tx->ureg);
hr = D3DERR_INVALIDCALL;
goto out;
diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c
index 2faca121fe6..024e639f92f 100644
--- a/src/gallium/state_trackers/nine/nine_state.c
+++ b/src/gallium/state_trackers/nine/nine_state.c
@@ -79,6 +79,143 @@ prepare_rasterizer(struct NineDevice9 *device)
}
static void
+prepare_vs_constants_userbuf_swvp(struct NineDevice9 *device)
+{
+    /* Build the vertex shader constant buffers used while software vertex
+     * processing is active.  The float constants are exposed as two halves
+     * (cb0_swvp / cb1_swvp), the integer constants as cb2_swvp and the
+     * boolean constants as cb3_swvp.  A buffer is rebuilt when its constants
+     * are dirty or when NINE_STATE_SWVP is set in the changed group.
+     * On exit the dirty ranges are handed back to the device range pool,
+     * NINE_STATE_VS_CONST is cleared and NINE_STATE_COMMIT_CONST_VS is
+     * requested. */
+    struct nine_state *state = &device->state;
+    const unsigned swvp_changed = state->changed.group & NINE_STATE_SWVP;
+    /* Size in bytes of one half of the swvp float constant array. */
+    const unsigned half_f_size = (NINE_MAX_CONST_F_SWVP / 2) * sizeof(float[4]);
+    struct pipe_constant_buffer *swvp_cbs[4] = {
+        &state->pipe.cb0_swvp,
+        &state->pipe.cb1_swvp,
+        &state->pipe.cb2_swvp,
+        &state->pipe.cb3_swvp
+    };
+    struct nine_range **dirty[3] = {
+        &state->changed.vs_const_f,
+        &state->changed.vs_const_i,
+        &state->changed.vs_const_b
+    };
+    unsigned i;
+
+    if (state->changed.vs_const_f || swvp_changed) {
+        struct pipe_constant_buffer cb;
+
+        cb.buffer = NULL;
+        cb.buffer_offset = 0;
+        cb.buffer_size = half_f_size;
+        cb.user_buffer = state->vs_const_f_swvp;
+
+        if (state->vs->lconstf.ranges) {
+            const struct nine_lconstf *lconstf = &state->vs->lconstf;
+            const struct nine_range *r;
+            float *dst = state->vs_lconstf_temp;
+            unsigned n = 0;
+
+            /* Copy BOTH halves: cb1_swvp below points into the second half
+             * of this temporary, so copying only cb.buffer_size bytes would
+             * leave constants 4096..8191 without the application's values. */
+            memcpy(dst, state->vs_const_f_swvp,
+                   NINE_MAX_CONST_F_SWVP * sizeof(float[4]));
+
+            /* Overlay the shader's local constants on top of the copy. */
+            for (r = lconstf->ranges; r; r = r->next) {
+                unsigned c = r->end - r->bgn;
+                memcpy(&dst[r->bgn * 4], &lconstf->data[n * 4],
+                       c * 4 * sizeof(float));
+                n += c;
+            }
+            cb.user_buffer = dst;
+        }
+
+        state->pipe.cb0_swvp = cb;
+
+        /* Second half of the float constants. */
+        cb.user_buffer = (char *)cb.user_buffer + half_f_size;
+        state->pipe.cb1_swvp = cb;
+    }
+
+    /* The i/b dirty chains are NOT cleared here: they are returned to the
+     * range pool at the end of the function.  Zeroing the pointers early
+     * would drop the chains without recycling them. */
+    if (state->changed.vs_const_i || swvp_changed) {
+        struct pipe_constant_buffer cb;
+
+        cb.buffer = NULL;
+        cb.buffer_offset = 0;
+        cb.buffer_size = NINE_MAX_CONST_I_SWVP * sizeof(int[4]);
+        cb.user_buffer = state->vs_const_i;
+
+        state->pipe.cb2_swvp = cb;
+    }
+
+    if (state->changed.vs_const_b || swvp_changed) {
+        struct pipe_constant_buffer cb;
+
+        cb.buffer = NULL;
+        cb.buffer_offset = 0;
+        cb.buffer_size = NINE_MAX_CONST_B_SWVP * sizeof(BOOL);
+        cb.user_buffer = state->vs_const_b;
+
+        state->pipe.cb3_swvp = cb;
+    }
+
+    if (!device->driver_caps.user_cbufs) {
+        for (i = 0; i < 4; ++i) {
+            struct pipe_constant_buffer *cb = swvp_cbs[i];
+
+            /* A buffer that was not rebuilt above still has the NULL
+             * user_buffer left by its previous upload; keep that upload
+             * instead of copying from a NULL source. */
+            if (!cb->user_buffer)
+                continue;
+
+            u_upload_data(device->constbuf_uploader,
+                          0,
+                          cb->buffer_size,
+                          device->constbuf_alignment,
+                          cb->user_buffer,
+                          &(cb->buffer_offset),
+                          &(cb->buffer));
+            u_upload_unmap(device->constbuf_uploader);
+            cb->user_buffer = NULL;
+        }
+    }
+
+    /* Hand every dirty range chain (f, i, b) back to the range pool. */
+    for (i = 0; i < 3; ++i) {
+        struct nine_range *head = *dirty[i];
+        struct nine_range *tail = head;
+
+        if (!head)
+            continue;
+        while (tail->next)
+            tail = tail->next;
+        nine_range_pool_put_chain(&device->range_pool, head, tail);
+        *dirty[i] = NULL;
+    }
+
+    state->changed.group &= ~NINE_STATE_VS_CONST;
+    state->commit |= NINE_STATE_COMMIT_CONST_VS;
+}
+
+static void
prepare_vs_constants_userbuf(struct NineDevice9 *device)
{
struct nine_state *state = &device->state;
@@ -88,21 +225,27 @@ prepare_vs_constants_userbuf(struct NineDevice9 *device)
cb.buffer_size = device->state.vs->const_used_size;
cb.user_buffer = device->state.vs_const_f;
- if (!cb.buffer_size)
+ if (device->swvp) {
+ prepare_vs_constants_userbuf_swvp(device);
return;
+ }
- if (state->changed.vs_const_i) {
+ if (state->changed.vs_const_i || state->changed.group & NINE_STATE_SWVP) {
int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f];
memcpy(idst, state->vs_const_i, NINE_MAX_CONST_I * sizeof(int[4]));
state->changed.vs_const_i = 0;
}
- if (state->changed.vs_const_b) {
+
+ if (state->changed.vs_const_b || state->changed.group & NINE_STATE_SWVP) {
int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f];
uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I];
memcpy(bdst, state->vs_const_b, NINE_MAX_CONST_B * sizeof(BOOL));
state->changed.vs_const_b = 0;
}
+ if (!cb.buffer_size)
+ return;
+
if (device->state.vs->lconstf.ranges) {
/* TODO: Can we make it so that we don't have to copy everything ? */
const struct nine_lconstf *lconstf = &device->state.vs->lconstf;
@@ -251,7 +394,7 @@ prepare_vs(struct NineDevice9 *device, uint8_t shader_changed)
int has_key_changed = 0;
if (likely(state->programmable_vs))
- has_key_changed = NineVertexShader9_UpdateKey(vs, state);
+ has_key_changed = NineVertexShader9_UpdateKey(vs, device);
if (!shader_changed && !has_key_changed)
return 0;
@@ -740,8 +883,16 @@ commit_vs_constants(struct NineDevice9 *device)
if (unlikely(!device->state.programmable_vs))
pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs_ff);
- else
- pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs);
+ else {
+ if (device->swvp) {
+ pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb0_swvp);
+ pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 1, &device->state.pipe.cb1_swvp);
+ pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 2, &device->state.pipe.cb2_swvp);
+ pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 3, &device->state.pipe.cb3_swvp);
+ } else {
+ pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs);
+ }
+ }
}
static inline void
@@ -777,7 +928,8 @@ commit_ps(struct NineDevice9 *device)
(NINE_STATE_VS | \
NINE_STATE_TEXTURE | \
NINE_STATE_FOG_SHADER | \
- NINE_STATE_POINTSIZE_SHADER)
+ NINE_STATE_POINTSIZE_SHADER | \
+ NINE_STATE_SWVP)
#define NINE_STATE_SHADER_CHANGE_PS \
(NINE_STATE_PS | \
@@ -886,14 +1038,14 @@ nine_update_state(struct NineDevice9 *device)
commit_index_buffer(device);
}
- if (likely(group & (NINE_STATE_FREQUENT | NINE_STATE_VS | NINE_STATE_PS))) {
+ if (likely(group & (NINE_STATE_FREQUENT | NINE_STATE_VS | NINE_STATE_PS | NINE_STATE_SWVP))) {
if (group & NINE_STATE_MULTISAMPLE)
group |= check_multisample(device);
if (group & NINE_STATE_RASTERIZER)
prepare_rasterizer(device);
if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER))
update_textures_and_samplers(device);
- if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->programmable_vs)
+ if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS | NINE_STATE_SWVP)) && state->programmable_vs)
prepare_vs_constants_userbuf(device);
if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps)
prepare_ps_constants_userbuf(device);
diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h
index 8c9483231e0..2aa424d46a7 100644
--- a/src/gallium/state_trackers/nine/nine_state.h
+++ b/src/gallium/state_trackers/nine/nine_state.h
@@ -84,8 +84,9 @@
#define NINE_STATE_PS1X_SHADER (1 << 26)
#define NINE_STATE_POINTSIZE_SHADER (1 << 27)
#define NINE_STATE_MULTISAMPLE (1 << 28)
-#define NINE_STATE_ALL 0x1fffffff
-#define NINE_STATE_UNHANDLED (1 << 29)
+#define NINE_STATE_SWVP (1 << 29)
+#define NINE_STATE_ALL 0x3fffffff
+#define NINE_STATE_UNHANDLED (1 << 30)
#define NINE_STATE_COMMIT_DSA (1 << 0)
#define NINE_STATE_COMMIT_RASTERIZER (1 << 1)
@@ -101,6 +102,9 @@
#define NINE_MAX_CONST_F 256
#define NINE_MAX_CONST_I 16
#define NINE_MAX_CONST_B 16
+#define NINE_MAX_CONST_F_SWVP 8192
+#define NINE_MAX_CONST_I_SWVP 2048
+#define NINE_MAX_CONST_B_SWVP 2048
#define NINE_MAX_CONST_ALL 276 /* B consts count only 1/4 th */
#define NINE_CONST_I_BASE(nconstf) \
@@ -157,6 +161,7 @@ struct nine_state
*/
struct NineVertexShader9 *vs;
float *vs_const_f;
+ float *vs_const_f_swvp;
int *vs_const_i;
BOOL *vs_const_b;
float *vs_lconstf_temp;
@@ -229,6 +234,10 @@ struct nine_state
struct pipe_rasterizer_state rast;
struct pipe_blend_state blend;
struct pipe_constant_buffer cb_vs;
+ struct pipe_constant_buffer cb0_swvp;
+ struct pipe_constant_buffer cb1_swvp;
+ struct pipe_constant_buffer cb2_swvp;
+ struct pipe_constant_buffer cb3_swvp;
struct pipe_constant_buffer cb_ps;
struct pipe_constant_buffer cb_vs_ff;
struct pipe_constant_buffer cb_ps_ff;
diff --git a/src/gallium/state_trackers/nine/stateblock9.c b/src/gallium/state_trackers/nine/stateblock9.c
index 19c3766b3c6..102213e417e 100644
--- a/src/gallium/state_trackers/nine/stateblock9.c
+++ b/src/gallium/state_trackers/nine/stateblock9.c
@@ -30,8 +30,9 @@
/* XXX TODO: handling of lights is broken */
-#define VS_CONST_I_SIZE (NINE_MAX_CONST_I * sizeof(int[4]))
-#define VS_CONST_B_SIZE (NINE_MAX_CONST_B * sizeof(BOOL))
+/* Byte sizes of the vertex shader constant arrays held by a state block.
+ * The integer and boolean sizes depend on the device: when may_swvp is set
+ * the larger *_SWVP limits apply.  The macro argument is parenthesized so
+ * any pointer-valued expression can be passed safely. */
+#define VS_CONST_I_SIZE(device) ((device)->may_swvp ? (NINE_MAX_CONST_I_SWVP * sizeof(int[4])) : (NINE_MAX_CONST_I * sizeof(int[4])))
+#define VS_CONST_B_SIZE(device) ((device)->may_swvp ? (NINE_MAX_CONST_B_SWVP * sizeof(BOOL)) : (NINE_MAX_CONST_B * sizeof(BOOL)))
+#define VS_CONST_F_SWVP_SIZE (NINE_MAX_CONST_F_SWVP * sizeof(float[4]))
HRESULT
NineStateBlock9_ctor( struct NineStateBlock9 *This,
@@ -49,12 +50,19 @@ NineStateBlock9_ctor( struct NineStateBlock9 *This,
This->state.vs_const_f = MALLOC(This->base.device->vs_const_size);
This->state.ps_const_f = MALLOC(This->base.device->ps_const_size);
- This->state.vs_const_i = MALLOC(VS_CONST_I_SIZE);
- This->state.vs_const_b = MALLOC(VS_CONST_B_SIZE);
+ This->state.vs_const_i = MALLOC(VS_CONST_I_SIZE(This->base.device));
+ This->state.vs_const_b = MALLOC(VS_CONST_B_SIZE(This->base.device));
if (!This->state.vs_const_f || !This->state.ps_const_f ||
!This->state.vs_const_i || !This->state.vs_const_b)
return E_OUTOFMEMORY;
+ if (This->base.device->may_swvp) {
+ This->state.vs_const_f_swvp = MALLOC(VS_CONST_F_SWVP_SIZE);
+ if (!This->state.vs_const_f_swvp)
+ return E_OUTOFMEMORY;
+ } else
+ This->state.vs_const_f_swvp = NULL;
+
return D3D_OK;
}
@@ -71,6 +79,7 @@ NineStateBlock9_dtor( struct NineStateBlock9 *This )
FREE(state->ps_const_f);
FREE(state->vs_const_i);
FREE(state->vs_const_b);
+ FREE(state->vs_const_f_swvp);
FREE(state->ff.light);
@@ -101,7 +110,8 @@ NineStateBlock9_dtor( struct NineStateBlock9 *This )
* TODO: compare ?
*/
static void
-nine_state_copy_common(struct nine_state *dst,
+nine_state_copy_common(struct NineDevice9 *device,
+ struct nine_state *dst,
struct nine_state *src,
struct nine_state *mask, /* aliases either src or dst */
const boolean apply,
@@ -130,13 +140,32 @@ nine_state_copy_common(struct nine_state *dst,
*/
if (mask->changed.group & NINE_STATE_VS_CONST) {
struct nine_range *r;
- for (r = mask->changed.vs_const_f; r; r = r->next) {
- memcpy(&dst->vs_const_f[r->bgn * 4],
- &src->vs_const_f[r->bgn * 4],
- (r->end - r->bgn) * 4 * sizeof(float));
- if (apply)
- nine_ranges_insert(&dst->changed.vs_const_f, r->bgn, r->end,
- pool);
+ if (device->may_swvp) {
+ for (r = mask->changed.vs_const_f; r; r = r->next) {
+ int bgn = r->bgn;
+ int end = r->end;
+ memcpy(&dst->vs_const_f_swvp[bgn * 4],
+ &src->vs_const_f_swvp[bgn * 4],
+ (end - bgn) * 4 * sizeof(float));
+ if (apply)
+ nine_ranges_insert(&dst->changed.vs_const_f, bgn, end,
+ pool);
+ if (bgn < device->max_vs_const_f) {
+ end = MIN2(end, device->max_vs_const_f);
+ memcpy(&dst->vs_const_f[bgn * 4],
+ &src->vs_const_f[bgn * 4],
+ (end - bgn) * 4 * sizeof(float));
+ }
+ }
+ } else {
+ for (r = mask->changed.vs_const_f; r; r = r->next) {
+ memcpy(&dst->vs_const_f[r->bgn * 4],
+ &src->vs_const_f[r->bgn * 4],
+ (r->end - r->bgn) * 4 * sizeof(float));
+ if (apply)
+ nine_ranges_insert(&dst->changed.vs_const_f, r->bgn, r->end,
+ pool);
+ }
}
for (r = mask->changed.vs_const_i; r; r = r->next) {
memcpy(&dst->vs_const_i[r->bgn * 4],
@@ -342,7 +371,8 @@ nine_state_copy_common(struct nine_state *dst,
}
static void
-nine_state_copy_common_all(struct nine_state *dst,
+nine_state_copy_common_all(struct NineDevice9 *device,
+ struct nine_state *dst,
const struct nine_state *src,
struct nine_state *help,
const boolean apply,
@@ -369,12 +399,15 @@ nine_state_copy_common_all(struct nine_state *dst,
if (1) {
struct nine_range *r = help->changed.vs_const_f;
memcpy(&dst->vs_const_f[0],
- &src->vs_const_f[0], (r->end - r->bgn) * 4 * sizeof(float));
+ &src->vs_const_f[0], device->max_vs_const_f * 4 * sizeof(float));
+ if (device->may_swvp)
+ memcpy(dst->vs_const_f_swvp,
+ src->vs_const_f_swvp, VS_CONST_F_SWVP_SIZE);
if (apply)
nine_ranges_insert(&dst->changed.vs_const_f, r->bgn, r->end, pool);
- memcpy(dst->vs_const_i, src->vs_const_i, VS_CONST_I_SIZE);
- memcpy(dst->vs_const_b, src->vs_const_b, VS_CONST_B_SIZE);
+ memcpy(dst->vs_const_i, src->vs_const_i, VS_CONST_I_SIZE(device));
+ memcpy(dst->vs_const_b, src->vs_const_b, VS_CONST_B_SIZE(device));
if (apply) {
r = help->changed.vs_const_i;
nine_ranges_insert(&dst->changed.vs_const_i, r->bgn, r->end, pool);
@@ -491,17 +524,18 @@ nine_state_copy_common_all(struct nine_state *dst,
HRESULT NINE_WINAPI
NineStateBlock9_Capture( struct NineStateBlock9 *This )
{
+ struct NineDevice9 *device = This->base.device;
struct nine_state *dst = &This->state;
- struct nine_state *src = &This->base.device->state;
- const int MaxStreams = This->base.device->caps.MaxStreams;
+ struct nine_state *src = &device->state;
+ const int MaxStreams = device->caps.MaxStreams;
unsigned s;
DBG("This=%p\n", This);
if (This->type == NINESBT_ALL)
- nine_state_copy_common_all(dst, src, dst, FALSE, NULL, MaxStreams);
+ nine_state_copy_common_all(device, dst, src, dst, FALSE, NULL, MaxStreams);
else
- nine_state_copy_common(dst, src, dst, FALSE, NULL);
+ nine_state_copy_common(device, dst, src, dst, FALSE, NULL);
if (dst->changed.group & NINE_STATE_VDECL)
nine_bind(&dst->vdecl, src->vdecl);
@@ -521,18 +555,19 @@ NineStateBlock9_Capture( struct NineStateBlock9 *This )
HRESULT NINE_WINAPI
NineStateBlock9_Apply( struct NineStateBlock9 *This )
{
- struct nine_state *dst = &This->base.device->state;
+ struct NineDevice9 *device = This->base.device;
+ struct nine_state *dst = &device->state;
struct nine_state *src = &This->state;
- struct nine_range_pool *pool = &This->base.device->range_pool;
- const int MaxStreams = This->base.device->caps.MaxStreams;
+ struct nine_range_pool *pool = &device->range_pool;
+ const int MaxStreams = device->caps.MaxStreams;
unsigned s;
DBG("This=%p\n", This);
if (This->type == NINESBT_ALL)
- nine_state_copy_common_all(dst, src, src, TRUE, pool, MaxStreams);
+ nine_state_copy_common_all(device, dst, src, src, TRUE, pool, MaxStreams);
else
- nine_state_copy_common(dst, src, src, TRUE, pool);
+ nine_state_copy_common(device, dst, src, src, TRUE, pool);
if ((src->changed.group & NINE_STATE_VDECL) && src->vdecl)
NineDevice9_SetVertexDeclaration(This->base.device, (IDirect3DVertexDeclaration9 *)src->vdecl);
diff --git a/src/gallium/state_trackers/nine/vertexshader9.c b/src/gallium/state_trackers/nine/vertexshader9.c
index bc09a413fab..92f8f6bb581 100644
--- a/src/gallium/state_trackers/nine/vertexshader9.c
+++ b/src/gallium/state_trackers/nine/vertexshader9.c
@@ -63,12 +63,21 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This,
info.fog_enable = 0;
info.point_size_min = 0;
info.point_size_max = 0;
- info.swvp_on = false;
+ info.swvp_on = !!(device->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING);
hr = nine_translate_shader(device, &info);
+ if (hr == D3DERR_INVALIDCALL &&
+ (device->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING)) {
+ /* Retry with a swvp shader. It will require swvp to be on. */
+ info.swvp_on = true;
+ hr = nine_translate_shader(device, &info);
+ }
+ if (hr == D3DERR_INVALIDCALL)
+ ERR("Encountered buggy shader\n");
if (FAILED(hr))
return hr;
This->byte_code.version = info.version;
+ This->swvp_only = info.swvp_on;
This->byte_code.tokens = mem_dup(pFunction, info.byte_size);
if (!This->byte_code.tokens)
@@ -77,7 +86,7 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This,
This->variant.cso = info.cso;
This->last_cso = info.cso;
- This->last_key = 0;
+ This->last_key = (uint32_t) (info.swvp_on << 9);
This->const_used_size = info.const_used_size;
This->lconstf = info.lconstf;
@@ -168,7 +177,7 @@ NineVertexShader9_GetVariant( struct NineVertexShader9 *This )
info.fog_enable = device->state.rs[D3DRS_FOGENABLE];
info.point_size_min = asfloat(device->state.rs[D3DRS_POINTSIZE_MIN]);
info.point_size_max = asfloat(device->state.rs[D3DRS_POINTSIZE_MAX]);
- info.swvp_on = false;
+ info.swvp_on = device->swvp;
hr = nine_translate_shader(This->base.device, &info);
if (FAILED(hr))
diff --git a/src/gallium/state_trackers/nine/vertexshader9.h b/src/gallium/state_trackers/nine/vertexshader9.h
index 3c9db7990a0..823c71aa85e 100644
--- a/src/gallium/state_trackers/nine/vertexshader9.h
+++ b/src/gallium/state_trackers/nine/vertexshader9.h
@@ -26,6 +26,7 @@
#include "util/u_half.h"
#include "iunknown.h"
+#include "device9.h"
#include "nine_helpers.h"
#include "nine_shader.h"
#include "nine_state.h"
@@ -50,6 +51,7 @@ struct NineVertexShader9
boolean position_t; /* if true, disable vport transform */
boolean point_size; /* if true, set rasterizer.point_size_per_vertex to 1 */
+ boolean swvp_only;
unsigned const_used_size; /* in bytes */
@@ -73,8 +75,9 @@ NineVertexShader9( void *data )
static inline BOOL
NineVertexShader9_UpdateKey( struct NineVertexShader9 *vs,
- struct nine_state *state )
+ struct NineDevice9 *device )
{
+ struct nine_state *state = &(device->state);
uint8_t samplers_shadow;
uint64_t key;
BOOL res;
@@ -84,7 +87,8 @@ NineVertexShader9_UpdateKey( struct NineVertexShader9 *vs,
key = samplers_shadow;
if (vs->byte_code.version < 0x30)
- key |= (uint32_t) (state->rs[D3DRS_FOGENABLE] << 8);
+ key |= (uint32_t) ((!!state->rs[D3DRS_FOGENABLE]) << 8);
+ key |= (uint32_t) (device->swvp << 9);
/* We want to use a 64 bits key for performance.
* Use compressed float16 values for the pointsize min/max in the key.