summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Eric Anholt <eric@anholt.net> 2011-04-24 19:47:37 -0700
committer Eric Anholt <eric@anholt.net> 2011-04-29 15:26:43 -0700
commit 588cebce2d5b6afd24b72603d744d390481310dd (patch)
tree b57a0ed872af0d5f473f9c814f2ffddf35a8d358
parent 04e3f1d3c29c68343e709d566b7fe13d617f8d13 (diff)
i965/gen4: Move VS state to state streaming.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_misc_state.c 4
-rw-r--r-- src/mesa/drivers/dri/i965/brw_state_dump.c 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs_state.c 130
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vtbl.c 2
5 files changed, 53 insertions, 88 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 0876c3e94e6..a81d6157390 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -631,8 +631,8 @@ struct brw_context
int8_t *constant_map; /* variable array following prog_data */
drm_intel_bo *prog_bo;
- drm_intel_bo *state_bo;
drm_intel_bo *const_bo;
+ uint32_t state_offset;
/** Binding table of pointers to surf_bo entries */
uint32_t bind_bo_offset;
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 7325a2b6c8d..b61a7ad099e 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -143,7 +143,8 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
- OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ brw->vs.state_offset);
if (brw->gs.prog_active)
OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
else
@@ -163,7 +164,6 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
static void prepare_psp_urb_cbs(struct brw_context *brw)
{
- brw_add_validated_bo(brw, brw->vs.state_bo);
brw_add_validated_bo(brw, brw->gs.state_bo);
brw_add_validated_bo(brw, brw->clip.state_bo);
}
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index 930eb5155db..4313b2aaa98 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -388,7 +388,8 @@ void brw_debug_batch(struct intel_context *intel)
dump_wm_sampler_state(brw);
if (intel->gen < 6)
- state_struct_out("VS", brw->vs.state_bo, 0, sizeof(struct brw_vs_unit_state));
+ state_struct_out("VS", intel->batch.bo, brw->vs.state_offset,
+ sizeof(struct brw_vs_unit_state));
brw_debug_prog("VS prog", brw->vs.prog_bo);
if (intel->gen < 6)
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index c3a7cc247c5..1eee5b7e5de 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -49,48 +49,19 @@ struct brw_vs_unit_key {
};
static void
-vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
-{
- struct gl_context *ctx = &brw->intel.ctx;
-
- memset(key, 0, sizeof(*key));
-
- /* CACHE_NEW_VS_PROG */
- key->total_grf = brw->vs.prog_data->total_grf;
- key->urb_entry_read_length = brw->vs.prog_data->urb_read_length;
- key->curb_entry_read_length = brw->vs.prog_data->curb_read_length;
-
- /* BRW_NEW_URB_FENCE */
- key->nr_urb_entries = brw->urb.nr_vs_entries;
- key->urb_size = brw->urb.vsize;
-
- /* BRW_NEW_NR_VS_SURFACES */
- key->nr_surfaces = brw->vs.nr_surfaces;
-
- /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */
- if (ctx->Transform.ClipPlanesEnabled) {
- /* Note that we read in the userclip planes as well, hence
- * clip_start:
- */
- key->curbe_offset = brw->curbe.clip_start;
- }
- else {
- key->curbe_offset = brw->curbe.vs_start;
- }
-}
-
-static drm_intel_bo *
-vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
+brw_prepare_vs_unit(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
- struct brw_vs_unit_state vs;
- drm_intel_bo *bo;
+ struct gl_context *ctx = &intel->ctx;
+ struct brw_vs_unit_state *vs;
- memset(&vs, 0, sizeof(vs));
+ vs = brw_state_batch(brw, sizeof(*vs), 32, &brw->vs.state_offset);
+ memset(vs, 0, sizeof(*vs));
- vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */
- vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
- vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ /* CACHE_NEW_VS_PROG */
+ vs->thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */
+ vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1;
+ vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
/* Choosing multiple program flow means that we may get 2-vertex threads,
* which will have the channel mask for dwords 4-7 enabled in the thread,
* and those dwords will be written to the second URB handle when we
@@ -103,21 +74,34 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
* The most notable and reliably failing application is the Humus
* demo "CelShading"
*/
- vs.thread1.single_program_flow = (intel->gen == 5);
+ vs->thread1.single_program_flow = (intel->gen == 5);
+ /* BRW_NEW_NR_VS_SURFACES */
if (intel->gen == 5)
- vs.thread1.binding_table_entry_count = 0; /* hardware requirement */
+ vs->thread1.binding_table_entry_count = 0; /* hardware requirement */
else
- vs.thread1.binding_table_entry_count = key->nr_surfaces;
+ vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces;
+
+ vs->thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length;
+ vs->thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length;
+ vs->thread3.dispatch_grf_start_reg = 1;
+ vs->thread3.urb_entry_read_offset = 0;
- vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
- vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
- vs.thread3.dispatch_grf_start_reg = 1;
- vs.thread3.urb_entry_read_offset = 0;
- vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+ /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */
+ if (ctx->Transform.ClipPlanesEnabled) {
+ /* Note that we read in the userclip planes as well, hence
+ * clip_start:
+ */
+ vs->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
+ }
+ else {
+ vs->thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2;
+ }
+
+ /* BRW_NEW_URB_FENCE */
if (intel->gen == 5) {
- switch (key->nr_urb_entries) {
+ switch (brw->urb.nr_vs_entries) {
case 8:
case 12:
case 16:
@@ -129,13 +113,13 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
case 192:
case 224:
case 256:
- vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2;
+ vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries >> 2;
break;
default:
assert(0);
}
} else {
- switch (key->nr_urb_entries) {
+ switch (brw->urb.nr_vs_entries) {
case 8:
case 12:
case 16:
@@ -147,63 +131,45 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
default:
assert(0);
}
- vs.thread4.nr_urb_entries = key->nr_urb_entries;
+ vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries;
}
- vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
+ vs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
- vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2,
- 1, brw->vs_max_threads) - 1;
+ vs->thread4.max_threads = CLAMP(brw->urb.nr_vs_entries / 2,
+ 1, brw->vs_max_threads) - 1;
/* No samplers for ARB_vp programs:
*/
/* It has to be set to 0 for Ironlake
*/
- vs.vs5.sampler_count = 0;
+ vs->vs5.sampler_count = 0;
if (unlikely(INTEL_DEBUG & DEBUG_STATS))
- vs.thread4.stats_enable = 1;
+ vs->thread4.stats_enable = 1;
/* Vertex program always enabled:
*/
- vs.vs6.vs_enable = 1;
-
- bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT,
- key, sizeof(*key),
- &brw->vs.prog_bo, 1,
- &vs, sizeof(vs));
+ vs->vs6.vs_enable = 1;
/* Emit VS program relocation */
- drm_intel_bo_emit_reloc(bo, offsetof(struct brw_vs_unit_state, thread0),
- brw->vs.prog_bo, vs.thread0.grf_reg_count << 1,
+ drm_intel_bo_emit_reloc(intel->batch.bo, (brw->vs.state_offset +
+ offsetof(struct brw_vs_unit_state,
+ thread0)),
+ brw->vs.prog_bo, vs->thread0.grf_reg_count << 1,
I915_GEM_DOMAIN_INSTRUCTION, 0);
- return bo;
-}
-
-static void prepare_vs_unit(struct brw_context *brw)
-{
- struct brw_vs_unit_key key;
-
- vs_unit_populate_key(brw, &key);
-
- drm_intel_bo_unreference(brw->vs.state_bo);
- brw->vs.state_bo = brw_search_cache(&brw->cache, BRW_VS_UNIT,
- &key, sizeof(key),
- &brw->vs.prog_bo, 1,
- NULL);
- if (brw->vs.state_bo == NULL) {
- brw->vs.state_bo = vs_unit_create_from_key(brw, &key);
- }
+ brw->state.dirty.cache |= CACHE_NEW_VS_UNIT;
}
const struct brw_tracked_state brw_vs_unit = {
.dirty = {
.mesa = _NEW_TRANSFORM,
- .brw = (BRW_NEW_CURBE_OFFSETS |
+ .brw = (BRW_NEW_BATCH |
+ BRW_NEW_CURBE_OFFSETS |
BRW_NEW_NR_VS_SURFACES |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_VS_PROG
},
- .prepare = prepare_vs_unit,
+ .prepare = brw_prepare_vs_unit,
};
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index b1b2e42d3de..8d1497c8215 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -60,7 +60,6 @@ dri_bo_release(drm_intel_bo **bo)
static void brw_destroy_context( struct intel_context *intel )
{
struct brw_context *brw = brw_context(&intel->ctx);
- int i;
brw_destroy_state(brw);
brw_draw_destroy( brw );
@@ -77,7 +76,6 @@ static void brw_destroy_context( struct intel_context *intel )
dri_bo_release(&brw->curbe.curbe_bo);
dri_bo_release(&brw->vs.prog_bo);
- dri_bo_release(&brw->vs.state_bo);
dri_bo_release(&brw->vs.const_bo);
dri_bo_release(&brw->gs.prog_bo);
dri_bo_release(&brw->gs.state_bo);