summary | refs | log | tree | commit | diff
path: root/src/mesa/drivers/dri/i965/brw_state_dump.c
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net> 2011-04-27 13:33:10 -0700
committer: Eric Anholt <eric@anholt.net> 2011-06-18 16:00:45 -0700
commit: c173541d9769d41a85cc899bc49699a3587df4bf (patch)
tree: 0b445fd0db1f9eb806b7fe48fa8ac4fced4baa8a /src/mesa/drivers/dri/i965/brw_state_dump.c
parent: 962dab948609c97c1c01fde6a27e19307948d302 (diff)
i965: Use state streaming on programs, and state base address on gen5+.
There will be a little bit of thrashing of the program cache BO as the cache warms up, but once the application is in steady state, this reduces relocations on gen5 and later. On my T420 laptop, cairogl firefox-talos-gfx performance improves 2.6% +/- 1.3% (n=6). No statistically significant performance difference on nexuiz (n=5).
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_state_dump.c')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_state_dump.c | 26
1 files changed, 13 insertions, 13 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index ff06cb3a91e..7a3a88f04f5 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -459,21 +459,19 @@ static void dump_blend_state(struct brw_context *brw)
}
-static void brw_debug_prog(const char *name, drm_intel_bo *prog)
+static void brw_debug_prog(struct brw_context *brw,
+ const char *name, uint32_t prog_offset)
{
unsigned int i;
uint32_t *data;
- if (prog == NULL)
- return;
-
- drm_intel_bo_map(prog, GL_FALSE);
+ drm_intel_bo_map(brw->cache.bo, false);
- data = prog->virtual;
+ data = brw->cache.bo->virtual + prog_offset;
- for (i = 0; i < prog->size / 4 / 4; i++) {
+ for (i = 0; i < brw->cache.bo->size / 4 / 4; i++) {
fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
- name, (unsigned int)prog->offset + i * 4 * 4,
+ name, (unsigned int)brw->cache.bo->offset + i * 4 * 4,
data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]);
/* Stop at the end of the program. It'd be nice to keep track of the actual
* intended program size instead of guessing like this.
@@ -485,7 +483,7 @@ static void brw_debug_prog(const char *name, drm_intel_bo *prog)
break;
}
- drm_intel_bo_unmap(prog);
+ drm_intel_bo_unmap(brw->cache.bo);
}
@@ -518,17 +516,19 @@ void brw_debug_batch(struct intel_context *intel)
if (intel->gen < 6)
state_struct_out("VS", intel->batch.bo, brw->vs.state_offset,
sizeof(struct brw_vs_unit_state));
- brw_debug_prog("VS prog", brw->vs.prog_bo);
+ brw_debug_prog(brw, "VS prog", brw->vs.prog_offset);
if (intel->gen < 6)
state_struct_out("GS", intel->batch.bo, brw->gs.state_offset,
sizeof(struct brw_gs_unit_state));
- brw_debug_prog("GS prog", brw->gs.prog_bo);
+ if (brw->gs.prog_active) {
+ brw_debug_prog(brw, "GS prog", brw->gs.prog_offset);
+ }
if (intel->gen < 6) {
state_struct_out("SF", intel->batch.bo, brw->sf.state_offset,
sizeof(struct brw_sf_unit_state));
- brw_debug_prog("SF prog", brw->sf.prog_bo);
+ brw_debug_prog(brw, "SF prog", brw->sf.prog_offset);
}
if (intel->gen >= 7)
dump_sf_clip_viewport_state(brw);
@@ -540,7 +540,7 @@ void brw_debug_batch(struct intel_context *intel)
if (intel->gen < 6)
state_struct_out("WM", intel->batch.bo, brw->wm.state_offset,
sizeof(struct brw_wm_unit_state));
- brw_debug_prog("WM prog", brw->wm.prog_bo);
+ brw_debug_prog(brw, "WM prog", brw->wm.prog_offset);
if (intel->gen >= 6) {
dump_cc_viewport_state(brw);