summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2012-02-15 13:33:07 -0800
committerEric Anholt <eric@anholt.net>2012-02-21 11:54:12 -0800
commit07e00b3040d6da381595c65db5afe597f20d99fc (patch)
treea6fcc905b8a51cee1c8e78f1136c9217b898c912
parent83871566207d6692d1f20e4b666adb5dd0628dc3 (diff)
i965: Split the VS binding table to a separate table.
This is a step toward making the samplers/binding tables reflect sampler uniform mappings instead of embedding those in the programs. No significant performance difference on the microbenchmark (n=10). Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h34
-rw-r--r--src/mesa/drivers/dri/i965/brw_misc_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_emit.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c5
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_surface_state.c51
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c8
-rw-r--r--src/mesa/drivers/dri/i965/gen7_vs_state.c3
9 files changed, 94 insertions, 17 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 98f68e7c381..44a01e69ba5 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -409,6 +409,8 @@ struct brw_vs_prog_data {
bool uses_new_param_layout;
bool uses_vertexid;
bool userclip;
+
+ int num_surfaces;
};
@@ -468,7 +470,7 @@ struct brw_vs_ouput_sizes {
* (VS, HS, DS, GS, PS), we currently share a single binding table for all of
* them. This is purely for convenience.
*
- * Currently our binding tables are (arbitrarily) programmed as follows:
+ * Currently our SOL/WM binding tables are (arbitrarily) programmed as follows:
*
* +-------------------------------+
* | 0 | Draw buffer 0 | .
@@ -476,18 +478,28 @@ struct brw_vs_ouput_sizes {
* | : | : | > Only relevant to the WM.
* | 7 | Draw buffer 7 | /
* |-----|-------------------------| `
- * | 8 | VS Pull Constant Buffer |
- * | 9 | WM Pull Constant Buffer |
+ * | 8 | WM Pull Constant Buffer |
* |-----|-------------------------|
- * | 10 | Texture 0 |
+ * | 9 | Texture 0 |
* | . | . |
* | : | : |
- * | 25 | Texture 15 |
+ * | 24 | Texture 15 |
* +-----|-------------------------+
- * | 26 | SOL Binding 0 |
+ * | 25 | SOL Binding 0 |
+ * | . | . |
+ * | : | : |
+ * | 88 | SOL Binding 63 |
+ * +-------------------------------+
+ *
+ * Our VS binding tables are programmed as follows:
+ *
+ * +-----+-------------------------+
+ * | 0 | VS Pull Constant Buffer |
+ * +-----+-------------------------+
+ * | 1 | Texture 0 |
* | . | . |
* | : | : |
- * | 89 | SOL Binding 63 |
+ * | 16 | Texture 15 |
* +-------------------------------+
*
* Note that nothing actually uses the SURF_INDEX_DRAW macro, so it has to be
@@ -495,7 +507,6 @@ struct brw_vs_ouput_sizes {
* first so we can use headerless render target writes for RT 0.
*/
#define SURF_INDEX_DRAW(d) (d)
-#define SURF_INDEX_VERT_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 0)
#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 2 + (t))
#define SURF_INDEX_SOL_BINDING(t) (SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT) + (t))
@@ -503,6 +514,10 @@ struct brw_vs_ouput_sizes {
/** Maximum size of the binding table. */
#define BRW_MAX_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
+#define SURF_INDEX_VERT_CONST_BUFFER (0) /* VS binding table slot of the VS pull constant buffer */
+#define SURF_INDEX_VS_TEXTURE(t) (SURF_INDEX_VERT_CONST_BUFFER + 1 + (t)) /* VS binding table slot of texture unit t */
+#define BRW_MAX_VS_SURFACES SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT) /* number of VS slots; sizes brw->vs.surf_offset[] */
+
enum brw_cache_id {
BRW_BLEND_STATE,
BRW_DEPTH_STENCIL_STATE,
@@ -841,6 +856,9 @@ struct brw_context
*/
uint8_t *ra_reg_to_grf;
/** @} */
+
+ uint32_t bind_bo_offset;
+ uint32_t surf_offset[BRW_MAX_VS_SURFACES];
} vs;
struct {
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 0343ae19073..7bc7e1c1025 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -77,7 +77,7 @@ static void upload_binding_table_pointers(struct brw_context *brw)
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
- OUT_BATCH(brw->bind.bo_offset);
+ OUT_BATCH(brw->vs.bind_bo_offset);
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
@@ -115,7 +115,7 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw)
GEN6_BINDING_TABLE_MODIFY_GS |
GEN6_BINDING_TABLE_MODIFY_PS |
(4 - 2));
- OUT_BATCH(brw->bind.bo_offset); /* vs */
+ OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
OUT_BATCH(brw->bind.bo_offset); /* gs */
OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
ADVANCE_BATCH();
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 2dd566538ee..59a2bb32501 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -71,6 +71,7 @@ extern const struct brw_tracked_state brw_wm_prog;
extern const struct brw_tracked_state brw_renderbuffer_surfaces;
extern const struct brw_tracked_state brw_texture_surfaces;
extern const struct brw_tracked_state brw_binding_table;
+extern const struct brw_tracked_state brw_vs_binding_table;
extern const struct brw_tracked_state brw_wm_unit;
extern const struct brw_tracked_state brw_psp_urb_cbs;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index ea506950c46..28e4d26209e 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -70,6 +70,7 @@ static const struct brw_tracked_state *gen4_atoms[] =
&brw_wm_pull_constants,
&brw_renderbuffer_surfaces,
&brw_texture_surfaces,
+ &brw_vs_binding_table,
&brw_binding_table,
&brw_samplers,
@@ -146,6 +147,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
&gen6_renderbuffer_surfaces,
&brw_texture_surfaces,
&gen6_sol_surface,
+ &brw_vs_binding_table,
&brw_binding_table,
&brw_samplers,
@@ -214,6 +216,7 @@ const struct brw_tracked_state *gen7_atoms[] =
&brw_wm_pull_constants,
&gen6_renderbuffer_surfaces,
&brw_texture_surfaces,
+ &brw_vs_binding_table,
&brw_binding_table,
&gen7_samplers,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index f9eed61d92c..9df7b11f5ad 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -465,7 +465,7 @@ vec4_visitor::generate_tex(vec4_instruction *inst,
dst,
inst->base_mrf,
src,
- SURF_INDEX_TEXTURE(inst->sampler),
+ SURF_INDEX_VS_TEXTURE(inst->sampler),
inst->sampler,
WRITEMASK_XYZW,
msg_type,
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index ca205cdf79a..bd703c7389a 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -247,6 +247,11 @@ do_vs_prog(struct brw_context *brw,
brw_old_vs_emit(&c);
}
+ if (c.prog_data.nr_pull_params)
+ c.prog_data.num_surfaces = 1;
+ if (c.vp->program.Base.SamplersUsed)
+ c.prog_data.num_surfaces = BRW_MAX_VS_SURFACES;
+
/* Scratch space is used for register spilling */
if (c.last_scratch) {
c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch);
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index 2f7b211d5ec..b29e414a54e 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -65,7 +65,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw)
if (brw->vs.const_bo) {
drm_intel_bo_unreference(brw->vs.const_bo);
brw->vs.const_bo = NULL;
- brw->bind.surf_offset[SURF_INDEX_VERT_CONST_BUFFER] = 0;
+ brw->vs.surf_offset[SURF_INDEX_VERT_CONST_BUFFER] = 0;
brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
}
return;
@@ -97,7 +97,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw)
const int surf = SURF_INDEX_VERT_CONST_BUFFER;
intel->vtbl.create_constant_surface(brw, brw->vs.const_bo,
params->NumParameters,
- &brw->bind.surf_offset[surf]);
+ &brw->vs.surf_offset[surf]);
brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
}
@@ -110,3 +110,50 @@ const struct brw_tracked_state brw_vs_pull_constants = {
},
.emit = brw_upload_vs_pull_constants,
};
+
+/**
+ * Constructs the VS binding table by copying brw->vs.surf_offset[] into space
+ * from brw_state_batch(); sets the table offset to 0 if the VS needs none.
+ */
+static void
+brw_vs_upload_binding_table(struct brw_context *brw)
+{
+ uint32_t *bind;
+ int i;
+
+ /* CACHE_NEW_VS_PROG: Skip making a binding table if we don't use textures or
+ * pull constants.
+ */
+ if (brw->vs.prog_data->num_surfaces == 0) {
+ if (brw->vs.bind_bo_offset != 0) {
+ brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
+ brw->vs.bind_bo_offset = 0;
+ }
+ return;
+ }
+
+ /* Size the table for the VS slots only, so every entry allocated here is
+ * written by the loop below. (Using num_surfaces could shrink it further.)
+ */
+ bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
+ sizeof(uint32_t) * BRW_MAX_VS_SURFACES,
+ 32, &brw->vs.bind_bo_offset);
+
+ /* BRW_NEW_SURFACES and BRW_NEW_VS_CONSTBUF */
+ for (i = 0; i < BRW_MAX_VS_SURFACES; i++) {
+ bind[i] = brw->vs.surf_offset[i];
+ }
+
+ brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
+}
+
+const struct brw_tracked_state brw_vs_binding_table = {
+ .dirty = {
+ .mesa = 0,
+ .brw = (BRW_NEW_BATCH |
+ BRW_NEW_VS_CONSTBUF | /* raised by brw_upload_vs_pull_constants */
+ BRW_NEW_SURFACES), /* raised by brw_update_texture_surfaces */
+ .cache = CACHE_NEW_VS_PROG /* emit reads brw->vs.prog_data->num_surfaces */
+ },
+ .emit = brw_vs_upload_binding_table,
+};
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 97ae489ea96..a975b2d1c55 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1097,6 +1097,10 @@ brw_update_texture_surfaces(struct brw_context *brw)
} else {
brw->bind.surf_offset[surf] = 0;
}
+
+ /* For now, just mirror the texture setup to the VS slots. */
+ brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(i)] =
+ brw->bind.surf_offset[surf];
}
brw->state.dirty.brw |= BRW_NEW_SURFACES;
@@ -1128,12 +1132,11 @@ brw_upload_binding_table(struct brw_context *brw)
sizeof(uint32_t) * BRW_MAX_SURFACES,
32, &brw->bind.bo_offset);
- /* BRW_NEW_SURFACES and BRW_NEW_VS_CONSTBUF */
+ /* BRW_NEW_SURFACES */
for (i = 0; i < BRW_MAX_SURFACES; i++) {
bind[i] = brw->bind.surf_offset[i];
}
- brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
}
@@ -1141,7 +1144,6 @@ const struct brw_tracked_state brw_binding_table = {
.dirty = {
.mesa = 0,
.brw = (BRW_NEW_BATCH |
- BRW_NEW_VS_CONSTBUF |
BRW_NEW_SURFACES),
.cache = 0
},
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index a3d652cb6f7..73822e3350c 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -37,9 +37,10 @@ upload_vs_state(struct brw_context *brw)
gen7_emit_vs_workaround_flush(intel);
+ /* BRW_NEW_VS_BINDING_TABLE */
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_VS << 16 | (2 - 2));
- OUT_BATCH(brw->bind.bo_offset);
+ OUT_BATCH(brw->vs.bind_bo_offset);
ADVANCE_BATCH();
/* CACHE_NEW_SAMPLER */