summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2011-11-09 15:16:04 +0000
committerChris Wilson <chris@chris-wilson.co.uk>2011-11-09 15:27:34 +0000
commit3252b46e115e6cc46abbcc4f502feeb9edc31ad6 (patch)
tree5677238d8f624d6dc1005759f96b384e75ae8471
parent8e926759528a12076bd7565672acd82a37aa3eb1 (diff)
sna/gen3: Pack video frames
If you're going to benchmark, you may as well do so favourably. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r--src/sna/gen3_render.c206
1 files changed, 125 insertions, 81 deletions
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index c8ad2096..83ac23a9 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -2959,36 +2959,36 @@ gen3_emit_video_state(struct sna *sna,
struct kgem_bo *dst_bo,
int width, int height)
{
- uint32_t shader_offset;
- uint32_t ms3;
+ struct gen3_render_state *state = &sna->render_state.gen3;
+ uint32_t id, ms3, rewind;
gen3_emit_target(sna, dst_bo, width, height,
sna_format_for_depth(pixmap->drawable.depth));
/* XXX share with composite? Is it worth the effort? */
- OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
- I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) |
- 2);
- OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
- OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
- S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
- S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
- S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
- S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
- S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
- S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
- S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
- OUT_BATCH((2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
- (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
- S6_COLOR_WRITE_ENABLE);
-
- sna->render_state.gen3.last_blend = 0;
- sna->render_state.gen3.last_sampler = 0;
- sna->render_state.gen3.floats_per_vertex = 4;
- sna->render_state.gen3.last_shader = -1;
- sna->render_state.gen3.last_constants = 0;
+ if ((state->last_shader & (1<<31)) == 0) {
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+ I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) |
+ 2);
+ OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
+ OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
+ S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
+ OUT_BATCH((2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
+ (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
+ S6_COLOR_WRITE_ENABLE);
+
+ state->last_blend = 0;
+ state->floats_per_vertex = 4;
+ }
if (!is_planar_fourcc(frame->id)) {
+ rewind = sna->kgem.nbatch;
OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
OUT_BATCH(0x0000001); /* constant 0 */
/* constant 0: brightness/contrast */
@@ -2996,7 +2996,15 @@ gen3_emit_video_state(struct sna *sna,
OUT_BATCH_F(video->contrast / 255.0);
OUT_BATCH_F(0.0);
OUT_BATCH_F(0.0);
+ if (state->last_constants &&
+ memcmp(&sna->kgem.batch[state->last_constants],
+ &sna->kgem.batch[rewind],
+ 6*sizeof(uint32_t)) == 0)
+ sna->kgem.nbatch = rewind;
+ else
+ state->last_constants = rewind;
+ rewind = sna->kgem.nbatch;
OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3);
OUT_BATCH(0x00000001);
OUT_BATCH(SS2_COLORSPACE_CONVERSION |
@@ -3009,6 +3017,13 @@ gen3_emit_video_state(struct sna *sna,
(0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
SS3_NORMALIZED_COORDS);
OUT_BATCH(0x00000000);
+ if (state->last_sampler &&
+ memcmp(&sna->kgem.batch[state->last_sampler],
+ &sna->kgem.batch[rewind],
+ 5*sizeof(uint32_t)) == 0)
+ sna->kgem.nbatch = rewind;
+ else
+ state->last_sampler = rewind;
OUT_BATCH(_3DSTATE_MAP_STATE | 3);
OUT_BATCH(0x00000001); /* texture map #1 */
@@ -3031,15 +3046,23 @@ gen3_emit_video_state(struct sna *sna,
OUT_BATCH(ms3);
OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
- shader_offset = sna->kgem.nbatch++;
+ id = 1<<31 | 1<<1 | !!video->brightness;
+ if (state->last_shader != id) {
+ state->last_shader = id;
+ id = sna->kgem.nbatch++;
+
+ gen3_fs_dcl(FS_S0);
+ gen3_fs_dcl(FS_T0);
+ gen3_fs_texld(FS_OC, FS_S0, FS_T0);
+ if (video->brightness != 0) {
+ gen3_fs_add(FS_OC,
+ gen3_fs_operand_reg(FS_OC),
+ gen3_fs_operand(FS_C0, X, X, X, ZERO));
+ }
- gen3_fs_dcl(FS_S0);
- gen3_fs_dcl(FS_T0);
- gen3_fs_texld(FS_OC, FS_S0, FS_T0);
- if (video->brightness != 0) {
- gen3_fs_add(FS_OC,
- gen3_fs_operand_reg(FS_OC),
- gen3_fs_operand(FS_C0, X, X, X, ZERO));
+ sna->kgem.batch[id] =
+ _3DSTATE_PIXEL_SHADER_PROGRAM |
+ (sna->kgem.nbatch - id - 2);
}
} else {
/* For the planar formats, we set up three samplers --
@@ -3062,6 +3085,7 @@ gen3_emit_video_state(struct sna *sna,
* r3 = (v,v,v,v)
* OC = (r,g,b,1)
*/
+ rewind = sna->kgem.nbatch;
OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2));
OUT_BATCH(0x000001f); /* constants 0-4 */
/* constant 0: normalization offsets */
@@ -3089,7 +3113,15 @@ gen3_emit_video_state(struct sna *sna,
OUT_BATCH_F(video->contrast / 255.0);
OUT_BATCH_F(0.0);
OUT_BATCH_F(0.0);
+ if (state->last_constants &&
+ memcmp(&sna->kgem.batch[state->last_constants],
+ &sna->kgem.batch[rewind],
+ 22*sizeof(uint32_t)) == 0)
+ sna->kgem.nbatch = rewind;
+ else
+ state->last_constants = rewind;
+ rewind = sna->kgem.nbatch;
OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9);
OUT_BATCH(0x00000007);
/* sampler 0 */
@@ -3122,6 +3154,13 @@ gen3_emit_video_state(struct sna *sna,
(2 << SS3_TEXTUREMAP_INDEX_SHIFT) |
SS3_NORMALIZED_COORDS);
OUT_BATCH(0x00000000);
+ if (state->last_sampler &&
+ memcmp(&sna->kgem.batch[state->last_sampler],
+ &sna->kgem.batch[rewind],
+ 11*sizeof(uint32_t)) == 0)
+ sna->kgem.nbatch = rewind;
+ else
+ state->last_sampler = rewind;
OUT_BATCH(_3DSTATE_MAP_STATE | 9);
OUT_BATCH(0x00000007);
@@ -3166,63 +3205,68 @@ gen3_emit_video_state(struct sna *sna,
OUT_BATCH(ms3);
OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
- shader_offset = sna->kgem.nbatch++;
+ id = 1<<31 | 2<<1 | !!video->brightness;
+ if (state->last_shader != id) {
+ state->last_shader = id;
+ id = sna->kgem.nbatch++;
- /* Declare samplers */
- gen3_fs_dcl(FS_S0); /* Y */
- gen3_fs_dcl(FS_S1); /* U */
- gen3_fs_dcl(FS_S2); /* V */
- gen3_fs_dcl(FS_T0); /* normalized coords */
+ /* Declare samplers */
+ gen3_fs_dcl(FS_S0); /* Y */
+ gen3_fs_dcl(FS_S1); /* U */
+ gen3_fs_dcl(FS_S2); /* V */
+ gen3_fs_dcl(FS_T0); /* normalized coords */
- /* Load samplers to temporaries. */
- gen3_fs_texld(FS_R1, FS_S0, FS_T0);
- gen3_fs_texld(FS_R2, FS_S1, FS_T0);
- gen3_fs_texld(FS_R3, FS_S2, FS_T0);
+ /* Load samplers to temporaries. */
+ gen3_fs_texld(FS_R1, FS_S0, FS_T0);
+ gen3_fs_texld(FS_R2, FS_S1, FS_T0);
+ gen3_fs_texld(FS_R3, FS_S2, FS_T0);
- /* Move the sampled YUV data in R[123] to the first
- * 3 channels of R0.
- */
- gen3_fs_mov_masked(FS_R0, MASK_X,
- gen3_fs_operand_reg(FS_R1));
- gen3_fs_mov_masked(FS_R0, MASK_Y,
- gen3_fs_operand_reg(FS_R2));
- gen3_fs_mov_masked(FS_R0, MASK_Z,
- gen3_fs_operand_reg(FS_R3));
-
- /* Normalize the YUV data */
- gen3_fs_add(FS_R0, gen3_fs_operand_reg(FS_R0),
- gen3_fs_operand_reg(FS_C0));
- /* dot-product the YUV data in R0 by the vectors of
- * coefficients for calculating R, G, and B, storing
- * the results in the R, G, or B channels of the output
- * color. The OC results are implicitly clamped
- * at the end of the program.
- */
- gen3_fs_dp3(FS_OC, MASK_X,
- gen3_fs_operand_reg(FS_R0),
- gen3_fs_operand_reg(FS_C1));
- gen3_fs_dp3(FS_OC, MASK_Y,
- gen3_fs_operand_reg(FS_R0),
- gen3_fs_operand_reg(FS_C2));
- gen3_fs_dp3(FS_OC, MASK_Z,
- gen3_fs_operand_reg(FS_R0),
- gen3_fs_operand_reg(FS_C3));
- /* Set alpha of the output to 1.0, by wiring W to 1
- * and not actually using the source.
- */
- gen3_fs_mov_masked(FS_OC, MASK_W,
- gen3_fs_operand_one());
+ /* Move the sampled YUV data in R[123] to the first
+ * 3 channels of R0.
+ */
+ gen3_fs_mov_masked(FS_R0, MASK_X,
+ gen3_fs_operand_reg(FS_R1));
+ gen3_fs_mov_masked(FS_R0, MASK_Y,
+ gen3_fs_operand_reg(FS_R2));
+ gen3_fs_mov_masked(FS_R0, MASK_Z,
+ gen3_fs_operand_reg(FS_R3));
+
+ /* Normalize the YUV data */
+ gen3_fs_add(FS_R0, gen3_fs_operand_reg(FS_R0),
+ gen3_fs_operand_reg(FS_C0));
+ /* dot-product the YUV data in R0 by the vectors of
+ * coefficients for calculating R, G, and B, storing
+ * the results in the R, G, or B channels of the output
+ * color. The OC results are implicitly clamped
+ * at the end of the program.
+ */
+ gen3_fs_dp3(FS_OC, MASK_X,
+ gen3_fs_operand_reg(FS_R0),
+ gen3_fs_operand_reg(FS_C1));
+ gen3_fs_dp3(FS_OC, MASK_Y,
+ gen3_fs_operand_reg(FS_R0),
+ gen3_fs_operand_reg(FS_C2));
+ gen3_fs_dp3(FS_OC, MASK_Z,
+ gen3_fs_operand_reg(FS_R0),
+ gen3_fs_operand_reg(FS_C3));
+ /* Set alpha of the output to 1.0, by wiring W to 1
+ * and not actually using the source.
+ */
+ gen3_fs_mov_masked(FS_OC, MASK_W,
+ gen3_fs_operand_one());
+
+ if (video->brightness != 0) {
+ gen3_fs_add(FS_OC,
+ gen3_fs_operand_reg(FS_OC),
+ gen3_fs_operand(FS_C4, X, X, X, ZERO));
+ }
- if (video->brightness != 0) {
- gen3_fs_add(FS_OC,
- gen3_fs_operand_reg(FS_OC),
- gen3_fs_operand(FS_C4, X, X, X, ZERO));
+ sna->kgem.batch[id] =
+ _3DSTATE_PIXEL_SHADER_PROGRAM |
+ (sna->kgem.nbatch - id - 2);
}
}
- sna->kgem.batch[shader_offset] =
- _3DSTATE_PIXEL_SHADER_PROGRAM |
- (sna->kgem.nbatch - shader_offset - 2);
}
static void