summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Eric Anholt <eric@anholt.net> 2016-12-11 14:59:17 -0800
committer Eric Anholt <eric@anholt.net> 2017-06-30 12:25:45 -0700
commit 8d36bd3d086f2a3ab76b06ca21f3b1b2d12f7277 (patch)
tree f5fbc0cfbadeb6d2ea4ff4f9abbe4f232da7b41c
parent 4cef255872e8467aabce52938038a9d2bf27d9b2 (diff)
vc4: Simplify pack header usage
Take the CL pointer in, which will be useful for enabling relocs.
However, our code expands a bit more:

before:
   4449       0       0    4449    1161 src/gallium/drivers/vc4/.libs/vc4_draw.o
    988       0       0     988     3dc src/gallium/drivers/vc4/.libs/vc4_emit.o
after:
   4481       0       0    4481    1181 src/gallium/drivers/vc4/.libs/vc4_draw.o
   1020       0       0    1020     3fc src/gallium/drivers/vc4/.libs/vc4_emit.o
-rw-r--r-- src/gallium/drivers/vc4/vc4_cl.h 15
-rw-r--r-- src/gallium/drivers/vc4/vc4_draw.c 21
-rw-r--r-- src/gallium/drivers/vc4/vc4_emit.c 21
-rw-r--r-- src/gallium/drivers/vc4/vc4_job.c 6
4 files changed, 28 insertions, 35 deletions
diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h
index bec177cd03..966756f503 100644
--- a/src/gallium/drivers/vc4/vc4_cl.h
+++ b/src/gallium/drivers/vc4/vc4_cl.h
@@ -255,17 +255,20 @@ cl_get_emit_space(struct vc4_cl_out **cl, size_t size)
* Also, *dst is actually of the wrong type, it's the
* uint8_t[cl_packet_length()] in the CL, not a cl_packet_struct(packet).
*/
-#define cl_emit(cl_out, packet, name) \
+#define cl_emit(cl, packet, name) \
for (struct cl_packet_struct(packet) name = { \
cl_packet_header(packet) \
}, \
- *_dst = cl_get_emit_space(cl_out, cl_packet_length(packet)); \
- __builtin_expect(_dst != NULL, 1); \
+ *_loop_terminate = &name; \
+ __builtin_expect(_loop_terminate != NULL, 1); \
({ \
- cl_packet_pack(packet)(NULL, (uint8_t *)_dst, &name); \
- VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, \
+ struct vc4_cl_out *cl_out = cl_start(cl); \
+ cl_packet_pack(packet)(cl, (uint8_t *)cl_out, &name); \
+ VG(VALGRIND_CHECK_MEM_IS_DEFINED(cl_out, \
cl_packet_length(packet))); \
- _dst = NULL; \
+ cl_advance(&cl_out, cl_packet_length(packet)); \
+ cl_end(cl, cl_out); \
+ _loop_terminate = NULL; \
})) \
#endif /* VC4_CL_H */
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index 4b3fa8ab8f..f7955ad3a8 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -81,8 +81,7 @@ vc4_start_draw(struct vc4_context *vc4)
vc4_get_draw_cl_space(job, 0);
- struct vc4_cl_out *bcl = cl_start(&job->bcl);
- cl_emit(&bcl, TILE_BINNING_MODE_CONFIGURATION, bin) {
+ cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION, bin) {
bin.width_in_tiles = job->draw_tiles_x;
bin.height_in_tiles = job->draw_tiles_y;
bin.multisample_mode_4x = job->msaa;
@@ -93,14 +92,14 @@ vc4_start_draw(struct vc4_context *vc4)
* figure out what new state packets need to be written to that tile's
* command list.
*/
- cl_emit(&bcl, START_TILE_BINNING, start);
+ cl_emit(&job->bcl, START_TILE_BINNING, start);
/* Reset the current compressed primitives format. This gets modified
* by VC4_PACKET_GL_INDEXED_PRIMITIVE and
* VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
* of every tile.
*/
- cl_emit(&bcl, PRIMITIVE_LIST_FORMAT, list) {
+ cl_emit(&job->bcl, PRIMITIVE_LIST_FORMAT, list) {
list.data_type = _16_BIT_INDEX;
list.primitive_type = TRIANGLES_LIST;
}
@@ -108,8 +107,6 @@ vc4_start_draw(struct vc4_context *vc4)
job->needs_flush = true;
job->draw_width = vc4->framebuffer.width;
job->draw_height = vc4->framebuffer.height;
-
- cl_end(&job->bcl, bcl);
}
static void
@@ -216,8 +213,7 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4,
}
cl_end(&job->shader_rec, shader_rec);
- struct vc4_cl_out *bcl = cl_start(&job->bcl);
- cl_emit(&bcl, GL_SHADER_STATE, shader_state) {
+ cl_emit(&job->bcl, GL_SHADER_STATE, shader_state) {
/* Note that number of attributes == 0 in the packet means 8
* attributes. This field also contains the offset into
* shader_rec.
@@ -226,7 +222,6 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4,
shader_state.number_of_attribute_arrays =
num_elements_emit & 0x7;
}
- cl_end(&job->bcl, bcl);
vc4_write_uniforms(vc4, vc4->prog.fs,
&vc4->constbuf[PIPE_SHADER_FRAGMENT],
@@ -336,7 +331,6 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
/* Note that the primitive type fields match with OpenGL/gallium
* definitions, up to but not including QUADS.
*/
- struct vc4_cl_out *bcl = cl_start(&job->bcl);
if (info->index_size) {
uint32_t index_size = info->index_size;
uint32_t offset = info->start * index_size;
@@ -359,6 +353,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
}
struct vc4_resource *rsc = vc4_resource(prsc);
+ struct vc4_cl_out *bcl = cl_start(&job->bcl);
cl_start_reloc(&job->bcl, &bcl, 1);
cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
cl_u8(&bcl,
@@ -369,6 +364,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
cl_u32(&bcl, info->count);
cl_reloc(job, &job->bcl, &bcl, rsc->bo, offset);
cl_u32(&bcl, vc4->max_index);
+ cl_end(&job->bcl, bcl);
job->draw_calls_queued++;
if (info->index_size == 4 || info->has_user_indices)
@@ -395,10 +391,8 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
* plus whatever remainder.
*/
if (extra_index_bias) {
- cl_end(&job->bcl, bcl);
vc4_emit_gl_shader_state(vc4, info,
extra_index_bias);
- bcl = cl_start(&job->bcl);
}
if (start + count > max_verts) {
@@ -434,7 +428,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
}
}
- cl_emit(&bcl, VERTEX_ARRAY_PRIMITIVES, array) {
+ cl_emit(&job->bcl, VERTEX_ARRAY_PRIMITIVES, array) {
array.primitive_mode = info->mode;
array.length = this_count;
array.index_of_first_vertex = start;
@@ -446,7 +440,6 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
start = 0;
}
}
- cl_end(&job->bcl, bcl);
/* We shouldn't have tripped the HW_2116 bug with the GFXH-515
* workaround.
diff --git a/src/gallium/drivers/vc4/vc4_emit.c b/src/gallium/drivers/vc4/vc4_emit.c
index 9fc266e5ba..8fb379df5e 100644
--- a/src/gallium/drivers/vc4/vc4_emit.c
+++ b/src/gallium/drivers/vc4/vc4_emit.c
@@ -29,7 +29,6 @@ vc4_emit_state(struct pipe_context *pctx)
struct vc4_context *vc4 = vc4_context(pctx);
struct vc4_job *job = vc4->job;
- struct vc4_cl_out *bcl = cl_start(&job->bcl);
if (vc4->dirty & (VC4_DIRTY_SCISSOR | VC4_DIRTY_VIEWPORT |
VC4_DIRTY_RASTERIZER)) {
float *vpscale = vc4->viewport.scale;
@@ -60,7 +59,7 @@ vc4_emit_state(struct pipe_context *pctx)
maxy = MIN2(vp_maxy, vc4->scissor.maxy);
}
- cl_emit(&bcl, CLIP_WINDOW, clip) {
+ cl_emit(&job->bcl, CLIP_WINDOW, clip) {
clip.clip_window_left_pixel_coordinate = minx;
clip.clip_window_bottom_pixel_coordinate = miny;
clip.clip_window_height_in_pixels = maxy - miny;
@@ -79,6 +78,7 @@ vc4_emit_state(struct pipe_context *pctx)
uint8_t ez_enable_mask_out = ~0;
uint8_t rasosm_mask_out = ~0;
+ struct vc4_cl_out *bcl = cl_start(&job->bcl);
/* HW-2905: If the RCL ends up doing a full-res load when
* multisampling, then early Z tracking may end up with values
* from the previous tile due to a HW bug. Disable it to
@@ -111,41 +111,42 @@ vc4_emit_state(struct pipe_context *pctx)
cl_u8(&bcl,
(vc4->rasterizer->config_bits[2] |
vc4->zsa->config_bits[2]) & ez_enable_mask_out);
+ cl_end(&job->bcl, bcl);
}
if (vc4->dirty & VC4_DIRTY_RASTERIZER) {
- cl_emit(&bcl, DEPTH_OFFSET, depth) {
+ cl_emit(&job->bcl, DEPTH_OFFSET, depth) {
depth.depth_offset_units =
vc4->rasterizer->offset_units;
depth.depth_offset_factor =
vc4->rasterizer->offset_factor;
}
- cl_emit(&bcl, POINT_SIZE, points) {
+ cl_emit(&job->bcl, POINT_SIZE, points) {
points.point_size = vc4->rasterizer->point_size;
}
- cl_emit(&bcl, LINE_WIDTH, points) {
+ cl_emit(&job->bcl, LINE_WIDTH, points) {
points.line_width = vc4->rasterizer->base.line_width;
}
}
if (vc4->dirty & VC4_DIRTY_VIEWPORT) {
- cl_emit(&bcl, CLIPPER_XY_SCALING, clip) {
+ cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
clip.viewport_half_width_in_1_16th_of_pixel =
vc4->viewport.scale[0] * 16.0f;
clip.viewport_half_height_in_1_16th_of_pixel =
vc4->viewport.scale[1] * 16.0f;
}
- cl_emit(&bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
+ cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
clip.viewport_z_offset_zc_to_zs =
vc4->viewport.translate[2];
clip.viewport_z_scale_zc_to_zs =
vc4->viewport.scale[2];
}
- cl_emit(&bcl, VIEWPORT_OFFSET, vp) {
+ cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
vp.viewport_centre_x_coordinate =
16 * vc4->viewport.translate[0];
vp.viewport_centre_y_coordinate =
@@ -154,12 +155,10 @@ vc4_emit_state(struct pipe_context *pctx)
}
if (vc4->dirty & VC4_DIRTY_FLAT_SHADE_FLAGS) {
- cl_emit(&bcl, FLAT_SHADE_FLAGS, flags) {
+ cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
if (vc4->rasterizer->base.flatshade)
flags.flat_shading_flags =
vc4->prog.fs->color_inputs;
}
}
-
- cl_end(&job->bcl, bcl);
}
diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c
index afdac8c991..ed6c86c3e6 100644
--- a/src/gallium/drivers/vc4/vc4_job.c
+++ b/src/gallium/drivers/vc4/vc4_job.c
@@ -377,13 +377,11 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
* until the FLUSH completes.
*/
cl_ensure_space(&job->bcl, 8);
- struct vc4_cl_out *bcl = cl_start(&job->bcl);
- cl_emit(&bcl, INCREMENT_SEMAPHORE, incr);
+ cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr);
/* The FLUSH caps all of our bin lists with a
* VC4_PACKET_RETURN.
*/
- cl_emit(&bcl, FLUSH, flush);
- cl_end(&job->bcl, bcl);
+ cl_emit(&job->bcl, FLUSH, flush);
}
struct drm_vc4_submit_cl submit = {
.color_read.hindex = ~0,