diff options
Diffstat (limited to 'src/gallium/drivers/freedreno/ir3/ir3_const.h')
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_const.h | 152 |
1 files changed, 110 insertions, 42 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_const.h b/src/gallium/drivers/freedreno/ir3/ir3_const.h index ceed8c87e6a..fab31130616 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_const.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_const.h @@ -111,8 +111,7 @@ ir3_user_consts_size(struct ir3_ubo_analysis_state *state, unsigned *packets, * constant buffer. */ static inline void -ir3_emit_constant_data(struct fd_screen *screen, - const struct ir3_shader_variant *v, +ir3_emit_constant_data(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) { const struct ir3_const_state *const_state = ir3_const_state(v); @@ -120,7 +119,7 @@ ir3_emit_constant_data(struct fd_screen *screen, for (unsigned i = 0; i < state->num_enabled; i++) { unsigned ubo = state->range[i].ubo.block; - if (ubo != const_state->constant_data_ubo) + if (ubo != const_state->consts_ubo.idx) continue; uint32_t size = state->range[i].end - state->range[i].start; @@ -151,8 +150,7 @@ ir3_emit_constant_data(struct fd_screen *screen, * shader). */ static inline void -ir3_emit_user_consts(struct fd_screen *screen, - const struct ir3_shader_variant *v, +ir3_emit_user_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf) { @@ -163,7 +161,7 @@ ir3_emit_user_consts(struct fd_screen *screen, assert(!state->range[i].ubo.bindless); unsigned ubo = state->range[i].ubo.block; if (!(constbuf->enabled_mask & (1 << ubo)) || - ubo == const_state->constant_data_ubo) { + ubo == const_state->consts_ubo.idx) { continue; } struct pipe_constant_buffer *cb = &constbuf->cb[ubo]; @@ -186,13 +184,14 @@ ir3_emit_user_consts(struct fd_screen *screen, continue; /* things should be aligned to vec4: */ - debug_assert((state->range[i].offset % 16) == 0); - debug_assert((size % 16) == 0); - debug_assert((offset % 16) == 0); + assert((state->range[i].offset % 16) == 0); + assert((size % 16) == 0); + assert((offset % 16) == 0); if (cb->user_buffer) { - emit_const_user(ring, v, state->range[i].offset / 4, size / 4, - cb->user_buffer + state->range[i].start); + uint8_t *p = (uint8_t *)cb->user_buffer; + p += state->range[i].start; + emit_const_user(ring, v, state->range[i].offset / 4, size / 4, (uint32_t *)p); } else { emit_const_prsc(ring, v, state->range[i].offset / 4, offset, size / 4, cb->buffer); @@ -219,7 +218,7 @@ ir3_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_bo *bos[params]; for (uint32_t i = 0; i < params; i++) { - if (i == const_state->constant_data_ubo) { + if (i == const_state->consts_ubo.idx) { bos[i] = v->bo; offsets[i] = v->info.constant_data_offset; continue; @@ -308,8 +307,7 @@ ir3_emit_image_dims(struct fd_screen *screen, } static inline void -ir3_emit_immediates(struct fd_screen *screen, - const struct ir3_shader_variant *v, +ir3_emit_immediates(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) { const struct ir3_const_state *const_state = ir3_const_state(v); @@ -331,30 +329,29 @@ ir3_emit_immediates(struct fd_screen *screen, /* NIR constant data has the same lifetime as immediates, so upload it * now, too. */ - ir3_emit_constant_data(screen, v, ring); + ir3_emit_constant_data(v, ring); } static inline void -ir3_emit_link_map(struct fd_screen *screen, - const struct ir3_shader_variant *producer, - const struct ir3_shader_variant *v, +ir3_emit_link_map(const struct ir3_shader_variant *producer, + const struct ir3_shader_variant *consumer, struct fd_ringbuffer *ring) { - const struct ir3_const_state *const_state = ir3_const_state(v); + const struct ir3_const_state *const_state = ir3_const_state(consumer); uint32_t base = const_state->offsets.primitive_map; - int size = DIV_ROUND_UP(v->input_size, 4); + int size = DIV_ROUND_UP(consumer->input_size, 4); /* truncate size to avoid writing constants that shader * does not use: */ - size = MIN2(size + base, v->constlen) - base; + size = MIN2(size + base, consumer->constlen) - base; /* convert out of vec4: */ base *= 4; size *= 4; if (size > 0) - emit_const_user(ring, v, base, size, producer->output_loc); + emit_const_user(ring, consumer, base, size, producer->output_loc); } /* emit stream-out buffers: */ @@ -367,7 +364,7 @@ emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v, uint32_t offset = const_state->offsets.tfbo; if (v->constlen > offset) { struct fd_streamout_stateobj *so = &ctx->streamout; - struct ir3_stream_output_info *info = &v->shader->stream_output; + const struct ir3_stream_output_info *info = &v->stream_output; uint32_t params = 4; uint32_t offsets[params]; struct fd_bo *bos[params]; @@ -409,7 +406,7 @@ emit_common_consts(const struct ir3_shader_variant *v, * different state-objects we could avoid this. */ if (dirty && is_stateobj(ring)) - dirty = ~0; + dirty = (enum fd_dirty_shader_state)~0; if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) { struct fd_constbuf_stateobj *constbuf; @@ -420,10 +417,10 @@ emit_common_consts(const struct ir3_shader_variant *v, ring_wfi(ctx->batch, ring); - ir3_emit_user_consts(ctx->screen, v, ring, constbuf); + ir3_emit_user_consts(v, ring, constbuf); ir3_emit_ubos(ctx, v, ring, constbuf); if (shader_dirty) - ir3_emit_immediates(ctx->screen, v, ring); + ir3_emit_immediates(v, ring); } if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE)) { @@ -433,22 +430,40 @@ emit_common_consts(const struct ir3_shader_variant *v, } } +/* emit kernel params */ static inline void -ir3_emit_vs_driver_params(const struct ir3_shader_variant *v, - struct fd_ringbuffer *ring, struct fd_context *ctx, - const struct pipe_draw_info *info, - const struct pipe_draw_indirect_info *indirect, - const struct pipe_draw_start_count_bias *draw) assert_dt +emit_kernel_params(struct fd_context *ctx, const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring, const struct pipe_grid_info *info) + assert_dt +{ + const struct ir3_const_state *const_state = ir3_const_state(v); + uint32_t offset = const_state->offsets.kernel_params; + if (v->constlen > offset) { + ring_wfi(ctx->batch, ring); + emit_const_user(ring, v, offset * 4, + align(v->cs.req_input_mem, 4), + (uint32_t *)info->input); + } +} + +static inline void +ir3_emit_driver_params(const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring, struct fd_context *ctx, + const struct pipe_draw_info *info, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count_bias *draw, + const uint32_t draw_id) assert_dt { assert(v->need_driver_params); const struct ir3_const_state *const_state = ir3_const_state(v); uint32_t offset = const_state->offsets.driver_param; uint32_t vertex_params[IR3_DP_VS_COUNT] = { - [IR3_DP_DRAWID] = 0, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */ + [IR3_DP_DRAWID] = draw_id, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */ [IR3_DP_VTXID_BASE] = info->index_size ? draw->index_bias : draw->start, [IR3_DP_INSTID_BASE] = info->start_instance, [IR3_DP_VTXCNT_MAX] = ctx->streamout.max_tf_vtx, + [IR3_DP_IS_INDEXED_DRAW] = info->index_size != 0 ? ~0 : 0, }; if (v->key.ucp_enables) { struct pipe_clip_state *ucp = &ctx->ucp; @@ -484,7 +499,6 @@ ir3_emit_vs_driver_params(const struct ir3_shader_variant *v, pipe_buffer_create(&ctx->screen->base, PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM, vertex_params_area * 4); unsigned src_off = indirect->offset; - ; void *ptr; ptr = fd_bo_map(fd_resource(vertex_params_rsc)->bo); @@ -516,6 +530,34 @@ ir3_emit_vs_driver_params(const struct ir3_shader_variant *v, } } + +static inline void +ir3_emit_hs_driver_params(const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring, + struct fd_context *ctx) + assert_dt +{ + assert(v->need_driver_params); + + const struct ir3_const_state *const_state = ir3_const_state(v); + uint32_t offset = const_state->offsets.driver_param; + uint32_t hs_params[IR3_DP_HS_COUNT] = { + [IR3_DP_HS_DEFAULT_OUTER_LEVEL_X] = fui(ctx->default_outer_level[0]), + [IR3_DP_HS_DEFAULT_OUTER_LEVEL_Y] = fui(ctx->default_outer_level[1]), + [IR3_DP_HS_DEFAULT_OUTER_LEVEL_Z] = fui(ctx->default_outer_level[2]), + [IR3_DP_HS_DEFAULT_OUTER_LEVEL_W] = fui(ctx->default_outer_level[3]), + [IR3_DP_HS_DEFAULT_INNER_LEVEL_X] = fui(ctx->default_inner_level[0]), + [IR3_DP_HS_DEFAULT_INNER_LEVEL_Y] = fui(ctx->default_inner_level[1]), + }; + + const uint32_t hs_params_size = + MIN2(const_state->num_driver_params, (v->constlen - offset) * 4); + assert(hs_params_size <= IR3_DP_HS_COUNT); + + emit_const_user(ring, v, offset * 4, hs_params_size, hs_params); +} + + static inline void ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_context *ctx, @@ -523,14 +565,14 @@ ir3_emit_vs_consts(const struct ir3_shader_variant *v, const struct pipe_draw_indirect_info *indirect, const struct pipe_draw_start_count_bias *draw) assert_dt { - debug_assert(v->type == MESA_SHADER_VERTEX); + assert(v->type == MESA_SHADER_VERTEX); emit_common_consts(v, ring, ctx, PIPE_SHADER_VERTEX); /* emit driver params every time: */ if (info && v->need_driver_params) { ring_wfi(ctx->batch, ring); - ir3_emit_vs_driver_params(v, ring, ctx, info, indirect, draw); + ir3_emit_driver_params(v, ring, ctx, info, indirect, draw, 0); } } @@ -538,20 +580,22 @@ static inline void ir3_emit_fs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_context *ctx) assert_dt { - debug_assert(v->type == MESA_SHADER_FRAGMENT); + assert(v->type == MESA_SHADER_FRAGMENT); emit_common_consts(v, ring, ctx, PIPE_SHADER_FRAGMENT); } -/* emit compute-shader consts: */ static inline void -ir3_emit_cs_consts(const struct ir3_shader_variant *v, - struct fd_ringbuffer *ring, struct fd_context *ctx, - const struct pipe_grid_info *info) assert_dt +ir3_emit_cs_driver_params(const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring, struct fd_context *ctx, + const struct pipe_grid_info *info) + assert_dt { - debug_assert(gl_shader_stage_is_compute(v->type)); + emit_kernel_params(ctx, v, ring, info); - emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE); + /* a3xx/a4xx can inject these directly */ + if (ctx->screen->gen <= 4) + return; /* emit compute-shader driver-params: */ const struct ir3_const_state *const_state = ir3_const_state(v); @@ -588,13 +632,24 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v, pipe_resource_reference(&indirect, NULL); } else { + // TODO some of these are not part of the indirect state.. so we + // need to emit some of this directly in both cases. uint32_t compute_params[IR3_DP_CS_COUNT] = { [IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0], [IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1], [IR3_DP_NUM_WORK_GROUPS_Z] = info->grid[2], + [IR3_DP_WORK_DIM] = info->work_dim, + [IR3_DP_BASE_GROUP_X] = info->grid_base[0], + [IR3_DP_BASE_GROUP_Y] = info->grid_base[1], + [IR3_DP_BASE_GROUP_Z] = info->grid_base[2], + [IR3_DP_CS_SUBGROUP_SIZE] = v->info.subgroup_size, [IR3_DP_LOCAL_GROUP_SIZE_X] = info->block[0], [IR3_DP_LOCAL_GROUP_SIZE_Y] = info->block[1], [IR3_DP_LOCAL_GROUP_SIZE_Z] = info->block[2], + [IR3_DP_SUBGROUP_ID_SHIFT] = util_logbase2(v->info.subgroup_size), + [IR3_DP_WORKGROUP_ID_X] = 0, // TODO + [IR3_DP_WORKGROUP_ID_Y] = 0, // TODO + [IR3_DP_WORKGROUP_ID_Z] = 0, // TODO }; uint32_t size = MIN2(const_state->num_driver_params, v->constlen * 4 - offset * 4); @@ -603,3 +658,16 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v, } } } + +/* emit compute-shader consts: */ +static inline void +ir3_emit_cs_consts(const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring, struct fd_context *ctx, + const struct pipe_grid_info *info) assert_dt +{ + assert(gl_shader_stage_is_compute(v->type)); + + emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE); + + ir3_emit_cs_driver_params(v, ring, ctx, info); +} |