From 7f28775edcc727791528d8439c86e8dffd5059a9 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Wed, 16 Dec 2020 12:20:39 -0500 Subject: zink: implement uniform inlining this lets us inline away our problems Reviewed-by: Dave Airlie Part-of: --- src/gallium/drivers/zink/driinfo_zink.h | 1 + src/gallium/drivers/zink/zink_compiler.c | 20 ++++++++++++++++++++ src/gallium/drivers/zink/zink_context.c | 18 ++++++++++++++++++ src/gallium/drivers/zink/zink_context.h | 5 +++++ src/gallium/drivers/zink/zink_draw.c | 8 ++++++++ src/gallium/drivers/zink/zink_program.c | 17 +++++++++++++++++ src/gallium/drivers/zink/zink_screen.c | 2 ++ src/gallium/drivers/zink/zink_screen.h | 1 + src/gallium/drivers/zink/zink_shader_keys.h | 6 ++++++ 9 files changed, 78 insertions(+) diff --git a/src/gallium/drivers/zink/driinfo_zink.h b/src/gallium/drivers/zink/driinfo_zink.h index e637ccd4512..e1cf6d7d559 100644 --- a/src/gallium/drivers/zink/driinfo_zink.h +++ b/src/gallium/drivers/zink/driinfo_zink.h @@ -2,6 +2,7 @@ DRI_CONF_SECTION_DEBUG DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION(false) + DRI_CONF_OPT_B(radeonsi_inline_uniforms, false, "Optimize shaders by replacing uniforms with literals") DRI_CONF_SECTION_END DRI_CONF_SECTION_PERFORMANCE diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index ed11e0031f3..9d3bd3a3bf2 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -570,6 +570,24 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, struct z VkShaderModule mod = VK_NULL_HANDLE; void *streamout = NULL; nir_shader *nir = zs->nir; + + if (key) { + if (key->inline_uniforms) { + if (nir == zs->nir) + nir = nir_shader_clone(NULL, nir); + NIR_PASS_V(nir, nir_inline_uniforms, + nir->info.num_inlinable_uniforms, + key->base.inlined_uniform_values, + nir->info.inlinable_uniform_dw_offsets); + + optimize_nir(nir); + + /* This must be done again. 
*/ + NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in | + nir_var_shader_out); + } + } + /* TODO: use a separate mem ctx here for ralloc */ if (zs->nir->info.stage < MESA_SHADER_FRAGMENT) { if (zink_vs_key(key)->last_vertex_stage) { @@ -903,6 +921,8 @@ zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr, bool optimize) if (nir->info.num_ubos || nir->info.num_ssbos) NIR_PASS_V(nir, nir_lower_dynamic_bo_access); nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + if (screen->driconf.inline_uniforms) + nir_find_inlinable_uniforms(nir); } void diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c index f2a0ee50150..e1ff9f15bf8 100644 --- a/src/gallium/drivers/zink/zink_context.c +++ b/src/gallium/drivers/zink/zink_context.c @@ -918,6 +918,18 @@ zink_set_scissor_states(struct pipe_context *pctx, ctx->vp_state.scissor_states[start_slot + i] = states[i]; } +static void +zink_set_inlinable_constants(struct pipe_context *pctx, + enum pipe_shader_type shader, + uint num_values, uint32_t *values) +{ + struct zink_context *ctx = (struct zink_context *)pctx; + + memcpy(ctx->inlinable_uniforms[shader], values, num_values * 4); + ctx->inlinable_uniforms_dirty_mask |= 1 << shader; + ctx->inlinable_uniforms_valid_mask |= 1 << shader; +} + static void zink_set_constant_buffer(struct pipe_context *pctx, enum pipe_shader_type shader, uint index, @@ -966,6 +978,11 @@ zink_set_constant_buffer(struct pipe_context *pctx, ctx->ubos[shader][index].buffer_size = 0; ctx->ubos[shader][index].user_buffer = NULL; } + if (index == 0) { + /* Invalidate current inlinable uniforms. 
*/ + ctx->inlinable_uniforms_valid_mask &= ~(1 << shader); + } + if (update) zink_context_invalidate_descriptor_state(ctx, shader, ZINK_DESCRIPTOR_TYPE_UBO); } @@ -2668,6 +2685,7 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) ctx->base.set_vertex_buffers = zink_set_vertex_buffers; ctx->base.set_viewport_states = zink_set_viewport_states; ctx->base.set_scissor_states = zink_set_scissor_states; + ctx->base.set_inlinable_constants = zink_set_inlinable_constants; ctx->base.set_constant_buffer = zink_set_constant_buffer; ctx->base.set_shader_buffers = zink_set_shader_buffers; ctx->base.set_shader_images = zink_set_shader_images; diff --git a/src/gallium/drivers/zink/zink_context.h b/src/gallium/drivers/zink/zink_context.h index fd78a85ab45..62bb448523a 100644 --- a/src/gallium/drivers/zink/zink_context.h +++ b/src/gallium/drivers/zink/zink_context.h @@ -143,6 +143,11 @@ struct zink_context { struct util_dynarray free_batch_states; //unused batch states VkDeviceSize resource_size; //the accumulated size of resources in submitted buffers + unsigned shader_has_inlinable_uniforms_mask; + unsigned inlinable_uniforms_dirty_mask; + unsigned inlinable_uniforms_valid_mask; + uint32_t inlinable_uniforms[PIPE_SHADER_TYPES][MAX_INLINABLE_UNIFORMS]; + struct pipe_constant_buffer ubos[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; struct pipe_shader_buffer ssbos[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS]; uint32_t writable_ssbos[PIPE_SHADER_TYPES]; diff --git a/src/gallium/drivers/zink/zink_draw.c b/src/gallium/drivers/zink/zink_draw.c index a7c9507e5ec..8fc31b13d7f 100644 --- a/src/gallium/drivers/zink/zink_draw.c +++ b/src/gallium/drivers/zink/zink_draw.c @@ -180,6 +180,9 @@ static struct zink_compute_program * get_compute_program(struct zink_context *ctx) { unsigned bits = 1 << PIPE_SHADER_COMPUTE; + ctx->dirty_shader_stages |= ctx->inlinable_uniforms_dirty_mask & + ctx->inlinable_uniforms_valid_mask & + ctx->shader_has_inlinable_uniforms_mask & 
bits; if (ctx->dirty_shader_stages & bits) { struct hash_entry *entry = _mesa_hash_table_search(ctx->compute_program_cache, &ctx->compute_stage->shader_id); @@ -194,6 +197,7 @@ get_compute_program(struct zink_context *ctx) ctx->compute_pipeline_state.dirty = true; ctx->curr_compute = entry->data; ctx->dirty_shader_stages &= bits; + ctx->inlinable_uniforms_dirty_mask &= ~bits; /* clear only the compute bit; other stages stay dirty */ } assert(ctx->curr_compute); @@ -213,6 +217,9 @@ get_gfx_program(struct zink_context *ctx) ctx->last_vertex_stage_dirty = false; } unsigned bits = u_bit_consecutive(PIPE_SHADER_VERTEX, 5); + ctx->dirty_shader_stages |= ctx->inlinable_uniforms_dirty_mask & + ctx->inlinable_uniforms_valid_mask & + ctx->shader_has_inlinable_uniforms_mask & bits; if (ctx->dirty_shader_stages & bits) { struct hash_entry *entry = _mesa_hash_table_search(ctx->program_cache, ctx->gfx_stages); @@ -229,6 +236,7 @@ get_gfx_program(struct zink_context *ctx) ctx->gfx_pipeline_state.combined_dirty = true; ctx->curr_program = entry->data; ctx->dirty_shader_stages &= ~bits; + ctx->inlinable_uniforms_dirty_mask &= ~bits; } assert(ctx->curr_program); diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c index da368c3316f..f9a7239db4a 100644 --- a/src/gallium/drivers/zink/zink_program.c +++ b/src/gallium/drivers/zink/zink_program.c @@ -276,13 +276,26 @@ static struct zink_shader_module * get_shader_module_for_stage(struct zink_context *ctx, struct zink_shader *zs, struct zink_gfx_program *prog) { gl_shader_stage stage = zs->nir->info.stage; + enum pipe_shader_type pstage = pipe_shader_type_from_mesa(stage); struct zink_shader_key key = {}; VkShaderModule mod; struct zink_shader_module *zm; struct keybox *keybox; uint32_t hash; + bool needs_base_size = false; shader_key_vtbl[stage](ctx, zs, ctx->gfx_stages, &key); + + if (zs->nir->info.num_inlinable_uniforms && + ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(pstage)) { + key.inline_uniforms = true; + 
memcpy(key.base.inlined_uniform_values, + ctx->inlinable_uniforms[pstage], + zs->nir->info.num_inlinable_uniforms * 4); + needs_base_size = true; + } + if (needs_base_size) + key.size += sizeof(struct zink_shader_key_base); keybox = make_keybox(prog->shader_cache, stage, &key, key.size); hash = keybox_hash(keybox); struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(prog->shader_cache->shader_cache, @@ -999,6 +1012,10 @@ bind_stage(struct zink_context *ctx, enum pipe_shader_type stage, else ctx->gfx_stages[stage] = shader; ctx->dirty_shader_stages |= 1 << stage; + if (shader && shader->nir->info.num_inlinable_uniforms) + ctx->shader_has_inlinable_uniforms_mask |= 1 << stage; + else + ctx->shader_has_inlinable_uniforms_mask &= ~(1 << stage); } static void diff --git a/src/gallium/drivers/zink/zink_screen.c b/src/gallium/drivers/zink/zink_screen.c index 70c14b0f3de..256f0326728 100644 --- a/src/gallium/drivers/zink/zink_screen.c +++ b/src/gallium/drivers/zink/zink_screen.c @@ -250,6 +250,7 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_GL_SPIRV: case PIPE_CAP_CLEAR_SCISSORED: case PIPE_CAP_INVALIDATE_BUFFER: + case PIPE_CAP_PREFER_REAL_BUFFER_IN_CONSTBUF0: return 1; case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE: @@ -1493,6 +1494,7 @@ zink_internal_create_screen(const struct pipe_screen_config *config) #if WITH_XMLCONFIG if (config) screen->driconf.dual_color_blend_by_location = driQueryOptionb(config->options, "dual_color_blend_by_location"); + //screen->driconf.inline_uniforms = driQueryOptionb(config->options, "radeonsi_inline_uniforms"); #endif screen->total_video_mem = get_video_mem(screen); diff --git a/src/gallium/drivers/zink/zink_screen.h b/src/gallium/drivers/zink/zink_screen.h index b14658cb5e3..8d3ff3a6a51 100644 --- a/src/gallium/drivers/zink/zink_screen.h +++ b/src/gallium/drivers/zink/zink_screen.h @@ -146,6 +146,7 @@ struct zink_screen { struct { bool dual_color_blend_by_location; + bool inline_uniforms; } 
driconf; VkFormatProperties format_props[PIPE_FORMAT_COUNT]; diff --git a/src/gallium/drivers/zink/zink_shader_keys.h b/src/gallium/drivers/zink/zink_shader_keys.h index 395f18e31b7..7933474a9fe 100644 --- a/src/gallium/drivers/zink/zink_shader_keys.h +++ b/src/gallium/drivers/zink/zink_shader_keys.h @@ -48,6 +48,10 @@ struct zink_tcs_key { uint64_t vs_outputs_written; }; +struct zink_shader_key_base { + uint32_t inlined_uniform_values[MAX_INLINABLE_UNIFORMS]; +}; + /* a shader key is used for swapping out shader modules based on pipeline states, * e.g., if sampleCount changes, we must verify that the fs doesn't need a recompile * to account for GL ignoring gl_SampleMask in some cases when VK will not @@ -60,6 +64,8 @@ struct zink_shader_key { struct zink_fs_key fs; struct zink_tcs_key tcs; } key; + struct zink_shader_key_base base; + unsigned inline_uniforms:1; uint32_t size; }; -- cgit v1.2.3