diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/glsl/Makefile.sources | 4 | ||||
-rw-r--r-- | src/glsl/glsl_parser_extras.cpp | 12 | ||||
-rw-r--r-- | src/glsl/glsl_types.h | 1 | ||||
-rw-r--r-- | src/glsl/link_uniforms.cpp | 2 | ||||
-rw-r--r-- | src/glsl/linker.cpp | 6 | ||||
-rw-r--r-- | src/glsl/linker.h | 2 | ||||
-rw-r--r-- | src/glsl/shader_cache.cpp | 443 | ||||
-rw-r--r-- | src/glsl/shader_cache.h | 38 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.sources | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 7 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_shader_cache.c | 382 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state.h | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state_upload.c | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs.c | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs.h | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm.c | 7 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm.h | 4 | ||||
-rw-r--r-- | src/mesa/main/context.c | 3 | ||||
-rw-r--r-- | src/mesa/main/mtypes.h | 7 | ||||
-rw-r--r-- | src/mesa/program/ir_to_mesa.cpp | 12 |
20 files changed, 946 insertions, 7 deletions
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 6b1b075b1d..f97ef7ce70 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -196,7 +196,9 @@ LIBGLSL_FILES = \ LIBGLSL_SHADER_CACHE_FILES = \ cache.c \ - cache.h + cache.h \ + shader_cache.cpp \ + shader_cache.h # glsl_compiler diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index 046d5d7b5b..01bd1697da 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -35,6 +35,8 @@ #include "glsl_parser.h" #include "ir_optimization.h" #include "loop_analysis.h" +#include "cache.h" +#include "util/mesa-sha1.h" /** * Format a short human-readable description of the given GLSL version. @@ -1503,6 +1505,16 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader, state->error = glcpp_preprocess(state, &source, &state->info_log, &ctx->Extensions, ctx); + char buf[41]; + _mesa_sha1_compute(source, strlen(source), shader->sha1); + if (ctx->Cache && cache_has_key(ctx->Cache, shader->sha1)) { + /* We've seen this shader before and know it compiles */ + printf("deferring compile of shader: %s\n", + _mesa_sha1_format(buf, shader->sha1)); + shader->CompileStatus = true; + return; + } + if (!state->error) { _mesa_glsl_lexer_ctor(state, source); _mesa_glsl_parse(state); diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index f54a9393e7..712890d0bb 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -106,6 +106,7 @@ enum glsl_matrix_layout { #include "main/mtypes.h" /* for gl_texture_index, C++'s enum rules are broken */ struct glsl_type { + GLenum gl_type; glsl_base_type base_type; diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp index 11ae06f9bf..ad4da89267 100644 --- a/src/glsl/link_uniforms.cpp +++ b/src/glsl/link_uniforms.cpp @@ -1062,6 +1062,8 @@ link_assign_uniform_locations(struct gl_shader_program *prog, prog->NumUniformStorage = num_uniforms; 
prog->NumHiddenUniforms = hidden_uniforms; prog->UniformStorage = uniforms; + prog->NumUniformDataSlots = num_data_slots; + prog->UniformDataSlots = data; link_set_image_access_qualifiers(prog); link_set_uniform_initializers(prog, boolean_true); diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index b7a783c098..25530a243c 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -76,6 +76,9 @@ #include "ir_optimization.h" #include "ir_rvalue_visitor.h" #include "ir_uniform.h" +#include "util/mesa-sha1.h" +#include "cache.h" +#include "shader_cache.h" #include "main/shaderobj.h" #include "main/enums.h" @@ -2839,6 +2842,9 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) tfeedback_decl *tfeedback_decls = NULL; unsigned num_tfeedback_decls = prog->TransformFeedback.NumVarying; + if (shader_cache_read_program_metadata(ctx, prog)) + return; + void *mem_ctx = ralloc_context(NULL); // temporary linker context prog->LinkStatus = true; /* All error paths will set this to false */ diff --git a/src/glsl/linker.h b/src/glsl/linker.h index ce3dc32329..8fe63d705f 100644 --- a/src/glsl/linker.h +++ b/src/glsl/linker.h @@ -201,3 +201,5 @@ void linker_warning(gl_shader_program *prog, const char *fmt, ...); #endif /* GLSL_LINKER_H */ + + diff --git a/src/glsl/shader_cache.cpp b/src/glsl/shader_cache.cpp new file mode 100644 index 0000000000..ff92b69b9b --- /dev/null +++ b/src/glsl/shader_cache.cpp @@ -0,0 +1,443 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and 
this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file shader_cache.cpp + * + * GLSL shader cache implementation + * + * This uses the generic cache in cache.c to implement a cache of linked + * shader programs. + * + * \author Tapani Pälli <tapani.palli@intel.com> + * \author Kristian Høgsberg <kristian.h.kristensen@intel.com> + * \author Carl Worth <carl.d.worth@intel.com> + */ + +#include "main/core.h" +#include "glsl_symbol_table.h" +#include "glsl_parser_extras.h" +#include "ir.h" +#include "program.h" +#include "program/hash_table.h" +#include "linker.h" +#include "link_varyings.h" +#include "ir_optimization.h" +#include "ir_rvalue_visitor.h" +#include "ir_uniform.h" +#include "util/mesa-sha1.h" +#include "blob.h" +#include "cache.h" + +extern "C" { +#include "main/shaderobj.h" +#include "main/enums.h" +} + +struct cache_program { + uint32_t num_uniforms; + uint32_t samplers_validated; + uint32_t uniforms_offset; + uint32_t num_data_slots; + uint32_t num_remap_entries; + uint32_t remap_offset; + struct { + uint64_t inputs_read; + uint64_t outputs_written; + GLbitfield textures_used[MAX_COMBINED_TEXTURE_IMAGE_UNITS]; + uint64_t samplers_used; + } vs, fs; +}; + +struct cache_uniform { + uint32_t name; + uint32_t storage; + uint32_t type; +}; + +static void +encode_type_to_blob(struct blob *blob, const glsl_type *type) +{ + uint32_t encoding; + + /* FIXME: This only handles 
scalar/vector/matrix types. We encode those + * immediately in the uint32_t, but for aggregate types, we have to make + * the lower 24 bits an index into a pool of type descriptions. */ + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + encoding = (type->base_type << 24) | + (type->vector_elements << 4) | + (type->matrix_columns); + break; + case GLSL_TYPE_SAMPLER: + encoding = (type->base_type) << 24 | + (type->sampler_dimensionality << 4) | + (type->sampler_shadow << 3) | + (type->sampler_array << 2) | + (type->sampler_type); + break; + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + default: + printf ("FIXME: Do not yet know how to correctly serialize type %d (%s)\n", type->base_type, type->name); + /* FIXME: Serialize these...*/ + encoding = 0; + break; + } + + blob_write_uint32(blob, encoding); +} + +static const glsl_type * +decode_type_from_blob(struct blob_reader *blob) +{ + uint32_t u = blob_read_uint32(blob); + glsl_base_type base_type = (glsl_base_type) (u >> 24); + + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + return glsl_type::get_instance(base_type, (u >> 4) & 0x0f, u & 0x0f); + case GLSL_TYPE_SAMPLER: + return glsl_type::get_sampler_instance((enum glsl_sampler_dim) ((u >> 4) & 0x07), + (u >> 3) & 0x01, + (u >> 2) & 0x01, + (glsl_base_type) ((u >> 0) & 0x03)); + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + /* FIXME: Serialize these...*/ + default: + return NULL; + } +} + +static void +write_uniforms(struct blob *metadata, struct gl_shader_program *prog) +{ + uint32_t i; + + blob_write_uint32(metadata, prog->SamplersValidated); + 
blob_write_uint32(metadata, prog->NumUniformStorage); + blob_write_uint32(metadata, prog->NumUniformDataSlots); + + for (i = 0; i < prog->NumUniformStorage; i++) { + printf("uniform %s %s\n", + prog->UniformStorage[i].type->name, + prog->UniformStorage[i].name); + blob_write_string(metadata, prog->UniformStorage[i].name); + blob_write_uint32(metadata, prog->UniformStorage[i].storage - prog->UniformDataSlots); + blob_write_uint32(metadata, prog->UniformStorage[i].remap_location); + encode_type_to_blob(metadata, prog->UniformStorage[i].type); + } +} + +static void +read_uniforms(struct blob_reader *metadata, struct gl_shader_program *prog) +{ + struct gl_uniform_storage *uniforms; + union gl_constant_value *data; + uint32_t i; + + prog->SamplersValidated = blob_read_uint32(metadata); + prog->NumUniformStorage = blob_read_uint32(metadata); + prog->NumUniformDataSlots = blob_read_uint32(metadata); + + uniforms = rzalloc_array(prog, struct gl_uniform_storage, + prog->NumUniformStorage); + prog->UniformStorage = uniforms; + + data = rzalloc_array(uniforms, union gl_constant_value, + prog->NumUniformDataSlots); + prog->UniformDataSlots = data; + + prog->UniformHash = new string_to_uint_map; + + for (i = 0; i < prog->NumUniformStorage; i++) { + uniforms[i].name = ralloc_strdup(prog, blob_read_string (metadata)); + uniforms[i].storage = data + blob_read_uint32(metadata); + uniforms[i].remap_location = blob_read_uint32(metadata); + uniforms[i].block_index = -1; + uniforms[i].atomic_buffer_index = -1; + uniforms[i].type = decode_type_from_blob(metadata); + prog->UniformHash->put(i, uniforms[i].name); + + printf("uniform %d: %s %s\n", + i, uniforms[i].type->name, uniforms[i].name); + } +} + + +static void +write_uniform_remap_table(struct blob *metadata, struct gl_shader_program *prog) +{ + blob_write_uint32(metadata, prog->NumUniformRemapTable); + + for (unsigned i = 0; i < prog->NumUniformRemapTable; i++) { + blob_write_uint32(metadata, prog->UniformRemapTable[i] - 
prog->UniformStorage); + } +} + +static void +read_uniform_remap_table(struct blob_reader *metadata, + struct gl_shader_program *prog) +{ + unsigned i; + + prog->NumUniformRemapTable = blob_read_uint32(metadata); + + prog->UniformRemapTable =rzalloc_array(prog, struct gl_uniform_storage *, + prog->NumUniformRemapTable); + + for (i = 0; i < prog->NumUniformRemapTable; i++) { + prog->UniformRemapTable[i] = prog->UniformStorage + blob_read_uint32(metadata); + } +} + +static void +write_shader_parameters(struct blob *metadata, + struct gl_program_parameter_list *params) +{ + unsigned i; + struct gl_program_parameter *param; + + blob_write_uint32(metadata, params->NumParameters); + + for (i = 0; i < params->NumParameters; i++) { + param = &params->Parameters[i]; + + blob_write_uint32(metadata, param->Type); + blob_write_string(metadata, param->Name); + blob_write_uint32(metadata, param->Size); + blob_write_uint32(metadata, param->DataType); + blob_write_bytes(metadata, param->StateIndexes, + sizeof(param->StateIndexes)); + } + + blob_write_uint32(metadata, params->StateFlags); +} + +static void +read_shader_parameters(struct blob_reader *metadata, + struct gl_program_parameter_list *params) +{ + uint32_t i, num_parameters; + const char *name; + gl_register_file type; + GLuint size; + GLenum data_type; + gl_state_index state_indexes[STATE_LENGTH]; + + num_parameters = blob_read_uint32(metadata); + + for (i = 0; i < num_parameters; i++) { + + type = (gl_register_file) blob_read_uint32(metadata); + name = blob_read_string(metadata); + size = blob_read_uint32(metadata); + data_type = blob_read_uint32(metadata); + blob_copy_bytes(metadata, (uint8_t *) state_indexes, + sizeof(state_indexes)); + + _mesa_add_parameter(params, type, name, size, data_type, + NULL, state_indexes); + } + + params->StateFlags = blob_read_uint32(metadata); +} + +static void +write_shader_metadata(struct blob *metadata, gl_shader *shader) +{ + struct gl_program *glprog; + + if (shader->Program) { + glprog 
= shader->Program; + /* Use the lowest bit to indicate that there is shader_metadata here. */ + blob_write_uint64(metadata, glprog->InputsRead << 1 | 1); + blob_write_uint64(metadata, glprog->OutputsWritten); + blob_write_bytes(metadata, glprog->TexturesUsed, sizeof(glprog->TexturesUsed)); + blob_write_uint64(metadata, glprog->SamplersUsed); + blob_write_uint64(metadata, shader->num_samplers); + + write_shader_parameters(metadata, glprog->Parameters); + + } else { + /* An initial value of 0 indicates that this shader is not present. */ + blob_write_uint64(metadata, 0); + } +} + +static void +read_shader_metadata(struct blob_reader *metadata, + struct gl_program *glprog, + gl_shader *linked) +{ + uint64_t has_shader; + + has_shader = blob_read_uint64(metadata); + + if (has_shader) { + glprog->InputsRead = has_shader >> 1; + glprog->OutputsWritten = blob_read_uint64(metadata); + memcpy(glprog->TexturesUsed, blob_read_bytes(metadata, sizeof(glprog->TexturesUsed)), sizeof(glprog->TexturesUsed)); + glprog->SamplersUsed = blob_read_uint64(metadata); + linked->num_samplers = blob_read_uint64(metadata); + + glprog->Parameters = _mesa_new_parameter_list(); + + read_shader_parameters(metadata, glprog->Parameters); + + linked->Program = glprog; + } else { + linked->Program = NULL; + } +} + +void +shader_cache_write_program_metadata(struct gl_context *ctx, + struct gl_shader_program *prog) +{ + struct blob *metadata; + char sha1_buf[41]; + struct program_cache *cache; + + cache = ctx->Cache; + if (!cache) + return; + + /* We should be able to serialize any valid combinations of shaders, but + * for now we only support vs+fs. 
*/ + if (!prog->_LinkedShaders[MESA_SHADER_VERTEX] || + !prog->_LinkedShaders[MESA_SHADER_FRAGMENT] || + prog->_LinkedShaders[MESA_SHADER_GEOMETRY]) + return; + + metadata = blob_create(NULL); + + write_uniforms(metadata, prog); + + write_uniform_remap_table(metadata, prog); + + write_shader_metadata(metadata, prog->_LinkedShaders[MESA_SHADER_VERTEX]); + write_shader_metadata(metadata, prog->_LinkedShaders[MESA_SHADER_FRAGMENT]); + + for (unsigned i = 0; i < prog->NumShaders; i++) { + cache_put_key(cache, prog->Shaders[i]->sha1); + printf("marking shader: %s\n", + _mesa_sha1_format(sha1_buf, prog->Shaders[i]->sha1)); + } + + cache_put(cache, prog->sha1, metadata->data, metadata->size); + + ralloc_free(metadata); + + printf("putting program metadata in cache: %s\n", + _mesa_sha1_format(sha1_buf, prog->sha1)); +} + +bool +shader_cache_read_program_metadata(struct gl_context *ctx, + struct gl_shader_program *prog) +{ + const char *stage_name[] = { "vs", "gs", "fs", "cs" }; + char buf[256], sha1buf[41]; + int offset = 0; + uint8_t *buffer; + struct program_cache *cache; + size_t size; + struct blob_reader metadata; + struct gl_program *glprog; + gl_shader *linked; + + cache = ctx->Cache; + if (!cache) + return false; + + for (unsigned i = 0; i < prog->NumShaders; i++) { + if (prog->Shaders[i]->Source == NULL) + return false; + + offset += snprintf(buf + offset, sizeof(buf) - offset, + "%s: %s\n", + stage_name[prog->Shaders[i]->Stage], + _mesa_sha1_format(sha1buf, prog->Shaders[i]->sha1)); + } + + _mesa_sha1_compute(buf, offset, prog->sha1); + buffer = (uint8_t *) cache_get(cache, prog->sha1, &size); + if (buffer == NULL) { + /* FIXME: Fall back and link shaders here, if necessary, compile any + * shaders we didn't compile earlier. 
*/ + return false; + } + + printf("loading shader program meta data from cache: %s\n", + _mesa_sha1_format(sha1buf, prog->sha1)); + + blob_reader_init(&metadata, buffer, size); + + assert(prog->UniformStorage == NULL); + + read_uniforms(&metadata, prog); + + read_uniform_remap_table(&metadata, prog); + + linked = ctx->Driver.NewShader(NULL, 0, GL_VERTEX_SHADER); + glprog = ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, prog->Name); + read_shader_metadata(&metadata, glprog, linked); + //_mesa_reference_program(ctx, &linked->Program, glprog); + glprog->RefCount++; + _mesa_reference_shader(ctx, &prog->_LinkedShaders[MESA_SHADER_VERTEX], linked); + + linked = ctx->Driver.NewShader(NULL, 0, GL_FRAGMENT_SHADER); + glprog = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, prog->Name); + read_shader_metadata(&metadata, glprog, linked); + //_mesa_reference_program(ctx, &linked->Program, glprog); + glprog->RefCount++; + _mesa_reference_shader(ctx, &prog->_LinkedShaders[MESA_SHADER_FRAGMENT], linked); + + if (metadata.current != metadata.end || metadata.overrun) { + printf ("Error reading shader metadata. 
FIXME At this point, we should discard the item from the cache and rebuild from source.\n"); + } + + prog->LinkStatus = true; + + free (buffer); + + return true; +} diff --git a/src/glsl/shader_cache.h b/src/glsl/shader_cache.h new file mode 100644 index 0000000000..ffcf4f8279 --- /dev/null +++ b/src/glsl/shader_cache.h @@ -0,0 +1,38 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#pragma once +#ifndef SHADER_CACHE +#define SHADER_CACHE + +#include "cache.h" + +void +shader_cache_write_program_metadata(struct gl_context *ctx, + struct gl_shader_program *prog); + +bool +shader_cache_read_program_metadata(struct gl_context *ctx, + struct gl_shader_program *prog); + +#endif /* SHADER_CACHE */ diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 5a33aacbc2..e2476d1d77 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -101,6 +101,7 @@ i965_FILES = \ brw_sf_state.c \ brw_shader.cpp \ brw_shader.h \ + brw_shader_cache.cpp \ brw_state_batch.c \ brw_state_cache.c \ brw_state_dump.c \ diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 65f34c368d..9851ff45be 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -430,6 +430,8 @@ struct brw_wm_prog_data { * For varying slots that are not used by the FS, the value is -1. */ int urb_setup[VARYING_SLOT_MAX]; + + GLuint program_size; }; /* Note: brw_cs_prog_data_compare() must be updated when adding fields to this @@ -643,6 +645,8 @@ struct brw_vs_prog_data { bool uses_vertexid; bool uses_instanceid; + + GLuint program_size; }; /** Number of texture sampler units */ @@ -1227,6 +1231,9 @@ struct brw_context const struct gl_fragment_program *fragment_program; const struct gl_compute_program *compute_program; + bool program_written_to_cache; + unsigned char binary_sha1[20]; + /** * Number of samples in ctx->DrawBuffer, updated by BRW_NEW_NUM_SAMPLES so * that we don't have to reemit that state every time we change FBOs. 
diff --git a/src/mesa/drivers/dri/i965/brw_shader_cache.c b/src/mesa/drivers/dri/i965/brw_shader_cache.c new file mode 100644 index 0000000000..81fc055a55 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_shader_cache.c @@ -0,0 +1,382 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <util/macros.h> +#include <util/mesa-sha1.h> +#include <main/mtypes.h> +#include <glsl/glsl_parser_extras.h> +#include <glsl/ir_uniform.h> +#include <glsl/cache.h> +#include <glsl/blob.h> + +#include "brw_state.h" +#include "brw_wm.h" +#include "brw_vs.h" +#include "brw_context.h" + +/* Hack to avoid repeatedly loading a binary from the disk cache */ +static int been_there[64]; + +void +upload_cached_program(struct brw_context *brw) +{ + char sha1_buf[41]; + size_t size; + uint8_t *buffer; + struct blob_reader binary; + struct gl_shader_program *prog; + struct brw_wm_prog_key wm_key; + struct brw_vs_prog_key vs_key; + unsigned char sha1[20]; + char manifest[256]; + int i, offset = 0; + uint32_t *w; + struct program_cache *cache; + uint8_t *vs_program, *wm_program; + size_t vs_program_size, wm_program_size; + struct brw_vs_prog_data *vs_prog_data; + struct brw_wm_prog_data *wm_prog_data; + struct brw_stage_prog_data *prog_data; + size_t vs_prog_data_size, wm_prog_data_size; + intptr_t parameter_values_base; + intptr_t uniform_data_slots_base; + void *local = ralloc_context(NULL); + uint32_t nr_params, nr_pull_params; + + if (!brw_state_dirty(brw, 0, + BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_GEOMETRY_PROGRAM | + BRW_NEW_VERTEX_PROGRAM)) + return; + + cache = brw->ctx.Cache; + if (cache == NULL) + return; + + prog = brw->ctx.Shader.ActiveProgram; + if (prog == NULL) + return; + + offset += snprintf(manifest + offset, sizeof(manifest) - offset, + "program: %s\n", _mesa_sha1_format(sha1_buf, prog->sha1)); + + brw_wm_populate_key(brw, &wm_key); + _mesa_sha1_compute(&wm_key, sizeof wm_key, sha1); + offset += snprintf(manifest + offset, sizeof(manifest) - offset, + "wm_key: %s\n", _mesa_sha1_format(sha1_buf, sha1)); + + brw_vs_populate_key(brw, &vs_key); + _mesa_sha1_compute(&vs_key, sizeof vs_key, sha1); + offset += snprintf(manifest + offset, sizeof(manifest) - offset, + "vs_key: %s\n", _mesa_sha1_format(sha1_buf, sha1)); + + _mesa_sha1_compute(manifest, 
strlen(manifest), brw->binary_sha1); + + w = (uint32_t *) brw->binary_sha1; + if (been_there[*w & 63]) + return; + been_there[*w & 63] = 1; + + buffer = cache_get(cache, brw->binary_sha1, &size); + if (buffer == NULL) + goto FAIL; + + printf("populating bo cache with binary: %s\n", + _mesa_sha1_format(sha1_buf, brw->binary_sha1)); + + blob_reader_init(&binary, buffer, size); + + /* Read VS program from blob. */ + vs_program_size = blob_read_uint32(&binary); + + vs_program = blob_read_bytes(&binary, vs_program_size); + + /* Read VS program_data from blob and fixup params pointers. */ + vs_prog_data_size = blob_read_uint32(&binary); + if (vs_prog_data_size != sizeof *vs_prog_data) + goto FAIL; + + vs_prog_data = blob_read_bytes(&binary, vs_prog_data_size); + prog_data = &vs_prog_data->base.base; + + parameter_values_base = blob_read_intptr(&binary); + uniform_data_slots_base = blob_read_intptr(&binary); + + nr_params = blob_read_uint32(&binary); + if (nr_params != prog_data->nr_params) + goto FAIL; + + prog_data->param = rzalloc_array(local, const gl_constant_value *, nr_params); + printf("Allocating %d prog_data->params (%p)\n", + prog_data->nr_params, prog_data->param); + + for (i = 0; i < nr_params; i++) { + intptr_t param = blob_read_intptr(&binary); + ptrdiff_t p_offset, u_offset; + struct gl_program_parameter_list *param_list = + prog->_LinkedShaders[MESA_SHADER_VERTEX]->Program->Parameters; + + p_offset = (param - parameter_values_base) / sizeof(gl_constant_value); + u_offset = (param - uniform_data_slots_base) / sizeof(gl_constant_value); + + if (p_offset >= 0 && p_offset < 4 * param_list->NumParameters) { + prog_data->param[i] = ((gl_constant_value *) param_list->ParameterValues) + p_offset; + } else if (u_offset >= 0 && u_offset < prog->NumUniformDataSlots) { + prog_data->param[i] = prog->UniformDataSlots + u_offset; + } else { + printf("Error: Failed to fixup pointer value %p\n", (void *) param); + goto FAIL; + } + } + + nr_pull_params = 
blob_read_uint32(&binary); + if (nr_pull_params != prog_data->nr_pull_params) + goto FAIL; + + prog_data->pull_param = rzalloc_array(local, const gl_constant_value *, + nr_pull_params); + + + for (i = 0; i < nr_pull_params; i++) { + intptr_t pull_param = blob_read_intptr(&binary); + /* FIXME: We need to fixup pull_params pointers here. */ + } + + brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG, + &vs_key, sizeof(struct brw_vs_prog_key), + vs_program, vs_program_size, + vs_prog_data, vs_prog_data_size, + &brw->vs.base.prog_offset, &brw->vs.prog_data); + + /* Read WM program from blob. */ + wm_program_size = blob_read_uint32(&binary); + + wm_program = blob_read_bytes(&binary, wm_program_size); + + /* Read WM program_data from blob and fixup params pointers. */ + wm_prog_data_size = blob_read_uint32(&binary); + if (wm_prog_data_size != sizeof *wm_prog_data) + goto FAIL; + + wm_prog_data = blob_read_bytes(&binary, wm_prog_data_size); + prog_data = &wm_prog_data->base; + + parameter_values_base = blob_read_intptr(&binary); + + nr_params = blob_read_uint32(&binary); + if (nr_params != prog_data->nr_params) + goto FAIL; + + prog_data->param = rzalloc_array(local, const gl_constant_value *, nr_params); + printf("Allocating %d prog_data->params (%p)\n", + prog_data->nr_params, prog_data->param); + + for (i = 0; i < nr_params; i++) { + intptr_t param = blob_read_intptr(&binary); + ptrdiff_t p_offset, u_offset; + struct gl_program_parameter_list *param_list = + prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program->Parameters; + + p_offset = (param - parameter_values_base) / sizeof(gl_constant_value); + u_offset = (param - uniform_data_slots_base) / sizeof(gl_constant_value); + + if (p_offset >= 0 && p_offset < 4 * param_list->NumParameters) { + prog_data->param[i] = ((gl_constant_value *) param_list->ParameterValues) + p_offset; + } else if (u_offset >= 0 && u_offset < prog->NumUniformDataSlots) { + prog_data->param[i] = prog->UniformDataSlots + u_offset; + } else { + 
printf("Error: Failed to fixup pointer value %p\n", (void *) param); + goto FAIL; + } + } + + nr_pull_params = blob_read_uint32(&binary); + if (nr_pull_params != prog_data->nr_pull_params) + goto FAIL; + + prog_data->pull_param = rzalloc_array(local, const gl_constant_value *, + nr_pull_params); + + + for (i = 0; i < nr_pull_params; i++) { + intptr_t pull_param = blob_read_intptr(&binary); + /* FIXME: We need to fixup pull_params pointers here. */ + } + + brw_upload_cache(&brw->cache, BRW_CACHE_FS_PROG, + &wm_key, sizeof(struct brw_wm_prog_key), + wm_program, wm_program_size, + wm_prog_data, wm_prog_data_size, + &brw->wm.base.prog_offset, &brw->wm.prog_data); + + if (binary.current != binary.end || binary.overrun) { + printf ("Error reading program from cache (did not read every byte written)\n"); + goto FAIL; + } + + printf ("%s: Successfully read every byte written!\n", __FUNCTION__); + brw->program_written_to_cache = true; + +/* FIXME: I'm currently leaking anything allocated off of this local + * context. What we really want here is a context that lives across both + * upload_cached_program and write_cached program. To implement this, Ken + * suggests rewriting brw_state_upload.c:brw_upload_state() to pull the code + * called for the common atoms out of the loop, and then explicitly call these + * caching functions around those, (rather than hooking into the atoms to call + * our cache functions). + + ralloc_free(local); +*/ + free(buffer); + return; + +FAIL: + /* Fall back and compile from source here. 
*/ + brw->program_written_to_cache = false; + local = NULL; + printf("FIXME: May need to fallback to compile from source here...\n"); + free(buffer); +} + +void +write_cached_program(struct brw_context *brw) +{ + struct blob *binary; + uint8_t *blob_cursor; + size_t vs_program_size, wm_program_size; + uint32_t nr_params, nr_pull_params; + struct gl_shader_program *prog; + struct program_cache *cache; + char buf[41]; + unsigned i; + + if (!brw_state_dirty(brw, 0, + BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_GEOMETRY_PROGRAM | + BRW_NEW_VERTEX_PROGRAM)) + return; + + cache = brw->ctx.Cache; + if (cache == NULL) + return; + + prog = brw->ctx.Shader.ActiveProgram; + if (prog == NULL) + return; + + if (brw->program_written_to_cache) + return; + + binary = blob_create (NULL); + if (binary == NULL) + return; + + /* Write VS program to blob. */ + vs_program_size = brw->vs.prog_data->program_size; + + blob_write_uint32(binary, vs_program_size); + + blob_cursor = blob_reserve_bytes(binary, vs_program_size); + drm_intel_bo_get_subdata(brw->cache.bo, brw->vs.base.prog_offset, + vs_program_size, blob_cursor); + + /* Write VS program_data to blob. */ + blob_write_uint32(binary, sizeof *brw->vs.prog_data); + blob_write_bytes(binary, brw->vs.prog_data, sizeof *brw->vs.prog_data); + + /* Include variable-length params from end of brw_stage_prog_data as well. + * + * Before writing either of the params or pull_params arrays, we first + * write out the addresses of the ParameterValues and UniformDataSlots + * storage. The pointers within params will be pointers to within one of + * these blocks of storage. So we can use the addresses of this storage + * together with the pointer values to correctly construct pointers to the + * actual storage when the program data is loaded from the cache. 
+ */ + + blob_write_intptr(binary, + (intptr_t) prog->_LinkedShaders[MESA_SHADER_VERTEX]-> + Program->Parameters->ParameterValues); + blob_write_intptr(binary, (intptr_t) prog->UniformDataSlots); + + nr_params = brw->vs.prog_data->base.base.nr_params; + blob_write_uint32(binary, nr_params); + + for (i = 0; i < nr_params; i++) { + blob_write_intptr(binary, + (intptr_t) brw->vs.prog_data->base.base.param[i]); + } + + nr_pull_params = brw->vs.prog_data->base.base.nr_pull_params; + blob_write_uint32(binary, nr_pull_params); + + for (i = 0; i < nr_pull_params; i++) { + blob_write_intptr(binary, + (intptr_t) brw->vs.prog_data->base.base.pull_param[i]); + } + + /* Write WM program to blob. */ + wm_program_size = brw->wm.prog_data->program_size; + + blob_write_uint32(binary, wm_program_size); + + blob_cursor = blob_reserve_bytes(binary, wm_program_size); + drm_intel_bo_get_subdata(brw->cache.bo, brw->wm.base.prog_offset, + wm_program_size, blob_cursor); + + /* Write WM program_data to blob. */ + blob_write_uint32(binary, sizeof *brw->wm.prog_data); + blob_write_bytes(binary, brw->wm.prog_data, sizeof *brw->wm.prog_data); + + /* Include variable-length params, (don't need to rewrite UniformDataSlots + * pointer). 
*/ + + blob_write_intptr(binary, + (intptr_t) prog->_LinkedShaders[MESA_SHADER_FRAGMENT]-> + Program->Parameters->ParameterValues); + + nr_params = brw->wm.prog_data->base.nr_params; + blob_write_uint32(binary, nr_params); + + for (i = 0; i < nr_params; i++) { + blob_write_intptr(binary, + (intptr_t) brw->wm.prog_data->base.param[i]); + } + + nr_pull_params = brw->wm.prog_data->base.nr_pull_params; + blob_write_uint32(binary, nr_pull_params); + + for (i = 0; i < nr_pull_params; i++) { + blob_write_intptr(binary, + (intptr_t) brw->wm.prog_data->base.pull_param[i]); + } + + printf("putting binary in cache: %s\n", + _mesa_sha1_format(buf, brw->binary_sha1)); + + cache_put(cache, brw->binary_sha1, binary->data, binary->size); + + ralloc_free (binary); + + brw->program_written_to_cache = true; +} diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 987672f881..716b291af3 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -176,6 +176,12 @@ brw_depthbuffer_format(struct brw_context *brw); /* gen8_misc_state.c */ void gen8_upload_state_base_address(struct brw_context *brw); +/* brw_shader_cache.h */ +void +upload_cached_program(struct brw_context *brw); + +void +write_cached_program(struct brw_context *brw); /*********************************************************************** * brw_state.c diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 7662c3b580..a284fc487c 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -30,7 +30,6 @@ */ - #include "brw_context.h" #include "brw_state.h" #include "drivers/common/meta.h" @@ -621,6 +620,8 @@ brw_upload_programs(struct brw_context *brw, enum brw_pipeline pipeline) { if (pipeline == BRW_RENDER_PIPELINE) { + upload_cached_program(brw); + brw_upload_vs_prog(brw); if (brw->gen < 6) @@ -629,6 +630,9 @@ 
brw_upload_programs(struct brw_context *brw, brw_upload_gs_prog(brw); brw_upload_wm_prog(brw); + + write_cached_program(brw); + } else if (pipeline == BRW_COMPUTE_PIPELINE) { brw_upload_cs_prog(brw); } diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 6e9848fb1e..1a1bc9b6d2 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -213,6 +213,9 @@ brw_codegen_vs_prog(struct brw_context *brw, program, program_size, &prog_data, sizeof(prog_data), &brw->vs.base.prog_offset, &brw->vs.prog_data); + + brw->vs.prog_data->program_size = program_size; + ralloc_free(mem_ctx); return true; @@ -315,12 +318,11 @@ brw_vs_state_dirty(struct brw_context *brw) BRW_NEW_VS_ATTRIB_WORKAROUNDS); } -static void +void brw_vs_populate_key(struct brw_context *brw, struct brw_vs_prog_key *key) { struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_VERTEX_PROGRAM */ struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; struct gl_program *prog = (struct gl_program *) brw->vertex_program; diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 61f9b006a5..22b2f6b80f 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -81,6 +81,10 @@ brw_codegen_vs_prog(struct brw_context *brw, struct brw_vertex_program *vp, struct brw_vs_prog_key *key); +void +brw_vs_populate_key(struct brw_context *brw, + struct brw_vs_prog_key *key); + #ifdef __cplusplus } /* extern "C" */ diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 592a72927c..b36999e67a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -232,6 +232,8 @@ brw_codegen_wm_prog(struct brw_context *brw, &prog_data, sizeof(prog_data), &brw->wm.base.prog_offset, &brw->wm.prog_data); + brw->wm.prog_data->program_size = program_size; + ralloc_free(mem_ctx); return true; @@ -452,8 +454,9 @@ brw_wm_state_dirty 
(struct brw_context *brw) BRW_NEW_VUE_MAP_GEOM_OUT); } -static void brw_wm_populate_key( struct brw_context *brw, - struct brw_wm_prog_key *key ) +void +brw_wm_populate_key( struct brw_context *brw, + struct brw_wm_prog_key *key ) { struct gl_context *ctx = &brw->ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 0a8a97b2f5..51f1ae8e8d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -90,6 +90,10 @@ bool brw_wm_prog_data_compare(const void *a, const void *b); void brw_upload_wm_prog(struct brw_context *brw); +void +brw_wm_populate_key(struct brw_context *brw, + struct brw_wm_prog_key *key); + #ifdef __cplusplus } // extern "C" #endif diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index faa1de739d..c10cefdc5e 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -86,6 +86,7 @@ #include "blend.h" #include "buffers.h" #include "bufferobj.h" +#include "cache.h" #include "context.h" #include "cpuinfo.h" #include "debug.h" @@ -1204,6 +1205,8 @@ _mesa_initialize_context(struct gl_context *ctx, memset(&ctx->TextureFormatSupported, GL_TRUE, sizeof(ctx->TextureFormatSupported)); + ctx->Cache = cache_create(); + switch (ctx->API) { case API_OPENGL_COMPAT: ctx->BeginEnd = create_beginend_table(ctx); diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 7b55677de3..470fc86abd 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2373,6 +2373,7 @@ struct gl_shader GLuint Name; /**< AKA the handle */ GLint RefCount; /**< Reference count */ GLchar *Label; /**< GL_KHR_debug */ + unsigned char sha1[20]; /**< SHA1 hash of pre-processed source */ GLboolean DeletePending; GLboolean CompileStatus; bool IsES; /**< True if this shader uses GLSL ES */ @@ -2619,6 +2620,7 @@ struct gl_shader_program * Is the application intending to glGetProgramBinary this program? 
*/ GLboolean BinaryRetreivableHint; + unsigned char sha1[20]; /**< SHA1 hash of linked program */ /** * Indicates whether program can be bound for individual pipeline stages @@ -2719,7 +2721,8 @@ struct gl_shader_program unsigned NumUniformStorage; unsigned NumHiddenUniforms; struct gl_uniform_storage *UniformStorage; - + unsigned NumUniformDataSlots; + union gl_constant_value *UniformDataSlots; /** * Mapping from GL uniform locations returned by \c glUniformLocation to * UniformStorage entries. Arrays will have multiple contiguous slots @@ -4408,6 +4411,8 @@ struct gl_context * Once this field becomes true, it is never reset to false. */ GLboolean ShareGroupReset; + + struct program_cache *Cache; }; diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 0b2eb12236..8f6ca15089 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -48,6 +48,9 @@ #include "main/uniforms.h" #include "program/hash_table.h" +#include "shader_cache.h" + +#include "program/hash_table.h" #include "program/prog_instruction.h" #include "program/prog_optimize.h" #include "program/prog_print.h" @@ -2972,6 +2975,13 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) link_shaders(ctx, prog); } + /* FIXME: We look at prog->Version to determine whether we actually linked + * the program or just loaded the uniform meta data from cache. We + * probably want to turn prog->LinkStatus into an enum that captures the + * different states.*/ + if (prog->LinkStatus && prog->Version == 0) + return; + if (prog->LinkStatus) { if (!ctx->Driver.LinkShader(ctx, prog)) { prog->LinkStatus = GL_FALSE; @@ -2990,6 +3000,8 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) fprintf(stderr, "%s\n", prog->InfoLog); } } + + shader_cache_write_program_metadata(ctx, prog); } } /* extern "C" */ |