summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i965/brw_program_cache.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_program_cache.c')
-rw-r--r--src/mesa/drivers/dri/i965/brw_program_cache.c523
1 files changed, 0 insertions, 523 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c b/src/mesa/drivers/dri/i965/brw_program_cache.c
deleted file mode 100644
index df2b92e0d2c..00000000000
--- a/src/mesa/drivers/dri/i965/brw_program_cache.c
+++ /dev/null
@@ -1,523 +0,0 @@
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keithw@vmware.com>
- */
-
-/** @file brw_program_cache.c
- *
- * This file implements a simple program cache for 965. The consumers can
- * query the hash table of programs using a cache_id and program key, and
- * receive the corresponding program buffer object (plus associated auxiliary
- * data) in return. Objects in the cache may not have relocations
- * (pointers to other BOs) in them.
- *
- * The inner workings are a simple hash table based on a FNV-1a of the
- * key data.
- *
- * Replacement is not implemented. Instead, when the cache gets too
- * big we throw out all of the cache data and let it get regenerated.
- */
-
-#include "main/streaming-load-memcpy.h"
-#include "x86/common_x86_asm.h"
-#include "brw_batch.h"
-#include "brw_state.h"
-#include "brw_wm.h"
-#include "brw_gs.h"
-#include "brw_cs.h"
-#include "brw_program.h"
-#include "compiler/brw_eu.h"
-#include "util/u_memory.h"
-#define XXH_INLINE_ALL
-#include "util/xxhash.h"
-
-#define FILE_DEBUG_FLAG DEBUG_STATE
-
-struct brw_cache_item {
- /**
- * Effectively part of the key, cache_id identifies what kind of state
- * buffer is involved, and also which dirty flag should set.
- */
- enum brw_cache_id cache_id;
-
- /** 32-bit hash of the key data */
- GLuint hash;
-
- /** for variable-sized keys */
- GLuint key_size;
- GLuint prog_data_size;
- const struct brw_base_prog_key *key;
-
- uint32_t offset;
- uint32_t size;
-
- struct brw_cache_item *next;
-};
-
-enum brw_cache_id
-brw_stage_cache_id(gl_shader_stage stage)
-{
- static const enum brw_cache_id stage_ids[] = {
- BRW_CACHE_VS_PROG,
- BRW_CACHE_TCS_PROG,
- BRW_CACHE_TES_PROG,
- BRW_CACHE_GS_PROG,
- BRW_CACHE_FS_PROG,
- BRW_CACHE_CS_PROG,
- };
- assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_ids));
- return stage_ids[stage];
-}
-
-static GLuint
-hash_key(struct brw_cache_item *item)
-{
- uint32_t hash = 0;
- hash = XXH32(&item->cache_id, sizeof(item->cache_id), hash);
- hash = XXH32(item->key, item->key_size, hash);
-
- return hash;
-}
-
-static int
-brw_cache_item_equals(const struct brw_cache_item *a,
- const struct brw_cache_item *b)
-{
- return a->cache_id == b->cache_id &&
- a->hash == b->hash &&
- a->key_size == b->key_size &&
- (memcmp(a->key, b->key, a->key_size) == 0);
-}
-
-static struct brw_cache_item *
-search_cache(struct brw_cache *cache, GLuint hash,
- struct brw_cache_item *lookup)
-{
- struct brw_cache_item *c;
-
-#if 0
- int bucketcount = 0;
-
- for (c = cache->items[hash % cache->size]; c; c = c->next)
- bucketcount++;
-
- fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size,
- cache->size, bucketcount, cache->n_items);
-#endif
-
- for (c = cache->items[hash % cache->size]; c; c = c->next) {
- if (brw_cache_item_equals(lookup, c))
- return c;
- }
-
- return NULL;
-}
-
-
-static void
-rehash(struct brw_cache *cache)
-{
- struct brw_cache_item **items;
- struct brw_cache_item *c, *next;
- GLuint size, i;
-
- size = cache->size * 3;
- items = calloc(size, sizeof(*items));
-
- for (i = 0; i < cache->size; i++)
- for (c = cache->items[i]; c; c = next) {
- next = c->next;
- c->next = items[c->hash % size];
- items[c->hash % size] = c;
- }
-
- free(cache->items);
- cache->items = items;
- cache->size = size;
-}
-
-
-/**
- * Returns the buffer object matching cache_id and key, or NULL.
- */
-bool
-brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
- const void *key, GLuint key_size, uint32_t *inout_offset,
- void *inout_prog_data, bool flag_state)
-{
- struct brw_cache_item *item;
- struct brw_cache_item lookup;
- GLuint hash;
-
- lookup.cache_id = cache_id;
- lookup.key = key;
- lookup.key_size = key_size;
- hash = hash_key(&lookup);
- lookup.hash = hash;
-
- item = search_cache(cache, hash, &lookup);
-
- if (item == NULL)
- return false;
-
- void *prog_data = ((char *) item->key) + item->key_size;
-
- if (item->offset != *inout_offset ||
- prog_data != *((void **) inout_prog_data)) {
- if (likely(flag_state))
- cache->brw->ctx.NewDriverState |= (1 << cache_id);
- *inout_offset = item->offset;
- *((void **) inout_prog_data) = prog_data;
- }
-
- return true;
-}
-
-static void
-brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
-{
- struct brw_context *brw = cache->brw;
- struct brw_bo *new_bo;
-
- perf_debug("Copying to larger program cache: %u kB -> %u kB\n",
- (unsigned) cache->bo->size / 1024, new_size / 1024);
-
- new_bo = brw_bo_alloc(brw->bufmgr, "program cache", new_size,
- BRW_MEMZONE_SHADER);
- if (can_do_exec_capture(brw->screen))
- new_bo->kflags |= EXEC_OBJECT_CAPTURE;
-
- void *map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE |
- MAP_ASYNC | MAP_PERSISTENT);
-
- /* Copy any existing data that needs to be saved. */
- if (cache->next_offset != 0) {
-#ifdef USE_SSE41
- if (!cache->bo->cache_coherent && cpu_has_sse4_1)
- _mesa_streaming_load_memcpy(map, cache->map, cache->next_offset);
- else
-#endif
- memcpy(map, cache->map, cache->next_offset);
- }
-
- brw_bo_unmap(cache->bo);
- brw_bo_unreference(cache->bo);
- cache->bo = new_bo;
- cache->map = map;
-
- /* Since we have a new BO in place, we need to signal the units
- * that depend on it (state base address on gfx5+, or unit state before).
- */
- brw->ctx.NewDriverState |= BRW_NEW_PROGRAM_CACHE;
- brw->batch.state_base_address_emitted = false;
-}
-
-/**
- * Attempts to find an item in the cache with identical data.
- */
-static const struct brw_cache_item *
-brw_lookup_prog(const struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *data, unsigned data_size)
-{
- unsigned i;
- const struct brw_cache_item *item;
-
- for (i = 0; i < cache->size; i++) {
- for (item = cache->items[i]; item; item = item->next) {
- if (item->cache_id != cache_id || item->size != data_size ||
- memcmp(cache->map + item->offset, data, item->size) != 0)
- continue;
-
- return item;
- }
- }
-
- return NULL;
-}
-
-static uint32_t
-brw_alloc_item_data(struct brw_cache *cache, uint32_t size)
-{
- uint32_t offset;
-
- /* Allocate space in the cache BO for our new program. */
- if (cache->next_offset + size > cache->bo->size) {
- uint32_t new_size = cache->bo->size * 2;
-
- while (cache->next_offset + size > new_size)
- new_size *= 2;
-
- brw_cache_new_bo(cache, new_size);
- }
-
- offset = cache->next_offset;
-
- /* Programs are always 64-byte aligned, so set up the next one now */
- cache->next_offset = ALIGN(offset + size, 64);
-
- return offset;
-}
-
-const void *
-brw_find_previous_compile(struct brw_cache *cache,
- enum brw_cache_id cache_id,
- unsigned program_string_id)
-{
- for (unsigned i = 0; i < cache->size; i++) {
- for (struct brw_cache_item *c = cache->items[i]; c; c = c->next) {
- if (c->cache_id == cache_id &&
- c->key->program_string_id == program_string_id) {
- return c->key;
- }
- }
- }
-
- return NULL;
-}
-
-void
-brw_upload_cache(struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *key,
- GLuint key_size,
- const void *data,
- GLuint data_size,
- const void *prog_data,
- GLuint prog_data_size,
- uint32_t *out_offset,
- void *out_prog_data)
-{
- struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
- const struct brw_cache_item *matching_data =
- brw_lookup_prog(cache, cache_id, data, data_size);
- GLuint hash;
- void *tmp;
-
- item->cache_id = cache_id;
- item->size = data_size;
- item->key = key;
- item->key_size = key_size;
- item->prog_data_size = prog_data_size;
- hash = hash_key(item);
- item->hash = hash;
-
- /* If we can find a matching prog in the cache already, then reuse the
- * existing stuff without creating new copy into the underlying buffer
- * object. This is notably useful for programs generating shaders at
- * runtime, where multiple shaders may compile to the same thing in our
- * backend.
- */
- if (matching_data) {
- item->offset = matching_data->offset;
- } else {
- item->offset = brw_alloc_item_data(cache, data_size);
-
- /* Copy data to the buffer */
- memcpy(cache->map + item->offset, data, data_size);
- }
-
- /* Set up the memory containing the key and prog_data */
- tmp = malloc(key_size + prog_data_size);
-
- memcpy(tmp, key, key_size);
- memcpy(tmp + key_size, prog_data, prog_data_size);
-
- item->key = tmp;
-
- if (cache->n_items > cache->size * 1.5f)
- rehash(cache);
-
- hash %= cache->size;
- item->next = cache->items[hash];
- cache->items[hash] = item;
- cache->n_items++;
-
- *out_offset = item->offset;
- *(void **)out_prog_data = (void *)((char *)item->key + item->key_size);
- cache->brw->ctx.NewDriverState |= 1 << cache_id;
-}
-
-void
-brw_init_caches(struct brw_context *brw)
-{
- struct brw_cache *cache = &brw->cache;
-
- cache->brw = brw;
-
- cache->size = 7;
- cache->n_items = 0;
- cache->items =
- calloc(cache->size, sizeof(struct brw_cache_item *));
-
- cache->bo = brw_bo_alloc(brw->bufmgr, "program cache", 16384,
- BRW_MEMZONE_SHADER);
- if (can_do_exec_capture(brw->screen))
- cache->bo->kflags |= EXEC_OBJECT_CAPTURE;
-
- cache->map = brw_bo_map(brw, cache->bo, MAP_READ | MAP_WRITE |
- MAP_ASYNC | MAP_PERSISTENT);
-}
-
-static void
-brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
-{
- struct brw_cache_item *c, *next;
- GLuint i;
-
- DBG("%s\n", __func__);
-
- for (i = 0; i < cache->size; i++) {
- for (c = cache->items[i]; c; c = next) {
- next = c->next;
- if (c->cache_id == BRW_CACHE_VS_PROG ||
- c->cache_id == BRW_CACHE_TCS_PROG ||
- c->cache_id == BRW_CACHE_TES_PROG ||
- c->cache_id == BRW_CACHE_GS_PROG ||
- c->cache_id == BRW_CACHE_FS_PROG ||
- c->cache_id == BRW_CACHE_CS_PROG) {
- const void *item_prog_data = ((char *)c->key) + c->key_size;
- brw_stage_prog_data_free(item_prog_data);
- }
- free((void *)c->key);
- free(c);
- }
- cache->items[i] = NULL;
- }
-
- cache->n_items = 0;
-
- /* Start putting programs into the start of the BO again, since
- * we'll never find the old results.
- */
- cache->next_offset = 0;
-
- /* We need to make sure that the programs get regenerated, since
- * any offsets leftover in brw_context will no longer be valid.
- */
- brw->NewGLState = ~0;
- brw->ctx.NewDriverState = ~0ull;
- brw->state.pipelines[BRW_RENDER_PIPELINE].mesa = ~0;
- brw->state.pipelines[BRW_RENDER_PIPELINE].brw = ~0ull;
- brw->state.pipelines[BRW_COMPUTE_PIPELINE].mesa = ~0;
- brw->state.pipelines[BRW_COMPUTE_PIPELINE].brw = ~0ull;
-
- /* Also, NULL out any stale program pointers. */
- brw->vs.base.prog_data = NULL;
- brw->tcs.base.prog_data = NULL;
- brw->tes.base.prog_data = NULL;
- brw->gs.base.prog_data = NULL;
- brw->wm.base.prog_data = NULL;
- brw->cs.base.prog_data = NULL;
-
- brw_batch_flush(brw);
-}
-
-void
-brw_program_cache_check_size(struct brw_context *brw)
-{
- /* un-tuned guess. Each object is generally a page, so 2000 of them is 8 MB of
- * state cache.
- */
- if (brw->cache.n_items > 2000) {
- perf_debug("Exceeded state cache size limit. Clearing the set "
- "of compiled programs, which will trigger recompiles\n");
- brw_clear_cache(brw, &brw->cache);
- brw_cache_new_bo(&brw->cache, brw->cache.bo->size);
- }
-}
-
-
-static void
-brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
-{
-
- DBG("%s\n", __func__);
-
- /* This can be NULL if context creation failed early on */
- if (cache->bo) {
- brw_bo_unmap(cache->bo);
- brw_bo_unreference(cache->bo);
- cache->bo = NULL;
- cache->map = NULL;
- }
- brw_clear_cache(brw, cache);
- free(cache->items);
- cache->items = NULL;
- cache->size = 0;
-}
-
-
-void
-brw_destroy_caches(struct brw_context *brw)
-{
- brw_destroy_cache(brw, &brw->cache);
-}
-
-static const char *
-cache_name(enum brw_cache_id cache_id)
-{
- switch (cache_id) {
- case BRW_CACHE_VS_PROG:
- return "VS kernel";
- case BRW_CACHE_TCS_PROG:
- return "TCS kernel";
- case BRW_CACHE_TES_PROG:
- return "TES kernel";
- case BRW_CACHE_FF_GS_PROG:
- return "Fixed-function GS kernel";
- case BRW_CACHE_GS_PROG:
- return "GS kernel";
- case BRW_CACHE_CLIP_PROG:
- return "CLIP kernel";
- case BRW_CACHE_SF_PROG:
- return "SF kernel";
- case BRW_CACHE_FS_PROG:
- return "FS kernel";
- case BRW_CACHE_CS_PROG:
- return "CS kernel";
- default:
- return "unknown";
- }
-}
-
-void
-brw_print_program_cache(struct brw_context *brw)
-{
- const struct brw_cache *cache = &brw->cache;
- struct brw_cache_item *item;
-
- for (unsigned i = 0; i < cache->size; i++) {
- for (item = cache->items[i]; item; item = item->next) {
- fprintf(stderr, "%s:\n", cache_name(i));
- brw_disassemble_with_labels(&brw->screen->devinfo, cache->map,
- item->offset, item->size, stderr);
- }
- }
-}