diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_state_upload.c')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state_upload.c | 789 |
1 files changed, 0 insertions, 789 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c deleted file mode 100644 index 94fda31d692..00000000000 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ /dev/null @@ -1,789 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keithw@vmware.com> - */ - - - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "brw_program.h" -#include "drivers/common/meta.h" -#include "brw_batch.h" -#include "brw_buffers.h" -#include "brw_vs.h" -#include "brw_ff_gs.h" -#include "brw_gs.h" -#include "brw_wm.h" -#include "brw_cs.h" -#include "genxml/genX_bits.h" -#include "main/framebuffer.h" - -void -brw_enable_obj_preemption(struct brw_context *brw, bool enable) -{ - ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo; - assert(devinfo->ver >= 9); - - if (enable == brw->object_preemption) - return; - - /* A fixed function pipe flush is required before modifying this field */ - brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH); - - bool replay_mode = enable ? - GFX9_REPLAY_MODE_MIDOBJECT : GFX9_REPLAY_MODE_MIDBUFFER; - - /* enable object level preemption */ - brw_load_register_imm32(brw, CS_CHICKEN1, - replay_mode | GFX9_REPLAY_MODE_MASK); - - brw->object_preemption = enable; -} - -static void -brw_upload_gfx11_slice_hashing_state(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - int subslices_delta = - devinfo->ppipe_subslices[0] - devinfo->ppipe_subslices[1]; - if (subslices_delta == 0) - return; - - unsigned size = GFX11_SLICE_HASH_TABLE_length * 4; - uint32_t hash_address; - - uint32_t *map = brw_state_batch(brw, size, 64, &hash_address); - - unsigned idx = 0; - - unsigned sl_small = 0; - unsigned sl_big = 1; - if (subslices_delta > 0) { - sl_small = 1; - sl_big = 0; - } - - /** - * Create a 16x16 slice hashing table like the following one: - * - * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ] - * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ] - * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ] - * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ] - * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ] - * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ] - * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ] - * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ] - * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ] - * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ] - * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ] - * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ] - * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ] - * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ] - * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ] - * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ] - * - * The table above is used when the pixel pipe 0 has less subslices than - * pixel pipe 1. When pixel pipe 0 has more subslices, then a similar table - * with 0's and 1's inverted is used. - */ - for (int i = 0; i < GFX11_SLICE_HASH_TABLE_length; i++) { - uint32_t dw = 0; - - for (int j = 0; j < 8; j++) { - unsigned slice = idx++ % 3 ? sl_big : sl_small; - dw |= slice << (j * 4); - } - map[i] = dw; - } - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SLICE_TABLE_STATE_POINTERS << 16 | (2 - 2)); - OUT_RELOC(brw->batch.state.bo, 0, hash_address | 1); - ADVANCE_BATCH(); - - /* From gfx10/gfx11 workaround table in h/w specs: - * - * "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1 - * a value of 0xFFFF" - * - * This means that whenever we update a field with this instruction, we need - * to update all the others. - * - * Since this is the first time we emit this - * instruction, we are only setting the fSLICE_HASHING_TABLE_ENABLE flag, - * and leaving everything else at their default state (0). - */ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_3D_MODE << 16 | (2 - 2)); - OUT_BATCH(0xffff0000 | SLICE_HASHING_TABLE_ENABLE); - ADVANCE_BATCH(); -} - -static void -brw_upload_initial_gpu_state(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const struct brw_compiler *compiler = brw->screen->compiler; - - /* On platforms with hardware contexts, we can set our initial GPU state - * right away rather than doing it via state atoms. This saves a small - * amount of overhead on every draw call. - */ - if (!brw->hw_ctx) - return; - - if (devinfo->ver == 6) - brw_emit_post_sync_nonzero_flush(brw); - - brw_upload_invariant_state(brw); - - if (devinfo->ver == 11) { - /* The default behavior of bit 5 "Headerless Message for Pre-emptable - * Contexts" in SAMPLER MODE register is set to 0, which means - * headerless sampler messages are not allowed for pre-emptable - * contexts. Set the bit 5 to 1 to allow them. - */ - brw_load_register_imm32(brw, GFX11_SAMPLER_MODE, - HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK | - HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS); - - /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in - * HALF_SLICE_CHICKEN7 register. - */ - brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7, - TEXEL_OFFSET_FIX_MASK | - TEXEL_OFFSET_FIX_ENABLE); - - /* Wa_1406697149: Bit 9 "Error Detection Behavior Control" must be set - * in L3CNTLREG register. The default setting of the bit is not the - * desirable behavior. - */ - brw_load_register_imm32(brw, GFX8_L3CNTLREG, - GFX8_L3CNTLREG_EDBC_NO_HANG); - } - - /* hardware specification recommends disabling repacking for - * the compatibility with decompression mechanism in display controller. - */ - if (devinfo->disable_ccs_repack) { - brw_load_register_imm32(brw, GFX7_CACHE_MODE_0, - GFX11_DISABLE_REPACKING_FOR_COMPRESSION | - REG_MASK(GFX11_DISABLE_REPACKING_FOR_COMPRESSION)); - } - - if (devinfo->ver == 9) { - /* Recommended optimizations for Victim Cache eviction and floating - * point blending. - */ - brw_load_register_imm32(brw, GFX7_CACHE_MODE_1, - REG_MASK(GFX9_FLOAT_BLEND_OPTIMIZATION_ENABLE) | - REG_MASK(GFX9_MSC_RAW_HAZARD_AVOIDANCE_BIT) | - REG_MASK(GFX9_PARTIAL_RESOLVE_DISABLE_IN_VC) | - GFX9_FLOAT_BLEND_OPTIMIZATION_ENABLE | - GFX9_MSC_RAW_HAZARD_AVOIDANCE_BIT | - GFX9_PARTIAL_RESOLVE_DISABLE_IN_VC); - } - - if (devinfo->ver >= 8) { - gfx8_emit_3dstate_sample_pattern(brw); - - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so - * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address. - * - * This is only safe on kernels with context isolation support. - */ - if (!compiler->constant_buffer_0_is_relative) { - if (devinfo->ver >= 9) { - BEGIN_BATCH(3); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2)); - OUT_BATCH(CS_DEBUG_MODE2); - OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) | - CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE); - ADVANCE_BATCH(); - } else if (devinfo->ver == 8) { - BEGIN_BATCH(3); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2)); - OUT_BATCH(INSTPM); - OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) | - INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE); - ADVANCE_BATCH(); - } - } - - brw->object_preemption = false; - - if (devinfo->ver >= 10) - brw_enable_obj_preemption(brw, true); - - if (devinfo->ver == 11) - brw_upload_gfx11_slice_hashing_state(brw); -} - -static inline const struct brw_tracked_state * -brw_get_pipeline_atoms(struct brw_context *brw, - enum brw_pipeline pipeline) -{ - switch (pipeline) { - case BRW_RENDER_PIPELINE: - return brw->render_atoms; - case BRW_COMPUTE_PIPELINE: - return brw->compute_atoms; - default: - STATIC_ASSERT(BRW_NUM_PIPELINES == 2); - unreachable("Unsupported pipeline"); - return NULL; - } -} - -void -brw_copy_pipeline_atoms(struct brw_context *brw, - enum brw_pipeline pipeline, - const struct brw_tracked_state **atoms, - int num_atoms) -{ - /* This is to work around brw_context::atoms being declared const. We want - * it to be const, but it needs to be initialized somehow! - */ - struct brw_tracked_state *context_atoms = - (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline); - - for (int i = 0; i < num_atoms; i++) { - context_atoms[i] = *atoms[i]; - assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw); - assert(context_atoms[i].emit); - } - - brw->num_atoms[pipeline] = num_atoms; -} - -void brw_init_state( struct brw_context *brw ) -{ - struct gl_context *ctx = &brw->ctx; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* Force the first brw_select_pipeline to emit pipeline select */ - brw->last_pipeline = BRW_NUM_PIPELINES; - - brw_init_caches(brw); - - if (devinfo->ver >= 11) - gfx11_init_atoms(brw); - else if (devinfo->ver >= 10) - unreachable("Gfx10 support dropped."); - else if (devinfo->ver >= 9) - gfx9_init_atoms(brw); - else if (devinfo->ver >= 8) - gfx8_init_atoms(brw); - else if (devinfo->is_haswell) - gfx75_init_atoms(brw); - else if (devinfo->ver >= 7) - gfx7_init_atoms(brw); - else if (devinfo->ver >= 6) - gfx6_init_atoms(brw); - else if (devinfo->ver >= 5) - gfx5_init_atoms(brw); - else if (devinfo->is_g4x) - gfx45_init_atoms(brw); - else - gfx4_init_atoms(brw); - - brw_upload_initial_gpu_state(brw); - - brw->NewGLState = ~0; - brw->ctx.NewDriverState = ~0ull; - - /* ~0 is a nonsensical value which won't match anything we program, so - * the programming will take effect on the first time around. - */ - brw->pma_stall_bits = ~0; - - /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible - * dirty flags. - */ - STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState)); - - ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK; - ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK; - ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD; - ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER; - ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER; - ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER; - ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_UNIFORM_BUFFER; - ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS; - ctx->DriverFlags.NewTessState = BRW_NEW_DEFAULT_TESS_LEVELS; - ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION; -} - - -void brw_destroy_state( struct brw_context *brw ) -{ - brw_destroy_caches(brw); -} - -/*********************************************************************** - */ - -static bool -check_state(const struct brw_state_flags *a, const struct brw_state_flags *b) -{ - return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0; -} - -static void -accumulate_state(struct brw_state_flags *a, const struct brw_state_flags *b) -{ - a->mesa |= b->mesa; - a->brw |= b->brw; -} - - -static void -xor_states(struct brw_state_flags *result, - const struct brw_state_flags *a, - const struct brw_state_flags *b) -{ - result->mesa = a->mesa ^ b->mesa; - result->brw = a->brw ^ b->brw; -} - -struct dirty_bit_map { - uint64_t bit; - char *name; - uint32_t count; -}; - -#define DEFINE_BIT(name) {name, #name, 0} - -static struct dirty_bit_map mesa_bits[] = { - DEFINE_BIT(_NEW_MODELVIEW), - DEFINE_BIT(_NEW_PROJECTION), - DEFINE_BIT(_NEW_TEXTURE_MATRIX), - DEFINE_BIT(_NEW_COLOR), - DEFINE_BIT(_NEW_DEPTH), - DEFINE_BIT(_NEW_FOG), - DEFINE_BIT(_NEW_HINT), - DEFINE_BIT(_NEW_LIGHT), - DEFINE_BIT(_NEW_LINE), - DEFINE_BIT(_NEW_PIXEL), - DEFINE_BIT(_NEW_POINT), - DEFINE_BIT(_NEW_POLYGON), - DEFINE_BIT(_NEW_POLYGONSTIPPLE), - DEFINE_BIT(_NEW_SCISSOR), - DEFINE_BIT(_NEW_STENCIL), - DEFINE_BIT(_NEW_TEXTURE_OBJECT), - DEFINE_BIT(_NEW_TRANSFORM), - DEFINE_BIT(_NEW_VIEWPORT), - DEFINE_BIT(_NEW_TEXTURE_STATE), - DEFINE_BIT(_NEW_RENDERMODE), - DEFINE_BIT(_NEW_BUFFERS), - DEFINE_BIT(_NEW_CURRENT_ATTRIB), - DEFINE_BIT(_NEW_MULTISAMPLE), - DEFINE_BIT(_NEW_TRACK_MATRIX), - DEFINE_BIT(_NEW_PROGRAM), - DEFINE_BIT(_NEW_PROGRAM_CONSTANTS), - DEFINE_BIT(_NEW_FRAG_CLAMP), - {0, 0, 0} -}; - -static struct dirty_bit_map brw_bits[] = { - DEFINE_BIT(BRW_NEW_FS_PROG_DATA), - DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA), - DEFINE_BIT(BRW_NEW_SF_PROG_DATA), - DEFINE_BIT(BRW_NEW_VS_PROG_DATA), - DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA), - DEFINE_BIT(BRW_NEW_GS_PROG_DATA), - DEFINE_BIT(BRW_NEW_TCS_PROG_DATA), - DEFINE_BIT(BRW_NEW_TES_PROG_DATA), - DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA), - DEFINE_BIT(BRW_NEW_CS_PROG_DATA), - DEFINE_BIT(BRW_NEW_URB_FENCE), - DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM), - DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM), - DEFINE_BIT(BRW_NEW_TESS_PROGRAMS), - DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM), - DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE), - DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE), - DEFINE_BIT(BRW_NEW_PRIMITIVE), - DEFINE_BIT(BRW_NEW_CONTEXT), - DEFINE_BIT(BRW_NEW_PSP), - DEFINE_BIT(BRW_NEW_SURFACES), - DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS), - DEFINE_BIT(BRW_NEW_INDICES), - DEFINE_BIT(BRW_NEW_VERTICES), - DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS), - DEFINE_BIT(BRW_NEW_BATCH), - DEFINE_BIT(BRW_NEW_INDEX_BUFFER), - DEFINE_BIT(BRW_NEW_VS_CONSTBUF), - DEFINE_BIT(BRW_NEW_TCS_CONSTBUF), - DEFINE_BIT(BRW_NEW_TES_CONSTBUF), - DEFINE_BIT(BRW_NEW_GS_CONSTBUF), - DEFINE_BIT(BRW_NEW_PROGRAM_CACHE), - DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS), - DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT), - DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK), - DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD), - DEFINE_BIT(BRW_NEW_STATS_WM), - DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER), - DEFINE_BIT(BRW_NEW_IMAGE_UNITS), - DEFINE_BIT(BRW_NEW_META_IN_PROGRESS), - DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION), - DEFINE_BIT(BRW_NEW_NUM_SAMPLES), - DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER), - DEFINE_BIT(BRW_NEW_GFX4_UNIT_STATE), - DEFINE_BIT(BRW_NEW_CC_VP), - DEFINE_BIT(BRW_NEW_SF_VP), - DEFINE_BIT(BRW_NEW_CLIP_VP), - DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE), - DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS), - DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM), - DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS), - DEFINE_BIT(BRW_NEW_URB_SIZE), - DEFINE_BIT(BRW_NEW_CC_STATE), - DEFINE_BIT(BRW_NEW_BLORP), - DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT), - DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION), - DEFINE_BIT(BRW_NEW_DRAW_CALL), - DEFINE_BIT(BRW_NEW_AUX_STATE), - {0, 0, 0} -}; - -static void -brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits) -{ - for (int i = 0; bit_map[i].bit != 0; i++) { - if (bit_map[i].bit & bits) - bit_map[i].count++; - } -} - -static void -brw_print_dirty_count(struct dirty_bit_map *bit_map) -{ - for (int i = 0; bit_map[i].bit != 0; i++) { - if (bit_map[i].count > 1) { - fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n", - bit_map[i].bit, bit_map[i].count, bit_map[i].name); - } - } -} - -static inline void -brw_upload_tess_programs(struct brw_context *brw) -{ - if (brw->programs[MESA_SHADER_TESS_EVAL]) { - brw_upload_tcs_prog(brw); - brw_upload_tes_prog(brw); - } else { - brw->tcs.base.prog_data = NULL; - brw->tes.base.prog_data = NULL; - } -} - -static inline void -brw_upload_programs(struct brw_context *brw, - enum brw_pipeline pipeline) -{ - struct gl_context *ctx = &brw->ctx; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (pipeline == BRW_RENDER_PIPELINE) { - brw_upload_vs_prog(brw); - brw_upload_tess_programs(brw); - - if (brw->programs[MESA_SHADER_GEOMETRY]) { - brw_upload_gs_prog(brw); - } else { - brw->gs.base.prog_data = NULL; - if (devinfo->ver < 7) - brw_upload_ff_gs_prog(brw); - } - - /* Update the VUE map for data exiting the GS stage of the pipeline. - * This comes from the last enabled shader stage. - */ - GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid; - bool old_separate = brw->vue_map_geom_out.separate; - struct brw_vue_prog_data *vue_prog_data; - if (brw->programs[MESA_SHADER_GEOMETRY]) - vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data); - else if (brw->programs[MESA_SHADER_TESS_EVAL]) - vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data); - else - vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data); - - brw->vue_map_geom_out = vue_prog_data->vue_map; - - /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */ - if (old_slots != brw->vue_map_geom_out.slots_valid || - old_separate != brw->vue_map_geom_out.separate) - brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT; - - if ((old_slots ^ brw->vue_map_geom_out.slots_valid) & - VARYING_BIT_VIEWPORT) { - ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT; - brw->clip.viewport_count = - (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ? - ctx->Const.MaxViewports : 1; - } - - brw_upload_wm_prog(brw); - - if (devinfo->ver < 6) { - brw_upload_clip_prog(brw); - brw_upload_sf_prog(brw); - } - - brw_disk_cache_write_render_programs(brw); - } else if (pipeline == BRW_COMPUTE_PIPELINE) { - brw_upload_cs_prog(brw); - brw_disk_cache_write_compute_program(brw); - } -} - -static inline void -merge_ctx_state(struct brw_context *brw, - struct brw_state_flags *state) -{ - state->mesa |= brw->NewGLState; - state->brw |= brw->ctx.NewDriverState; -} - -static ALWAYS_INLINE void -check_and_emit_atom(struct brw_context *brw, - struct brw_state_flags *state, - const struct brw_tracked_state *atom) -{ - if (check_state(state, &atom->dirty)) { - atom->emit(brw); - merge_ctx_state(brw, state); - } -} - -static inline void -brw_upload_pipeline_state(struct brw_context *brw, - enum brw_pipeline pipeline) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - int i; - static int dirty_count = 0; - struct brw_state_flags state = brw->state.pipelines[pipeline]; - const unsigned fb_samples = - MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1); - - brw_select_pipeline(brw, pipeline); - - if (pipeline == BRW_RENDER_PIPELINE && brw->current_hash_scale != 1) - brw_emit_hashing_mode(brw, UINT_MAX, UINT_MAX, 1); - - if (INTEL_DEBUG & DEBUG_REEMIT) { - /* Always re-emit all state. */ - brw->NewGLState = ~0; - ctx->NewDriverState = ~0ull; - } - - if (pipeline == BRW_RENDER_PIPELINE) { - if (brw->programs[MESA_SHADER_FRAGMENT] != - ctx->FragmentProgram._Current) { - brw->programs[MESA_SHADER_FRAGMENT] = ctx->FragmentProgram._Current; - brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM; - } - - if (brw->programs[MESA_SHADER_TESS_EVAL] != - ctx->TessEvalProgram._Current) { - brw->programs[MESA_SHADER_TESS_EVAL] = ctx->TessEvalProgram._Current; - brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS; - } - - if (brw->programs[MESA_SHADER_TESS_CTRL] != - ctx->TessCtrlProgram._Current) { - brw->programs[MESA_SHADER_TESS_CTRL] = ctx->TessCtrlProgram._Current; - brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS; - } - - if (brw->programs[MESA_SHADER_GEOMETRY] != - ctx->GeometryProgram._Current) { - brw->programs[MESA_SHADER_GEOMETRY] = ctx->GeometryProgram._Current; - brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM; - } - - if (brw->programs[MESA_SHADER_VERTEX] != ctx->VertexProgram._Current) { - brw->programs[MESA_SHADER_VERTEX] = ctx->VertexProgram._Current; - brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM; - } - } - - if (brw->programs[MESA_SHADER_COMPUTE] != ctx->ComputeProgram._Current) { - brw->programs[MESA_SHADER_COMPUTE] = ctx->ComputeProgram._Current; - brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM; - } - - if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) { - brw->meta_in_progress = _mesa_meta_in_progress(ctx); - brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS; - } - - if (brw->num_samples != fb_samples) { - brw->num_samples = fb_samples; - brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES; - } - - /* Exit early if no state is flagged as dirty */ - merge_ctx_state(brw, &state); - if ((state.mesa | state.brw) == 0) - return; - - /* Emit Sandybridge workaround flushes on every primitive, for safety. */ - if (devinfo->ver == 6) - brw_emit_post_sync_nonzero_flush(brw); - - brw_upload_programs(brw, pipeline); - merge_ctx_state(brw, &state); - - brw_upload_state_base_address(brw); - - const struct brw_tracked_state *atoms = - brw_get_pipeline_atoms(brw, pipeline); - const int num_atoms = brw->num_atoms[pipeline]; - - if (INTEL_DEBUG) { - /* Debug version which enforces various sanity checks on the - * state flags which are generated and checked to help ensure - * state atoms are ordered correctly in the list. - */ - struct brw_state_flags examined, prev; - memset(&examined, 0, sizeof(examined)); - prev = state; - - for (i = 0; i < num_atoms; i++) { - const struct brw_tracked_state *atom = &atoms[i]; - struct brw_state_flags generated; - - check_and_emit_atom(brw, &state, atom); - - accumulate_state(&examined, &atom->dirty); - - /* generated = (prev ^ state) - * if (examined & generated) - * fail; - */ - xor_states(&generated, &prev, &state); - assert(!check_state(&examined, &generated)); - prev = state; - } - } - else { - for (i = 0; i < num_atoms; i++) { - const struct brw_tracked_state *atom = &atoms[i]; - - check_and_emit_atom(brw, &state, atom); - } - } - - if (INTEL_DEBUG & DEBUG_STATE) { - STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1); - - brw_update_dirty_count(mesa_bits, state.mesa); - brw_update_dirty_count(brw_bits, state.brw); - if (dirty_count++ % 1000 == 0) { - brw_print_dirty_count(mesa_bits); - brw_print_dirty_count(brw_bits); - fprintf(stderr, "\n"); - } - } -} - -/*********************************************************************** - * Emit all state: - */ -void brw_upload_render_state(struct brw_context *brw) -{ - brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE); -} - -static inline void -brw_pipeline_state_finished(struct brw_context *brw, - enum brw_pipeline pipeline) -{ - /* Save all dirty state into the other pipelines */ - for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) { - if (i != pipeline) { - brw->state.pipelines[i].mesa |= brw->NewGLState; - brw->state.pipelines[i].brw |= brw->ctx.NewDriverState; - } else { - memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags)); - } - } - - brw->NewGLState = 0; - brw->ctx.NewDriverState = 0ull; -} - -/** - * Clear dirty bits to account for the fact that the state emitted by - * brw_upload_render_state() has been committed to the hardware. This is a - * separate call from brw_upload_render_state() because it's possible that - * after the call to brw_upload_render_state(), we will discover that we've - * run out of aperture space, and need to rewind the batch buffer to the state - * it had before the brw_upload_render_state() call. - */ -void -brw_render_state_finished(struct brw_context *brw) -{ - brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE); -} - -void -brw_upload_compute_state(struct brw_context *brw) -{ - brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE); -} - -void -brw_compute_state_finished(struct brw_context *brw) -{ - brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE); -} |