From 6c530ad1160518d9f035da4aba5a9d4df7369972 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 21 Jul 2017 16:36:52 +0100 Subject: i965: Reduce passing 2x32b of reloc_domains to 2 bits The kernel only cares about whether the object is to be written to or not, and reduces (reloc.read_domains, reloc.write_domain) down to just !!reloc.write_domain. When we use NO_RELOC, the kernel doesn't even read those relocs and instead userspace has to pass that information in the execobject.flags. We can simplify our reloc API by also removing the unused read/write domains and only passing the resultant flags. The caveat to the above is when we need to make the kernel aware that certain objects need to take into account different workarounds. Previously, this was done using the magic (INSTRUCTION, INSTRUCTION) reloc domains. NO_RELOC requires this to be passed in the execobject flags as well, and now we push that up the callstack. The API is more compact, more expressive of what happens underneath, but unfortunately requires more knowledge of the system at the point of use. Conversely it also means that knowledge is specific and not generally applied and so not overused. text data bss dec hex filename 8502991 356912 424944 9284847 8dacef lib/i965_dri.so (before) 8500455 356912 424944 9282311 8da307 lib/i965_dri.so (after) v2: (by Ken) Rebase. 
Reviewed-by: Kenneth Graunke --- src/intel/blorp/blorp.h | 3 +- src/mesa/drivers/dri/i965/brw_binding_tables.c | 2 +- src/mesa/drivers/dri/i965/brw_blorp.c | 8 +-- src/mesa/drivers/dri/i965/brw_compute.c | 24 +++------ src/mesa/drivers/dri/i965/brw_conditional_render.c | 14 +---- src/mesa/drivers/dri/i965/brw_context.h | 3 +- src/mesa/drivers/dri/i965/brw_curbe.c | 3 +- src/mesa/drivers/dri/i965/brw_draw.c | 7 --- src/mesa/drivers/dri/i965/brw_misc_state.c | 51 +++++++----------- src/mesa/drivers/dri/i965/brw_pipe_control.c | 12 ++--- src/mesa/drivers/dri/i965/brw_state.h | 2 +- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 47 ++++++++--------- src/mesa/drivers/dri/i965/gen4_blorp_exec.h | 4 -- src/mesa/drivers/dri/i965/gen6_depth_state.c | 12 ++--- src/mesa/drivers/dri/i965/gen7_misc_state.c | 13 ++--- src/mesa/drivers/dri/i965/gen7_sol_state.c | 8 +-- src/mesa/drivers/dri/i965/gen8_depth_state.c | 9 ++-- src/mesa/drivers/dri/i965/genX_blorp_exec.c | 7 +-- src/mesa/drivers/dri/i965/genX_state_upload.c | 24 +++------ src/mesa/drivers/dri/i965/hsw_queryobj.c | 43 +++------------ src/mesa/drivers/dri/i965/hsw_sol.c | 12 +---- src/mesa/drivers/dri/i965/intel_batchbuffer.c | 61 ++++++++-------------- src/mesa/drivers/dri/i965/intel_batchbuffer.h | 20 +++---- src/mesa/drivers/dri/i965/intel_blit.c | 32 +++--------- 24 files changed, 128 insertions(+), 293 deletions(-) diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h index d19920e87f4..4d9a44b0926 100644 --- a/src/intel/blorp/blorp.h +++ b/src/intel/blorp/blorp.h @@ -92,8 +92,7 @@ void blorp_batch_finish(struct blorp_batch *batch); struct blorp_address { void *buffer; - uint32_t read_domains; - uint32_t write_domain; + unsigned reloc_flags; uint32_t offset; }; diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c index c0a763d0aeb..d1cbf0db3bd 100644 --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c +++ 
b/src/mesa/drivers/dri/i965/brw_binding_tables.c @@ -69,7 +69,7 @@ brw_upload_binding_table(struct brw_context *brw, brw, &stage_state->surf_offset[ prog_data->binding_table.shader_time_start], brw->shader_time.bo, 0, ISL_FORMAT_RAW, - brw->shader_time.bo->size, 1, true); + brw->shader_time.bo->size, 1, RELOC_WRITE); } uint32_t *bind = brw_state_batch(brw, prog_data->binding_table.size_bytes, diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c index b2987ca4faf..446f507619c 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ b/src/mesa/drivers/dri/i965/brw_blorp.c @@ -150,9 +150,7 @@ blorp_surf_for_miptree(struct brw_context *brw, surf->addr = (struct blorp_address) { .buffer = mt->bo, .offset = mt->offset, - .read_domains = is_render_target ? I915_GEM_DOMAIN_RENDER : - I915_GEM_DOMAIN_SAMPLER, - .write_domain = is_render_target ? I915_GEM_DOMAIN_RENDER : 0, + .reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0, }; surf->aux_usage = aux_usage; @@ -175,9 +173,7 @@ blorp_surf_for_miptree(struct brw_context *brw, surf->aux_surf = aux_surf; surf->aux_addr = (struct blorp_address) { - .read_domains = is_render_target ? I915_GEM_DOMAIN_RENDER : - I915_GEM_DOMAIN_SAMPLER, - .write_domain = is_render_target ? I915_GEM_DOMAIN_RENDER : 0, + .reloc_flags = is_render_target ? 
EXEC_OBJECT_WRITE : 0, }; if (mt->mcs_buf) { diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c index d6cb0161f40..ed22d712a67 100644 --- a/src/mesa/drivers/dri/i965/brw_compute.c +++ b/src/mesa/drivers/dri/i965/brw_compute.c @@ -40,15 +40,9 @@ prepare_indirect_gpgpu_walker(struct brw_context *brw) GLintptr indirect_offset = brw->compute.num_work_groups_offset; struct brw_bo *bo = brw->compute.num_work_groups_bo; - brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo, - I915_GEM_DOMAIN_VERTEX, 0, - indirect_offset + 0); - brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo, - I915_GEM_DOMAIN_VERTEX, 0, - indirect_offset + 4); - brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo, - I915_GEM_DOMAIN_VERTEX, 0, - indirect_offset + 8); + brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo, indirect_offset + 0); + brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo, indirect_offset + 4); + brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo, indirect_offset + 8); if (brw->gen > 7) return; @@ -65,9 +59,7 @@ prepare_indirect_gpgpu_walker(struct brw_context *brw) ADVANCE_BATCH(); /* Load compute_dispatch_indirect_x_size into SRC0 */ - brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - indirect_offset + 0); + brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo, indirect_offset + 0); /* predicate = (compute_dispatch_indirect_x_size == 0); */ BEGIN_BATCH(1); @@ -78,9 +70,7 @@ prepare_indirect_gpgpu_walker(struct brw_context *brw) ADVANCE_BATCH(); /* Load compute_dispatch_indirect_y_size into SRC0 */ - brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - indirect_offset + 4); + brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo, indirect_offset + 4); /* predicate |= (compute_dispatch_indirect_y_size == 0); */ BEGIN_BATCH(1); @@ -91,9 +81,7 @@ prepare_indirect_gpgpu_walker(struct brw_context *brw) ADVANCE_BATCH(); /* Load compute_dispatch_indirect_z_size 
into SRC0 */ - brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - indirect_offset + 8); + brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo, indirect_offset + 8); /* predicate |= (compute_dispatch_indirect_z_size == 0); */ BEGIN_BATCH(1); diff --git a/src/mesa/drivers/dri/i965/brw_conditional_render.c b/src/mesa/drivers/dri/i965/brw_conditional_render.c index 6be4d48d21d..e33e79fb6ce 100644 --- a/src/mesa/drivers/dri/i965/brw_conditional_render.c +++ b/src/mesa/drivers/dri/i965/brw_conditional_render.c @@ -87,18 +87,8 @@ set_predicate_for_occlusion_query(struct brw_context *brw, */ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE); - brw_load_register_mem64(brw, - MI_PREDICATE_SRC0, - query->bo, - I915_GEM_DOMAIN_INSTRUCTION, - 0, /* write domain */ - 0 /* offset */); - brw_load_register_mem64(brw, - MI_PREDICATE_SRC1, - query->bo, - I915_GEM_DOMAIN_INSTRUCTION, - 0, /* write domain */ - 8 /* offset */); + brw_load_register_mem64(brw, MI_PREDICATE_SRC0, query->bo, 0 /* offset */); + brw_load_register_mem64(brw, MI_PREDICATE_SRC1, query->bo, 8 /* offset */); } static void diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 33e8947bb8f..d41e6aa7bde 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -458,6 +458,7 @@ struct intel_batchbuffer { struct drm_i915_gem_relocation_entry *relocs; int reloc_count; int reloc_array_size; + unsigned int valid_reloc_flags; /** The validation list */ struct drm_i915_gem_exec_object2 *validation_list; @@ -1321,12 +1322,10 @@ bool brw_check_conditional_render(struct brw_context *brw); void brw_load_register_mem(struct brw_context *brw, uint32_t reg, struct brw_bo *bo, - uint32_t read_domains, uint32_t write_domain, uint32_t offset); void brw_load_register_mem64(struct brw_context *brw, uint32_t reg, struct brw_bo *bo, - uint32_t read_domains, uint32_t write_domain, uint32_t offset); void 
brw_store_register_mem32(struct brw_context *brw, struct brw_bo *bo, uint32_t reg, uint32_t offset); diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 139a3bcdf86..2500dbd0c48 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -301,8 +301,7 @@ emit: OUT_BATCH(0); } else { OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2)); - OUT_RELOC(brw->curbe.curbe_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, + OUT_RELOC(brw->curbe.curbe_bo, 0, (brw->curbe.total_size - 1) + brw->curbe.curbe_offset); } ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index d73a3dc827a..29fb874ffae 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -205,7 +205,6 @@ brw_emit_prim(struct brw_context *brw, brw_load_register_mem(brw, GEN7_3DPRIM_VERTEX_COUNT, xfb_obj->prim_count_bo, - I915_GEM_DOMAIN_VERTEX, 0, stream * sizeof(uint32_t)); BEGIN_BATCH(9); OUT_BATCH(MI_LOAD_REGISTER_IMM | (9 - 2)); @@ -227,25 +226,19 @@ brw_emit_prim(struct brw_context *brw, indirect_flag = GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE; brw_load_register_mem(brw, GEN7_3DPRIM_VERTEX_COUNT, bo, - I915_GEM_DOMAIN_VERTEX, 0, prim->indirect_offset + 0); brw_load_register_mem(brw, GEN7_3DPRIM_INSTANCE_COUNT, bo, - I915_GEM_DOMAIN_VERTEX, 0, prim->indirect_offset + 4); brw_load_register_mem(brw, GEN7_3DPRIM_START_VERTEX, bo, - I915_GEM_DOMAIN_VERTEX, 0, prim->indirect_offset + 8); if (prim->indexed) { brw_load_register_mem(brw, GEN7_3DPRIM_BASE_VERTEX, bo, - I915_GEM_DOMAIN_VERTEX, 0, prim->indirect_offset + 12); brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo, - I915_GEM_DOMAIN_VERTEX, 0, prim->indirect_offset + 16); } else { brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo, - I915_GEM_DOMAIN_VERTEX, 0, prim->indirect_offset + 12); BEGIN_BATCH(3); OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2)); diff --git 
a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 1e3be784c5b..bc98a583a52 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -63,21 +63,15 @@ upload_pipelined_state_pointers(struct brw_context *brw) BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2)); - OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - brw->vs.base.state_offset); + OUT_RELOC(brw->batch.bo, 0, brw->vs.base.state_offset); if (brw->ff_gs.prog_active) - OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - brw->ff_gs.state_offset | 1); + OUT_RELOC(brw->batch.bo, 0, brw->ff_gs.state_offset | 1); else OUT_BATCH(0); - OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - brw->clip.state_offset | 1); - OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - brw->sf.state_offset); - OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - brw->wm.base.state_offset); - OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - brw->cc.state_offset); + OUT_RELOC(brw->batch.bo, 0, brw->clip.state_offset | 1); + OUT_RELOC(brw->batch.bo, 0, brw->sf.state_offset); + OUT_RELOC(brw->batch.bo, 0, brw->wm.base.state_offset); + OUT_RELOC(brw->batch.bo, 0, brw->cc.state_offset); ADVANCE_BATCH(); brw->ctx.NewDriverState |= BRW_NEW_PSP; @@ -387,9 +381,7 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw, (depth_surface_type << 29)); if (depth_mt) { - OUT_RELOC(depth_mt->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - depth_offset); + OUT_RELOC(depth_mt->bo, RELOC_WRITE, depth_offset); } else { OUT_BATCH(0); } @@ -636,18 +628,14 @@ brw_upload_state_base_address(struct brw_context *brw) OUT_BATCH(0); OUT_BATCH(mocs_wb << 16); /* Surface state base address: */ - OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, - mocs_wb << 4 | 1); + OUT_RELOC64(brw->batch.bo, 0, mocs_wb << 4 | 1); /* Dynamic state base address: */ - OUT_RELOC64(brw->batch.bo, - 
I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0, - mocs_wb << 4 | 1); + OUT_RELOC64(brw->batch.bo, 0, mocs_wb << 4 | 1); /* Indirect object base address: MEDIA_OBJECT data */ OUT_BATCH(mocs_wb << 4 | 1); OUT_BATCH(0); /* Instruction base address: shader kernels (incl. SIP) */ - OUT_RELOC64(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - mocs_wb << 4 | 1); + OUT_RELOC64(brw->cache.bo, 0, mocs_wb << 4 | 1); /* General state buffer size */ OUT_BATCH(0xfffff001); @@ -675,7 +663,7 @@ brw_upload_state_base_address(struct brw_context *brw) * BINDING_TABLE_STATE * SURFACE_STATE */ - OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1); + OUT_RELOC(brw->batch.bo, 0, 1); /* Dynamic state base address: * SAMPLER_STATE * SAMPLER_BORDER_COLOR_STATE @@ -686,12 +674,12 @@ brw_upload_state_base_address(struct brw_context *brw) * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset * Disable is clear, which we rely on) */ - OUT_RELOC(brw->batch.bo, (I915_GEM_DOMAIN_RENDER | - I915_GEM_DOMAIN_INSTRUCTION), 0, 1); + OUT_RELOC(brw->batch.bo, 0, 1); OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */ - OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - 1); /* Instruction base address: shader kernels (incl. SIP) */ + + /* Instruction base address: shader kernels (incl. SIP) */ + OUT_RELOC(brw->cache.bo, 0, 1); OUT_BATCH(1); /* General state upper bound */ /* Dynamic state upper bound. 
Although the documentation says that @@ -707,11 +695,9 @@ brw_upload_state_base_address(struct brw_context *brw) BEGIN_BATCH(8); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); OUT_BATCH(1); /* General state base address */ - OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, - 1); /* Surface state base address */ + OUT_RELOC(brw->batch.bo, 0, 1); /* Surface state base address */ OUT_BATCH(1); /* Indirect object base address */ - OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - 1); /* Instruction base address */ + OUT_RELOC(brw->cache.bo, 0, 1); /* Instruction base address */ OUT_BATCH(0xfffff001); /* General state upper bound */ OUT_BATCH(1); /* Indirect object upper bound */ OUT_BATCH(1); /* Instruction access upper bound */ @@ -720,8 +706,7 @@ brw_upload_state_base_address(struct brw_context *brw) BEGIN_BATCH(6); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); OUT_BATCH(1); /* General state base address */ - OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, - 1); /* Surface state base address */ + OUT_RELOC(brw->batch.bo, 0, 1); /* Surface state base address */ OUT_BATCH(1); /* Indirect object base address */ OUT_BATCH(1); /* General state upper bound */ OUT_BATCH(1); /* Indirect object upper bound */ diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c index a95892c44cf..2a84fb8864e 100644 --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c @@ -109,8 +109,7 @@ brw_emit_pipe_control(struct brw_context *brw, uint32_t flags, OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2)); OUT_BATCH(flags); if (bo) { - OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, offset); + OUT_RELOC64(bo, RELOC_WRITE, offset); } else { OUT_BATCH(0); OUT_BATCH(0); @@ -141,8 +140,7 @@ brw_emit_pipe_control(struct brw_context *brw, uint32_t flags, OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); OUT_BATCH(flags); if (bo) { - OUT_RELOC(bo, 
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - gen6_gtt | offset); + OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, gen6_gtt | offset); } else { OUT_BATCH(0); } @@ -153,8 +151,7 @@ brw_emit_pipe_control(struct brw_context *brw, uint32_t flags, BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2)); if (bo) { - OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - PIPE_CONTROL_GLOBAL_GTT_WRITE | offset); + OUT_RELOC(bo, RELOC_WRITE, PIPE_CONTROL_GLOBAL_GTT_WRITE | offset); } else { OUT_BATCH(0); } @@ -409,8 +406,7 @@ brw_emit_end_of_pipe_sync(struct brw_context *brw, uint32_t flags) * 3DPRIMITIVE when needed anyway. */ brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, - brw->workaround_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + brw->workaround_bo, 0); } } else { /* On gen4-5, a regular pipe control seems to suffice. */ diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 1432a6888f7..46665aae12b 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -216,7 +216,7 @@ void brw_emit_buffer_surface_state(struct brw_context *brw, unsigned surface_format, unsigned buffer_size, unsigned pitch, - bool rw); + unsigned reloc_flags); void brw_update_texture_surface(struct gl_context *ctx, unsigned unit, uint32_t *surf_offset, diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 5a3eccfe042..8537d6eeeb5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -133,7 +133,7 @@ brw_emit_surface_state(struct brw_context *brw, GLenum target, struct isl_view view, enum isl_aux_usage aux_usage, uint32_t mocs, uint32_t *surf_offset, int surf_index, - unsigned read_domains, unsigned write_domains) + unsigned reloc_flags) { uint32_t tile_x = mt->level[0].level_x; uint32_t tile_y = mt->level[0].level_y; @@ -182,7 +182,7 @@ 
brw_emit_surface_state(struct brw_context *brw, isl_surf_fill_state(&brw->isl_dev, state, .surf = &mt->surf, .view = &view, .address = brw_emit_reloc(&brw->batch, *surf_offset + brw->isl_dev.ss.addr_offset, - mt->bo, offset, read_domains, write_domains), + mt->bo, offset, reloc_flags), .aux_surf = aux_surf, .aux_usage = aux_usage, .aux_address = aux_offset, .mocs = mocs, .clear_color = clear_color, @@ -202,7 +202,7 @@ brw_emit_surface_state(struct brw_context *brw, *surf_offset + brw->isl_dev.ss.aux_addr_offset, aux_bo, *aux_addr, - read_domains, write_domains); + reloc_flags); } } @@ -247,8 +247,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, brw_emit_surface_state(brw, mt, mt->target, view, aux_usage, rb_mocs[brw->gen], &offset, surf_index, - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER); + RELOC_WRITE); return offset; } @@ -592,7 +591,7 @@ brw_update_texture_surface(struct gl_context *ctx, brw_emit_surface_state(brw, mt, mt->target, view, aux_usage, tex_mocs[brw->gen], surf_offset, surf_index, - I915_GEM_DOMAIN_SAMPLER, 0); + 0); } } @@ -604,7 +603,7 @@ brw_emit_buffer_surface_state(struct brw_context *brw, unsigned surface_format, unsigned buffer_size, unsigned pitch, - bool rw) + unsigned reloc_flags) { uint32_t *dw = brw_state_batch(brw, brw->isl_dev.ss.size, @@ -616,8 +615,7 @@ brw_emit_buffer_surface_state(struct brw_context *brw, brw_emit_reloc(&brw->batch, *out_offset + brw->isl_dev.ss.addr_offset, bo, buffer_offset, - I915_GEM_DOMAIN_SAMPLER, - (rw ? 
I915_GEM_DOMAIN_SAMPLER : 0)), + reloc_flags), .size = buffer_size, .format = surface_format, .stride = pitch, @@ -673,7 +671,7 @@ brw_update_buffer_texture_surface(struct gl_context *ctx, isl_format, size, texel_size, - false /* rw */); + 0); } /** @@ -689,7 +687,7 @@ brw_create_constant_surface(struct brw_context *brw, { brw_emit_buffer_surface_state(brw, out_offset, bo, offset, ISL_FORMAT_R32G32B32A32_FLOAT, - size, 1, false); + size, 1, 0); } /** @@ -711,7 +709,7 @@ brw_create_buffer_surface(struct brw_context *brw, */ brw_emit_buffer_surface_state(brw, out_offset, bo, offset, ISL_FORMAT_RAW, - size, 1, true); + size, 1, RELOC_WRITE); } /** @@ -785,8 +783,7 @@ brw_update_sol_surface(struct brw_context *brw, surface_format << BRW_SURFACE_FORMAT_SHIFT | BRW_SURFACE_RC_READ_WRITE; surf[1] = brw_emit_reloc(&brw->batch, - *out_offset + 4, bo, offset_bytes, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); + *out_offset + 4, bo, offset_bytes, RELOC_WRITE); surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT | height << BRW_SURFACE_HEIGHT_SHIFT); surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT | @@ -901,8 +898,8 @@ brw_emit_null_surface_state(struct brw_context *brw, 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT); } surf[1] = !bo ? 
0 : - brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); + brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0, RELOC_WRITE); + surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT | (height - 1) << BRW_SURFACE_HEIGHT_SHIFT); @@ -976,7 +973,7 @@ gen4_update_renderbuffer_surface(struct brw_context *brw, intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y), - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); + RELOC_WRITE); surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT | (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT); @@ -1160,7 +1157,7 @@ update_renderbuffer_read_surfaces(struct brw_context *brw) brw_emit_surface_state(brw, irb->mt, target, view, aux_usage, tex_mocs[brw->gen], surf_offset, surf_index, - I915_GEM_DOMAIN_SAMPLER, 0); + 0); } else { brw->vtbl.emit_null_surface_state( @@ -1453,7 +1450,8 @@ brw_upload_abo_surfaces(struct brw_context *brw, brw_emit_buffer_surface_state(brw, &surf_offsets[i], bo, binding->Offset, ISL_FORMAT_RAW, - bo->size - binding->Offset, 1, true); + bo->size - binding->Offset, 1, + RELOC_WRITE); } brw->ctx.NewDriverState |= BRW_NEW_SURFACES; @@ -1614,7 +1612,7 @@ update_image_surface(struct brw_context *brw, brw_emit_buffer_surface_state( brw, surf_offset, intel_obj->buffer, obj->BufferOffset, format, intel_obj->Base.Size, texel_size, - access != GL_READ_ONLY); + access != GL_READ_ONLY ? RELOC_WRITE : 0); update_buffer_image_param(brw, u, surface_idx, param); @@ -1638,7 +1636,7 @@ update_image_surface(struct brw_context *brw, brw_emit_buffer_surface_state( brw, surf_offset, mt->bo, mt->offset, format, mt->bo->size - mt->offset, 1 /* pitch */, - access != GL_READ_ONLY); + access != GL_READ_ONLY ? 
RELOC_WRITE : 0); } else { const int surf_index = surf_offset - &brw->wm.base.surf_offset[0]; @@ -1649,9 +1647,7 @@ update_image_surface(struct brw_context *brw, brw_emit_surface_state(brw, mt, mt->target, view, ISL_AUX_USAGE_NONE, tex_mocs[brw->gen], surf_offset, surf_index, - I915_GEM_DOMAIN_SAMPLER, - access == GL_READ_ONLY ? 0 : - I915_GEM_DOMAIN_SAMPLER); + access == GL_READ_ONLY ? 0 : RELOC_WRITE); } isl_surf_fill_image_param(&brw->isl_dev, param, &mt->surf, &view); @@ -1766,7 +1762,8 @@ brw_upload_cs_work_groups_surface(struct brw_context *brw) brw_emit_buffer_surface_state(brw, surf_offset, bo, bo_offset, ISL_FORMAT_RAW, - 3 * sizeof(GLuint), 1, true); + 3 * sizeof(GLuint), 1, + RELOC_WRITE); brw->ctx.NewDriverState |= BRW_NEW_SURFACES; } } diff --git a/src/mesa/drivers/dri/i965/gen4_blorp_exec.h b/src/mesa/drivers/dri/i965/gen4_blorp_exec.h index 764b1989aaa..fe02ab52f1f 100644 --- a/src/mesa/drivers/dri/i965/gen4_blorp_exec.h +++ b/src/mesa/drivers/dri/i965/gen4_blorp_exec.h @@ -30,8 +30,6 @@ dynamic_state_address(struct blorp_batch *batch, uint32_t offset) return (struct blorp_address) { .buffer = brw->batch.bo, .offset = offset, - .write_domain = 0, - .read_domains = I915_GEM_DOMAIN_INSTRUCTION, }; } @@ -44,8 +42,6 @@ instruction_state_address(struct blorp_batch *batch, uint32_t offset) return (struct blorp_address) { .buffer = brw->cache.bo, .offset = offset, - .write_domain = 0, - .read_domains = I915_GEM_DOMAIN_INSTRUCTION, }; } diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c b/src/mesa/drivers/dri/i965/gen6_depth_state.c index cd0423913c4..3a66b42fec1 100644 --- a/src/mesa/drivers/dri/i965/gen6_depth_state.c +++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c @@ -122,9 +122,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw, /* 3DSTATE_DEPTH_BUFFER dw2 */ if (depth_mt) { - OUT_RELOC(depth_mt->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); + OUT_RELOC(depth_mt->bo, RELOC_WRITE, 0); } else { OUT_BATCH(0); } @@ -168,9 +166,7 
@@ gen6_emit_depth_stencil_hiz(struct brw_context *brw, BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); OUT_BATCH(depth_mt->hiz_buf->surf.row_pitch - 1); - OUT_RELOC(depth_mt->hiz_buf->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - offset); + OUT_RELOC(depth_mt->hiz_buf->bo, RELOC_WRITE, offset); ADVANCE_BATCH(); } else { BEGIN_BATCH(3); @@ -192,9 +188,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw, BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); OUT_BATCH(stencil_mt->surf.row_pitch - 1); - OUT_RELOC(stencil_mt->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - offset); + OUT_RELOC(stencil_mt->bo, RELOC_WRITE, offset); ADVANCE_BATCH(); } else { BEGIN_BATCH(3); diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c index 1a9e645084c..9d51a401668 100644 --- a/src/mesa/drivers/dri/i965/gen7_misc_state.c +++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c @@ -114,9 +114,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw, /* 3DSTATE_DEPTH_BUFFER dw2 */ if (depth_mt) { - OUT_RELOC(depth_mt->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); + OUT_RELOC(depth_mt->bo, RELOC_WRITE, 0); } else { OUT_BATCH(0); } @@ -151,10 +149,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw, OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2)); OUT_BATCH((mocs << 25) | (depth_mt->hiz_buf->pitch - 1)); - OUT_RELOC(depth_mt->hiz_buf->bo, - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER, - 0); + OUT_RELOC(depth_mt->hiz_buf->bo, RELOC_WRITE, 0); ADVANCE_BATCH(); } @@ -173,9 +168,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw, OUT_BATCH(enabled | mocs << 25 | (stencil_mt->surf.row_pitch - 1)); - OUT_RELOC(stencil_mt->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); + OUT_RELOC(stencil_mt->bo, RELOC_WRITE, 0); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c 
index f54b370cd40..161e03460bf 100644 --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c @@ -120,9 +120,7 @@ gen7_pause_transform_feedback(struct gl_context *ctx, BEGIN_BATCH(3); OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); OUT_BATCH(GEN7_SO_WRITE_OFFSET(i)); - OUT_RELOC(brw_obj->offset_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - i * sizeof(uint32_t)); + OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t)); ADVANCE_BATCH(); } @@ -149,9 +147,7 @@ gen7_resume_transform_feedback(struct gl_context *ctx, BEGIN_BATCH(3); OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2)); OUT_BATCH(GEN7_SO_WRITE_OFFSET(i)); - OUT_RELOC(brw_obj->offset_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - i * sizeof(uint32_t)); + OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t)); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c index 429c4b074b0..d94ca0b6bea 100644 --- a/src/mesa/drivers/dri/i965/gen8_depth_state.c +++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c @@ -69,8 +69,7 @@ emit_depth_packets(struct brw_context *brw, depthbuffer_format << 18 | (depth_mt ? 
depth_mt->surf.row_pitch - 1 : 0)); if (depth_mt) { - OUT_RELOC64(depth_mt->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + OUT_RELOC64(depth_mt->bo, RELOC_WRITE, 0); } else { OUT_BATCH(0); OUT_BATCH(0); @@ -95,8 +94,7 @@ emit_depth_packets(struct brw_context *brw, BEGIN_BATCH(5); OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (5 - 2)); OUT_BATCH((depth_mt->hiz_buf->pitch - 1) | mocs_wb << 25); - OUT_RELOC64(depth_mt->hiz_buf->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + OUT_RELOC64(depth_mt->hiz_buf->bo, RELOC_WRITE, 0); OUT_BATCH(depth_mt->hiz_buf->qpitch >> 2); ADVANCE_BATCH(); } @@ -114,8 +112,7 @@ emit_depth_packets(struct brw_context *brw, OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (5 - 2)); OUT_BATCH(HSW_STENCIL_ENABLED | mocs_wb << 22 | (stencil_mt->surf.row_pitch - 1)); - OUT_RELOC64(stencil_mt->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + OUT_RELOC64(stencil_mt->bo, RELOC_WRITE, 0); OUT_BATCH(stencil_mt->surf.array_pitch_el_rows >> 2); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c index c6eee4c5aea..f2c9f3f1f82 100644 --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c @@ -61,8 +61,7 @@ blorp_emit_reloc(struct blorp_batch *batch, uint32_t offset = (char *)location - (char *)brw->batch.map; return brw_emit_reloc(&brw->batch, offset, address.buffer, address.offset + delta, - address.read_domains, - address.write_domain); + address.reloc_flags); } static void @@ -75,7 +74,7 @@ blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset, uint64_t reloc_val = brw_emit_reloc(&brw->batch, ss_offset, bo, address.offset + delta, - address.read_domains, address.write_domain); + address.reloc_flags); void *reloc_ptr = (void *)brw->batch.map + ss_offset; #if GEN_GEN >= 8 @@ -142,8 +141,6 @@ blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size, *addr = (struct blorp_address) { 
.buffer = brw->batch.bo, - .read_domains = I915_GEM_DOMAIN_VERTEX, - .write_domain = 0, .offset = offset, }; diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index 4f4dd6f993b..03f3360ac56 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -71,8 +71,7 @@ emit_dwords(struct brw_context *brw, unsigned n) struct brw_address { struct brw_bo *bo; - uint32_t read_domains; - uint32_t write_domain; + unsigned reloc_flags; uint32_t offset; }; @@ -84,8 +83,7 @@ emit_reloc(struct brw_context *brw, return brw_emit_reloc(&brw->batch, offset, address.bo, address.offset + delta, - address.read_domains, - address.write_domain); + address.reloc_flags); } #define __gen_address_type struct brw_address @@ -108,8 +106,7 @@ render_bo(struct brw_bo *bo, uint32_t offset) return (struct brw_address) { .bo = bo, .offset = offset, - .read_domains = I915_GEM_DOMAIN_RENDER, - .write_domain = I915_GEM_DOMAIN_RENDER, + .reloc_flags = RELOC_WRITE, }; } @@ -119,8 +116,6 @@ render_ro_bo(struct brw_bo *bo, uint32_t offset) return (struct brw_address) { .bo = bo, .offset = offset, - .read_domains = I915_GEM_DOMAIN_RENDER, - .write_domain = 0, }; } @@ -130,8 +125,7 @@ instruction_bo(struct brw_bo *bo, uint32_t offset) return (struct brw_address) { .bo = bo, .offset = offset, - .read_domains = I915_GEM_DOMAIN_INSTRUCTION, - .write_domain = I915_GEM_DOMAIN_INSTRUCTION, + .reloc_flags = RELOC_WRITE | RELOC_NEEDS_GGTT, }; } @@ -141,8 +135,6 @@ instruction_ro_bo(struct brw_bo *bo, uint32_t offset) return (struct brw_address) { .bo = bo, .offset = offset, - .read_domains = I915_GEM_DOMAIN_INSTRUCTION, - .write_domain = 0, }; } @@ -152,8 +144,6 @@ vertex_bo(struct brw_bo *bo, uint32_t offset) return (struct brw_address) { .bo = bo, .offset = offset, - .read_domains = I915_GEM_DOMAIN_VERTEX, - .write_domain = 0, }; } @@ -4147,7 +4137,8 @@ genX(upload_cs_state)(struct brw_context *brw) brw, 
&stage_state->surf_offset[ prog_data->binding_table.shader_time_start], brw->shader_time.bo, 0, ISL_FORMAT_RAW, - brw->shader_time.bo->size, 1, true); + brw->shader_time.bo->size, 1, + RELOC_WRITE); } uint32_t *bind = brw_state_batch(brw, prog_data->binding_table.size_bytes, @@ -5056,8 +5047,7 @@ genX(update_sampler_state)(struct brw_context *brw, if (GEN_GEN < 6) { samp_st.BorderColorPointer = brw_emit_reloc(&brw->batch, batch_offset_for_sampler_state + 8, - brw->batch.bo, border_color_offset, - I915_GEM_DOMAIN_SAMPLER, 0); + brw->batch.bo, border_color_offset, 0); } else { samp_st.BorderColorPointer = border_color_offset; } diff --git a/src/mesa/drivers/dri/i965/hsw_queryobj.c b/src/mesa/drivers/dri/i965/hsw_queryobj.c index b81ab3b6f88..9dc3b3de865 100644 --- a/src/mesa/drivers/dri/i965/hsw_queryobj.c +++ b/src/mesa/drivers/dri/i965/hsw_queryobj.c @@ -193,36 +193,16 @@ load_overflow_data_to_cs_gprs(struct brw_context *brw, { int offset = idx * sizeof(uint64_t) * 4; - brw_load_register_mem64(brw, - HSW_CS_GPR(1), - query->bo, - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, - offset); + brw_load_register_mem64(brw, HSW_CS_GPR(1), query->bo, offset); offset += sizeof(uint64_t); - brw_load_register_mem64(brw, - HSW_CS_GPR(2), - query->bo, - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, - offset); + brw_load_register_mem64(brw, HSW_CS_GPR(2), query->bo, offset); offset += sizeof(uint64_t); - brw_load_register_mem64(brw, - HSW_CS_GPR(3), - query->bo, - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, - offset); + brw_load_register_mem64(brw, HSW_CS_GPR(3), query->bo, offset); offset += sizeof(uint64_t); - brw_load_register_mem64(brw, - HSW_CS_GPR(4), - query->bo, - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, - offset); + brw_load_register_mem64(brw, HSW_CS_GPR(4), query->bo, offset); } /* @@ -302,8 +282,6 @@ hsw_result_to_gpr0(struct gl_context *ctx, struct brw_query_object *query, brw_load_register_mem64(brw, 
HSW_CS_GPR(0), query->bo, - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, 2 * sizeof(uint64_t)); return; } @@ -321,8 +299,6 @@ hsw_result_to_gpr0(struct gl_context *ctx, struct brw_query_object *query, brw_load_register_mem64(brw, HSW_CS_GPR(0), query->bo, - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, 0 * sizeof(uint64_t)); } else if (query->Base.Target == GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB || query->Base.Target == GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB) { @@ -333,14 +309,10 @@ hsw_result_to_gpr0(struct gl_context *ctx, struct brw_query_object *query, brw_load_register_mem64(brw, HSW_CS_GPR(1), query->bo, - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, 0 * sizeof(uint64_t)); brw_load_register_mem64(brw, HSW_CS_GPR(2), query->bo, - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, 1 * sizeof(uint64_t)); BEGIN_BATCH(5); @@ -417,7 +389,6 @@ set_predicate(struct brw_context *brw, struct brw_bo *query_bo) /* Load query availability into SRC0 */ brw_load_register_mem64(brw, MI_PREDICATE_SRC0, query_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, 2 * sizeof(uint64_t)); /* predicate = !(query_availability == 0); */ @@ -450,11 +421,9 @@ store_query_result_reg(struct brw_context *brw, struct brw_bo *bo, (cmd_size - 2)); OUT_BATCH(reg + 4 * i); if (brw->gen >= 8) { - OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, offset + 4 * i); + OUT_RELOC64(bo, RELOC_WRITE, offset + 4 * i); } else { - OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, offset + 4 * i); + OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, offset + 4 * i); } } ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/hsw_sol.c b/src/mesa/drivers/dri/i965/hsw_sol.c index b0dd150b7df..9f6952c6bfc 100644 --- a/src/mesa/drivers/dri/i965/hsw_sol.c +++ b/src/mesa/drivers/dri/i965/hsw_sol.c @@ -92,14 +92,10 @@ tally_prims_written(struct brw_context *brw, /* GPR0 = Tally */ brw_load_register_imm32(brw, HSW_CS_GPR(0) + 4, 0); 
brw_load_register_mem(brw, HSW_CS_GPR(0), obj->prim_count_bo, - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, TALLY_OFFSET + i * sizeof(uint32_t)); if (!obj->base.Paused) { /* GPR1 = Start Snapshot */ brw_load_register_mem64(brw, HSW_CS_GPR(1), obj->prim_count_bo, - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, START_OFFSET + i * sizeof(uint64_t)); /* GPR2 = Ending Snapshot */ brw_load_register_reg64(brw, GEN7_SO_NUM_PRIMS_WRITTEN(i), HSW_CS_GPR(2)); @@ -209,9 +205,7 @@ hsw_pause_transform_feedback(struct gl_context *ctx, BEGIN_BATCH(3); OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); OUT_BATCH(GEN7_SO_WRITE_OFFSET(i)); - OUT_RELOC(brw_obj->offset_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - i * sizeof(uint32_t)); + OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t)); ADVANCE_BATCH(); } } @@ -237,9 +231,7 @@ hsw_resume_transform_feedback(struct gl_context *ctx, BEGIN_BATCH(3); OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2)); OUT_BATCH(GEN7_SO_WRITE_OFFSET(i)); - OUT_RELOC(brw_obj->offset_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - i * sizeof(uint32_t)); + OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t)); ADVANCE_BATCH(); } } diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index 6e53bb19db5..66b9a28129e 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -89,6 +89,11 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch, batch->use_batch_first = brw->screen->kernel_features & KERNEL_ALLOWS_EXEC_BATCH_FIRST; + /* PIPE_CONTROL needs a w/a but only on gen6 */ + batch->valid_reloc_flags = EXEC_OBJECT_WRITE; + if (brw->gen == 6) + batch->valid_reloc_flags |= EXEC_OBJECT_NEEDS_GTT; + intel_batchbuffer_reset(batch, bufmgr, has_llc); } @@ -788,7 +793,7 @@ brw_batch_references(struct intel_batchbuffer *batch, struct brw_bo *bo) uint64_t brw_emit_reloc(struct 
intel_batchbuffer *batch, uint32_t batch_offset, struct brw_bo *target, uint32_t target_offset, - uint32_t read_domains, uint32_t write_domain) + unsigned int reloc_flags) { assert(target != NULL); @@ -801,21 +806,12 @@ brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset, /* Check args */ assert(batch_offset <= BATCH_SZ - sizeof(uint32_t)); - assert(_mesa_bitcount(write_domain) <= 1); unsigned int index = add_exec_bo(batch, target); struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[index]; - if (write_domain) { - entry->flags |= EXEC_OBJECT_WRITE; - - /* PIPECONTROL needs a w/a on gen6 */ - if (write_domain == I915_GEM_DOMAIN_INSTRUCTION) { - struct brw_context *brw = container_of(batch, brw, batch); - if (brw->gen == 6) - entry->flags |= EXEC_OBJECT_NEEDS_GTT; - } - } + if (reloc_flags) + entry->flags |= reloc_flags & batch->valid_reloc_flags; batch->relocs[batch->reloc_count++] = (struct drm_i915_gem_relocation_entry) { @@ -846,7 +842,6 @@ static void load_sized_register_mem(struct brw_context *brw, uint32_t reg, struct brw_bo *bo, - uint32_t read_domains, uint32_t write_domain, uint32_t offset, int size) { @@ -860,7 +855,7 @@ load_sized_register_mem(struct brw_context *brw, for (i = 0; i < size; i++) { OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (4 - 2)); OUT_BATCH(reg + i * 4); - OUT_RELOC64(bo, read_domains, write_domain, offset + i * 4); + OUT_RELOC64(bo, 0, offset + i * 4); } ADVANCE_BATCH(); } else { @@ -868,7 +863,7 @@ load_sized_register_mem(struct brw_context *brw, for (i = 0; i < size; i++) { OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2)); OUT_BATCH(reg + i * 4); - OUT_RELOC(bo, read_domains, write_domain, offset + i * 4); + OUT_RELOC(bo, 0, offset + i * 4); } ADVANCE_BATCH(); } @@ -878,20 +873,18 @@ void brw_load_register_mem(struct brw_context *brw, uint32_t reg, struct brw_bo *bo, - uint32_t read_domains, uint32_t write_domain, uint32_t offset) { - load_sized_register_mem(brw, reg, bo, read_domains, write_domain, offset, 
1); + load_sized_register_mem(brw, reg, bo, offset, 1); } void brw_load_register_mem64(struct brw_context *brw, uint32_t reg, struct brw_bo *bo, - uint32_t read_domains, uint32_t write_domain, uint32_t offset) { - load_sized_register_mem(brw, reg, bo, read_domains, write_domain, offset, 2); + load_sized_register_mem(brw, reg, bo, offset, 2); } /* @@ -907,15 +900,13 @@ brw_store_register_mem32(struct brw_context *brw, BEGIN_BATCH(4); OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2)); OUT_BATCH(reg); - OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - offset); + OUT_RELOC64(bo, RELOC_WRITE, offset); ADVANCE_BATCH(); } else { BEGIN_BATCH(3); OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); OUT_BATCH(reg); - OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - offset); + OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, offset); ADVANCE_BATCH(); } } @@ -936,23 +927,19 @@ brw_store_register_mem64(struct brw_context *brw, BEGIN_BATCH(8); OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2)); OUT_BATCH(reg); - OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - offset); + OUT_RELOC64(bo, RELOC_WRITE, offset); OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2)); OUT_BATCH(reg + sizeof(uint32_t)); - OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - offset + sizeof(uint32_t)); + OUT_RELOC64(bo, RELOC_WRITE, offset + sizeof(uint32_t)); ADVANCE_BATCH(); } else { BEGIN_BATCH(6); OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); OUT_BATCH(reg); - OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - offset); + OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, offset); OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); OUT_BATCH(reg + sizeof(uint32_t)); - OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - offset + sizeof(uint32_t)); + OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, offset + sizeof(uint32_t)); ADVANCE_BATCH(); } } @@ -1034,12 +1021,10 @@ brw_store_data_imm32(struct brw_context 
*brw, struct brw_bo *bo, BEGIN_BATCH(4); OUT_BATCH(MI_STORE_DATA_IMM | (4 - 2)); if (brw->gen >= 8) - OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - offset); + OUT_RELOC64(bo, RELOC_WRITE, offset); else { OUT_BATCH(0); /* MBZ */ - OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - offset); + OUT_RELOC(bo, RELOC_WRITE, offset); } OUT_BATCH(imm); ADVANCE_BATCH(); @@ -1057,12 +1042,10 @@ brw_store_data_imm64(struct brw_context *brw, struct brw_bo *bo, BEGIN_BATCH(5); OUT_BATCH(MI_STORE_DATA_IMM | (5 - 2)); if (brw->gen >= 8) - OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - offset); + OUT_RELOC64(bo, RELOC_WRITE, offset); else { OUT_BATCH(0); /* MBZ */ - OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - offset); + OUT_RELOC(bo, RELOC_WRITE, offset); } OUT_BATCH(imm & 0xffffffffu); OUT_BATCH(imm >> 32); diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h index f1a5c1fd51b..4661a2a9f66 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -70,9 +70,13 @@ bool brw_batch_has_aperture_space(struct brw_context *brw, bool brw_batch_references(struct intel_batchbuffer *batch, struct brw_bo *bo); -uint64_t brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset, - struct brw_bo *target, uint32_t target_offset, - uint32_t read_domains, uint32_t write_domain); +#define RELOC_WRITE EXEC_OBJECT_WRITE +#define RELOC_NEEDS_GGTT EXEC_OBJECT_NEEDS_GTT +uint64_t brw_emit_reloc(struct intel_batchbuffer *batch, + uint32_t batch_offset, + struct brw_bo *target, + uint32_t target_offset, + unsigned flags); #define USED_BATCH(batch) ((uintptr_t)((batch).map_next - (batch).map)) @@ -158,20 +162,18 @@ intel_batchbuffer_advance(struct brw_context *brw) #define OUT_BATCH(d) *__map++ = (d) #define OUT_BATCH_F(f) OUT_BATCH(float_as_int((f))) -#define OUT_RELOC(buf, 
read_domains, write_domain, delta) do { \ +#define OUT_RELOC(buf, flags, delta) do { \ uint32_t __offset = (__map - brw->batch.map) * 4; \ uint32_t reloc = \ - brw_emit_reloc(&brw->batch, __offset, (buf), (delta), \ - (read_domains), (write_domain)); \ + brw_emit_reloc(&brw->batch, __offset, (buf), (delta), (flags)); \ OUT_BATCH(reloc); \ } while (0) /* Handle 48-bit address relocations for Gen8+ */ -#define OUT_RELOC64(buf, read_domains, write_domain, delta) do { \ +#define OUT_RELOC64(buf, flags, delta) do { \ uint32_t __offset = (__map - brw->batch.map) * 4; \ uint64_t reloc64 = \ - brw_emit_reloc(&brw->batch, __offset, (buf), (delta), \ - (read_domains), (write_domain)); \ + brw_emit_reloc(&brw->batch, __offset, (buf), (delta), (flags)); \ OUT_BATCH(reloc64); \ OUT_BATCH(reloc64 >> 32); \ } while (0) diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index a9cdf489f15..1da37ed7305 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -596,24 +596,16 @@ intelEmitCopyBlit(struct brw_context *brw, OUT_BATCH(SET_FIELD(dst_y, BLT_Y) | SET_FIELD(dst_x, BLT_X)); OUT_BATCH(SET_FIELD(dst_y2, BLT_Y) | SET_FIELD(dst_x2, BLT_X)); if (brw->gen >= 8) { - OUT_RELOC64(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); + OUT_RELOC64(dst_buffer, RELOC_WRITE, dst_offset); } else { - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); + OUT_RELOC(dst_buffer, RELOC_WRITE, dst_offset); } OUT_BATCH(SET_FIELD(src_y, BLT_Y) | SET_FIELD(src_x, BLT_X)); OUT_BATCH((uint16_t)src_pitch); if (brw->gen >= 8) { - OUT_RELOC64(src_buffer, - I915_GEM_DOMAIN_RENDER, 0, - src_offset); + OUT_RELOC64(src_buffer, 0, src_offset); } else { - OUT_RELOC(src_buffer, - I915_GEM_DOMAIN_RENDER, 0, - src_offset); + OUT_RELOC(src_buffer, 0, src_offset); } ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled); @@ -681,13 +673,9 @@ intelEmitImmediateColorExpandBlit(struct 
brw_context *brw, OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */ if (brw->gen >= 8) { - OUT_RELOC64(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); + OUT_RELOC64(dst_buffer, RELOC_WRITE, dst_offset); } else { - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); + OUT_RELOC(dst_buffer, RELOC_WRITE, dst_offset); } OUT_BATCH(0); /* bg */ OUT_BATCH(fg_color); /* fg */ @@ -828,13 +816,9 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw, OUT_BATCH(SET_FIELD(y + chunk_y + chunk_h, BLT_Y) | SET_FIELD(x + chunk_x + chunk_w, BLT_X)); if (brw->gen >= 8) { - OUT_RELOC64(mt->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - mt->offset + offset); + OUT_RELOC64(mt->bo, RELOC_WRITE, mt->offset + offset); } else { - OUT_RELOC(mt->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - mt->offset + offset); + OUT_RELOC(mt->bo, RELOC_WRITE, mt->offset + offset); } OUT_BATCH(0xffffffff); /* white, but only alpha gets written */ ADVANCE_BATCH_TILED(dst_y_tiled, false); -- cgit v1.2.3