summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Eric Anholt <eric@anholt.net> 2011-04-24 20:20:16 -0700
committer Eric Anholt <eric@anholt.net> 2011-04-29 15:26:50 -0700
commit 8ba0c025a4e0aba97ae596e2121416cf04c0c300 (patch)
tree a5c4016fa252363d97c85facbccc96c63792dec8
parent d6ba7b16039b3cf03903888df23732cbb358e810 (diff)
i965/gen4: Move clip state to state streaming
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_clip_state.c 157
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h 4
-rw-r--r-- src/mesa/drivers/dri/i965/brw_misc_state.c 4
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vtbl.c 1
4 files changed, 60 insertions, 106 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c
index 60fd5fa7d9e..6015c8cbe9f 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_state.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_state.c
@@ -33,148 +33,101 @@
#include "brw_state.h"
#include "brw_defines.h"
-struct brw_clip_unit_key {
- unsigned int total_grf;
- unsigned int urb_entry_read_length;
- unsigned int curb_entry_read_length;
- unsigned int clip_mode;
-
- unsigned int curbe_offset;
-
- unsigned int nr_urb_entries, urb_size;
-
- GLboolean depth_clamp;
-};
-
static void
-clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
-{
- struct gl_context *ctx = &brw->intel.ctx;
- memset(key, 0, sizeof(*key));
-
- /* CACHE_NEW_CLIP_PROG */
- key->total_grf = brw->clip.prog_data->total_grf;
- key->urb_entry_read_length = brw->clip.prog_data->urb_read_length;
- key->curb_entry_read_length = brw->clip.prog_data->curb_read_length;
- key->clip_mode = brw->clip.prog_data->clip_mode;
-
- /* BRW_NEW_CURBE_OFFSETS */
- key->curbe_offset = brw->curbe.clip_start;
-
- /* BRW_NEW_URB_FENCE */
- key->nr_urb_entries = brw->urb.nr_clip_entries;
- key->urb_size = brw->urb.vsize;
-
- /* _NEW_TRANSOFORM */
- key->depth_clamp = ctx->Transform.DepthClamp;
-}
-
-static drm_intel_bo *
-clip_unit_create_from_key(struct brw_context *brw,
- struct brw_clip_unit_key *key)
+brw_prepare_clip_unit(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
- struct brw_clip_unit_state clip;
- drm_intel_bo *bo;
+ struct gl_context *ctx = &intel->ctx;
+ struct brw_clip_unit_state *clip;
- memset(&clip, 0, sizeof(clip));
+ clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset);
+ memset(clip, 0, sizeof(*clip));
- clip.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+ /* CACHE_NEW_CLIP_PROG */
+ clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) /
+ 16 - 1);
/* reloc */
- clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
+ clip->thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
- clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
- clip.thread1.single_program_flow = 1;
+ clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ clip->thread1.single_program_flow = 1;
- clip.thread3.urb_entry_read_length = key->urb_entry_read_length;
- clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
- clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- clip.thread3.dispatch_grf_start_reg = 1;
- clip.thread3.urb_entry_read_offset = 0;
+ clip->thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length;
+ clip->thread3.const_urb_entry_read_length =
+ brw->clip.prog_data->curb_read_length;
+
+ /* BRW_NEW_CURBE_OFFSETS */
+ clip->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
+ clip->thread3.dispatch_grf_start_reg = 1;
+ clip->thread3.urb_entry_read_offset = 0;
- clip.thread4.nr_urb_entries = key->nr_urb_entries;
- clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
+ /* BRW_NEW_URB_FENCE */
+ clip->thread4.nr_urb_entries = brw->urb.nr_clip_entries;
+ clip->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
/* If we have enough clip URB entries to run two threads, do so.
*/
- if (key->nr_urb_entries >= 10) {
+ if (brw->urb.nr_clip_entries >= 10) {
/* Half of the URB entries go to each thread, and it has to be an
* even number.
*/
- assert(key->nr_urb_entries % 2 == 0);
+ assert(brw->urb.nr_clip_entries % 2 == 0);
/* Although up to 16 concurrent Clip threads are allowed on Ironlake,
* only 2 threads can output VUEs at a time.
*/
if (intel->gen == 5)
- clip.thread4.max_threads = 16 - 1;
+ clip->thread4.max_threads = 16 - 1;
else
- clip.thread4.max_threads = 2 - 1;
+ clip->thread4.max_threads = 2 - 1;
} else {
- assert(key->nr_urb_entries >= 5);
- clip.thread4.max_threads = 1 - 1;
+ assert(brw->urb.nr_clip_entries >= 5);
+ clip->thread4.max_threads = 1 - 1;
}
if (unlikely(INTEL_DEBUG & DEBUG_SINGLE_THREAD))
- clip.thread4.max_threads = 0;
+ clip->thread4.max_threads = 0;
if (unlikely(INTEL_DEBUG & DEBUG_STATS))
- clip.thread4.stats_enable = 1;
-
- clip.clip5.userclip_enable_flags = 0x7f;
- clip.clip5.userclip_must_clip = 1;
- clip.clip5.guard_band_enable = 0;
- if (!key->depth_clamp)
- clip.clip5.viewport_z_clip_enable = 1;
- clip.clip5.viewport_xy_clip_enable = 1;
- clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
- clip.clip5.api_mode = BRW_CLIP_API_OGL;
- clip.clip5.clip_mode = key->clip_mode;
+ clip->thread4.stats_enable = 1;
- if (intel->is_g4x)
- clip.clip5.negative_w_clip_test = 1;
+ clip->clip5.userclip_enable_flags = 0x7f;
+ clip->clip5.userclip_must_clip = 1;
+ clip->clip5.guard_band_enable = 0;
+ /* _NEW_TRANSFORM */
+ if (!ctx->Transform.DepthClamp)
+ clip->clip5.viewport_z_clip_enable = 1;
+ clip->clip5.viewport_xy_clip_enable = 1;
+ clip->clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
+ clip->clip5.api_mode = BRW_CLIP_API_OGL;
+ clip->clip5.clip_mode = brw->clip.prog_data->clip_mode;
- clip.clip6.clipper_viewport_state_ptr = 0;
- clip.viewport_xmin = -1;
- clip.viewport_xmax = 1;
- clip.viewport_ymin = -1;
- clip.viewport_ymax = 1;
+ if (intel->is_g4x)
+ clip->clip5.negative_w_clip_test = 1;
- bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
- key, sizeof(*key),
- &brw->clip.prog_bo, 1,
- &clip, sizeof(clip));
+ clip->clip6.clipper_viewport_state_ptr = 0;
+ clip->viewport_xmin = -1;
+ clip->viewport_xmax = 1;
+ clip->viewport_ymin = -1;
+ clip->viewport_ymax = 1;
/* Emit clip program relocation */
assert(brw->clip.prog_bo);
- drm_intel_bo_emit_reloc(bo, offsetof(struct brw_clip_unit_state, thread0),
- brw->clip.prog_bo, clip.thread0.grf_reg_count << 1,
+ drm_intel_bo_emit_reloc(intel->batch.bo,
+ (brw->clip.state_offset +
+ offsetof(struct brw_clip_unit_state, thread0)),
+ brw->clip.prog_bo, clip->thread0.grf_reg_count << 1,
I915_GEM_DOMAIN_INSTRUCTION, 0);
- return bo;
-}
-
-static void upload_clip_unit( struct brw_context *brw )
-{
- struct brw_clip_unit_key key;
-
- clip_unit_populate_key(brw, &key);
-
- drm_intel_bo_unreference(brw->clip.state_bo);
- brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
- &key, sizeof(key),
- &brw->clip.prog_bo, 1,
- NULL);
- if (brw->clip.state_bo == NULL) {
- brw->clip.state_bo = clip_unit_create_from_key(brw, &key);
- }
+ brw->state.dirty.cache |= CACHE_NEW_CLIP_UNIT;
}
const struct brw_tracked_state brw_clip_unit = {
.dirty = {
.mesa = _NEW_TRANSFORM,
- .brw = (BRW_NEW_CURBE_OFFSETS |
+ .brw = (BRW_NEW_BATCH |
+ BRW_NEW_CURBE_OFFSETS |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_CLIP_PROG
},
- .prepare = upload_clip_unit,
+ .prepare = brw_prepare_clip_unit,
};
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index a81d6157390..4b97bfb2ac5 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -655,7 +655,9 @@ struct brw_context
struct brw_clip_prog_data *prog_data;
drm_intel_bo *prog_bo;
- drm_intel_bo *state_bo;
+
+ /* Offset in the batch to the CLIP state on pre-gen6. */
+ uint32_t state_offset;
/* As of gen6, this is the offset in the batch to the CLIP VP,
* instead of vp_bo.
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index b61a7ad099e..3552cce62ad 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -149,7 +149,8 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
else
OUT_BATCH(0);
- OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+ OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ brw->clip.state_offset | 1);
OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
brw->sf.state_offset);
OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
@@ -165,7 +166,6 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
static void prepare_psp_urb_cbs(struct brw_context *brw)
{
brw_add_validated_bo(brw, brw->gs.state_bo);
- brw_add_validated_bo(brw, brw->clip.state_bo);
}
static void upload_psp_urb_cbs(struct brw_context *brw )
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index 8d1497c8215..49d771133bf 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -80,7 +80,6 @@ static void brw_destroy_context( struct intel_context *intel )
dri_bo_release(&brw->gs.prog_bo);
dri_bo_release(&brw->gs.state_bo);
dri_bo_release(&brw->clip.prog_bo);
- dri_bo_release(&brw->clip.state_bo);
dri_bo_release(&brw->sf.prog_bo);
dri_bo_release(&brw->wm.prog_bo);
dri_bo_release(&brw->wm.const_bo);