summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net> 2013-01-18 18:18:57 -0800
committer: Eric Anholt <eric@anholt.net> 2013-01-29 11:25:14 +1100
commit: 99fe2b36cf5f9ff221be8de42c8649b26707972d (patch)
tree: eca09101b3cc07ce23997fd65c9cf1f60b6735c4
parent: e1598cb642334c809e6ec219d793e7bc85a213de (diff)
intel: Use a CPU map of the batch on LLC-sharing architectures.
Before, we were keeping a CPU-only buffer to accumulate the batchbuffer in, which was an improvement over mapping the batch through the GTT directly (since any readback or other failure to stream through write combining correctly would hurt). However, on LLC-sharing architectures we can do better by mapping the batch directly, which reduces the cache footprint of the application since we no longer have this extra copy of a batchbuffer around. Improves performance of GLBenchmark 2.1 offscreen on IVB by 3.5% +/- 0.4% (n=21). Improves Lightsmark performance by 1.1% +/- 0.1% (n=76). Improves cairo-gl performance by 1.9% +/- 1.4% (n=57). No statistically significant difference in GLB2.1 on SNB (n=37). Improves cairo-gl performance by 2.1% +/- 0.1% (n=278).
-rw-r--r-- src/mesa/drivers/dri/intel/intel_batchbuffer.c 26
-rw-r--r-- src/mesa/drivers/dri/intel/intel_batchbuffer.h 2
-rw-r--r-- src/mesa/drivers/dri/intel/intel_context.c 2
-rw-r--r-- src/mesa/drivers/dri/intel/intel_context.h 3
4 files changed, 24 insertions, 9 deletions
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index d36dacc6109..8c6524e71af 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -68,6 +68,11 @@ intel_batchbuffer_init(struct intel_context *intel)
68 "pipe_control workaround", 68 "pipe_control workaround",
69 4096, 4096); 69 4096, 4096);
70 } 70 }
71
72 if (!intel->has_llc) {
73 intel->batch.cpu_map = malloc(intel->maxBatchSize);
74 intel->batch.map = intel->batch.cpu_map;
75 }
71} 76}
72 77
73static void 78static void
@@ -83,6 +88,10 @@ intel_batchbuffer_reset(struct intel_context *intel)
83 88
84 intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer", 89 intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer",
85 intel->maxBatchSize, 4096); 90 intel->maxBatchSize, 4096);
91 if (intel->has_llc) {
92 drm_intel_bo_map(intel->batch.bo, true);
93 intel->batch.map = intel->batch.bo->virtual;
94 }
86 95
87 intel->batch.reserved_space = BATCH_RESERVED; 96 intel->batch.reserved_space = BATCH_RESERVED;
88 intel->batch.state_batch_offset = intel->batch.bo->size; 97 intel->batch.state_batch_offset = intel->batch.bo->size;
@@ -114,6 +123,7 @@ intel_batchbuffer_reset_to_saved(struct intel_context *intel)
114void 123void
115intel_batchbuffer_free(struct intel_context *intel) 124intel_batchbuffer_free(struct intel_context *intel)
116{ 125{
126 free(intel->batch.cpu_map);
117 drm_intel_bo_unreference(intel->batch.last_bo); 127 drm_intel_bo_unreference(intel->batch.last_bo);
118 drm_intel_bo_unreference(intel->batch.bo); 128 drm_intel_bo_unreference(intel->batch.bo);
119 drm_intel_bo_unreference(intel->batch.workaround_bo); 129 drm_intel_bo_unreference(intel->batch.workaround_bo);
@@ -168,12 +178,16 @@ do_flush_locked(struct intel_context *intel)
168 struct intel_batchbuffer *batch = &intel->batch; 178 struct intel_batchbuffer *batch = &intel->batch;
169 int ret = 0; 179 int ret = 0;
170 180
171 ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map); 181 if (intel->has_llc) {
172 if (ret == 0 && batch->state_batch_offset != batch->bo->size) { 182 drm_intel_bo_unmap(batch->bo);
173 ret = drm_intel_bo_subdata(batch->bo, 183 } else {
174 batch->state_batch_offset, 184 ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
175 batch->bo->size - batch->state_batch_offset, 185 if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
176 (char *)batch->map + batch->state_batch_offset); 186 ret = drm_intel_bo_subdata(batch->bo,
187 batch->state_batch_offset,
188 batch->bo->size - batch->state_batch_offset,
189 (char *)batch->map + batch->state_batch_offset);
190 }
177 } 191 }
178 192
179 if (!intel->intelScreen->no_hw) { 193 if (!intel->intelScreen->no_hw) {
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
index bae65553d08..39e7d26851d 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
@@ -112,7 +112,7 @@ intel_batchbuffer_require_space(struct intel_context *intel,
112 intel->batch.is_blit = is_blit; 112 intel->batch.is_blit = is_blit;
113 113
114#ifdef DEBUG 114#ifdef DEBUG
115 assert(sz < sizeof(intel->batch.map) - BATCH_RESERVED); 115 assert(sz < intel->maxBatchSize - BATCH_RESERVED);
116#endif 116#endif
117 if (intel_batchbuffer_space(intel) < sz) 117 if (intel_batchbuffer_space(intel) < sz)
118 intel_batchbuffer_flush(intel); 118 intel_batchbuffer_flush(intel);
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 3aa35e6d7f5..39460334b43 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -708,7 +708,7 @@ intelInitContext(struct intel_context *intel,
708 if (intel->gen < 4) 708 if (intel->gen < 4)
709 intel->maxBatchSize = 4096; 709 intel->maxBatchSize = 4096;
710 else 710 else
711 intel->maxBatchSize = sizeof(intel->batch.map); 711 intel->maxBatchSize = BATCH_SZ;
712 712
713 intel->bufmgr = intelScreen->bufmgr; 713 intel->bufmgr = intelScreen->bufmgr;
714 714
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index 80e4cac131d..af49ab137c3 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -129,7 +129,8 @@ struct intel_batchbuffer {
129 129
130 uint16_t emit, total; 130 uint16_t emit, total;
131 uint16_t used, reserved_space; 131 uint16_t used, reserved_space;
132 uint32_t map[8192]; 132 uint32_t *map;
133 uint32_t *cpu_map;
133#define BATCH_SZ (8192*sizeof(uint32_t)) 134#define BATCH_SZ (8192*sizeof(uint32_t))
134 135
135 uint32_t state_batch_offset; 136 uint32_t state_batch_offset;