author    Chris Wilson <chris@chris-wilson.co.uk>    2014-10-21 10:38:22 +0100
committer Chris Wilson <chris@chris-wilson.co.uk>    2014-10-21 20:30:21 +0100
commit    b4872f74c05a92c91d7b57146eb35ed2ecc86d97 (patch)
tree      4d281d1093c42e0b303e296d233fe157ae480762
parent    83a3d9147308f4777324abdea859ac0c108f03c6 (diff)
sna: Avoid pwriting large batches
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r--  src/sna/kgem.c  381
-rw-r--r--  src/sna/kgem.h   28
2 files changed, 239 insertions, 170 deletions
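
What the patch does, in brief: the fixed batch array embedded in struct kgem (uint32_t batch[64*1024-8]) is replaced by a pointer plus a dedicated batch bo. kgem_new_batch() allocates that bo up front and CPU-maps it, so commands are assembled directly in the buffer that will be executed; kgem_batch_write() (a pwrite) survives only for the pinned/cached small-batch paths and the non-LLC branch of kgem_create_batch(). The self-relocation fix-up moves from kgem_fixup_self_relocs() into kgem_fixup_relocs(), which also absorbs the shrink adjustment that compact_batch_surface() used to perform inline.

If the batch bo cannot be created or mapped, kgem_new_batch() falls back to a page-aligned plain allocation (the pre-patch arrangement, submitted via pwrite). A minimal standalone sketch of just that fallback — the constants come from the patch, everything else is scaffolding for the example:

    #define _POSIX_C_SOURCE 200112L
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define PAGE_SIZE 4096UL
    #define ALIGN(v, a) (((v) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
        /* batch_size as set by the patched kgem_init(): dwords, not bytes */
        uint16_t batch_size = UINT16_MAX & ~7;
        uint32_t *batch = NULL;
        size_t bytes = ALIGN(sizeof(uint32_t) * batch_size, PAGE_SIZE);

        /* the posix_memalign fallback path from kgem_new_batch() */
        if (posix_memalign((void **)&batch, PAGE_SIZE, bytes) != 0)
            return 1;

        printf("%u-dword batch, %zu bytes at %p\n",
               (unsigned)batch_size, bytes, (void *)batch);
        free(batch);
        return 0;
    }
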
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 234d1d06..25ed0ada 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1252,6 +1252,99 @@ out:
gem_close(kgem->fd, tiling.handle);
}
+static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink)
+{
+ int n;
+
+ DBG(("%s: shrinking by %d\n", __FUNCTION__, shrink));
+
+ bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;
+
+ assert(kgem->nreloc__self <= 256);
+ if (kgem->nreloc__self) {
+ for (n = 0; n < kgem->nreloc__self; n++) {
+ int i = kgem->reloc__self[n];
+ assert(kgem->reloc[i].target_handle == ~0U);
+ kgem->reloc[i].target_handle = bo->target_handle;
+ kgem->reloc[i].presumed_offset = bo->presumed_offset;
+ kgem->batch[kgem->reloc[i].offset/sizeof(kgem->batch[0])] =
+ kgem->reloc[i].delta + bo->presumed_offset - shrink;
+ }
+
+ if (n == 256) {
+ for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) {
+ if (kgem->reloc[n].target_handle == ~0U) {
+ kgem->reloc[n].target_handle = bo->target_handle;
+ kgem->reloc[n].presumed_offset = bo->presumed_offset;
+ kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
+ kgem->reloc[n].delta + bo->presumed_offset - shrink;
+ }
+ }
+ }
+ }
+
+ if (shrink) {
+ for (n = 0; n < kgem->nreloc; n++) {
+ if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION &&
+ kgem->reloc[n].target_handle == bo->target_handle)
+ kgem->reloc[n].delta -= shrink;
+
+ if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
+ kgem->reloc[n].offset -= shrink;
+ }
+ }
+}
+
+static struct kgem_bo *kgem_new_batch(struct kgem *kgem)
+{
+ struct kgem_bo *last;
+ unsigned flags;
+
+ last = kgem->batch_bo;
+ if (last) {
+ kgem_fixup_relocs(kgem, last, 0);
+ kgem->batch = NULL;
+ }
+
+ if (kgem->batch) {
+ assert(last == NULL);
+ return NULL;
+ }
+
+ flags = CREATE_CPU_MAP | CREATE_NO_THROTTLE;
+ if (!kgem->has_llc)
+ flags |= CREATE_UNCACHED;
+
+ kgem->batch_bo = kgem_create_linear(kgem,
+ sizeof(uint32_t)*kgem->batch_size,
+ flags);
+ if (kgem->batch_bo)
+ kgem->batch = kgem_bo_map__cpu(kgem, kgem->batch_bo);
+ if (kgem->batch == NULL) {
+ DBG(("%s: unable to map batch bo, mallocing(size=%d)\n",
+ __FUNCTION__,
+ sizeof(uint32_t)*kgem->batch_size));
+ if (kgem->batch_bo) {
+ kgem_bo_destroy(kgem, kgem->batch_bo);
+ kgem->batch_bo = NULL;
+ }
+
+ if (posix_memalign((void **)&kgem->batch, PAGE_SIZE,
+ ALIGN(sizeof(uint32_t) * kgem->batch_size, PAGE_SIZE))) {
+ ERR(("%s: batch allocation failed, disabling acceleration\n", __FUNCTION__));
+ __kgem_set_wedged(kgem);
+ }
+ } else {
+ DBG(("%s: allocated and mapped batch handle=%d [size=%d]\n",
+ __FUNCTION__, kgem->batch_bo->handle,
+ sizeof(uint32_t)*kgem->batch_size));
+ kgem_bo_sync__cpu(kgem, kgem->batch_bo);
+ }
+
+ DBG(("%s: using last batch handle=%d\n",
+ __FUNCTION__, last ? last->handle : 0));
+ return last;
+}
void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
{
@@ -1362,7 +1455,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
kgem->wedged = 1;
}
- kgem->batch_size = ARRAY_SIZE(kgem->batch);
+ kgem->batch_size = UINT16_MAX & ~7;
if (gen == 020 && !kgem->has_pinned_batches)
/* Limited to what we can pin */
kgem->batch_size = 4*1024;
@@ -1382,6 +1475,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
DBG(("%s: maximum batch size? %d\n", __FUNCTION__,
kgem->batch_size));
+ kgem_new_batch(kgem);
kgem->half_cpu_cache_pages = cpu_cache_size() >> 13;
DBG(("%s: last-level cache size: %d bytes, threshold in pages: %d\n",
@@ -1803,36 +1897,6 @@ static uint32_t kgem_end_batch(struct kgem *kgem)
return kgem->nbatch;
}
-static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo)
-{
- int n;
-
- assert(kgem->nreloc__self <= 256);
- if (kgem->nreloc__self == 0)
- return;
-
- for (n = 0; n < kgem->nreloc__self; n++) {
- int i = kgem->reloc__self[n];
- assert(kgem->reloc[i].target_handle == ~0U);
- kgem->reloc[i].target_handle = bo->target_handle;
- kgem->reloc[i].presumed_offset = bo->presumed_offset;
- kgem->batch[kgem->reloc[i].offset/sizeof(kgem->batch[0])] =
- kgem->reloc[i].delta + bo->presumed_offset;
- }
-
- if (n == 256) {
- for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) {
- if (kgem->reloc[n].target_handle == ~0U) {
- kgem->reloc[n].target_handle = bo->target_handle;
- kgem->reloc[n].presumed_offset = bo->presumed_offset;
- kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
- kgem->reloc[n].delta + bo->presumed_offset;
- }
- }
-
- }
-}
-
static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo)
{
struct kgem_bo_binding *b;
@@ -3041,15 +3105,16 @@ void kgem_reset(struct kgem *kgem)
kgem->needs_reservation = false;
kgem->flush = 0;
kgem->batch_flags = kgem->batch_flags_base;
+ assert(kgem->batch);
kgem->next_request = __kgem_request_alloc(kgem);
kgem_sna_reset(kgem);
}
-static int compact_batch_surface(struct kgem *kgem)
+static int compact_batch_surface(struct kgem *kgem, int *shrink)
{
- int size, shrink, n;
+ int size, n;
if (!kgem->has_relaxed_delta)
return kgem->batch_size * sizeof(uint32_t);
@@ -3059,29 +3124,22 @@ static int compact_batch_surface(struct kgem *kgem)
size = n - kgem->surface + kgem->nbatch;
size = ALIGN(size, 1024);
- shrink = n - size;
- if (shrink) {
- DBG(("shrinking from %d to %d\n", kgem->batch_size, size));
-
- shrink *= sizeof(uint32_t);
- for (n = 0; n < kgem->nreloc; n++) {
- if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION &&
- kgem->reloc[n].target_handle == ~0U)
- kgem->reloc[n].delta -= shrink;
-
- if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
- kgem->reloc[n].offset -= shrink;
- }
- }
-
+ *shrink = (n - size) * sizeof(uint32_t);
return size * sizeof(uint32_t);
}
static struct kgem_bo *
-kgem_create_batch(struct kgem *kgem, int size)
+kgem_create_batch(struct kgem *kgem)
{
struct drm_i915_gem_set_domain set_domain;
struct kgem_bo *bo;
+ int shrink = 0;
+ int size;
+
+ if (kgem->surface != kgem->batch_size)
+ size = compact_batch_surface(kgem, &shrink);
+ else
+ size = kgem->nbatch * sizeof(uint32_t);
if (size <= 4096) {
bo = list_first_entry(&kgem->pinned_batches[0],
@@ -3091,7 +3149,8 @@ kgem_create_batch(struct kgem *kgem, int size)
out_4096:
assert(bo->refcnt > 0);
list_move_tail(&bo->list, &kgem->pinned_batches[0]);
- return kgem_bo_reference(bo);
+ bo = kgem_bo_reference(bo);
+ goto write;
}
if (!__kgem_busy(kgem, bo->handle)) {
@@ -3109,7 +3168,8 @@ out_4096:
out_16384:
assert(bo->refcnt > 0);
list_move_tail(&bo->list, &kgem->pinned_batches[1]);
- return kgem_bo_reference(bo);
+ bo = kgem_bo_reference(bo);
+ goto write;
}
if (!__kgem_busy(kgem, bo->handle)) {
@@ -3121,14 +3181,14 @@ out_16384:
if (kgem->gen == 020) {
bo = kgem_create_linear(kgem, size, CREATE_CACHED | CREATE_TEMPORARY);
if (bo)
- return bo;
+ goto write;
/* Nothing available for reuse, rely on the kernel wa */
if (kgem->has_pinned_batches) {
bo = kgem_create_linear(kgem, size, CREATE_CACHED | CREATE_TEMPORARY);
if (bo) {
kgem->batch_flags &= ~LOCAL_I915_EXEC_IS_PINNED;
- return bo;
+ goto write;
}
}
@@ -3152,11 +3212,26 @@ out_16384:
kgem_retire(kgem);
assert(bo->rq == NULL);
- return kgem_bo_reference(bo);
+ bo = kgem_bo_reference(bo);
+ goto write;
}
}
- return kgem_create_linear(kgem, size, CREATE_NO_THROTTLE);
+ bo = NULL;
+ if (!kgem->has_llc) {
+ bo = kgem_create_linear(kgem, size, CREATE_NO_THROTTLE);
+ if (bo) {
+write:
+ kgem_fixup_relocs(kgem, bo, shrink);
+ if (kgem_batch_write(kgem, bo->handle, size)) {
+ kgem_bo_destroy(kgem, bo);
+ return NULL;
+ }
+ }
+ }
+ if (bo == NULL)
+ bo = kgem_new_batch(kgem);
+ return bo;
}
#if !NDEBUG
@@ -3253,7 +3328,6 @@ void _kgem_submit(struct kgem *kgem)
{
struct kgem_request *rq;
uint32_t batch_end;
- int size;
assert(!DBG_NO_HW);
assert(!kgem->wedged);
@@ -3282,17 +3356,14 @@ void _kgem_submit(struct kgem *kgem)
__kgem_batch_debug(kgem, batch_end);
#endif
- if (kgem->surface != kgem->batch_size)
- size = compact_batch_surface(kgem);
- else
- size = kgem->nbatch * sizeof(kgem->batch[0]);
-
rq = kgem->next_request;
assert(rq->bo == NULL);
- rq->bo = kgem_create_batch(kgem, size);
+
+ rq->bo = kgem_create_batch(kgem);
if (rq->bo) {
+ struct drm_i915_gem_execbuffer2 execbuf;
uint32_t handle = rq->bo->handle;
- int i;
+ int i, ret;
assert(!rq->bo->needs_flush);
@@ -3306,113 +3377,105 @@ void _kgem_submit(struct kgem *kgem)
kgem->exec[i].rsvd1 = 0;
kgem->exec[i].rsvd2 = 0;
- rq->bo->target_handle = kgem->has_handle_lut ? i : handle;
rq->bo->exec = &kgem->exec[i];
rq->bo->rq = MAKE_REQUEST(rq, kgem->ring); /* useful sanity check */
list_add(&rq->bo->request, &rq->buffers);
rq->ring = kgem->ring == KGEM_BLT;
- kgem_fixup_self_relocs(kgem, rq->bo);
-
- if (kgem_batch_write(kgem, handle, size) == 0) {
- struct drm_i915_gem_execbuffer2 execbuf;
- int ret;
-
- memset(&execbuf, 0, sizeof(execbuf));
- execbuf.buffers_ptr = (uintptr_t)kgem->exec;
- execbuf.buffer_count = kgem->nexec;
- execbuf.batch_len = batch_end*sizeof(uint32_t);
- execbuf.flags = kgem->ring | kgem->batch_flags;
-
- if (DBG_DUMP) {
- int fd = open("/tmp/i915-batchbuffers.dump",
- O_WRONLY | O_CREAT | O_APPEND,
- 0666);
- if (fd != -1) {
- ret = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
- fd = close(fd);
- }
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = (uintptr_t)kgem->exec;
+ execbuf.buffer_count = kgem->nexec;
+ execbuf.batch_len = batch_end*sizeof(uint32_t);
+ execbuf.flags = kgem->ring | kgem->batch_flags;
+
+ if (DBG_DUMP) {
+ int fd = open("/tmp/i915-batchbuffers.dump",
+ O_WRONLY | O_CREAT | O_APPEND,
+ 0666);
+ if (fd != -1) {
+ ret = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
+ fd = close(fd);
}
+ }
- ret = do_execbuf(kgem, &execbuf);
- if (DEBUG_SYNC && ret == 0) {
- struct drm_i915_gem_set_domain set_domain;
+ ret = do_execbuf(kgem, &execbuf);
+ if (DEBUG_SYNC && ret == 0) {
+ struct drm_i915_gem_set_domain set_domain;
- VG_CLEAR(set_domain);
- set_domain.handle = handle;
- set_domain.read_domains = I915_GEM_DOMAIN_GTT;
- set_domain.write_domain = I915_GEM_DOMAIN_GTT;
+ VG_CLEAR(set_domain);
+ set_domain.handle = handle;
+ set_domain.read_domains = I915_GEM_DOMAIN_GTT;
+ set_domain.write_domain = I915_GEM_DOMAIN_GTT;
- ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
+ ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
+ }
+ if (ret < 0) {
+ kgem_throttle(kgem);
+ if (!kgem->wedged) {
+ xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
+ "Failed to submit rendering commands, disabling acceleration.\n");
+ __kgem_set_wedged(kgem);
}
- if (ret < 0) {
- kgem_throttle(kgem);
- if (!kgem->wedged) {
- xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
- "Failed to submit rendering commands, disabling acceleration.\n");
- __kgem_set_wedged(kgem);
- }
#if !NDEBUG
- ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n",
- kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
- kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, -ret);
+ ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n",
+ kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
+ kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, -ret);
- for (i = 0; i < kgem->nexec; i++) {
- struct kgem_bo *bo, *found = NULL;
+ for (i = 0; i < kgem->nexec; i++) {
+ struct kgem_bo *bo, *found = NULL;
- list_for_each_entry(bo, &kgem->next_request->buffers, request) {
- if (bo->handle == kgem->exec[i].handle) {
- found = bo;
- break;
- }
+ list_for_each_entry(bo, &kgem->next_request->buffers, request) {
+ if (bo->handle == kgem->exec[i].handle) {
+ found = bo;
+ break;
}
- ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n",
- i,
- kgem->exec[i].handle,
- (int)kgem->exec[i].offset,
- found ? kgem_bo_size(found) : -1,
- found ? found->tiling : -1,
- (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE),
- found ? found->snoop : -1,
- found ? found->purged : -1);
- }
- for (i = 0; i < kgem->nreloc; i++) {
- ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n",
- i,
- (int)kgem->reloc[i].offset,
- kgem->reloc[i].target_handle,
- kgem->reloc[i].delta,
- kgem->reloc[i].read_domains,
- kgem->reloc[i].write_domain,
- (int)kgem->reloc[i].presumed_offset);
}
+ ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n",
+ i,
+ kgem->exec[i].handle,
+ (int)kgem->exec[i].offset,
+ found ? kgem_bo_size(found) : -1,
+ found ? found->tiling : -1,
+ (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE),
+ found ? found->snoop : -1,
+ found ? found->purged : -1);
+ }
+ for (i = 0; i < kgem->nreloc; i++) {
+ ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n",
+ i,
+ (int)kgem->reloc[i].offset,
+ kgem->reloc[i].target_handle,
+ kgem->reloc[i].delta,
+ kgem->reloc[i].read_domains,
+ kgem->reloc[i].write_domain,
+ (int)kgem->reloc[i].presumed_offset);
+ }
- {
- struct drm_i915_gem_get_aperture aperture;
- if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0)
- ErrorF("Aperture size %lld, available %lld\n",
- (long long)aperture.aper_size,
- (long long)aperture.aper_available_size);
- }
+ {
+ struct drm_i915_gem_get_aperture aperture;
+ if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0)
+ ErrorF("Aperture size %lld, available %lld\n",
+ (long long)aperture.aper_size,
+ (long long)aperture.aper_available_size);
+ }
- if (ret == -ENOSPC)
- dump_gtt_info(kgem);
- if (ret == -EDEADLK)
- dump_fence_regs(kgem);
-
- if (DEBUG_SYNC) {
- int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
- if (fd != -1) {
- int ignored = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
- assert(ignored == batch_end*sizeof(uint32_t));
- close(fd);
- }
+ if (ret == -ENOSPC)
+ dump_gtt_info(kgem);
+ if (ret == -EDEADLK)
+ dump_fence_regs(kgem);
- FatalError("SNA: failed to submit batchbuffer, errno=%d\n", -ret);
+ if (DEBUG_SYNC) {
+ int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
+ if (fd != -1) {
+ int ignored = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
+ assert(ignored == batch_end*sizeof(uint32_t));
+ close(fd);
}
-#endif
+
+ FatalError("SNA: failed to submit batchbuffer, errno=%d\n", -ret);
}
+#endif
}
}
#if SHOW_BATCH_AFTER
@@ -4167,16 +4230,18 @@ struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags)
}
size = NUM_PAGES(size);
- bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags);
- if (bo) {
- assert(bo->domain != DOMAIN_GPU);
- ASSERT_IDLE(kgem, bo->handle);
- bo->refcnt = 1;
- return bo;
- }
+ if ((flags & CREATE_UNCACHED) == 0) {
+ bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags);
+ if (bo) {
+ assert(bo->domain != DOMAIN_GPU);
+ ASSERT_IDLE(kgem, bo->handle);
+ bo->refcnt = 1;
+ return bo;
+ }
- if (flags & CREATE_CACHED)
- return NULL;
+ if (flags & CREATE_CACHED)
+ return NULL;
+ }
handle = gem_create(kgem->fd, size);
if (handle == 0)
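
Before the kgem.h side, one detail worth spelling out: the shrink pass now centralised in kgem_fixup_relocs() applies two independent corrections to each relocation that targets the batch itself (read domain I915_GEM_DOMAIN_INSTRUCTION) when the surface-state block is compacted against the command stream. A self-contained toy with hypothetical numbers — the struct mimics drm_i915_gem_relocation_entry, but nothing here touches the kernel:

    #include <stdint.h>
    #include <stdio.h>

    struct reloc { uint32_t offset, delta; };

    int main(void)
    {
        uint32_t nbatch = 1024;      /* dwords of commands emitted */
        uint32_t shrink = 245760;    /* bytes removed between commands and surfaces */
        /* a relocation whose slot and target both sit in the surface block */
        struct reloc r = { .offset = 253952, .delta = 254016 };

        /* target moved down within the batch bo */
        r.delta -= shrink;
        /* the relocation entry itself sits past the command stream */
        if (r.offset >= sizeof(uint32_t) * nbatch)
            r.offset -= shrink;

        printf("delta=%u offset=%u\n", r.delta, r.offset);
        return 0;
    }
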
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 8bd57156..4a74f2ef 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -120,12 +120,22 @@ enum {
};
struct kgem {
- int fd;
unsigned wedged;
+ int fd;
unsigned gen;
uint32_t unique_id;
+ uint16_t nbatch;
+ uint16_t surface;
+ uint16_t nexec;
+ uint16_t nreloc;
+ uint16_t nreloc__self;
+ uint16_t nfence;
+ uint16_t batch_size;
+
+ uint32_t *batch;
+
enum kgem_mode {
/* order matches I915_EXEC_RING ordering */
KGEM_NONE = 0,
@@ -158,14 +168,6 @@ struct kgem {
#define I915_EXEC_SECURE (1<<9)
#define LOCAL_EXEC_OBJECT_WRITE (1<<2)
- uint16_t nbatch;
- uint16_t surface;
- uint16_t nexec;
- uint16_t nreloc;
- uint16_t nreloc__self;
- uint16_t nfence;
- uint16_t batch_size;
-
uint32_t flush:1;
uint32_t need_expire:1;
uint32_t need_purge:1;
@@ -217,8 +219,9 @@ struct kgem {
int16_t dst_x, int16_t dst_y,
uint16_t width, uint16_t height);
+ struct kgem_bo *batch_bo;
+
uint16_t reloc__self[256];
- uint32_t batch[64*1024-8] page_aligned;
struct drm_i915_gem_exec_object2 exec[384] page_aligned;
struct drm_i915_gem_relocation_entry reloc[8192] page_aligned;
@@ -299,8 +302,9 @@ enum {
CREATE_PRIME = 0x20,
CREATE_TEMPORARY = 0x40,
CREATE_CACHED = 0x80,
- CREATE_NO_RETIRE = 0x100,
- CREATE_NO_THROTTLE = 0x200,
+ CREATE_UNCACHED = 0x100,
+ CREATE_NO_RETIRE = 0x200,
+ CREATE_NO_THROTTLE = 0x400,
};
struct kgem_bo *kgem_create_2d(struct kgem *kgem,
int width,
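
A note on the kgem.h hunk above: wedging CREATE_UNCACHED in at 0x100 renumbers CREATE_NO_RETIRE and CREATE_NO_THROTTLE, so these flag values are not stable across this commit. The new flag's one consumer is kgem_new_batch(), which sets it (alongside CREATE_CPU_MAP) on non-LLC hardware so that kgem_create_linear() skips the bucket cache and returns a fresh bo. A standalone sketch of that flag selection — enum values copied from the hunk, has_llc a stand-in for kgem->has_llc:

    #include <stdio.h>

    enum {                            /* post-patch values from kgem.h */
        CREATE_UNCACHED    = 0x100,
        CREATE_NO_RETIRE   = 0x200,
        CREATE_NO_THROTTLE = 0x400,
    };

    int main(void)
    {
        int has_llc = 0;              /* assume a non-LLC machine */
        unsigned flags = CREATE_NO_THROTTLE;

        if (!has_llc)
            flags |= CREATE_UNCACHED; /* bypass the linear bo cache */

        printf("flags=%#x\n", flags);
        return 0;
    }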