summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gem.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c237
1 files changed, 151 insertions, 86 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 22071cbc206f..e57061ac0219 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1713,8 +1713,8 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
/**
* i915_gem_fault - fault a page into the GTT
- * vma: VMA in question
- * vmf: fault info
+ * @vma: VMA in question
+ * @vmf: fault info
*
* The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
* from userspace. The fault handler takes care of binding the object to
@@ -3208,7 +3208,7 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
old_write_domain);
}
-int i915_vma_unbind(struct i915_vma *vma)
+static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
{
struct drm_i915_gem_object *obj = vma->obj;
struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
@@ -3227,9 +3227,11 @@ int i915_vma_unbind(struct i915_vma *vma)
BUG_ON(obj->pages == NULL);
- ret = i915_gem_object_wait_rendering(obj, false);
- if (ret)
- return ret;
+ if (wait) {
+ ret = i915_gem_object_wait_rendering(obj, false);
+ if (ret)
+ return ret;
+ }
if (i915_is_ggtt(vma->vm) &&
vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
@@ -3274,6 +3276,16 @@ int i915_vma_unbind(struct i915_vma *vma)
return 0;
}
+int i915_vma_unbind(struct i915_vma *vma)
+{
+ return __i915_vma_unbind(vma, true);
+}
+
+int __i915_vma_unbind_no_wait(struct i915_vma *vma)
+{
+ return __i915_vma_unbind(vma, false);
+}
+
int i915_gpu_idle(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3354,11 +3366,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
u32 fence_alignment, unfenced_alignment;
+ u32 search_flag, alloc_flag;
+ u64 start, end;
u64 size, fence_size;
- u64 start =
- flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
- u64 end =
- flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
struct i915_vma *vma;
int ret;
@@ -3398,6 +3408,13 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
}
+ start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
+ end = vm->total;
+ if (flags & PIN_MAPPABLE)
+ end = min_t(u64, end, dev_priv->gtt.mappable_end);
+ if (flags & PIN_ZONE_4G)
+ end = min_t(u64, end, (1ULL << 32));
+
if (alignment == 0)
alignment = flags & PIN_MAPPABLE ? fence_alignment :
unfenced_alignment;
@@ -3433,13 +3450,21 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
if (IS_ERR(vma))
goto err_unpin;
+ if (flags & PIN_HIGH) {
+ search_flag = DRM_MM_SEARCH_BELOW;
+ alloc_flag = DRM_MM_CREATE_TOP;
+ } else {
+ search_flag = DRM_MM_SEARCH_DEFAULT;
+ alloc_flag = DRM_MM_CREATE_DEFAULT;
+ }
+
search_free:
ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
size, alignment,
obj->cache_level,
start, end,
- DRM_MM_SEARCH_DEFAULT,
- DRM_MM_CREATE_DEFAULT);
+ search_flag,
+ alloc_flag);
if (ret) {
ret = i915_gem_evict_something(dev, vm, size, alignment,
obj->cache_level,
@@ -3632,53 +3657,106 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
return 0;
}
+/**
+ * Changes the cache-level of an object across all VMA.
+ *
+ * After this function returns, the object will be in the new cache-level
+ * across all GTT and the contents of the backing storage will be coherent,
+ * with respect to the new cache-level. In order to keep the backing storage
+ * coherent for all users, we only allow a single cache level to be set
+ * globally on the object and prevent it from being changed whilst the
+ * hardware is reading from the object. That is if the object is currently
+ * on the scanout it will be set to uncached (or equivalent display
+ * cache coherency) and all non-MOCS GPU access will also be uncached so
+ * that all direct access to the scanout remains coherent.
+ */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
enum i915_cache_level cache_level)
{
struct drm_device *dev = obj->base.dev;
struct i915_vma *vma, *next;
+ bool bound = false;
int ret = 0;
if (obj->cache_level == cache_level)
goto out;
- if (i915_gem_obj_is_pinned(obj)) {
- DRM_DEBUG("can not change the cache level of pinned objects\n");
- return -EBUSY;
- }
-
+ /* Inspect the list of currently bound VMA and unbind any that would
+ * be invalid given the new cache-level. This is principally to
+ * catch the issue of the CS prefetch crossing page boundaries and
+ * reading an invalid PTE on older architectures.
+ */
list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
+ if (!drm_mm_node_allocated(&vma->node))
+ continue;
+
+ if (vma->pin_count) {
+ DRM_DEBUG("can not change the cache level of pinned objects\n");
+ return -EBUSY;
+ }
+
if (!i915_gem_valid_gtt_space(vma, cache_level)) {
ret = i915_vma_unbind(vma);
if (ret)
return ret;
- }
+ } else
+ bound = true;
}
- if (i915_gem_obj_bound_any(obj)) {
+ /* We can reuse the existing drm_mm nodes but need to change the
+ * cache-level on the PTE. We could simply unbind them all and
+ * rebind with the correct cache-level on next use. However since
+ * we already have a valid slot, dma mapping, pages etc, we may as
+ * rewrite the PTE in the belief that doing so tramples upon less
+ * state and so involves less work.
+ */
+ if (bound) {
+ /* Before we change the PTE, the GPU must not be accessing it.
+ * If we wait upon the object, we know that all the bound
+ * VMA are no longer active.
+ */
ret = i915_gem_object_wait_rendering(obj, false);
if (ret)
return ret;
- i915_gem_object_finish_gtt(obj);
-
- /* Before SandyBridge, you could not use tiling or fence
- * registers with snooped memory, so relinquish any fences
- * currently pointing to our region in the aperture.
- */
- if (INTEL_INFO(dev)->gen < 6) {
+ if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
+ /* Access to snoopable pages through the GTT is
+ * incoherent and on some machines causes a hard
+ * lockup. Relinquish the CPU mmaping to force
+ * userspace to refault in the pages and we can
+ * then double check if the GTT mapping is still
+ * valid for that pointer access.
+ */
+ i915_gem_release_mmap(obj);
+
+ /* As we no longer need a fence for GTT access,
+ * we can relinquish it now (and so prevent having
+ * to steal a fence from someone else on the next
+ * fence request). Note GPU activity would have
+ * dropped the fence as all snoopable access is
+ * supposed to be linear.
+ */
ret = i915_gem_object_put_fence(obj);
if (ret)
return ret;
+ } else {
+ /* We either have incoherent backing store and
+ * so no GTT access or the architecture is fully
+ * coherent. In such cases, existing GTT mmaps
+ * ignore the cache bit in the PTE and we can
+ * rewrite it without confusing the GPU or having
+ * to force userspace to fault back in its mmaps.
+ */
}
- list_for_each_entry(vma, &obj->vma_list, vma_link)
- if (drm_mm_node_allocated(&vma->node)) {
- ret = i915_vma_bind(vma, cache_level,
- PIN_UPDATE);
- if (ret)
- return ret;
- }
+ list_for_each_entry(vma, &obj->vma_list, vma_link) {
+ if (!drm_mm_node_allocated(&vma->node))
+ continue;
+
+ ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
+ if (ret)
+ return ret;
+ }
}
list_for_each_entry(vma, &obj->vma_list, vma_link)
@@ -3686,6 +3764,10 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
obj->cache_level = cache_level;
out:
+ /* Flush the dirty CPU caches to the backing storage so that the
+ * object is now coherent at its new cache level (with respect
+ * to the access domain).
+ */
if (obj->cache_dirty &&
obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
cpu_write_needs_clflush(obj)) {
@@ -4533,22 +4615,6 @@ void i915_gem_init_swizzling(struct drm_device *dev)
BUG();
}
-static bool
-intel_enable_blt(struct drm_device *dev)
-{
- if (!HAS_BLT(dev))
- return false;
-
- /* The blitter was dysfunctional on early prototypes */
- if (IS_GEN6(dev) && dev->pdev->revision < 8) {
- DRM_INFO("BLT not supported on this pre-production hardware;"
- " graphics performance will be degraded.\n");
- return false;
- }
-
- return true;
-}
-
static void init_unused_ring(struct drm_device *dev, u32 base)
{
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -4591,7 +4657,7 @@ int i915_gem_init_rings(struct drm_device *dev)
goto cleanup_render_ring;
}
- if (intel_enable_blt(dev)) {
+ if (HAS_BLT(dev)) {
ret = intel_init_blt_ring_buffer(dev);
if (ret)
goto cleanup_bsd_ring;
@@ -4609,14 +4675,8 @@ int i915_gem_init_rings(struct drm_device *dev)
goto cleanup_vebox_ring;
}
- ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
- if (ret)
- goto cleanup_bsd2_ring;
-
return 0;
-cleanup_bsd2_ring:
- intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]);
cleanup_vebox_ring:
intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
cleanup_blt_ring:
@@ -4687,21 +4747,32 @@ i915_gem_init_hw(struct drm_device *dev)
}
/* We can't enable contexts until all firmware is loaded */
- ret = intel_guc_ucode_load(dev);
- if (ret) {
- /*
- * If we got an error and GuC submission is enabled, map
- * the error to -EIO so the GPU will be declared wedged.
- * OTOH, if we didn't intend to use the GuC anyway, just
- * discard the error and carry on.
- */
- DRM_ERROR("Failed to initialize GuC, error %d%s\n", ret,
- i915.enable_guc_submission ? "" : " (ignored)");
- ret = i915.enable_guc_submission ? -EIO : 0;
- if (ret)
- goto out;
+ if (HAS_GUC_UCODE(dev)) {
+ ret = intel_guc_ucode_load(dev);
+ if (ret) {
+ /*
+ * If we got an error and GuC submission is enabled, map
+ * the error to -EIO so the GPU will be declared wedged.
+ * OTOH, if we didn't intend to use the GuC anyway, just
+ * discard the error and carry on.
+ */
+ DRM_ERROR("Failed to initialize GuC, error %d%s\n", ret,
+ i915.enable_guc_submission ? "" :
+ " (ignored)");
+ ret = i915.enable_guc_submission ? -EIO : 0;
+ if (ret)
+ goto out;
+ }
}
+ /*
+ * Increment the next seqno by 0x100 so we have a visible break
+ * on re-initialisation
+ */
+ ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100);
+ if (ret)
+ goto out;
+
/* Now it is safe to go back round and do everything else: */
for_each_ring(ring, dev_priv, i) {
struct drm_i915_gem_request *req;
@@ -4839,18 +4910,6 @@ init_ring_lists(struct intel_engine_cs *ring)
INIT_LIST_HEAD(&ring->request_list);
}
-void i915_init_vm(struct drm_i915_private *dev_priv,
- struct i915_address_space *vm)
-{
- if (!i915_is_ggtt(vm))
- drm_mm_init(&vm->mm, vm->start, vm->total);
- vm->dev = dev_priv->dev;
- INIT_LIST_HEAD(&vm->active_list);
- INIT_LIST_HEAD(&vm->inactive_list);
- INIT_LIST_HEAD(&vm->global_link);
- list_add_tail(&vm->global_link, &dev_priv->vm_list);
-}
-
void
i915_gem_load(struct drm_device *dev)
{
@@ -4874,8 +4933,6 @@ i915_gem_load(struct drm_device *dev)
NULL);
INIT_LIST_HEAD(&dev_priv->vm_list);
- i915_init_vm(dev_priv, &dev_priv->gtt.base);
-
INIT_LIST_HEAD(&dev_priv->context_list);
INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
INIT_LIST_HEAD(&dev_priv->mm.bound_list);
@@ -4903,6 +4960,14 @@ i915_gem_load(struct drm_device *dev)
dev_priv->num_fence_regs =
I915_READ(vgtif_reg(avail_rs.fence_num));
+ /*
+ * Set initial sequence number for requests.
+ * Using this number allows the wraparound to happen early,
+ * catching any obvious problems.
+ */
+ dev_priv->next_seqno = ((u32)~0 - 0x1100);
+ dev_priv->last_seqno = ((u32)~0 - 0x1101);
+
/* Initialize fence registers to zero */
INIT_LIST_HEAD(&dev_priv->mm.fence_list);
i915_gem_restore_fences(dev);
@@ -4972,9 +5037,9 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file)
/**
* i915_gem_track_fb - update frontbuffer tracking
- * old: current GEM buffer for the frontbuffer slots
- * new: new GEM buffer for the frontbuffer slots
- * frontbuffer_bits: bitmask of frontbuffer slots
+ * @old: current GEM buffer for the frontbuffer slots
+ * @new: new GEM buffer for the frontbuffer slots
+ * @frontbuffer_bits: bitmask of frontbuffer slots
*
* This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
* from @old and setting them in @new. Both @old and @new can be NULL.