Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 237
1 file changed, 151 insertions, 86 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 22071cbc206f..e57061ac0219 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1713,8 +1713,8 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 
 /**
  * i915_gem_fault - fault a page into the GTT
- * vma: VMA in question
- * vmf: fault info
+ * @vma: VMA in question
+ * @vmf: fault info
  *
  * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
  * from userspace. The fault handler takes care of binding the object to
@@ -3208,7 +3208,7 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
                                             old_write_domain);
 }
 
-int i915_vma_unbind(struct i915_vma *vma)
+static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
 {
         struct drm_i915_gem_object *obj = vma->obj;
         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
@@ -3227,9 +3227,11 @@ int i915_vma_unbind(struct i915_vma *vma)
 
         BUG_ON(obj->pages == NULL);
 
-        ret = i915_gem_object_wait_rendering(obj, false);
-        if (ret)
-                return ret;
+        if (wait) {
+                ret = i915_gem_object_wait_rendering(obj, false);
+                if (ret)
+                        return ret;
+        }
 
         if (i915_is_ggtt(vma->vm) &&
             vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
@@ -3274,6 +3276,16 @@ int i915_vma_unbind(struct i915_vma *vma)
         return 0;
 }
 
+int i915_vma_unbind(struct i915_vma *vma)
+{
+        return __i915_vma_unbind(vma, true);
+}
+
+int __i915_vma_unbind_no_wait(struct i915_vma *vma)
+{
+        return __i915_vma_unbind(vma, false);
+}
+
 int i915_gpu_idle(struct drm_device *dev)
 {
         struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3354,11 +3366,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
         struct drm_device *dev = obj->base.dev;
         struct drm_i915_private *dev_priv = dev->dev_private;
         u32 fence_alignment, unfenced_alignment;
+        u32 search_flag, alloc_flag;
+        u64 start, end;
         u64 size, fence_size;
-        u64 start =
-                flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
-        u64 end =
-                flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
         struct i915_vma *vma;
         int ret;
 
@@ -3398,6 +3408,13 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
                 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
         }
 
+        start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
+        end = vm->total;
+        if (flags & PIN_MAPPABLE)
+                end = min_t(u64, end, dev_priv->gtt.mappable_end);
+        if (flags & PIN_ZONE_4G)
+                end = min_t(u64, end, (1ULL << 32));
+
         if (alignment == 0)
                 alignment = flags & PIN_MAPPABLE ? fence_alignment :
                                                 unfenced_alignment;
@@ -3433,13 +3450,21 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
         if (IS_ERR(vma))
                 goto err_unpin;
 
+        if (flags & PIN_HIGH) {
+                search_flag = DRM_MM_SEARCH_BELOW;
+                alloc_flag = DRM_MM_CREATE_TOP;
+        } else {
+                search_flag = DRM_MM_SEARCH_DEFAULT;
+                alloc_flag = DRM_MM_CREATE_DEFAULT;
+        }
+
 search_free:
         ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
                                                   size, alignment,
                                                   obj->cache_level,
                                                   start, end,
-                                                  DRM_MM_SEARCH_DEFAULT,
-                                                  DRM_MM_CREATE_DEFAULT);
+                                                  search_flag,
+                                                  alloc_flag);
         if (ret) {
                 ret = i915_gem_evict_something(dev, vm, size, alignment,
                                                obj->cache_level,
@@ -3632,53 +3657,106 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
         return 0;
 }
 
+/**
+ * Changes the cache-level of an object across all VMA.
+ *
+ * After this function returns, the object will be in the new cache-level
+ * across all GTT and the contents of the backing storage will be coherent,
+ * with respect to the new cache-level. In order to keep the backing storage
+ * coherent for all users, we only allow a single cache level to be set
+ * globally on the object and prevent it from being changed whilst the
+ * hardware is reading from the object. That is if the object is currently
+ * on the scanout it will be set to uncached (or equivalent display
+ * cache coherency) and all non-MOCS GPU access will also be uncached so
+ * that all direct access to the scanout remains coherent.
+ */
 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                                     enum i915_cache_level cache_level)
 {
         struct drm_device *dev = obj->base.dev;
         struct i915_vma *vma, *next;
+        bool bound = false;
         int ret = 0;
 
         if (obj->cache_level == cache_level)
                 goto out;
 
-        if (i915_gem_obj_is_pinned(obj)) {
-                DRM_DEBUG("can not change the cache level of pinned objects\n");
-                return -EBUSY;
-        }
-
+        /* Inspect the list of currently bound VMA and unbind any that would
+         * be invalid given the new cache-level. This is principally to
+         * catch the issue of the CS prefetch crossing page boundaries and
+         * reading an invalid PTE on older architectures.
+         */
         list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
+                if (!drm_mm_node_allocated(&vma->node))
+                        continue;
+
+                if (vma->pin_count) {
+                        DRM_DEBUG("can not change the cache level of pinned objects\n");
+                        return -EBUSY;
+                }
+
                 if (!i915_gem_valid_gtt_space(vma, cache_level)) {
                         ret = i915_vma_unbind(vma);
                         if (ret)
                                 return ret;
-                }
+                } else
+                        bound = true;
         }
 
-        if (i915_gem_obj_bound_any(obj)) {
+        /* We can reuse the existing drm_mm nodes but need to change the
+         * cache-level on the PTE. We could simply unbind them all and
+         * rebind with the correct cache-level on next use. However since
+         * we already have a valid slot, dma mapping, pages etc, we may as
+         * rewrite the PTE in the belief that doing so tramples upon less
+         * state and so involves less work.
+         */
+        if (bound) {
+                /* Before we change the PTE, the GPU must not be accessing it.
+                 * If we wait upon the object, we know that all the bound
+                 * VMA are no longer active.
+                 */
                 ret = i915_gem_object_wait_rendering(obj, false);
                 if (ret)
                         return ret;
 
-                i915_gem_object_finish_gtt(obj);
-
-                /* Before SandyBridge, you could not use tiling or fence
-                 * registers with snooped memory, so relinquish any fences
-                 * currently pointing to our region in the aperture.
-                 */
-                if (INTEL_INFO(dev)->gen < 6) {
+                if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
+                        /* Access to snoopable pages through the GTT is
+                         * incoherent and on some machines causes a hard
+                         * lockup. Relinquish the CPU mmaping to force
+                         * userspace to refault in the pages and we can
+                         * then double check if the GTT mapping is still
+                         * valid for that pointer access.
+                         */
+                        i915_gem_release_mmap(obj);
+
+                        /* As we no longer need a fence for GTT access,
+                         * we can relinquish it now (and so prevent having
+                         * to steal a fence from someone else on the next
+                         * fence request). Note GPU activity would have
+                         * dropped the fence as all snoopable access is
+                         * supposed to be linear.
+                         */
                         ret = i915_gem_object_put_fence(obj);
                         if (ret)
                                 return ret;
+                } else {
+                        /* We either have incoherent backing store and
+                         * so no GTT access or the architecture is fully
+                         * coherent. In such cases, existing GTT mmaps
+                         * ignore the cache bit in the PTE and we can
+                         * rewrite it without confusing the GPU or having
+                         * to force userspace to fault back in its mmaps.
+                         */
                 }
 
-                list_for_each_entry(vma, &obj->vma_list, vma_link)
-                        if (drm_mm_node_allocated(&vma->node)) {
-                                ret = i915_vma_bind(vma, cache_level,
-                                                    PIN_UPDATE);
-                                if (ret)
-                                        return ret;
-                        }
+                list_for_each_entry(vma, &obj->vma_list, vma_link) {
+                        if (!drm_mm_node_allocated(&vma->node))
+                                continue;
+
+                        ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
+                        if (ret)
+                                return ret;
+                }
         }
 
         list_for_each_entry(vma, &obj->vma_list, vma_link)
@@ -3686,6 +3764,10 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
         obj->cache_level = cache_level;
 
 out:
+        /* Flush the dirty CPU caches to the backing storage so that the
+         * object is now coherent at its new cache level (with respect
+         * to the access domain).
+         */
         if (obj->cache_dirty &&
             obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
             cpu_write_needs_clflush(obj)) {
@@ -4533,22 +4615,6 @@ void i915_gem_init_swizzling(struct drm_device *dev)
                 BUG();
 }
 
-static bool
-intel_enable_blt(struct drm_device *dev)
-{
-        if (!HAS_BLT(dev))
-                return false;
-
-        /* The blitter was dysfunctional on early prototypes */
-        if (IS_GEN6(dev) && dev->pdev->revision < 8) {
-                DRM_INFO("BLT not supported on this pre-production hardware;"
-                         " graphics performance will be degraded.\n");
-                return false;
-        }
-
-        return true;
-}
-
 static void init_unused_ring(struct drm_device *dev, u32 base)
 {
         struct drm_i915_private *dev_priv = dev->dev_private;
@@ -4591,7 +4657,7 @@ int i915_gem_init_rings(struct drm_device *dev)
                         goto cleanup_render_ring;
         }
 
-        if (intel_enable_blt(dev)) {
+        if (HAS_BLT(dev)) {
                 ret = intel_init_blt_ring_buffer(dev);
                 if (ret)
                         goto cleanup_bsd_ring;
@@ -4609,14 +4675,8 @@ int i915_gem_init_rings(struct drm_device *dev)
                         goto cleanup_vebox_ring;
         }
 
-        ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
-        if (ret)
-                goto cleanup_bsd2_ring;
-
         return 0;
 
-cleanup_bsd2_ring:
-        intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]);
 cleanup_vebox_ring:
         intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
 cleanup_blt_ring:
@@ -4687,21 +4747,32 @@ i915_gem_init_hw(struct drm_device *dev)
         }
 
         /* We can't enable contexts until all firmware is loaded */
-        ret = intel_guc_ucode_load(dev);
-        if (ret) {
-                /*
-                 * If we got an error and GuC submission is enabled, map
-                 * the error to -EIO so the GPU will be declared wedged.
-                 * OTOH, if we didn't intend to use the GuC anyway, just
-                 * discard the error and carry on.
-                 */
-                DRM_ERROR("Failed to initialize GuC, error %d%s\n", ret,
-                          i915.enable_guc_submission ? "" : " (ignored)");
-                ret = i915.enable_guc_submission ? -EIO : 0;
-                if (ret)
-                        goto out;
+        if (HAS_GUC_UCODE(dev)) {
+                ret = intel_guc_ucode_load(dev);
+                if (ret) {
+                        /*
+                         * If we got an error and GuC submission is enabled, map
+                         * the error to -EIO so the GPU will be declared wedged.
+                         * OTOH, if we didn't intend to use the GuC anyway, just
+                         * discard the error and carry on.
+                         */
+                        DRM_ERROR("Failed to initialize GuC, error %d%s\n", ret,
+                                  i915.enable_guc_submission ? "" :
                                  " (ignored)");
+                        ret = i915.enable_guc_submission ? -EIO : 0;
+                        if (ret)
+                                goto out;
+                }
         }
 
+        /*
+         * Increment the next seqno by 0x100 so we have a visible break
+         * on re-initialisation
+         */
+        ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100);
+        if (ret)
+                goto out;
+
         /* Now it is safe to go back round and do everything else: */
         for_each_ring(ring, dev_priv, i) {
                 struct drm_i915_gem_request *req;
@@ -4839,18 +4910,6 @@ init_ring_lists(struct intel_engine_cs *ring)
         INIT_LIST_HEAD(&ring->request_list);
 }
 
-void i915_init_vm(struct drm_i915_private *dev_priv,
-                  struct i915_address_space *vm)
-{
-        if (!i915_is_ggtt(vm))
-                drm_mm_init(&vm->mm, vm->start, vm->total);
-        vm->dev = dev_priv->dev;
-        INIT_LIST_HEAD(&vm->active_list);
-        INIT_LIST_HEAD(&vm->inactive_list);
-        INIT_LIST_HEAD(&vm->global_link);
-        list_add_tail(&vm->global_link, &dev_priv->vm_list);
-}
-
 void
 i915_gem_load(struct drm_device *dev)
 {
@@ -4874,8 +4933,6 @@ i915_gem_load(struct drm_device *dev)
                                   NULL);
 
         INIT_LIST_HEAD(&dev_priv->vm_list);
-        i915_init_vm(dev_priv, &dev_priv->gtt.base);
-
         INIT_LIST_HEAD(&dev_priv->context_list);
         INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
         INIT_LIST_HEAD(&dev_priv->mm.bound_list);
@@ -4903,6 +4960,14 @@ i915_gem_load(struct drm_device *dev)
                 dev_priv->num_fence_regs =
                                 I915_READ(vgtif_reg(avail_rs.fence_num));
 
+        /*
+         * Set initial sequence number for requests.
+         * Using this number allows the wraparound to happen early,
+         * catching any obvious problems.
+         */
+        dev_priv->next_seqno = ((u32)~0 - 0x1100);
+        dev_priv->last_seqno = ((u32)~0 - 0x1101);
+
         /* Initialize fence registers to zero */
         INIT_LIST_HEAD(&dev_priv->mm.fence_list);
         i915_gem_restore_fences(dev);
@@ -4972,9 +5037,9 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file)
 
 /**
  * i915_gem_track_fb - update frontbuffer tracking
- * old: current GEM buffer for the frontbuffer slots
- * new: new GEM buffer for the frontbuffer slots
- * frontbuffer_bits: bitmask of frontbuffer slots
+ * @old: current GEM buffer for the frontbuffer slots
+ * @new: new GEM buffer for the frontbuffer slots
+ * @frontbuffer_bits: bitmask of frontbuffer slots
  *
  * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
  * from @old and setting them in @new. Both @old and @new can be NULL.