| author | Maarten Lankhorst <maarten.lankhorst@canonical.com> | 2012-08-07 18:07:44 (GMT) |
|---|---|---|
| committer | Maarten Lankhorst <maarten.lankhorst@canonical.com> | 2012-08-10 11:56:04 (GMT) |
| commit | 673c4b2550bc63ec134bca47a95dabd617a689ce (patch) (side-by-side diff) | |
| tree | 102fc4c49f778f4e2890bf5239f2d7b13ece8e5a | |
| parent | 7c94791a583b95b02e5746c028152ecfc1d33027 (diff) | |
| download | linux-673c4b2550bc63ec134bca47a95dabd617a689ce.zip linux-673c4b2550bc63ec134bca47a95dabd617a689ce.tar.gz | |
i915: use excc for asynchronous signalling of prime, v2 (dma-fence-v8)
v2: Fix fallout from rebase..
| -rw-r--r-- | drivers/gpu/drm/i915/i915_drv.h | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 89 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/i915_gem_execbuffer.c | 197 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/i915_reg.h | 1 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.h | 4 |
5 files changed, 274 insertions, 21 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 700dc83..49925f8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1027,6 +1027,10 @@ struct drm_i915_gem_request { /** global list entry for this request */ struct list_head list; + struct list_head prime_list; + spinlock_t prime_rm_lock; + int excc; + struct drm_i915_file_private *file_priv; /** file_priv list entry for this request */ struct list_head client_list; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f26e2b2..a8651cb 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -35,7 +35,7 @@ #include <linux/slab.h> #include <linux/swap.h> #include <linux/pci.h> -#include <linux/dma-buf.h> +#include <linux/dma-buf-mgr.h> static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); @@ -1558,6 +1558,8 @@ i915_add_request(struct intel_ring_buffer *ring, if (request == NULL) return -ENOMEM; } + memset(&request->prime_list, 0, sizeof(request->prime_list)); + request->excc = -1; seqno = i915_gem_next_request_seqno(ring); @@ -1610,6 +1612,67 @@ i915_add_request(struct intel_ring_buffer *ring, return 0; } +static void i915_put_NOP(struct kref *ref) +{ +} + +static inline void +i915_gem_reset_requests(struct drm_i915_gem_request *request) +{ + unsigned long flags; + + spin_lock_irqsave(&request->prime_rm_lock, flags); + while (!list_empty(&request->prime_list)) { + int i; + struct dmabufmgr_validate *val; + + val = list_first_entry(&request->prime_list, + struct dmabufmgr_validate, head); + + dmabufmgr_validate_get(val); + for (i = 0; i < val->num_fences; ++i) { + unsigned long qflags; + struct dma_fence *f; + + if (!val->fences[i]) + continue; + + /* We cannot acquire event_queue.lock without dropping + * prime_rm_lock first, this will trigger a deadlock. 
+ * as such, take a ref on dma_fence so it won't get + * released behind our back. This is because our + * callback hasn't run to completion yet, else + * val->fencess[i] would be NULL. + */ + f = val->fences[i]; + dma_fence_get(f); + spin_unlock_irqrestore(&request->prime_rm_lock, flags); + + spin_lock_irqsave(&f->event_queue.lock, qflags); + if (val->fences[i]) { + __remove_wait_queue(NULL, &val->wait[i].base); + val->fences[i] = NULL; + + /* We're still holding a ref, + * so no free should be done here + */ + WARN_ON(kref_put(&val->refcount, i915_put_NOP)); + } + spin_unlock_irqrestore(&f->event_queue.lock, qflags); + + spin_lock_irqsave(&request->prime_rm_lock, flags); + dma_fence_put(f); + } + + if (WARN_ON(!dmabufmgr_validate_put(val))) + /* Uh oh, we screwed up refcounting.. + * pretend we didn't and free anyhow. + */ + dmabufmgr_validate_free(&val->refcount); + } + spin_unlock_irqrestore(&request->prime_rm_lock, flags); +} + static inline void i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) { @@ -1624,6 +1687,9 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) request->file_priv = NULL; } spin_unlock(&file_priv->mm.lock); + + if (request->prime_list.next) + i915_gem_reset_requests(request); } static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, @@ -1655,6 +1721,7 @@ static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, static void i915_gem_reset_fences(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_ring_buffer *ring; int i; for (i = 0; i < dev_priv->num_fence_regs; i++) { @@ -1670,6 +1737,10 @@ static void i915_gem_reset_fences(struct drm_device *dev) INIT_LIST_HEAD(®->lru_list); } + for_each_ring(ring, dev_priv, i) + /* Clear out EXCC slots */ + memset(ring->excc_seqno, 0, sizeof(ring->excc_seqno)); + INIT_LIST_HEAD(&dev_priv->mm.fence_list); } @@ -1736,6 +1807,22 @@ i915_gem_retire_requests_ring(struct 
intel_ring_buffer *ring) ring->last_retired_head = request->tail; list_del(&request->list); + + if (request->prime_list.next) { + unsigned long flags; + + /* Acquire lock to prevent a tiny race condition + * where kfree occurs before spin_unlock_irqrestore + * in the last trigger + * + * The request list should be empty at this point, + * otherwise request started before all fences were + * signaled, which should be impossible. + */ + spin_lock_irqsave(&request->prime_rm_lock, flags); + WARN_ON(!list_empty(&request->prime_list)); + spin_unlock_irqrestore(&request->prime_rm_lock, flags); + } i915_gem_request_remove_from_client(request); kfree(request); } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index ee43614..207e19d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -403,7 +403,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, if (!val) return -ENOMEM; - dmabufmgr_validate_init(val, prime_val, dmabuf, obj, false); + dmabufmgr_validate_init(val, prime_val, dmabuf, NULL, false); } if (!list_empty(prime_val)) { @@ -805,16 +805,130 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects, intel_mark_busy(ring->dev, NULL); } +static void i915_put_BUG(struct kref *ref) +{ + BUG(); +} + +static void i915_dmabufmgr_validate_free(struct kref *ref) +{ + struct dmabufmgr_validate *val = container_of(ref, + struct dmabufmgr_validate, refcount); + struct drm_i915_gem_request *request = val->priv; + struct intel_ring_buffer *ring = request->ring; + struct drm_i915_private *dev_priv = ring->dev->dev_private; + + list_del(&val->head); + kfree(val); + + if (list_empty(&request->prime_list) && + !WARN_ON(request->excc < 0)) { + I915_WRITE(EXCC(ring), _MASKED_BIT_DISABLE(1 << request->excc)); + POSTING_READ(EXCC(ring)); + } +} + +static int +i915_prime_trigger(struct dma_fence_cb *cb, void *priv) +{ + struct dmabufmgr_validate *val = priv; + 
struct drm_i915_gem_request *request = val->priv; + unsigned long flags; + int i; + + spin_lock_irqsave(&request->prime_rm_lock, flags); + + for (i = 0; i < val->num_fences; ++i) { + if (val->fences[i] == cb->fence) { + val->fences[i] = NULL; + break; + } + } + WARN_ON(i == val->num_fences); + + kref_put(&val->refcount, i915_dmabufmgr_validate_free); + spin_unlock_irqrestore(&request->prime_rm_lock, flags); + return 0; +} + static void i915_gem_execbuffer_retire_commands(struct drm_device *dev, - struct drm_file *file, - struct intel_ring_buffer *ring) + struct drm_file *file, int excc, + struct dma_seqno_fence *fence, + struct list_head *prime_val, + struct intel_ring_buffer *ring, u32 seqno) { + struct dmabufmgr_validate *val, *tmp; + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_gem_request *request; + unsigned long flags; + /* Unconditionally force add_request to emit a full flush. */ ring->gpu_caches_dirty = true; /* Add a breadcrumb for the completion of the batch buffer */ - (void)i915_add_request(ring, file, NULL); + if (list_empty(prime_val)) { + (void)i915_add_request(ring, file, NULL); + return; + } + + request = kzalloc(sizeof(*request), GFP_KERNEL); + if (request == NULL) + goto err; + + if (WARN_ON(excc < 0) || i915_add_request(ring, file, request)) { + kfree(request); + goto err; + } + + if (WARN_ON(request->seqno != seqno)) + fence->seqno = request->seqno; + + request->excc = excc; + INIT_LIST_HEAD(&request->prime_list); + spin_lock_init(&request->prime_rm_lock); + list_splice(prime_val, &request->prime_list); + + list_for_each_entry(val, &request->prime_list, head) { + int i, ret; + val->priv = request; + for (i = 0; i < val->num_fences; ++i) { + struct dma_seqno_fence *f; + + f = to_seqno_fence(val->fences[i]); + if (f && f->sync_buf == ring->sync_buf) + continue; + + kref_get(&val->refcount); + ret = dma_fence_add_callback(val->fences[i], + &val->wait[i], i915_prime_trigger, val); + if (!ret) + val->num_waits++; + else { 
+ val->fences[i] = NULL; + kref_put(&val->refcount, i915_put_BUG); + } + } + } + + dmabufmgr_fence_buffer_objects(&fence->base, + &request->prime_list); + + spin_lock_irqsave(&request->prime_rm_lock, flags); + list_for_each_entry_safe(val, tmp, &request->prime_list, head) + kref_put(&val->refcount, i915_dmabufmgr_validate_free); + spin_unlock_irqrestore(&request->prime_rm_lock, flags); + + return; + +err: + if (excc >= 0) { + I915_WRITE(EXCC(ring), _MASKED_BIT_DISABLE(1 << excc)); + POSTING_READ(EXCC(ring)); + } + dmabufmgr_backoff_reservation(prime_val); + list_for_each_entry_safe(val, tmp, prime_val, head) + dmabufmgr_validate_put(val); } static int @@ -887,26 +1001,69 @@ i915_gem_execbuffer_fence_prime_enable(struct dma_seqno_fence *fence) static int i915_gem_execbuffer_fence_prime(struct list_head *prime_list, struct intel_ring_buffer *ring, - u32 seqno, struct dma_seqno_fence **pfence) + u32 seqno, struct dma_seqno_fence **pfence, + int *pexcc) { + drm_i915_private_t *dev_priv = ring->dev->dev_private; struct dma_seqno_fence *f; - int ret; + int ret, excc, cond; if (list_empty(prime_list)) return 0; - ret = dmabufmgr_wait_completed_cpu(prime_list, false, true); - if (ret) - return ret; + excc = ring->excc_idx; + if (ring->excc_seqno[excc]) { + ret = i915_wait_seqno(ring, ring->excc_seqno[excc]); + if (ret) + return ret; + } f = kzalloc(sizeof(*f) + sizeof(wait_queue_t), GFP_KERNEL); if (!f) return -ENOMEM; + I915_WRITE(EXCC(ring), _MASKED_BIT_ENABLE(1 << excc)); + POSTING_READ(EXCC(ring)); + + ret = intel_ring_begin(ring, dev_priv->info->gen < 7 ? 
2 : 10); + if (ret) { + kfree(f); + return ret; + } + + if (dev_priv->info->gen >= 7) { + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, FORCEWAKE_MT); + intel_ring_emit(ring, _MASKED_BIT_ENABLE(2)); + } + + if (dev_priv->info->gen < 6) + cond = (1 + excc) << 9; + else + cond = (1 + excc) << 16; + + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | cond); + intel_ring_emit(ring, MI_NOOP); + + if (dev_priv->info->gen >= 7) { + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, FORCEWAKE_MT); + intel_ring_emit(ring, _MASKED_BIT_DISABLE(2)); + } + intel_ring_advance(ring); + dma_seqno_fence_init(f, ring->sync_buf, ring->sync_seqno_ofs, seqno, ring, i915_gem_execbuffer_fence_prime_enable, NULL); + + ring->excc_seqno[excc] = seqno; + if (++ring->excc_idx == ARRAY_SIZE(ring->excc_seqno)) + ring->excc_idx = 0; + *pfence = f; + *pexcc = excc; return 0; } @@ -929,7 +1086,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, u32 exec_start, exec_len; u32 seqno; u32 mask; - int ret, mode, i; + int ret, mode, i, excc = -1; if (!i915_gem_check_execbuffer(args)) { DRM_DEBUG("execbuf with invalid offset/length\n"); @@ -1165,7 +1322,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (ret) goto err; - ret = i915_gem_execbuffer_fence_prime(&prime_val, ring, seqno, &fence); + ret = i915_gem_execbuffer_fence_prime(&prime_val, ring, seqno, + &fence, &excc); if (ret) goto err; @@ -1192,23 +1350,22 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } i915_gem_execbuffer_move_to_active(&objects, ring, seqno); - i915_gem_execbuffer_retire_commands(dev, file, ring); - if (!list_empty(&prime_val)) { - if (WARN_ON(!fence)) - goto err; - dmabufmgr_fence_buffer_objects(&fence->base, &prime_val); - } + i915_gem_execbuffer_retire_commands(dev, file, excc, fence, + &prime_val, ring, seqno); goto out; err: + if (excc >= 0) { + I915_WRITE(EXCC(ring), 
_MASKED_BIT_DISABLE(1 << excc)); + POSTING_READ(EXCC(ring)); + } dmabufmgr_backoff_reservation(&prime_val); + list_for_each_entry_safe(val, tmp, &prime_val, head) + dmabufmgr_validate_put(val); out: if (fence) dma_fence_put(&fence->base); - list_for_each_entry_safe(val, tmp, &prime_val, head) - dmabufmgr_validate_put(val); - eb_destroy(eb); while (!list_empty(&objects)) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 81a3de6..0980574 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -423,6 +423,7 @@ #define BSD_RING_BASE 0x04000 #define GEN6_BSD_RING_BASE 0x12000 #define BLT_RING_BASE 0x22000 +#define EXCC(ring) ((ring)->mmio_base+0x28) #define RING_TAIL(base) ((base)+0x30) #define RING_HEAD(base) ((base)+0x34) #define RING_START(base) ((base)+0x38) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 3d95be4..6a24efa 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -119,6 +119,10 @@ struct intel_ring_buffer { u32 sync_seqno_ofs; void *private; + + /* EXCC wait conditions */ + u32 excc_seqno[5]; + u32 excc_idx; }; static inline bool |
