summary refs log tree commit diff
author Maarten Lankhorst <maarten.lankhorst@canonical.com> 2012-08-07 18:07:44 (GMT)
committer Maarten Lankhorst <maarten.lankhorst@canonical.com> 2012-08-10 11:56:04 (GMT)
commit 673c4b2550bc63ec134bca47a95dabd617a689ce (patch) (side-by-side diff)
tree 102fc4c49f778f4e2890bf5239f2d7b13ece8e5a
parent 7c94791a583b95b02e5746c028152ecfc1d33027 (diff)
download linux-673c4b2550bc63ec134bca47a95dabd617a689ce.zip
linux-673c4b2550bc63ec134bca47a95dabd617a689ce.tar.gz
i915: use excc for asynchronous signalling of prime, v2 [dma-fence-v8]
v2: Fix fallout from rebase..
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h 4
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c 89
-rw-r--r-- drivers/gpu/drm/i915/i915_gem_execbuffer.c 197
-rw-r--r-- drivers/gpu/drm/i915/i915_reg.h 1
-rw-r--r-- drivers/gpu/drm/i915/intel_ringbuffer.h 4
5 files changed, 274 insertions, 21 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 700dc83..49925f8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1027,6 +1027,10 @@ struct drm_i915_gem_request {
/** global list entry for this request */
struct list_head list;
+ struct list_head prime_list;
+ spinlock_t prime_rm_lock;
+ int excc;
+
struct drm_i915_file_private *file_priv;
/** file_priv list entry for this request */
struct list_head client_list;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f26e2b2..a8651cb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -35,7 +35,7 @@
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
-#include <linux/dma-buf.h>
+#include <linux/dma-buf-mgr.h>
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
@@ -1558,6 +1558,8 @@ i915_add_request(struct intel_ring_buffer *ring,
if (request == NULL)
return -ENOMEM;
}
+ memset(&request->prime_list, 0, sizeof(request->prime_list));
+ request->excc = -1;
seqno = i915_gem_next_request_seqno(ring);
@@ -1610,6 +1612,67 @@ i915_add_request(struct intel_ring_buffer *ring,
return 0;
}
+static void i915_put_NOP(struct kref *ref)
+{
+}
+
+static inline void
+i915_gem_reset_requests(struct drm_i915_gem_request *request)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&request->prime_rm_lock, flags);
+ while (!list_empty(&request->prime_list)) {
+ int i;
+ struct dmabufmgr_validate *val;
+
+ val = list_first_entry(&request->prime_list,
+ struct dmabufmgr_validate, head);
+
+ dmabufmgr_validate_get(val);
+ for (i = 0; i < val->num_fences; ++i) {
+ unsigned long qflags;
+ struct dma_fence *f;
+
+ if (!val->fences[i])
+ continue;
+
+ /* We cannot acquire event_queue.lock while holding
+ * prime_rm_lock, as that would deadlock; drop it first.
+ * Take a ref on the dma_fence so it won't get released
+ * behind our back while prime_rm_lock is not held.
+ * We know our callback hasn't run to completion yet,
+ * else val->fences[i] would be NULL.
+ */
+ f = val->fences[i];
+ dma_fence_get(f);
+ spin_unlock_irqrestore(&request->prime_rm_lock, flags);
+
+ spin_lock_irqsave(&f->event_queue.lock, qflags);
+ if (val->fences[i]) {
+ __remove_wait_queue(NULL, &val->wait[i].base);
+ val->fences[i] = NULL;
+
+ /* We're still holding a ref,
+ * so no free should be done here
+ */
+ WARN_ON(kref_put(&val->refcount, i915_put_NOP));
+ }
+ spin_unlock_irqrestore(&f->event_queue.lock, qflags);
+
+ spin_lock_irqsave(&request->prime_rm_lock, flags);
+ dma_fence_put(f);
+ }
+
+ if (WARN_ON(!dmabufmgr_validate_put(val)))
+ /* Uh oh, we screwed up refcounting..
+ * pretend we didn't and free anyhow.
+ */
+ dmabufmgr_validate_free(&val->refcount);
+ }
+ spin_unlock_irqrestore(&request->prime_rm_lock, flags);
+}
+
static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
@@ -1624,6 +1687,9 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
request->file_priv = NULL;
}
spin_unlock(&file_priv->mm.lock);
+
+ if (request->prime_list.next)
+ i915_gem_reset_requests(request);
}
static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
@@ -1655,6 +1721,7 @@ static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
static void i915_gem_reset_fences(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
+ struct intel_ring_buffer *ring;
int i;
for (i = 0; i < dev_priv->num_fence_regs; i++) {
@@ -1670,6 +1737,10 @@ static void i915_gem_reset_fences(struct drm_device *dev)
INIT_LIST_HEAD(&reg->lru_list);
}
+ for_each_ring(ring, dev_priv, i)
+ /* Clear out EXCC slots */
+ memset(ring->excc_seqno, 0, sizeof(ring->excc_seqno));
+
INIT_LIST_HEAD(&dev_priv->mm.fence_list);
}
@@ -1736,6 +1807,22 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
ring->last_retired_head = request->tail;
list_del(&request->list);
+
+ if (request->prime_list.next) {
+ unsigned long flags;
+
+ /* Acquire the lock to prevent a tiny race condition
+ * where kfree occurs before spin_unlock_irqrestore
+ * in the last trigger.
+ *
+ * The prime list should be empty at this point;
+ * otherwise the request started before all fences were
+ * signaled, which should be impossible.
+ */
+ spin_lock_irqsave(&request->prime_rm_lock, flags);
+ WARN_ON(!list_empty(&request->prime_list));
+ spin_unlock_irqrestore(&request->prime_rm_lock, flags);
+ }
i915_gem_request_remove_from_client(request);
kfree(request);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index ee43614..207e19d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -403,7 +403,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
if (!val)
return -ENOMEM;
- dmabufmgr_validate_init(val, prime_val, dmabuf, obj, false);
+ dmabufmgr_validate_init(val, prime_val, dmabuf, NULL, false);
}
if (!list_empty(prime_val)) {
@@ -805,16 +805,130 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
intel_mark_busy(ring->dev, NULL);
}
+static void i915_put_BUG(struct kref *ref)
+{
+ BUG();
+}
+
+static void i915_dmabufmgr_validate_free(struct kref *ref)
+{
+ struct dmabufmgr_validate *val = container_of(ref,
+ struct dmabufmgr_validate, refcount);
+ struct drm_i915_gem_request *request = val->priv;
+ struct intel_ring_buffer *ring = request->ring;
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
+
+ list_del(&val->head);
+ kfree(val);
+
+ if (list_empty(&request->prime_list) &&
+ !WARN_ON(request->excc < 0)) {
+ I915_WRITE(EXCC(ring), _MASKED_BIT_DISABLE(1 << request->excc));
+ POSTING_READ(EXCC(ring));
+ }
+}
+
+static int
+i915_prime_trigger(struct dma_fence_cb *cb, void *priv)
+{
+ struct dmabufmgr_validate *val = priv;
+ struct drm_i915_gem_request *request = val->priv;
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&request->prime_rm_lock, flags);
+
+ for (i = 0; i < val->num_fences; ++i) {
+ if (val->fences[i] == cb->fence) {
+ val->fences[i] = NULL;
+ break;
+ }
+ }
+ WARN_ON(i == val->num_fences);
+
+ kref_put(&val->refcount, i915_dmabufmgr_validate_free);
+ spin_unlock_irqrestore(&request->prime_rm_lock, flags);
+ return 0;
+}
+
static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
- struct drm_file *file,
- struct intel_ring_buffer *ring)
+ struct drm_file *file, int excc,
+ struct dma_seqno_fence *fence,
+ struct list_head *prime_val,
+ struct intel_ring_buffer *ring, u32 seqno)
{
+ struct dmabufmgr_validate *val, *tmp;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_i915_gem_request *request;
+ unsigned long flags;
+
/* Unconditionally force add_request to emit a full flush. */
ring->gpu_caches_dirty = true;
/* Add a breadcrumb for the completion of the batch buffer */
- (void)i915_add_request(ring, file, NULL);
+ if (list_empty(prime_val)) {
+ (void)i915_add_request(ring, file, NULL);
+ return;
+ }
+
+ request = kzalloc(sizeof(*request), GFP_KERNEL);
+ if (request == NULL)
+ goto err;
+
+ if (WARN_ON(excc < 0) || i915_add_request(ring, file, request)) {
+ kfree(request);
+ goto err;
+ }
+
+ if (WARN_ON(request->seqno != seqno))
+ fence->seqno = request->seqno;
+
+ request->excc = excc;
+ INIT_LIST_HEAD(&request->prime_list);
+ spin_lock_init(&request->prime_rm_lock);
+ list_splice(prime_val, &request->prime_list);
+
+ list_for_each_entry(val, &request->prime_list, head) {
+ int i, ret;
+ val->priv = request;
+ for (i = 0; i < val->num_fences; ++i) {
+ struct dma_seqno_fence *f;
+
+ f = to_seqno_fence(val->fences[i]);
+ if (f && f->sync_buf == ring->sync_buf)
+ continue;
+
+ kref_get(&val->refcount);
+ ret = dma_fence_add_callback(val->fences[i],
+ &val->wait[i], i915_prime_trigger, val);
+ if (!ret)
+ val->num_waits++;
+ else {
+ val->fences[i] = NULL;
+ kref_put(&val->refcount, i915_put_BUG);
+ }
+ }
+ }
+
+ dmabufmgr_fence_buffer_objects(&fence->base,
+ &request->prime_list);
+
+ spin_lock_irqsave(&request->prime_rm_lock, flags);
+ list_for_each_entry_safe(val, tmp, &request->prime_list, head)
+ kref_put(&val->refcount, i915_dmabufmgr_validate_free);
+ spin_unlock_irqrestore(&request->prime_rm_lock, flags);
+
+ return;
+
+err:
+ if (excc >= 0) {
+ I915_WRITE(EXCC(ring), _MASKED_BIT_DISABLE(1 << excc));
+ POSTING_READ(EXCC(ring));
+ }
+ dmabufmgr_backoff_reservation(prime_val);
+ list_for_each_entry_safe(val, tmp, prime_val, head)
+ dmabufmgr_validate_put(val);
}
static int
@@ -887,26 +1001,69 @@ i915_gem_execbuffer_fence_prime_enable(struct dma_seqno_fence *fence)
static int
i915_gem_execbuffer_fence_prime(struct list_head *prime_list,
struct intel_ring_buffer *ring,
- u32 seqno, struct dma_seqno_fence **pfence)
+ u32 seqno, struct dma_seqno_fence **pfence,
+ int *pexcc)
{
+ drm_i915_private_t *dev_priv = ring->dev->dev_private;
struct dma_seqno_fence *f;
- int ret;
+ int ret, excc, cond;
if (list_empty(prime_list))
return 0;
- ret = dmabufmgr_wait_completed_cpu(prime_list, false, true);
- if (ret)
- return ret;
+ excc = ring->excc_idx;
+ if (ring->excc_seqno[excc]) {
+ ret = i915_wait_seqno(ring, ring->excc_seqno[excc]);
+ if (ret)
+ return ret;
+ }
f = kzalloc(sizeof(*f) + sizeof(wait_queue_t), GFP_KERNEL);
if (!f)
return -ENOMEM;
+ I915_WRITE(EXCC(ring), _MASKED_BIT_ENABLE(1 << excc));
+ POSTING_READ(EXCC(ring));
+
+ ret = intel_ring_begin(ring, dev_priv->info->gen < 7 ? 2 : 10);
+ if (ret) {
+ kfree(f);
+ return ret;
+ }
+
+ if (dev_priv->info->gen >= 7) {
+ intel_ring_emit(ring, MI_NOOP);
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, FORCEWAKE_MT);
+ intel_ring_emit(ring, _MASKED_BIT_ENABLE(2));
+ }
+
+ if (dev_priv->info->gen < 6)
+ cond = (1 + excc) << 9;
+ else
+ cond = (1 + excc) << 16;
+
+ intel_ring_emit(ring, MI_WAIT_FOR_EVENT | cond);
+ intel_ring_emit(ring, MI_NOOP);
+
+ if (dev_priv->info->gen >= 7) {
+ intel_ring_emit(ring, MI_NOOP);
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, FORCEWAKE_MT);
+ intel_ring_emit(ring, _MASKED_BIT_DISABLE(2));
+ }
+ intel_ring_advance(ring);
+
dma_seqno_fence_init(f, ring->sync_buf,
ring->sync_seqno_ofs, seqno, ring,
i915_gem_execbuffer_fence_prime_enable, NULL);
+
+ ring->excc_seqno[excc] = seqno;
+ if (++ring->excc_idx == ARRAY_SIZE(ring->excc_seqno))
+ ring->excc_idx = 0;
+
*pfence = f;
+ *pexcc = excc;
return 0;
}
@@ -929,7 +1086,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
u32 exec_start, exec_len;
u32 seqno;
u32 mask;
- int ret, mode, i;
+ int ret, mode, i, excc = -1;
if (!i915_gem_check_execbuffer(args)) {
DRM_DEBUG("execbuf with invalid offset/length\n");
@@ -1165,7 +1322,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
if (ret)
goto err;
- ret = i915_gem_execbuffer_fence_prime(&prime_val, ring, seqno, &fence);
+ ret = i915_gem_execbuffer_fence_prime(&prime_val, ring, seqno,
+ &fence, &excc);
if (ret)
goto err;
@@ -1192,23 +1350,22 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
}
i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
- i915_gem_execbuffer_retire_commands(dev, file, ring);
- if (!list_empty(&prime_val)) {
- if (WARN_ON(!fence))
- goto err;
- dmabufmgr_fence_buffer_objects(&fence->base, &prime_val);
- }
+ i915_gem_execbuffer_retire_commands(dev, file, excc, fence,
+ &prime_val, ring, seqno);
goto out;
err:
+ if (excc >= 0) {
+ I915_WRITE(EXCC(ring), _MASKED_BIT_DISABLE(1 << excc));
+ POSTING_READ(EXCC(ring));
+ }
dmabufmgr_backoff_reservation(&prime_val);
+ list_for_each_entry_safe(val, tmp, &prime_val, head)
+ dmabufmgr_validate_put(val);
out:
if (fence)
dma_fence_put(&fence->base);
- list_for_each_entry_safe(val, tmp, &prime_val, head)
- dmabufmgr_validate_put(val);
-
eb_destroy(eb);
while (!list_empty(&objects)) {
struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 81a3de6..0980574 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -423,6 +423,7 @@
#define BSD_RING_BASE 0x04000
#define GEN6_BSD_RING_BASE 0x12000
#define BLT_RING_BASE 0x22000
+#define EXCC(ring) ((ring)->mmio_base+0x28)
#define RING_TAIL(base) ((base)+0x30)
#define RING_HEAD(base) ((base)+0x34)
#define RING_START(base) ((base)+0x38)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 3d95be4..6a24efa 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -119,6 +119,10 @@ struct intel_ring_buffer {
u32 sync_seqno_ofs;
void *private;
+
+ /* EXCC wait conditions */
+ u32 excc_seqno[5];
+ u32 excc_idx;
};
static inline bool