From 06b2b1a40e3a9a1ea80b4c51885d863ca194ef3f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 May 2019 13:01:00 +0100 Subject: drm/i915: Rearrange i915_scheduler.c To avoid pulling in a forward declaration in the next patch, move the i915_sched_node handling to after the main dfs of the scheduler. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190513120102.29660-1-chris@chris-wilson.co.uk (cherry picked from commit 5ae87063c162679a61f2141041d0918cc3045daf) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_scheduler.c | 210 +++++++++++++++++----------------- 1 file changed, 105 insertions(+), 105 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_scheduler.c') diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 39bc4f54e272..5756fcfe343d 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -35,109 +35,6 @@ static inline bool node_signaled(const struct i915_sched_node *node) return i915_request_completed(node_to_request(node)); } -void i915_sched_node_init(struct i915_sched_node *node) -{ - INIT_LIST_HEAD(&node->signalers_list); - INIT_LIST_HEAD(&node->waiters_list); - INIT_LIST_HEAD(&node->link); - node->attr.priority = I915_PRIORITY_INVALID; - node->semaphores = 0; - node->flags = 0; -} - -static struct i915_dependency * -i915_dependency_alloc(void) -{ - return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL); -} - -static void -i915_dependency_free(struct i915_dependency *dep) -{ - kmem_cache_free(global.slab_dependencies, dep); -} - -bool __i915_sched_node_add_dependency(struct i915_sched_node *node, - struct i915_sched_node *signal, - struct i915_dependency *dep, - unsigned long flags) -{ - bool ret = false; - - spin_lock_irq(&schedule_lock); - - if (!node_signaled(signal)) { - INIT_LIST_HEAD(&dep->dfs_link); - list_add(&dep->wait_link, &signal->waiters_list); - list_add(&dep->signal_link, &node->signalers_list); - dep->signaler = signal; - dep->flags = flags; - - /* Keep track of whether anyone on this chain has a semaphore */ - if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN && - !node_started(signal)) - node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN; - - ret = true; - } - - spin_unlock_irq(&schedule_lock); - - return ret; -} - -int i915_sched_node_add_dependency(struct i915_sched_node *node, - struct i915_sched_node *signal) -{ - struct i915_dependency *dep; - - dep = i915_dependency_alloc(); - if (!dep) - return -ENOMEM; - - if (!__i915_sched_node_add_dependency(node, signal, dep, - I915_DEPENDENCY_ALLOC)) - i915_dependency_free(dep); - - return 0; -} - -void i915_sched_node_fini(struct i915_sched_node *node) -{ - struct i915_dependency *dep, *tmp; - - GEM_BUG_ON(!list_empty(&node->link)); - - spin_lock_irq(&schedule_lock); - - /* - * Everyone we depended upon (the fences we wait to be signaled) - * should retire before us and remove themselves from our list. - * However, retirement is run independently on each timeline and - * so we may be called out-of-order. - */ - list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { - GEM_BUG_ON(!node_signaled(dep->signaler)); - GEM_BUG_ON(!list_empty(&dep->dfs_link)); - - list_del(&dep->wait_link); - if (dep->flags & I915_DEPENDENCY_ALLOC) - i915_dependency_free(dep); - } - - /* Remove ourselves from everyone who depends upon us */ - list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) { - GEM_BUG_ON(dep->signaler != node); - GEM_BUG_ON(!list_empty(&dep->dfs_link)); - - list_del(&dep->signal_link); - if (dep->flags & I915_DEPENDENCY_ALLOC) - i915_dependency_free(dep); - } - - spin_unlock_irq(&schedule_lock); -} - static inline struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); @@ -239,6 +136,11 @@ out: return &p->requests[idx]; } +void __i915_priolist_free(struct i915_priolist *p) +{ + kmem_cache_free(global.slab_priorities, p); +} + struct sched_cache { struct list_head *priolist; }; @@ -436,9 +338,107 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump) spin_unlock_irqrestore(&schedule_lock, flags); } -void __i915_priolist_free(struct i915_priolist *p) +void i915_sched_node_init(struct i915_sched_node *node) { - kmem_cache_free(global.slab_priorities, p); + INIT_LIST_HEAD(&node->signalers_list); + INIT_LIST_HEAD(&node->waiters_list); + INIT_LIST_HEAD(&node->link); + node->attr.priority = I915_PRIORITY_INVALID; + node->semaphores = 0; + node->flags = 0; +} + +static struct i915_dependency * +i915_dependency_alloc(void) +{ + return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL); +} + +static void +i915_dependency_free(struct i915_dependency *dep) +{ + kmem_cache_free(global.slab_dependencies, dep); +} + +bool __i915_sched_node_add_dependency(struct i915_sched_node *node, + struct i915_sched_node *signal, + struct i915_dependency *dep, + unsigned long flags) +{ + bool ret = false; + + spin_lock_irq(&schedule_lock); + + if (!node_signaled(signal)) { + INIT_LIST_HEAD(&dep->dfs_link); + list_add(&dep->wait_link, &signal->waiters_list); + list_add(&dep->signal_link, &node->signalers_list); + dep->signaler = signal; + dep->flags = flags; + + /* Keep track of whether anyone on this chain has a semaphore */ + if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN && + !node_started(signal)) + node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN; + + ret = true; + } + + spin_unlock_irq(&schedule_lock); + + return ret; +} + +int i915_sched_node_add_dependency(struct i915_sched_node *node, + struct i915_sched_node *signal) +{ + struct i915_dependency *dep; + + dep = i915_dependency_alloc(); + if (!dep) + return -ENOMEM; + + if (!__i915_sched_node_add_dependency(node, signal, dep, + I915_DEPENDENCY_ALLOC)) + i915_dependency_free(dep); + + return 0; +} + +void i915_sched_node_fini(struct i915_sched_node *node) +{ + struct i915_dependency *dep, *tmp; + + GEM_BUG_ON(!list_empty(&node->link)); + + spin_lock_irq(&schedule_lock); + + /* + * Everyone we depended upon (the fences we wait to be signaled) + * should retire before us and remove themselves from our list. + * However, retirement is run independently on each timeline and + * so we may be called out-of-order. + */ + list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { + GEM_BUG_ON(!node_signaled(dep->signaler)); + GEM_BUG_ON(!list_empty(&dep->dfs_link)); + + list_del(&dep->wait_link); + if (dep->flags & I915_DEPENDENCY_ALLOC) + i915_dependency_free(dep); + } + + /* Remove ourselves from everyone who depends upon us */ + list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) { + GEM_BUG_ON(dep->signaler != node); + GEM_BUG_ON(!list_empty(&dep->dfs_link)); + + list_del(&dep->signal_link); + if (dep->flags & I915_DEPENDENCY_ALLOC) + i915_dependency_free(dep); + } + + spin_unlock_irq(&schedule_lock); } static void i915_global_scheduler_shrink(void) -- cgit v1.2.3 From f312c23ff92319f5f242b69d93885c53f92030b5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 May 2019 13:01:01 +0100 Subject: drm/i915: Pass i915_sched_node around internally To simplify the next patch, update bump_priority and schedule to accept the internal i915_sched_ndoe directly and not expect a request pointer. add/remove: 0/0 grow/shrink: 2/1 up/down: 8/-15 (-7) Function old new delta i915_schedule_bump_priority 109 113 +4 i915_schedule 50 54 +4 __i915_schedule 922 907 -15 v2: Adopt node for the old rq local, since it no longer is a request but the origin node. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190513120102.29660-2-chris@chris-wilson.co.uk (cherry picked from commit 52c76fb18a34fc08dd06f32b9fc83f1375f083ee) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_scheduler.c | 36 ++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_scheduler.c') diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 5756fcfe343d..3cfadb9db988 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -175,7 +175,7 @@ static bool inflight(const struct i915_request *rq, return active->hw_context == rq->hw_context; } -static void __i915_schedule(struct i915_request *rq, +static void __i915_schedule(struct i915_sched_node *node, const struct i915_sched_attr *attr) { struct intel_engine_cs *engine; @@ -189,13 +189,13 @@ static void __i915_schedule(struct i915_request *rq, lockdep_assert_held(&schedule_lock); GEM_BUG_ON(prio == I915_PRIORITY_INVALID); - if (i915_request_completed(rq)) + if (node_signaled(node)) return; - if (prio <= READ_ONCE(rq->sched.attr.priority)) + if (prio <= READ_ONCE(node->attr.priority)) return; - stack.signaler = &rq->sched; + stack.signaler = node; list_add(&stack.dfs_link, &dfs); /* @@ -246,9 +246,9 @@ static void __i915_schedule(struct i915_request *rq, * execlists_submit_request()), we can set our own priority and skip * acquiring the engine locks. */ - if (rq->sched.attr.priority == I915_PRIORITY_INVALID) { - GEM_BUG_ON(!list_empty(&rq->sched.link)); - rq->sched.attr = *attr; + if (node->attr.priority == I915_PRIORITY_INVALID) { + GEM_BUG_ON(!list_empty(&node->link)); + node->attr = *attr; if (stack.dfs_link.next == stack.dfs_link.prev) return; @@ -257,15 +257,14 @@ static void __i915_schedule(struct i915_request *rq, } memset(&cache, 0, sizeof(cache)); - engine = rq->engine; + engine = node_to_request(node)->engine; spin_lock(&engine->timeline.lock); /* Fifo and depth-first replacement ensure our deps execute before us */ list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { - struct i915_sched_node *node = dep->signaler; - INIT_LIST_HEAD(&dep->dfs_link); + node = dep->signaler; engine = sched_lock_engine(node, engine, &cache); lockdep_assert_held(&engine->timeline.lock); @@ -315,13 +314,20 @@ static void __i915_schedule(struct i915_request *rq, void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr) { spin_lock_irq(&schedule_lock); - __i915_schedule(rq, attr); + __i915_schedule(&rq->sched, attr); spin_unlock_irq(&schedule_lock); } +static void __bump_priority(struct i915_sched_node *node, unsigned int bump) +{ + struct i915_sched_attr attr = node->attr; + + attr.priority |= bump; + __i915_schedule(node, &attr); +} + void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump) { - struct i915_sched_attr attr; unsigned long flags; GEM_BUG_ON(bump & ~I915_PRIORITY_MASK); @@ -330,11 +336,7 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump) return; spin_lock_irqsave(&schedule_lock, flags); - - attr = rq->sched.attr; - attr.priority |= bump; - __i915_schedule(rq, &attr); - + __bump_priority(&rq->sched, bump); spin_unlock_irqrestore(&schedule_lock, flags); } -- cgit v1.2.3 From 9981927cc9e10fb4dbc24b2a5f8c17ab133859a0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 May 2019 14:00:50 +0100 Subject: drm/i915: Bump signaler priority on adding a waiter The handling of the no-preemption priority level imposes the restriction that we need to maintain the implied ordering even though preemption is disabled. Otherwise we may end up with an AB-BA deadlock across multiple engine due to a real preemption event reordering the no-preemption WAITs. To resolve this issue we currently promote all requests to WAIT on unsubmission, however this interferes with the timeslicing requirement that we do not apply any implicit promotion that will defeat the round-robin timeslice list. (If we automatically promote the active request it will go back to the head of the queue and not the tail!) So we need implicit promotion to prevent reordering around semaphores where we are not allowed to preempt, and we must avoid implicit promotion on unsubmission. So instead of at unsubmit, if we apply that implicit promotion on adding the dependency, we avoid the semaphore deadlock and we also reduce the gains made by the promotion for user space waiting. Furthermore, by keeping the earlier dependencies at a higher level, we reduce the search space for timeslicing without altering runtime scheduling too badly (no dependencies at all will be assigned a higher priority for rrul). v2: Limit the bump to external edges (as originally intended) i.e. between contexts and out to the user. Testcase: igt/gem_concurrent_blit Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190515130052.4475-3-chris@chris-wilson.co.uk (cherry picked from commit 6e7eb7a80769e7250e31652b96918cf7f3e0d285) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_request.c | 9 --------- drivers/gpu/drm/i915/i915_scheduler.c | 11 +++++++++++ drivers/gpu/drm/i915/i915_scheduler_types.h | 3 ++- drivers/gpu/drm/i915/selftests/intel_lrc.c | 12 ++++++++---- 4 files changed, 21 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_scheduler.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f6c78c0fa74b..f258281bf3f3 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -502,15 +502,6 @@ void __i915_request_unsubmit(struct i915_request *request) /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); - /* - * As we do not allow WAIT to preempt inflight requests, - * once we have executed a request, along with triggering - * any execution callbacks, we must preserve its ordering - * within the non-preemptible FIFO. - */ - BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */ - request->sched.attr.priority |= __NO_PREEMPTION; - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) i915_request_cancel_breadcrumb(request); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 3cfadb9db988..108f52e1bf35 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -383,6 +383,16 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, !node_started(signal)) node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN; + /* + * As we do not allow WAIT to preempt inflight requests, + * once we have executed a request, along with triggering + * any execution callbacks, we must preserve its ordering + * within the non-preemptible FIFO. + */ + BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); + if (flags & I915_DEPENDENCY_EXTERNAL) + __bump_priority(signal, __NO_PREEMPTION); + ret = true; } @@ -401,6 +411,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node, return -ENOMEM; if (!__i915_sched_node_add_dependency(node, signal, dep, + I915_DEPENDENCY_EXTERNAL | I915_DEPENDENCY_ALLOC)) i915_dependency_free(dep); diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index f1af3916a808..4f2b2eb7c3e5 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -66,7 +66,8 @@ struct i915_dependency { struct list_head wait_link; struct list_head dfs_link; unsigned long flags; -#define I915_DEPENDENCY_ALLOC BIT(0) +#define I915_DEPENDENCY_ALLOC BIT(0) +#define I915_DEPENDENCY_EXTERNAL BIT(1) }; #endif /* _I915_SCHEDULER_TYPES_H_ */ diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index fbee030db940..e8b0b5dbcb2c 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -99,12 +99,14 @@ static int live_busywait_preempt(void *arg) ctx_hi = kernel_context(i915); if (!ctx_hi) goto err_unlock; - ctx_hi->sched.priority = INT_MAX; + ctx_hi->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); ctx_lo = kernel_context(i915); if (!ctx_lo) goto err_ctx_hi; - ctx_lo->sched.priority = INT_MIN; + ctx_lo->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { @@ -954,12 +956,14 @@ static int live_preempt_hang(void *arg) ctx_hi = kernel_context(i915); if (!ctx_hi) goto err_spin_lo; - ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; + ctx_hi->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); ctx_lo = kernel_context(i915); if (!ctx_lo) goto err_ctx_hi; - ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; + ctx_lo->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); for_each_engine(engine, i915, id) { struct i915_request *rq; -- cgit v1.2.3