From 00febf644648f430593a8b8675ad4bc97c33a07b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 19 May 2020 14:20:46 +0100 Subject: drm/i915/gt: Incorporate the virtual engine into timeslicing It was quite the oversight to only factor in the normal queue to decide the timeslicing switch priority. By leaving out the next virtual request from the priority decision, we would not timeslice the current engine if there was an available virtual request. Testcase: igt/gem_exec_balancer/sliced Fixes: 3df2deed411e ("drm/i915/execlists: Enable timeslice on partial virtual engine dequeue") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200519132046.22443-3-chris@chris-wilson.co.uk (cherry picked from commit 6ad249ba59badc7ff157d4db1f835748f0e2c9b6) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_lrc.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 2dfaddb8811e..e77f89b43e5f 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1760,7 +1760,8 @@ static void defer_active(struct intel_engine_cs *engine) static bool need_timeslice(const struct intel_engine_cs *engine, - const struct i915_request *rq) + const struct i915_request *rq, + const struct rb_node *rb) { int hint; @@ -1768,6 +1769,24 @@ need_timeslice(const struct intel_engine_cs *engine, return false; hint = engine->execlists.queue_priority_hint; + + if (rb) { + const struct virtual_engine *ve = + rb_entry(rb, typeof(*ve), nodes[engine->id].rb); + const struct intel_engine_cs *inflight = + intel_context_inflight(&ve->context); + + if (!inflight || inflight == engine) { + struct i915_request *next; + + rcu_read_lock(); + next = READ_ONCE(ve->request); + if (next) + hint = max(hint, rq_prio(next)); + rcu_read_unlock(); + } + } + if (!list_is_last(&rq->sched.link, &engine->active.requests)) hint = max(hint, rq_prio(list_next_entry(rq, sched.link))); @@ -1837,10 +1856,9 @@ static void set_timeslice(struct intel_engine_cs *engine) set_timer_ms(&engine->execlists.timer, active_timeslice(engine)); } -static void start_timeslice(struct intel_engine_cs *engine) +static void start_timeslice(struct intel_engine_cs *engine, int prio) { struct intel_engine_execlists *execlists = &engine->execlists; - int prio = queue_prio(execlists); WRITE_ONCE(execlists->switch_priority_hint, prio); if (prio == INT_MIN) @@ -1976,7 +1994,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) __unwind_incomplete_requests(engine); last = NULL; - } else if (need_timeslice(engine, last) && + } else if (need_timeslice(engine, last, rb) && timeslice_expired(execlists, last)) { ENGINE_TRACE(engine, "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n", @@ -2019,7 +2037,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. */ - start_timeslice(engine); + start_timeslice(engine, queue_prio(execlists)); return; } } @@ -2054,7 +2072,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.active.lock); - start_timeslice(engine); + start_timeslice(engine, rq_prio(rq)); return; /* leave this for another sibling */ } -- cgit v1.2.3 From 03d11bd6aa450db7f1fa7f0c96552e05e73915de Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 May 2020 16:05:08 +0200 Subject: drm/i915/pmu: avoid an maybe-uninitialized warning Conditional spinlocks make it hard for gcc and for lockdep to follow the code flow. This one causes a warning with at least gcc-9 and higher: In file included from include/linux/irq.h:14, from drivers/gpu/drm/i915/i915_pmu.c:7: drivers/gpu/drm/i915/i915_pmu.c: In function 'i915_sample': include/linux/spinlock.h:289:3: error: 'flags' may be used uninitialized in this function [-Werror=maybe-uninitialized] 289 | _raw_spin_unlock_irqrestore(lock, flags); \ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/i915_pmu.c:288:17: note: 'flags' was declared here 288 | unsigned long flags; | ^~~~~ Split out the part between the locks into a separate function for readability and to let the compiler figure out what the logic actually is. Fixes: d79e1bd676f0 ("drm/i915/pmu: Only use exclusive mmio access for gen7") Signed-off-by: Arnd Bergmann Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200527140526.1458215-1-arnd@arndb.de (cherry picked from commit 6ec81b82732e2b4a5ac0853fd33919ff1ca94238) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_pmu.c | 84 ++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 42 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 2c062534eac1..569a87d0d799 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -269,12 +269,48 @@ static bool exclusive_mmio_access(const struct drm_i915_private *i915) return IS_GEN(i915, 7); } +static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns) +{ + struct intel_engine_pmu *pmu = &engine->pmu; + bool busy; + u32 val; + + val = ENGINE_READ_FW(engine, RING_CTL); + if (val == 0) /* powerwell off => engine idle */ + return; + + if (val & RING_WAIT) + add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns); + if (val & RING_WAIT_SEMAPHORE) + add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns); + + /* No need to sample when busy stats are supported. */ + if (intel_engine_supports_stats(engine)) + return; + + /* + * While waiting on a semaphore or event, MI_MODE reports the + * ring as idle. However, previously using the seqno, and with + * execlists sampling, we account for the ring waiting as the + * engine being busy. Therefore, we record the sample as being + * busy if either waiting or !idle. + */ + busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT); + if (!busy) { + val = ENGINE_READ_FW(engine, RING_MI_MODE); + busy = !(val & MODE_IDLE); + } + if (busy) + add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); +} + static void engines_sample(struct intel_gt *gt, unsigned int period_ns) { struct drm_i915_private *i915 = gt->i915; struct intel_engine_cs *engine; enum intel_engine_id id; + unsigned long flags; if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0) return; @@ -283,53 +319,17 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) return; for_each_engine(engine, gt, id) { - struct intel_engine_pmu *pmu = &engine->pmu; - spinlock_t *mmio_lock; - unsigned long flags; - bool busy; - u32 val; - if (!intel_engine_pm_get_if_awake(engine)) continue; - mmio_lock = NULL; - if (exclusive_mmio_access(i915)) - mmio_lock = &engine->uncore->lock; - - if (unlikely(mmio_lock)) - spin_lock_irqsave(mmio_lock, flags); - - val = ENGINE_READ_FW(engine, RING_CTL); - if (val == 0) /* powerwell off => engine idle */ - goto skip; - - if (val & RING_WAIT) - add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns); - if (val & RING_WAIT_SEMAPHORE) - add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns); - - /* No need to sample when busy stats are supported. */ - if (intel_engine_supports_stats(engine)) - goto skip; - - /* - * While waiting on a semaphore or event, MI_MODE reports the - * ring as idle. However, previously using the seqno, and with - * execlists sampling, we account for the ring waiting as the - * engine being busy. Therefore, we record the sample as being - * busy if either waiting or !idle. - */ - busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT); - if (!busy) { - val = ENGINE_READ_FW(engine, RING_MI_MODE); - busy = !(val & MODE_IDLE); + if (exclusive_mmio_access(i915)) { + spin_lock_irqsave(&engine->uncore->lock, flags); + engine_sample(engine, period_ns); + spin_unlock_irqrestore(&engine->uncore->lock, flags); + } else { + engine_sample(engine, period_ns); } - if (busy) - add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); -skip: - if (unlikely(mmio_lock)) - spin_unlock_irqrestore(mmio_lock, flags); intel_engine_pm_put_async(engine); } } -- cgit v1.2.3 From 822d6bd1890dbeaedf661d30d2c5280e127ef4b3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 May 2020 16:05:09 +0200 Subject: drm/i915: work around false-positive maybe-uninitialized warning gcc-9 gets confused by the code flow in check_dirty_whitelist: drivers/gpu/drm/i915/gt/selftest_workarounds.c: In function 'check_dirty_whitelist': drivers/gpu/drm/i915/gt/selftest_workarounds.c:492:17: error: 'rsvd' may be used uninitialized in this function [-Werror=maybe-uninitialized] I could not figure out a good way to do this in a way that gcc understands better, so initialize the variable to zero, as last resort. Fixes: aee20aaed887 ("drm/i915: Implement read-only support in whitelist selftest") Signed-off-by: Arnd Bergmann Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200527140526.1458215-2-arnd@arndb.de (cherry picked from commit cc649a9eafc1ef5c40db023084cb94422d08aa84) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 5ed323254ee1..32785463ec9e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -623,6 +623,8 @@ err_request: err = -EINVAL; goto out_unpin; } + } else { + rsvd = 0; } expect = results[0]; -- cgit v1.2.3 From 1f65efb624c4164bfd2e2a025fc25e8bfb651daa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 May 2020 17:24:18 +0100 Subject: drm/i915/gt: Prevent timeslicing into unpreemptable requests We have a I915_REQUEST_NOPREEMPT flag that we set when we must prevent the HW from preempting during the course of this request. We need to honour this flag and protect the HW even if we have a heartbeat request, or other maximum priority barrier, pending. As such, restrict the timeslicing check to avoid preempting into the topmost priority band, leaving the unpreemptable requests in blissful peace running uninterrupted on the HW. v2: Set the I915_PRIORITY_BARRIER to be less than I915_PRIORITY_UNPREEMPTABLE so that we never submit a request (heartbeat or barrier) that can legitimately preempt the current non-premptable request. Fixes: 2a98f4e65bba ("drm/i915: add infrastructure to hold off preemption on a request") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200527162418.24755-1-chris@chris-wilson.co.uk (cherry picked from commit b72f02d78e4f257761ed003444ae52083f962076) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_lrc.c | 1 + drivers/gpu/drm/i915/gt/selftest_lrc.c | 118 ++++++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_priolist_types.h | 2 +- 3 files changed, 119 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index e77f89b43e5f..7c369737d1cf 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1790,6 +1790,7 @@ need_timeslice(const struct intel_engine_cs *engine, if (!list_is_last(&rq->sched.link, &engine->active.requests)) hint = max(hint, rq_prio(list_next_entry(rq, sched.link))); + GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE); return hint >= effective_prio(rq); } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index f95ae15ce865..904193960584 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -811,7 +811,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer, } } - err = release_queue(outer, vma, n, INT_MAX); + err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER); if (err) goto out; @@ -1276,6 +1276,121 @@ err_obj: return err; } +static int live_timeslice_nopreempt(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + /* + * We should not timeslice into a request that is marked with + * I915_REQUEST_NOPREEMPT. + */ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + struct i915_request *rq; + unsigned long timeslice; + + if (!intel_engine_has_preemption(engine)) + continue; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + engine_heartbeat_disable(engine); + timeslice = xchg(&engine->props.timeslice_duration_ms, 1); + + /* Create an unpreemptible spinner */ + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_heartbeat; + } + + i915_request_get(rq); + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + i915_request_put(rq); + err = -ETIME; + goto out_spin; + } + + set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags); + i915_request_put(rq); + + /* Followed by a maximum priority barrier (heartbeat) */ + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(rq); + goto out_spin; + } + + rq = intel_context_create_request(ce); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_spin; + } + + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_get(rq); + i915_request_add(rq); + + /* + * Wait until the barrier is in ELSP, and we know timeslicing + * will have been activated. + */ + if (wait_for_submit(engine, rq, HZ / 2)) { + i915_request_put(rq); + err = -ETIME; + goto out_spin; + } + + /* + * Since the ELSP[0] request is unpreemptible, it should not + * allow the maximum priority barrier through. Wait long + * enough to see if it is timesliced in by mistake. + */ + if (i915_request_wait(rq, 0, timeslice_threshold(engine)) >= 0) { + pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n", + engine->name); + err = -EINVAL; + } + i915_request_put(rq); + +out_spin: + igt_spinner_end(&spin); +out_heartbeat: + xchg(&engine->props.timeslice_duration_ms, timeslice); + engine_heartbeat_enable(engine); + if (err) + break; + + if (igt_flush_test(gt->i915)) { + err = -EIO; + break; + } + } + + igt_spinner_fini(&spin); + return err; +} + static int live_busywait_preempt(void *arg) { struct intel_gt *gt = arg; @@ -3962,6 +4077,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_timeslice_preempt), SUBTEST(live_timeslice_rewind), SUBTEST(live_timeslice_queue), + SUBTEST(live_timeslice_nopreempt), SUBTEST(live_busywait_preempt), SUBTEST(live_preempt), SUBTEST(live_late_preempt), diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h index 732aad148881..eb97d626f9ad 100644 --- a/drivers/gpu/drm/i915/i915_priolist_types.h +++ b/drivers/gpu/drm/i915/i915_priolist_types.h @@ -45,7 +45,7 @@ enum { * active request. */ #define I915_PRIORITY_UNPREEMPTABLE INT_MAX -#define I915_PRIORITY_BARRIER INT_MAX +#define I915_PRIORITY_BARRIER (I915_PRIORITY_UNPREEMPTABLE - 1) #define __NO_PREEMPTION (I915_PRIORITY_WAIT) -- cgit v1.2.3