summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt/intel_lrc.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_lrc.c')
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c120
1 files changed, 81 insertions, 39 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 683014e7bc51..2dfaddb8811e 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -456,10 +456,10 @@ assert_priority_queue(const struct i915_request *prev,
* engine info, SW context ID and SW counter need to form a unique number
* (Context ID) per lrc.
*/
-static u64
+static u32
lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
{
- u64 desc;
+ u32 desc;
desc = INTEL_LEGACY_32B_CONTEXT;
if (i915_vm_is_4lvl(ce->vm))
@@ -470,21 +470,7 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
if (IS_GEN(engine->i915, 8))
desc |= GEN8_CTX_L3LLC_COHERENT;
- desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
- /*
- * The following 32bits are copied into the OA reports (dword 2).
- * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
- * anything below.
- */
- if (INTEL_GEN(engine->i915) >= 11) {
- desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
- /* bits 48-53 */
-
- desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
- /* bits 61-63 */
- }
-
- return desc;
+ return i915_ggtt_offset(ce->state) | desc;
}
static inline unsigned int dword_in_page(void *addr)
@@ -1192,7 +1178,7 @@ static void reset_active(struct i915_request *rq,
__execlists_update_reg_state(ce, engine, head);
/* We've switched away, so this should be a no-op, but intent matters */
- ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+ ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
}
static u32 intel_context_get_runtime(const struct intel_context *ce)
@@ -1251,18 +1237,23 @@ __execlists_schedule_in(struct i915_request *rq)
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
execlists_check_context(ce, engine);
- ce->lrc_desc &= ~GENMASK_ULL(47, 37);
if (ce->tag) {
/* Use a fixed tag for OA and friends */
- ce->lrc_desc |= (u64)ce->tag << 32;
+ GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
+ ce->lrc.ccid = ce->tag;
} else {
/* We don't need a strict matching tag, just different values */
- ce->lrc_desc |=
- (u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
- GEN11_SW_CTX_ID_SHIFT;
- BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
+ unsigned int tag = ffs(engine->context_tag);
+
+ GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
+ clear_bit(tag - 1, &engine->context_tag);
+ ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32);
+
+ BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
}
+ ce->lrc.ccid |= engine->execlists.ccid;
+
__intel_gt_pm_get(engine->gt);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
intel_engine_context_in(engine);
@@ -1302,7 +1293,8 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
static inline void
__execlists_schedule_out(struct i915_request *rq,
- struct intel_engine_cs * const engine)
+ struct intel_engine_cs * const engine,
+ unsigned int ccid)
{
struct intel_context * const ce = rq->context;
@@ -1320,6 +1312,14 @@ __execlists_schedule_out(struct i915_request *rq,
i915_request_completed(rq))
intel_engine_add_retire(engine, ce->timeline);
+ ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
+ ccid &= GEN12_MAX_CONTEXT_HW_ID;
+ if (ccid < BITS_PER_LONG) {
+ GEM_BUG_ON(ccid == 0);
+ GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
+ set_bit(ccid - 1, &engine->context_tag);
+ }
+
intel_context_update_runtime(ce);
intel_engine_context_out(engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
@@ -1345,15 +1345,17 @@ execlists_schedule_out(struct i915_request *rq)
{
struct intel_context * const ce = rq->context;
struct intel_engine_cs *cur, *old;
+ u32 ccid;
trace_i915_request_out(rq);
+ ccid = rq->context->lrc.ccid;
old = READ_ONCE(ce->inflight);
do
cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
while (!try_cmpxchg(&ce->inflight, &old, cur));
if (!cur)
- __execlists_schedule_out(rq, old);
+ __execlists_schedule_out(rq, old, ccid);
i915_request_put(rq);
}
@@ -1361,7 +1363,7 @@ execlists_schedule_out(struct i915_request *rq)
static u64 execlists_update_context(struct i915_request *rq)
{
struct intel_context *ce = rq->context;
- u64 desc = ce->lrc_desc;
+ u64 desc = ce->lrc.desc;
u32 tail, prev;
/*
@@ -1400,7 +1402,7 @@ static u64 execlists_update_context(struct i915_request *rq)
*/
wmb();
- ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
+ ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE;
return desc;
}
@@ -1719,6 +1721,9 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl)
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
+ if (p->flags & I915_DEPENDENCY_WEAK)
+ continue;
+
/* Leave semaphores spinning on the other engines */
if (w->engine != rq->engine)
continue;
@@ -1754,7 +1759,8 @@ static void defer_active(struct intel_engine_cs *engine)
}
static bool
-need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
+need_timeslice(const struct intel_engine_cs *engine,
+ const struct i915_request *rq)
{
int hint;
@@ -1768,6 +1774,32 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
return hint >= effective_prio(rq);
}
+static bool
+timeslice_yield(const struct intel_engine_execlists *el,
+ const struct i915_request *rq)
+{
+ /*
+ * Once bitten, forever smitten!
+ *
+ * If the active context ever busy-waited on a semaphore,
+ * it will be treated as a hog until the end of its timeslice (i.e.
+ * until it is scheduled out and replaced by a new submission,
+ * possibly even its own lite-restore). The HW only sends an interrupt
+ * on the first miss, and we do know if that semaphore has been
+ * signaled, or even if it is now stuck on another semaphore. Play
+ * safe, yield if it might be stuck -- it will be given a fresh
+ * timeslice in the near future.
+ */
+ return rq->context->lrc.ccid == READ_ONCE(el->yield);
+}
+
+static bool
+timeslice_expired(const struct intel_engine_execlists *el,
+ const struct i915_request *rq)
+{
+ return timer_expired(&el->timer) || timeslice_yield(el, rq);
+}
+
static int
switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
{
@@ -1783,8 +1815,7 @@ timeslice(const struct intel_engine_cs *engine)
return READ_ONCE(engine->props.timeslice_duration_ms);
}
-static unsigned long
-active_timeslice(const struct intel_engine_cs *engine)
+static unsigned long active_timeslice(const struct intel_engine_cs *engine)
{
const struct intel_engine_execlists *execlists = &engine->execlists;
const struct i915_request *rq = *execlists->active;
@@ -1946,13 +1977,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
last = NULL;
} else if (need_timeslice(engine, last) &&
- timer_expired(&engine->execlists.timer)) {
+ timeslice_expired(execlists, last)) {
ENGINE_TRACE(engine,
- "expired last=%llx:%lld, prio=%d, hint=%d\n",
+ "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
last->fence.context,
last->fence.seqno,
last->sched.attr.priority,
- execlists->queue_priority_hint);
+ execlists->queue_priority_hint,
+ yesno(timeslice_yield(execlists, last)));
ring_set_paused(engine, 1);
defer_active(engine);
@@ -2213,6 +2245,7 @@ done:
}
clear_ports(port + 1, last_port - port);
+ WRITE_ONCE(execlists->yield, -1);
execlists_submit_ports(engine);
set_preempt_timeout(engine, *active);
} else {
@@ -3043,7 +3076,7 @@ __execlists_context_pin(struct intel_context *ce,
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
- ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
+ ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
__execlists_update_reg_state(ce, engine, ce->ring->tail);
@@ -3072,7 +3105,7 @@ static void execlists_context_reset(struct intel_context *ce)
ce, ce->engine, ce->ring, true);
__execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
- ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+ ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
}
static const struct intel_context_ops execlists_context_ops = {
@@ -3541,7 +3574,7 @@ static void enable_execlists(struct intel_engine_cs *engine)
enable_error_interrupt(engine);
- engine->context_tag = 0;
+ engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
}
static bool unexpected_starting_state(struct intel_engine_cs *engine)
@@ -3753,7 +3786,7 @@ out_replay:
head, ce->ring->tail);
__execlists_reset_reg_state(ce, engine);
__execlists_update_reg_state(ce, engine, head);
- ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
+ ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
unwind:
/* Push back any incomplete requests for replay after the reset. */
@@ -4369,8 +4402,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
if (!intel_vgpu_active(engine->i915)) {
engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
- if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
+ if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) {
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+ if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ engine->flags |= I915_ENGINE_HAS_TIMESLICES;
+ }
}
if (INTEL_GEN(engine->i915) >= 12)
@@ -4449,6 +4485,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
+ engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
}
static void rcs_submission_override(struct intel_engine_cs *engine)
@@ -4516,6 +4553,11 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
else
execlists->csb_size = GEN11_CSB_ENTRIES;
+ if (INTEL_GEN(engine->i915) >= 11) {
+ execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
+ execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
+ }
+
reset_csb_pointers(engine);
/* Finally, take ownership and responsibility for cleanup! */