summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2022-02-04 15:48:26 +1000
committerDave Airlie <airlied@redhat.com>2022-02-04 15:48:27 +1000
commit9ca3d3cd0857523c95ab8cdbb6cfe47b8f90e309 (patch)
treefb4f74697e0cbad23e60211d106173ec6de39b37
parent8ea2c5187d7b4901a70374415e772f1db422fb74 (diff)
parent7d73c602154df56802a9e75ac212505fc1e9a2b6 (diff)
Merge tag 'drm-intel-fixes-2022-02-03' of git://anongit.freedesktop.org/drm/drm-intel into drm-fixesdrm-fixes-2022-02-04
Fix GitLab issue #4698: DP monitor through Type-C dock(Dell DA310) doesn't work. Fixes for inconsistent engine busyness value and read timeout with GuC. Fix to use ALLOW_FAIL for error capture buffer allocation. Don't use interruptible lock on error path. Smatch fix to reject zero sized overlays. Signed-off-by: Dave Airlie <airlied@redhat.com> From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/YfuiG8SKMKP5V/Dm@jlahtine-mobl.ger.corp.intel.com
-rw-r--r--drivers/gpu/drm/i915/display/intel_overlay.c3
-rw-r--r--drivers/gpu/drm/i915/display/intel_tc.c3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c9
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c114
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c2
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h3
7 files changed, 117 insertions, 22 deletions
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 1a376e9a1ff3..d610e48cab94 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -959,6 +959,9 @@ static int check_overlay_dst(struct intel_overlay *overlay,
const struct intel_crtc_state *pipe_config =
overlay->crtc->config;
+ if (rec->dst_height == 0 || rec->dst_width == 0)
+ return -EINVAL;
+
if (rec->dst_x < pipe_config->pipe_src_w &&
rec->dst_x + rec->dst_width <= pipe_config->pipe_src_w &&
rec->dst_y < pipe_config->pipe_src_h &&
diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c
index 40faa18947c9..dbd7d0d83a14 100644
--- a/drivers/gpu/drm/i915/display/intel_tc.c
+++ b/drivers/gpu/drm/i915/display/intel_tc.c
@@ -345,10 +345,11 @@ static bool icl_tc_phy_status_complete(struct intel_digital_port *dig_port)
static bool adl_tc_phy_status_complete(struct intel_digital_port *dig_port)
{
struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
+ enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port);
struct intel_uncore *uncore = &i915->uncore;
u32 val;
- val = intel_uncore_read(uncore, TCSS_DDI_STATUS(dig_port->tc_phy_fia_idx));
+ val = intel_uncore_read(uncore, TCSS_DDI_STATUS(tc_port));
if (val == 0xffffffff) {
drm_dbg_kms(&i915->drm,
"Port %s: PHY in TCCOLD, assuming not complete\n",
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 3a5b247be738..1736efa43339 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2505,9 +2505,14 @@ static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce,
timeout) < 0) {
i915_request_put(rq);
- tl = intel_context_timeline_lock(ce);
+ /*
+ * Error path, cannot use intel_context_timeline_lock as
+ * that is user interruptable and this clean up step
+ * must be done.
+ */
+ mutex_lock(&ce->timeline->mutex);
intel_context_exit(ce);
- intel_context_timeline_unlock(tl);
+ mutex_unlock(&ce->timeline->mutex);
if (nonblock)
return -EWOULDBLOCK;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index f9240d4baa69..3aabe164c329 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -206,6 +206,11 @@ struct intel_guc {
* context usage for overflows.
*/
struct delayed_work work;
+
+ /**
+ * @shift: Right shift value for the gpm timestamp
+ */
+ u32 shift;
} timestamp;
#ifdef CONFIG_DRM_I915_SELFTEST
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index e7517206af82..154ad726e266 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1113,6 +1113,19 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
if (new_start == lower_32_bits(*prev_start))
return;
+ /*
+ * When gt is unparked, we update the gt timestamp and start the ping
+ * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
+ * is unparked, all switched in contexts will have a start time that is
+ * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
+ *
+ * If neither gt_stamp nor new_start has rolled over, then the
+ * gt_stamp_hi does not need to be adjusted, however if one of them has
+ * rolled over, we need to adjust gt_stamp_hi accordingly.
+ *
+ * The below conditions address the cases of new_start rollover and
+ * gt_stamp_last rollover respectively.
+ */
if (new_start < gt_stamp_last &&
(new_start - gt_stamp_last) <= POLL_TIME_CLKS)
gt_stamp_hi++;
@@ -1124,17 +1137,45 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
}
-static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
+/*
+ * GuC updates shared memory and KMD reads it. Since this is not synchronized,
+ * we run into a race where the value read is inconsistent. Sometimes the
+ * inconsistency is in reading the upper MSB bytes of the last_in value when
+ * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
+ * 24 bits are zero. Since these are non-zero values, it is non-trivial to
+ * determine validity of these values. Instead we read the values multiple times
+ * until they are consistent. In test runs, 3 attempts results in consistent
+ * values. The upper bound is set to 6 attempts and may need to be tuned as per
+ * any new occurences.
+ */
+static void __get_engine_usage_record(struct intel_engine_cs *engine,
+ u32 *last_in, u32 *id, u32 *total)
{
struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine);
+ int i = 0;
+
+ do {
+ *last_in = READ_ONCE(rec->last_switch_in_stamp);
+ *id = READ_ONCE(rec->current_context_index);
+ *total = READ_ONCE(rec->total_runtime);
+
+ if (READ_ONCE(rec->last_switch_in_stamp) == *last_in &&
+ READ_ONCE(rec->current_context_index) == *id &&
+ READ_ONCE(rec->total_runtime) == *total)
+ break;
+ } while (++i < 6);
+}
+
+static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
+{
struct intel_engine_guc_stats *stats = &engine->stats.guc;
struct intel_guc *guc = &engine->gt->uc.guc;
- u32 last_switch = rec->last_switch_in_stamp;
- u32 ctx_id = rec->current_context_index;
- u32 total = rec->total_runtime;
+ u32 last_switch, ctx_id, total;
lockdep_assert_held(&guc->timestamp.lock);
+ __get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
+
stats->running = ctx_id != ~0U && last_switch;
if (stats->running)
__extend_last_switch(guc, &stats->start_gt_clk, last_switch);
@@ -1149,23 +1190,51 @@ static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
}
}
-static void guc_update_pm_timestamp(struct intel_guc *guc,
- struct intel_engine_cs *engine,
- ktime_t *now)
+static u32 gpm_timestamp_shift(struct intel_gt *gt)
+{
+ intel_wakeref_t wakeref;
+ u32 reg, shift;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
+
+ shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
+ GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
+
+ return 3 - shift;
+}
+
+static u64 gpm_timestamp(struct intel_gt *gt)
+{
+ u32 lo, hi, old_hi, loop = 0;
+
+ hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
+ do {
+ lo = intel_uncore_read(gt->uncore, MISC_STATUS0);
+ old_hi = hi;
+ hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
+ } while (old_hi != hi && loop++ < 2);
+
+ return ((u64)hi << 32) | lo;
+}
+
+static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
{
- u32 gt_stamp_now, gt_stamp_hi;
+ struct intel_gt *gt = guc_to_gt(guc);
+ u32 gt_stamp_lo, gt_stamp_hi;
+ u64 gpm_ts;
lockdep_assert_held(&guc->timestamp.lock);
gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
- gt_stamp_now = intel_uncore_read(engine->uncore,
- RING_TIMESTAMP(engine->mmio_base));
+ gpm_ts = gpm_timestamp(gt) >> guc->timestamp.shift;
+ gt_stamp_lo = lower_32_bits(gpm_ts);
*now = ktime_get();
- if (gt_stamp_now < lower_32_bits(guc->timestamp.gt_stamp))
+ if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
gt_stamp_hi++;
- guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_now;
+ guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
}
/*
@@ -1208,8 +1277,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
stats_saved = *stats;
gt_stamp_saved = guc->timestamp.gt_stamp;
+ /*
+ * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
+ * start_gt_clk' calculation below for active engines.
+ */
guc_update_engine_gt_clks(engine);
- guc_update_pm_timestamp(guc, engine, now);
+ guc_update_pm_timestamp(guc, now);
intel_gt_pm_put_async(gt);
if (i915_reset_count(gpu_error) != reset_count) {
*stats = stats_saved;
@@ -1241,8 +1314,8 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
spin_lock_irqsave(&guc->timestamp.lock, flags);
+ guc_update_pm_timestamp(guc, &unused);
for_each_engine(engine, gt, id) {
- guc_update_pm_timestamp(guc, engine, &unused);
guc_update_engine_gt_clks(engine);
engine->stats.guc.prev_total = 0;
}
@@ -1259,10 +1332,11 @@ static void __update_guc_busyness_stats(struct intel_guc *guc)
ktime_t unused;
spin_lock_irqsave(&guc->timestamp.lock, flags);
- for_each_engine(engine, gt, id) {
- guc_update_pm_timestamp(guc, engine, &unused);
+
+ guc_update_pm_timestamp(guc, &unused);
+ for_each_engine(engine, gt, id)
guc_update_engine_gt_clks(engine);
- }
+
spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}
@@ -1335,10 +1409,15 @@ void intel_guc_busyness_park(struct intel_gt *gt)
void intel_guc_busyness_unpark(struct intel_gt *gt)
{
struct intel_guc *guc = &gt->uc.guc;
+ unsigned long flags;
+ ktime_t unused;
if (!guc_submission_initialized(guc))
return;
+ spin_lock_irqsave(&guc->timestamp.lock, flags);
+ guc_update_pm_timestamp(guc, &unused);
+ spin_unlock_irqrestore(&guc->timestamp.lock, flags);
mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
guc->timestamp.ping_delay);
}
@@ -1783,6 +1862,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
spin_lock_init(&guc->timestamp.lock);
INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
+ guc->timestamp.shift = gpm_timestamp_shift(gt);
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 5ae812d60abe..0633888a411e 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1522,7 +1522,7 @@ capture_engine(struct intel_engine_cs *engine,
struct i915_request *rq = NULL;
unsigned long flags;
- ee = intel_engine_coredump_alloc(engine, GFP_KERNEL);
+ ee = intel_engine_coredump_alloc(engine, ALLOW_FAIL);
if (!ee)
return NULL;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c32420cb8ed5..c2bb33febb68 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2684,7 +2684,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define RING_WAIT (1 << 11) /* gen3+, PRBx_CTL */
#define RING_WAIT_SEMAPHORE (1 << 10) /* gen6+ */
-#define GUCPMTIMESTAMP _MMIO(0xC3E8)
+#define MISC_STATUS0 _MMIO(0xA500)
+#define MISC_STATUS1 _MMIO(0xA504)
/* There are 16 64-bit CS General Purpose Registers per-engine on Gen8+ */
#define GEN8_RING_CS_GPR(base, n) _MMIO((base) + 0x600 + (n) * 8)