summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_pmu.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2020-11-13 15:01:13 +1000
committerDave Airlie <airlied@redhat.com>2020-11-13 15:01:57 +1000
commit334a1683935fceba346768b62cb3bb2d3e045578 (patch)
tree3730aa9bfefc4ef4b8ee7160c2fd04cd69a3e56f /drivers/gpu/drm/i915/i915_pmu.c
parent24bdae6993f7c430a48b1e3a16f27f98e414ee28 (diff)
parent695dc55b573985569259e18f8e6261a77924342b (diff)
Merge tag 'drm-intel-gt-next-2020-11-12-1' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
Cross-subsystem Changes: - DMA mapped scatterlist fixes in i915 to unblock merging of https://lkml.org/lkml/2020/9/27/70 (Tvrtko, Tom) Driver Changes: - Fix for user reported issue #2381 (Graphical output stops with "switching to inteldrmfb from simple"): Mark ininitial fb obj as WT on eLLC machines to avoid rcu lockup during fbdev init (Ville, Chris) - Fix for Tigerlake (and earlier) to avoid spurious empty CSB events leading to hang (Chris, Bruce) - Delay execlist processing for Tigerlake to avoid hang (Chris) - Fix for Tigerlake RCS engine health check through heartbeat (Chris) - Fix for Tigerlake reserved MOCS entries (Ayaz, Chris) - Fix Media power gate sequence on Tigerlake (Rodrigo) - Enable eLLC caching of display buffers for SKL+ (Ville) - Support parsing of oversize batches on Gen9 (Matt, Chris) - Exclude low pages (128KiB) of stolen from use to avoid thrashing during reset (Chris) - Flush engines before Tigerlake breadcrumbs (Chris) - Use the local HWSP offset during submission (Chris) - Flush coherency domains on first set-domain-ioctl (Chris, Zbigniew) - Use the active reference on the vma while capturing to avoid use-after-free (Chris) - Fix MOCS PTE setting for gen9+ (Ville) - Avoid NULL dereference on IPS driver callback while unbinding i915 (Chris) - Avoid NULL dereference from PT/PD stash allocation error (Matt) - Hold request reference for canceling an active context (Chris) - Avoid infinite loop on x86-32 when mapping a lot of objects (Chris) - Disallow WC mappings when processor doesn't support them (Chris) - Return correct error in i915_gem_object_copy_blt() error path (Dan) - Return correct error in intel_context_create_request() error path (Maarten) - Tune down GuC communication enabled/disabled messages to debug (Jani) - Fix rebased commit "Remove i915_request.lock requirement for execution callbacks" (Chris) - Cancel outstanding work after disabling heartbeats on an engine (Chris) - Signal cancelled requests (Chris) - Retire cancelled requests on unload (Chris) - Scrub HW state on driver remove (Chris) - Undo forced context restores after trivial preemptions (Chris) - Handle PCI unbind in PMU code (Tvrtko) - Fix CPU hotplug with multiple GPUs in PMU code (Trtkko) - Correctly set SFC capability for video engines (Venkata) - Update GuC code to use firmware v49.0.1 (John, Matthew B., Daniele, Oscar, Michel, Rodrigo, Michal) - Improve GuC warnings on loading failure (John) - Avoid ownership race in buffer pool by clearing age (Chris) - Use MMIO to read CSB in case of failure (Chris, Mika) - Show engine properties in engine state dump to indicate changes (Chris, Joonas) - Break up error capture compression loops with cond_resched() (Chris) - Reduce GPU error capture mutex hold time to avoid khungtaskd (Chris) - Serialise debugfs i915_gem_objects with ctx->mutex (Chris) - Always test execution status on closing the context and close if not persistent (Chris) - Avoid mixing integer types during batch copies (Chris, Jared) - Skip over MI_NOOP when parsing to avoid overhead (Chris) - Hold onto an explicit ref to i915_vma_work.pinned (Chris) - Perform all asynchronous waits prior to marking payload start (Chris) - Pull phys pread/pwrite implementations to the backend (Matt) - Improve record of hung engines in error state (Tvrtko) - Allow backends to override pread implementation (Matt) - Reinforce LRC poisoning checks to confirm context survives execution (Chris) - Fix memory region max size calculation (Matt) - Fix order when adding blocks to memory region (Matt) - Eliminate unused intel_virtual_engine_get_sibling func (Chris) - Cleanup kasan warning for on-stack (unsigned long) casting (Chris) - Onion unwind for scratch page allocation failure (Chris) - Poison stolen pages before use (Chris) - Selftest improvements (Chris) Signed-off-by: Dave Airlie <airlied@redhat.com> From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20201112163407.GA20320@jlahtine-mobl.ger.corp.intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/i915_pmu.c')
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.c96
1 files changed, 74 insertions, 22 deletions
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 69c0fa20eba1..cd786ad12be7 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -30,6 +30,7 @@
#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
static cpumask_t i915_pmu_cpumask;
+static unsigned int i915_pmu_target_cpu = -1;
static u8 engine_config_sample(u64 config)
{
@@ -445,6 +446,8 @@ static void i915_pmu_event_destroy(struct perf_event *event)
container_of(event->pmu, typeof(*i915), pmu.base);
drm_WARN_ON(&i915->drm, event->parent);
+
+ drm_dev_put(&i915->drm);
}
static int
@@ -510,8 +513,12 @@ static int i915_pmu_event_init(struct perf_event *event)
{
struct drm_i915_private *i915 =
container_of(event->pmu, typeof(*i915), pmu.base);
+ struct i915_pmu *pmu = &i915->pmu;
int ret;
+ if (pmu->closed)
+ return -ENODEV;
+
if (event->attr.type != event->pmu->type)
return -ENOENT;
@@ -536,8 +543,10 @@ static int i915_pmu_event_init(struct perf_event *event)
if (ret)
return ret;
- if (!event->parent)
+ if (!event->parent) {
+ drm_dev_get(&i915->drm);
event->destroy = i915_pmu_event_destroy;
+ }
return 0;
}
@@ -594,9 +603,16 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
static void i915_pmu_event_read(struct perf_event *event)
{
+ struct drm_i915_private *i915 =
+ container_of(event->pmu, typeof(*i915), pmu.base);
struct hw_perf_event *hwc = &event->hw;
+ struct i915_pmu *pmu = &i915->pmu;
u64 prev, new;
+ if (pmu->closed) {
+ event->hw.state = PERF_HES_STOPPED;
+ return;
+ }
again:
prev = local64_read(&hwc->prev_count);
new = __i915_pmu_event_read(event);
@@ -724,6 +740,13 @@ static void i915_pmu_disable(struct perf_event *event)
static void i915_pmu_event_start(struct perf_event *event, int flags)
{
+ struct drm_i915_private *i915 =
+ container_of(event->pmu, typeof(*i915), pmu.base);
+ struct i915_pmu *pmu = &i915->pmu;
+
+ if (pmu->closed)
+ return;
+
i915_pmu_enable(event);
event->hw.state = 0;
}
@@ -738,6 +761,13 @@ static void i915_pmu_event_stop(struct perf_event *event, int flags)
static int i915_pmu_event_add(struct perf_event *event, int flags)
{
+ struct drm_i915_private *i915 =
+ container_of(event->pmu, typeof(*i915), pmu.base);
+ struct i915_pmu *pmu = &i915->pmu;
+
+ if (pmu->closed)
+ return -ENODEV;
+
if (flags & PERF_EF_START)
i915_pmu_event_start(event, flags);
@@ -1020,25 +1050,39 @@ static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
- unsigned int target;
+ unsigned int target = i915_pmu_target_cpu;
GEM_BUG_ON(!pmu->base.event_init);
+ /*
+ * Unregistering an instance generates a CPU offline event which we must
+ * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask.
+ */
+ if (pmu->closed)
+ return 0;
+
if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+
/* Migrate events if there is a valid target */
if (target < nr_cpu_ids) {
cpumask_set_cpu(target, &i915_pmu_cpumask);
- perf_pmu_migrate_context(&pmu->base, cpu, target);
+ i915_pmu_target_cpu = target;
}
}
+ if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
+ perf_pmu_migrate_context(&pmu->base, cpu, target);
+ pmu->cpuhp.cpu = target;
+ }
+
return 0;
}
-static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
+static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
+
+void i915_pmu_init(void)
{
- enum cpuhp_state slot;
int ret;
ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
@@ -1046,27 +1090,29 @@ static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
i915_pmu_cpu_online,
i915_pmu_cpu_offline);
if (ret < 0)
- return ret;
+ pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n",
+ ret);
+ else
+ cpuhp_slot = ret;
+}
- slot = ret;
- ret = cpuhp_state_add_instance(slot, &pmu->cpuhp.node);
- if (ret) {
- cpuhp_remove_multi_state(slot);
- return ret;
- }
+void i915_pmu_exit(void)
+{
+ if (cpuhp_slot != CPUHP_INVALID)
+ cpuhp_remove_multi_state(cpuhp_slot);
+}
- pmu->cpuhp.slot = slot;
- return 0;
+static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
+{
+ if (cpuhp_slot == CPUHP_INVALID)
+ return -EINVAL;
+
+ return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
}
static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
{
- struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
-
- drm_WARN_ON(&i915->drm, pmu->cpuhp.slot == CPUHP_INVALID);
- drm_WARN_ON(&i915->drm, cpuhp_state_remove_instance(pmu->cpuhp.slot, &pmu->cpuhp.node));
- cpuhp_remove_multi_state(pmu->cpuhp.slot);
- pmu->cpuhp.slot = CPUHP_INVALID;
+ cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
}
static bool is_igp(struct drm_i915_private *i915)
@@ -1100,7 +1146,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
spin_lock_init(&pmu->lock);
hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
pmu->timer.function = i915_sample;
- pmu->cpuhp.slot = CPUHP_INVALID;
+ pmu->cpuhp.cpu = -1;
if (!is_igp(i915)) {
pmu->name = kasprintf(GFP_KERNEL,
@@ -1167,7 +1213,13 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
if (!pmu->base.event_init)
return;
- drm_WARN_ON(&i915->drm, pmu->enable);
+ /*
+ * "Disconnect" the PMU callbacks - since all are atomic synchronize_rcu
+ * ensures all currently executing ones will have exited before we
+ * proceed with unregistration.
+ */
+ pmu->closed = true;
+ synchronize_rcu();
hrtimer_cancel(&pmu->timer);