summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_engines.c2
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt.c15
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt.h9
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt_pm.c74
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs.c58
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_sseu.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h42
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c205
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c20
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.h6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pool.h34
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h54
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c59
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c909
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h78
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gpu_commands.h6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c69
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c (renamed from drivers/gpu/drm/i915/gt/intel_engine_pool.c)114
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h37
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h (renamed from drivers/gpu/drm/i915/gt/intel_engine_pool_types.h)15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c102
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h27
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c17
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.c20
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h20
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.h5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c1079
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc_reg.h6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.c47
-rw-r--r--drivers/gpu/drm/i915/gt/intel_renderstate.c22
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c16
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.h5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_submission.c49
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c449
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.h61
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps_types.h14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.c33
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.c37
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.h7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c49
-rw-r--r--drivers/gpu/drm/i915/gt/mock_engine.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_context.c12
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_gt_pm.c8
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_lrc.c685
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rc6.c30
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_ring_submission.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rps.c1331
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rps.h17
-rw-r--r--drivers/gpu/drm/i915/gt/shmem_utils.c173
-rw-r--r--drivers/gpu/drm/i915/gt/shmem_utils.h23
-rw-r--r--drivers/gpu/drm/i915/gt/st_shmem_utils.c63
-rw-r--r--drivers/gpu/drm/i915/gt/sysfs_engines.c94
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c46
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h7
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c42
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.c97
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.h4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c124
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.h15
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c53
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c36
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c17
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c35
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.h1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c30
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c56
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h3
80 files changed, 5783 insertions, 1184 deletions
diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.c b/drivers/gpu/drm/i915/gt/debugfs_engines.c
index 6a5e9ab20b94..5e3725e62241 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/debugfs_engines.c
@@ -32,5 +32,5 @@ void debugfs_engines_register(struct intel_gt *gt, struct dentry *root)
{ "engines", &engines_fops },
};
- debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files));
+ intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
}
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.c b/drivers/gpu/drm/i915/gt/debugfs_gt.c
index 75255aaacaed..1de5fbaa1cf9 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_gt.c
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt.c
@@ -9,6 +9,7 @@
#include "debugfs_engines.h"
#include "debugfs_gt.h"
#include "debugfs_gt_pm.h"
+#include "uc/intel_uc_debugfs.h"
#include "i915_drv.h"
void debugfs_gt_register(struct intel_gt *gt)
@@ -24,17 +25,19 @@ void debugfs_gt_register(struct intel_gt *gt)
debugfs_engines_register(gt, root);
debugfs_gt_pm_register(gt, root);
+
+ intel_uc_debugfs_register(&gt->uc, root);
}
-void debugfs_gt_register_files(struct intel_gt *gt,
- struct dentry *root,
- const struct debugfs_gt_file *files,
- unsigned long count)
+void intel_gt_debugfs_register_files(struct dentry *root,
+ const struct debugfs_gt_file *files,
+ unsigned long count, void *data)
{
while (count--) {
- if (!files->eval || files->eval(gt))
+ umode_t mode = files->fops->write ? 0644 : 0444;
+ if (!files->eval || files->eval(data))
debugfs_create_file(files->name,
- 0444, root, gt,
+ mode, root, data,
files->fops);
files++;
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.h b/drivers/gpu/drm/i915/gt/debugfs_gt.h
index 4ea0f06cda8f..f77540f727e9 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_gt.h
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt.h
@@ -28,12 +28,11 @@ void debugfs_gt_register(struct intel_gt *gt);
struct debugfs_gt_file {
const char *name;
const struct file_operations *fops;
- bool (*eval)(const struct intel_gt *gt);
+ bool (*eval)(void *data);
};
-void debugfs_gt_register_files(struct intel_gt *gt,
- struct dentry *root,
- const struct debugfs_gt_file *files,
- unsigned long count);
+void intel_gt_debugfs_register_files(struct dentry *root,
+ const struct debugfs_gt_file *files,
+ unsigned long count, void *data);
#endif /* DEBUGFS_GT_H */
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
index 059c9e5c002e..174a24553322 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
@@ -10,6 +10,7 @@
#include "debugfs_gt_pm.h"
#include "i915_drv.h"
#include "intel_gt.h"
+#include "intel_gt_clock_utils.h"
#include "intel_llc.h"
#include "intel_rc6.h"
#include "intel_rps.h"
@@ -268,7 +269,7 @@ static int frequency_show(struct seq_file *m, void *unused)
yesno(rpmodectl & GEN6_RP_ENABLE));
seq_printf(m, "SW control enabled: %s\n",
yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
- GEN6_RP_MEDIA_SW_MODE));
+ GEN6_RP_MEDIA_SW_MODE));
vlv_punit_get(i915);
freq_sts = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
@@ -300,8 +301,9 @@ static int frequency_show(struct seq_file *m, void *unused)
u32 rp_state_cap;
u32 rpmodectl, rpinclimit, rpdeclimit;
u32 rpstat, cagf, reqf;
- u32 rpupei, rpcurup, rpprevup;
- u32 rpdownei, rpcurdown, rpprevdown;
+ u32 rpcurupei, rpcurup, rpprevup;
+ u32 rpcurdownei, rpcurdown, rpprevdown;
+ u32 rpupei, rpupt, rpdownei, rpdownt;
u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
int max_freq;
@@ -334,12 +336,19 @@ static int frequency_show(struct seq_file *m, void *unused)
rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
- rpupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
+ rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
- rpdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
+ rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
+
+ rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI);
+ rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
+
+ rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
+ rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
+
cagf = intel_rps_read_actual_frequency(rps);
intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
@@ -372,7 +381,7 @@ static int frequency_show(struct seq_file *m, void *unused)
yesno(rpmodectl & GEN6_RP_ENABLE));
seq_printf(m, "SW control enabled: %s\n",
yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
- GEN6_RP_MEDIA_SW_MODE));
+ GEN6_RP_MEDIA_SW_MODE));
seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
pm_ier, pm_imr, pm_mask);
@@ -394,23 +403,35 @@ static int frequency_show(struct seq_file *m, void *unused)
seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
seq_printf(m, "RPNSWREQ: %dMHz\n", reqf);
seq_printf(m, "CAGF: %dMHz\n", cagf);
- seq_printf(m, "RP CUR UP EI: %d (%dus)\n",
- rpupei, GT_PM_INTERVAL_TO_US(i915, rpupei));
- seq_printf(m, "RP CUR UP: %d (%dus)\n",
- rpcurup, GT_PM_INTERVAL_TO_US(i915, rpcurup));
- seq_printf(m, "RP PREV UP: %d (%dus)\n",
- rpprevup, GT_PM_INTERVAL_TO_US(i915, rpprevup));
+ seq_printf(m, "RP CUR UP EI: %d (%dns)\n",
+ rpcurupei,
+ intel_gt_pm_interval_to_ns(gt, rpcurupei));
+ seq_printf(m, "RP CUR UP: %d (%dns)\n",
+ rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
+ seq_printf(m, "RP PREV UP: %d (%dns)\n",
+ rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
seq_printf(m, "Up threshold: %d%%\n",
rps->power.up_threshold);
-
- seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n",
- rpdownei, GT_PM_INTERVAL_TO_US(i915, rpdownei));
- seq_printf(m, "RP CUR DOWN: %d (%dus)\n",
- rpcurdown, GT_PM_INTERVAL_TO_US(i915, rpcurdown));
- seq_printf(m, "RP PREV DOWN: %d (%dus)\n",
- rpprevdown, GT_PM_INTERVAL_TO_US(i915, rpprevdown));
+ seq_printf(m, "RP UP EI: %d (%dns)\n",
+ rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
+ seq_printf(m, "RP UP THRESHOLD: %d (%dns)\n",
+ rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));
+
+ seq_printf(m, "RP CUR DOWN EI: %d (%dns)\n",
+ rpcurdownei,
+ intel_gt_pm_interval_to_ns(gt, rpcurdownei));
+ seq_printf(m, "RP CUR DOWN: %d (%dns)\n",
+ rpcurdown,
+ intel_gt_pm_interval_to_ns(gt, rpcurdown));
+ seq_printf(m, "RP PREV DOWN: %d (%dns)\n",
+ rpprevdown,
+ intel_gt_pm_interval_to_ns(gt, rpprevdown));
seq_printf(m, "Down threshold: %d%%\n",
rps->power.down_threshold);
+ seq_printf(m, "RP DOWN EI: %d (%dns)\n",
+ rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
+ seq_printf(m, "RP DOWN THRESHOLD: %d (%dns)\n",
+ rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 :
rp_state_cap >> 16) & 0xff;
@@ -506,8 +527,10 @@ static int llc_show(struct seq_file *m, void *data)
return 0;
}
-static bool llc_eval(const struct intel_gt *gt)
+static bool llc_eval(void *data)
{
+ struct intel_gt *gt = data;
+
return HAS_LLC(gt->i915);
}
@@ -533,7 +556,8 @@ static int rps_boost_show(struct seq_file *m, void *data)
struct drm_i915_private *i915 = gt->i915;
struct intel_rps *rps = &gt->rps;
- seq_printf(m, "RPS enabled? %d\n", rps->enabled);
+ seq_printf(m, "RPS enabled? %s\n", yesno(intel_rps_is_enabled(rps)));
+ seq_printf(m, "RPS active? %s\n", yesno(intel_rps_is_active(rps)));
seq_printf(m, "GPU busy? %s\n", yesno(gt->awake));
seq_printf(m, "Boosts outstanding? %d\n",
atomic_read(&rps->num_waiters));
@@ -553,7 +577,7 @@ static int rps_boost_show(struct seq_file *m, void *data)
seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
- if (INTEL_GEN(i915) >= 6 && rps->enabled && gt->awake) {
+ if (INTEL_GEN(i915) >= 6 && intel_rps_is_active(rps)) {
struct intel_uncore *uncore = gt->uncore;
u32 rpup, rpupei;
u32 rpdown, rpdownei;
@@ -580,8 +604,10 @@ static int rps_boost_show(struct seq_file *m, void *data)
return 0;
}
-static bool rps_eval(const struct intel_gt *gt)
+static bool rps_eval(void *data)
{
+ struct intel_gt *gt = data;
+
return HAS_RPS(gt->i915);
}
@@ -597,5 +623,5 @@ void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root)
{ "rps_boost", &rps_boost_fops, rps_eval },
};
- debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files));
+ intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
}
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 94e746af8926..699125928272 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -389,6 +389,16 @@ static int gen8_ppgtt_alloc(struct i915_address_space *vm,
return err;
}
+static __always_inline void
+write_pte(gen8_pte_t *pte, const gen8_pte_t val)
+{
+ /* Magic delays? Or can we refine these to flush all in one pass? */
+ *pte = val;
+ wmb(); /* cpu to cache */
+ clflush(pte); /* cache to memory */
+ wmb(); /* visible to all */
+}
+
static __always_inline u64
gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
struct i915_page_directory *pdp,
@@ -405,7 +415,8 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
do {
GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE);
- vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
+ write_pte(&vaddr[gen8_pd_index(idx, 0)],
+ pte_encode | iter->dma);
iter->dma += I915_GTT_PAGE_SIZE;
if (iter->dma >= iter->max) {
@@ -487,7 +498,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
do {
GEM_BUG_ON(iter->sg->length < page_size);
- vaddr[index++] = encode | iter->dma;
+ write_pte(&vaddr[index++], encode | iter->dma);
start += page_size;
iter->dma += page_size;
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index cbad7fe722ce..d907d538176e 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -64,7 +64,7 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
if (!--b->irq_enabled)
irq_disable(engine);
- b->irq_armed = false;
+ WRITE_ONCE(b->irq_armed, false);
intel_gt_pm_put_async(engine->gt);
}
@@ -73,7 +73,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
struct intel_breadcrumbs *b = &engine->breadcrumbs;
unsigned long flags;
- if (!b->irq_armed)
+ if (!READ_ONCE(b->irq_armed))
return;
spin_lock_irqsave(&b->irq_lock, flags);
@@ -142,6 +142,18 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
intel_engine_add_retire(engine, tl);
}
+static void __signal_request(struct i915_request *rq, struct list_head *signals)
+{
+ GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
+ clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+
+ if (!__dma_fence_signal(&rq->fence))
+ return;
+
+ i915_request_get(rq);
+ list_add_tail(&rq->signal_link, signals);
+}
+
static void signal_irq_work(struct irq_work *work)
{
struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
@@ -155,6 +167,8 @@ static void signal_irq_work(struct irq_work *work)
if (b->irq_armed && list_empty(&b->signalers))
__intel_breadcrumbs_disarm_irq(b);
+ list_splice_init(&b->signaled_requests, &signal);
+
list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
GEM_BUG_ON(list_empty(&ce->signals));
@@ -163,24 +177,15 @@ static void signal_irq_work(struct irq_work *work)
list_entry(pos, typeof(*rq), signal_link);
GEM_BUG_ON(!check_signal_order(ce, rq));
-
if (!__request_completed(rq))
break;
- GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
- &rq->fence.flags));
- clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
-
- if (!__dma_fence_signal(&rq->fence))
- continue;
-
/*
* Queue for execution after dropping the signaling
* spinlock as the callback chain may end up adding
* more signalers to the same context or engine.
*/
- i915_request_get(rq);
- list_add_tail(&rq->signal_link, &signal);
+ __signal_request(rq, &signal);
}
/*
@@ -233,7 +238,7 @@ static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
* which we can add a new waiter and avoid the cost of re-enabling
* the irq.
*/
- b->irq_armed = true;
+ WRITE_ONCE(b->irq_armed, true);
/*
* Since we are waiting on a request, the GPU should be busy
@@ -255,6 +260,7 @@ void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
spin_lock_init(&b->irq_lock);
INIT_LIST_HEAD(&b->signalers);
+ INIT_LIST_HEAD(&b->signaled_requests);
init_irq_work(&b->irq_work, signal_irq_work);
}
@@ -274,6 +280,32 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
spin_unlock_irqrestore(&b->irq_lock, flags);
}
+void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine,
+ struct intel_context *ce)
+{
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
+ unsigned long flags;
+
+ spin_lock_irqsave(&b->irq_lock, flags);
+ if (!list_empty(&ce->signals)) {
+ struct i915_request *rq, *next;
+
+ /* Queue for executing the signal callbacks in the irq_work */
+ list_for_each_entry_safe(rq, next, &ce->signals, signal_link) {
+ GEM_BUG_ON(rq->engine != engine);
+ GEM_BUG_ON(!__request_completed(rq));
+
+ __signal_request(rq, &b->signaled_requests);
+ }
+
+ INIT_LIST_HEAD(&ce->signals);
+ list_del_init(&ce->signal_link);
+
+ irq_work_queue(&b->irq_work);
+ }
+ spin_unlock_irqrestore(&b->irq_lock, flags);
+}
+
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
{
}
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index aea992e46c42..74ddb49b2941 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -114,6 +114,11 @@ int __intel_context_do_pin(struct intel_context *ce)
goto out_release;
}
+ if (unlikely(intel_context_is_closed(ce))) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
err = intel_context_active_acquire(ce);
if (unlikely(err))
diff --git a/drivers/gpu/drm/i915/gt/intel_context_sseu.c b/drivers/gpu/drm/i915/gt/intel_context_sseu.c
index 57a30956c922..487299cb91f2 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_context_sseu.c
@@ -25,8 +25,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
return PTR_ERR(cs);
offset = i915_ggtt_offset(ce->state) +
- LRC_STATE_PN * PAGE_SIZE +
- CTX_R_PWR_CLK_STATE * 4;
+ LRC_STATE_OFFSET + CTX_R_PWR_CLK_STATE * 4;
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = lower_32_bits(offset);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index ca0d4f4f3615..4954b0df4864 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -102,6 +102,8 @@ struct intel_context {
/** sseu: Control eu/slice partitioning */
struct intel_sseu sseu;
+
+ u8 wa_bb_page; /* if set, page num reserved for context workarounds */
};
#endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index a1aa0d3e8be1..9bf6d4989968 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -199,6 +199,8 @@ void intel_engine_cleanup(struct intel_engine_cs *engine);
int intel_engines_init_mmio(struct intel_gt *gt);
int intel_engines_init(struct intel_gt *gt);
+void intel_engine_free_request_pool(struct intel_engine_cs *engine);
+
void intel_engines_release(struct intel_gt *gt);
void intel_engines_free(struct intel_gt *gt);
@@ -236,22 +238,35 @@ intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
+void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine,
+ struct intel_context *ce);
+
void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
struct drm_printer *p);
-static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
+static inline u32 *__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
{
memset(batch, 0, 6 * sizeof(u32));
- batch[0] = GFX_OP_PIPE_CONTROL(6);
- batch[1] = flags;
+ batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0;
+ batch[1] = flags1;
batch[2] = offset;
return batch + 6;
}
+static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
+{
+ return __gen8_emit_pipe_control(batch, 0, flags, offset);
+}
+
+static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
+{
+ return __gen8_emit_pipe_control(batch, flags0, flags1, offset);
+}
+
static inline u32 *
-gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
+__gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1)
{
/* We're using qword write, offset should be aligned to 8 bytes. */
GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
@@ -260,8 +275,8 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
* need a prior CS_STALL, which is emitted by the flush
* following the batch.
*/
- *cs++ = GFX_OP_PIPE_CONTROL(6);
- *cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
+ *cs++ = GFX_OP_PIPE_CONTROL(6) | flags0;
+ *cs++ = flags1 | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
*cs++ = gtt_offset;
*cs++ = 0;
*cs++ = value;
@@ -271,6 +286,18 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
return cs;
}
+static inline u32*
+gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
+{
+ return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, 0, flags);
+}
+
+static inline u32*
+gen12_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1)
+{
+ return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, flags0, flags1);
+}
+
static inline u32 *
gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
@@ -308,9 +335,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
struct drm_printer *m,
const char *header, ...);
-int intel_enable_engine_stats(struct intel_engine_cs *engine);
-void intel_disable_engine_stats(struct intel_engine_cs *engine);
-
ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
struct i915_request *
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 883a9b7fe88d..da5b61085257 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -31,7 +31,6 @@
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
#include "intel_engine_user.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
@@ -327,6 +326,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
if (INTEL_GEN(i915) == 12 && engine->class == RENDER_CLASS)
engine->props.preempt_timeout_ms = 0;
+ engine->defaults = engine->props; /* never to change again */
+
engine->context_size = intel_engine_context_size(gt, engine->class);
if (WARN_ON(engine->context_size > BIT(20)))
engine->context_size = 0;
@@ -347,8 +348,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
gt->engine_class[info->class][info->instance] = engine;
gt->engine[id] = engine;
- i915->engine[id] = engine;
-
return 0;
}
@@ -425,17 +424,27 @@ void intel_engines_release(struct intel_gt *gt)
engine->release = NULL;
memset(&engine->reset, 0, sizeof(engine->reset));
-
- gt->i915->engine[id] = NULL;
}
}
+void intel_engine_free_request_pool(struct intel_engine_cs *engine)
+{
+ if (!engine->request_pool)
+ return;
+
+ kmem_cache_free(i915_request_slab_cache(), engine->request_pool);
+}
+
void intel_engines_free(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ /* Free the requests! dma-resv keeps fences around for an eternity */
+ rcu_barrier();
+
for_each_engine(engine, gt, id) {
+ intel_engine_free_request_pool(engine);
kfree(engine);
gt->engine[id] = NULL;
}
@@ -623,8 +632,6 @@ static int engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init__pm(engine);
intel_engine_init_retire(engine);
- intel_engine_pool_init(&engine->pool);
-
/* Use the whole device by default */
engine->sseu =
intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);
@@ -821,12 +828,11 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
cleanup_status_page(engine);
intel_engine_fini_retire(engine);
- intel_engine_pool_fini(&engine->pool);
intel_engine_fini_breadcrumbs(engine);
intel_engine_cleanup_cmd_parser(engine);
if (engine->default_state)
- i915_gem_object_put(engine->default_state);
+ fput(engine->default_state);
if (engine->kernel_context) {
intel_context_unpin(engine->kernel_context);
@@ -1225,6 +1231,49 @@ static void print_request(struct drm_printer *m,
name);
}
+static struct intel_timeline *get_timeline(struct i915_request *rq)
+{
+ struct intel_timeline *tl;
+
+ /*
+ * Even though we are holding the engine->active.lock here, there
+ * is no control over the submission queue per-se and we are
+ * inspecting the active state at a random point in time, with an
+ * unknown queue. Play safe and make sure the timeline remains valid.
+ * (Only being used for pretty printing, one extra kref shouldn't
+ * cause a camel stampede!)
+ */
+ rcu_read_lock();
+ tl = rcu_dereference(rq->timeline);
+ if (!kref_get_unless_zero(&tl->kref))
+ tl = NULL;
+ rcu_read_unlock();
+
+ return tl;
+}
+
+static int print_ring(char *buf, int sz, struct i915_request *rq)
+{
+ int len = 0;
+
+ if (!i915_request_signaled(rq)) {
+ struct intel_timeline *tl = get_timeline(rq);
+
+ len = scnprintf(buf, sz,
+ "ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ",
+ i915_ggtt_offset(rq->ring->vma),
+ tl ? tl->hwsp_offset : 0,
+ hwsp_seqno(rq),
+ DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),
+ 1000 * 1000));
+
+ if (tl)
+ intel_timeline_put(tl);
+ }
+
+ return len;
+}
+
static void hexdump(struct drm_printer *m, const void *buf, size_t len)
{
const size_t rowsize = 8 * sizeof(u32);
@@ -1254,27 +1303,6 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len)
}
}
-static struct intel_timeline *get_timeline(struct i915_request *rq)
-{
- struct intel_timeline *tl;
-
- /*
- * Even though we are holding the engine->active.lock here, there
- * is no control over the submission queue per-se and we are
- * inspecting the active state at a random point in time, with an
- * unknown queue. Play safe and make sure the timeline remains valid.
- * (Only being used for pretty printing, one extra kref shouldn't
- * cause a camel stampede!)
- */
- rcu_read_lock();
- tl = rcu_dereference(rq->timeline);
- if (!kref_get_unless_zero(&tl->kref))
- tl = NULL;
- rcu_read_unlock();
-
- return tl;
-}
-
static const char *repr_timer(const struct timer_list *t)
{
if (!READ_ONCE(t->expires))
@@ -1393,39 +1421,24 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
int len;
len = scnprintf(hdr, sizeof(hdr),
- "\t\tActive[%d]: ",
- (int)(port - execlists->active));
- if (!i915_request_signaled(rq)) {
- struct intel_timeline *tl = get_timeline(rq);
-
- len += scnprintf(hdr + len, sizeof(hdr) - len,
- "ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ",
- i915_ggtt_offset(rq->ring->vma),
- tl ? tl->hwsp_offset : 0,
- hwsp_seqno(rq),
- DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),
- 1000 * 1000));
-
- if (tl)
- intel_timeline_put(tl);
- }
+ "\t\tActive[%d]: ccid:%08x, ",
+ (int)(port - execlists->active),
+ rq->context->lrc.ccid);
+ len += print_ring(hdr + len, sizeof(hdr) - len, rq);
scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
print_request(m, rq, hdr);
}
for (port = execlists->pending; (rq = *port); port++) {
- struct intel_timeline *tl = get_timeline(rq);
- char hdr[80];
-
- snprintf(hdr, sizeof(hdr),
- "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
- (int)(port - execlists->pending),
- i915_ggtt_offset(rq->ring->vma),
- tl ? tl->hwsp_offset : 0,
- hwsp_seqno(rq));
- print_request(m, rq, hdr);
+ char hdr[160];
+ int len;
- if (tl)
- intel_timeline_put(tl);
+ len = scnprintf(hdr, sizeof(hdr),
+ "\t\tPending[%d]: ccid:%08x, ",
+ (int)(port - execlists->pending),
+ rq->context->lrc.ccid);
+ len += print_ring(hdr + len, sizeof(hdr) - len, rq);
+ scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
+ print_request(m, rq, hdr);
}
rcu_read_unlock();
execlists_active_unlock_bh(execlists);
@@ -1574,58 +1587,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
intel_engine_print_breadcrumbs(engine, m);
}
-/**
- * intel_enable_engine_stats() - Enable engine busy tracking on engine
- * @engine: engine to enable stats collection
- *
- * Start collecting the engine busyness data for @engine.
- *
- * Returns 0 on success or a negative error code.
- */
-int intel_enable_engine_stats(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists *execlists = &engine->execlists;
- unsigned long flags;
- int err = 0;
-
- if (!intel_engine_supports_stats(engine))
- return -ENODEV;
-
- execlists_active_lock_bh(execlists);
- write_seqlock_irqsave(&engine->stats.lock, flags);
-
- if (unlikely(engine->stats.enabled == ~0)) {
- err = -EBUSY;
- goto unlock;
- }
-
- if (engine->stats.enabled++ == 0) {
- struct i915_request * const *port;
- struct i915_request *rq;
-
- engine->stats.enabled_at = ktime_get();
-
- /* XXX submission method oblivious? */
- for (port = execlists->active; (rq = *port); port++)
- engine->stats.active++;
-
- for (port = execlists->pending; (rq = *port); port++) {
- /* Exclude any contexts already counted in active */
- if (!intel_context_inflight_count(rq->context))
- engine->stats.active++;
- }
-
- if (engine->stats.active)
- engine->stats.start = engine->stats.enabled_at;
- }
-
-unlock:
- write_sequnlock_irqrestore(&engine->stats.lock, flags);
- execlists_active_unlock_bh(execlists);
-
- return err;
-}
-
static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
{
ktime_t total = engine->stats.total;
@@ -1634,7 +1595,7 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
* If the engine is executing something at the moment
* add it to the total.
*/
- if (engine->stats.active)
+ if (atomic_read(&engine->stats.active))
total = ktime_add(total,
ktime_sub(ktime_get(), engine->stats.start));
@@ -1660,28 +1621,6 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
return total;
}
-/**
- * intel_disable_engine_stats() - Disable engine busy tracking on engine
- * @engine: engine to disable stats collection
- *
- * Stops collecting the engine busyness data for @engine.
- */
-void intel_disable_engine_stats(struct intel_engine_cs *engine)
-{
- unsigned long flags;
-
- if (!intel_engine_supports_stats(engine))
- return;
-
- write_seqlock_irqsave(&engine->stats.lock, flags);
- WARN_ON_ONCE(engine->stats.enabled == 0);
- if (--engine->stats.enabled == 0) {
- engine->stats.total = __intel_engine_get_busy_time(engine);
- engine->stats.active = 0;
- }
- write_sequnlock_irqrestore(&engine->stats.lock, flags);
-}
-
static bool match_ring(struct i915_request *rq)
{
u32 ring = ENGINE_READ(rq->engine, RING_START);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index dd825718e4e5..5136c8bf112d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -31,7 +31,7 @@ static bool next_heartbeat(struct intel_engine_cs *engine)
delay = msecs_to_jiffies_timeout(delay);
if (delay >= HZ)
delay = round_jiffies_up_relative(delay);
- schedule_delayed_work(&engine->heartbeat.work, delay);
+ mod_delayed_work(system_wq, &engine->heartbeat.work, delay);
return true;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index b6cf284e3a2d..d0a1078ef632 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -10,31 +10,22 @@
#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "intel_ring.h"
+#include "shmem_utils.h"
static int __engine_unpark(struct intel_wakeref *wf)
{
struct intel_engine_cs *engine =
container_of(wf, typeof(*engine), wakeref);
struct intel_context *ce;
- void *map;
ENGINE_TRACE(engine, "\n");
intel_gt_pm_get(engine->gt);
- /* Pin the default state for fast resets from atomic context. */
- map = NULL;
- if (engine->default_state)
- map = i915_gem_object_pin_map(engine->default_state,
- I915_MAP_WB);
- if (!IS_ERR_OR_NULL(map))
- engine->pinned_default_state = map;
-
/* Discard stale context state from across idling */
ce = engine->kernel_context;
if (ce) {
@@ -44,6 +35,7 @@ static int __engine_unpark(struct intel_wakeref *wf)
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
struct drm_i915_gem_object *obj = ce->state->obj;
int type = i915_coherent_map_type(engine->i915);
+ void *map;
map = i915_gem_object_pin_map(obj, type);
if (!IS_ERR(map)) {
@@ -181,7 +173,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
* Ergo, if we put ourselves on the timelines.active_list
* (se intel_timeline_enter()) before we increment the
* engine->wakeref.count, we may see the request completion and retire
- * it causing an undeflow of the engine->wakeref.
+ * it causing an underflow of the engine->wakeref.
*/
flags = __timeline_mark_lock(ce);
GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
@@ -255,7 +247,6 @@ static int __engine_park(struct intel_wakeref *wf)
intel_engine_park_heartbeat(engine);
intel_engine_disarm_breadcrumbs(engine);
- intel_engine_pool_park(&engine->pool);
/* Must be reset upon idling, or we may miss the busy wakeup. */
GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
@@ -263,11 +254,6 @@ static int __engine_park(struct intel_wakeref *wf)
if (engine->park)
engine->park(engine);
- if (engine->pinned_default_state) {
- i915_gem_object_unpin_map(engine->default_state);
- engine->pinned_default_state = NULL;
- }
-
engine->execlists.no_priolist = false;
/* While gt calls i915_vma_parked(), we have to break the lock cycle */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index e52c2b0cb245..418df0a13145 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -37,6 +37,12 @@ static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine)
intel_wakeref_put_async(&engine->wakeref);
}
+static inline void intel_engine_pm_put_delay(struct intel_engine_cs *engine,
+ unsigned long delay)
+{
+ intel_wakeref_put_delay(&engine->wakeref, delay);
+}
+
static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
{
intel_wakeref_unlock_wait(&engine->wakeref);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
deleted file mode 100644
index 1bd89cadc3b7..000000000000
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2014-2018 Intel Corporation
- */
-
-#ifndef INTEL_ENGINE_POOL_H
-#define INTEL_ENGINE_POOL_H
-
-#include "intel_engine_pool_types.h"
-#include "i915_active.h"
-#include "i915_request.h"
-
-struct intel_engine_pool_node *
-intel_engine_get_pool(struct intel_engine_cs *engine, size_t size);
-
-static inline int
-intel_engine_pool_mark_active(struct intel_engine_pool_node *node,
- struct i915_request *rq)
-{
- return i915_active_add_request(&node->active, rq);
-}
-
-static inline void
-intel_engine_pool_put(struct intel_engine_pool_node *node)
-{
- i915_active_release(&node->active);
-}
-
-void intel_engine_pool_init(struct intel_engine_pool *pool);
-void intel_engine_pool_park(struct intel_engine_pool *pool);
-void intel_engine_pool_fini(struct intel_engine_pool *pool);
-
-#endif /* INTEL_ENGINE_POOL_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 0be674ae1cf6..2b6cdf47d428 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -22,7 +22,6 @@
#include "i915_pmu.h"
#include "i915_priolist_types.h"
#include "i915_selftest.h"
-#include "intel_engine_pool_types.h"
#include "intel_sseu.h"
#include "intel_timeline_types.h"
#include "intel_wakeref.h"
@@ -181,6 +180,11 @@ struct intel_engine_execlists {
u32 error_interrupt;
/**
+ * @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset
+ */
+ u32 reset_ccid;
+
+ /**
* @no_priolist: priority lists disabled
*/
bool no_priolist;
@@ -321,6 +325,9 @@ struct intel_engine_cs {
struct list_head hold; /* ready requests, but on hold */
} active;
+ /* keep a request in reserve for a [pm] barrier under oom */
+ struct i915_request *request_pool;
+
struct llist_head barrier_tasks;
struct intel_context *kernel_context; /* pinned */
@@ -336,8 +343,7 @@ struct intel_engine_cs {
unsigned long wakeref_serial;
struct intel_wakeref wakeref;
- struct drm_i915_gem_object *default_state;
- void *pinned_default_state;
+ struct file *default_state;
struct {
struct intel_ring *ring;
@@ -371,6 +377,8 @@ struct intel_engine_cs {
spinlock_t irq_lock;
struct list_head signalers;
+ struct list_head signaled_requests;
+
struct irq_work irq_work; /* for use from inside irq_lock */
unsigned int irq_enabled;
@@ -402,13 +410,6 @@ struct intel_engine_cs {
struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
} pmu;
- /*
- * A pool of objects to use as shadow copies of client batch buffers
- * when the command parser is enabled. Prevents the client from
- * modifying the batch contents after software parsing.
- */
- struct intel_engine_pool pool;
-
struct intel_hw_status_page status_page;
struct i915_ctx_workarounds wa_ctx;
struct i915_wa_list ctx_wa_list;
@@ -420,6 +421,7 @@ struct intel_engine_cs {
void (*irq_enable)(struct intel_engine_cs *engine);
void (*irq_disable)(struct intel_engine_cs *engine);
+ void (*sanitize)(struct intel_engine_cs *engine);
int (*resume)(struct intel_engine_cs *engine);
struct {
@@ -529,34 +531,34 @@ struct intel_engine_cs {
struct {
/**
- * @lock: Lock protecting the below fields.
- */
- seqlock_t lock;
- /**
- * @enabled: Reference count indicating number of listeners.
+ * @active: Number of contexts currently scheduled in.
*/
- unsigned int enabled;
+ atomic_t active;
+
/**
- * @active: Number of contexts currently scheduled in.
+ * @lock: Lock protecting the below fields.
*/
- unsigned int active;
+ seqlock_t lock;
+
/**
- * @enabled_at: Timestamp when busy stats were enabled.
+ * @total: Total time this engine was busy.
+ *
+ * Accumulated time not counting the most recent block in cases
+ * where engine is currently busy (active > 0).
*/
- ktime_t enabled_at;
+ ktime_t total;
+
/**
* @start: Timestamp of the last idle to active transition.
*
* Idle is defined as active == 0, active is active > 0.
*/
ktime_t start;
+
/**
- * @total: Total time this engine was busy.
- *
- * Accumulated time not counting the most recent block in cases
- * where engine is currently busy (active > 0).
+ * @rps: Utilisation at last RPS sampling.
*/
- ktime_t total;
+ ktime_t rps;
} stats;
struct {
@@ -565,7 +567,7 @@ struct intel_engine_cs {
unsigned long preempt_timeout_ms;
unsigned long stop_timeout_ms;
unsigned long timeslice_duration_ms;
- } props;
+ } props, defaults;
};
static inline bool
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 4c5a209cb669..66165b10256e 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -65,7 +65,7 @@ static int ggtt_init_hw(struct i915_ggtt *ggtt)
ggtt->mappable_end);
}
- i915_ggtt_init_fences(ggtt);
+ intel_ggtt_init_fences(ggtt);
return 0;
}
@@ -715,11 +715,13 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
*/
void i915_ggtt_driver_release(struct drm_i915_private *i915)
{
+ struct i915_ggtt *ggtt = &i915->ggtt;
struct pagevec *pvec;
- fini_aliasing_ppgtt(&i915->ggtt);
+ fini_aliasing_ppgtt(ggtt);
- ggtt_cleanup_hw(&i915->ggtt);
+ intel_ggtt_fini_fences(ggtt);
+ ggtt_cleanup_hw(ggtt);
pvec = &i915->mm.wc_stash.pvec;
if (pvec->nr) {
@@ -784,13 +786,13 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
else
ggtt->gsm = ioremap_wc(phys_addr, size);
if (!ggtt->gsm) {
- DRM_ERROR("Failed to map the ggtt page table\n");
+ drm_err(&i915->drm, "Failed to map the ggtt page table\n");
return -ENOMEM;
}
ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
if (ret) {
- DRM_ERROR("Scratch setup failed\n");
+ drm_err(&i915->drm, "Scratch setup failed\n");
/* iounmap will also get called at remove, but meh */
iounmap(ggtt->gsm);
return ret;
@@ -838,7 +840,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
struct pci_dev *pdev = i915->drm.pdev;
unsigned int size;
u16 snb_gmch_ctl;
- int err;
/* TODO: We're not aware of mappable constraints on gen8 yet */
if (!IS_DGFX(i915)) {
@@ -846,12 +847,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->mappable_end = resource_size(&ggtt->gmadr);
}
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
- if (!err)
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
- if (err)
- DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
-
pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
if (IS_CHERRYVIEW(i915))
size = chv_get_total_gtt_size(snb_gmch_ctl);
@@ -987,7 +982,6 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
struct pci_dev *pdev = i915->drm.pdev;
unsigned int size;
u16 snb_gmch_ctl;
- int err;
ggtt->gmadr = pci_resource(pdev, 2);
ggtt->mappable_end = resource_size(&ggtt->gmadr);
@@ -997,15 +991,11 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
* just a coarse sanity check.
*/
if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
- DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
+ drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
+ &ggtt->mappable_end);
return -ENXIO;
}
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
- if (!err)
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
- if (err)
- DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
size = gen6_get_total_gtt_size(snb_gmch_ctl);
@@ -1052,7 +1042,7 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
ret = intel_gmch_probe(i915->bridge_dev, i915->drm.pdev, NULL);
if (!ret) {
- DRM_ERROR("failed to set up gmch\n");
+ drm_err(&i915->drm, "failed to set up gmch\n");
return -EIO;
}
@@ -1075,7 +1065,7 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.vma_ops.clear_pages = clear_pages;
if (unlikely(ggtt->do_idle_maps))
- dev_notice(i915->drm.dev,
+ drm_notice(&i915->drm,
"Applying Ironlake quirks for intel_iommu\n");
return 0;
@@ -1100,26 +1090,29 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
return ret;
if ((ggtt->vm.total - 1) >> 32) {
- DRM_ERROR("We never expected a Global GTT with more than 32bits"
- " of address space! Found %lldM!\n",
- ggtt->vm.total >> 20);
+ drm_err(&i915->drm,
+ "We never expected a Global GTT with more than 32bits"
+ " of address space! Found %lldM!\n",
+ ggtt->vm.total >> 20);
ggtt->vm.total = 1ULL << 32;
ggtt->mappable_end =
min_t(u64, ggtt->mappable_end, ggtt->vm.total);
}
if (ggtt->mappable_end > ggtt->vm.total) {
- DRM_ERROR("mappable aperture extends past end of GGTT,"
- " aperture=%pa, total=%llx\n",
- &ggtt->mappable_end, ggtt->vm.total);
+ drm_err(&i915->drm,
+ "mappable aperture extends past end of GGTT,"
+ " aperture=%pa, total=%llx\n",
+ &ggtt->mappable_end, ggtt->vm.total);
ggtt->mappable_end = ggtt->vm.total;
}
/* GMADR is the PCI mmio aperture into the global GTT. */
- DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
- DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
- DRM_DEBUG_DRIVER("DSM size = %lluM\n",
- (u64)resource_size(&intel_graphics_stolen_res) >> 20);
+ drm_dbg(&i915->drm, "GGTT size = %lluM\n", ggtt->vm.total >> 20);
+ drm_dbg(&i915->drm, "GMADR size = %lluM\n",
+ (u64)ggtt->mappable_end >> 20);
+ drm_dbg(&i915->drm, "DSM size = %lluM\n",
+ (u64)resource_size(&intel_graphics_stolen_res) >> 20);
return 0;
}
@@ -1137,7 +1130,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
return ret;
if (intel_vtd_active())
- dev_info(i915->drm.dev, "VT-d active for gfx access\n");
+ drm_info(&i915->drm, "VT-d active for gfx access\n");
return 0;
}
@@ -1212,6 +1205,8 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
if (INTEL_GEN(ggtt->vm.i915) >= 8)
setup_private_pat(ggtt->vm.gt->uncore);
+
+ intel_ggtt_restore_fences(ggtt);
}
static struct scatterlist *
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
new file mode 100644
index 000000000000..7fb36b12fe7a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
@@ -0,0 +1,909 @@
+/*
+ * Copyright © 2008-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "i915_drv.h"
+#include "i915_scatterlist.h"
+#include "i915_pvinfo.h"
+#include "i915_vgpu.h"
+
+/**
+ * DOC: fence register handling
+ *
+ * Important to avoid confusions: "fences" in the i915 driver are not execution
+ * fences used to track command completion but hardware detiler objects which
+ * wrap a given range of the global GTT. Each platform has only a fairly limited
+ * set of these objects.
+ *
+ * Fences are used to detile GTT memory mappings. They're also connected to the
+ * hardware frontbuffer render tracking and hence interact with frontbuffer
+ * compression. Furthermore on older platforms fences are required for tiled
+ * objects used by the display engine. They can also be used by the render
+ * engine - they're required for blitter commands and are optional for render
+ * commands. But on gen4+ both display (with the exception of fbc) and rendering
+ * have their own tiling state bits and don't need fences.
+ *
+ * Also note that fences only support X and Y tiling and hence can't be used for
+ * the fancier new tiling formats like W, Ys and Yf.
+ *
+ * Finally note that because fences are such a restricted resource they're
+ * dynamically associated with objects. Furthermore fence state is committed to
+ * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
+ * explicitly call i915_gem_object_get_fence() to synchronize fencing status
+ * for cpu access. Also note that some code wants an unfenced view, for those
+ * cases the fence can be removed forcefully with i915_gem_object_put_fence().
+ *
+ * Internally these functions will synchronize with userspace access by removing
+ * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
+ */
+
+#define pipelined 0
+
+static struct drm_i915_private *fence_to_i915(struct i915_fence_reg *fence)
+{
+ return fence->ggtt->vm.i915;
+}
+
+static struct intel_uncore *fence_to_uncore(struct i915_fence_reg *fence)
+{
+ return fence->ggtt->vm.gt->uncore;
+}
+
+static void i965_write_fence_reg(struct i915_fence_reg *fence)
+{
+ i915_reg_t fence_reg_lo, fence_reg_hi;
+ int fence_pitch_shift;
+ u64 val;
+
+ if (INTEL_GEN(fence_to_i915(fence)) >= 6) {
+ fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
+ fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
+ fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
+
+ } else {
+ fence_reg_lo = FENCE_REG_965_LO(fence->id);
+ fence_reg_hi = FENCE_REG_965_HI(fence->id);
+ fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
+ }
+
+ val = 0;
+ if (fence->tiling) {
+ unsigned int stride = fence->stride;
+
+ GEM_BUG_ON(!IS_ALIGNED(stride, 128));
+
+ val = fence->start + fence->size - I965_FENCE_PAGE;
+ val <<= 32;
+ val |= fence->start;
+ val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
+ if (fence->tiling == I915_TILING_Y)
+ val |= BIT(I965_FENCE_TILING_Y_SHIFT);
+ val |= I965_FENCE_REG_VALID;
+ }
+
+ if (!pipelined) {
+ struct intel_uncore *uncore = fence_to_uncore(fence);
+
+ /*
+ * To w/a incoherency with non-atomic 64-bit register updates,
+ * we split the 64-bit update into two 32-bit writes. In order
+ * for a partial fence not to be evaluated between writes, we
+ * precede the update with write to turn off the fence register,
+ * and only enable the fence as the last step.
+ *
+ * For extra levels of paranoia, we make sure each step lands
+ * before applying the next step.
+ */
+ intel_uncore_write_fw(uncore, fence_reg_lo, 0);
+ intel_uncore_posting_read_fw(uncore, fence_reg_lo);
+
+ intel_uncore_write_fw(uncore, fence_reg_hi, upper_32_bits(val));
+ intel_uncore_write_fw(uncore, fence_reg_lo, lower_32_bits(val));
+ intel_uncore_posting_read_fw(uncore, fence_reg_lo);
+ }
+}
+
+static void i915_write_fence_reg(struct i915_fence_reg *fence)
+{
+ u32 val;
+
+ val = 0;
+ if (fence->tiling) {
+ unsigned int stride = fence->stride;
+ unsigned int tiling = fence->tiling;
+ bool is_y_tiled = tiling == I915_TILING_Y;
+
+ if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence_to_i915(fence)))
+ stride /= 128;
+ else
+ stride /= 512;
+ GEM_BUG_ON(!is_power_of_2(stride));
+
+ val = fence->start;
+ if (is_y_tiled)
+ val |= BIT(I830_FENCE_TILING_Y_SHIFT);
+ val |= I915_FENCE_SIZE_BITS(fence->size);
+ val |= ilog2(stride) << I830_FENCE_PITCH_SHIFT;
+
+ val |= I830_FENCE_REG_VALID;
+ }
+
+ if (!pipelined) {
+ struct intel_uncore *uncore = fence_to_uncore(fence);
+ i915_reg_t reg = FENCE_REG(fence->id);
+
+ intel_uncore_write_fw(uncore, reg, val);
+ intel_uncore_posting_read_fw(uncore, reg);
+ }
+}
+
+static void i830_write_fence_reg(struct i915_fence_reg *fence)
+{
+ u32 val;
+
+ val = 0;
+ if (fence->tiling) {
+ unsigned int stride = fence->stride;
+
+ val = fence->start;
+ if (fence->tiling == I915_TILING_Y)
+ val |= BIT(I830_FENCE_TILING_Y_SHIFT);
+ val |= I830_FENCE_SIZE_BITS(fence->size);
+ val |= ilog2(stride / 128) << I830_FENCE_PITCH_SHIFT;
+ val |= I830_FENCE_REG_VALID;
+ }
+
+ if (!pipelined) {
+ struct intel_uncore *uncore = fence_to_uncore(fence);
+ i915_reg_t reg = FENCE_REG(fence->id);
+
+ intel_uncore_write_fw(uncore, reg, val);
+ intel_uncore_posting_read_fw(uncore, reg);
+ }
+}
+
+static void fence_write(struct i915_fence_reg *fence)
+{
+ struct drm_i915_private *i915 = fence_to_i915(fence);
+
+ /*
+ * Previous access through the fence register is marshalled by
+ * the mb() inside the fault handlers (i915_gem_release_mmaps)
+ * and explicitly managed for internal users.
+ */
+
+ if (IS_GEN(i915, 2))
+ i830_write_fence_reg(fence);
+ else if (IS_GEN(i915, 3))
+ i915_write_fence_reg(fence);
+ else
+ i965_write_fence_reg(fence);
+
+ /*
+ * Access through the fenced region afterwards is
+ * ordered by the posting reads whilst writing the registers.
+ */
+}
+
+static bool gpu_uses_fence_registers(struct i915_fence_reg *fence)
+{
+ return INTEL_GEN(fence_to_i915(fence)) < 4;
+}
+
+static int fence_update(struct i915_fence_reg *fence,
+ struct i915_vma *vma)
+{
+ struct i915_ggtt *ggtt = fence->ggtt;
+ struct intel_uncore *uncore = fence_to_uncore(fence);
+ intel_wakeref_t wakeref;
+ struct i915_vma *old;
+ int ret;
+
+ fence->tiling = 0;
+ if (vma) {
+ GEM_BUG_ON(!i915_gem_object_get_stride(vma->obj) ||
+ !i915_gem_object_get_tiling(vma->obj));
+
+ if (!i915_vma_is_map_and_fenceable(vma))
+ return -EINVAL;
+
+ if (gpu_uses_fence_registers(fence)) {
+ /* implicit 'unfenced' GPU blits */
+ ret = i915_vma_sync(vma);
+ if (ret)
+ return ret;
+ }
+
+ fence->start = vma->node.start;
+ fence->size = vma->fence_size;
+ fence->stride = i915_gem_object_get_stride(vma->obj);
+ fence->tiling = i915_gem_object_get_tiling(vma->obj);
+ }
+ WRITE_ONCE(fence->dirty, false);
+
+ old = xchg(&fence->vma, NULL);
+ if (old) {
+ /* XXX Ideally we would move the waiting to outside the mutex */
+ ret = i915_active_wait(&fence->active);
+ if (ret) {
+ fence->vma = old;
+ return ret;
+ }
+
+ i915_vma_flush_writes(old);
+
+ /*
+ * Ensure that all userspace CPU access is completed before
+ * stealing the fence.
+ */
+ if (old != vma) {
+ GEM_BUG_ON(old->fence != fence);
+ i915_vma_revoke_mmap(old);
+ old->fence = NULL;
+ }
+
+ list_move(&fence->link, &ggtt->fence_list);
+ }
+
+ /*
+ * We only need to update the register itself if the device is awake.
+ * If the device is currently powered down, we will defer the write
+ * to the runtime resume, see intel_ggtt_restore_fences().
+ *
+ * This only works for removing the fence register, on acquisition
+ * the caller must hold the rpm wakeref. The fence register must
+ * be cleared before we can use any other fences to ensure that
+ * the new fences do not overlap the elided clears, confusing HW.
+ */
+ wakeref = intel_runtime_pm_get_if_in_use(uncore->rpm);
+ if (!wakeref) {
+ GEM_BUG_ON(vma);
+ return 0;
+ }
+
+ WRITE_ONCE(fence->vma, vma);
+ fence_write(fence);
+
+ if (vma) {
+ vma->fence = fence;
+ list_move_tail(&fence->link, &ggtt->fence_list);
+ }
+
+ intel_runtime_pm_put(uncore->rpm, wakeref);
+ return 0;
+}
+
+/**
+ * i915_vma_revoke_fence - force-remove fence for a VMA
+ * @vma: vma to map linearly (not through a fence reg)
+ *
+ * This function force-removes any fence from the given object, which is useful
+ * if the kernel wants to do untiled GTT access.
+ */
+void i915_vma_revoke_fence(struct i915_vma *vma)
+{
+ struct i915_fence_reg *fence = vma->fence;
+ intel_wakeref_t wakeref;
+
+ lockdep_assert_held(&vma->vm->mutex);
+ if (!fence)
+ return;
+
+ GEM_BUG_ON(fence->vma != vma);
+ GEM_BUG_ON(!i915_active_is_idle(&fence->active));
+ GEM_BUG_ON(atomic_read(&fence->pin_count));
+
+ fence->tiling = 0;
+ WRITE_ONCE(fence->vma, NULL);
+ vma->fence = NULL;
+
+ with_intel_runtime_pm_if_in_use(fence_to_uncore(fence)->rpm, wakeref)
+ fence_write(fence);
+}
+
+static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
+{
+ struct i915_fence_reg *fence;
+
+ list_for_each_entry(fence, &ggtt->fence_list, link) {
+ GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
+
+ if (atomic_read(&fence->pin_count))
+ continue;
+
+ return fence;
+ }
+
+ /* Wait for completion of pending flips which consume fences */
+ if (intel_has_pending_fb_unpin(ggtt->vm.i915))
+ return ERR_PTR(-EAGAIN);
+
+ return ERR_PTR(-EDEADLK);
+}
+
+int __i915_vma_pin_fence(struct i915_vma *vma)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
+ struct i915_fence_reg *fence;
+ struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
+ int err;
+
+ lockdep_assert_held(&vma->vm->mutex);
+
+ /* Just update our place in the LRU if our fence is getting reused. */
+ if (vma->fence) {
+ fence = vma->fence;
+ GEM_BUG_ON(fence->vma != vma);
+ atomic_inc(&fence->pin_count);
+ if (!fence->dirty) {
+ list_move_tail(&fence->link, &ggtt->fence_list);
+ return 0;
+ }
+ } else if (set) {
+ fence = fence_find(ggtt);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
+
+ GEM_BUG_ON(atomic_read(&fence->pin_count));
+ atomic_inc(&fence->pin_count);
+ } else {
+ return 0;
+ }
+
+ err = fence_update(fence, set);
+ if (err)
+ goto out_unpin;
+
+ GEM_BUG_ON(fence->vma != set);
+ GEM_BUG_ON(vma->fence != (set ? fence : NULL));
+
+ if (set)
+ return 0;
+
+out_unpin:
+ atomic_dec(&fence->pin_count);
+ return err;
+}
+
+/**
+ * i915_vma_pin_fence - set up fencing for a vma
+ * @vma: vma to map through a fence reg
+ *
+ * When mapping objects through the GTT, userspace wants to be able to write
+ * to them without having to worry about swizzling if the object is tiled.
+ * This function walks the fence regs looking for a free one for @obj,
+ * stealing one if it can't find any.
+ *
+ * It then sets up the reg based on the object's properties: address, pitch
+ * and tiling format.
+ *
+ * For an untiled surface, this removes any existing fence.
+ *
+ * Returns:
+ *
+ * 0 on success, negative error code on failure.
+ */
+int i915_vma_pin_fence(struct i915_vma *vma)
+{
+ int err;
+
+ if (!vma->fence && !i915_gem_object_is_tiled(vma->obj))
+ return 0;
+
+ /*
+ * Note that we revoke fences on runtime suspend. Therefore the user
+ * must keep the device awake whilst using the fence.
+ */
+ assert_rpm_wakelock_held(vma->vm->gt->uncore->rpm);
+ GEM_BUG_ON(!i915_vma_is_pinned(vma));
+ GEM_BUG_ON(!i915_vma_is_ggtt(vma));
+
+ err = mutex_lock_interruptible(&vma->vm->mutex);
+ if (err)
+ return err;
+
+ err = __i915_vma_pin_fence(vma);
+ mutex_unlock(&vma->vm->mutex);
+
+ return err;
+}
+
+/**
+ * i915_reserve_fence - Reserve a fence for vGPU
+ * @ggtt: Global GTT
+ *
+ * This function walks the fence regs looking for a free one and remove
+ * it from the fence_list. It is used to reserve fence for vGPU to use.
+ */
+struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt)
+{
+ struct i915_fence_reg *fence;
+ int count;
+ int ret;
+
+ lockdep_assert_held(&ggtt->vm.mutex);
+
+ /* Keep at least one fence available for the display engine. */
+ count = 0;
+ list_for_each_entry(fence, &ggtt->fence_list, link)
+ count += !atomic_read(&fence->pin_count);
+ if (count <= 1)
+ return ERR_PTR(-ENOSPC);
+
+ fence = fence_find(ggtt);
+ if (IS_ERR(fence))
+ return fence;
+
+ if (fence->vma) {
+ /* Force-remove fence from VMA */
+ ret = fence_update(fence, NULL);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
+ list_del(&fence->link);
+
+ return fence;
+}
+
+/**
+ * i915_unreserve_fence - Reclaim a reserved fence
+ * @fence: the fence reg
+ *
+ * This function add a reserved fence register from vGPU to the fence_list.
+ */
+void i915_unreserve_fence(struct i915_fence_reg *fence)
+{
+ struct i915_ggtt *ggtt = fence->ggtt;
+
+ lockdep_assert_held(&ggtt->vm.mutex);
+
+ list_add(&fence->link, &ggtt->fence_list);
+}
+
+/**
+ * intel_ggtt_restore_fences - restore fence state
+ * @ggtt: Global GTT
+ *
+ * Restore the hw fence state to match the software tracking again, to be called
+ * after a gpu reset and on resume. Note that on runtime suspend we only cancel
+ * the fences, to be reacquired by the user later.
+ */
+void intel_ggtt_restore_fences(struct i915_ggtt *ggtt)
+{
+ int i;
+
+ for (i = 0; i < ggtt->num_fences; i++)
+ fence_write(&ggtt->fence_regs[i]);
+}
+
+/**
+ * DOC: tiling swizzling details
+ *
+ * The idea behind tiling is to increase cache hit rates by rearranging
+ * pixel data so that a group of pixel accesses are in the same cacheline.
+ * Performance improvement from doing this on the back/depth buffer are on
+ * the order of 30%.
+ *
+ * Intel architectures make this somewhat more complicated, though, by
+ * adjustments made to addressing of data when the memory is in interleaved
+ * mode (matched pairs of DIMMS) to improve memory bandwidth.
+ * For interleaved memory, the CPU sends every sequential 64 bytes
+ * to an alternate memory channel so it can get the bandwidth from both.
+ *
+ * The GPU also rearranges its accesses for increased bandwidth to interleaved
+ * memory, and it matches what the CPU does for non-tiled. However, when tiled
+ * it does it a little differently, since one walks addresses not just in the
+ * X direction but also Y. So, along with alternating channels when bit
+ * 6 of the address flips, it also alternates when other bits flip -- Bits 9
+ * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
+ * are common to both the 915 and 965-class hardware.
+ *
+ * The CPU also sometimes XORs in higher bits as well, to improve
+ * bandwidth doing strided access like we do so frequently in graphics. This
+ * is called "Channel XOR Randomization" in the MCH documentation. The result
+ * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
+ * decode.
+ *
+ * All of this bit 6 XORing has an effect on our memory management,
+ * as we need to make sure that the 3d driver can correctly address object
+ * contents.
+ *
+ * If we don't have interleaved memory, all tiling is safe and no swizzling is
+ * required.
+ *
+ * When bit 17 is XORed in, we simply refuse to tile at all. Bit
+ * 17 is not just a page offset, so as we page an object out and back in,
+ * individual pages in it will have different bit 17 addresses, resulting in
+ * each 64 bytes being swapped with its neighbor!
+ *
+ * Otherwise, if interleaved, we have to tell the 3d driver what the address
+ * swizzling it needs to do is, since it's writing with the CPU to the pages
+ * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
+ * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
+ * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
+ * to match what the GPU expects.
+ */
+
+/**
+ * detect_bit_6_swizzle - detect bit 6 swizzling pattern
+ * @ggtt: Global GGTT
+ *
+ * Detects bit 6 swizzling of address lookup between IGD access and CPU
+ * access through main memory.
+ */
+static void detect_bit_6_swizzle(struct i915_ggtt *ggtt)
+{
+ struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+ u32 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
+ u32 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
+
+ if (INTEL_GEN(i915) >= 8 || IS_VALLEYVIEW(i915)) {
+ /*
+ * On BDW+, swizzling is not used. We leave the CPU memory
+ * controller in charge of optimizing memory accesses without
+ * the extra address manipulation GPU side.
+ *
+ * VLV and CHV don't have GPU swizzling.
+ */
+ swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+ swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+ } else if (INTEL_GEN(i915) >= 6) {
+ if (i915->preserve_bios_swizzle) {
+ if (intel_uncore_read(uncore, DISP_ARB_CTL) &
+ DISP_TILE_SURFACE_SWIZZLING) {
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+ swizzle_y = I915_BIT_6_SWIZZLE_9;
+ } else {
+ swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+ swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+ }
+ } else {
+ u32 dimm_c0, dimm_c1;
+ dimm_c0 = intel_uncore_read(uncore, MAD_DIMM_C0);
+ dimm_c1 = intel_uncore_read(uncore, MAD_DIMM_C1);
+ dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+ dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+ /*
+ * Enable swizzling when the channels are populated
+ * with identically sized dimms. We don't need to check
+ * the 3rd channel because no cpu with gpu attached
+ * ships in that configuration. Also, swizzling only
+ * makes sense for 2 channels anyway.
+ */
+ if (dimm_c0 == dimm_c1) {
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+ swizzle_y = I915_BIT_6_SWIZZLE_9;
+ } else {
+ swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+ swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+ }
+ }
+ } else if (IS_GEN(i915, 5)) {
+ /*
+ * On Ironlake whatever DRAM config, GPU always do
+ * same swizzling setup.
+ */
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+ swizzle_y = I915_BIT_6_SWIZZLE_9;
+ } else if (IS_GEN(i915, 2)) {
+ /*
+ * As far as we know, the 865 doesn't have these bit 6
+ * swizzling issues.
+ */
+ swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+ swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+ } else if (IS_G45(i915) || IS_I965G(i915) || IS_G33(i915)) {
+ /*
+ * The 965, G33, and newer, have a very flexible memory
+ * configuration. It will enable dual-channel mode
+ * (interleaving) on as much memory as it can, and the GPU
+ * will additionally sometimes enable different bit 6
+ * swizzling for tiled objects from the CPU.
+ *
+ * Here's what I found on the G965:
+ * slot fill memory size swizzling
+ * 0A 0B 1A 1B 1-ch 2-ch
+ * 512 0 0 0 512 0 O
+ * 512 0 512 0 16 1008 X
+ * 512 0 0 512 16 1008 X
+ * 0 512 0 512 16 1008 X
+ * 1024 1024 1024 0 2048 1024 O
+ *
+ * We could probably detect this based on either the DRB
+ * matching, which was the case for the swizzling required in
+ * the table above, or from the 1-ch value being less than
+ * the minimum size of a rank.
+ *
+ * Reports indicate that the swizzling actually
+ * varies depending upon page placement inside the
+ * channels, i.e. we see swizzled pages where the
+ * banks of memory are paired and unswizzled on the
+ * uneven portion, so leave that as unknown.
+ */
+ if (intel_uncore_read(uncore, C0DRB3) ==
+ intel_uncore_read(uncore, C1DRB3)) {
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+ swizzle_y = I915_BIT_6_SWIZZLE_9;
+ }
+ } else {
+ u32 dcc = intel_uncore_read(uncore, DCC);
+
+ /*
+ * On 9xx chipsets, channel interleave by the CPU is
+ * determined by DCC. For single-channel, neither the CPU
+ * nor the GPU do swizzling. For dual channel interleaved,
+ * the GPU's interleave is bit 9 and 10 for X tiled, and bit
+ * 9 for Y tiled. The CPU's interleave is independent, and
+ * can be based on either bit 11 (haven't seen this yet) or
+ * bit 17 (common).
+ */
+ switch (dcc & DCC_ADDRESSING_MODE_MASK) {
+ case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
+ case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
+ swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+ swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+ break;
+ case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
+ if (dcc & DCC_CHANNEL_XOR_DISABLE) {
+ /*
+ * This is the base swizzling by the GPU for
+ * tiled buffers.
+ */
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+ swizzle_y = I915_BIT_6_SWIZZLE_9;
+ } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
+ /* Bit 11 swizzling by the CPU in addition. */
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
+ swizzle_y = I915_BIT_6_SWIZZLE_9_11;
+ } else {
+ /* Bit 17 swizzling by the CPU in addition. */
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
+ swizzle_y = I915_BIT_6_SWIZZLE_9_17;
+ }
+ break;
+ }
+
+ /* check for L-shaped memory aka modified enhanced addressing */
+ if (IS_GEN(i915, 4) &&
+ !(intel_uncore_read(uncore, DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
+ swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
+ swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
+ }
+
+ if (dcc == 0xffffffff) {
+ drm_err(&i915->drm, "Couldn't read from MCHBAR. "
+ "Disabling tiling.\n");
+ swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
+ swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
+ }
+ }
+
+ if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
+ swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) {
+ /*
+ * Userspace likes to explode if it sees unknown swizzling,
+ * so lie. We will finish the lie when reporting through
+ * the get-tiling-ioctl by reporting the physical swizzle
+ * mode as unknown instead.
+ *
+ * As we don't strictly know what the swizzling is, it may be
+ * bit17 dependent, and so we need to also prevent the pages
+ * from being moved.
+ */
+ i915->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
+ swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+ swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+ }
+
+ i915->ggtt.bit_6_swizzle_x = swizzle_x;
+ i915->ggtt.bit_6_swizzle_y = swizzle_y;
+}
+
+/*
+ * Swap every 64 bytes of this page around, to account for it having a new
+ * bit 17 of its physical address and therefore being interpreted differently
+ * by the GPU.
+ */
+static void swizzle_page(struct page *page)
+{
+ char temp[64];
+ char *vaddr;
+ int i;
+
+ vaddr = kmap(page);
+
+ for (i = 0; i < PAGE_SIZE; i += 128) {
+ memcpy(temp, &vaddr[i], 64);
+ memcpy(&vaddr[i], &vaddr[i + 64], 64);
+ memcpy(&vaddr[i + 64], temp, 64);
+ }
+
+ kunmap(page);
+}
+
+/**
+ * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
+ * @obj: i915 GEM buffer object
+ * @pages: the scattergather list of physical pages
+ *
+ * This function fixes up the swizzling in case any page frame number for this
+ * object has changed in bit 17 since that state has been saved with
+ * i915_gem_object_save_bit_17_swizzle().
+ *
+ * This is called when pinning backing storage again, since the kernel is free
+ * to move unpinned backing storage around (either by directly moving pages or
+ * by swapping them out and back in again).
+ */
+void
+i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ struct sgt_iter sgt_iter;
+ struct page *page;
+ int i;
+
+ if (obj->bit_17 == NULL)
+ return;
+
+ i = 0;
+ for_each_sgt_page(page, sgt_iter, pages) {
+ char new_bit_17 = page_to_phys(page) >> 17;
+ if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) {
+ swizzle_page(page);
+ set_page_dirty(page);
+ }
+ i++;
+ }
+}
+
+/**
+ * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
+ * @obj: i915 GEM buffer object
+ * @pages: the scattergather list of physical pages
+ *
+ * This function saves the bit 17 of each page frame number so that swizzling
+ * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
+ * be called before the backing storage can be unpinned.
+ */
+void
+i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ const unsigned int page_count = obj->base.size >> PAGE_SHIFT;
+ struct sgt_iter sgt_iter;
+ struct page *page;
+ int i;
+
+ if (obj->bit_17 == NULL) {
+ obj->bit_17 = bitmap_zalloc(page_count, GFP_KERNEL);
+ if (obj->bit_17 == NULL) {
+ DRM_ERROR("Failed to allocate memory for bit 17 "
+ "record\n");
+ return;
+ }
+ }
+
+ i = 0;
+
+ for_each_sgt_page(page, sgt_iter, pages) {
+ if (page_to_phys(page) & (1 << 17))
+ __set_bit(i, obj->bit_17);
+ else
+ __clear_bit(i, obj->bit_17);
+ i++;
+ }
+}
+
+void intel_ggtt_init_fences(struct i915_ggtt *ggtt)
+{
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+ struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+ int num_fences;
+ int i;
+
+ INIT_LIST_HEAD(&ggtt->fence_list);
+ INIT_LIST_HEAD(&ggtt->userfault_list);
+ intel_wakeref_auto_init(&ggtt->userfault_wakeref, uncore->rpm);
+
+ detect_bit_6_swizzle(ggtt);
+
+ if (!i915_ggtt_has_aperture(ggtt))
+ num_fences = 0;
+ else if (INTEL_GEN(i915) >= 7 &&
+ !(IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)))
+ num_fences = 32;
+ else if (INTEL_GEN(i915) >= 4 ||
+ IS_I945G(i915) || IS_I945GM(i915) ||
+ IS_G33(i915) || IS_PINEVIEW(i915))
+ num_fences = 16;
+ else
+ num_fences = 8;
+
+ if (intel_vgpu_active(i915))
+ num_fences = intel_uncore_read(uncore,
+ vgtif_reg(avail_rs.fence_num));
+ ggtt->fence_regs = kcalloc(num_fences,
+ sizeof(*ggtt->fence_regs),
+ GFP_KERNEL);
+ if (!ggtt->fence_regs)
+ num_fences = 0;
+
+ /* Initialize fence registers to zero */
+ for (i = 0; i < num_fences; i++) {
+ struct i915_fence_reg *fence = &ggtt->fence_regs[i];
+
+ i915_active_init(&fence->active, NULL, NULL);
+ fence->ggtt = ggtt;
+ fence->id = i;
+ list_add_tail(&fence->link, &ggtt->fence_list);
+ }
+ ggtt->num_fences = num_fences;
+
+ intel_ggtt_restore_fences(ggtt);
+}
+
+void intel_ggtt_fini_fences(struct i915_ggtt *ggtt)
+{
+ int i;
+
+ for (i = 0; i < ggtt->num_fences; i++) {
+ struct i915_fence_reg *fence = &ggtt->fence_regs[i];
+
+ i915_active_fini(&fence->active);
+ }
+
+ kfree(ggtt->fence_regs);
+}
+
+void intel_gt_init_swizzling(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+
+ if (INTEL_GEN(i915) < 5 ||
+ i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
+ return;
+
+ intel_uncore_rmw(uncore, DISP_ARB_CTL, 0, DISP_TILE_SURFACE_SWIZZLING);
+
+ if (IS_GEN(i915, 5))
+ return;
+
+ intel_uncore_rmw(uncore, TILECTL, 0, TILECTL_SWZCTL);
+
+ if (IS_GEN(i915, 6))
+ intel_uncore_write(uncore,
+ ARB_MODE,
+ _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
+ else if (IS_GEN(i915, 7))
+ intel_uncore_write(uncore,
+ ARB_MODE,
+ _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
+ else if (IS_GEN(i915, 8))
+ intel_uncore_write(uncore,
+ GAMTARBMODE,
+ _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
+ else
+ MISSING_CASE(INTEL_GEN(i915));
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h
new file mode 100644
index 000000000000..9eef679e1311
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __INTEL_GGTT_FENCING_H__
+#define __INTEL_GGTT_FENCING_H__
+
+#include <linux/list.h>
+#include <linux/types.h>
+
+#include "i915_active.h"
+
+struct drm_i915_gem_object;
+struct i915_ggtt;
+struct i915_vma;
+struct intel_gt;
+struct sg_table;
+
+#define I965_FENCE_PAGE 4096UL
+
+struct i915_fence_reg {
+ struct list_head link;
+ struct i915_ggtt *ggtt;
+ struct i915_vma *vma;
+ atomic_t pin_count;
+ struct i915_active active;
+ int id;
+ /**
+ * Whether the tiling parameters for the currently
+ * associated fence register have changed. Note that
+ * for the purposes of tracking tiling changes we also
+ * treat the unfenced register, the register slot that
+ * the object occupies whilst it executes a fenced
+ * command (such as BLT on gen2/3), as a "fence".
+ */
+ bool dirty;
+ u32 start;
+ u32 size;
+ u32 tiling;
+ u32 stride;
+};
+
+struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt);
+void i915_unreserve_fence(struct i915_fence_reg *fence);
+
+void intel_ggtt_restore_fences(struct i915_ggtt *ggtt);
+
+void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
+ struct sg_table *pages);
+void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
+ struct sg_table *pages);
+
+void intel_ggtt_init_fences(struct i915_ggtt *ggtt);
+void intel_ggtt_fini_fences(struct i915_ggtt *ggtt);
+
+void intel_gt_init_swizzling(struct intel_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index f04214a54f75..534e435f20bc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -138,7 +138,7 @@
*/
#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
/* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
-#define MI_LRI_CS_MMIO (1<<19)
+#define MI_LRI_LRM_CS_MMIO REG_BIT(19)
#define MI_LRI_FORCE_POSTED (1<<12)
#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
@@ -156,6 +156,7 @@
#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1)
#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2)
#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+#define MI_LRR_SOURCE_CS_MMIO REG_BIT(18)
#define MI_BATCH_BUFFER MI_INSTR(0x30, 1)
#define MI_BATCH_NON_SECURE (1)
/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
@@ -235,9 +236,8 @@
#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */
#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */
#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */
-#define PIPE_CONTROL_L3_RO_CACHE_INVALIDATE REG_BIT(10) /* gen12 */
#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9)
-#define PIPE_CONTROL_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */
+#define PIPE_CONTROL0_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */
#define PIPE_CONTROL_NOTIFY (1<<8)
#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */
#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index d09f7596cb98..f069551e412f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -7,6 +7,8 @@
#include "i915_drv.h"
#include "intel_context.h"
#include "intel_gt.h"
+#include "intel_gt_buffer_pool.h"
+#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
#include "intel_mocs.h"
@@ -15,6 +17,7 @@
#include "intel_rps.h"
#include "intel_uncore.h"
#include "intel_pm.h"
+#include "shmem_utils.h"
void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
{
@@ -26,6 +29,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
INIT_LIST_HEAD(&gt->closed_vma);
spin_lock_init(&gt->closed_lock);
+ intel_gt_init_buffer_pool(gt);
intel_gt_init_reset(gt);
intel_gt_init_requests(gt);
intel_gt_init_timelines(gt);
@@ -370,18 +374,6 @@ static struct i915_address_space *kernel_vm(struct intel_gt *gt)
return i915_vm_get(&gt->ggtt->vm);
}
-static int __intel_context_flush_retire(struct intel_context *ce)
-{
- struct intel_timeline *tl;
-
- tl = intel_context_timeline_lock(ce);
- if (IS_ERR(tl))
- return PTR_ERR(tl);
-
- intel_context_timeline_unlock(tl);
- return 0;
-}
-
static int __engines_record_defaults(struct intel_gt *gt)
{
struct i915_request *requests[I915_NUM_ENGINES] = {};
@@ -447,8 +439,7 @@ err_rq:
for (id = 0; id < ARRAY_SIZE(requests); id++) {
struct i915_request *rq;
- struct i915_vma *state;
- void *vaddr;
+ struct file *state;
rq = requests[id];
if (!rq)
@@ -460,48 +451,16 @@ err_rq:
}
GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags));
- state = rq->context->state;
- if (!state)
+ if (!rq->context->state)
continue;
- /* Serialise with retirement on another CPU */
- GEM_BUG_ON(!i915_request_completed(rq));
- err = __intel_context_flush_retire(rq->context);
- if (err)
- goto out;
-
- /* We want to be able to unbind the state from the GGTT */
- GEM_BUG_ON(intel_context_is_pinned(rq->context));
-
- /*
- * As we will hold a reference to the logical state, it will
- * not be torn down with the context, and importantly the
- * object will hold onto its vma (making it possible for a
- * stray GTT write to corrupt our defaults). Unmap the vma
- * from the GTT to prevent such accidents and reclaim the
- * space.
- */
- err = i915_vma_unbind(state);
- if (err)
- goto out;
-
- i915_gem_object_lock(state->obj);
- err = i915_gem_object_set_to_cpu_domain(state->obj, false);
- i915_gem_object_unlock(state->obj);
- if (err)
- goto out;
-
- i915_gem_object_set_cache_coherency(state->obj, I915_CACHE_LLC);
-
- /* Check we can acquire the image of the context state */
- vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_FORCE_WB);
- if (IS_ERR(vaddr)) {
- err = PTR_ERR(vaddr);
+ /* Keep a copy of the state's backing pages; free the obj */
+ state = shmem_create_from_object(rq->context->state->obj);
+ if (IS_ERR(state)) {
+ err = PTR_ERR(state);
goto out;
}
-
- rq->engine->default_state = i915_gem_object_get(state->obj);
- i915_gem_object_unpin_map(state->obj);
+ rq->engine->default_state = state;
}
out:
@@ -576,6 +535,8 @@ int intel_gt_init(struct intel_gt *gt)
*/
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+ intel_gt_init_clock_frequency(gt);
+
err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K);
if (err)
goto out_fw;
@@ -635,8 +596,7 @@ void intel_gt_driver_remove(struct intel_gt *gt)
{
__intel_gt_disable(gt);
- intel_uc_fini_hw(&gt->uc);
- intel_uc_fini(&gt->uc);
+ intel_uc_driver_remove(&gt->uc);
intel_engines_release(gt);
}
@@ -663,6 +623,7 @@ void intel_gt_driver_release(struct intel_gt *gt)
intel_gt_pm_fini(gt);
intel_gt_fini_scratch(gt);
+ intel_gt_fini_buffer_pool(gt);
}
void intel_gt_driver_late_release(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
index 397186818305..1495054a4305 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: MIT
/*
- * SPDX-License-Identifier: MIT
- *
* Copyright © 2014-2018 Intel Corporation
*/
@@ -8,15 +7,15 @@
#include "i915_drv.h"
#include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
+#include "intel_gt_buffer_pool.h"
-static struct intel_engine_cs *to_engine(struct intel_engine_pool *pool)
+static struct intel_gt *to_gt(struct intel_gt_buffer_pool *pool)
{
- return container_of(pool, struct intel_engine_cs, pool);
+ return container_of(pool, struct intel_gt, buffer_pool);
}
static struct list_head *
-bucket_for_size(struct intel_engine_pool *pool, size_t sz)
+bucket_for_size(struct intel_gt_buffer_pool *pool, size_t sz)
{
int n;
@@ -32,16 +31,50 @@ bucket_for_size(struct intel_engine_pool *pool, size_t sz)
return &pool->cache_list[n];
}
-static void node_free(struct intel_engine_pool_node *node)
+static void node_free(struct intel_gt_buffer_pool_node *node)
{
i915_gem_object_put(node->obj);
i915_active_fini(&node->active);
kfree(node);
}
+static void pool_free_work(struct work_struct *wrk)
+{
+ struct intel_gt_buffer_pool *pool =
+ container_of(wrk, typeof(*pool), work.work);
+ struct intel_gt_buffer_pool_node *node, *next;
+ unsigned long old = jiffies - HZ;
+ bool active = false;
+ LIST_HEAD(stale);
+ int n;
+
+ /* Free buffers that have not been used in the past second */
+ spin_lock_irq(&pool->lock);
+ for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
+ struct list_head *list = &pool->cache_list[n];
+
+ /* Most recent at head; oldest at tail */
+ list_for_each_entry_safe_reverse(node, next, list, link) {
+ if (time_before(node->age, old))
+ break;
+
+ list_move(&node->link, &stale);
+ }
+ active |= !list_empty(list);
+ }
+ spin_unlock_irq(&pool->lock);
+
+ list_for_each_entry_safe(node, next, &stale, link)
+ node_free(node);
+
+ if (active)
+ schedule_delayed_work(&pool->work,
+ round_jiffies_up_relative(HZ));
+}
+
static int pool_active(struct i915_active *ref)
{
- struct intel_engine_pool_node *node =
+ struct intel_gt_buffer_pool_node *node =
container_of(ref, typeof(*node), active);
struct dma_resv *resv = node->obj->base.resv;
int err;
@@ -64,29 +97,31 @@ static int pool_active(struct i915_active *ref)
__i915_active_call
static void pool_retire(struct i915_active *ref)
{
- struct intel_engine_pool_node *node =
+ struct intel_gt_buffer_pool_node *node =
container_of(ref, typeof(*node), active);
- struct intel_engine_pool *pool = node->pool;
+ struct intel_gt_buffer_pool *pool = node->pool;
struct list_head *list = bucket_for_size(pool, node->obj->base.size);
unsigned long flags;
- GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool)));
-
i915_gem_object_unpin_pages(node->obj);
/* Return this object to the shrinker pool */
i915_gem_object_make_purgeable(node->obj);
spin_lock_irqsave(&pool->lock, flags);
+ node->age = jiffies;
list_add(&node->link, list);
spin_unlock_irqrestore(&pool->lock, flags);
+
+ schedule_delayed_work(&pool->work,
+ round_jiffies_up_relative(HZ));
}
-static struct intel_engine_pool_node *
-node_create(struct intel_engine_pool *pool, size_t sz)
+static struct intel_gt_buffer_pool_node *
+node_create(struct intel_gt_buffer_pool *pool, size_t sz)
{
- struct intel_engine_cs *engine = to_engine(pool);
- struct intel_engine_pool_node *node;
+ struct intel_gt *gt = to_gt(pool);
+ struct intel_gt_buffer_pool_node *node;
struct drm_i915_gem_object *obj;
node = kmalloc(sizeof(*node),
@@ -97,7 +132,7 @@ node_create(struct intel_engine_pool *pool, size_t sz)
node->pool = pool;
i915_active_init(&node->active, pool_active, pool_retire);
- obj = i915_gem_object_create_internal(engine->i915, sz);
+ obj = i915_gem_object_create_internal(gt->i915, sz);
if (IS_ERR(obj)) {
i915_active_fini(&node->active);
kfree(node);
@@ -110,26 +145,15 @@ node_create(struct intel_engine_pool *pool, size_t sz)
return node;
}
-static struct intel_engine_pool *lookup_pool(struct intel_engine_cs *engine)
+struct intel_gt_buffer_pool_node *
+intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size)
{
- if (intel_engine_is_virtual(engine))
- engine = intel_virtual_engine_get_sibling(engine, 0);
-
- GEM_BUG_ON(!engine);
- return &engine->pool;
-}
-
-struct intel_engine_pool_node *
-intel_engine_get_pool(struct intel_engine_cs *engine, size_t size)
-{
- struct intel_engine_pool *pool = lookup_pool(engine);
- struct intel_engine_pool_node *node;
+ struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
+ struct intel_gt_buffer_pool_node *node;
struct list_head *list;
unsigned long flags;
int ret;
- GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool)));
-
size = PAGE_ALIGN(size);
list = bucket_for_size(pool, size);
@@ -157,34 +181,48 @@ intel_engine_get_pool(struct intel_engine_cs *engine, size_t size)
return node;
}
-void intel_engine_pool_init(struct intel_engine_pool *pool)
+void intel_gt_init_buffer_pool(struct intel_gt *gt)
{
+ struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
int n;
spin_lock_init(&pool->lock);
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
INIT_LIST_HEAD(&pool->cache_list[n]);
+ INIT_DELAYED_WORK(&pool->work, pool_free_work);
}
-void intel_engine_pool_park(struct intel_engine_pool *pool)
+static void pool_free_imm(struct intel_gt_buffer_pool *pool)
{
int n;
+ spin_lock_irq(&pool->lock);
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
+ struct intel_gt_buffer_pool_node *node, *next;
struct list_head *list = &pool->cache_list[n];
- struct intel_engine_pool_node *node, *nn;
- list_for_each_entry_safe(node, nn, list, link)
+ list_for_each_entry_safe(node, next, list, link)
node_free(node);
-
INIT_LIST_HEAD(list);
}
+ spin_unlock_irq(&pool->lock);
+}
+
+void intel_gt_flush_buffer_pool(struct intel_gt *gt)
+{
+ struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
+
+ if (cancel_delayed_work_sync(&pool->work))
+ pool_free_imm(pool);
}
-void intel_engine_pool_fini(struct intel_engine_pool *pool)
+void intel_gt_fini_buffer_pool(struct intel_gt *gt)
{
+ struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
int n;
+ intel_gt_flush_buffer_pool(gt);
+
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
GEM_BUG_ON(!list_empty(&pool->cache_list[n]));
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
new file mode 100644
index 000000000000..42cbac003e8a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#ifndef INTEL_GT_BUFFER_POOL_H
+#define INTEL_GT_BUFFER_POOL_H
+
+#include <linux/types.h>
+
+#include "i915_active.h"
+#include "intel_gt_buffer_pool_types.h"
+
+struct intel_gt;
+struct i915_request;
+
+struct intel_gt_buffer_pool_node *
+intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size);
+
+static inline int
+intel_gt_buffer_pool_mark_active(struct intel_gt_buffer_pool_node *node,
+ struct i915_request *rq)
+{
+ return i915_active_add_request(&node->active, rq);
+}
+
+static inline void
+intel_gt_buffer_pool_put(struct intel_gt_buffer_pool_node *node)
+{
+ i915_active_release(&node->active);
+}
+
+void intel_gt_init_buffer_pool(struct intel_gt *gt);
+void intel_gt_flush_buffer_pool(struct intel_gt *gt);
+void intel_gt_fini_buffer_pool(struct intel_gt *gt);
+
+#endif /* INTEL_GT_BUFFER_POOL_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
index e31ee361b76f..e28bdda771ed 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
@@ -4,26 +4,29 @@
* Copyright © 2014-2018 Intel Corporation
*/
-#ifndef INTEL_ENGINE_POOL_TYPES_H
-#define INTEL_ENGINE_POOL_TYPES_H
+#ifndef INTEL_GT_BUFFER_POOL_TYPES_H
+#define INTEL_GT_BUFFER_POOL_TYPES_H
#include <linux/list.h>
#include <linux/spinlock.h>
+#include <linux/workqueue.h>
#include "i915_active_types.h"
struct drm_i915_gem_object;
-struct intel_engine_pool {
+struct intel_gt_buffer_pool {
spinlock_t lock;
struct list_head cache_list[4];
+ struct delayed_work work;
};
-struct intel_engine_pool_node {
+struct intel_gt_buffer_pool_node {
struct i915_active active;
struct drm_i915_gem_object *obj;
struct list_head link;
- struct intel_engine_pool *pool;
+ struct intel_gt_buffer_pool *pool;
+ unsigned long age;
};
-#endif /* INTEL_ENGINE_POOL_TYPES_H */
+#endif /* INTEL_GT_BUFFER_POOL_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
new file mode 100644
index 000000000000..999079686846
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_clock_utils.h"
+
+#define MHZ_12 12000000 /* 12MHz (24MHz/2), 83.333ns */
+#define MHZ_12_5 12500000 /* 12.5MHz (25MHz/2), 80ns */
+#define MHZ_19_2 19200000 /* 19.2MHz, 52.083ns */
+
+static u32 read_clock_frequency(const struct intel_gt *gt)
+{
+ if (INTEL_GEN(gt->i915) >= 11) {
+ u32 config;
+
+ config = intel_uncore_read(gt->uncore, RPM_CONFIG0);
+ config &= GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK;
+ config >>= GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
+
+ switch (config) {
+ case 0: return MHZ_12;
+ case 1:
+ case 2: return MHZ_19_2;
+ default:
+ case 3: return MHZ_12_5;
+ }
+ } else if (INTEL_GEN(gt->i915) >= 9) {
+ if (IS_GEN9_LP(gt->i915))
+ return MHZ_19_2;
+ else
+ return MHZ_12;
+ } else {
+ return MHZ_12_5;
+ }
+}
+
+void intel_gt_init_clock_frequency(struct intel_gt *gt)
+{
+ /*
+ * Note that on gen11+, the clock frequency may be reconfigured.
+ * We do not, and we assume nobody else does.
+ */
+ gt->clock_frequency = read_clock_frequency(gt);
+ GT_TRACE(gt,
+ "Using clock frequency: %dkHz\n",
+ gt->clock_frequency / 1000);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+void intel_gt_check_clock_frequency(const struct intel_gt *gt)
+{
+ if (gt->clock_frequency != read_clock_frequency(gt)) {
+ dev_err(gt->i915->drm.dev,
+ "GT clock frequency changed, was %uHz, now %uHz!\n",
+ gt->clock_frequency,
+ read_clock_frequency(gt));
+ }
+}
+#endif
+
+static u64 div_u64_roundup(u64 nom, u32 den)
+{
+ return div_u64(nom + den - 1, den);
+}
+
+u32 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u32 count)
+{
+ return div_u64_roundup(mul_u32_u32(count, 1000 * 1000 * 1000),
+ gt->clock_frequency);
+}
+
+u32 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u32 count)
+{
+ return intel_gt_clock_interval_to_ns(gt, 16 * count);
+}
+
+u32 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u32 ns)
+{
+ return div_u64_roundup(mul_u32_u32(gt->clock_frequency, ns),
+ 1000 * 1000 * 1000);
+}
+
+u32 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u32 ns)
+{
+ u32 val;
+
+ /*
+ * Make these a multiple of magic 25 to avoid SNB (eg. Dell XPS
+ * 8300) freezing up around GPU hangs. Looks as if even
+ * scheduling/timer interrupts start misbehaving if the RPS
+ * EI/thresholds are "bad", leading to a very sluggish or even
+ * frozen machine.
+ */
+ val = DIV_ROUND_UP(intel_gt_ns_to_clock_interval(gt, ns), 16);
+ if (IS_GEN(gt->i915, 6))
+ val = roundup(val, 25);
+
+ return val;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h
new file mode 100644
index 000000000000..f793c89f2cbd
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_CLOCK_UTILS_H__
+#define __INTEL_GT_CLOCK_UTILS_H__
+
+#include <linux/types.h>
+
+struct intel_gt;
+
+void intel_gt_init_clock_frequency(struct intel_gt *gt);
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+void intel_gt_check_clock_frequency(const struct intel_gt *gt);
+#else
+static inline void intel_gt_check_clock_frequency(const struct intel_gt *gt) {}
+#endif
+
+u32 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u32 count);
+u32 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u32 count);
+
+u32 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u32 ns);
+u32 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u32 ns);
+
+#endif /* __INTEL_GT_CLOCK_UTILS_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 8b653c0f5e5f..6bdb434a442d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -12,6 +12,7 @@
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
+#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
#include "intel_llc.h"
@@ -138,6 +139,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
wakeref = intel_runtime_pm_get(gt->uncore->rpm);
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+ intel_gt_check_clock_frequency(gt);
+
/*
* As we have just resumed the machine and woken the device up from
* deep PCI sleep (presumably D3_cold), assume the HW has been reset
@@ -155,6 +158,10 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
intel_uc_reset_prepare(&gt->uc);
+ for_each_engine(engine, gt, id)
+ if (engine->sanitize)
+ engine->sanitize(engine);
+
if (reset_engines(gt) || force) {
for_each_engine(engine, gt, id)
__intel_engine_reset(engine, false);
@@ -164,6 +171,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
if (engine->reset.finish)
engine->reset.finish(engine);
+ intel_rps_sanitize(&gt->rps);
+
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
intel_runtime_pm_put(gt->uncore->rpm, wakeref);
}
@@ -191,11 +200,12 @@ int intel_gt_resume(struct intel_gt *gt)
* Only the kernel contexts should remain pinned over suspend,
* allowing us to fixup the user contexts on their first pin.
*/
+ gt_sanitize(gt, true);
+
intel_gt_pm_get(gt);
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
intel_rc6_sanitize(&gt->rc6);
- gt_sanitize(gt, true);
if (intel_gt_is_wedged(gt)) {
err = -EIO;
goto out_fw;
@@ -204,7 +214,7 @@ int intel_gt_resume(struct intel_gt *gt)
/* Only when the HW is re-initialised, can we replay the requests */
err = intel_gt_init_hw(gt);
if (err) {
- dev_err(gt->i915->drm.dev,
+ drm_err(&gt->i915->drm,
"Failed to initialize GPU, declaring it wedged!\n");
goto err_wedged;
}
@@ -220,7 +230,7 @@ int intel_gt_resume(struct intel_gt *gt)
intel_engine_pm_put(engine);
if (err) {
- dev_err(gt->i915->drm.dev,
+ drm_err(&gt->i915->drm,
"Failed to restart %s (%d)\n",
engine->name, err);
goto err_wedged;
@@ -324,6 +334,7 @@ int intel_gt_runtime_resume(struct intel_gt *gt)
{
GT_TRACE(gt, "\n");
intel_gt_init_swizzling(gt);
+ intel_ggtt_restore_fences(gt->ggtt);
return intel_uc_runtime_resume(&gt->uc);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index 24c99d0838af..16ff47c83bd5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -26,6 +26,11 @@ static bool retire_requests(struct intel_timeline *tl)
return !i915_active_fence_isset(&tl->last_request);
}
+static bool engine_active(const struct intel_engine_cs *engine)
+{
+ return !list_empty(&engine->kernel_context->timeline->requests);
+}
+
static bool flush_submission(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
@@ -37,8 +42,13 @@ static bool flush_submission(struct intel_gt *gt)
for_each_engine(engine, gt, id) {
intel_engine_flush_submission(engine);
- active |= flush_work(&engine->retire_work);
- active |= flush_work(&engine->wakeref.work);
+
+ /* Flush the background retirement and idle barriers */
+ flush_work(&engine->retire_work);
+ flush_delayed_work(&engine->wakeref.work);
+
+ /* Is the idle barrier still outstanding? */
+ active |= engine_active(engine);
}
return active;
@@ -162,7 +172,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
}
}
- if (!retire_requests(tl) || flush_submission(gt))
+ if (!retire_requests(tl))
active_count++;
mutex_unlock(&tl->mutex);
@@ -173,7 +183,6 @@ out_active: spin_lock(&timelines->lock);
if (atomic_dec_and_test(&tl->active_count))
list_del(&tl->link);
-
/* Defer the final release to after the spinlock */
if (refcount_dec_and_test(&tl->kref.refcount)) {
GEM_BUG_ON(atomic_read(&tl->active_count));
@@ -185,6 +194,9 @@ out_active: spin_lock(&timelines->lock);
list_for_each_entry_safe(tl, tn, &free, link)
__intel_timeline_free(&tl->kref);
+ if (flush_submission(gt)) /* Wait, there's more! */
+ active_count++;
+
return active_count ? timeout : 0;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 96890dd12b5f..0cc1d6b185dc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -17,6 +17,7 @@
#include "i915_vma.h"
#include "intel_engine_types.h"
+#include "intel_gt_buffer_pool_types.h"
#include "intel_llc_types.h"
#include "intel_reset_types.h"
#include "intel_rc6_types.h"
@@ -61,6 +62,7 @@ struct intel_gt {
struct list_head closed_vma;
spinlock_t closed_lock; /* guards the list of closed_vma */
+ ktime_t last_init_time;
struct intel_reset reset;
/**
@@ -72,14 +74,12 @@ struct intel_gt {
*/
intel_wakeref_t awake;
+ u32 clock_frequency;
+
struct intel_llc llc;
struct intel_rc6 rc6;
struct intel_rps rps;
- ktime_t last_init_time;
-
- struct i915_vma *scratch;
-
spinlock_t irq_lock;
u32 gt_imr;
u32 pm_ier;
@@ -97,6 +97,18 @@ struct intel_gt {
* Reserved for exclusive use by the kernel.
*/
struct i915_address_space *vm;
+
+ /*
+ * A pool of objects to use as shadow copies of client batch buffers
+ * when the command parser is enabled. Prevents the client from
+ * modifying the batch contents after software parsing.
+ *
+ * Buffers older than 1s are periodically reaped from the pool,
+ * or may be reclaimed by the shrinker before then.
+ */
+ struct intel_gt_buffer_pool buffer_pool;
+
+ struct i915_vma *scratch;
};
enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index b3116fe8d180..d93ebdf3fa0e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -26,7 +26,6 @@
#include <drm/drm_mm.h>
#include "gt/intel_reset.h"
-#include "i915_gem_fence_reg.h"
#include "i915_selftest.h"
#include "i915_vma_types.h"
@@ -135,6 +134,8 @@ typedef u64 gen8_pte_t;
#define GEN8_PDE_IPS_64K BIT(11)
#define GEN8_PDE_PS_2M BIT(7)
+struct i915_fence_reg;
+
#define for_each_sgt_daddr(__dp, __iter, __sgt) \
__for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE)
@@ -333,7 +334,7 @@ struct i915_ggtt {
u32 pin_bias;
unsigned int num_fences;
- struct i915_fence_reg fence_regs[I915_MAX_NUM_FENCES];
+ struct i915_fence_reg *fence_regs;
struct list_head fence_list;
/**
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 2dfaddb8811e..87e6c5bdd2dc 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -147,6 +147,7 @@
#include "intel_reset.h"
#include "intel_ring.h"
#include "intel_workarounds.h"
+#include "shmem_utils.h"
#define RING_EXECLIST_QFULL (1 << 0x2)
#define RING_EXECLIST1_VALID (1 << 0x3)
@@ -216,7 +217,7 @@ struct virtual_engine {
/* And finally, which physical engines this virtual engine maps onto. */
unsigned int num_siblings;
- struct intel_engine_cs *siblings[0];
+ struct intel_engine_cs *siblings[];
};
static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
@@ -238,6 +239,123 @@ __execlists_update_reg_state(const struct intel_context *ce,
const struct intel_engine_cs *engine,
u32 head);
+static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0x60;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return 0x54;
+ else if (engine->class == RENDER_CLASS)
+ return 0x58;
+ else
+ return -1;
+}
+
+static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0x74;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return 0x68;
+ else if (engine->class == RENDER_CLASS)
+ return 0xd8;
+ else
+ return -1;
+}
+
+static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0x12;
+ else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
+ return 0x18;
+ else
+ return -1;
+}
+
+static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
+{
+ int x;
+
+ x = lrc_ring_wa_bb_per_ctx(engine);
+ if (x < 0)
+ return x;
+
+ return x + 2;
+}
+
+static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
+{
+ int x;
+
+ x = lrc_ring_indirect_ptr(engine);
+ if (x < 0)
+ return x;
+
+ return x + 2;
+}
+
+static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
+{
+ if (engine->class != RENDER_CLASS)
+ return -1;
+
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0xb6;
+ else if (INTEL_GEN(engine->i915) >= 11)
+ return 0xaa;
+ else
+ return -1;
+}
+
+static u32
+lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
+{
+ switch (INTEL_GEN(engine->i915)) {
+ default:
+ MISSING_CASE(INTEL_GEN(engine->i915));
+ fallthrough;
+ case 12:
+ return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ case 11:
+ return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ case 10:
+ return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ case 9:
+ return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ case 8:
+ return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ }
+}
+
+static void
+lrc_ring_setup_indirect_ctx(u32 *regs,
+ const struct intel_engine_cs *engine,
+ u32 ctx_bb_ggtt_addr,
+ u32 size)
+{
+ GEM_BUG_ON(!size);
+ GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
+ GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
+ regs[lrc_ring_indirect_ptr(engine) + 1] =
+ ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
+
+ GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
+ regs[lrc_ring_indirect_offset(engine) + 1] =
+ lrc_ring_indirect_offset_default(engine) << 6;
+}
+
+static u32 intel_context_get_runtime(const struct intel_context *ce)
+{
+ /*
+ * We can use either ppHWSP[16] which is recorded before the context
+ * switch (and so excludes the cost of context switches) or use the
+ * value from the context image itself, which is saved/restored earlier
+ * and so includes the cost of the save.
+ */
+ return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
+}
+
static void mark_eio(struct i915_request *rq)
{
if (i915_request_completed(rq))
@@ -311,18 +429,7 @@ static int effective_prio(const struct i915_request *rq)
if (i915_request_has_nopreempt(rq))
prio = I915_PRIORITY_UNPREEMPTABLE;
- /*
- * On unwinding the active request, we give it a priority bump
- * if it has completed waiting on any semaphore. If we know that
- * the request has already started, we can prevent an unwanted
- * preempt-to-idle cycle by taking that into account now.
- */
- if (__i915_request_has_started(rq))
- prio |= I915_PRIORITY_NOSEMAPHORE;
-
- /* Restrict mere WAIT boosts from triggering preemption */
- BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
- return prio | __NO_PREEMPTION;
+ return prio;
}
static int queue_prio(const struct intel_engine_execlists *execlists)
@@ -489,7 +596,7 @@ static void set_offsets(u32 *regs,
#define REG16(x) \
(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
(((x) >> 2) & 0x7f)
-#define END(x) 0, (x)
+#define END(total_state_size) 0, (total_state_size)
{
const u32 base = engine->mmio_base;
@@ -512,7 +619,7 @@ static void set_offsets(u32 *regs,
if (flags & POSTED)
*regs |= MI_LRI_FORCE_POSTED;
if (INTEL_GEN(engine->i915) >= 11)
- *regs |= MI_LRI_CS_MMIO;
+ *regs |= MI_LRI_LRM_CS_MMIO;
regs++;
GEM_BUG_ON(!count);
@@ -897,8 +1004,63 @@ static const u8 gen12_rcs_offsets[] = {
NOP(6),
LRI(1, 0),
REG(0x0c8),
+ NOP(3 + 9 + 1),
+
+ LRI(51, POSTED),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG(0x028),
+ REG(0x09c),
+ REG(0x0c0),
+ REG(0x178),
+ REG(0x17c),
+ REG16(0x358),
+ REG(0x170),
+ REG(0x150),
+ REG(0x154),
+ REG(0x158),
+ REG16(0x41c),
+ REG16(0x600),
+ REG16(0x604),
+ REG16(0x608),
+ REG16(0x60c),
+ REG16(0x610),
+ REG16(0x614),
+ REG16(0x618),
+ REG16(0x61c),
+ REG16(0x620),
+ REG16(0x624),
+ REG16(0x628),
+ REG16(0x62c),
+ REG16(0x630),
+ REG16(0x634),
+ REG16(0x638),
+ REG16(0x63c),
+ REG16(0x640),
+ REG16(0x644),
+ REG16(0x648),
+ REG16(0x64c),
+ REG16(0x650),
+ REG16(0x654),
+ REG16(0x658),
+ REG16(0x65c),
+ REG16(0x660),
+ REG16(0x664),
+ REG16(0x668),
+ REG16(0x66c),
+ REG16(0x670),
+ REG16(0x674),
+ REG16(0x678),
+ REG16(0x67c),
+ REG(0x068),
+ REG(0x084),
+ NOP(1),
- END(80)
+ END(192)
};
#undef END
@@ -1026,17 +1188,14 @@ static void intel_engine_context_in(struct intel_engine_cs *engine)
{
unsigned long flags;
- if (READ_ONCE(engine->stats.enabled) == 0)
+ if (atomic_add_unless(&engine->stats.active, 1, 0))
return;
write_seqlock_irqsave(&engine->stats.lock, flags);
-
- if (engine->stats.enabled > 0) {
- if (engine->stats.active++ == 0)
- engine->stats.start = ktime_get();
- GEM_BUG_ON(engine->stats.active == 0);
+ if (!atomic_add_unless(&engine->stats.active, 1, 0)) {
+ engine->stats.start = ktime_get();
+ atomic_inc(&engine->stats.active);
}
-
write_sequnlock_irqrestore(&engine->stats.lock, flags);
}
@@ -1044,51 +1203,20 @@ static void intel_engine_context_out(struct intel_engine_cs *engine)
{
unsigned long flags;
- if (READ_ONCE(engine->stats.enabled) == 0)
+ GEM_BUG_ON(!atomic_read(&engine->stats.active));
+
+ if (atomic_add_unless(&engine->stats.active, -1, 1))
return;
write_seqlock_irqsave(&engine->stats.lock, flags);
-
- if (engine->stats.enabled > 0) {
- ktime_t last;
-
- if (engine->stats.active && --engine->stats.active == 0) {
- /*
- * Decrement the active context count and in case GPU
- * is now idle add up to the running total.
- */
- last = ktime_sub(ktime_get(), engine->stats.start);
-
- engine->stats.total = ktime_add(engine->stats.total,
- last);
- } else if (engine->stats.active == 0) {
- /*
- * After turning on engine stats, context out might be
- * the first event in which case we account from the
- * time stats gathering was turned on.
- */
- last = ktime_sub(ktime_get(), engine->stats.enabled_at);
-
- engine->stats.total = ktime_add(engine->stats.total,
- last);
- }
+ if (atomic_dec_and_test(&engine->stats.active)) {
+ engine->stats.total =
+ ktime_add(engine->stats.total,
+ ktime_sub(ktime_get(), engine->stats.start));
}
-
write_sequnlock_irqrestore(&engine->stats.lock, flags);
}
-static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
-{
- if (INTEL_GEN(engine->i915) >= 12)
- return 0x60;
- else if (INTEL_GEN(engine->i915) >= 9)
- return 0x54;
- else if (engine->class == RENDER_CLASS)
- return 0x58;
- else
- return -1;
-}
-
static void
execlists_check_context(const struct intel_context *ce,
const struct intel_engine_cs *engine)
@@ -1132,14 +1260,12 @@ execlists_check_context(const struct intel_context *ce,
static void restore_default_state(struct intel_context *ce,
struct intel_engine_cs *engine)
{
- u32 *regs = ce->lrc_reg_state;
+ u32 *regs;
- if (engine->pinned_default_state)
- memcpy(regs, /* skip restoring the vanilla PPHWSP */
- engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
- engine->context_size - PAGE_SIZE);
+ regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE);
+ execlists_init_reg_state(regs, ce, engine, ce->ring, true);
- execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+ ce->runtime.last = intel_context_get_runtime(ce);
}
static void reset_active(struct i915_request *rq,
@@ -1181,17 +1307,6 @@ static void reset_active(struct i915_request *rq,
ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
}
-static u32 intel_context_get_runtime(const struct intel_context *ce)
-{
- /*
- * We can use either ppHWSP[16] which is recorded before the context
- * switch (and so excludes the cost of context switches) or use the
- * value from the context image itself, which is saved/restored earlier
- * and so includes the cost of the save.
- */
- return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
-}
-
static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
@@ -1243,7 +1358,7 @@ __execlists_schedule_in(struct i915_request *rq)
ce->lrc.ccid = ce->tag;
} else {
/* We don't need a strict matching tag, just different values */
- unsigned int tag = ffs(engine->context_tag);
+ unsigned int tag = ffs(READ_ONCE(engine->context_tag));
GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
clear_bit(tag - 1, &engine->context_tag);
@@ -1417,6 +1532,24 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc
}
}
+static __maybe_unused char *
+dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
+{
+ if (!rq)
+ return "";
+
+ snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
+ prefix,
+ rq->context->lrc.ccid,
+ rq->fence.context, rq->fence.seqno,
+ i915_request_completed(rq) ? "!" :
+ i915_request_started(rq) ? "*" :
+ "",
+ rq_prio(rq));
+
+ return buf;
+}
+
static __maybe_unused void
trace_ports(const struct intel_engine_execlists *execlists,
const char *msg,
@@ -1424,18 +1557,14 @@ trace_ports(const struct intel_engine_execlists *execlists,
{
const struct intel_engine_cs *engine =
container_of(execlists, typeof(*engine), execlists);
+ char __maybe_unused p0[40], p1[40];
if (!ports[0])
return;
- ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
- ports[0]->fence.context,
- ports[0]->fence.seqno,
- i915_request_completed(ports[0]) ? "!" :
- i915_request_started(ports[0]) ? "*" :
- "",
- ports[1] ? ports[1]->fence.context : 0,
- ports[1] ? ports[1]->fence.seqno : 0);
+ ENGINE_TRACE(engine, "%s { %s%s }\n", msg,
+ dump_port(p0, sizeof(p0), "", ports[0]),
+ dump_port(p1, sizeof(p1), ", ", ports[1]));
}
static inline bool
@@ -1448,9 +1577,12 @@ static __maybe_unused bool
assert_pending_valid(const struct intel_engine_execlists *execlists,
const char *msg)
{
+ struct intel_engine_cs *engine =
+ container_of(execlists, typeof(*engine), execlists);
struct i915_request * const *port, *rq;
struct intel_context *ce = NULL;
bool sentinel = false;
+ u32 ccid = -1;
trace_ports(execlists, msg, execlists->pending);
@@ -1459,13 +1591,14 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
return true;
if (!execlists->pending[0]) {
- GEM_TRACE_ERR("Nothing pending for promotion!\n");
+ GEM_TRACE_ERR("%s: Nothing pending for promotion!\n",
+ engine->name);
return false;
}
if (execlists->pending[execlists_num_ports(execlists)]) {
- GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
- execlists_num_ports(execlists));
+ GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n",
+ engine->name, execlists_num_ports(execlists));
return false;
}
@@ -1477,20 +1610,31 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
GEM_BUG_ON(!i915_request_is_active(rq));
if (ce == rq->context) {
- GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n",
+ GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n",
+ engine->name,
ce->timeline->fence_context,
port - execlists->pending);
return false;
}
ce = rq->context;
+ if (ccid == ce->lrc.ccid) {
+ GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n",
+ engine->name,
+ ccid, ce->timeline->fence_context,
+ port - execlists->pending);
+ return false;
+ }
+ ccid = ce->lrc.ccid;
+
/*
* Sentinels are supposed to be lonely so they flush the
* current exection off the HW. Check that they are the
* only request in the pending submission.
*/
if (sentinel) {
- GEM_TRACE_ERR("context:%llx after sentinel in pending[%zd]\n",
+ GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n",
+ engine->name,
ce->timeline->fence_context,
port - execlists->pending);
return false;
@@ -1498,7 +1642,8 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
sentinel = i915_request_has_sentinel(rq);
if (sentinel && port != execlists->pending) {
- GEM_TRACE_ERR("sentinel context:%llx not in prime position[%zd]\n",
+ GEM_TRACE_ERR("%s: sentinel context:%llx not in prime position[%zd]\n",
+ engine->name,
ce->timeline->fence_context,
port - execlists->pending);
return false;
@@ -1513,7 +1658,8 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
if (i915_active_is_idle(&ce->active) &&
!intel_context_is_barrier(ce)) {
- GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n",
+ GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n",
+ engine->name,
ce->timeline->fence_context,
port - execlists->pending);
ok = false;
@@ -1521,7 +1667,8 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
}
if (!i915_vma_is_pinned(ce->state)) {
- GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n",
+ GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n",
+ engine->name,
ce->timeline->fence_context,
port - execlists->pending);
ok = false;
@@ -1529,7 +1676,8 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
}
if (!i915_vma_is_pinned(ce->ring->vma)) {
- GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n",
+ GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n",
+ engine->name,
ce->timeline->fence_context,
port - execlists->pending);
ok = false;
@@ -1664,30 +1812,16 @@ static bool virtual_matches(const struct virtual_engine *ve,
return true;
}
-static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
- struct i915_request *rq)
+static void virtual_xfer_breadcrumbs(struct virtual_engine *ve)
{
- struct intel_engine_cs *old = ve->siblings[0];
-
- /* All unattached (rq->engine == old) must already be completed */
-
- spin_lock(&old->breadcrumbs.irq_lock);
- if (!list_empty(&ve->context.signal_link)) {
- list_del_init(&ve->context.signal_link);
-
- /*
- * We cannot acquire the new engine->breadcrumbs.irq_lock
- * (as we are holding a breadcrumbs.irq_lock already),
- * so attach this request to the signaler on submission.
- * The queued irq_work will occur when we finally drop
- * the engine->active.lock after dequeue.
- */
- set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags);
-
- /* Also transfer the pending irq_work for the old breadcrumb. */
- intel_engine_signal_breadcrumbs(rq->engine);
- }
- spin_unlock(&old->breadcrumbs.irq_lock);
+ /*
+ * All the outstanding signals on ve->siblings[0] must have
+ * been completed, just pending the interrupt handler. As those
+ * signals still refer to the old sibling (via rq->engine), we must
+ * transfer those to the old irq_worker to keep our locking
+ * consistent.
+ */
+ intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context);
}
#define for_each_waiter(p__, rq__) \
@@ -1729,7 +1863,8 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl)
continue;
/* No waiter should start before its signaler */
- GEM_BUG_ON(i915_request_started(w) &&
+ GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&
+ i915_request_started(w) &&
!i915_request_completed(rq));
GEM_BUG_ON(i915_request_is_active(w));
@@ -1831,16 +1966,25 @@ static unsigned long active_timeslice(const struct intel_engine_cs *engine)
static void set_timeslice(struct intel_engine_cs *engine)
{
+ unsigned long duration;
+
if (!intel_engine_has_timeslices(engine))
return;
- set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
+ duration = active_timeslice(engine);
+ ENGINE_TRACE(engine, "bump timeslicing, interval:%lu", duration);
+
+ set_timer_ms(&engine->execlists.timer, duration);
}
static void start_timeslice(struct intel_engine_cs *engine)
{
struct intel_engine_execlists *execlists = &engine->execlists;
- int prio = queue_prio(execlists);
+ const int prio = queue_prio(execlists);
+ unsigned long duration;
+
+ if (!intel_engine_has_timeslices(engine))
+ return;
WRITE_ONCE(execlists->switch_priority_hint, prio);
if (prio == INT_MIN)
@@ -1849,7 +1993,12 @@ static void start_timeslice(struct intel_engine_cs *engine)
if (timer_pending(&execlists->timer))
return;
- set_timer_ms(&execlists->timer, timeslice(engine));
+ duration = timeslice(engine);
+ ENGINE_TRACE(engine,
+ "start timeslicing, prio:%d, interval:%lu",
+ prio, duration);
+
+ set_timer_ms(&execlists->timer, duration);
}
static void record_preemption(struct intel_engine_execlists *execlists)
@@ -1946,11 +2095,26 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* of trouble.
*/
active = READ_ONCE(execlists->active);
- while ((last = *active) && i915_request_completed(last))
- active++;
- if (last) {
+ /*
+ * In theory we can skip over completed contexts that have not
+ * yet been processed by events (as those events are in flight):
+ *
+ * while ((last = *active) && i915_request_completed(last))
+ * active++;
+ *
+ * However, the GPU cannot handle this as it will ultimately
+ * find itself trying to jump back into a context it has just
+ * completed and barf.
+ */
+
+ if ((last = *active)) {
if (need_preempt(engine, last, rb)) {
+ if (i915_request_completed(last)) {
+ tasklet_hi_schedule(&execlists->tasklet);
+ return;
+ }
+
ENGINE_TRACE(engine,
"preempting last=%llx:%lld, prio=%d, hint=%d\n",
last->fence.context,
@@ -1978,6 +2142,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
last = NULL;
} else if (need_timeslice(engine, last) &&
timeslice_expired(execlists, last)) {
+ if (i915_request_completed(last)) {
+ tasklet_hi_schedule(&execlists->tasklet);
+ return;
+ }
+
ENGINE_TRACE(engine,
"expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
last->fence.context,
@@ -2087,7 +2256,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
engine);
if (!list_empty(&ve->context.signals))
- virtual_xfer_breadcrumbs(ve, rq);
+ virtual_xfer_breadcrumbs(ve);
/*
* Move the bound engine to the top of the list
@@ -2246,8 +2415,8 @@ done:
clear_ports(port + 1, last_port - port);
WRITE_ONCE(execlists->yield, -1);
- execlists_submit_ports(engine);
set_preempt_timeout(engine, *active);
+ execlists_submit_ports(engine);
} else {
skip_submit:
ring_set_paused(engine, 0);
@@ -2417,8 +2586,6 @@ static void process_csb(struct intel_engine_cs *engine)
if (promote) {
struct i915_request * const *old = execlists->active;
- GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
-
ring_set_paused(engine, 0);
/* Point active to the new ELSP; prevent overwriting */
@@ -2431,6 +2598,7 @@ static void process_csb(struct intel_engine_cs *engine)
execlists_schedule_out(*old++);
/* switch pending to inflight */
+ GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
memcpy(execlists->inflight,
execlists->pending,
execlists_num_ports(execlists) *
@@ -2449,17 +2617,21 @@ static void process_csb(struct intel_engine_cs *engine)
* We rely on the hardware being strongly
* ordered, that the breadcrumb write is
* coherent (visible from the CPU) before the
- * user interrupt and CSB is processed.
+ * user interrupt is processed. One might assume
+ * that the breadcrumb write being before the
+ * user interrupt and the CS event for the context
+ * switch would therefore be before the CS event
+ * itself...
*/
if (GEM_SHOW_DEBUG() &&
- !i915_request_completed(*execlists->active) &&
- !reset_in_progress(execlists)) {
- struct i915_request *rq __maybe_unused =
- *execlists->active;
+ !i915_request_completed(*execlists->active)) {
+ struct i915_request *rq = *execlists->active;
const u32 *regs __maybe_unused =
rq->context->lrc_reg_state;
ENGINE_TRACE(engine,
+ "context completed before request!\n");
+ ENGINE_TRACE(engine,
"ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
ENGINE_READ(engine, RING_START),
ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
@@ -2478,8 +2650,6 @@ static void process_csb(struct intel_engine_cs *engine)
regs[CTX_RING_START],
regs[CTX_RING_HEAD],
regs[CTX_RING_TAIL]);
-
- GEM_BUG_ON("context completed before request");
}
execlists_schedule_out(*execlists->active++);
@@ -2769,6 +2939,45 @@ err_cap:
return NULL;
}
+static struct i915_request *
+active_context(struct intel_engine_cs *engine, u32 ccid)
+{
+ const struct intel_engine_execlists * const el = &engine->execlists;
+ struct i915_request * const *port, *rq;
+
+ /*
+ * Use the most recent result from process_csb(), but just in case
+ * we trigger an error (via interrupt) before the first CS event has
+ * been written, peek at the next submission.
+ */
+
+ for (port = el->active; (rq = *port); port++) {
+ if (rq->context->lrc.ccid == ccid) {
+ ENGINE_TRACE(engine,
+ "ccid found at active:%zd\n",
+ port - el->active);
+ return rq;
+ }
+ }
+
+ for (port = el->pending; (rq = *port); port++) {
+ if (rq->context->lrc.ccid == ccid) {
+ ENGINE_TRACE(engine,
+ "ccid found at pending:%zd\n",
+ port - el->pending);
+ return rq;
+ }
+ }
+
+ ENGINE_TRACE(engine, "ccid:%x not found\n", ccid);
+ return NULL;
+}
+
+static u32 active_ccid(struct intel_engine_cs *engine)
+{
+ return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI);
+}
+
static bool execlists_capture(struct intel_engine_cs *engine)
{
struct execlists_capture *cap;
@@ -2786,7 +2995,7 @@ static bool execlists_capture(struct intel_engine_cs *engine)
return true;
spin_lock_irq(&engine->active.lock);
- cap->rq = execlists_active(&engine->execlists);
+ cap->rq = active_context(engine, active_ccid(engine));
if (cap->rq) {
cap->rq = active_request(cap->rq->context->timeline, cap->rq);
cap->rq = i915_request_get_rcu(cap->rq);
@@ -2934,10 +3143,14 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
if (reset_in_progress(execlists))
return; /* defer until we restart the engine following reset */
- if (execlists->tasklet.func == execlists_submission_tasklet)
- __execlists_submission_tasklet(engine);
- else
- tasklet_hi_schedule(&execlists->tasklet);
+ /* Hopefully we clear execlists->pending[] to let us through */
+ if (READ_ONCE(execlists->pending[0]) &&
+ tasklet_trylock(&execlists->tasklet)) {
+ process_csb(engine);
+ tasklet_unlock(&execlists->tasklet);
+ }
+
+ __execlists_submission_tasklet(engine);
}
static void submit_queue(struct intel_engine_cs *engine,
@@ -3023,19 +3236,139 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
vaddr += engine->context_size;
if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
- dev_err_once(engine->i915->drm.dev,
+ drm_err_once(&engine->i915->drm,
"%s context redzone overwritten!\n",
engine->name);
}
static void execlists_context_unpin(struct intel_context *ce)
{
- check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
+ check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
ce->engine);
i915_gem_object_unpin_map(ce->state->obj);
}
+static u32 *
+gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
+{
+ *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
+ CTX_TIMESTAMP * sizeof(u32);
+ *cs++ = 0;
+
+ *cs++ = MI_LOAD_REGISTER_REG |
+ MI_LRR_SOURCE_CS_MMIO |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
+
+ *cs++ = MI_LOAD_REGISTER_REG |
+ MI_LRR_SOURCE_CS_MMIO |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
+
+ return cs;
+}
+
+static u32 *
+gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
+{
+ GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
+
+ *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
+ (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
+ *cs++ = 0;
+
+ return cs;
+}
+
+static u32 *
+gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
+{
+ GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
+
+ *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
+ (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
+ *cs++ = 0;
+
+ *cs++ = MI_LOAD_REGISTER_REG |
+ MI_LRR_SOURCE_CS_MMIO |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+ *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
+
+ return cs;
+}
+
+static u32 *
+gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
+{
+ cs = gen12_emit_timestamp_wa(ce, cs);
+ cs = gen12_emit_cmd_buf_wa(ce, cs);
+ cs = gen12_emit_restore_scratch(ce, cs);
+
+ return cs;
+}
+
+static u32 *
+gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
+{
+ cs = gen12_emit_timestamp_wa(ce, cs);
+ cs = gen12_emit_restore_scratch(ce, cs);
+
+ return cs;
+}
+
+static inline u32 context_wa_bb_offset(const struct intel_context *ce)
+{
+ return PAGE_SIZE * ce->wa_bb_page;
+}
+
+static u32 *context_indirect_bb(const struct intel_context *ce)
+{
+ void *ptr;
+
+ GEM_BUG_ON(!ce->wa_bb_page);
+
+ ptr = ce->lrc_reg_state;
+ ptr -= LRC_STATE_OFFSET; /* back to start of context image */
+ ptr += context_wa_bb_offset(ce);
+
+ return ptr;
+}
+
+static void
+setup_indirect_ctx_bb(const struct intel_context *ce,
+ const struct intel_engine_cs *engine,
+ u32 *(*emit)(const struct intel_context *, u32 *))
+{
+ u32 * const start = context_indirect_bb(ce);
+ u32 *cs;
+
+ cs = emit(ce, start);
+ GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
+ while ((unsigned long)cs % CACHELINE_BYTES)
+ *cs++ = MI_NOOP;
+
+ lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine,
+ i915_ggtt_offset(ce->state) +
+ context_wa_bb_offset(ce),
+ (cs - start) * sizeof(*cs));
+}
+
static void
__execlists_update_reg_state(const struct intel_context *ce,
const struct intel_engine_cs *engine,
@@ -3059,6 +3392,18 @@ __execlists_update_reg_state(const struct intel_context *ce,
i915_oa_init_reg_state(ce, engine);
}
+
+ if (ce->wa_bb_page) {
+ u32 *(*fn)(const struct intel_context *ce, u32 *cs);
+
+ fn = gen12_emit_indirect_ctx_xcs;
+ if (ce->engine->class == RENDER_CLASS)
+ fn = gen12_emit_indirect_ctx_rcs;
+
+ /* Mutually exclusive wrt to global indirect bb */
+ GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
+ setup_indirect_ctx_bb(ce, engine, fn);
+ }
}
static int
@@ -3077,7 +3422,7 @@ __execlists_context_pin(struct intel_context *ce,
return PTR_ERR(vaddr);
ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
- ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
+ ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
__execlists_update_reg_state(ce, engine, ce->ring->tail);
return 0;
@@ -3125,6 +3470,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
u32 *cs;
+ GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
if (!i915_request_timeline(rq)->has_initial_breadcrumb)
return 0;
@@ -3151,6 +3497,56 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
/* Record the updated position of the request's payload */
rq->infix = intel_ring_offset(rq, cs);
+ __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);
+
+ return 0;
+}
+
+static int emit_pdps(struct i915_request *rq)
+{
+ const struct intel_engine_cs * const engine = rq->engine;
+ struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm);
+ int err, i;
+ u32 *cs;
+
+ GEM_BUG_ON(intel_vgpu_active(rq->i915));
+
+ /*
+ * Beware ye of the dragons, this sequence is magic!
+ *
+ * Small changes to this sequence can cause anything from
+ * GPU hangs to forcewake errors and machine lockups!
+ */
+
+ /* Flush any residual operations from the context load */
+ err = engine->emit_flush(rq, EMIT_FLUSH);
+ if (err)
+ return err;
+
+ /* Magic required to prevent forcewake errors! */
+ err = engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ return err;
+
+ cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* Ensure the LRI have landed before we invalidate & continue */
+ *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
+ for (i = GEN8_3LVL_PDPES; i--; ) {
+ const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
+ u32 base = engine->mmio_base;
+
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
+ *cs++ = upper_32_bits(pd_daddr);
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
+ *cs++ = lower_32_bits(pd_daddr);
+ }
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+
return 0;
}
@@ -3175,6 +3571,12 @@ static int execlists_request_alloc(struct i915_request *request)
* to cancel/unwind this request now.
*/
+ if (!i915_vm_is_4lvl(request->context->vm)) {
+ ret = emit_pdps(request);
+ if (ret)
+ return ret;
+ }
+
/* Unconditionally invalidate GPU caches and TLBs. */
ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
if (ret)
@@ -3475,7 +3877,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
ret = lrc_setup_wa_ctx(engine);
if (ret) {
- DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
+ drm_dbg(&engine->i915->drm,
+ "Failed to setup context WA page: %d\n", ret);
return ret;
}
@@ -3508,6 +3911,72 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
return ret;
}
+static void reset_csb_pointers(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ const unsigned int reset_value = execlists->csb_size - 1;
+
+ ring_set_paused(engine, 0);
+
+ /*
+ * Sometimes Icelake forgets to reset its pointers on a GPU reset.
+ * Bludgeon them with a mmio update to be sure.
+ */
+ ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
+ 0xffff << 16 | reset_value << 8 | reset_value);
+ ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
+
+ /*
+ * After a reset, the HW starts writing into CSB entry [0]. We
+ * therefore have to set our HEAD pointer back one entry so that
+ * the *first* entry we check is entry 0. To complicate this further,
+ * as we don't wait for the first interrupt after reset, we have to
+ * fake the HW write to point back to the last entry so that our
+ * inline comparison of our cached head position against the last HW
+ * write works even before the first interrupt.
+ */
+ execlists->csb_head = reset_value;
+ WRITE_ONCE(*execlists->csb_write, reset_value);
+ wmb(); /* Make sure this is visible to HW (paranoia?) */
+
+ invalidate_csb_entries(&execlists->csb_status[0],
+ &execlists->csb_status[reset_value]);
+
+ /* Once more for luck and our trusty paranoia */
+ ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
+ 0xffff << 16 | reset_value << 8 | reset_value);
+ ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
+
+ GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value);
+}
+
+static void execlists_sanitize(struct intel_engine_cs *engine)
+{
+ /*
+ * Poison residual state on resume, in case the suspend didn't!
+ *
+ * We have to assume that across suspend/resume (or other loss
+ * of control) that the contents of our pinned buffers has been
+ * lost, replaced by garbage. Since this doesn't always happen,
+ * let's poison such state so that we more quickly spot when
+ * we falsely assume it has been preserved.
+ */
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
+
+ reset_csb_pointers(engine);
+
+ /*
+ * The kernel_context HWSP is stored in the status_page. As above,
+ * that may be lost on resume/initialisation, and so we need to
+ * reset the value in the HWSP.
+ */
+ intel_timeline_reset_seqno(engine->kernel_context->timeline);
+
+ /* And scrub the dirty cachelines for the HWSP */
+ clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
+}
+
static void enable_error_interrupt(struct intel_engine_cs *engine)
{
u32 status;
@@ -3518,7 +3987,7 @@ static void enable_error_interrupt(struct intel_engine_cs *engine)
status = ENGINE_READ(engine, RING_ESR);
if (unlikely(status)) {
- dev_err(engine->i915->drm.dev,
+ drm_err(&engine->i915->drm,
"engine '%s' resumed still in error: %08x\n",
engine->name, status);
__intel_gt_reset(engine->gt, engine->mask);
@@ -3582,7 +4051,8 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine)
bool unexpected = false;
if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
- DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
+ drm_dbg(&engine->i915->drm,
+ "STOP_RING still set in RING_MI_MODE\n");
unexpected = true;
}
@@ -3642,39 +4112,10 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
*
* FIXME: Wa for more modern gens needs to be validated
*/
+ ring_set_paused(engine, 1);
intel_engine_stop_cs(engine);
-}
-
-static void reset_csb_pointers(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists * const execlists = &engine->execlists;
- const unsigned int reset_value = execlists->csb_size - 1;
-
- ring_set_paused(engine, 0);
-
- /*
- * After a reset, the HW starts writing into CSB entry [0]. We
- * therefore have to set our HEAD pointer back one entry so that
- * the *first* entry we check is entry 0. To complicate this further,
- * as we don't wait for the first interrupt after reset, we have to
- * fake the HW write to point back to the last entry so that our
- * inline comparison of our cached head position against the last HW
- * write works even before the first interrupt.
- */
- execlists->csb_head = reset_value;
- WRITE_ONCE(*execlists->csb_write, reset_value);
- wmb(); /* Make sure this is visible to HW (paranoia?) */
- /*
- * Sometimes Icelake forgets to reset its pointers on a GPU reset.
- * Bludgeon them with a mmio update to be sure.
- */
- ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
- reset_value << 8 | reset_value);
- ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
-
- invalidate_csb_entries(&execlists->csb_status[0],
- &execlists->csb_status[reset_value]);
+ engine->execlists.reset_ccid = active_ccid(engine);
}
static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
@@ -3717,7 +4158,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* its request, it was still running at the time of the
* reset and will have been clobbered.
*/
- rq = execlists_active(execlists);
+ rq = active_context(engine, engine->execlists.reset_ccid);
if (!rq)
goto unwind;
@@ -3767,8 +4208,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* image back to the expected values to skip over the guilty request.
*/
__i915_request_reset(rq, stalled);
- if (!stalled)
- goto out_replay;
/*
* We want a simple context + ring to execute the breadcrumb update.
@@ -3778,9 +4217,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* future request will be after userspace has had the opportunity
* to recreate its own state.
*/
- GEM_BUG_ON(!intel_context_is_pinned(ce));
- restore_default_state(ce, engine);
-
out_replay:
ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
head, ce->ring->tail);
@@ -4146,6 +4582,42 @@ static u32 preparser_disable(bool state)
return MI_ARB_CHECK | 1 << 8 | state;
}
+static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
+{
+ static const i915_reg_t vd[] = {
+ GEN12_VD0_AUX_NV,
+ GEN12_VD1_AUX_NV,
+ GEN12_VD2_AUX_NV,
+ GEN12_VD3_AUX_NV,
+ };
+
+ static const i915_reg_t ve[] = {
+ GEN12_VE0_AUX_NV,
+ GEN12_VE1_AUX_NV,
+ };
+
+ if (engine->class == VIDEO_DECODE_CLASS)
+ return vd[engine->instance];
+
+ if (engine->class == VIDEO_ENHANCEMENT_CLASS)
+ return ve[engine->instance];
+
+ GEM_BUG_ON("unknown aux_inv_reg\n");
+
+ return INVALID_MMIO_REG;
+}
+
+static u32 *
+gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
+{
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(inv_reg);
+ *cs++ = AUX_INV;
+ *cs++ = MI_NOOP;
+
+ return cs;
+}
+
static int gen12_emit_flush_render(struct i915_request *request,
u32 mode)
{
@@ -4154,13 +4626,13 @@ static int gen12_emit_flush_render(struct i915_request *request,
u32 *cs;
flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_FLUSH_L3;
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
/* Wa_1409600907:tgl */
flags |= PIPE_CONTROL_DEPTH_STALL;
flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
flags |= PIPE_CONTROL_FLUSH_ENABLE;
- flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
flags |= PIPE_CONTROL_QW_WRITE;
@@ -4171,7 +4643,9 @@ static int gen12_emit_flush_render(struct i915_request *request,
if (IS_ERR(cs))
return PTR_ERR(cs);
- cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+ cs = gen12_emit_pipe_control(cs,
+ PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
+ flags, LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(request, cs);
}
@@ -4186,14 +4660,13 @@ static int gen12_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
flags |= PIPE_CONTROL_QW_WRITE;
flags |= PIPE_CONTROL_CS_STALL;
- cs = intel_ring_begin(request, 8);
+ cs = intel_ring_begin(request, 8 + 4);
if (IS_ERR(cs))
return PTR_ERR(cs);
@@ -4206,6 +4679,9 @@ static int gen12_emit_flush_render(struct i915_request *request,
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+ /* hsdes: 1809175790 */
+ cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
+
*cs++ = preparser_disable(false);
intel_ring_advance(request, cs);
}
@@ -4213,6 +4689,56 @@ static int gen12_emit_flush_render(struct i915_request *request,
return 0;
}
+static int gen12_emit_flush(struct i915_request *request, u32 mode)
+{
+ intel_engine_mask_t aux_inv = 0;
+ u32 cmd, *cs;
+
+ if (mode & EMIT_INVALIDATE)
+ aux_inv = request->engine->mask & ~BIT(BCS0);
+
+ cs = intel_ring_begin(request,
+ 4 + (aux_inv ? 2 * hweight8(aux_inv) + 2 : 0));
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cmd = MI_FLUSH_DW + 1;
+
+ /* We always require a command barrier so that subsequent
+ * commands, such as breadcrumb interrupts, are strictly ordered
+ * wrt the contents of the write cache being flushed to memory
+ * (and thus being coherent from the CPU).
+ */
+ cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
+ if (mode & EMIT_INVALIDATE) {
+ cmd |= MI_INVALIDATE_TLB;
+ if (request->engine->class == VIDEO_DECODE_CLASS)
+ cmd |= MI_INVALIDATE_BSD;
+ }
+
+ *cs++ = cmd;
+ *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
+ *cs++ = 0; /* upper addr */
+ *cs++ = 0; /* value */
+
+ if (aux_inv) { /* hsdes: 1809175790 */
+ struct intel_engine_cs *engine;
+ unsigned int tmp;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
+ for_each_engine_masked(engine, request->engine->gt,
+ aux_inv, tmp) {
+ *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
+ *cs++ = AUX_INV;
+ }
+ *cs++ = MI_NOOP;
+ }
+ intel_ring_advance(request, cs);
+
+ return 0;
+}
+
/*
* Reserve space for 2 NOOPs at the end of each request to be
* used as a workaround for not being allowed to do lite
@@ -4242,8 +4768,7 @@ static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
}
static __always_inline u32*
-gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
- u32 *cs)
+gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
{
*cs++ = MI_USER_INTERRUPT;
@@ -4257,14 +4782,16 @@ gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
return gen8_emit_wa_tail(request, cs);
}
-static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
{
- cs = gen8_emit_ggtt_write(cs,
- request->fence.seqno,
- i915_request_active_timeline(request)->hwsp_offset,
- 0);
+ u32 addr = i915_request_active_timeline(request)->hwsp_offset;
+
+ return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
+}
- return gen8_emit_fini_breadcrumb_footer(request, cs);
+static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+ return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
}
static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
@@ -4282,7 +4809,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
PIPE_CONTROL_FLUSH_ENABLE |
PIPE_CONTROL_CS_STALL);
- return gen8_emit_fini_breadcrumb_footer(request, cs);
+ return gen8_emit_fini_breadcrumb_tail(request, cs);
}
static u32 *
@@ -4298,7 +4825,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
- return gen8_emit_fini_breadcrumb_footer(request, cs);
+ return gen8_emit_fini_breadcrumb_tail(request, cs);
}
/*
@@ -4336,7 +4863,7 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
}
static __always_inline u32*
-gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
+gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
{
*cs++ = MI_USER_INTERRUPT;
@@ -4350,33 +4877,29 @@ gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
return gen8_emit_wa_tail(request, cs);
}
-static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
{
- cs = gen8_emit_ggtt_write(cs,
- request->fence.seqno,
- i915_request_active_timeline(request)->hwsp_offset,
- 0);
-
- return gen12_emit_fini_breadcrumb_footer(request, cs);
+ return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
}
static u32 *
gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
- cs = gen8_emit_ggtt_write_rcs(cs,
- request->fence.seqno,
- i915_request_active_timeline(request)->hwsp_offset,
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_TILE_CACHE_FLUSH |
- PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- /* Wa_1409600907:tgl */
- PIPE_CONTROL_DEPTH_STALL |
- PIPE_CONTROL_DC_FLUSH_ENABLE |
- PIPE_CONTROL_FLUSH_ENABLE |
- PIPE_CONTROL_HDC_PIPELINE_FLUSH);
+ cs = gen12_emit_ggtt_write_rcs(cs,
+ request->fence.seqno,
+ i915_request_active_timeline(request)->hwsp_offset,
+ PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TILE_CACHE_FLUSH |
+ PIPE_CONTROL_FLUSH_L3 |
+ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ /* Wa_1409600907:tgl */
+ PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_FLUSH_ENABLE);
- return gen12_emit_fini_breadcrumb_footer(request, cs);
+ return gen12_emit_fini_breadcrumb_tail(request, cs);
}
static void execlists_park(struct intel_engine_cs *engine)
@@ -4428,6 +4951,8 @@ static void execlists_shutdown(struct intel_engine_cs *engine)
static void execlists_release(struct intel_engine_cs *engine)
{
+ engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
+
execlists_shutdown(engine);
intel_engine_cleanup_common(engine);
@@ -4447,9 +4972,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->emit_flush = gen8_emit_flush;
engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
- if (INTEL_GEN(engine->i915) >= 12)
+ if (INTEL_GEN(engine->i915) >= 12) {
engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
-
+ engine->emit_flush = gen12_emit_flush;
+ }
engine->set_default_submission = intel_execlists_set_default_submission;
if (INTEL_GEN(engine->i915) < 11) {
@@ -4530,7 +5056,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
* because we only expect rare glitches but nothing
* critical to prevent us from using GPU
*/
- DRM_ERROR("WA batch buffer initialization failed\n");
+ drm_err(&i915->drm, "WA batch buffer initialization failed\n");
if (HAS_LOGICAL_RING_ELSQ(i915)) {
execlists->submit_reg = uncore->regs +
@@ -4558,48 +5084,13 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
}
- reset_csb_pointers(engine);
-
/* Finally, take ownership and responsibility for cleanup! */
+ engine->sanitize = execlists_sanitize;
engine->release = execlists_release;
return 0;
}
-static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
-{
- u32 indirect_ctx_offset;
-
- switch (INTEL_GEN(engine->i915)) {
- default:
- MISSING_CASE(INTEL_GEN(engine->i915));
- /* fall through */
- case 12:
- indirect_ctx_offset =
- GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- break;
- case 11:
- indirect_ctx_offset =
- GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- break;
- case 10:
- indirect_ctx_offset =
- GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- break;
- case 9:
- indirect_ctx_offset =
- GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- break;
- case 8:
- indirect_ctx_offset =
- GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- break;
- }
-
- return indirect_ctx_offset;
-}
-
-
static void init_common_reg_state(u32 * const regs,
const struct intel_engine_cs *engine,
const struct intel_ring *ring,
@@ -4617,30 +5108,27 @@ static void init_common_reg_state(u32 * const regs,
regs[CTX_CONTEXT_CONTROL] = ctl;
regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+ regs[CTX_TIMESTAMP] = 0;
}
static void init_wa_bb_reg_state(u32 * const regs,
- const struct intel_engine_cs *engine,
- u32 pos_bb_per_ctx)
+ const struct intel_engine_cs *engine)
{
const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
if (wa_ctx->per_ctx.size) {
const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
- regs[pos_bb_per_ctx] =
+ GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
+ regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
}
if (wa_ctx->indirect_ctx.size) {
- const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
-
- regs[pos_bb_per_ctx + 2] =
- (ggtt_offset + wa_ctx->indirect_ctx.offset) |
- (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
-
- regs[pos_bb_per_ctx + 4] =
- intel_lr_indirect_ctx_offset(engine) << 6;
+ lrc_ring_setup_indirect_ctx(regs, engine,
+ i915_ggtt_offset(wa_ctx->vma) +
+ wa_ctx->indirect_ctx.offset,
+ wa_ctx->indirect_ctx.size);
}
}
@@ -4689,10 +5177,7 @@ static void execlists_init_reg_state(u32 *regs,
init_common_reg_state(regs, engine, ring, inhibit);
init_ppgtt_reg_state(regs, vm_alias(ce->vm));
- init_wa_bb_reg_state(regs, engine,
- INTEL_GEN(engine->i915) >= 12 ?
- GEN12_CTX_BB_PER_CTX_PTR :
- CTX_BB_PER_CTX_PTR);
+ init_wa_bb_reg_state(regs, engine);
__reset_stop_ring(regs, engine);
}
@@ -4705,29 +5190,18 @@ populate_lr_context(struct intel_context *ce,
{
bool inhibit = true;
void *vaddr;
- int ret;
vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
if (IS_ERR(vaddr)) {
- ret = PTR_ERR(vaddr);
- DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
- return ret;
+ drm_dbg(&engine->i915->drm, "Could not map object pages!\n");
+ return PTR_ERR(vaddr);
}
set_redzone(vaddr, engine);
if (engine->default_state) {
- void *defaults;
-
- defaults = i915_gem_object_pin_map(engine->default_state,
- I915_MAP_WB);
- if (IS_ERR(defaults)) {
- ret = PTR_ERR(defaults);
- goto err_unpin_ctx;
- }
-
- memcpy(vaddr, defaults, engine->context_size);
- i915_gem_object_unpin_map(engine->default_state);
+ shmem_read(engine->default_state, 0,
+ vaddr, engine->context_size);
__set_bit(CONTEXT_VALID_BIT, &ce->flags);
inhibit = false;
}
@@ -4739,14 +5213,12 @@ populate_lr_context(struct intel_context *ce,
* The second page of the context object contains some registers which
* must be set up prior to the first execution.
*/
- execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
+ execlists_init_reg_state(vaddr + LRC_STATE_OFFSET,
ce, engine, ring, inhibit);
- ret = 0;
-err_unpin_ctx:
__i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
i915_gem_object_unpin_map(ctx_obj);
- return ret;
+ return 0;
}
static int __execlists_context_alloc(struct intel_context *ce,
@@ -4764,6 +5236,11 @@ static int __execlists_context_alloc(struct intel_context *ce,
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
context_size += I915_GTT_PAGE_SIZE; /* for redzone */
+ if (INTEL_GEN(engine->i915) == 12) {
+ ce->wa_bb_page = context_size / PAGE_SIZE;
+ context_size += PAGE_SIZE;
+ }
+
ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
if (IS_ERR(ctx_obj))
return PTR_ERR(ctx_obj);
@@ -4803,7 +5280,8 @@ static int __execlists_context_alloc(struct intel_context *ce,
ret = populate_lr_context(ce, ctx_obj, engine, ring);
if (ret) {
- DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
+ drm_dbg(&engine->i915->drm,
+ "Failed to populate LRC: %d\n", ret);
goto error_ring_free;
}
@@ -4856,6 +5334,8 @@ static void virtual_context_destroy(struct kref *kref)
__execlists_context_fini(&ve->context);
intel_context_fini(&ve->context);
+ intel_engine_free_request_pool(&ve->base);
+
kfree(ve->bonds);
kfree(ve);
}
@@ -4980,12 +5460,15 @@ static void virtual_submission_tasklet(unsigned long data)
return;
local_irq_disable();
- for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
- struct intel_engine_cs *sibling = ve->siblings[n];
+ for (n = 0; n < ve->num_siblings; n++) {
+ struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
struct ve_node * const node = &ve->nodes[sibling->id];
struct rb_node **parent, *rb;
bool first;
+ if (!READ_ONCE(ve->request))
+ break; /* already handled by a sibling's tasklet */
+
if (unlikely(!(mask & sibling->mask))) {
if (!RB_EMPTY_NODE(&node->rb)) {
spin_lock(&sibling->active.lock);
@@ -5036,10 +5519,8 @@ static void virtual_submission_tasklet(unsigned long data)
submit_engine:
GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
node->prio = prio;
- if (first && prio > sibling->execlists.queue_priority_hint) {
- sibling->execlists.queue_priority_hint = prio;
+ if (first && prio > sibling->execlists.queue_priority_hint)
tasklet_hi_schedule(&sibling->execlists.tasklet);
- }
spin_unlock(&sibling->active.lock);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index dfbc214e14f5..91fd8e452d9b 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -90,6 +90,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine);
#define LRC_PPHWSP_SZ (1)
/* After the PPHWSP we have the logical state for the context */
#define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ)
+#define LRC_STATE_OFFSET (LRC_STATE_PN * PAGE_SIZE)
/* Space within PPHWSP reserved to be used as scratch */
#define LRC_PPHWSP_SCRATCH 0x34
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index d39b72590e40..93cb6c460508 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -9,14 +9,13 @@
#include <linux/types.h>
-/* GEN8 to GEN11 Reg State Context */
+/* GEN8 to GEN12 Reg State Context */
#define CTX_CONTEXT_CONTROL (0x02 + 1)
#define CTX_RING_HEAD (0x04 + 1)
#define CTX_RING_TAIL (0x06 + 1)
#define CTX_RING_START (0x08 + 1)
#define CTX_RING_CTL (0x0a + 1)
#define CTX_BB_STATE (0x10 + 1)
-#define CTX_BB_PER_CTX_PTR (0x18 + 1)
#define CTX_TIMESTAMP (0x22 + 1)
#define CTX_PDP3_UDW (0x24 + 1)
#define CTX_PDP3_LDW (0x26 + 1)
@@ -30,9 +29,6 @@
#define GEN9_CTX_RING_MI_MODE 0x54
-/* GEN12+ Reg State Context */
-#define GEN12_CTX_BB_PER_CTX_PTR (0x12 + 1)
-
#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \
u32 *reg_state__ = (reg_state); \
const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 3847ee44b181..ab675d35030d 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -113,7 +113,6 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
struct intel_uncore *uncore = rc6_to_uncore(rc6);
struct intel_engine_cs *engine;
enum intel_engine_id id;
- u32 rc6_mode;
/* 2b: Program RC6 thresholds.*/
if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) {
@@ -165,16 +164,11 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
/* 3a: Enable RC6 */
set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
- /* WaRsUseTimeoutMode:cnl (pre-prod) */
- if (IS_CNL_REVID(rc6_to_i915(rc6), CNL_REVID_A0, CNL_REVID_C0))
- rc6_mode = GEN7_RC_CTL_TO_MODE;
- else
- rc6_mode = GEN6_RC_CTL_EI_MODE(1);
rc6->ctl_enable =
GEN6_RC_CTL_HW_ENABLE |
GEN6_RC_CTL_RC6_ENABLE |
- rc6_mode;
+ GEN6_RC_CTL_EI_MODE(1);
/*
* WaRsDisableCoarsePowerGating:skl,cnl
@@ -246,16 +240,18 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6)
ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS,
&rc6vids, NULL);
if (IS_GEN(i915, 6) && ret) {
- DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
+ drm_dbg(&i915->drm, "Couldn't check for BIOS workaround\n");
} else if (IS_GEN(i915, 6) &&
(GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
- DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
- GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
+ drm_dbg(&i915->drm,
+ "You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
+ GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
rc6vids &= 0xffff00;
rc6vids |= GEN6_ENCODE_RC6_VID(450);
ret = sandybridge_pcode_write(i915, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
if (ret)
- DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
+ drm_err(&i915->drm,
+ "Couldn't fix incorrect rc6 voltage\n");
}
}
@@ -263,14 +259,15 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6)
static int chv_rc6_init(struct intel_rc6 *rc6)
{
struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
resource_size_t pctx_paddr, paddr;
resource_size_t pctx_size = 32 * SZ_1K;
u32 pcbr;
pcbr = intel_uncore_read(uncore, VLV_PCBR);
if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
- DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
- paddr = rc6_to_i915(rc6)->dsm.end + 1 - pctx_size;
+ drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n");
+ paddr = i915->dsm.end + 1 - pctx_size;
GEM_BUG_ON(paddr > U32_MAX);
pctx_paddr = (paddr & ~4095);
@@ -304,7 +301,7 @@ static int vlv_rc6_init(struct intel_rc6 *rc6)
goto out;
}
- DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
+ drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n");
/*
* From the Gunit register HAS:
@@ -316,7 +313,8 @@ static int vlv_rc6_init(struct intel_rc6 *rc6)
*/
pctx = i915_gem_object_create_stolen(i915, pctx_size);
if (IS_ERR(pctx)) {
- DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
+ drm_dbg(&i915->drm,
+ "not enough stolen space for PCTX, disabling\n");
return PTR_ERR(pctx);
}
@@ -398,14 +396,14 @@ static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE);
rc_sw_target &= RC_SW_TARGET_STATE_MASK;
rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT;
- DRM_DEBUG_DRIVER("BIOS enabled RC states: "
+ drm_dbg(&i915->drm, "BIOS enabled RC states: "
"HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
rc_sw_target);
if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
- DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
+ drm_dbg(&i915->drm, "RC6 Base location not set properly.\n");
enable_rc6 = false;
}
@@ -417,7 +415,7 @@ static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
if (!(rc6_ctx_base >= i915->dsm_reserved.start &&
rc6_ctx_base + PAGE_SIZE < i915->dsm_reserved.end)) {
- DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
+ drm_dbg(&i915->drm, "RC6 Base address not as expected.\n");
enable_rc6 = false;
}
@@ -425,24 +423,25 @@ static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
(intel_uncore_read(uncore, PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1 &&
(intel_uncore_read(uncore, PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1 &&
(intel_uncore_read(uncore, PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1)) {
- DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
+ drm_dbg(&i915->drm,
+ "Engine Idle wait time not set properly.\n");
enable_rc6 = false;
}
if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) ||
!intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) ||
!intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) {
- DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
+ drm_dbg(&i915->drm, "Pushbus not setup properly.\n");
enable_rc6 = false;
}
if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) {
- DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
+ drm_dbg(&i915->drm, "GFX pause not setup properly.\n");
enable_rc6 = false;
}
if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) {
- DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
+ drm_dbg(&i915->drm, "GPM control not setup properly.\n");
enable_rc6 = false;
}
@@ -463,7 +462,7 @@ static bool rc6_supported(struct intel_rc6 *rc6)
return false;
if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) {
- dev_notice(i915->drm.dev,
+ drm_notice(&i915->drm,
"RC6 and powersaving disabled by BIOS\n");
return false;
}
@@ -495,7 +494,7 @@ static bool pctx_corrupted(struct intel_rc6 *rc6)
if (intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO))
return false;
- dev_notice(i915->drm.dev,
+ drm_notice(&i915->drm,
"RC6 context corruption, disabling runtime power management\n");
return true;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index 5954ecc3207f..f59e7875cc5e 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -102,7 +102,7 @@ static int render_state_setup(struct intel_renderstate *so,
}
if (rodata->reloc[reloc_index] != -1) {
- DRM_ERROR("only %d relocs resolved\n", reloc_index);
+ drm_err(&i915->drm, "only %d relocs resolved\n", reloc_index);
goto err;
}
@@ -194,7 +194,7 @@ int intel_renderstate_init(struct intel_renderstate *so,
err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (err)
- goto err_vma;
+ goto err_obj;
err = render_state_setup(so, engine->i915);
if (err)
@@ -204,8 +204,6 @@ int intel_renderstate_init(struct intel_renderstate *so,
err_unpin:
i915_vma_unpin(so->vma);
-err_vma:
- i915_vma_close(so->vma);
err_obj:
i915_gem_object_put(obj);
so->vma = NULL;
@@ -221,6 +219,14 @@ int intel_renderstate_emit(struct intel_renderstate *so,
if (!so->vma)
return 0;
+ i915_vma_lock(so->vma);
+ err = i915_request_await_object(rq, so->vma->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(so->vma, rq, 0);
+ i915_vma_unlock(so->vma);
+ if (err)
+ return err;
+
err = engine->emit_bb_start(rq,
so->batch_offset, so->batch_size,
I915_DISPATCH_SECURE);
@@ -235,13 +241,7 @@ int intel_renderstate_emit(struct intel_renderstate *so,
return err;
}
- i915_vma_lock(so->vma);
- err = i915_request_await_object(rq, so->vma->obj, false);
- if (err == 0)
- err = i915_vma_move_to_active(so->vma, rq, 0);
- i915_vma_unlock(so->vma);
-
- return err;
+ return 0;
}
void intel_renderstate_fini(struct intel_renderstate *so)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 80db3c9d785e..39070b514e65 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -109,7 +109,7 @@ static bool mark_guilty(struct i915_request *rq)
goto out;
}
- dev_notice(ctx->i915->drm.dev,
+ drm_notice(&ctx->i915->drm,
"%s context reset due to GPU hang\n",
ctx->name);
@@ -755,7 +755,7 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
for_each_engine(engine, gt, id)
__intel_engine_reset(engine, stalled_mask & engine->mask);
- i915_gem_restore_fences(gt->ggtt);
+ intel_ggtt_restore_fences(gt->ggtt);
return err;
}
@@ -1031,7 +1031,7 @@ void intel_gt_reset(struct intel_gt *gt,
goto unlock;
if (reason)
- dev_notice(gt->i915->drm.dev,
+ drm_notice(&gt->i915->drm,
"Resetting chip for %s\n", reason);
atomic_inc(&gt->i915->gpu_error.reset_count);
@@ -1039,7 +1039,7 @@ void intel_gt_reset(struct intel_gt *gt,
if (!intel_has_gpu_reset(gt)) {
if (i915_modparams.reset)
- dev_err(gt->i915->drm.dev, "GPU reset not supported\n");
+ drm_err(&gt->i915->drm, "GPU reset not supported\n");
else
drm_dbg(&gt->i915->drm, "GPU reset disabled\n");
goto error;
@@ -1049,7 +1049,7 @@ void intel_gt_reset(struct intel_gt *gt,
intel_runtime_pm_disable_interrupts(gt->i915);
if (do_reset(gt, stalled_mask)) {
- dev_err(gt->i915->drm.dev, "Failed to reset chip\n");
+ drm_err(&gt->i915->drm, "Failed to reset chip\n");
goto taint;
}
@@ -1111,7 +1111,7 @@ static inline int intel_gt_reset_engine(struct intel_engine_cs *engine)
/**
* intel_engine_reset - reset GPU engine to recover from a hang
* @engine: engine to reset
- * @msg: reason for GPU reset; or NULL for no dev_notice()
+ * @msg: reason for GPU reset; or NULL for no drm_notice()
*
* Reset a specific GPU engine. Useful if a hang is detected.
* Returns zero on successful reset or otherwise an error code.
@@ -1136,7 +1136,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
reset_prepare_engine(engine);
if (msg)
- dev_notice(engine->i915->drm.dev,
+ drm_notice(&engine->i915->drm,
"Resetting %s for %s\n", engine->name, msg);
atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
@@ -1381,7 +1381,7 @@ static void intel_wedge_me(struct work_struct *work)
{
struct intel_wedge_me *w = container_of(work, typeof(*w), work.work);
- dev_err(w->gt->i915->drm.dev,
+ drm_err(&w->gt->i915->drm,
"%s timed out, cancelling all in-flight rendering.\n",
w->name);
intel_gt_set_wedged(w->gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
index 5bdce24994aa..cc0ebca65167 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.h
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -88,6 +88,8 @@ static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
static inline void
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
{
+ unsigned int head = READ_ONCE(ring->head);
+
GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
/*
@@ -105,8 +107,7 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
* into the same cacheline as ring->head.
*/
#define cacheline(a) round_down(a, CACHELINE_BYTES)
- GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
- tail < ring->head);
+ GEM_BUG_ON(cacheline(tail) == cacheline(head) && tail < head);
#undef cacheline
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index fdc3f10e12aa..ca7286e58409 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -42,6 +42,7 @@
#include "intel_reset.h"
#include "intel_ring.h"
#include "intel_workarounds.h"
+#include "shmem_utils.h"
/* Rough estimate of the typical request size, performing a flush,
* set-context and then emitting the batch.
@@ -577,8 +578,9 @@ static void flush_cs_tlb(struct intel_engine_cs *engine)
RING_INSTPM(engine->mmio_base),
INSTPM_SYNC_FLUSH, 0,
1000))
- DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
- engine->name);
+ drm_err(&dev_priv->drm,
+ "%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
+ engine->name);
}
static void ring_setup_status_page(struct intel_engine_cs *engine)
@@ -601,8 +603,9 @@ static bool stop_ring(struct intel_engine_cs *engine)
MODE_IDLE,
MODE_IDLE,
1000)) {
- DRM_ERROR("%s : timed out trying to stop ring\n",
- engine->name);
+ drm_err(&dev_priv->drm,
+ "%s : timed out trying to stop ring\n",
+ engine->name);
/*
* Sometimes we observe that the idle flag is not
@@ -661,22 +664,23 @@ static int xcs_resume(struct intel_engine_cs *engine)
/* WaClearRingBufHeadRegAtInit:ctg,elk */
if (!stop_ring(engine)) {
/* G45 ring initialization often fails to reset head to zero */
- DRM_DEBUG_DRIVER("%s head not reset to zero "
+ drm_dbg(&dev_priv->drm, "%s head not reset to zero "
+ "ctl %08x head %08x tail %08x start %08x\n",
+ engine->name,
+ ENGINE_READ(engine, RING_CTL),
+ ENGINE_READ(engine, RING_HEAD),
+ ENGINE_READ(engine, RING_TAIL),
+ ENGINE_READ(engine, RING_START));
+
+ if (!stop_ring(engine)) {
+ drm_err(&dev_priv->drm,
+ "failed to set %s head to zero "
"ctl %08x head %08x tail %08x start %08x\n",
engine->name,
ENGINE_READ(engine, RING_CTL),
ENGINE_READ(engine, RING_HEAD),
ENGINE_READ(engine, RING_TAIL),
ENGINE_READ(engine, RING_START));
-
- if (!stop_ring(engine)) {
- DRM_ERROR("failed to set %s head to zero "
- "ctl %08x head %08x tail %08x start %08x\n",
- engine->name,
- ENGINE_READ(engine, RING_CTL),
- ENGINE_READ(engine, RING_HEAD),
- ENGINE_READ(engine, RING_TAIL),
- ENGINE_READ(engine, RING_START));
ret = -EIO;
goto out;
}
@@ -719,7 +723,7 @@ static int xcs_resume(struct intel_engine_cs *engine)
RING_CTL(engine->mmio_base),
RING_VALID, RING_VALID,
50)) {
- DRM_ERROR("%s initialization failed "
+ drm_err(&dev_priv->drm, "%s initialization failed "
"ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
engine->name,
ENGINE_READ(engine, RING_CTL),
@@ -1238,7 +1242,7 @@ alloc_context_vma(struct intel_engine_cs *engine)
i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);
if (engine->default_state) {
- void *defaults, *vaddr;
+ void *vaddr;
vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
if (IS_ERR(vaddr)) {
@@ -1246,15 +1250,8 @@ alloc_context_vma(struct intel_engine_cs *engine)
goto err_obj;
}
- defaults = i915_gem_object_pin_map(engine->default_state,
- I915_MAP_WB);
- if (IS_ERR(defaults)) {
- err = PTR_ERR(defaults);
- goto err_map;
- }
-
- memcpy(vaddr, defaults, engine->context_size);
- i915_gem_object_unpin_map(engine->default_state);
+ shmem_read(engine->default_state, 0,
+ vaddr, engine->context_size);
i915_gem_object_flush_map(obj);
i915_gem_object_unpin_map(obj);
@@ -1268,8 +1265,6 @@ alloc_context_vma(struct intel_engine_cs *engine)
return vma;
-err_map:
- i915_gem_object_unpin_map(obj);
err_obj:
i915_gem_object_put(obj);
return ERR_PTR(err);
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 19542fd9e207..2f59fc6df3c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -8,12 +8,15 @@
#include "i915_drv.h"
#include "intel_gt.h"
+#include "intel_gt_clock_utils.h"
#include "intel_gt_irq.h"
#include "intel_gt_pm_irq.h"
#include "intel_rps.h"
#include "intel_sideband.h"
#include "../../../platform/x86/intel_ips.h"
+#define BUSY_MAX_EI 20u /* ms */
+
/*
* Lock protecting IPS related data structures
*/
@@ -44,6 +47,100 @@ static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
intel_uncore_write_fw(uncore, reg, val);
}
+static void rps_timer(struct timer_list *t)
+{
+ struct intel_rps *rps = from_timer(rps, t, timer);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ s64 max_busy[3] = {};
+ ktime_t dt, last;
+
+ for_each_engine(engine, rps_to_gt(rps), id) {
+ s64 busy;
+ int i;
+
+ dt = intel_engine_get_busy_time(engine);
+ last = engine->stats.rps;
+ engine->stats.rps = dt;
+
+ busy = ktime_to_ns(ktime_sub(dt, last));
+ for (i = 0; i < ARRAY_SIZE(max_busy); i++) {
+ if (busy > max_busy[i])
+ swap(busy, max_busy[i]);
+ }
+ }
+
+ dt = ktime_get();
+ last = rps->pm_timestamp;
+ rps->pm_timestamp = dt;
+
+ if (intel_rps_is_active(rps)) {
+ s64 busy;
+ int i;
+
+ dt = ktime_sub(dt, last);
+
+ /*
+ * Our goal is to evaluate each engine independently, so we run
+ * at the lowest clocks required to sustain the heaviest
+ * workload. However, a task may be split into sequential
+ * dependent operations across a set of engines, such that
+ * the independent contributions do not account for high load,
+ * but overall the task is GPU bound. For example, consider
+ * video decode on vcs followed by colour post-processing
+ * on vecs, followed by general post-processing on rcs.
+ * Since multi-engines being active does imply a single
+ * continuous workload across all engines, we hedge our
+ * bets by only contributing a factor of the distributed
+ * load into our busyness calculation.
+ */
+ busy = max_busy[0];
+ for (i = 1; i < ARRAY_SIZE(max_busy); i++) {
+ if (!max_busy[i])
+ break;
+
+ busy += div_u64(max_busy[i], 1 << i);
+ }
+ GT_TRACE(rps_to_gt(rps),
+ "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
+ busy, (int)div64_u64(100 * busy, dt),
+ max_busy[0], max_busy[1], max_busy[2],
+ rps->pm_interval);
+
+ if (100 * busy > rps->power.up_threshold * dt &&
+ rps->cur_freq < rps->max_freq_softlimit) {
+ rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
+ rps->pm_interval = 1;
+ schedule_work(&rps->work);
+ } else if (100 * busy < rps->power.down_threshold * dt &&
+ rps->cur_freq > rps->min_freq_softlimit) {
+ rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
+ rps->pm_interval = 1;
+ schedule_work(&rps->work);
+ } else {
+ rps->last_adj = 0;
+ }
+
+ mod_timer(&rps->timer,
+ jiffies + msecs_to_jiffies(rps->pm_interval));
+ rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI);
+ }
+}
+
+static void rps_start_timer(struct intel_rps *rps)
+{
+ rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
+ rps->pm_interval = 1;
+ mod_timer(&rps->timer, jiffies + 1);
+}
+
+static void rps_stop_timer(struct intel_rps *rps)
+{
+ del_timer_sync(&rps->timer);
+ rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
+ cancel_work_sync(&rps->work);
+}
+
static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
{
u32 mask = 0;
@@ -57,7 +154,7 @@ static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
if (val < rps->max_freq_softlimit)
mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
- mask &= READ_ONCE(rps->pm_events);
+ mask &= rps->pm_events;
return rps_pm_sanitize_mask(rps, ~mask);
}
@@ -70,18 +167,11 @@ static void rps_reset_ei(struct intel_rps *rps)
static void rps_enable_interrupts(struct intel_rps *rps)
{
struct intel_gt *gt = rps_to_gt(rps);
- u32 events;
- rps_reset_ei(rps);
+ GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n",
+ rps->pm_events, rps_pm_mask(rps, rps->last_freq));
- if (IS_VALLEYVIEW(gt->i915))
- /* WaGsvRC0ResidencyMethod:vlv */
- events = GEN6_PM_RP_UP_EI_EXPIRED;
- else
- events = (GEN6_PM_RP_UP_THRESHOLD |
- GEN6_PM_RP_DOWN_THRESHOLD |
- GEN6_PM_RP_DOWN_TIMEOUT);
- WRITE_ONCE(rps->pm_events, events);
+ rps_reset_ei(rps);
spin_lock_irq(&gt->irq_lock);
gen6_gt_pm_enable_irq(gt, rps->pm_events);
@@ -120,8 +210,6 @@ static void rps_disable_interrupts(struct intel_rps *rps)
{
struct intel_gt *gt = rps_to_gt(rps);
- WRITE_ONCE(rps->pm_events, 0);
-
intel_uncore_write(gt->uncore,
GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
@@ -140,6 +228,7 @@ static void rps_disable_interrupts(struct intel_rps *rps)
cancel_work_sync(&rps->work);
rps_reset_interrupts(rps);
+ GT_TRACE(gt, "interrupts:off\n");
}
static const struct cparams {
@@ -186,14 +275,12 @@ static void gen5_rps_init(struct intel_rps *rps)
fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
MEMMODE_FSTART_SHIFT;
- DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
- fmax, fmin, fstart);
+ drm_dbg(&i915->drm, "fmax: %d, fmin: %d, fstart: %d\n",
+ fmax, fmin, fstart);
rps->min_freq = fmax;
+ rps->efficient_freq = fstart;
rps->max_freq = fmin;
-
- rps->idle_freq = rps->min_freq;
- rps->cur_freq = rps->idle_freq;
}
static unsigned long
@@ -456,7 +543,8 @@ static bool gen5_rps_enable(struct intel_rps *rps)
if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
MEMCTL_CMD_STS) == 0, 10))
- DRM_ERROR("stuck trying to change perf mode\n");
+ drm_err(&uncore->i915->drm,
+ "stuck trying to change perf mode\n");
mdelay(1);
gen5_rps_set(rps, rps->cur_freq);
@@ -533,8 +621,8 @@ static u32 rps_limits(struct intel_rps *rps, u8 val)
static void rps_set_power(struct intel_rps *rps, int new_power)
{
- struct intel_uncore *uncore = rps_to_uncore(rps);
- struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_gt *gt = rps_to_gt(rps);
+ struct intel_uncore *uncore = gt->uncore;
u32 threshold_up = 0, threshold_down = 0; /* in % */
u32 ei_up = 0, ei_down = 0;
@@ -543,55 +631,49 @@ static void rps_set_power(struct intel_rps *rps, int new_power)
if (new_power == rps->power.mode)
return;
+ threshold_up = 95;
+ threshold_down = 85;
+
/* Note the units here are not exactly 1us, but 1280ns. */
switch (new_power) {
case LOW_POWER:
- /* Upclock if more than 95% busy over 16ms */
ei_up = 16000;
- threshold_up = 95;
-
- /* Downclock if less than 85% busy over 32ms */
ei_down = 32000;
- threshold_down = 85;
break;
case BETWEEN:
- /* Upclock if more than 90% busy over 13ms */
ei_up = 13000;
- threshold_up = 90;
-
- /* Downclock if less than 75% busy over 32ms */
ei_down = 32000;
- threshold_down = 75;
break;
case HIGH_POWER:
- /* Upclock if more than 85% busy over 10ms */
ei_up = 10000;
- threshold_up = 85;
-
- /* Downclock if less than 60% busy over 32ms */
ei_down = 32000;
- threshold_down = 60;
break;
}
/* When byt can survive without system hang with dynamic
* sw freq adjustments, this restriction can be lifted.
*/
- if (IS_VALLEYVIEW(i915))
+ if (IS_VALLEYVIEW(gt->i915))
goto skip_hw_write;
- set(uncore, GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(i915, ei_up));
+ GT_TRACE(gt,
+ "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n",
+ new_power, threshold_up, ei_up, threshold_down, ei_down);
+
+ set(uncore, GEN6_RP_UP_EI,
+ intel_gt_ns_to_pm_interval(gt, ei_up * 1000));
set(uncore, GEN6_RP_UP_THRESHOLD,
- GT_INTERVAL_FROM_US(i915, ei_up * threshold_up / 100));
+ intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10));
- set(uncore, GEN6_RP_DOWN_EI, GT_INTERVAL_FROM_US(i915, ei_down));
+ set(uncore, GEN6_RP_DOWN_EI,
+ intel_gt_ns_to_pm_interval(gt, ei_down * 1000));
set(uncore, GEN6_RP_DOWN_THRESHOLD,
- GT_INTERVAL_FROM_US(i915, ei_down * threshold_down / 100));
+ intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10));
set(uncore, GEN6_RP_CONTROL,
- (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
+ (INTEL_GEN(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
GEN6_RP_MEDIA_HW_NORMAL_MODE |
GEN6_RP_MEDIA_IS_GFX |
GEN6_RP_ENABLE |
@@ -646,9 +728,11 @@ static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
{
+ GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n", yesno(interactive));
+
mutex_lock(&rps->power.mutex);
if (interactive) {
- if (!rps->power.interactive++ && READ_ONCE(rps->active))
+ if (!rps->power.interactive++ && intel_rps_is_active(rps))
rps_set_power(rps, HIGH_POWER);
} else {
GEM_BUG_ON(!rps->power.interactive);
@@ -673,6 +757,9 @@ static int gen6_rps_set(struct intel_rps *rps, u8 val)
GEN6_AGGRESSIVE_TURBO);
set(uncore, GEN6_RPNSWREQ, swreq);
+ GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d, swreq:%x\n",
+ val, intel_gpu_freq(rps, val), swreq);
+
return 0;
}
@@ -685,6 +772,9 @@ static int vlv_rps_set(struct intel_rps *rps, u8 val)
err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
vlv_punit_put(i915);
+ GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n",
+ val, intel_gpu_freq(rps, val));
+
return err;
}
@@ -715,29 +805,30 @@ static int rps_set(struct intel_rps *rps, u8 val, bool update)
void intel_rps_unpark(struct intel_rps *rps)
{
- u8 freq;
-
- if (!rps->enabled)
+ if (!intel_rps_is_enabled(rps))
return;
+ GT_TRACE(rps_to_gt(rps), "unpark:%x\n", rps->cur_freq);
+
/*
* Use the user's desired frequency as a guide, but for better
* performance, jump directly to RPe as our starting frequency.
*/
mutex_lock(&rps->lock);
- WRITE_ONCE(rps->active, true);
-
- freq = max(rps->cur_freq, rps->efficient_freq),
- freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit);
- intel_rps_set(rps, freq);
-
- rps->last_adj = 0;
+ intel_rps_set_active(rps);
+ intel_rps_set(rps,
+ clamp(rps->cur_freq,
+ rps->min_freq_softlimit,
+ rps->max_freq_softlimit));
mutex_unlock(&rps->lock);
- if (INTEL_GEN(rps_to_i915(rps)) >= 6)
+ rps->pm_iir = 0;
+ if (intel_rps_has_interrupts(rps))
rps_enable_interrupts(rps);
+ if (intel_rps_uses_timer(rps))
+ rps_start_timer(rps);
if (IS_GEN(rps_to_i915(rps), 5))
gen5_rps_update(rps);
@@ -745,15 +836,16 @@ void intel_rps_unpark(struct intel_rps *rps)
void intel_rps_park(struct intel_rps *rps)
{
- struct drm_i915_private *i915 = rps_to_i915(rps);
+ int adj;
- if (!rps->enabled)
+ if (!intel_rps_clear_active(rps))
return;
- if (INTEL_GEN(i915) >= 6)
+ if (intel_rps_uses_timer(rps))
+ rps_stop_timer(rps);
+ if (intel_rps_has_interrupts(rps))
rps_disable_interrupts(rps);
- WRITE_ONCE(rps->active, false);
if (rps->last_freq <= rps->idle_freq)
return;
@@ -784,8 +876,15 @@ void intel_rps_park(struct intel_rps *rps)
* (Note we accommodate Cherryview's limitation of only using an
* even bin by applying it to all.)
*/
- rps->cur_freq =
- max_t(int, round_down(rps->cur_freq - 1, 2), rps->min_freq);
+ adj = rps->last_adj;
+ if (adj < 0)
+ adj *= 2;
+ else /* CHV needs even encode values */
+ adj = -2;
+ rps->last_adj = adj;
+ rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq);
+
+ GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
}
void intel_rps_boost(struct i915_request *rq)
@@ -793,7 +892,7 @@ void intel_rps_boost(struct i915_request *rq)
struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
unsigned long flags;
- if (i915_request_signaled(rq) || !READ_ONCE(rps->active))
+ if (i915_request_signaled(rq) || !intel_rps_is_active(rps))
return;
/* Serializes with i915_request_retire() */
@@ -802,6 +901,9 @@ void intel_rps_boost(struct i915_request *rq)
!dma_fence_is_signaled_locked(&rq->fence)) {
set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags);
+ GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
+ rq->fence.context, rq->fence.seqno);
+
if (!atomic_fetch_inc(&rps->num_waiters) &&
READ_ONCE(rps->cur_freq) < rps->boost_freq)
schedule_work(&rps->work);
@@ -819,7 +921,7 @@ int intel_rps_set(struct intel_rps *rps, u8 val)
GEM_BUG_ON(val > rps->max_freq);
GEM_BUG_ON(val < rps->min_freq);
- if (rps->active) {
+ if (intel_rps_is_active(rps)) {
err = rps_set(rps, val, true);
if (err)
return err;
@@ -828,7 +930,7 @@ int intel_rps_set(struct intel_rps *rps, u8 val)
* Make sure we continue to get interrupts
* until we hit the minimum or maximum frequencies.
*/
- if (INTEL_GEN(rps_to_i915(rps)) >= 6) {
+ if (intel_rps_has_interrupts(rps)) {
struct intel_uncore *uncore = rps_to_uncore(rps);
set(uncore,
@@ -896,12 +998,14 @@ static void gen6_rps_init(struct intel_rps *rps)
static bool rps_reset(struct intel_rps *rps)
{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
/* force a reset */
rps->power.mode = -1;
rps->last_freq = -1;
if (rps_set(rps, rps->min_freq, true)) {
- DRM_ERROR("Failed to reset RPS to initial values\n");
+ drm_err(&i915->drm, "Failed to reset RPS to initial values\n");
return false;
}
@@ -912,20 +1016,18 @@ static bool rps_reset(struct intel_rps *rps)
/* See the Gen9_GT_PM_Programming_Guide doc for the below */
static bool gen9_rps_enable(struct intel_rps *rps)
{
- struct drm_i915_private *i915 = rps_to_i915(rps);
- struct intel_uncore *uncore = rps_to_uncore(rps);
+ struct intel_gt *gt = rps_to_gt(rps);
+ struct intel_uncore *uncore = gt->uncore;
/* Program defaults and thresholds for RPS */
- if (IS_GEN(i915, 9))
+ if (IS_GEN(gt->i915, 9))
intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
GEN9_FREQUENCY(rps->rp1_freq));
- /* 1 second timeout */
- intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
- GT_INTERVAL_FROM_US(i915, 1000000));
-
intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa);
+ rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
+
return rps_reset(rps);
}
@@ -936,12 +1038,10 @@ static bool gen8_rps_enable(struct intel_rps *rps)
intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
HSW_FREQUENCY(rps->rp1_freq));
- /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
- intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
- 100000000 / 128); /* 1 second timeout */
-
intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+ rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
+
return rps_reset(rps);
}
@@ -953,6 +1053,10 @@ static bool gen6_rps_enable(struct intel_rps *rps)
intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+ rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
+ GEN6_PM_RP_DOWN_THRESHOLD |
+ GEN6_PM_RP_DOWN_TIMEOUT);
+
return rps_reset(rps);
}
@@ -1038,6 +1142,10 @@ static bool chv_rps_enable(struct intel_rps *rps)
GEN6_RP_UP_BUSY_AVG |
GEN6_RP_DOWN_IDLE_AVG);
+ rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
+ GEN6_PM_RP_DOWN_THRESHOLD |
+ GEN6_PM_RP_DOWN_TIMEOUT);
+
/* Setting Fixed Bias */
vlv_punit_get(i915);
@@ -1052,8 +1160,8 @@ static bool chv_rps_enable(struct intel_rps *rps)
drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
"GPLL not enabled\n");
- DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
- DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+ drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+ drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
return rps_reset(rps);
}
@@ -1136,6 +1244,9 @@ static bool vlv_rps_enable(struct intel_rps *rps)
GEN6_RP_UP_BUSY_AVG |
GEN6_RP_DOWN_IDLE_CONT);
+ /* WaGsvRC0ResidencyMethod:vlv */
+ rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
+
vlv_punit_get(i915);
/* Setting Fixed Bias */
@@ -1150,8 +1261,8 @@ static bool vlv_rps_enable(struct intel_rps *rps)
drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
"GPLL not enabled\n");
- DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
- DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+ drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+ drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
return rps_reset(rps);
}
@@ -1194,33 +1305,71 @@ static unsigned long __ips_gfx_val(struct intel_ips *ips)
return ips->gfx_power + state2;
}
+static bool has_busy_stats(struct intel_rps *rps)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ for_each_engine(engine, rps_to_gt(rps), id) {
+ if (!intel_engine_supports_stats(engine))
+ return false;
+ }
+
+ return true;
+}
+
void intel_rps_enable(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
+ bool enabled = false;
+
+ if (!HAS_RPS(i915))
+ return;
+
+ intel_gt_check_clock_frequency(rps_to_gt(rps));
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
- if (IS_CHERRYVIEW(i915))
- rps->enabled = chv_rps_enable(rps);
+ if (rps->max_freq <= rps->min_freq)
+ /* leave disabled, no room for dynamic reclocking */;
+ else if (IS_CHERRYVIEW(i915))
+ enabled = chv_rps_enable(rps);
else if (IS_VALLEYVIEW(i915))
- rps->enabled = vlv_rps_enable(rps);
+ enabled = vlv_rps_enable(rps);
else if (INTEL_GEN(i915) >= 9)
- rps->enabled = gen9_rps_enable(rps);
+ enabled = gen9_rps_enable(rps);
else if (INTEL_GEN(i915) >= 8)
- rps->enabled = gen8_rps_enable(rps);
+ enabled = gen8_rps_enable(rps);
else if (INTEL_GEN(i915) >= 6)
- rps->enabled = gen6_rps_enable(rps);
+ enabled = gen6_rps_enable(rps);
else if (IS_IRONLAKE_M(i915))
- rps->enabled = gen5_rps_enable(rps);
+ enabled = gen5_rps_enable(rps);
+ else
+ MISSING_CASE(INTEL_GEN(i915));
intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
- if (!rps->enabled)
+ if (!enabled)
return;
- drm_WARN_ON(&i915->drm, rps->max_freq < rps->min_freq);
- drm_WARN_ON(&i915->drm, rps->idle_freq > rps->max_freq);
+ GT_TRACE(rps_to_gt(rps),
+ "min:%x, max:%x, freq:[%d, %d]\n",
+ rps->min_freq, rps->max_freq,
+ intel_gpu_freq(rps, rps->min_freq),
+ intel_gpu_freq(rps, rps->max_freq));
- drm_WARN_ON(&i915->drm, rps->efficient_freq < rps->min_freq);
- drm_WARN_ON(&i915->drm, rps->efficient_freq > rps->max_freq);
+ GEM_BUG_ON(rps->max_freq < rps->min_freq);
+ GEM_BUG_ON(rps->idle_freq > rps->max_freq);
+
+ GEM_BUG_ON(rps->efficient_freq < rps->min_freq);
+ GEM_BUG_ON(rps->efficient_freq > rps->max_freq);
+
+ if (has_busy_stats(rps))
+ intel_rps_set_timer(rps);
+ else if (INTEL_GEN(i915) >= 6)
+ intel_rps_set_interrupts(rps);
+ else
+ /* Ironlake currently uses intel_ips.ko */ {}
+
+ intel_rps_set_enabled(rps);
}
static void gen6_rps_disable(struct intel_rps *rps)
@@ -1232,7 +1381,9 @@ void intel_rps_disable(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
- rps->enabled = false;
+ intel_rps_clear_enabled(rps);
+ intel_rps_clear_interrupts(rps);
+ intel_rps_clear_timer(rps);
if (INTEL_GEN(i915) >= 6)
gen6_rps_disable(rps);
@@ -1308,7 +1459,8 @@ static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
CCK_GPLL_CLOCK_CONTROL,
i915->czclk_freq);
- DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq);
+ drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n",
+ rps->gpll_ref_freq);
}
static void vlv_rps_init(struct intel_rps *rps)
@@ -1336,28 +1488,24 @@ static void vlv_rps_init(struct intel_rps *rps)
i915->mem_freq = 1333;
break;
}
- DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
+ drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
rps->max_freq = vlv_rps_max_freq(rps);
rps->rp0_freq = rps->max_freq;
- DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->max_freq),
- rps->max_freq);
+ drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
rps->efficient_freq = vlv_rps_rpe_freq(rps);
- DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->efficient_freq),
- rps->efficient_freq);
+ drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
rps->rp1_freq = vlv_rps_guar_freq(rps);
- DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->rp1_freq),
- rps->rp1_freq);
+ drm_dbg(&i915->drm, "RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
rps->min_freq = vlv_rps_min_freq(rps);
- DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->min_freq),
- rps->min_freq);
+ drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
vlv_iosf_sb_put(i915,
BIT(VLV_IOSF_SB_PUNIT) |
@@ -1387,28 +1535,24 @@ static void chv_rps_init(struct intel_rps *rps)
i915->mem_freq = 1600;
break;
}
- DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
+ drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
rps->max_freq = chv_rps_max_freq(rps);
rps->rp0_freq = rps->max_freq;
- DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->max_freq),
- rps->max_freq);
+ drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
rps->efficient_freq = chv_rps_rpe_freq(rps);
- DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->efficient_freq),
- rps->efficient_freq);
+ drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
rps->rp1_freq = chv_rps_guar_freq(rps);
- DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->rp1_freq),
- rps->rp1_freq);
+ drm_dbg(&i915->drm, "RP1(Guar) GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
rps->min_freq = chv_rps_min_freq(rps);
- DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->min_freq),
- rps->min_freq);
+ drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
vlv_iosf_sb_put(i915,
BIT(VLV_IOSF_SB_PUNIT) |
@@ -1471,12 +1615,13 @@ static void rps_work(struct work_struct *work)
{
struct intel_rps *rps = container_of(work, typeof(*rps), work);
struct intel_gt *gt = rps_to_gt(rps);
+ struct drm_i915_private *i915 = rps_to_i915(rps);
bool client_boost = false;
int new_freq, adj, min, max;
u32 pm_iir = 0;
spin_lock_irq(&gt->irq_lock);
- pm_iir = fetch_and_zero(&rps->pm_iir) & READ_ONCE(rps->pm_events);
+ pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events;
client_boost = atomic_read(&rps->num_waiters);
spin_unlock_irq(&gt->irq_lock);
@@ -1485,6 +1630,10 @@ static void rps_work(struct work_struct *work)
goto out;
mutex_lock(&rps->lock);
+ if (!intel_rps_is_active(rps)) {
+ mutex_unlock(&rps->lock);
+ return;
+ }
pm_iir |= vlv_wa_c0_ei(rps, pm_iir);
@@ -1494,6 +1643,12 @@ static void rps_work(struct work_struct *work)
max = rps->max_freq_softlimit;
if (client_boost)
max = rps->max_freq;
+
+ GT_TRACE(gt,
+ "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n",
+ pm_iir, yesno(client_boost),
+ adj, new_freq, min, max);
+
if (client_boost && new_freq < rps->boost_freq) {
new_freq = rps->boost_freq;
adj = 0;
@@ -1525,30 +1680,18 @@ static void rps_work(struct work_struct *work)
adj = 0;
}
- rps->last_adj = adj;
-
/*
- * Limit deboosting and boosting to keep ourselves at the extremes
- * when in the respective power modes (i.e. slowly decrease frequencies
- * while in the HIGH_POWER zone and slowly increase frequencies while
- * in the LOW_POWER zone). On idle, we will hit the timeout and drop
- * to the next level quickly, and conversely if busy we expect to
- * hit a waitboost and rapidly switch into max power.
- */
- if ((adj < 0 && rps->power.mode == HIGH_POWER) ||
- (adj > 0 && rps->power.mode == LOW_POWER))
- rps->last_adj = 0;
-
- /* sysfs frequency interfaces may have snuck in while servicing the
- * interrupt
+ * sysfs frequency limits may have snuck in while
+ * servicing the interrupt
*/
new_freq += adj;
new_freq = clamp_t(int, new_freq, min, max);
if (intel_rps_set(rps, new_freq)) {
- DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
- rps->last_adj = 0;
+ drm_dbg(&i915->drm, "Failed to set new GPU frequency\n");
+ adj = 0;
}
+ rps->last_adj = adj;
mutex_unlock(&rps->lock);
@@ -1568,6 +1711,8 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
if (unlikely(!events))
return;
+ GT_TRACE(gt, "irq events:%x\n", events);
+
gen6_gt_pm_mask_irq(gt, events);
rps->pm_iir |= events;
@@ -1579,10 +1724,12 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
struct intel_gt *gt = rps_to_gt(rps);
u32 events;
- events = pm_iir & READ_ONCE(rps->pm_events);
+ events = pm_iir & rps->pm_events;
if (events) {
spin_lock(&gt->irq_lock);
+ GT_TRACE(gt, "irq events:%x\n", events);
+
gen6_gt_pm_mask_irq(gt, events);
rps->pm_iir |= events;
@@ -1640,6 +1787,7 @@ void intel_rps_init_early(struct intel_rps *rps)
mutex_init(&rps->power.mutex);
INIT_WORK(&rps->work, rps_work);
+ timer_setup(&rps->timer, rps_timer, 0);
atomic_set(&rps->num_waiters, 0);
}
@@ -1668,9 +1816,10 @@ void intel_rps_init(struct intel_rps *rps)
sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS,
&params, NULL);
if (params & BIT(31)) { /* OC supported */
- DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
- (rps->max_freq & 0xff) * 50,
- (params & 0xff) * 50);
+ drm_dbg(&i915->drm,
+ "Overclocking supported, max: %dMHz, overclock: %dMHz\n",
+ (rps->max_freq & 0xff) * 50,
+ (params & 0xff) * 50);
rps->max_freq = params & 0xff;
}
}
@@ -1678,7 +1827,9 @@ void intel_rps_init(struct intel_rps *rps)
/* Finally allow us to boost to max by default */
rps->boost_freq = rps->max_freq;
rps->idle_freq = rps->min_freq;
- rps->cur_freq = rps->idle_freq;
+
+ /* Start in the middle, from here we will autotune based on workload */
+ rps->cur_freq = rps->efficient_freq;
rps->pm_intrmsk_mbz = 0;
@@ -1695,6 +1846,12 @@ void intel_rps_init(struct intel_rps *rps)
rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
}
+void intel_rps_sanitize(struct intel_rps *rps)
+{
+ if (INTEL_GEN(rps_to_i915(rps)) >= 6)
+ rps_disable_interrupts(rps);
+}
+
u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
@@ -1722,7 +1879,7 @@ static u32 read_cagf(struct intel_rps *rps)
freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
vlv_punit_put(i915);
} else {
- freq = intel_uncore_read(rps_to_gt(rps)->uncore, GEN6_RPSTAT1);
+ freq = intel_uncore_read(rps_to_uncore(rps), GEN6_RPSTAT1);
}
return intel_rps_get_cagf(rps, freq);
@@ -1730,7 +1887,7 @@ static u32 read_cagf(struct intel_rps *rps)
u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
{
- struct intel_runtime_pm *rpm = rps_to_gt(rps)->uncore->rpm;
+ struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
intel_wakeref_t wakeref;
u32 freq = 0;
@@ -1930,3 +2087,7 @@ bool i915_gpu_turbo_disable(void)
return ret;
}
EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_rps.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h
index dfa98194f3b2..8d3c9d663662 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -13,6 +13,7 @@ struct i915_request;
void intel_rps_init_early(struct intel_rps *rps);
void intel_rps_init(struct intel_rps *rps);
+void intel_rps_sanitize(struct intel_rps *rps);
void intel_rps_driver_register(struct intel_rps *rps);
void intel_rps_driver_unregister(struct intel_rps *rps);
@@ -36,4 +37,64 @@ void gen5_rps_irq_handler(struct intel_rps *rps);
void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
+static inline bool intel_rps_is_enabled(const struct intel_rps *rps)
+{
+ return test_bit(INTEL_RPS_ENABLED, &rps->flags);
+}
+
+static inline void intel_rps_set_enabled(struct intel_rps *rps)
+{
+ set_bit(INTEL_RPS_ENABLED, &rps->flags);
+}
+
+static inline void intel_rps_clear_enabled(struct intel_rps *rps)
+{
+ clear_bit(INTEL_RPS_ENABLED, &rps->flags);
+}
+
+static inline bool intel_rps_is_active(const struct intel_rps *rps)
+{
+ return test_bit(INTEL_RPS_ACTIVE, &rps->flags);
+}
+
+static inline void intel_rps_set_active(struct intel_rps *rps)
+{
+ set_bit(INTEL_RPS_ACTIVE, &rps->flags);
+}
+
+static inline bool intel_rps_clear_active(struct intel_rps *rps)
+{
+ return test_and_clear_bit(INTEL_RPS_ACTIVE, &rps->flags);
+}
+
+static inline bool intel_rps_has_interrupts(const struct intel_rps *rps)
+{
+ return test_bit(INTEL_RPS_INTERRUPTS, &rps->flags);
+}
+
+static inline void intel_rps_set_interrupts(struct intel_rps *rps)
+{
+ set_bit(INTEL_RPS_INTERRUPTS, &rps->flags);
+}
+
+static inline void intel_rps_clear_interrupts(struct intel_rps *rps)
+{
+ clear_bit(INTEL_RPS_INTERRUPTS, &rps->flags);
+}
+
+static inline bool intel_rps_uses_timer(const struct intel_rps *rps)
+{
+ return test_bit(INTEL_RPS_TIMER, &rps->flags);
+}
+
+static inline void intel_rps_set_timer(struct intel_rps *rps)
+{
+ set_bit(INTEL_RPS_TIMER, &rps->flags);
+}
+
+static inline void intel_rps_clear_timer(struct intel_rps *rps)
+{
+ clear_bit(INTEL_RPS_TIMER, &rps->flags);
+}
+
#endif /* INTEL_RPS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h
index c2e279154bd5..38083f0402d9 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h
@@ -31,6 +31,13 @@ struct intel_rps_ei {
u32 media_c0;
};
+enum {
+ INTEL_RPS_ENABLED = 0,
+ INTEL_RPS_ACTIVE,
+ INTEL_RPS_INTERRUPTS,
+ INTEL_RPS_TIMER,
+};
+
struct intel_rps {
struct mutex lock; /* protects enabling and the worker */
@@ -38,9 +45,12 @@ struct intel_rps {
* work, interrupts_enabled and pm_iir are protected by
* dev_priv->irq_lock
*/
+ struct timer_list timer;
struct work_struct work;
- bool enabled;
- bool active;
+ unsigned long flags;
+
+ ktime_t pm_timestamp;
+ u32 pm_interval;
u32 pm_iir;
/* PM interrupt bits that should never be masked */
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 74f793423231..d173271c7397 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -65,7 +65,6 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
{
const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
bool subslice_pg = sseu->has_subslice_pg;
- struct intel_sseu ctx_sseu;
u8 slices, subslices;
u32 rpcs = 0;
@@ -78,31 +77,13 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
/*
* If i915/perf is active, we want a stable powergating configuration
- * on the system.
- *
- * We could choose full enablement, but on ICL we know there are use
- * cases which disable slices for functional, apart for performance
- * reasons. So in this case we select a known stable subset.
+ * on the system. Use the configuration pinned by i915/perf.
*/
- if (!i915->perf.exclusive_stream) {
- ctx_sseu = *req_sseu;
- } else {
- ctx_sseu = intel_sseu_from_device_info(sseu);
-
- if (IS_GEN(i915, 11)) {
- /*
- * We only need subslice count so it doesn't matter
- * which ones we select - just turn off low bits in the
- * amount of half of all available subslices per slice.
- */
- ctx_sseu.subslice_mask =
- ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
- ctx_sseu.slice_mask = 0x1;
- }
- }
+ if (i915->perf.exclusive_stream)
+ req_sseu = &i915->perf.sseu;
- slices = hweight8(ctx_sseu.slice_mask);
- subslices = hweight8(ctx_sseu.subslice_mask);
+ slices = hweight8(req_sseu->slice_mask);
+ subslices = hweight8(req_sseu->subslice_mask);
/*
* Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
@@ -175,13 +156,13 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
if (sseu->has_eu_pg) {
u32 val;
- val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
+ val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
val &= GEN8_RPCS_EU_MIN_MASK;
rpcs |= val;
- val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
+ val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
val &= GEN8_RPCS_EU_MAX_MASK;
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 08b56d7ab4f4..4546284fede1 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -119,6 +119,15 @@ static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
spin_unlock_irqrestore(&gt->hwsp_lock, flags);
}
+static void __rcu_cacheline_free(struct rcu_head *rcu)
+{
+ struct intel_timeline_cacheline *cl =
+ container_of(rcu, typeof(*cl), rcu);
+
+ i915_active_fini(&cl->active);
+ kfree(cl);
+}
+
static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
{
GEM_BUG_ON(!i915_active_is_idle(&cl->active));
@@ -127,8 +136,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
i915_vma_put(cl->hwsp->vma);
__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
- i915_active_fini(&cl->active);
- kfree_rcu(cl, rcu);
+ call_rcu(&cl->rcu, __rcu_cacheline_free);
}
__i915_active_call
@@ -203,9 +211,9 @@ static void cacheline_free(struct intel_timeline_cacheline *cl)
i915_active_release(&cl->active);
}
-int intel_timeline_init(struct intel_timeline *timeline,
- struct intel_gt *gt,
- struct i915_vma *hwsp)
+static int intel_timeline_init(struct intel_timeline *timeline,
+ struct intel_gt *gt,
+ struct i915_vma *hwsp)
{
void *vaddr;
@@ -272,7 +280,7 @@ void intel_gt_init_timelines(struct intel_gt *gt)
INIT_LIST_HEAD(&timelines->hwsp_free_list);
}
-void intel_timeline_fini(struct intel_timeline *timeline)
+static void intel_timeline_fini(struct intel_timeline *timeline)
{
GEM_BUG_ON(atomic_read(&timeline->pin_count));
GEM_BUG_ON(!list_empty(&timeline->requests));
@@ -329,6 +337,13 @@ int intel_timeline_pin(struct intel_timeline *tl)
return 0;
}
+void intel_timeline_reset_seqno(const struct intel_timeline *tl)
+{
+ /* Must be pinned to be writable, and no requests in flight. */
+ GEM_BUG_ON(!atomic_read(&tl->pin_count));
+ WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
+}
+
void intel_timeline_enter(struct intel_timeline *tl)
{
struct intel_gt_timelines *timelines = &tl->gt->timelines;
@@ -357,8 +372,16 @@ void intel_timeline_enter(struct intel_timeline *tl)
return;
spin_lock(&timelines->lock);
- if (!atomic_fetch_inc(&tl->active_count))
+ if (!atomic_fetch_inc(&tl->active_count)) {
+ /*
+ * The HWSP is volatile, and may have been lost while inactive,
+ * e.g. across suspend/resume. Be paranoid, and ensure that
+ * the HWSP value matches our seqno so we don't proclaim
+ * the next request as already complete.
+ */
+ intel_timeline_reset_seqno(tl);
list_add_tail(&tl->link, &timelines->active_list);
+ }
spin_unlock(&timelines->lock);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
index f5b7eade3809..4298b9ac7327 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -31,11 +31,6 @@
#include "i915_syncmap.h"
#include "gt/intel_timeline_types.h"
-int intel_timeline_init(struct intel_timeline *tl,
- struct intel_gt *gt,
- struct i915_vma *hwsp);
-void intel_timeline_fini(struct intel_timeline *tl);
-
struct intel_timeline *
intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp);
@@ -84,6 +79,8 @@ int intel_timeline_get_seqno(struct intel_timeline *tl,
void intel_timeline_exit(struct intel_timeline *tl);
void intel_timeline_unpin(struct intel_timeline *tl);
+void intel_timeline_reset_seqno(const struct intel_timeline *tl);
+
int intel_timeline_read_hwsp(struct i915_request *from,
struct i915_request *until,
u32 *hwsp_offset);
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 5176ad1a3976..90a2b9e399b0 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -485,25 +485,14 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
- struct drm_i915_private *i915 = engine->i915;
-
/* WaForceContextSaveRestoreNonCoherent:cnl */
WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
- /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
- if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
- WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
-
/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
- /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
- if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
- WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
- GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
-
/* WaPushConstantDereferenceHoldDisable:cnl */
WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
@@ -837,7 +826,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) &
GEN10_L3BANK_MASK;
- DRM_DEBUG_DRIVER("L3 fuse = %x\n", l3_fuse);
+ drm_dbg(&i915->drm, "L3 fuse = %x\n", l3_fuse);
l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse);
} else {
l3_en = ~0;
@@ -846,7 +835,8 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
slice = fls(sseu->slice_mask) - 1;
subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
if (!subslice) {
- DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n",
+ drm_warn(&i915->drm,
+ "No common index found between subslice mask %x and L3 bank mask %x!\n",
intel_sseu_get_subslices(sseu, slice), l3_en);
subslice = fls(l3_en);
drm_WARN_ON(&i915->drm, !subslice);
@@ -861,7 +851,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
}
- DRM_DEBUG_DRIVER("MCR slice/subslice = %x\n", mcr);
+ drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr);
wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
}
@@ -871,12 +861,6 @@ cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
wa_init_mcr(i915, wal);
- /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
- if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
- wa_write_or(wal,
- GAMT_CHKN_BIT_REG,
- GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
-
/* WaInPlaceDecompressionHang:cnl */
wa_write_or(wal,
GEN9_GAMT_ECO_REG_RW_IA,
@@ -933,15 +917,20 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
GAMT_CHKN_BIT_REG,
GAMT_CHKN_DISABLE_L3_COH_PIPE);
- /* Wa_1607087056:icl */
- wa_write_or(wal,
- SLICE_UNIT_LEVEL_CLKGATE,
- L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
+ /* Wa_1607087056:icl,ehl,jsl */
+ if (IS_ICELAKE(i915) ||
+ IS_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) {
+ wa_write_or(wal,
+ SLICE_UNIT_LEVEL_CLKGATE,
+ L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
+ }
}
static void
tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
+ wa_init_mcr(i915, wal);
+
/* Wa_1409420604:tgl */
if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
wa_write_or(wal,
@@ -1379,12 +1368,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN7_FF_THREAD_MODE,
GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
- /*
- * Wa_1409085225:tgl
- * Wa_14010229206:tgl
- */
- wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
-
/* Wa_1408615072:tgl */
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
VSUNIT_CLKGATE_DIS_TGL);
@@ -1402,6 +1385,12 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
wa_masked_en(wal,
GEN9_CS_DEBUG_MODE1,
FF_DOP_CLOCK_GATE_DISABLE);
+
+ /*
+ * Wa_1409085225:tgl
+ * Wa_14010229206:tgl
+ */
+ wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
}
if (IS_GEN(i915, 11)) {
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 4a53ded7c2dd..b8dd3cbc8696 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -28,7 +28,6 @@
#include "i915_drv.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
-#include "intel_engine_pool.h"
#include "mock_engine.h"
#include "selftests/mock_request.h"
@@ -328,7 +327,6 @@ int mock_engine_init(struct intel_engine_cs *engine)
intel_engine_init_execlists(engine);
intel_engine_init__pm(engine);
intel_engine_init_retire(engine);
- intel_engine_pool_init(&engine->pool);
ce = create_kernel_context(engine);
if (IS_ERR(ce))
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index e874dfaa5316..52af1cee9a94 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -24,6 +24,7 @@ static int request_sync(struct i915_request *rq)
/* Opencode i915_request_add() so we can keep the timeline locked. */
__i915_request_commit(rq);
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
__i915_request_queue(rq, NULL);
timeout = i915_request_wait(rq, 0, HZ / 10);
@@ -154,10 +155,7 @@ static int live_context_size(void *arg)
*/
for_each_engine(engine, gt, id) {
- struct {
- struct drm_i915_gem_object *state;
- void *pinned;
- } saved;
+ struct file *saved;
if (!engine->context_size)
continue;
@@ -171,8 +169,7 @@ static int live_context_size(void *arg)
* active state is sufficient, we are only checking that we
* don't use more than we planned.
*/
- saved.state = fetch_and_zero(&engine->default_state);
- saved.pinned = fetch_and_zero(&engine->pinned_default_state);
+ saved = fetch_and_zero(&engine->default_state);
/* Overlaps with the execlists redzone */
engine->context_size += I915_GTT_PAGE_SIZE;
@@ -181,8 +178,7 @@ static int live_context_size(void *arg)
engine->context_size -= I915_GTT_PAGE_SIZE;
- engine->pinned_default_state = saved.pinned;
- engine->default_state = saved.state;
+ engine->default_state = saved;
intel_engine_pm_put(engine);
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index 09ff8e4f88af..242181a5214c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -7,6 +7,7 @@
#include "selftest_llc.h"
#include "selftest_rc6.h"
+#include "selftest_rps.h"
static int live_gt_resume(void *arg)
{
@@ -52,6 +53,13 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(live_rc6_manual),
+ SUBTEST(live_rps_clock_interval),
+ SUBTEST(live_rps_control),
+ SUBTEST(live_rps_frequency_cs),
+ SUBTEST(live_rps_frequency_srm),
+ SUBTEST(live_rps_power),
+ SUBTEST(live_rps_interrupt),
+ SUBTEST(live_rps_dynamic),
SUBTEST(live_gt_resume),
};
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index f95ae15ce865..824f99c4cc7c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -21,7 +21,8 @@
#include "gem/selftests/mock_context.h"
#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
-#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
+#define NUM_GPR 16
+#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
static struct i915_vma *create_scratch(struct intel_gt *gt)
{
@@ -68,26 +69,41 @@ static void engine_heartbeat_enable(struct intel_engine_cs *engine,
engine->props.heartbeat_interval_ms = saved;
}
+static bool is_active(struct i915_request *rq)
+{
+ if (i915_request_is_active(rq))
+ return true;
+
+ if (i915_request_on_hold(rq))
+ return true;
+
+ if (i915_request_started(rq))
+ return true;
+
+ return false;
+}
+
static int wait_for_submit(struct intel_engine_cs *engine,
struct i915_request *rq,
unsigned long timeout)
{
timeout += jiffies;
do {
- cond_resched();
- intel_engine_flush_submission(engine);
+ bool done = time_after(jiffies, timeout);
- if (READ_ONCE(engine->execlists.pending[0]))
- continue;
-
- if (i915_request_is_active(rq))
+ if (i915_request_completed(rq)) /* that was quick! */
return 0;
- if (i915_request_started(rq)) /* that was quick! */
+ /* Wait until the HW has acknowleged the submission (or err) */
+ intel_engine_flush_submission(engine);
+ if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
return 0;
- } while (time_before(jiffies, timeout));
- return -ETIME;
+ if (done)
+ return -ETIME;
+
+ cond_resched();
+ } while (1);
}
static int wait_for_reset(struct intel_engine_cs *engine,
@@ -634,9 +650,9 @@ static int live_error_interrupt(void *arg)
error_repr(p->error[i]));
if (!i915_request_started(client[i])) {
- pr_debug("%s: %s request not stated!\n",
- engine->name,
- error_repr(p->error[i]));
+ pr_err("%s: %s request not started!\n",
+ engine->name,
+ error_repr(p->error[i]));
err = -ETIME;
goto out;
}
@@ -644,9 +660,10 @@ static int live_error_interrupt(void *arg)
/* Kick the tasklet to process the error */
intel_engine_flush_submission(engine);
if (client[i]->fence.error != p->error[i]) {
- pr_err("%s: %s request completed with wrong error code: %d\n",
+ pr_err("%s: %s request (%s) with wrong error code: %d\n",
engine->name,
error_repr(p->error[i]),
+ i915_request_completed(client[i]) ? "completed" : "running",
client[i]->fence.error);
err = -EINVAL;
goto out;
@@ -1057,7 +1074,6 @@ static int live_timeslice_rewind(void *arg)
engine->name);
goto err;
}
- GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
@@ -1230,8 +1246,14 @@ static int live_timeslice_queue(void *arg)
if (err)
goto err_rq;
- intel_engine_flush_submission(engine);
+ /* Wait until we ack the release_queue and start timeslicing */
+ do {
+ cond_resched();
+ intel_engine_flush_submission(engine);
+ } while (READ_ONCE(engine->execlists.pending[0]));
+
if (!READ_ONCE(engine->execlists.timer.expires) &&
+ execlists_active(&engine->execlists) == rq &&
!i915_request_completed(rq)) {
struct drm_printer p =
drm_info_printer(gt->i915->drm.dev);
@@ -2032,6 +2054,9 @@ static int __cancel_hostile(struct live_preempt_cancel *arg)
if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
return 0;
+ if (!intel_has_reset_engine(arg->engine->gt))
+ return 0;
+
GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
rq = spinner_create_request(&arg->a.spin,
arg->a.ctx, arg->engine,
@@ -2632,7 +2657,7 @@ static int create_gang(struct intel_engine_cs *engine,
if (IS_ERR(rq))
goto err_obj;
- rq->batch = vma;
+ rq->batch = i915_vma_get(vma);
i915_request_get(rq);
i915_vma_lock(vma);
@@ -2656,6 +2681,7 @@ static int create_gang(struct intel_engine_cs *engine,
return 0;
err_rq:
+ i915_vma_put(rq->batch);
i915_request_put(rq);
err_obj:
i915_gem_object_put(obj);
@@ -2752,6 +2778,7 @@ static int live_preempt_gang(void *arg)
err = -ETIME;
}
+ i915_vma_put(rq->batch);
i915_request_put(rq);
rq = n;
}
@@ -2765,6 +2792,331 @@ static int live_preempt_gang(void *arg)
return 0;
}
+static struct i915_vma *
+create_gpr_user(struct intel_engine_cs *engine,
+ struct i915_vma *result,
+ unsigned int offset)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ u32 *cs;
+ int err;
+ int i;
+
+ obj = i915_gem_object_create_internal(engine->i915, 4096);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vma = i915_vma_instance(obj, result->vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err) {
+ i915_vma_put(vma);
+ return ERR_PTR(err);
+ }
+
+ cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(cs)) {
+ i915_vma_put(vma);
+ return ERR_CAST(cs);
+ }
+
+ /* All GPR are clear for new contexts. We use GPR(0) as a constant */
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = CS_GPR(engine, 0);
+ *cs++ = 1;
+
+ for (i = 1; i < NUM_GPR; i++) {
+ u64 addr;
+
+ /*
+ * Perform: GPR[i]++
+ *
+ * As we read and write into the context saved GPR[i], if
+ * we restart this batch buffer from an earlier point, we
+ * will repeat the increment and store a value > 1.
+ */
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
+
+ addr = result->node.start + offset + i * sizeof(*cs);
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8;
+ *cs++ = CS_GPR(engine, 2 * i);
+ *cs++ = lower_32_bits(addr);
+ *cs++ = upper_32_bits(addr);
+
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_GTE_SDD;
+ *cs++ = i;
+ *cs++ = lower_32_bits(result->node.start);
+ *cs++ = upper_32_bits(result->node.start);
+ }
+
+ *cs++ = MI_BATCH_BUFFER_END;
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+
+ return vma;
+}
+
+static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int err;
+
+ obj = i915_gem_object_create_internal(gt->i915, sz);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_ggtt_pin(vma, 0, 0);
+ if (err) {
+ i915_vma_put(vma);
+ return ERR_PTR(err);
+ }
+
+ return vma;
+}
+
+static struct i915_request *
+create_gpr_client(struct intel_engine_cs *engine,
+ struct i915_vma *global,
+ unsigned int offset)
+{
+ struct i915_vma *batch, *vma;
+ struct intel_context *ce;
+ struct i915_request *rq;
+ int err;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return ERR_CAST(ce);
+
+ vma = i915_vma_instance(global->obj, ce->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_ce;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto out_ce;
+
+ batch = create_gpr_user(engine, vma, offset);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto out_vma;
+ }
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_batch;
+ }
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, false);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, 0);
+ i915_vma_unlock(vma);
+
+ i915_vma_lock(batch);
+ if (!err)
+ err = i915_request_await_object(rq, batch->obj, false);
+ if (!err)
+ err = i915_vma_move_to_active(batch, rq, 0);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ batch->node.start,
+ PAGE_SIZE, 0);
+ i915_vma_unlock(batch);
+ i915_vma_unpin(batch);
+
+ if (!err)
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+out_batch:
+ i915_vma_put(batch);
+out_vma:
+ i915_vma_unpin(vma);
+out_ce:
+ intel_context_put(ce);
+ return err ? ERR_PTR(err) : rq;
+}
+
+static int preempt_user(struct intel_engine_cs *engine,
+ struct i915_vma *global,
+ int id)
+{
+ struct i915_sched_attr attr = {
+ .priority = I915_PRIORITY_MAX
+ };
+ struct i915_request *rq;
+ int err = 0;
+ u32 *cs;
+
+ rq = intel_engine_create_kernel_request(engine);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs)) {
+ i915_request_add(rq);
+ return PTR_ERR(cs);
+ }
+
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = i915_ggtt_offset(global);
+ *cs++ = 0;
+ *cs++ = id;
+
+ intel_ring_advance(rq, cs);
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ engine->schedule(rq, &attr);
+
+ if (i915_request_wait(rq, 0, HZ / 2) < 0)
+ err = -ETIME;
+ i915_request_put(rq);
+
+ return err;
+}
+
+static int live_preempt_user(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ struct i915_vma *global;
+ enum intel_engine_id id;
+ u32 *result;
+ int err = 0;
+
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+ return 0;
+
+ /*
+ * In our other tests, we look at preemption in carefully
+ * controlled conditions in the ringbuffer. Since most of the
+ * time is spent in user batches, most of our preemptions naturally
+ * occur there. We want to verify that when we preempt inside a batch
+ * we continue on from the current instruction and do not roll back
+ * to the start, or another earlier arbitration point.
+ *
+ * To verify this, we create a batch which is a mixture of
+ * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
+ * a few preempting contexts thrown into the mix, we look for any
+ * repeated instructions (which show up as incorrect values).
+ */
+
+ global = create_global(gt, 4096);
+ if (IS_ERR(global))
+ return PTR_ERR(global);
+
+ result = i915_gem_object_pin_map(global->obj, I915_MAP_WC);
+ if (IS_ERR(result)) {
+ i915_vma_unpin_and_release(&global, 0);
+ return PTR_ERR(result);
+ }
+
+ for_each_engine(engine, gt, id) {
+ struct i915_request *client[3] = {};
+ struct igt_live_test t;
+ int i;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS)
+ continue; /* we need per-context GPR */
+
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
+ err = -EIO;
+ break;
+ }
+
+ memset(result, 0, 4096);
+
+ for (i = 0; i < ARRAY_SIZE(client); i++) {
+ struct i915_request *rq;
+
+ rq = create_gpr_client(engine, global,
+ NUM_GPR * i * sizeof(u32));
+ if (IS_ERR(rq))
+ goto end_test;
+
+ client[i] = rq;
+ }
+
+ /* Continuously preempt the set of 3 running contexts */
+ for (i = 1; i <= NUM_GPR; i++) {
+ err = preempt_user(engine, global, i);
+ if (err)
+ goto end_test;
+ }
+
+ if (READ_ONCE(result[0]) != NUM_GPR) {
+ pr_err("%s: Failed to release semaphore\n",
+ engine->name);
+ err = -EIO;
+ goto end_test;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(client); i++) {
+ int gpr;
+
+ if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
+ err = -ETIME;
+ goto end_test;
+ }
+
+ for (gpr = 1; gpr < NUM_GPR; gpr++) {
+ if (result[NUM_GPR * i + gpr] != 1) {
+ pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
+ engine->name,
+ i, gpr, result[NUM_GPR * i + gpr]);
+ err = -EINVAL;
+ goto end_test;
+ }
+ }
+ }
+
+end_test:
+ for (i = 0; i < ARRAY_SIZE(client); i++) {
+ if (!client[i])
+ break;
+
+ i915_request_put(client[i]);
+ }
+
+ /* Flush the semaphores on error */
+ smp_store_mb(result[0], -1);
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ if (err)
+ break;
+ }
+
+ i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
+ return err;
+}
+
static int live_preempt_timeout(void *arg)
{
struct intel_gt *gt = arg;
@@ -3972,6 +4324,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_chain_preempt),
SUBTEST(live_preempt_gang),
SUBTEST(live_preempt_timeout),
+ SUBTEST(live_preempt_user),
SUBTEST(live_preempt_smoke),
SUBTEST(live_virtual_engine),
SUBTEST(live_virtual_mask),
@@ -3989,35 +4342,6 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
return intel_gt_live_subtests(tests, &i915->gt);
}
-static void hexdump(const void *buf, size_t len)
-{
- const size_t rowsize = 8 * sizeof(u32);
- const void *prev = NULL;
- bool skip = false;
- size_t pos;
-
- for (pos = 0; pos < len; pos += rowsize) {
- char line[128];
-
- if (prev && !memcmp(prev, buf + pos, rowsize)) {
- if (!skip) {
- pr_info("*\n");
- skip = true;
- }
- continue;
- }
-
- WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
- rowsize, sizeof(u32),
- line, sizeof(line),
- false) >= sizeof(line));
- pr_info("[%04zx] %s\n", pos, line);
-
- prev = buf + pos;
- skip = false;
- }
-}
-
static int emit_semaphore_signal(struct intel_context *ce, void *slot)
{
const u32 offset =
@@ -4099,13 +4423,12 @@ static int live_lrc_layout(void *arg)
if (!engine->default_state)
continue;
- hw = i915_gem_object_pin_map(engine->default_state,
- I915_MAP_WB);
+ hw = shmem_pin_map(engine->default_state);
if (IS_ERR(hw)) {
err = PTR_ERR(hw);
break;
}
- hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
engine->kernel_context,
@@ -4166,13 +4489,13 @@ static int live_lrc_layout(void *arg)
if (err) {
pr_info("%s: HW register image:\n", engine->name);
- hexdump(hw, PAGE_SIZE);
+ igt_hexdump(hw, PAGE_SIZE);
pr_info("%s: SW register image:\n", engine->name);
- hexdump(lrc, PAGE_SIZE);
+ igt_hexdump(lrc, PAGE_SIZE);
}
- i915_gem_object_unpin_map(engine->default_state);
+ shmem_unpin_map(engine->default_state, hw);
if (err)
break;
}
@@ -4241,10 +4564,35 @@ static int live_lrc_fixed(void *arg)
"BB_STATE"
},
{
+ i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
+ lrc_ring_wa_bb_per_ctx(engine),
+ "RING_BB_PER_CTX_PTR"
+ },
+ {
+ i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
+ lrc_ring_indirect_ptr(engine),
+ "RING_INDIRECT_CTX_PTR"
+ },
+ {
+ i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
+ lrc_ring_indirect_offset(engine),
+ "RING_INDIRECT_CTX_OFFSET"
+ },
+ {
i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
CTX_TIMESTAMP - 1,
"RING_CTX_TIMESTAMP"
},
+ {
+ i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
+ lrc_ring_gpr0(engine),
+ "RING_CS_GPR0"
+ },
+ {
+ i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
+ lrc_ring_cmd_buf_cctl(engine),
+ "RING_CMD_BUF_CCTL"
+ },
{ },
}, *t;
u32 *hw;
@@ -4252,13 +4600,12 @@ static int live_lrc_fixed(void *arg)
if (!engine->default_state)
continue;
- hw = i915_gem_object_pin_map(engine->default_state,
- I915_MAP_WB);
+ hw = shmem_pin_map(engine->default_state);
if (IS_ERR(hw)) {
err = PTR_ERR(hw);
break;
}
- hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
for (t = tbl; t->name; t++) {
int dw = find_offset(hw, t->reg);
@@ -4274,7 +4621,7 @@ static int live_lrc_fixed(void *arg)
}
}
- i915_gem_object_unpin_map(engine->default_state);
+ shmem_unpin_map(engine->default_state, hw);
}
return err;
@@ -4830,6 +5177,7 @@ store_context(struct intel_context *ce, struct i915_vma *scratch)
{
struct i915_vma *batch;
u32 dw, x, *cs, *hw;
+ u32 *defaults;
batch = create_user_vma(ce->vm, SZ_64K);
if (IS_ERR(batch))
@@ -4841,10 +5189,17 @@ store_context(struct intel_context *ce, struct i915_vma *scratch)
return ERR_CAST(cs);
}
+ defaults = shmem_pin_map(ce->engine->default_state);
+ if (!defaults) {
+ i915_gem_object_unpin_map(batch->obj);
+ i915_vma_put(batch);
+ return ERR_PTR(-ENOMEM);
+ }
+
x = 0;
dw = 0;
- hw = ce->engine->pinned_default_state;
- hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+ hw = defaults;
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
do {
u32 len = hw[dw] & 0x7f;
@@ -4874,6 +5229,8 @@ store_context(struct intel_context *ce, struct i915_vma *scratch)
*cs++ = MI_BATCH_BUFFER_END;
+ shmem_unpin_map(ce->engine->default_state, defaults);
+
i915_gem_object_flush_map(batch->obj);
i915_gem_object_unpin_map(batch->obj);
@@ -4984,6 +5341,7 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
{
struct i915_vma *batch;
u32 dw, *cs, *hw;
+ u32 *defaults;
batch = create_user_vma(ce->vm, SZ_64K);
if (IS_ERR(batch))
@@ -4995,9 +5353,16 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
return ERR_CAST(cs);
}
+ defaults = shmem_pin_map(ce->engine->default_state);
+ if (!defaults) {
+ i915_gem_object_unpin_map(batch->obj);
+ i915_vma_put(batch);
+ return ERR_PTR(-ENOMEM);
+ }
+
dw = 0;
- hw = ce->engine->pinned_default_state;
- hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+ hw = defaults;
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
do {
u32 len = hw[dw] & 0x7f;
@@ -5024,6 +5389,8 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
*cs++ = MI_BATCH_BUFFER_END;
+ shmem_unpin_map(ce->engine->default_state, defaults);
+
i915_gem_object_flush_map(batch->obj);
i915_gem_object_unpin_map(batch->obj);
@@ -5091,6 +5458,7 @@ static int compare_isolation(struct intel_engine_cs *engine,
{
u32 x, dw, *hw, *lrc;
u32 *A[2], *B[2];
+ u32 *defaults;
int err = 0;
A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
@@ -5121,12 +5489,18 @@ static int compare_isolation(struct intel_engine_cs *engine,
err = PTR_ERR(lrc);
goto err_B1;
}
- lrc += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+ lrc += LRC_STATE_OFFSET / sizeof(*hw);
+
+ defaults = shmem_pin_map(ce->engine->default_state);
+ if (!defaults) {
+ err = -ENOMEM;
+ goto err_lrc;
+ }
x = 0;
dw = 0;
- hw = engine->pinned_default_state;
- hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+ hw = defaults;
+ hw += LRC_STATE_OFFSET / sizeof(*hw);
do {
u32 len = hw[dw] & 0x7f;
@@ -5157,7 +5531,6 @@ static int compare_isolation(struct intel_engine_cs *engine,
A[0][x], B[0][x], B[1][x],
poison, lrc[dw + 1]);
err = -EINVAL;
- break;
}
}
dw += 2;
@@ -5166,6 +5539,8 @@ static int compare_isolation(struct intel_engine_cs *engine,
} while (dw < PAGE_SIZE / sizeof(u32) &&
(hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+ shmem_unpin_map(ce->engine->default_state, defaults);
+err_lrc:
i915_gem_object_unpin_map(ce->state->obj);
err_B1:
i915_gem_object_unpin_map(result[1]->obj);
@@ -5296,6 +5671,7 @@ static int live_lrc_isolation(void *arg)
0xffffffff,
0xffff0000,
};
+ int err = 0;
/*
* Our goal is try and verify that per-context state cannot be
@@ -5306,7 +5682,6 @@ static int live_lrc_isolation(void *arg)
*/
for_each_engine(engine, gt, id) {
- int err = 0;
int i;
/* Just don't even ask */
@@ -5315,25 +5690,180 @@ static int live_lrc_isolation(void *arg)
continue;
intel_engine_pm_get(engine);
- if (engine->pinned_default_state) {
- for (i = 0; i < ARRAY_SIZE(poison); i++) {
- err = __lrc_isolation(engine, poison[i]);
- if (err)
- break;
+ for (i = 0; i < ARRAY_SIZE(poison); i++) {
+ int result;
- err = __lrc_isolation(engine, ~poison[i]);
- if (err)
- break;
- }
+ result = __lrc_isolation(engine, poison[i]);
+ if (result && !err)
+ err = result;
+
+ result = __lrc_isolation(engine, ~poison[i]);
+ if (result && !err)
+ err = result;
}
intel_engine_pm_put(engine);
+ if (igt_flush_test(gt->i915)) {
+ err = -EIO;
+ break;
+ }
+ }
+
+ return err;
+}
+
+static int indirect_ctx_submit_req(struct intel_context *ce)
+{
+ struct i915_request *rq;
+ int err = 0;
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -ETIME;
+
+ i915_request_put(rq);
+
+ return err;
+}
+
+#define CTX_BB_CANARY_OFFSET (3 * 1024)
+#define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32))
+
+static u32 *
+emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+{
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = i915_mmio_reg_offset(RING_START(0));
+ *cs++ = i915_ggtt_offset(ce->state) +
+ context_wa_bb_offset(ce) +
+ CTX_BB_CANARY_OFFSET;
+ *cs++ = 0;
+
+ return cs;
+}
+
+static void
+indirect_ctx_bb_setup(struct intel_context *ce)
+{
+ u32 *cs = context_indirect_bb(ce);
+
+ cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
+
+ setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
+}
+
+static bool check_ring_start(struct intel_context *ce)
+{
+ const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
+ LRC_STATE_OFFSET + context_wa_bb_offset(ce);
+
+ if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
+ return true;
+
+ pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
+ ctx_bb[CTX_BB_CANARY_INDEX],
+ ce->lrc_reg_state[CTX_RING_START]);
+
+ return false;
+}
+
+static int indirect_ctx_bb_check(struct intel_context *ce)
+{
+ int err;
+
+ err = indirect_ctx_submit_req(ce);
+ if (err)
+ return err;
+
+ if (!check_ring_start(ce))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
+{
+ struct intel_context *a, *b;
+ int err;
+
+ a = intel_context_create(engine);
+ if (IS_ERR(a))
+ return PTR_ERR(a);
+ err = intel_context_pin(a);
+ if (err)
+ goto put_a;
+
+ b = intel_context_create(engine);
+ if (IS_ERR(b)) {
+ err = PTR_ERR(b);
+ goto unpin_a;
+ }
+ err = intel_context_pin(b);
+ if (err)
+ goto put_b;
+
+ /* We use the already reserved extra page in context state */
+ if (!a->wa_bb_page) {
+ GEM_BUG_ON(b->wa_bb_page);
+ GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
+ goto unpin_b;
+ }
+
+ /*
+ * In order to test that our per context bb is truly per context,
+ * and executes at the intended spot on context restoring process,
+ * make the batch store the ring start value to memory.
+ * As ring start is restored apriori of starting the indirect ctx bb and
+ * as it will be different for each context, it fits to this purpose.
+ */
+ indirect_ctx_bb_setup(a);
+ indirect_ctx_bb_setup(b);
+
+ err = indirect_ctx_bb_check(a);
+ if (err)
+ goto unpin_b;
+
+ err = indirect_ctx_bb_check(b);
+
+unpin_b:
+ intel_context_unpin(b);
+put_b:
+ intel_context_put(b);
+unpin_a:
+ intel_context_unpin(a);
+put_a:
+ intel_context_put(a);
+
+ return err;
+}
+
+static int live_lrc_indirect_ctx_bb(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ for_each_engine(engine, gt, id) {
+ intel_engine_pm_get(engine);
+ err = __live_lrc_indirect_ctx_bb(engine);
+ intel_engine_pm_put(engine);
+
if (igt_flush_test(gt->i915))
err = -EIO;
+
if (err)
- return err;
+ break;
}
- return 0;
+ return err;
}
static void garbage_reset(struct intel_engine_cs *engine,
@@ -5367,7 +5897,7 @@ static struct i915_request *garbage(struct intel_context *ce,
prandom_bytes_state(prng,
ce->lrc_reg_state,
ce->engine->context_size -
- LRC_STATE_PN * PAGE_SIZE);
+ LRC_STATE_OFFSET);
rq = intel_context_create_request(ce);
if (IS_ERR(rq)) {
@@ -5571,6 +6101,7 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_lrc_timestamp),
SUBTEST(live_lrc_garbage),
SUBTEST(live_pphwsp_runtime),
+ SUBTEST(live_lrc_indirect_ctx_bb),
};
if (!HAS_LOGICAL_RING_CONTEXTS(i915))
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c
index 95b165faeba7..2dc460624bbc 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rc6.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
@@ -11,6 +11,7 @@
#include "selftest_rc6.h"
#include "selftests/i915_random.h"
+#include "selftests/librapl.h"
static u64 rc6_residency(struct intel_rc6 *rc6)
{
@@ -31,7 +32,9 @@ int live_rc6_manual(void *arg)
{
struct intel_gt *gt = arg;
struct intel_rc6 *rc6 = &gt->rc6;
+ u64 rc0_power, rc6_power;
intel_wakeref_t wakeref;
+ ktime_t dt;
u64 res[2];
int err = 0;
@@ -54,7 +57,12 @@ int live_rc6_manual(void *arg)
msleep(1); /* wakeup is not immediate, takes about 100us on icl */
res[0] = rc6_residency(rc6);
+
+ dt = ktime_get();
+ rc0_power = librapl_energy_uJ();
msleep(250);
+ rc0_power = librapl_energy_uJ() - rc0_power;
+ dt = ktime_sub(ktime_get(), dt);
res[1] = rc6_residency(rc6);
if ((res[1] - res[0]) >> 10) {
pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n",
@@ -63,13 +71,24 @@ int live_rc6_manual(void *arg)
goto out_unlock;
}
+ rc0_power = div64_u64(NSEC_PER_SEC * rc0_power, ktime_to_ns(dt));
+ if (!rc0_power) {
+ pr_err("No power measured while in RC0\n");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
/* Manually enter RC6 */
intel_rc6_park(rc6);
res[0] = rc6_residency(rc6);
+ intel_uncore_forcewake_flush(rc6_to_uncore(rc6), FORCEWAKE_ALL);
+ dt = ktime_get();
+ rc6_power = librapl_energy_uJ();
msleep(100);
+ rc6_power = librapl_energy_uJ() - rc6_power;
+ dt = ktime_sub(ktime_get(), dt);
res[1] = rc6_residency(rc6);
-
if (res[1] == res[0]) {
pr_err("Did not enter RC6! RC6_STATE=%08x, RC6_CONTROL=%08x, residency=%lld\n",
intel_uncore_read_fw(gt->uncore, GEN6_RC_STATE),
@@ -78,6 +97,15 @@ int live_rc6_manual(void *arg)
err = -EINVAL;
}
+ rc6_power = div64_u64(NSEC_PER_SEC * rc6_power, ktime_to_ns(dt));
+ pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n",
+ rc0_power, rc6_power);
+ if (2 * rc6_power > rc0_power) {
+ pr_err("GPU leaked energy while in RC6!\n");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
/* Restore what should have been the original state! */
intel_rc6_unpark(rc6);
diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
index 9995faadd7e8..3350e7c995bc 100644
--- a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
@@ -54,6 +54,8 @@ static struct i915_vma *create_wally(struct intel_engine_cs *engine)
*cs++ = STACK_MAGIC;
*cs++ = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(obj);
i915_gem_object_unpin_map(obj);
vma->private = intel_context_create(engine); /* dummy residuals */
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
new file mode 100644
index 000000000000..6275d69aa9cc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -0,0 +1,1331 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/pm_qos.h>
+#include <linux/sort.h>
+
+#include "intel_engine_heartbeat.h"
+#include "intel_engine_pm.h"
+#include "intel_gpu_commands.h"
+#include "intel_gt_clock_utils.h"
+#include "intel_gt_pm.h"
+#include "intel_rc6.h"
+#include "selftest_rps.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_spinner.h"
+#include "selftests/librapl.h"
+
+/* Try to isolate the impact of cstates from determing frequency response */
+#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
+
+static unsigned long engine_heartbeat_disable(struct intel_engine_cs *engine)
+{
+ unsigned long old;
+
+ old = fetch_and_zero(&engine->props.heartbeat_interval_ms);
+
+ intel_engine_pm_get(engine);
+ intel_engine_park_heartbeat(engine);
+
+ return old;
+}
+
+static void engine_heartbeat_enable(struct intel_engine_cs *engine,
+ unsigned long saved)
+{
+ intel_engine_pm_put(engine);
+
+ engine->props.heartbeat_interval_ms = saved;
+}
+
+static void dummy_rps_work(struct work_struct *wrk)
+{
+}
+
+static int cmp_u64(const void *A, const void *B)
+{
+ const u64 *a = A, *b = B;
+
+ if (a < b)
+ return -1;
+ else if (a > b)
+ return 1;
+ else
+ return 0;
+}
+
+static int cmp_u32(const void *A, const void *B)
+{
+ const u32 *a = A, *b = B;
+
+ if (a < b)
+ return -1;
+ else if (a > b)
+ return 1;
+ else
+ return 0;
+}
+
+static struct i915_vma *
+create_spin_counter(struct intel_engine_cs *engine,
+ struct i915_address_space *vm,
+ bool srm,
+ u32 **cancel,
+ u32 **counter)
+{
+ enum {
+ COUNT,
+ INC,
+ __NGPR__,
+ };
+#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ unsigned long end;
+ u32 *base, *cs;
+ int loop, i;
+ int err;
+
+ obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ end = obj->base.size / sizeof(u32) - 1;
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err) {
+ i915_vma_put(vma);
+ return ERR_PTR(err);
+ }
+
+ base = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(base)) {
+ i915_gem_object_put(obj);
+ return ERR_CAST(base);
+ }
+ cs = base;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
+ for (i = 0; i < __NGPR__; i++) {
+ *cs++ = i915_mmio_reg_offset(CS_GPR(i));
+ *cs++ = 0;
+ *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
+ *cs++ = 0;
+ }
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
+ *cs++ = 1;
+
+ loop = cs - base;
+
+ /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
+ for (i = 0; i < 1024; i++) {
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
+
+ if (srm) {
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8;
+ *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
+ *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
+ *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
+ }
+ }
+
+ *cs++ = MI_BATCH_BUFFER_START_GEN8;
+ *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
+ *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
+ GEM_BUG_ON(cs - base > end);
+
+ i915_gem_object_flush_map(obj);
+
+ *cancel = base + loop;
+ *counter = srm ? memset32(base + end, 0, 1) : NULL;
+ return vma;
+}
+
+static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
+{
+ u8 history[64], i;
+ unsigned long end;
+ int sleep;
+
+ i = 0;
+ memset(history, freq, sizeof(history));
+ sleep = 20;
+
+ /* The PCU does not change instantly, but drifts towards the goal? */
+ end = jiffies + msecs_to_jiffies(timeout_ms);
+ do {
+ u8 act;
+
+ act = read_cagf(rps);
+ if (time_after(jiffies, end))
+ return act;
+
+ /* Target acquired */
+ if (act == freq)
+ return act;
+
+ /* Any change within the last N samples? */
+ if (!memchr_inv(history, act, sizeof(history)))
+ return act;
+
+ history[i] = act;
+ i = (i + 1) % ARRAY_SIZE(history);
+
+ usleep_range(sleep, 2 * sleep);
+ sleep *= 2;
+ if (sleep > timeout_ms * 20)
+ sleep = timeout_ms * 20;
+ } while (1);
+}
+
+static u8 rps_set_check(struct intel_rps *rps, u8 freq)
+{
+ mutex_lock(&rps->lock);
+ GEM_BUG_ON(!intel_rps_is_active(rps));
+ intel_rps_set(rps, freq);
+ GEM_BUG_ON(rps->last_freq != freq);
+ mutex_unlock(&rps->lock);
+
+ return wait_for_freq(rps, freq, 50);
+}
+
+static void show_pstate_limits(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ if (IS_BROXTON(i915)) {
+ pr_info("P_STATE_CAP[%x]: 0x%08x\n",
+ i915_mmio_reg_offset(BXT_RP_STATE_CAP),
+ intel_uncore_read(rps_to_uncore(rps),
+ BXT_RP_STATE_CAP));
+ } else if (IS_GEN(i915, 9)) {
+ pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
+ i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
+ intel_uncore_read(rps_to_uncore(rps),
+ GEN9_RP_STATE_LIMITS));
+ }
+}
+
+int live_rps_clock_interval(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = &gt->rps;
+ void (*saved_work)(struct work_struct *wrk);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = 0;
+
+ if (!intel_rps_is_enabled(rps))
+ return 0;
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ intel_gt_pm_wait_for_idle(gt);
+ saved_work = rps->work.func;
+ rps->work.func = dummy_rps_work;
+
+ intel_gt_pm_get(gt);
+ intel_rps_disable(&gt->rps);
+
+ intel_gt_check_clock_frequency(gt);
+
+ for_each_engine(engine, gt, id) {
+ unsigned long saved_heartbeat;
+ struct i915_request *rq;
+ u32 cycles;
+ u64 dt;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+
+ rq = igt_spinner_create_request(&spin,
+ engine->kernel_context,
+ MI_NOOP);
+ if (IS_ERR(rq)) {
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ pr_err("%s: RPS spinner did not start\n",
+ engine->name);
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ intel_gt_set_wedged(engine->gt);
+ err = -EIO;
+ break;
+ }
+
+ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+
+ intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);
+
+ /* Set the evaluation interval to infinity! */
+ intel_uncore_write_fw(gt->uncore,
+ GEN6_RP_UP_EI, 0xffffffff);
+ intel_uncore_write_fw(gt->uncore,
+ GEN6_RP_UP_THRESHOLD, 0xffffffff);
+
+ intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
+ GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);
+
+ if (wait_for(intel_uncore_read_fw(gt->uncore,
+ GEN6_RP_CUR_UP_EI),
+ 10)) {
+ /* Just skip the test; assume lack of HW support */
+ pr_notice("%s: rps evaluation interval not ticking\n",
+ engine->name);
+ err = -ENODEV;
+ } else {
+ ktime_t dt_[5];
+ u32 cycles_[5];
+ int i;
+
+ for (i = 0; i < 5; i++) {
+ preempt_disable();
+
+ dt_[i] = ktime_get();
+ cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
+
+ udelay(1000);
+
+ dt_[i] = ktime_sub(ktime_get(), dt_[i]);
+ cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
+
+ preempt_enable();
+ }
+
+ /* Use the median of both cycle/dt; close enough */
+ sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
+ cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
+ sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
+ dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
+ }
+
+ intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
+ intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
+
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+
+ if (err == 0) {
+ u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
+ u32 expected =
+ intel_gt_ns_to_pm_interval(gt, dt);
+
+ pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
+ engine->name, cycles, time, dt, expected,
+ gt->clock_frequency / 1000);
+
+ if (10 * time < 8 * dt ||
+ 8 * time > 10 * dt) {
+ pr_err("%s: rps clock time does not match walltime!\n",
+ engine->name);
+ err = -EINVAL;
+ }
+
+ if (10 * expected < 8 * cycles ||
+ 8 * expected > 10 * cycles) {
+ pr_err("%s: walltime does not match rps clock ticks!\n",
+ engine->name);
+ err = -EINVAL;
+ }
+ }
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ break; /* once is enough */
+ }
+
+ intel_rps_enable(&gt->rps);
+ intel_gt_pm_put(gt);
+
+ igt_spinner_fini(&spin);
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ if (err == -ENODEV) /* skipped, don't report a fail */
+ err = 0;
+
+ return err;
+}
+
+int live_rps_control(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = &gt->rps;
+ void (*saved_work)(struct work_struct *wrk);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = 0;
+
+ /*
+ * Check that the actual frequency matches our requested frequency,
+ * to verify our control mechanism. We have to be careful that the
+ * PCU may throttle the GPU in which case the actual frequency used
+ * will be lowered than requested.
+ */
+
+ if (!intel_rps_is_enabled(rps))
+ return 0;
+
+ if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
+ return 0;
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ intel_gt_pm_wait_for_idle(gt);
+ saved_work = rps->work.func;
+ rps->work.func = dummy_rps_work;
+
+ intel_gt_pm_get(gt);
+ for_each_engine(engine, gt, id) {
+ unsigned long saved_heartbeat;
+ struct i915_request *rq;
+ ktime_t min_dt, max_dt;
+ int f, limit;
+ int min, max;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+
+ rq = igt_spinner_create_request(&spin,
+ engine->kernel_context,
+ MI_NOOP);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ pr_err("%s: RPS spinner did not start\n",
+ engine->name);
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ intel_gt_set_wedged(engine->gt);
+ err = -EIO;
+ break;
+ }
+
+ if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
+ pr_err("%s: could not set minimum frequency [%x], only %x!\n",
+ engine->name, rps->min_freq, read_cagf(rps));
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ show_pstate_limits(rps);
+ err = -EINVAL;
+ break;
+ }
+
+ for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
+ if (rps_set_check(rps, f) < f)
+ break;
+ }
+
+ limit = rps_set_check(rps, f);
+
+ if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
+ pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
+ engine->name, rps->min_freq, read_cagf(rps));
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ show_pstate_limits(rps);
+ err = -EINVAL;
+ break;
+ }
+
+ max_dt = ktime_get();
+ max = rps_set_check(rps, limit);
+ max_dt = ktime_sub(ktime_get(), max_dt);
+
+ min_dt = ktime_get();
+ min = rps_set_check(rps, rps->min_freq);
+ min_dt = ktime_sub(ktime_get(), min_dt);
+
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+
+ pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
+ engine->name,
+ rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
+ rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
+ limit, intel_gpu_freq(rps, limit),
+ min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
+
+ if (limit == rps->min_freq) {
+ pr_err("%s: GPU throttled to minimum!\n",
+ engine->name);
+ show_pstate_limits(rps);
+ err = -ENODEV;
+ break;
+ }
+
+ if (igt_flush_test(gt->i915)) {
+ err = -EIO;
+ break;
+ }
+ }
+ intel_gt_pm_put(gt);
+
+ igt_spinner_fini(&spin);
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ return err;
+}
+
+static void show_pcu_config(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ unsigned int max_gpu_freq, min_gpu_freq;
+ intel_wakeref_t wakeref;
+ int gpu_freq;
+
+ if (!HAS_LLC(i915))
+ return;
+
+ min_gpu_freq = rps->min_freq;
+ max_gpu_freq = rps->max_freq;
+ if (INTEL_GEN(i915) >= 9) {
+ /* Convert GT frequency to 50 HZ units */
+ min_gpu_freq /= GEN9_FREQ_SCALER;
+ max_gpu_freq /= GEN9_FREQ_SCALER;
+ }
+
+ wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
+
+ pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
+ for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
+ int ia_freq = gpu_freq;
+
+ sandybridge_pcode_read(i915,
+ GEN6_PCODE_READ_MIN_FREQ_TABLE,
+ &ia_freq, NULL);
+
+ pr_info("%5d %5d %5d\n",
+ gpu_freq * 50,
+ ((ia_freq >> 0) & 0xff) * 100,
+ ((ia_freq >> 8) & 0xff) * 100);
+ }
+
+ intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
+}
+
+static u64 __measure_frequency(u32 *cntr, int duration_ms)
+{
+ u64 dc, dt;
+
+ dt = ktime_get();
+ dc = READ_ONCE(*cntr);
+ usleep_range(1000 * duration_ms, 2000 * duration_ms);
+ dc = READ_ONCE(*cntr) - dc;
+ dt = ktime_get() - dt;
+
+ return div64_u64(1000 * 1000 * dc, dt);
+}
+
+static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
+{
+ u64 x[5];
+ int i;
+
+ *freq = rps_set_check(rps, *freq);
+ for (i = 0; i < 5; i++)
+ x[i] = __measure_frequency(cntr, 2);
+ *freq = (*freq + read_cagf(rps)) / 2;
+
+ /* A simple triangle filter for better result stability */
+ sort(x, 5, sizeof(*x), cmp_u64, NULL);
+ return div_u64(x[1] + 2 * x[2] + x[3], 4);
+}
+
+static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
+ int duration_ms)
+{
+ u64 dc, dt;
+
+ dt = ktime_get();
+ dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
+ usleep_range(1000 * duration_ms, 2000 * duration_ms);
+ dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
+ dt = ktime_get() - dt;
+
+ return div64_u64(1000 * 1000 * dc, dt);
+}
+
+static u64 measure_cs_frequency_at(struct intel_rps *rps,
+ struct intel_engine_cs *engine,
+ int *freq)
+{
+ u64 x[5];
+ int i;
+
+ *freq = rps_set_check(rps, *freq);
+ for (i = 0; i < 5; i++)
+ x[i] = __measure_cs_frequency(engine, 2);
+ *freq = (*freq + read_cagf(rps)) / 2;
+
+ /* A simple triangle filter for better result stability */
+ sort(x, 5, sizeof(*x), cmp_u64, NULL);
+ return div_u64(x[1] + 2 * x[2] + x[3], 4);
+}
+
+static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
+{
+ return f_d * x > f_n * y && f_n * x < f_d * y;
+}
+
+int live_rps_frequency_cs(void *arg)
+{
+ void (*saved_work)(struct work_struct *wrk);
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = &gt->rps;
+ struct intel_engine_cs *engine;
+ struct pm_qos_request qos;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * The premise is that the GPU does change freqency at our behest.
+ * Let's check there is a correspondence between the requested
+ * frequency, the actual frequency, and the observed clock rate.
+ */
+
+ if (!intel_rps_is_enabled(rps))
+ return 0;
+
+ if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
+ return 0;
+
+ if (CPU_LATENCY >= 0)
+ cpu_latency_qos_add_request(&qos, CPU_LATENCY);
+
+ intel_gt_pm_wait_for_idle(gt);
+ saved_work = rps->work.func;
+ rps->work.func = dummy_rps_work;
+
+ for_each_engine(engine, gt, id) {
+ unsigned long saved_heartbeat;
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ u32 *cancel, *cntr;
+ struct {
+ u64 count;
+ int freq;
+ } min, max;
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+
+ vma = create_spin_counter(engine,
+ engine->kernel_context->vm, false,
+ &cancel, &cntr);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ break;
+ }
+
+ rq = intel_engine_create_kernel_request(engine);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_vma;
+ }
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, false);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, 0);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ vma->node.start,
+ PAGE_SIZE, 0);
+ i915_vma_unlock(vma);
+ i915_request_add(rq);
+ if (err)
+ goto err_vma;
+
+ if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
+ 10)) {
+ pr_err("%s: timed loop did not start\n",
+ engine->name);
+ goto err_vma;
+ }
+
+ min.freq = rps->min_freq;
+ min.count = measure_cs_frequency_at(rps, engine, &min.freq);
+
+ max.freq = rps->max_freq;
+ max.count = measure_cs_frequency_at(rps, engine, &max.freq);
+
+ pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
+ engine->name,
+ min.count, intel_gpu_freq(rps, min.freq),
+ max.count, intel_gpu_freq(rps, max.freq),
+ (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
+ max.freq * min.count));
+
+ if (!scaled_within(max.freq * min.count,
+ min.freq * max.count,
+ 2, 3)) {
+ int f;
+
+ pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
+ engine->name,
+ max.freq * min.count,
+ min.freq * max.count);
+ show_pcu_config(rps);
+
+ for (f = min.freq + 1; f <= rps->max_freq; f++) {
+ int act = f;
+ u64 count;
+
+ count = measure_cs_frequency_at(rps, engine, &act);
+ if (act < f)
+ break;
+
+ pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
+ engine->name,
+ act, intel_gpu_freq(rps, act), count,
+ (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
+ act * min.count));
+
+ f = act; /* may skip ahead [pcu granularity] */
+ }
+
+ err = -EINVAL;
+ }
+
+err_vma:
+ *cancel = MI_BATCH_BUFFER_END;
+ i915_gem_object_flush_map(vma->obj);
+ i915_gem_object_unpin_map(vma->obj);
+ i915_vma_unpin(vma);
+ i915_vma_put(vma);
+
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ if (err)
+ break;
+ }
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ if (CPU_LATENCY >= 0)
+ cpu_latency_qos_remove_request(&qos);
+
+ return err;
+}
+
+int live_rps_frequency_srm(void *arg)
+{
+ void (*saved_work)(struct work_struct *wrk);
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = &gt->rps;
+ struct intel_engine_cs *engine;
+ struct pm_qos_request qos;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * The premise is that the GPU does change freqency at our behest.
+ * Let's check there is a correspondence between the requested
+ * frequency, the actual frequency, and the observed clock rate.
+ */
+
+ if (!intel_rps_is_enabled(rps))
+ return 0;
+
+ if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
+ return 0;
+
+ if (CPU_LATENCY >= 0)
+ cpu_latency_qos_add_request(&qos, CPU_LATENCY);
+
+ intel_gt_pm_wait_for_idle(gt);
+ saved_work = rps->work.func;
+ rps->work.func = dummy_rps_work;
+
+ for_each_engine(engine, gt, id) {
+ unsigned long saved_heartbeat;
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ u32 *cancel, *cntr;
+ struct {
+ u64 count;
+ int freq;
+ } min, max;
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+
+ vma = create_spin_counter(engine,
+ engine->kernel_context->vm, true,
+ &cancel, &cntr);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ break;
+ }
+
+ rq = intel_engine_create_kernel_request(engine);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_vma;
+ }
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, false);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, 0);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ vma->node.start,
+ PAGE_SIZE, 0);
+ i915_vma_unlock(vma);
+ i915_request_add(rq);
+ if (err)
+ goto err_vma;
+
+ if (wait_for(READ_ONCE(*cntr), 10)) {
+ pr_err("%s: timed loop did not start\n",
+ engine->name);
+ goto err_vma;
+ }
+
+ min.freq = rps->min_freq;
+ min.count = measure_frequency_at(rps, cntr, &min.freq);
+
+ max.freq = rps->max_freq;
+ max.count = measure_frequency_at(rps, cntr, &max.freq);
+
+ pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
+ engine->name,
+ min.count, intel_gpu_freq(rps, min.freq),
+ max.count, intel_gpu_freq(rps, max.freq),
+ (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
+ max.freq * min.count));
+
+ if (!scaled_within(max.freq * min.count,
+ min.freq * max.count,
+ 1, 2)) {
+ int f;
+
+ pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
+ engine->name,
+ max.freq * min.count,
+ min.freq * max.count);
+ show_pcu_config(rps);
+
+ for (f = min.freq + 1; f <= rps->max_freq; f++) {
+ int act = f;
+ u64 count;
+
+ count = measure_frequency_at(rps, cntr, &act);
+ if (act < f)
+ break;
+
+ pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
+ engine->name,
+ act, intel_gpu_freq(rps, act), count,
+ (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
+ act * min.count));
+
+ f = act; /* may skip ahead [pcu granularity] */
+ }
+
+ err = -EINVAL;
+ }
+
+err_vma:
+ *cancel = MI_BATCH_BUFFER_END;
+ i915_gem_object_flush_map(vma->obj);
+ i915_gem_object_unpin_map(vma->obj);
+ i915_vma_unpin(vma);
+ i915_vma_put(vma);
+
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ if (err)
+ break;
+ }
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ if (CPU_LATENCY >= 0)
+ cpu_latency_qos_remove_request(&qos);
+
+ return err;
+}
+
+static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
+{
+ /* Flush any previous EI */
+ usleep_range(timeout_us, 2 * timeout_us);
+
+ /* Reset the interrupt status */
+ rps_disable_interrupts(rps);
+ GEM_BUG_ON(rps->pm_iir);
+ rps_enable_interrupts(rps);
+
+ /* And then wait for the timeout, for real this time */
+ usleep_range(2 * timeout_us, 3 * timeout_us);
+}
+
+static int __rps_up_interrupt(struct intel_rps *rps,
+ struct intel_engine_cs *engine,
+ struct igt_spinner *spin)
+{
+ struct intel_uncore *uncore = engine->uncore;
+ struct i915_request *rq;
+ u32 timeout;
+
+ if (!intel_engine_can_store_dword(engine))
+ return 0;
+
+ rps_set_check(rps, rps->min_freq);
+
+ rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(spin, rq)) {
+ pr_err("%s: RPS spinner did not start\n",
+ engine->name);
+ i915_request_put(rq);
+ intel_gt_set_wedged(engine->gt);
+ return -EIO;
+ }
+
+ if (!intel_rps_is_active(rps)) {
+ pr_err("%s: RPS not enabled on starting spinner\n",
+ engine->name);
+ igt_spinner_end(spin);
+ i915_request_put(rq);
+ return -EINVAL;
+ }
+
+ if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
+ pr_err("%s: RPS did not register UP interrupt\n",
+ engine->name);
+ i915_request_put(rq);
+ return -EINVAL;
+ }
+
+ if (rps->last_freq != rps->min_freq) {
+ pr_err("%s: RPS did not program min frequency\n",
+ engine->name);
+ i915_request_put(rq);
+ return -EINVAL;
+ }
+
+ timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
+ timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
+ timeout = DIV_ROUND_UP(timeout, 1000);
+
+ sleep_for_ei(rps, timeout);
+ GEM_BUG_ON(i915_request_completed(rq));
+
+ igt_spinner_end(spin);
+ i915_request_put(rq);
+
+ if (rps->cur_freq != rps->min_freq) {
+ pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
+ engine->name, intel_rps_read_actual_frequency(rps));
+ return -EINVAL;
+ }
+
+ if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
+ pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
+ engine->name, rps->pm_iir,
+ intel_uncore_read(uncore, GEN6_RP_PREV_UP),
+ intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
+ intel_uncore_read(uncore, GEN6_RP_UP_EI));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int __rps_down_interrupt(struct intel_rps *rps,
+ struct intel_engine_cs *engine)
+{
+ struct intel_uncore *uncore = engine->uncore;
+ u32 timeout;
+
+ rps_set_check(rps, rps->max_freq);
+
+ if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
+ pr_err("%s: RPS did not register DOWN interrupt\n",
+ engine->name);
+ return -EINVAL;
+ }
+
+ if (rps->last_freq != rps->max_freq) {
+ pr_err("%s: RPS did not program max frequency\n",
+ engine->name);
+ return -EINVAL;
+ }
+
+ timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
+ timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
+ timeout = DIV_ROUND_UP(timeout, 1000);
+
+ sleep_for_ei(rps, timeout);
+
+ if (rps->cur_freq != rps->max_freq) {
+ pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
+ engine->name,
+ intel_rps_read_actual_frequency(rps));
+ return -EINVAL;
+ }
+
+ if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
+ pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
+ engine->name, rps->pm_iir,
+ intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
+ intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
+ intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
+ intel_uncore_read(uncore, GEN6_RP_PREV_UP),
+ intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
+ intel_uncore_read(uncore, GEN6_RP_UP_EI));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int live_rps_interrupt(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = &gt->rps;
+ void (*saved_work)(struct work_struct *wrk);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ u32 pm_events;
+ int err = 0;
+
+ /*
+ * First, let's check whether or not we are receiving interrupts.
+ */
+
+ if (!intel_rps_has_interrupts(rps))
+ return 0;
+
+ intel_gt_pm_get(gt);
+ pm_events = rps->pm_events;
+ intel_gt_pm_put(gt);
+ if (!pm_events) {
+ pr_err("No RPS PM events registered, but RPS is enabled?\n");
+ return -ENODEV;
+ }
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ intel_gt_pm_wait_for_idle(gt);
+ saved_work = rps->work.func;
+ rps->work.func = dummy_rps_work;
+
+ for_each_engine(engine, gt, id) {
+ /* Keep the engine busy with a spinner; expect an UP! */
+ if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
+ unsigned long saved_heartbeat;
+
+ intel_gt_pm_wait_for_idle(engine->gt);
+ GEM_BUG_ON(intel_rps_is_active(rps));
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+
+ err = __rps_up_interrupt(rps, engine, &spin);
+
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ if (err)
+ goto out;
+
+ intel_gt_pm_wait_for_idle(engine->gt);
+ }
+
+ /* Keep the engine awake but idle and check for DOWN */
+ if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
+ unsigned long saved_heartbeat;
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+ intel_rc6_disable(&gt->rc6);
+
+ err = __rps_down_interrupt(rps, engine);
+
+ intel_rc6_enable(&gt->rc6);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ if (err)
+ goto out;
+ }
+ }
+
+out:
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ igt_spinner_fini(&spin);
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ return err;
+}
+
+static u64 __measure_power(int duration_ms)
+{
+ u64 dE, dt;
+
+ dt = ktime_get();
+ dE = librapl_energy_uJ();
+ usleep_range(1000 * duration_ms, 2000 * duration_ms);
+ dE = librapl_energy_uJ() - dE;
+ dt = ktime_get() - dt;
+
+ return div64_u64(1000 * 1000 * dE, dt);
+}
+
+static u64 measure_power_at(struct intel_rps *rps, int *freq)
+{
+ u64 x[5];
+ int i;
+
+ *freq = rps_set_check(rps, *freq);
+ for (i = 0; i < 5; i++)
+ x[i] = __measure_power(5);
+ *freq = (*freq + read_cagf(rps)) / 2;
+
+ /* A simple triangle filter for better result stability */
+ sort(x, 5, sizeof(*x), cmp_u64, NULL);
+ return div_u64(x[1] + 2 * x[2] + x[3], 4);
+}
+
+int live_rps_power(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = &gt->rps;
+ void (*saved_work)(struct work_struct *wrk);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = 0;
+
+ /*
+ * Our fundamental assumption is that running at lower frequency
+ * actually saves power. Let's see if our RAPL measurement support
+ * that theory.
+ */
+
+ if (!intel_rps_is_enabled(rps))
+ return 0;
+
+ if (!librapl_energy_uJ())
+ return 0;
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ intel_gt_pm_wait_for_idle(gt);
+ saved_work = rps->work.func;
+ rps->work.func = dummy_rps_work;
+
+ for_each_engine(engine, gt, id) {
+ unsigned long saved_heartbeat;
+ struct i915_request *rq;
+ struct {
+ u64 power;
+ int freq;
+ } min, max;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ saved_heartbeat = engine_heartbeat_disable(engine);
+
+ rq = igt_spinner_create_request(&spin,
+ engine->kernel_context,
+ MI_NOOP);
+ if (IS_ERR(rq)) {
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ pr_err("%s: RPS spinner did not start\n",
+ engine->name);
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+ intel_gt_set_wedged(engine->gt);
+ err = -EIO;
+ break;
+ }
+
+ max.freq = rps->max_freq;
+ max.power = measure_power_at(rps, &max.freq);
+
+ min.freq = rps->min_freq;
+ min.power = measure_power_at(rps, &min.freq);
+
+ igt_spinner_end(&spin);
+ engine_heartbeat_enable(engine, saved_heartbeat);
+
+ pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
+ engine->name,
+ min.power, intel_gpu_freq(rps, min.freq),
+ max.power, intel_gpu_freq(rps, max.freq));
+
+ if (10 * min.freq >= 9 * max.freq) {
+ pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
+ min.freq, intel_gpu_freq(rps, min.freq),
+ max.freq, intel_gpu_freq(rps, max.freq));
+ continue;
+ }
+
+ if (11 * min.power > 10 * max.power) {
+ pr_err("%s: did not conserve power when setting lower frequency!\n",
+ engine->name);
+ err = -EINVAL;
+ break;
+ }
+
+ if (igt_flush_test(gt->i915)) {
+ err = -EIO;
+ break;
+ }
+ }
+
+ igt_spinner_fini(&spin);
+
+ intel_gt_pm_wait_for_idle(gt);
+ rps->work.func = saved_work;
+
+ return err;
+}
+
+int live_rps_dynamic(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_rps *rps = &gt->rps;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = 0;
+
+ /*
+ * We've looked at the bascs, and have established that we
+ * can change the clock frequency and that the HW will generate
+ * interrupts based on load. Now we check how we integrate those
+ * moving parts into dynamic reclocking based on load.
+ */
+
+ if (!intel_rps_is_enabled(rps))
+ return 0;
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ for_each_engine(engine, gt, id) {
+ struct i915_request *rq;
+ struct {
+ ktime_t dt;
+ u8 freq;
+ } min, max;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ intel_gt_pm_wait_for_idle(gt);
+ GEM_BUG_ON(intel_rps_is_active(rps));
+ rps->cur_freq = rps->min_freq;
+
+ intel_engine_pm_get(engine);
+ intel_rc6_disable(&gt->rc6);
+ GEM_BUG_ON(rps->last_freq != rps->min_freq);
+
+ rq = igt_spinner_create_request(&spin,
+ engine->kernel_context,
+ MI_NOOP);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err;
+ }
+
+ i915_request_add(rq);
+
+ max.dt = ktime_get();
+ max.freq = wait_for_freq(rps, rps->max_freq, 500);
+ max.dt = ktime_sub(ktime_get(), max.dt);
+
+ igt_spinner_end(&spin);
+
+ min.dt = ktime_get();
+ min.freq = wait_for_freq(rps, rps->min_freq, 2000);
+ min.dt = ktime_sub(ktime_get(), min.dt);
+
+ pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
+ engine->name,
+ max.freq, intel_gpu_freq(rps, max.freq),
+ ktime_to_ns(max.dt),
+ min.freq, intel_gpu_freq(rps, min.freq),
+ ktime_to_ns(min.dt));
+ if (min.freq >= max.freq) {
+ pr_err("%s: dynamic reclocking of spinner failed\n!",
+ engine->name);
+ err = -EINVAL;
+ }
+
+err:
+ intel_rc6_enable(&gt->rc6);
+ intel_engine_pm_put(engine);
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ if (err)
+ break;
+ }
+
+ igt_spinner_fini(&spin);
+
+ return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.h b/drivers/gpu/drm/i915/gt/selftest_rps.h
new file mode 100644
index 000000000000..6e82a631cfa1
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef SELFTEST_RPS_H
+#define SELFTEST_RPS_H
+
+int live_rps_control(void *arg);
+int live_rps_clock_interval(void *arg);
+int live_rps_frequency_cs(void *arg);
+int live_rps_frequency_srm(void *arg);
+int live_rps_power(void *arg);
+int live_rps_interrupt(void *arg);
+int live_rps_dynamic(void *arg);
+
+#endif /* SELFTEST_RPS_H */
diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c
new file mode 100644
index 000000000000..43c7acbdc79d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/shmem_utils.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/shmem_fs.h>
+
+#include "gem/i915_gem_object.h"
+#include "shmem_utils.h"
+
+struct file *shmem_create_from_data(const char *name, void *data, size_t len)
+{
+ struct file *file;
+ int err;
+
+ file = shmem_file_setup(name, PAGE_ALIGN(len), VM_NORESERVE);
+ if (IS_ERR(file))
+ return file;
+
+ err = shmem_write(file, 0, data, len);
+ if (err) {
+ fput(file);
+ return ERR_PTR(err);
+ }
+
+ return file;
+}
+
+struct file *shmem_create_from_object(struct drm_i915_gem_object *obj)
+{
+ struct file *file;
+ void *ptr;
+
+ if (obj->ops == &i915_gem_shmem_ops) {
+ file = obj->base.filp;
+ atomic_long_inc(&file->f_count);
+ return file;
+ }
+
+ ptr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+ if (IS_ERR(ptr))
+ return ERR_CAST(ptr);
+
+ file = shmem_create_from_data("", ptr, obj->base.size);
+ i915_gem_object_unpin_map(obj);
+
+ return file;
+}
+
+static size_t shmem_npte(struct file *file)
+{
+ return file->f_mapping->host->i_size >> PAGE_SHIFT;
+}
+
+static void __shmem_unpin_map(struct file *file, void *ptr, size_t n_pte)
+{
+ unsigned long pfn;
+
+ vunmap(ptr);
+
+ for (pfn = 0; pfn < n_pte; pfn++) {
+ struct page *page;
+
+ page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
+ GFP_KERNEL);
+ if (!WARN_ON(IS_ERR(page))) {
+ put_page(page);
+ put_page(page);
+ }
+ }
+}
+
+void *shmem_pin_map(struct file *file)
+{
+ const size_t n_pte = shmem_npte(file);
+ pte_t *stack[32], **ptes, **mem;
+ struct vm_struct *area;
+ unsigned long pfn;
+
+ mem = stack;
+ if (n_pte > ARRAY_SIZE(stack)) {
+ mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL);
+ if (!mem)
+ return NULL;
+ }
+
+ area = alloc_vm_area(n_pte << PAGE_SHIFT, mem);
+ if (!area) {
+ if (mem != stack)
+ kvfree(mem);
+ return NULL;
+ }
+
+ ptes = mem;
+ for (pfn = 0; pfn < n_pte; pfn++) {
+ struct page *page;
+
+ page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
+ GFP_KERNEL);
+ if (IS_ERR(page))
+ goto err_page;
+
+ **ptes++ = mk_pte(page, PAGE_KERNEL);
+ }
+
+ if (mem != stack)
+ kvfree(mem);
+
+ mapping_set_unevictable(file->f_mapping);
+ return area->addr;
+
+err_page:
+ if (mem != stack)
+ kvfree(mem);
+
+ __shmem_unpin_map(file, area->addr, pfn);
+ return NULL;
+}
+
+void shmem_unpin_map(struct file *file, void *ptr)
+{
+ mapping_clear_unevictable(file->f_mapping);
+ __shmem_unpin_map(file, ptr, shmem_npte(file));
+}
+
+static int __shmem_rw(struct file *file, loff_t off,
+ void *ptr, size_t len,
+ bool write)
+{
+ unsigned long pfn;
+
+ for (pfn = off >> PAGE_SHIFT; len; pfn++) {
+ unsigned int this =
+ min_t(size_t, PAGE_SIZE - offset_in_page(off), len);
+ struct page *page;
+ void *vaddr;
+
+ page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
+ GFP_KERNEL);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ vaddr = kmap(page);
+ if (write)
+ memcpy(vaddr + offset_in_page(off), ptr, this);
+ else
+ memcpy(ptr, vaddr + offset_in_page(off), this);
+ kunmap(page);
+ put_page(page);
+
+ len -= this;
+ ptr += this;
+ off = 0;
+ }
+
+ return 0;
+}
+
+int shmem_read(struct file *file, loff_t off, void *dst, size_t len)
+{
+ return __shmem_rw(file, off, dst, len, false);
+}
+
+int shmem_write(struct file *file, loff_t off, void *src, size_t len)
+{
+ return __shmem_rw(file, off, src, len, true);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "st_shmem_utils.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.h b/drivers/gpu/drm/i915/gt/shmem_utils.h
new file mode 100644
index 000000000000..c1669170c351
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/shmem_utils.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef SHMEM_UTILS_H
+#define SHMEM_UTILS_H
+
+#include <linux/types.h>
+
+struct drm_i915_gem_object;
+struct file;
+
+struct file *shmem_create_from_data(const char *name, void *data, size_t len);
+struct file *shmem_create_from_object(struct drm_i915_gem_object *obj);
+
+void *shmem_pin_map(struct file *file);
+void shmem_unpin_map(struct file *file, void *ptr);
+
+int shmem_read(struct file *file, loff_t off, void *dst, size_t len);
+int shmem_write(struct file *file, loff_t off, void *src, size_t len);
+
+#endif /* SHMEM_UTILS_H */
diff --git a/drivers/gpu/drm/i915/gt/st_shmem_utils.c b/drivers/gpu/drm/i915/gt/st_shmem_utils.c
new file mode 100644
index 000000000000..b279fe88b70e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/st_shmem_utils.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+/* Just a quick and causal check of the shmem_utils API */
+
+static int igt_shmem_basic(void *ignored)
+{
+ u32 datum = 0xdeadbeef, result;
+ struct file *file;
+ u32 *map;
+ int err;
+
+ file = shmem_create_from_data("mock", &datum, sizeof(datum));
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ result = 0;
+ err = shmem_read(file, 0, &result, sizeof(result));
+ if (err)
+ goto out_file;
+
+ if (result != datum) {
+ pr_err("Incorrect read back from shmemfs: %x != %x\n",
+ result, datum);
+ err = -EINVAL;
+ goto out_file;
+ }
+
+ result = 0xc0ffee;
+ err = shmem_write(file, 0, &result, sizeof(result));
+ if (err)
+ goto out_file;
+
+ map = shmem_pin_map(file);
+ if (!map) {
+ err = -ENOMEM;
+ goto out_file;
+ }
+
+ if (*map != result) {
+ pr_err("Incorrect read back via mmap of last write: %x != %x\n",
+ *map, result);
+ err = -EINVAL;
+ goto out_map;
+ }
+
+out_map:
+ shmem_unpin_map(file, map);
+out_file:
+ fput(file);
+ return err;
+}
+
+int shmem_utils_mock_selftests(void)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_shmem_basic),
+ };
+
+ return i915_subtests(tests, NULL);
+}
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c
index 8f9b2f33dbaf..535cc1169e54 100644
--- a/drivers/gpu/drm/i915/gt/sysfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c
@@ -192,6 +192,17 @@ static struct kobj_attribute max_spin_attr =
__ATTR(max_busywait_duration_ns, 0644, max_spin_show, max_spin_store);
static ssize_t
+max_spin_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->defaults.max_busywait_duration_ns);
+}
+
+static struct kobj_attribute max_spin_def =
+__ATTR(max_busywait_duration_ns, 0444, max_spin_default, NULL);
+
+static ssize_t
timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
@@ -234,6 +245,17 @@ static struct kobj_attribute timeslice_duration_attr =
__ATTR(timeslice_duration_ms, 0644, timeslice_show, timeslice_store);
static ssize_t
+timeslice_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->defaults.timeslice_duration_ms);
+}
+
+static struct kobj_attribute timeslice_duration_def =
+__ATTR(timeslice_duration_ms, 0444, timeslice_default, NULL);
+
+static ssize_t
stop_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
@@ -273,6 +295,17 @@ static struct kobj_attribute stop_timeout_attr =
__ATTR(stop_timeout_ms, 0644, stop_show, stop_store);
static ssize_t
+stop_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->defaults.stop_timeout_ms);
+}
+
+static struct kobj_attribute stop_timeout_def =
+__ATTR(stop_timeout_ms, 0444, stop_default, NULL);
+
+static ssize_t
preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
@@ -317,6 +350,18 @@ static struct kobj_attribute preempt_timeout_attr =
__ATTR(preempt_timeout_ms, 0644, preempt_timeout_show, preempt_timeout_store);
static ssize_t
+preempt_timeout_default(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->defaults.preempt_timeout_ms);
+}
+
+static struct kobj_attribute preempt_timeout_def =
+__ATTR(preempt_timeout_ms, 0444, preempt_timeout_default, NULL);
+
+static ssize_t
heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
@@ -359,6 +404,17 @@ heartbeat_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
static struct kobj_attribute heartbeat_interval_attr =
__ATTR(heartbeat_interval_ms, 0644, heartbeat_show, heartbeat_store);
+static ssize_t
+heartbeat_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->defaults.heartbeat_interval_ms);
+}
+
+static struct kobj_attribute heartbeat_interval_def =
+__ATTR(heartbeat_interval_ms, 0444, heartbeat_default, NULL);
+
static void kobj_engine_release(struct kobject *kobj)
{
kfree(kobj);
@@ -390,6 +446,42 @@ kobj_engine(struct kobject *dir, struct intel_engine_cs *engine)
return &ke->base;
}
+static void add_defaults(struct kobj_engine *parent)
+{
+ static const struct attribute *files[] = {
+ &max_spin_def.attr,
+ &stop_timeout_def.attr,
+#if CONFIG_DRM_I915_HEARTBEAT_INTERVAL
+ &heartbeat_interval_def.attr,
+#endif
+ NULL
+ };
+ struct kobj_engine *ke;
+
+ ke = kzalloc(sizeof(*ke), GFP_KERNEL);
+ if (!ke)
+ return;
+
+ kobject_init(&ke->base, &kobj_engine_type);
+ ke->engine = parent->engine;
+
+ if (kobject_add(&ke->base, &parent->base, "%s", ".defaults")) {
+ kobject_put(&ke->base);
+ return;
+ }
+
+ if (sysfs_create_files(&ke->base, files))
+ return;
+
+ if (intel_engine_has_timeslices(ke->engine) &&
+ sysfs_create_file(&ke->base, &timeslice_duration_def.attr))
+ return;
+
+ if (intel_engine_has_preempt_reset(ke->engine) &&
+ sysfs_create_file(&ke->base, &preempt_timeout_def.attr))
+ return;
+}
+
void intel_engines_add_sysfs(struct drm_i915_private *i915)
{
static const struct attribute *files[] = {
@@ -433,6 +525,8 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
sysfs_create_file(kobj, &preempt_timeout_attr.attr))
goto err_engine;
+ add_defaults(container_of(kobj, struct kobj_engine, base));
+
if (0) {
err_object:
kobject_put(kobj);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 819f09ef51fc..861657897c0f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -169,7 +169,7 @@ void intel_guc_init_early(struct intel_guc *guc)
{
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
- intel_guc_fw_init_early(guc);
+ intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC);
intel_guc_ct_init_early(&guc->ct);
intel_guc_log_init_early(&guc->log);
intel_guc_submission_init_early(guc);
@@ -723,3 +723,47 @@ int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size,
return 0;
}
+
+/**
+ * intel_guc_load_status - dump information about GuC load status
+ * @guc: the GuC
+ * @p: the &drm_printer
+ *
+ * Pretty printer for GuC load status.
+ */
+void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_uncore *uncore = gt->uncore;
+ intel_wakeref_t wakeref;
+
+ if (!intel_guc_is_supported(guc)) {
+ drm_printf(p, "GuC not supported\n");
+ return;
+ }
+
+ if (!intel_guc_is_wanted(guc)) {
+ drm_printf(p, "GuC disabled\n");
+ return;
+ }
+
+ intel_uc_fw_dump(&guc->fw, p);
+
+ with_intel_runtime_pm(uncore->rpm, wakeref) {
+ u32 status = intel_uncore_read(uncore, GUC_STATUS);
+ u32 i;
+
+ drm_printf(p, "\nGuC status 0x%08x:\n", status);
+ drm_printf(p, "\tBootrom status = 0x%x\n",
+ (status & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
+ drm_printf(p, "\tuKernel status = 0x%x\n",
+ (status & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
+ drm_printf(p, "\tMIA Core status = 0x%x\n",
+ (status & GS_MIA_MASK) >> GS_MIA_SHIFT);
+ drm_puts(p, "\nScratch registers:\n");
+ for (i = 0; i < 16; i++) {
+ drm_printf(p, "\t%2d: \t0x%x\n",
+ i, intel_uncore_read(uncore, SOFT_SCRATCH(i)));
+ }
+ }
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 4594ccbeaa34..e84ab67b317d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -74,6 +74,11 @@ struct intel_guc {
struct mutex send_mutex;
};
+static inline struct intel_guc *log_to_guc(struct intel_guc_log *log)
+{
+ return container_of(log, struct intel_guc, log);
+}
+
static
inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
{
@@ -190,4 +195,6 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask)
int intel_guc_reset_engine(struct intel_guc *guc,
struct intel_engine_cs *engine);
+void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
+
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
new file mode 100644
index 000000000000..fe7cb7b29a1e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <drm/drm_print.h>
+
+#include "gt/debugfs_gt.h"
+#include "intel_guc.h"
+#include "intel_guc_debugfs.h"
+#include "intel_guc_log_debugfs.h"
+
+static int guc_info_show(struct seq_file *m, void *data)
+{
+ struct intel_guc *guc = m->private;
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ if (!intel_guc_is_supported(guc))
+ return -ENODEV;
+
+ intel_guc_load_status(guc, &p);
+ drm_puts(&p, "\n");
+ intel_guc_log_info(&guc->log, &p);
+
+ /* Add more as required ... */
+
+ return 0;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_info);
+
+void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root)
+{
+ static const struct debugfs_gt_file files[] = {
+ { "guc_info", &guc_info_fops, NULL },
+ };
+
+ if (!intel_guc_is_supported(guc))
+ return;
+
+ intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), guc);
+ intel_guc_log_debugfs_register(&guc->log, root);
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.h
new file mode 100644
index 000000000000..424c26665cf1
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef DEBUGFS_GUC_H
+#define DEBUGFS_GUC_H
+
+struct intel_guc;
+struct dentry;
+
+void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root);
+
+#endif /* DEBUGFS_GUC_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 3a1c47d600ea..d4a87f4c9421 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -13,20 +13,6 @@
#include "intel_guc_fw.h"
#include "i915_drv.h"
-/**
- * intel_guc_fw_init_early() - initializes GuC firmware struct
- * @guc: intel_guc struct
- *
- * On platforms with GuC selects firmware for uploading
- */
-void intel_guc_fw_init_early(struct intel_guc *guc)
-{
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
-
- intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC, HAS_GT_UC(i915),
- INTEL_INFO(i915)->platform, INTEL_REVID(i915));
-}
-
static void guc_prepare_xfer(struct intel_uncore *uncore)
{
u32 shim_flags = GUC_DISABLE_SRAM_INIT_TO_ZEROES |
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h
index b5ab639d7259..0b4d2a9c9435 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h
@@ -8,7 +8,6 @@
struct intel_guc;
-void intel_guc_fw_init_early(struct intel_guc *guc);
int intel_guc_fw_upload(struct intel_guc *guc);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index caed0d57e704..fb10f3597ea5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -55,11 +55,6 @@ static int guc_action_control_log(struct intel_guc *guc, bool enable,
return intel_guc_send(guc, action, ARRAY_SIZE(action));
}
-static inline struct intel_guc *log_to_guc(struct intel_guc_log *log)
-{
- return container_of(log, struct intel_guc, log);
-}
-
static void guc_log_enable_flush_events(struct intel_guc_log *log)
{
intel_guc_enable_msg(log_to_guc(log),
@@ -672,3 +667,95 @@ void intel_guc_log_handle_flush_event(struct intel_guc_log *log)
{
queue_work(system_highpri_wq, &log->relay.flush_work);
}
+
+static const char *
+stringify_guc_log_type(enum guc_log_buffer_type type)
+{
+ switch (type) {
+ case GUC_ISR_LOG_BUFFER:
+ return "ISR";
+ case GUC_DPC_LOG_BUFFER:
+ return "DPC";
+ case GUC_CRASH_DUMP_LOG_BUFFER:
+ return "CRASH";
+ default:
+ MISSING_CASE(type);
+ }
+
+ return "";
+}
+
+/**
+ * intel_guc_log_info - dump information about GuC log relay
+ * @log: the GuC log
+ * @p: the &drm_printer
+ *
+ * Pretty printer for GuC log info
+ */
+void intel_guc_log_info(struct intel_guc_log *log, struct drm_printer *p)
+{
+ enum guc_log_buffer_type type;
+
+ if (!intel_guc_log_relay_created(log)) {
+ drm_puts(p, "GuC log relay not created\n");
+ return;
+ }
+
+ drm_puts(p, "GuC logging stats:\n");
+
+ drm_printf(p, "\tRelay full count: %u\n", log->relay.full_count);
+
+ for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) {
+ drm_printf(p, "\t%s:\tflush count %10u, overflow count %10u\n",
+ stringify_guc_log_type(type),
+ log->stats[type].flush,
+ log->stats[type].sampled_overflow);
+ }
+}
+
+/**
+ * intel_guc_log_dump - dump the contents of the GuC log
+ * @log: the GuC log
+ * @p: the &drm_printer
+ * @dump_load_err: dump the log saved on GuC load error
+ *
+ * Pretty printer for the GuC log
+ */
+int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p,
+ bool dump_load_err)
+{
+ struct intel_guc *guc = log_to_guc(log);
+ struct intel_uc *uc = container_of(guc, struct intel_uc, guc);
+ struct drm_i915_gem_object *obj = NULL;
+ u32 *map;
+ int i = 0;
+
+ if (!intel_guc_is_supported(guc))
+ return -ENODEV;
+
+ if (dump_load_err)
+ obj = uc->load_err_log;
+ else if (guc->log.vma)
+ obj = guc->log.vma->obj;
+
+ if (!obj)
+ return 0;
+
+ map = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(map)) {
+ DRM_DEBUG("Failed to pin object\n");
+ drm_puts(p, "(log data unaccessible)\n");
+ return PTR_ERR(map);
+ }
+
+ for (i = 0; i < obj->base.size / sizeof(u32); i += 4)
+ drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+ *(map + i), *(map + i + 1),
+ *(map + i + 2), *(map + i + 3));
+
+ drm_puts(p, "\n");
+
+ i915_gem_object_unpin_map(obj);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
index c252c022c5fc..11fccd0b2294 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
@@ -79,4 +79,8 @@ static inline u32 intel_guc_log_get_level(struct intel_guc_log *log)
return log->level;
}
+void intel_guc_log_info(struct intel_guc_log *log, struct drm_printer *p);
+int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p,
+ bool dump_load_err);
+
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c
new file mode 100644
index 000000000000..129e0cf7dfe2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/fs.h>
+#include <drm/drm_print.h>
+
+#include "gt/debugfs_gt.h"
+#include "intel_guc.h"
+#include "intel_guc_log.h"
+#include "intel_guc_log_debugfs.h"
+
+static int guc_log_dump_show(struct seq_file *m, void *data)
+{
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ return intel_guc_log_dump(m->private, &p, false);
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_log_dump);
+
+static int guc_load_err_log_dump_show(struct seq_file *m, void *data)
+{
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ return intel_guc_log_dump(m->private, &p, true);
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_load_err_log_dump);
+
+static int guc_log_level_get(void *data, u64 *val)
+{
+ struct intel_guc_log *log = data;
+
+ if (!intel_guc_is_used(log_to_guc(log)))
+ return -ENODEV;
+
+ *val = intel_guc_log_get_level(log);
+
+ return 0;
+}
+
+static int guc_log_level_set(void *data, u64 val)
+{
+ struct intel_guc_log *log = data;
+
+ if (!intel_guc_is_used(log_to_guc(log)))
+ return -ENODEV;
+
+ return intel_guc_log_set_level(log, val);
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(guc_log_level_fops,
+ guc_log_level_get, guc_log_level_set,
+ "%lld\n");
+
+static int guc_log_relay_open(struct inode *inode, struct file *file)
+{
+ struct intel_guc_log *log = inode->i_private;
+
+ if (!intel_guc_is_ready(log_to_guc(log)))
+ return -ENODEV;
+
+ file->private_data = log;
+
+ return intel_guc_log_relay_open(log);
+}
+
+static ssize_t
+guc_log_relay_write(struct file *filp,
+ const char __user *ubuf,
+ size_t cnt,
+ loff_t *ppos)
+{
+ struct intel_guc_log *log = filp->private_data;
+ int val;
+ int ret;
+
+ ret = kstrtoint_from_user(ubuf, cnt, 0, &val);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Enable and start the guc log relay on value of 1.
+ * Flush log relay for any other value.
+ */
+ if (val == 1)
+ ret = intel_guc_log_relay_start(log);
+ else
+ intel_guc_log_relay_flush(log);
+
+ return ret ?: cnt;
+}
+
+static int guc_log_relay_release(struct inode *inode, struct file *file)
+{
+ struct intel_guc_log *log = inode->i_private;
+
+ intel_guc_log_relay_close(log);
+ return 0;
+}
+
+static const struct file_operations guc_log_relay_fops = {
+ .owner = THIS_MODULE,
+ .open = guc_log_relay_open,
+ .write = guc_log_relay_write,
+ .release = guc_log_relay_release,
+};
+
+void intel_guc_log_debugfs_register(struct intel_guc_log *log,
+ struct dentry *root)
+{
+ static const struct debugfs_gt_file files[] = {
+ { "guc_log_dump", &guc_log_dump_fops, NULL },
+ { "guc_load_err_log_dump", &guc_load_err_log_dump_fops, NULL },
+ { "guc_log_level", &guc_log_level_fops, NULL },
+ { "guc_log_relay", &guc_log_relay_fops, NULL },
+ };
+
+ if (!intel_guc_is_supported(log_to_guc(log)))
+ return;
+
+ intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), log);
+}
+
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.h
new file mode 100644
index 000000000000..e8900e3d74ea
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef DEBUGFS_GUC_LOG_H
+#define DEBUGFS_GUC_LOG_H
+
+struct intel_guc_log;
+struct dentry;
+
+void intel_guc_log_debugfs_register(struct intel_guc_log *log,
+ struct dentry *root);
+
+#endif /* DEBUGFS_GUC_LOG_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index aa6d56e25a10..94eb63f309ce 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -258,7 +258,7 @@ static void guc_submit(struct intel_engine_cs *engine,
static inline int rq_prio(const struct i915_request *rq)
{
- return rq->sched.attr.priority | __NO_PREEMPTION;
+ return rq->sched.attr.priority;
}
static struct i915_request *schedule_in(struct i915_request *rq, int idx)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index a74b65694512..65eeb44b397d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -41,7 +41,7 @@ void intel_huc_init_early(struct intel_huc *huc)
{
struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
- intel_huc_fw_init_early(huc);
+ intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC);
if (INTEL_GEN(i915) >= 11) {
huc->status.reg = GEN11_HUC_KERNEL_LOAD_INFO;
@@ -200,9 +200,13 @@ fail:
* This function reads status register to verify if HuC
* firmware was successfully loaded.
*
- * Returns: 1 if HuC firmware is loaded and verified,
- * 0 if HuC firmware is not loaded and -ENODEV if HuC
- * is not present on this platform.
+ * Returns:
+ * * -ENODEV if HuC is not present on this platform,
+ * * -EOPNOTSUPP if HuC firmware is disabled,
+ * * -ENOPKG if HuC firmware was not installed,
+ * * -ENOEXEC if HuC firmware is invalid or mismatched,
+ * * 0 if HuC firmware is not running,
+ * * 1 if HuC firmware is authenticated and running.
*/
int intel_huc_check_status(struct intel_huc *huc)
{
@@ -210,11 +214,50 @@ int intel_huc_check_status(struct intel_huc *huc)
intel_wakeref_t wakeref;
u32 status = 0;
- if (!intel_huc_is_supported(huc))
+ switch (__intel_uc_fw_status(&huc->fw)) {
+ case INTEL_UC_FIRMWARE_NOT_SUPPORTED:
return -ENODEV;
+ case INTEL_UC_FIRMWARE_DISABLED:
+ return -EOPNOTSUPP;
+ case INTEL_UC_FIRMWARE_MISSING:
+ return -ENOPKG;
+ case INTEL_UC_FIRMWARE_ERROR:
+ return -ENOEXEC;
+ default:
+ break;
+ }
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
status = intel_uncore_read(gt->uncore, huc->status.reg);
return (status & huc->status.mask) == huc->status.value;
}
+
+/**
+ * intel_huc_load_status - dump information about HuC load status
+ * @huc: the HuC
+ * @p: the &drm_printer
+ *
+ * Pretty printer for HuC load status.
+ */
+void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p)
+{
+ struct intel_gt *gt = huc_to_gt(huc);
+ intel_wakeref_t wakeref;
+
+ if (!intel_huc_is_supported(huc)) {
+ drm_printf(p, "HuC not supported\n");
+ return;
+ }
+
+ if (!intel_huc_is_wanted(huc)) {
+ drm_printf(p, "HuC disabled\n");
+ return;
+ }
+
+ intel_uc_fw_dump(&huc->fw, p);
+
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ drm_printf(p, "HuC status: 0x%08x\n",
+ intel_uncore_read(gt->uncore, huc->status.reg));
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
index a40b9cfc6c22..daee43b661d4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -57,4 +57,6 @@ static inline bool intel_huc_is_authenticated(struct intel_huc *huc)
return intel_uc_fw_is_running(&huc->fw);
}
+void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p);
+
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c
new file mode 100644
index 000000000000..5733c15fd123
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <drm/drm_print.h>
+
+#include "gt/debugfs_gt.h"
+#include "intel_huc.h"
+#include "intel_huc_debugfs.h"
+
+static int huc_info_show(struct seq_file *m, void *data)
+{
+ struct intel_huc *huc = m->private;
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ if (!intel_huc_is_supported(huc))
+ return -ENODEV;
+
+ intel_huc_load_status(huc, &p);
+
+ return 0;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(huc_info);
+
+void intel_huc_debugfs_register(struct intel_huc *huc, struct dentry *root)
+{
+ static const struct debugfs_gt_file files[] = {
+ { "huc_info", &huc_info_fops, NULL },
+ };
+
+ if (!intel_huc_is_supported(huc))
+ return;
+
+ intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), huc);
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.h b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.h
new file mode 100644
index 000000000000..be79e992f976
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef DEBUGFS_HUC_H
+#define DEBUGFS_HUC_H
+
+struct intel_huc;
+struct dentry;
+
+void intel_huc_debugfs_register(struct intel_huc *huc, struct dentry *root);
+
+#endif /* DEBUGFS_HUC_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
index 9cdf4cbe691c..e5ef509c70e8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
@@ -8,23 +8,6 @@
#include "i915_drv.h"
/**
- * intel_huc_fw_init_early() - initializes HuC firmware struct
- * @huc: intel_huc struct
- *
- * On platforms with HuC selects firmware for uploading
- */
-void intel_huc_fw_init_early(struct intel_huc *huc)
-{
- struct intel_gt *gt = huc_to_gt(huc);
- struct intel_uc *uc = &gt->uc;
- struct drm_i915_private *i915 = gt->i915;
-
- intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC,
- intel_uc_wants_guc(uc),
- INTEL_INFO(i915)->platform, INTEL_REVID(i915));
-}
-
-/**
* intel_huc_fw_upload() - load HuC uCode to device
* @huc: intel_huc structure
*
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
index b791269ce923..12f264ee3e0b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
@@ -8,7 +8,6 @@
struct intel_huc;
-void intel_huc_fw_init_early(struct intel_huc *huc);
int intel_huc_fw_upload(struct intel_huc *huc);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index a4cbe06e06bd..f518fe05c6f9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -45,12 +45,12 @@ static void __confirm_options(struct intel_uc *uc)
{
struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
- DRM_DEV_DEBUG_DRIVER(i915->drm.dev,
- "enable_guc=%d (guc:%s submission:%s huc:%s)\n",
- i915_modparams.enable_guc,
- yesno(intel_uc_wants_guc(uc)),
- yesno(intel_uc_wants_guc_submission(uc)),
- yesno(intel_uc_wants_huc(uc)));
+ drm_dbg(&i915->drm,
+ "enable_guc=%d (guc:%s submission:%s huc:%s)\n",
+ i915_modparams.enable_guc,
+ yesno(intel_uc_wants_guc(uc)),
+ yesno(intel_uc_wants_guc_submission(uc)),
+ yesno(intel_uc_wants_huc(uc)));
if (i915_modparams.enable_guc == -1)
return;
@@ -63,25 +63,25 @@ static void __confirm_options(struct intel_uc *uc)
}
if (!intel_uc_supports_guc(uc))
- dev_info(i915->drm.dev,
+ drm_info(&i915->drm,
"Incompatible option enable_guc=%d - %s\n",
i915_modparams.enable_guc, "GuC is not supported!");
if (i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC &&
!intel_uc_supports_huc(uc))
- dev_info(i915->drm.dev,
+ drm_info(&i915->drm,
"Incompatible option enable_guc=%d - %s\n",
i915_modparams.enable_guc, "HuC is not supported!");
if (i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION &&
!intel_uc_supports_guc_submission(uc))
- dev_info(i915->drm.dev,
+ drm_info(&i915->drm,
"Incompatible option enable_guc=%d - %s\n",
i915_modparams.enable_guc, "GuC submission is N/A");
if (i915_modparams.enable_guc & ~(ENABLE_GUC_SUBMISSION |
ENABLE_GUC_LOAD_HUC))
- dev_info(i915->drm.dev,
+ drm_info(&i915->drm,
"Incompatible option enable_guc=%d - %s\n",
i915_modparams.enable_guc, "undocumented flag");
}
@@ -131,6 +131,13 @@ static void __uc_free_load_err_log(struct intel_uc *uc)
i915_gem_object_put(log);
}
+void intel_uc_driver_remove(struct intel_uc *uc)
+{
+ intel_uc_fini_hw(uc);
+ intel_uc_fini(uc);
+ __uc_free_load_err_log(uc);
+}
+
static inline bool guc_communication_enabled(struct intel_guc *guc)
{
return intel_guc_ct_enabled(&guc->ct);
@@ -311,8 +318,6 @@ static void __uc_fini(struct intel_uc *uc)
{
intel_huc_fini(&uc->huc);
intel_guc_fini(&uc->guc);
-
- __uc_free_load_err_log(uc);
}
static int __uc_sanitize(struct intel_uc *uc)
@@ -475,14 +480,14 @@ static int __uc_init_hw(struct intel_uc *uc)
if (intel_uc_uses_guc_submission(uc))
intel_guc_submission_enable(guc);
- dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n",
+ drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n",
intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path,
guc->fw.major_ver_found, guc->fw.minor_ver_found,
"submission",
enableddisabled(intel_uc_uses_guc_submission(uc)));
if (intel_uc_uses_huc(uc)) {
- dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n",
+ drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n",
intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC),
huc->fw.path,
huc->fw.major_ver_found, huc->fw.minor_ver_found,
@@ -503,7 +508,7 @@ err_out:
__uc_sanitize(uc);
if (!ret) {
- dev_notice(i915->drm.dev, "GuC is uninitialized\n");
+ drm_notice(&i915->drm, "GuC is uninitialized\n");
/* We want to run without GuC submission */
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
index 5ae7b50b7dc1..9c954c589edf 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
@@ -34,6 +34,7 @@ struct intel_uc {
void intel_uc_init_early(struct intel_uc *uc);
void intel_uc_driver_late_release(struct intel_uc *uc);
+void intel_uc_driver_remove(struct intel_uc *uc);
void intel_uc_init_mmio(struct intel_uc *uc);
void intel_uc_reset_prepare(struct intel_uc *uc);
void intel_uc_suspend(struct intel_uc *uc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
new file mode 100644
index 000000000000..9d16b784aa0d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/debugfs.h>
+
+#include "intel_guc_debugfs.h"
+#include "intel_huc_debugfs.h"
+#include "intel_uc.h"
+#include "intel_uc_debugfs.h"
+
+void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root)
+{
+ struct dentry *root;
+
+ if (!gt_root)
+ return;
+
+ /* GuC and HuC go always in pair, no need to check both */
+ if (!intel_uc_supports_guc(uc))
+ return;
+
+ root = debugfs_create_dir("uc", gt_root);
+ if (IS_ERR(root))
+ return;
+
+ intel_guc_debugfs_register(&uc->guc, root);
+ intel_huc_debugfs_register(&uc->huc, root);
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.h
new file mode 100644
index 000000000000..010ce250d223
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef DEBUGFS_UC_H
+#define DEBUGFS_UC_H
+
+struct intel_uc;
+struct dentry;
+
+void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root);
+
+#endif /* DEBUGFS_UC_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 18c755203688..e1caae93996d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -11,26 +11,32 @@
#include "intel_uc_fw_abi.h"
#include "i915_drv.h"
-static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw)
+static inline struct intel_gt *
+____uc_fw_to_gt(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type)
{
- GEM_BUG_ON(uc_fw->status == INTEL_UC_FIRMWARE_UNINITIALIZED);
- if (uc_fw->type == INTEL_UC_FW_TYPE_GUC)
+ if (type == INTEL_UC_FW_TYPE_GUC)
return container_of(uc_fw, struct intel_gt, uc.guc.fw);
- GEM_BUG_ON(uc_fw->type != INTEL_UC_FW_TYPE_HUC);
+ GEM_BUG_ON(type != INTEL_UC_FW_TYPE_HUC);
return container_of(uc_fw, struct intel_gt, uc.huc.fw);
}
+static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw)
+{
+ GEM_BUG_ON(uc_fw->status == INTEL_UC_FIRMWARE_UNINITIALIZED);
+ return ____uc_fw_to_gt(uc_fw, uc_fw->type);
+}
+
#ifdef CONFIG_DRM_I915_DEBUG_GUC
void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
enum intel_uc_fw_status status)
{
uc_fw->__status = status;
- DRM_DEV_DEBUG_DRIVER(__uc_fw_to_gt(uc_fw)->i915->drm.dev,
- "%s firmware -> %s\n",
- intel_uc_fw_type_repr(uc_fw->type),
- status == INTEL_UC_FIRMWARE_SELECTED ?
- uc_fw->path : intel_uc_fw_status_repr(status));
+ drm_dbg(&__uc_fw_to_gt(uc_fw)->i915->drm,
+ "%s firmware -> %s\n",
+ intel_uc_fw_type_repr(uc_fw->type),
+ status == INTEL_UC_FIRMWARE_SELECTED ?
+ uc_fw->path : intel_uc_fw_status_repr(status));
}
#endif
@@ -187,17 +193,15 @@ static void __uc_fw_user_override(struct intel_uc_fw *uc_fw)
* intel_uc_fw_init_early - initialize the uC object and select the firmware
* @uc_fw: uC firmware
* @type: type of uC
- * @supported: is uC support possible
- * @platform: platform identifier
- * @rev: hardware revision
*
* Initialize the state of our uC object and relevant tracking and select the
* firmware to fetch and load.
*/
void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
- enum intel_uc_fw_type type, bool supported,
- enum intel_platform platform, u8 rev)
+ enum intel_uc_fw_type type)
{
+ struct drm_i915_private *i915 = ____uc_fw_to_gt(uc_fw, type)->i915;
+
/*
* we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status
* before we're looked at the HW caps to see if we have uc support
@@ -208,8 +212,10 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
uc_fw->type = type;
- if (supported) {
- __uc_fw_auto_select(uc_fw, platform, rev);
+ if (HAS_GT_UC(i915)) {
+ __uc_fw_auto_select(uc_fw,
+ INTEL_INFO(i915)->platform,
+ INTEL_REVID(i915));
__uc_fw_user_override(uc_fw);
}
@@ -290,7 +296,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
/* Check the size of the blob before examining buffer contents */
if (unlikely(fw->size < sizeof(struct uc_css_header))) {
- dev_warn(dev, "%s firmware %s: invalid size: %zu < %zu\n",
+ drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
fw->size, sizeof(struct uc_css_header));
err = -ENODATA;
@@ -303,7 +309,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw -
css->exponent_size_dw) * sizeof(u32);
if (unlikely(size != sizeof(struct uc_css_header))) {
- dev_warn(dev,
+ drm_warn(&i915->drm,
"%s firmware %s: unexpected header size: %zu != %zu\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
fw->size, sizeof(struct uc_css_header));
@@ -316,7 +322,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
/* now RSA */
if (unlikely(css->key_size_dw != UOS_RSA_SCRATCH_COUNT)) {
- dev_warn(dev, "%s firmware %s: unexpected key size: %u != %u\n",
+ drm_warn(&i915->drm, "%s firmware %s: unexpected key size: %u != %u\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
css->key_size_dw, UOS_RSA_SCRATCH_COUNT);
err = -EPROTO;
@@ -327,7 +333,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
/* At least, it should have header, uCode and RSA. Size of all three. */
size = sizeof(struct uc_css_header) + uc_fw->ucode_size + uc_fw->rsa_size;
if (unlikely(fw->size < size)) {
- dev_warn(dev, "%s firmware %s: invalid size: %zu < %zu\n",
+ drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
fw->size, size);
err = -ENOEXEC;
@@ -337,7 +343,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
/* Sanity check whether this fw is not larger than whole WOPCM memory */
size = __intel_uc_fw_get_upload_size(uc_fw);
if (unlikely(size >= i915->wopcm.size)) {
- dev_warn(dev, "%s firmware %s: invalid size: %zu > %zu\n",
+ drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu > %zu\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
size, (size_t)i915->wopcm.size);
err = -E2BIG;
@@ -352,7 +358,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
- dev_notice(dev, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
+ drm_notice(&i915->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
uc_fw->major_ver_found, uc_fw->minor_ver_found,
uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
@@ -380,9 +386,9 @@ fail:
INTEL_UC_FIRMWARE_MISSING :
INTEL_UC_FIRMWARE_ERROR);
- dev_notice(dev, "%s firmware %s: fetch failed with error %d\n",
+ drm_notice(&i915->drm, "%s firmware %s: fetch failed with error %d\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
- dev_info(dev, "%s firmware(s) can be downloaded from %s\n",
+ drm_info(&i915->drm, "%s firmware(s) can be downloaded from %s\n",
intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL);
release_firmware(fw); /* OK even if fw is NULL */
@@ -467,7 +473,7 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
/* Wait for DMA to finish */
ret = intel_wait_for_register_fw(uncore, DMA_CTRL, START_DMA, 0, 100);
if (ret)
- dev_err(gt->i915->drm.dev, "DMA for %s fw failed, DMA_CTRL=%u\n",
+ drm_err(&gt->i915->drm, "DMA for %s fw failed, DMA_CTRL=%u\n",
intel_uc_fw_type_repr(uc_fw->type),
intel_uncore_read_fw(uncore, DMA_CTRL));
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 888ff0de0244..23d3a423ac0f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -239,8 +239,7 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)
}
void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
- enum intel_uc_fw_type type, bool supported,
- enum intel_platform platform, u8 rev);
+ enum intel_uc_fw_type type);
int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw);
void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw);
int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 offset, u32 dma_flags);