summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gpu_error.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-02-01 17:48:47 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-02-01 17:48:47 -0800
commit4bf772b14675411a69b3c807f73006de0fe4b649 (patch)
treeb841e3ba0e3429695589cb0ab73871fa12f42c38 /drivers/gpu/drm/i915/i915_gpu_error.c
parent3879ae653a3e98380fe2daf653338830b7ca0097 (diff)
parent24b8ef699e8221d2b7f813adaab13eec053e1507 (diff)
Merge tag 'drm-for-v4.16' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie: "This seems to have been a comparatively quieter merge window, I assume due to holidays etc. The "biggest" change is AMD header cleanups, which merge/remove a bunch of them. The AMD gpu scheduler is now being made generic with the etnaviv driver wanting to reuse the code, hopefully other drivers can go in the same direction. Otherwise it's the usual lots of stuff in i915/amdgpu, not so much stuff elsewhere. Core: - Add .last_close and .output_poll_changed helpers to reduce driver footprints - Fix plane clipping - Improved debug printing support - Add panel orientation property - Update edid derived properties at edid setting - Reduction in fbdev driver footprint - Move amdgpu scheduler into core for other drivers to use. i915: - Selftest and IGT improvements - Fast boot prep work on IPS, pipe config - HW workarounds for Cannonlake, Geminilake - Cannonlake clock and HDMI2.0 fixes - GPU cache invalidation and context switch improvements - Display planes cleanup - New PMU interface for perf queries - New firmware support for KBL/SKL - Geminilake HW workaround for perforamce - Coffeelake stolen memory improvements - GPU reset robustness work - Cannonlake horizontal plane flipping - GVT work amdgpu/radeon: - RV and Vega header file cleanups (lots of lines gone!) - TTM operation context support - 48-bit GPUVM support for Vega/RV - ECC support for Vega - Resizeable BAR support - Multi-display sync support - Enable swapout for reserved BOs during allocation - S3 fixes on Raven - GPU reset cleanup and fixes - 2+1 level GPU page table amdkfd: - GFX7/8 SDMA user queues support - Hardware scheduling for multiple processes - dGPU prep work rcar: - Added R8A7743/5 support - System suspend/resume support sun4i: - Multi-plane support for YUV formats - A83T and LVDS support msm: - Devfreq support for GPU tegra: - Prep work for adding Tegra186 support - Tegra186 HDMI support - HDMI2.0 and zpos support by using generic helpers tilcdc: - Misc fixes omapdrm: - Support memory bandwidth limits - DSI command mode panel cleanups - DMM error handling exynos: - drop the old IPP subdriver. etnaviv: - Occlusion query fixes - Job handling fixes - Prep work for hooking in gpu scheduler armada: - Move closer to atomic modesetting - Allow disabling primary plane if overlay is full screen imx: - Format modifier support - Add tile prefetch to PRE - Runtime PM support for PRG ast: - fix LUT loading" * tag 'drm-for-v4.16' of git://people.freedesktop.org/~airlied/linux: (1471 commits) drm/ast: Load lut in crtc_commit drm: Check for lessee in DROP_MASTER ioctl drm: fix gpu scheduler link order drm/amd/display: Demote error print to debug print when ATOM impl missing dma-buf: fix reservation_object_wait_timeout_rcu once more v2 drm/amdgpu: Avoid leaking PM domain on driver unbind (v2) drm/amd/amdgpu: Add Polaris version check drm/amdgpu: Reenable manual GPU reset from sysfs drm/amdgpu: disable MMHUB power gating on raven drm/ttm: Don't unreserve swapped BOs that were previously reserved drm/ttm: Don't add swapped BOs to swap-LRU list drm/amdgpu: only check for ECC on Vega10 drm/amd/powerplay: Fix smu_table_entry.handle type drm/ttm: add VADDR_FLAG_UPDATED_COUNT to correctly update dma_page global count drm: Fix PANEL_ORIENTATION_QUIRKS breaking the Kconfig DRM menuconfig drm/radeon: fill in rb backend map on evergreen/ni. drm/amdgpu/gfx9: fix ngg enablement to clear gds reserved memory (v2) drm/ttm: only free pages rather than update global memory count together drm/amdgpu: fix CPU based VM updates drm/amdgpu: fix typo in amdgpu_vce_validate_bo ...
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c195
1 files changed, 115 insertions, 80 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 653fb69e7ecb..944059322daa 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -30,6 +30,8 @@
#include <generated/utsrelease.h>
#include <linux/stop_machine.h>
#include <linux/zlib.h>
+#include <drm/drm_print.h>
+
#include "i915_drv.h"
static const char *engine_str(int engine)
@@ -175,6 +177,21 @@ static void i915_error_puts(struct drm_i915_error_state_buf *e,
#define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
#define err_puts(e, s) i915_error_puts(e, s)
+static void __i915_printfn_error(struct drm_printer *p, struct va_format *vaf)
+{
+ i915_error_vprintf(p->arg, vaf->fmt, *vaf->va);
+}
+
+static inline struct drm_printer
+i915_error_printer(struct drm_i915_error_state_buf *e)
+{
+ struct drm_printer p = {
+ .printfn = __i915_printfn_error,
+ .arg = e,
+ };
+ return p;
+}
+
#ifdef CONFIG_DRM_I915_COMPRESS_ERROR
struct compress {
@@ -399,6 +416,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
int n;
err_printf(m, "%s command stream:\n", engine_str(ee->engine_id));
+ err_printf(m, " IDLE?: %s\n", yesno(ee->idle));
err_printf(m, " START: 0x%08x\n", ee->start);
err_printf(m, " HEAD: 0x%08x [0x%08x]\n", ee->head, ee->rq_head);
err_printf(m, " TAIL: 0x%08x [0x%08x, 0x%08x]\n",
@@ -547,34 +565,17 @@ static void print_error_obj(struct drm_i915_error_state_buf *m,
static void err_print_capabilities(struct drm_i915_error_state_buf *m,
const struct intel_device_info *info)
{
-#define PRINT_FLAG(x) err_printf(m, #x ": %s\n", yesno(info->x))
- DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG);
-#undef PRINT_FLAG
-}
+ struct drm_printer p = i915_error_printer(m);
-static __always_inline void err_print_param(struct drm_i915_error_state_buf *m,
- const char *name,
- const char *type,
- const void *x)
-{
- if (!__builtin_strcmp(type, "bool"))
- err_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x));
- else if (!__builtin_strcmp(type, "int"))
- err_printf(m, "i915.%s=%d\n", name, *(const int *)x);
- else if (!__builtin_strcmp(type, "unsigned int"))
- err_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x);
- else if (!__builtin_strcmp(type, "char *"))
- err_printf(m, "i915.%s=%s\n", name, *(const char **)x);
- else
- BUILD_BUG();
+ intel_device_info_dump_flags(info, &p);
}
static void err_print_params(struct drm_i915_error_state_buf *m,
- const struct i915_params *p)
+ const struct i915_params *params)
{
-#define PRINT(T, x, ...) err_print_param(m, #x, #T, &p->x);
- I915_PARAMS_FOR_EACH(PRINT);
-#undef PRINT
+ struct drm_printer p = i915_error_printer(m);
+
+ i915_params_dump(params, &p);
}
static void err_print_pciid(struct drm_i915_error_state_buf *m,
@@ -589,6 +590,21 @@ static void err_print_pciid(struct drm_i915_error_state_buf *m,
pdev->subsystem_device);
}
+static void err_print_uc(struct drm_i915_error_state_buf *m,
+ const struct i915_error_uc *error_uc)
+{
+ struct drm_printer p = i915_error_printer(m);
+ const struct i915_gpu_state *error =
+ container_of(error_uc, typeof(*error), uc);
+
+ if (!error->device_info.has_guc)
+ return;
+
+ intel_uc_fw_dump(&error_uc->guc_fw, &p);
+ intel_uc_fw_dump(&error_uc->huc_fw, &p);
+ print_error_obj(m, NULL, "GuC log buffer", error_uc->guc_log);
+}
+
int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
const struct i915_gpu_state *error)
{
@@ -759,11 +775,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
print_error_obj(m, dev_priv->engine[i],
"WA batchbuffer", ee->wa_batchbuffer);
- }
-
- print_error_obj(m, NULL, "Semaphores", error->semaphore);
- print_error_obj(m, NULL, "GuC log buffer", error->guc_log);
+ print_error_obj(m, dev_priv->engine[i],
+ "NULL context", ee->default_state);
+ }
if (error->overlay)
intel_overlay_print_error_state(m, error->overlay);
@@ -773,6 +788,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
err_print_capabilities(m, &error->device_info);
err_print_params(m, &error->params);
+ err_print_uc(m, &error->uc);
if (m->bytes == 0 && m->err)
return m->err;
@@ -831,6 +847,22 @@ static __always_inline void free_param(const char *type, void *x)
kfree(*(void **)x);
}
+static void cleanup_params(struct i915_gpu_state *error)
+{
+#define FREE(T, x, ...) free_param(#T, &error->params.x);
+ I915_PARAMS_FOR_EACH(FREE);
+#undef FREE
+}
+
+static void cleanup_uc_state(struct i915_gpu_state *error)
+{
+ struct i915_error_uc *error_uc = &error->uc;
+
+ kfree(error_uc->guc_fw.path);
+ kfree(error_uc->huc_fw.path);
+ i915_error_object_free(error_uc->guc_log);
+}
+
void __i915_gpu_state_free(struct kref *error_ref)
{
struct i915_gpu_state *error =
@@ -856,9 +888,6 @@ void __i915_gpu_state_free(struct kref *error_ref)
kfree(ee->waiters);
}
- i915_error_object_free(error->semaphore);
- i915_error_object_free(error->guc_log);
-
for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
kfree(error->active_bo[i]);
kfree(error->pinned_bo);
@@ -866,9 +895,8 @@ void __i915_gpu_state_free(struct kref *error_ref)
kfree(error->overlay);
kfree(error->display);
-#define FREE(T, x, ...) free_param(#T, &error->params.x);
- I915_PARAMS_FOR_EACH(FREE);
-#undef FREE
+ cleanup_params(error);
+ cleanup_uc_state(error);
kfree(error);
}
@@ -912,7 +940,7 @@ i915_error_object_create(struct drm_i915_private *i915,
ggtt->base.insert_page(&ggtt->base, dma, slot,
I915_CACHE_NONE, 0);
- s = io_mapping_map_atomic_wc(&ggtt->mappable, slot);
+ s = io_mapping_map_atomic_wc(&ggtt->iomap, slot);
ret = compress_page(&compress, (void __force *)s, dst);
io_mapping_unmap_atomic(s);
@@ -1071,34 +1099,6 @@ gen8_engine_sync_index(struct intel_engine_cs *engine,
return idx;
}
-static void gen8_record_semaphore_state(struct i915_gpu_state *error,
- struct intel_engine_cs *engine,
- struct drm_i915_error_engine *ee)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- struct intel_engine_cs *to;
- enum intel_engine_id id;
-
- if (!error->semaphore)
- return;
-
- for_each_engine(to, dev_priv, id) {
- int idx;
- u16 signal_offset;
- u32 *tmp;
-
- if (engine == to)
- continue;
-
- signal_offset =
- (GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4;
- tmp = error->semaphore->pages[0];
- idx = gen8_engine_sync_index(engine, to);
-
- ee->semaphore_mboxes[idx] = tmp[signal_offset];
- }
-}
-
static void gen6_record_semaphore_state(struct intel_engine_cs *engine,
struct drm_i915_error_engine *ee)
{
@@ -1172,11 +1172,12 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
if (INTEL_GEN(dev_priv) >= 6) {
ee->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base));
- ee->fault_reg = I915_READ(RING_FAULT_REG(engine));
- if (INTEL_GEN(dev_priv) >= 8)
- gen8_record_semaphore_state(error, engine, ee);
- else
+ if (INTEL_GEN(dev_priv) >= 8) {
+ ee->fault_reg = I915_READ(GEN8_RING_FAULT_REG);
+ } else {
gen6_record_semaphore_state(engine, ee);
+ ee->fault_reg = I915_READ(RING_FAULT_REG(engine));
+ }
}
if (INTEL_GEN(dev_priv) >= 4) {
@@ -1239,6 +1240,7 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
ee->hws = I915_READ(mmio);
}
+ ee->idle = intel_engine_is_idle(engine);
ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
ee->hangcheck_action = engine->hangcheck.action;
ee->hangcheck_stalled = engine->hangcheck.stalled;
@@ -1400,15 +1402,30 @@ static void request_record_user_bo(struct drm_i915_gem_request *request,
ee->user_bo_count = count;
}
+static struct drm_i915_error_object *
+capture_object(struct drm_i915_private *dev_priv,
+ struct drm_i915_gem_object *obj)
+{
+ if (obj && i915_gem_object_has_pages(obj)) {
+ struct i915_vma fake = {
+ .node = { .start = U64_MAX, .size = obj->base.size },
+ .size = obj->base.size,
+ .pages = obj->mm.pages,
+ .obj = obj,
+ };
+
+ return i915_error_object_create(dev_priv, &fake);
+ } else {
+ return NULL;
+ }
+}
+
static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
struct i915_gpu_state *error)
{
struct i915_ggtt *ggtt = &dev_priv->ggtt;
int i;
- error->semaphore =
- i915_error_object_create(dev_priv, dev_priv->semaphore);
-
for (i = 0; i < I915_NUM_ENGINES; i++) {
struct intel_engine_cs *engine = dev_priv->engine[i];
struct drm_i915_error_engine *ee = &error->engine[i];
@@ -1474,6 +1491,9 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
ee->wa_ctx =
i915_error_object_create(dev_priv, engine->wa_ctx.vma);
+
+ ee->default_state =
+ capture_object(dev_priv, engine->default_state);
}
}
@@ -1559,15 +1579,25 @@ static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv,
error->pinned_bo = bo;
}
-static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv,
- struct i915_gpu_state *error)
+static void capture_uc_state(struct i915_gpu_state *error)
{
- /* Capturing log buf contents won't be useful if logging was disabled */
- if (!dev_priv->guc.log.vma || (i915_modparams.guc_log_level < 0))
+ struct drm_i915_private *i915 = error->i915;
+ struct i915_error_uc *error_uc = &error->uc;
+
+ /* Capturing uC state won't be useful if there is no GuC */
+ if (!error->device_info.has_guc)
return;
- error->guc_log = i915_error_object_create(dev_priv,
- dev_priv->guc.log.vma);
+ error_uc->guc_fw = i915->guc.fw;
+ error_uc->huc_fw = i915->huc.fw;
+
+ /* Non-default firmware paths will be specified by the modparam.
+ * As modparams are generally accesible from the userspace make
+ * explicit copies of the firmware paths.
+ */
+ error_uc->guc_fw.path = kstrdup(i915->guc.fw.path, GFP_ATOMIC);
+ error_uc->huc_fw.path = kstrdup(i915->huc.fw.path, GFP_ATOMIC);
+ error_uc->guc_log = i915_error_object_create(i915, i915->guc.log.vma);
}
/* Capture all registers which don't fit into another category. */
@@ -1695,6 +1725,14 @@ static __always_inline void dup_param(const char *type, void *x)
*(void **)x = kstrdup(*(void **)x, GFP_ATOMIC);
}
+static void capture_params(struct i915_gpu_state *error)
+{
+ error->params = i915_modparams;
+#define DUP(T, x, ...) dup_param(#T, &error->params.x);
+ I915_PARAMS_FOR_EACH(DUP);
+#undef DUP
+}
+
static int capture(void *data)
{
struct i915_gpu_state *error = data;
@@ -1705,10 +1743,8 @@ static int capture(void *data)
ktime_to_timeval(ktime_sub(ktime_get(),
error->i915->gt.last_init_time));
- error->params = i915_modparams;
-#define DUP(T, x, ...) dup_param(#T, &error->params.x);
- I915_PARAMS_FOR_EACH(DUP);
-#undef DUP
+ capture_params(error);
+ capture_uc_state(error);
i915_capture_gen_state(error->i915, error);
i915_capture_reg_state(error->i915, error);
@@ -1716,7 +1752,6 @@ static int capture(void *data)
i915_gem_record_rings(error->i915, error);
i915_capture_active_buffers(error->i915, error);
i915_capture_pinned_buffers(error->i915, error);
- i915_gem_capture_guc_log_buffer(error->i915, error);
error->overlay = intel_overlay_capture_error_state(error->i915);
error->display = intel_display_capture_error_state(error->i915);