author    Dave Airlie <airlied@redhat.com>    2022-04-28 15:32:29 +1000
committer Dave Airlie <airlied@redhat.com>    2022-04-28 15:32:29 +1000
commit    9bda072a7bec278c424ad660373e69d8e4a3385d
tree      9082199d09fcdf1f2c1a350fef8837c1700e30c8
parent    4eaf02db9c2680ca92af92e2de7b33c6e079b2cd
parent    f15856d7de914595d0daa2c706f53a693b48e228
Merge tag 'drm-intel-gt-next-2022-04-27' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
UAPI Changes:

- GuC hwconfig support and query (John Harrison, Rodrigo Vivi, Tvrtko Ursulin)
- Sysfs support for multi-tile devices (Andi Shyti, Sujaritha Sundaresan)
- Per client GPU utilisation via fdinfo (Tvrtko Ursulin, Ashutosh Dixit)
- Add DRM_I915_QUERY_GEOMETRY_SUBSLICES (Matt Atwood)

Cross-subsystem Changes:

- Add GSC as a MEI auxiliary device (Tomas Winkler, Alexander Usyskin)

Core Changes:

- Document fdinfo format specification (Tvrtko Ursulin)

Driver Changes:

- Fix prime_mmap to work when using LMEM (Gwan-gyeong Mun)
- Fix vm open count and remove vma refcount (Thomas Hellström)
- Fixup setting screen_size (Matthew Auld)
- Opportunistically apply ALLOC_CONTIGIOUS (Matthew Auld)
- Limit where we apply TTM_PL_FLAG_CONTIGUOUS (Matthew Auld)
- Drop aux table invalidation on FlatCCS platforms (Matt Roper)
- Add missing boundary check in vm_access (Mastan Katragadda)
- Update topology dumps for Xe_HP (Matt Roper)
- Add support for steered register writes (Matt Roper)
- Add steering info to GuC register save/restore list (Daniele Ceraolo Spurio)
- Small PCI BAR enabling (Matthew Auld, Akeem G Abodunrin, CQ Tang)
- Add preemption changes for Wa_14015141709 (Akeem G Abodunrin)
- Add logical mapping for video decode engines (Matthew Brost)
- Don't evict unmappable VMAs when pinning with PIN_MAPPABLE (v2) (Vivek Kasireddy)
- GuC error capture support (Alan Previn, Daniele Ceraolo Spurio)
- avoid concurrent writes to aux_inv (Fei Yang)
- Add Wa_22014226127 (José Roberto de Souza)
- Sunset igpu legacy mmap support based on GRAPHICS_VER_FULL (Matt Roper)
- Evict and restore of compressed objects (Ramalingam C)
- Update to GuC version 70.1.1 (John Harrison)
- Add Wa_22011802037 force cs halt (Tilak Tangudu)
- Enable Wa_22011802037 for gen12 GuC based platforms (Umesh Nerlige Ramappa)
- GuC based workarounds for DG2 (Vinay Belgaumkar, John Harrison, Matthew Brost, José Roberto de Souza)
- consider min_page_size when migrating (Matthew Auld)
- Prep work for next GuC firmware release (John Harrison)
- Support platforms with CCS engines but no RCS (Matt Roper, Stuart Summers)
- Don't overallocate subslice storage (Matt Roper)
- Reduce stack usage in debugfs due to SSEU (John Harrison)
- Report steering details in debugfs (Matt Roper)
- Refactor some x86-ism out to prepare for non-x86 builds (Michael Cheng)
- add lmem_size modparam (CQ Tang)
- Refactor for non-x86 driver builds (Casey Bowman)
- Centralize computation of freq caps (Ashutosh Dixit)
- Update dma_buf_ops.unmap_dma_buf callback to use drm_gem_unmap_dma_buf() (Gwan-gyeong Mun)
- Limit the async bind to bind_async_flags (Matthew Auld)
- Stop checking for NULL vma->obj (Matthew Auld)
- Reduce overzealous alignment constraints for GGTT (Matthew Auld)
- Remove GEN12_SFC_DONE_MAX from register defs header (Matt Roper)
- Fix renamed struct field (Lucas De Marchi)
- Do not return '0' if there is nothing to return (Andi Shyti)
- fix i915_reg_t initialization (Jani Nikula)
- move the migration sanity check (Matthew Auld)
- handle more rounding in selftests (Matthew Auld)
- Perf and i915 query kerneldoc updates (Matt Roper)
- Use i915_probe_error instead of drm_err (Vinay Belgaumkar)
- sanity check object size in the buddy allocator (Matthew Auld)
- fixup selftests min_alignment usage (Matthew Auld)
- tweak selftests misaligned_case (Matthew Auld)

Signed-off-by: Dave Airlie <airlied@redhat.com>

# Conflicts:
#	drivers/gpu/drm/i915/i915_vma.c

From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Ymkfy8FjsG2JrodK@tursulin-mobl2
-rw-r--r--  Documentation/gpu/drm-usage-stats.rst | 112
-rw-r--r--  Documentation/gpu/i915.rst | 28
-rw-r--r--  Documentation/gpu/index.rst | 1
-rw-r--r--  MAINTAINERS | 1
-rw-r--r--  drivers/gpu/drm/i915/Kconfig | 3
-rw-r--r--  drivers/gpu/drm/i915/Makefile | 10
-rw-r--r--  drivers/gpu/drm/i915/display/intel_dpt.c | 2
-rw-r--r--  drivers/gpu/drm/i915/display/intel_fb.c | 2
-rw-r--r--  drivers/gpu/drm/i915/display/intel_fb_pin.c | 2
-rw-r--r--  drivers/gpu/drm/i915/display/intel_fbdev.c | 4
-rw-r--r--  drivers/gpu/drm/i915/display/intel_plane_initial.c | 56
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_context.c | 70
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 6
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_create.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 15
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 14
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 4
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object.c | 3
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 2
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_region.c | 50
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_region.h | 7
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 1
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 144
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_stolen.h | 4
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 52
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 1
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 6
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c | 12
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/mock_context.c | 5
-rw-r--r--  drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 113
-rw-r--r--  drivers/gpu/drm/i915/gt/gen8_engine_cs.h | 4
-rw-r--r--  drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 12
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context.c | 29
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context.h | 15
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context_types.h | 24
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine.h | 13
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_cs.c | 40
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_regs.h | 1
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_types.h | 13
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_user.c | 11
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 49
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ggtt.c | 693
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 29
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gsc.c | 224
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gsc.h | 37
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt.c | 223
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt.h | 41
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c | 4
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_debugfs.c | 20
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_debugfs.h | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_gmch.c | 654
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_gmch.h | 46
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_irq.c | 13
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_pm.c | 18
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 40
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h | 4
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_regs.h | 21
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_sysfs.c | 122
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_sysfs.h | 34
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 601
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.h | 15
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_types.h | 18
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gtt.c | 56
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gtt.h | 65
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_hwconfig.h | 21
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.c | 39
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.h | 27
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_migrate.c | 390
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ppgtt.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rc6.c | 8
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_region_lmem.c | 10
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_reset.c | 7
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring_submission.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rps.c | 126
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rps.h | 6
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rps_types.h | 15
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_sseu.c | 54
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_sseu.h | 50
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c | 30
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_workarounds.c | 21
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_execlists.c | 86
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_lrc.c | 10
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_migrate.c | 259
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 14
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h | 4
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h | 15
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h | 218
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc.c | 48
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc.h | 19
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 185
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c | 1657
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h | 33
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 92
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c | 164
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 125
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_log.h | 7
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 56
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 645
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 30
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 2
-rw-r--r--  drivers/gpu/drm/i915/i915_debugfs.c | 12
-rw-r--r--  drivers/gpu/drm/i915/i915_driver.c | 37
-rw-r--r--  drivers/gpu/drm/i915/i915_drm_client.c | 158
-rw-r--r--  drivers/gpu/drm/i915/i915_drm_client.h | 68
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h | 25
-rw-r--r--  drivers/gpu/drm/i915/i915_file_private.h | 3
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 89
-rw-r--r--  drivers/gpu/drm/i915/i915_gpu_error.c | 297
-rw-r--r--  drivers/gpu/drm/i915/i915_gpu_error.h | 37
-rw-r--r--  drivers/gpu/drm/i915/i915_params.c | 3
-rw-r--r--  drivers/gpu/drm/i915/i915_params.h | 1
-rw-r--r--  drivers/gpu/drm/i915/i915_pci.c | 4
-rw-r--r--  drivers/gpu/drm/i915/i915_query.c | 94
-rw-r--r--  drivers/gpu/drm/i915/i915_reg.h | 18
-rw-r--r--  drivers/gpu/drm/i915/i915_reg_defs.h | 2
-rw-r--r--  drivers/gpu/drm/i915/i915_sysfs.c | 310
-rw-r--r--  drivers/gpu/drm/i915/i915_sysfs.h | 3
-rw-r--r--  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 4
-rw-r--r--  drivers/gpu/drm/i915/i915_vma.c | 108
-rw-r--r--  drivers/gpu/drm/i915/i915_vma.h | 14
-rw-r--r--  drivers/gpu/drm/i915/i915_vma_resource.c | 2
-rw-r--r--  drivers/gpu/drm/i915/i915_vma_resource.h | 6
-rw-r--r--  drivers/gpu/drm/i915/i915_vma_types.h | 8
-rw-r--r--  drivers/gpu/drm/i915/intel_device_info.h | 2
-rw-r--r--  drivers/gpu/drm/i915/intel_memory_region.c | 2
-rw-r--r--  drivers/gpu/drm/i915/intel_memory_region.h | 8
-rw-r--r--  drivers/gpu/drm/i915/intel_region_ttm.c | 7
-rw-r--r--  drivers/gpu/drm/i915/intel_region_ttm.h | 1
-rw-r--r--  drivers/gpu/drm/i915/intel_uncore.c | 86
-rw-r--r--  drivers/gpu/drm/i915/intel_uncore.h | 7
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 18
-rw-r--r--  drivers/gpu/drm/i915/selftests/mock_gem_device.c | 13
-rw-r--r--  drivers/gpu/drm/i915/selftests/mock_region.c | 4
-rw-r--r--  drivers/misc/mei/Kconfig | 14
-rw-r--r--  drivers/misc/mei/Makefile | 3
-rw-r--r--  drivers/misc/mei/bus-fixup.c | 25
-rw-r--r--  drivers/misc/mei/gsc-me.c | 259
-rw-r--r--  drivers/misc/mei/hw-me.c | 29
-rw-r--r--  drivers/misc/mei/hw-me.h | 2
-rw-r--r--  include/linux/mei_aux.h | 19
-rw-r--r--  include/uapi/drm/i915_drm.h | 291
143 files changed, 8164 insertions, 2333 deletions
diff --git a/Documentation/gpu/drm-usage-stats.rst b/Documentation/gpu/drm-usage-stats.rst
new file mode 100644
index 000000000000..6c9f166a8d6f
--- /dev/null
+++ b/Documentation/gpu/drm-usage-stats.rst
@@ -0,0 +1,112 @@
+.. _drm-client-usage-stats:
+
+======================
+DRM client usage stats
+======================
+
+DRM drivers can choose to export partly standardised text output via
+`fops->show_fdinfo()` as part of the driver-specific file operations
+registered in the `struct drm_driver` object with the DRM core.
+
+One purpose of this output is to enable writing as generic as practically
+feasible `top(1)`-like userspace monitoring tools.
+
+Given the differences between various DRM drivers, the specification of the
+output is split into common and driver-specific parts. Having said that,
+wherever possible effort should still be made to standardise as much as
+possible.
+
+File format specification
+=========================
+
+- File shall contain one key-value pair per line of text.
+- Colon character (`:`) must be used to delimit keys and values.
+- All keys shall be prefixed with `drm-`.
+- Whitespace between the delimiter and the first non-whitespace character
+  shall be ignored when parsing.
+- Neither keys nor values are allowed to contain whitespace characters.
+- Numerical key-value pairs can end with an optional unit string.
+- Data type of the value is fixed as defined in the specification.
+
+Key types
+---------
+
+1. Mandatory, fully standardised.
+2. Optional, fully standardised.
+3. Driver specific.
+
+Data types
+----------
+
+- <uint> - Unsigned integer without a defined maximum value.
+- <str> - String excluding any reserved characters or whitespace defined above.
+
+Mandatory fully standardised keys
+---------------------------------
+
+- drm-driver: <str>
+
+String shall contain the name this driver registered as via the respective
+`struct drm_driver` data structure.
+
+Optional fully standardised keys
+--------------------------------
+
+- drm-pdev: <aaaa:bb:cc.d>
+
+For PCI devices this should contain the PCI slot address of the device in
+question.
+
+- drm-client-id: <uint>
+
+Unique value relating to the open DRM file descriptor, used to distinguish
+duplicated and shared file descriptors. Conceptually the value should map 1:1
+to the in-kernel representation of `struct drm_file` instances.
+
+The value shall be either globally unique, or unique within the scope of each
+device, in which case `drm-pdev` shall be present as well.
+
+Userspace should use the above criteria to associate data with individual
+clients and make sure not to double-account any usage statistics.
+
+- drm-engine-<str>: <uint> ns
+
+GPUs usually contain multiple execution engines. Each shall be given a stable
+and unique name (str), with possible values documented in the driver specific
+documentation.
+
+Value shall be the amount of time, in the specified unit, that the respective
+GPU engine has spent busy executing workloads belonging to this client.
+
+Values are not required to be constantly monotonic if it makes the driver
+implementation easier, but are required to catch up with the previously reported
+larger value within a reasonable period. Upon observing a value lower than what
+was previously read, userspace is expected to stay with that larger previous
+value until a monotonic update is seen.
+
+- drm-engine-capacity-<str>: <uint>
+
+Engine identifier string must be the same as the one specified in the
+drm-engine-<str> tag, and the value shall be a number greater than zero in
+case the exported engine corresponds to a group of identical hardware engines.
+
+In the absence of this tag, the parser shall assume a capacity of one. Zero
+capacity is not allowed.
+
+- drm-memory-<str>: <uint> [KiB|MiB]
+
+Each possible memory type which can be used to store buffer objects by the
+GPU in question shall be given a stable and unique name to be returned as the
+string here.
+
+Value shall reflect the amount of storage currently consumed by the buffer
+objects belonging to this client, in the respective memory region.
+
+Default unit shall be bytes with optional unit specifiers of 'KiB' or 'MiB'
+indicating kibi- or mebi-bytes.
+
+===============================
+Driver specific implementations
+===============================
+
+:ref:`i915-usage-stats`
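The key-value format above lends itself to very simple parsing. A minimal userspace sketch in C, following the specification (the helper name and the fd number in the path are illustrative only, not part of any library)::

    #include <stdio.h>
    #include <string.h>

    /* Parse one fdinfo line of the form "drm-<key>: <value>[ <unit>]". */
    static int parse_drm_line(const char *line, char *key, char *val, size_t n)
    {
            const char *colon = strchr(line, ':');

            if (strncmp(line, "drm-", 4) || !colon)
                    return -1;      /* not a standardised DRM key */

            snprintf(key, n, "%.*s", (int)(colon - line), line);

            /* Whitespace after the delimiter shall be ignored when parsing. */
            for (colon++; *colon == ' ' || *colon == '\t'; colon++)
                    ;
            snprintf(val, n, "%s", colon);
            val[strcspn(val, "\n")] = '\0';
            return 0;
    }

    int main(void)
    {
            char line[256], key[64], val[128];
            /* "3" is a placeholder fd number for an open DRM file. */
            FILE *f = fopen("/proc/self/fdinfo/3", "r");

            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f))
                    if (!parse_drm_line(line, key, val, sizeof(key)))
                            printf("%s = %s\n", key, val);
            fclose(f);
            return 0;
    }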
diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index 0f08693d05cd..54060cd6c419 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -697,3 +697,31 @@ The style guide for ``i915_reg.h``.
.. kernel-doc:: drivers/gpu/drm/i915/i915_reg.h
:doc: The i915 register macro definition style guide
+
+.. _i915-usage-stats:
+
+i915 DRM client usage stats implementation
+==========================================
+
+The drm/i915 driver implements the DRM client usage stats specification as
+documented in :ref:`drm-client-usage-stats`.
+
+Example of the output showing the implemented key-value pairs and the
+entirety of the currently possible format options:
+
+::
+
+ pos: 0
+ flags: 0100002
+ mnt_id: 21
+ drm-driver: i915
+ drm-pdev: 0000:00:02.0
+ drm-client-id: 7
+ drm-engine-render: 9288864723 ns
+ drm-engine-copy: 2035071108 ns
+ drm-engine-video: 0 ns
+ drm-engine-capacity-video: 2
+ drm-engine-video-enhance: 0 ns
+
+Possible `drm-engine-` key names are: `render`, `copy`, `video` and
+`video-enhance`.
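Given two fdinfo samples taken a known time apart, a monitoring tool can turn the drm-engine-* values into a utilisation percentage, normalising by drm-engine-capacity-* and applying the monotonicity rule from the specification. A sketch in C (the struct and function names are illustrative)::

    #include <stdint.h>

    struct engine_sample {
            uint64_t busy_ns;       /* drm-engine-<str> value, in ns */
            uint64_t capacity;      /* drm-engine-capacity-<str>, default 1 */
    };

    /* Busyness between two samples, in percent of the engine group. */
    static double engine_busy_pct(const struct engine_sample *prev,
                                  const struct engine_sample *cur,
                                  uint64_t wall_ns)
    {
            uint64_t busy = cur->busy_ns;
            uint64_t cap = cur->capacity ? cur->capacity : 1;

            /*
             * Values may be momentarily non-monotonic; stay with the
             * previous larger value until a monotonic update is seen.
             */
            if (busy < prev->busy_ns)
                    busy = prev->busy_ns;

            return 100.0 * (double)(busy - prev->busy_ns) /
                   ((double)wall_ns * (double)cap);
    }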
diff --git a/Documentation/gpu/index.rst b/Documentation/gpu/index.rst
index b9c1214d8f23..b99dede9a5b1 100644
--- a/Documentation/gpu/index.rst
+++ b/Documentation/gpu/index.rst
@@ -10,6 +10,7 @@ Linux GPU Driver Developer's Guide
drm-kms
drm-kms-helpers
drm-uapi
+ drm-usage-stats
driver-uapi
drm-client
drivers
diff --git a/MAINTAINERS b/MAINTAINERS
index 558dfda0a772..4c2d6964b3da 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9996,6 +9996,7 @@ S: Supported
F: Documentation/driver-api/mei/*
F: drivers/misc/mei/
F: drivers/watchdog/mei_wdt.c
+F: include/linux/mei_aux.h
F: include/linux/mei_cl_bus.h
F: include/uapi/linux/mei.h
F: samples/mei/*
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 98c5450b8eac..aa1e7f0b1fe4 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -4,7 +4,7 @@ config DRM_I915
depends on DRM
depends on X86 && PCI
depends on !PREEMPT_RT
- select INTEL_GTT
+ select INTEL_GTT if X86
select INTERVAL_TREE
# we need shmfs for the swappable backing store, and in particular
# the shmem_readpage() which depends upon tmpfs
@@ -30,6 +30,7 @@ config DRM_I915
select VMAP_PFN
select DRM_TTM
select DRM_BUDDY
+ select AUXILIARY_BUS
help
Choose this option if you have a system that has "Intel Graphics
Media Accelerator" or "HD Graphics" integrated graphics,
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 815589f8658c..cd0bf6806228 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -33,6 +33,7 @@ subdir-ccflags-y += -I$(srctree)/$(src)
# core driver code
i915-y += i915_driver.o \
+ i915_drm_client.o \
i915_config.o \
i915_getparam.o \
i915_ioctl.o \
@@ -106,6 +107,8 @@ gt-y += \
gt/intel_gt_pm_debugfs.o \
gt/intel_gt_pm_irq.o \
gt/intel_gt_requests.o \
+ gt/intel_gt_sysfs.o \
+ gt/intel_gt_sysfs_pm.o \
gt/intel_gtt.o \
gt/intel_llc.o \
gt/intel_lrc.o \
@@ -125,6 +128,8 @@ gt-y += \
gt/intel_workarounds.o \
gt/shmem_utils.o \
gt/sysfs_engines.o
+# x86 intel-gtt module support
+gt-$(CONFIG_X86) += gt/intel_gt_gmch.o
# autogenerated null render state
gt-y += \
gt/gen6_renderstate.o \
@@ -185,9 +190,11 @@ i915-y += gt/uc/intel_uc.o \
gt/uc/intel_uc_fw.o \
gt/uc/intel_guc.o \
gt/uc/intel_guc_ads.o \
+ gt/uc/intel_guc_capture.o \
gt/uc/intel_guc_ct.o \
gt/uc/intel_guc_debugfs.o \
gt/uc/intel_guc_fw.o \
+ gt/uc/intel_guc_hwconfig.o \
gt/uc/intel_guc_log.o \
gt/uc/intel_guc_log_debugfs.o \
gt/uc/intel_guc_rc.o \
@@ -197,6 +204,9 @@ i915-y += gt/uc/intel_uc.o \
gt/uc/intel_huc_debugfs.o \
gt/uc/intel_huc_fw.o
+# graphics system controller (GSC) support
+i915-y += gt/intel_gsc.o
+
# modesetting core code
i915-y += \
display/hsw_ips.o \
diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
index 15b2716172f7..fb0e7e79e0cd 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -300,5 +300,5 @@ void intel_dpt_destroy(struct i915_address_space *vm)
{
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
- i915_vm_close(&dpt->vm);
+ i915_vm_put(&dpt->vm);
}
diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c
index e94923e9dbb1..9f5a6b79e95b 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.c
+++ b/drivers/gpu/drm/i915/display/intel_fb.c
@@ -2029,7 +2029,7 @@ intel_user_framebuffer_create(struct drm_device *dev,
/* object is backed with LMEM for discrete */
i915 = to_i915(obj->base.dev);
- if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM)) {
+ if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM_0)) {
/* object is "remote", not in local memory */
i915_gem_object_put(obj);
return ERR_PTR(-EREMOTE);
diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c
index a307b4993bcf..bd6e7c98e751 100644
--- a/drivers/gpu/drm/i915/display/intel_fb_pin.c
+++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c
@@ -140,7 +140,7 @@ retry:
if (!ret && phys_cursor)
ret = i915_gem_object_attach_phys(obj, alignment);
else if (!ret && HAS_LMEM(dev_priv))
- ret = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM);
+ ret = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM_0);
/* TODO: Do we need to sync when migration becomes async? */
if (!ret)
ret = i915_gem_object_pin_pages(obj);
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index 2cd62a187df3..221336178991 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -279,7 +279,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
/* Our framebuffer is the entirety of fbdev's system memory */
info->fix.smem_start =
(unsigned long)(ggtt->gmadr.start + vma->node.start);
- info->fix.smem_len = vma->node.size;
+ info->fix.smem_len = vma->size;
}
vaddr = i915_vma_pin_iomap(vma);
@@ -290,7 +290,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
goto out_unpin;
}
info->screen_base = vaddr;
- info->screen_size = vma->node.size;
+ info->screen_size = vma->size;
drm_fb_helper_fill_info(info, &ifbdev->helper, sizes);
diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c
index e207d12286b5..d10f27d0b7b0 100644
--- a/drivers/gpu/drm/i915/display/intel_plane_initial.c
+++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c
@@ -3,6 +3,7 @@
* Copyright © 2021 Intel Corporation
*/
+#include "gem/i915_gem_region.h"
#include "i915_drv.h"
#include "intel_atomic_plane.h"
#include "intel_display.h"
@@ -46,16 +47,55 @@ static struct i915_vma *
initial_plane_vma(struct drm_i915_private *i915,
struct intel_initial_plane_config *plane_config)
{
- struct intel_memory_region *mem = i915->mm.stolen_region;
+ struct intel_memory_region *mem;
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
+ resource_size_t phys_base;
u32 base, size;
+ u64 pinctl;
- if (!mem || plane_config->size == 0)
+ if (plane_config->size == 0)
+ return NULL;
+
+ base = round_down(plane_config->base, I915_GTT_MIN_ALIGNMENT);
+ if (IS_DGFX(i915)) {
+ gen8_pte_t __iomem *gte = to_gt(i915)->ggtt->gsm;
+ gen8_pte_t pte;
+
+ gte += base / I915_GTT_PAGE_SIZE;
+
+ pte = ioread64(gte);
+ if (!(pte & GEN12_GGTT_PTE_LM)) {
+ drm_err(&i915->drm,
+ "Initial plane programming missing PTE_LM bit\n");
+ return NULL;
+ }
+
+ phys_base = pte & I915_GTT_PAGE_MASK;
+ mem = i915->mm.regions[INTEL_REGION_LMEM_0];
+
+ /*
+ * We don't currently expect this to ever be placed in the
+ * stolen portion.
+ */
+ if (phys_base >= resource_size(&mem->region)) {
+ drm_err(&i915->drm,
+ "Initial plane programming using invalid range, phys_base=%pa\n",
+ &phys_base);
+ return NULL;
+ }
+
+ drm_dbg(&i915->drm,
+ "Using phys_base=%pa, based on initial plane programming\n",
+ &phys_base);
+ } else {
+ phys_base = base;
+ mem = i915->mm.stolen_region;
+ }
+
+ if (!mem)
return NULL;
- base = round_down(plane_config->base,
- I915_GTT_MIN_ALIGNMENT);
size = round_up(plane_config->base + plane_config->size,
mem->min_page_size);
size -= base;
@@ -66,10 +106,11 @@ initial_plane_vma(struct drm_i915_private *i915,
* features.
*/
if (IS_ENABLED(CONFIG_FRAMEBUFFER_CONSOLE) &&
+ mem == i915->mm.stolen_region &&
size * 2 > i915->stolen_usable_size)
return NULL;
- obj = i915_gem_object_create_stolen_for_preallocated(i915, base, size);
+ obj = i915_gem_object_create_region_at(mem, phys_base, size, 0);
if (IS_ERR(obj))
return NULL;
@@ -99,7 +140,10 @@ initial_plane_vma(struct drm_i915_private *i915,
if (IS_ERR(vma))
goto err_obj;
- if (i915_ggtt_pin(vma, NULL, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
+ pinctl = PIN_GLOBAL | PIN_OFFSET_FIXED | base;
+ if (HAS_GMCH(i915))
+ pinctl |= PIN_MAPPABLE;
+ if (i915_vma_pin(vma, 0, 0, pinctl))
goto err_obj;
if (i915_gem_object_is_tiled(obj) &&
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 5db83aaf93ee..ab4c5ab28e4d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1031,23 +1031,44 @@ static void free_engines_rcu(struct rcu_head *rcu)
free_engines(engines);
}
+static void accumulate_runtime(struct i915_drm_client *client,
+ struct i915_gem_engines *engines)
+{
+ struct i915_gem_engines_iter it;
+ struct intel_context *ce;
+
+ if (!client)
+ return;
+
+ /* Transfer accumulated runtime to the parent GEM context. */
+ for_each_gem_engine(ce, engines, it) {
+ unsigned int class = ce->engine->uabi_class;
+
+ GEM_BUG_ON(class >= ARRAY_SIZE(client->past_runtime));
+ atomic64_add(intel_context_get_total_runtime_ns(ce),
+ &client->past_runtime[class]);
+ }
+}
+
static int
engines_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
struct i915_gem_engines *engines =
container_of(fence, typeof(*engines), fence);
+ struct i915_gem_context *ctx = engines->ctx;
switch (state) {
case FENCE_COMPLETE:
if (!list_empty(&engines->link)) {
- struct i915_gem_context *ctx = engines->ctx;
unsigned long flags;
spin_lock_irqsave(&ctx->stale.lock, flags);
list_del(&engines->link);
spin_unlock_irqrestore(&ctx->stale.lock, flags);
}
- i915_gem_context_put(engines->ctx);
+ accumulate_runtime(ctx->client, engines);
+ i915_gem_context_put(ctx);
+
break;
case FENCE_FREE:
@@ -1257,6 +1278,9 @@ static void i915_gem_context_release_work(struct work_struct *work)
if (ctx->pxp_wakeref)
intel_runtime_pm_put(&ctx->i915->runtime_pm, ctx->pxp_wakeref);
+ if (ctx->client)
+ i915_drm_client_put(ctx->client);
+
mutex_destroy(&ctx->engines_mutex);
mutex_destroy(&ctx->lut_mutex);
@@ -1467,7 +1491,7 @@ static void set_closed_name(struct i915_gem_context *ctx)
static void context_close(struct i915_gem_context *ctx)
{
- struct i915_address_space *vm;
+ struct i915_drm_client *client;
/* Flush any concurrent set_engines() */
mutex_lock(&ctx->engines_mutex);
@@ -1480,19 +1504,6 @@ static void context_close(struct i915_gem_context *ctx)
set_closed_name(ctx);
- vm = ctx->vm;
- if (vm) {
- /* i915_vm_close drops the final reference, which is a bit too
- * early and could result in surprises with concurrent
- * operations racing with thist ctx close. Keep a full reference
- * until the end.
- */
- i915_vm_get(vm);
- i915_vm_close(vm);
- }
-
- ctx->file_priv = ERR_PTR(-EBADF);
-
/*
* The LUT uses the VMA as a backpointer to unref the object,
* so we need to clear the LUT before we close all the VMA (inside
@@ -1500,10 +1511,19 @@ static void context_close(struct i915_gem_context *ctx)
*/
lut_close(ctx);
+ ctx->file_priv = ERR_PTR(-EBADF);
+
spin_lock(&ctx->i915->gem.contexts.lock);
list_del(&ctx->link);
spin_unlock(&ctx->i915->gem.contexts.lock);
+ client = ctx->client;
+ if (client) {
+ spin_lock(&client->ctx_lock);
+ list_del_rcu(&ctx->client_link);
+ spin_unlock(&client->ctx_lock);
+ }
+
mutex_unlock(&ctx->mutex);
/*
@@ -1598,12 +1618,8 @@ i915_gem_create_context(struct drm_i915_private *i915,
}
vm = &ppgtt->vm;
}
- if (vm) {
- ctx->vm = i915_vm_open(vm);
-
- /* i915_vm_open() takes a reference */
- i915_vm_put(vm);
- }
+ if (vm)
+ ctx->vm = vm;
mutex_init(&ctx->engines_mutex);
if (pc->num_user_engines >= 0) {
@@ -1653,7 +1669,7 @@ err_engines:
free_engines(e);
err_vm:
if (ctx->vm)
- i915_vm_close(ctx->vm);
+ i915_vm_put(ctx->vm);
err_ctx:
kfree(ctx);
return ERR_PTR(err);
@@ -1680,6 +1696,8 @@ static void gem_context_register(struct i915_gem_context *ctx,
ctx->file_priv = fpriv;
ctx->pid = get_task_pid(current, PIDTYPE_PID);
+ ctx->client = i915_drm_client_get(fpriv->client);
+
snprintf(ctx->name, sizeof(ctx->name), "%s[%d]",
current->comm, pid_nr(ctx->pid));
@@ -1687,6 +1705,10 @@ static void gem_context_register(struct i915_gem_context *ctx,
old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL);
WARN_ON(old);
+ spin_lock(&ctx->client->ctx_lock);
+ list_add_tail_rcu(&ctx->client_link, &ctx->client->ctx_list);
+ spin_unlock(&ctx->client->ctx_lock);
+
spin_lock(&i915->gem.contexts.lock);
list_add_tail(&ctx->link, &i915->gem.contexts.list);
spin_unlock(&i915->gem.contexts.lock);
@@ -1837,7 +1859,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv,
if (err)
return err;
- i915_vm_open(vm);
+ i915_vm_get(vm);
GEM_BUG_ON(id == 0); /* reserved for invalid/unassigned ppgtt */
args->value = id;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 282cdb8a5c5a..cb78214a7dcd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -293,6 +293,12 @@ struct i915_gem_context {
/** @link: place with &drm_i915_private.context_list */
struct list_head link;
+ /** @client: struct i915_drm_client */
+ struct i915_drm_client *client;
+
+ /** @client_link: for linking onto &i915_drm_client.ctx_list */
+ struct list_head client_link;
+
/**
* @ref: reference count
*
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index c6eb023d3d86..5802692ea604 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -123,7 +123,7 @@ __i915_gem_object_create_user_ext(struct drm_i915_private *i915, u64 size,
*/
flags = I915_BO_ALLOC_USER;
- ret = mr->ops->init_object(mr, obj, size, 0, flags);
+ ret = mr->ops->init_object(mr, obj, I915_BO_INVALID_OFFSET, size, 0, flags);
if (ret)
goto object_free;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 13917231ae81..f5062d0c6333 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -66,15 +66,6 @@ err:
return ERR_PTR(ret);
}
-static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment,
- struct sg_table *sg,
- enum dma_data_direction dir)
-{
- dma_unmap_sgtable(attachment->dev, sg, dir, DMA_ATTR_SKIP_CPU_SYNC);
- sg_free_table(sg);
- kfree(sg);
-}
-
static int i915_gem_dmabuf_vmap(struct dma_buf *dma_buf,
struct iosys_map *map)
{
@@ -102,11 +93,15 @@ static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf,
static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
{
struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
int ret;
if (obj->base.size < vma->vm_end - vma->vm_start)
return -EINVAL;
+ if (HAS_LMEM(i915))
+ return drm_gem_prime_mmap(&obj->base, vma);
+
if (!obj->base.filp)
return -ENODEV;
@@ -209,7 +204,7 @@ static const struct dma_buf_ops i915_dmabuf_ops = {
.attach = i915_gem_dmabuf_attach,
.detach = i915_gem_dmabuf_detach,
.map_dma_buf = i915_gem_map_dma_buf,
- .unmap_dma_buf = i915_gem_unmap_dma_buf,
+ .unmap_dma_buf = drm_gem_unmap_dma_buf,
.release = drm_gem_dmabuf_release,
.mmap = i915_gem_dmabuf_mmap,
.vmap = i915_gem_dmabuf_vmap,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index fd0e15d9573c..b3383e047505 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1321,10 +1321,8 @@ static void *reloc_vaddr(struct i915_vma *vma,
static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
{
if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
- if (flushes & CLFLUSH_BEFORE) {
- clflushopt(addr);
- mb();
- }
+ if (flushes & CLFLUSH_BEFORE)
+ drm_clflush_virt_range(addr, sizeof(*addr));
*addr = value;
@@ -1336,7 +1334,7 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
* to ensure ordering of clflush wrt to the system.
*/
if (flushes & CLFLUSH_AFTER)
- clflushopt(addr);
+ drm_clflush_virt_range(addr, sizeof(*addr));
} else
*addr = value;
}
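The hunk above replaces the open-coded clflushopt()/mb() pairs with drm_clflush_virt_range(), which encapsulates the flush and the required memory barrier and degrades gracefully on CPUs without CLFLUSHOPT. A minimal sketch of the resulting call pattern, assuming kernel context (the helper name is illustrative)::

    #include <linux/types.h>
    #include <drm/drm_cache.h>

    /* Publish a single CPU-written dword to the device (sketch). */
    static void publish_dword(u32 *addr, u32 value)
    {
            *addr = value;
            drm_clflush_virt_range(addr, sizeof(*addr));
    }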
@@ -2690,6 +2688,11 @@ eb_select_engine(struct i915_execbuffer *eb)
if (err)
goto err;
+ if (!i915_vm_tryget(ce->vm)) {
+ err = -ENOENT;
+ goto err;
+ }
+
eb->context = ce;
eb->gt = ce->engine->gt;
@@ -2713,6 +2716,7 @@ eb_put_engine(struct i915_execbuffer *eb)
{
struct intel_context *child;
+ i915_vm_put(eb->context->vm);
intel_gt_pm_put(eb->gt);
for_each_child(eb->context, child)
intel_context_put(child);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
index 02ed3b269326..8949fb0a944f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
@@ -102,7 +102,7 @@ __i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915,
resource_size_t page_size,
unsigned int flags)
{
- return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM],
+ return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM_0],
size, page_size, flags);
}
@@ -137,6 +137,6 @@ i915_gem_object_create_lmem(struct drm_i915_private *i915,
resource_size_t size,
unsigned int flags)
{
- return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM],
+ return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM_0],
size, 0, flags);
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index c3ea243d414d..0c5c43852e24 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -70,7 +70,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
* mmap ioctl is disallowed for all discrete platforms,
* and for all platforms with GRAPHICS_VER > 12.
*/
- if (IS_DGFX(i915) || GRAPHICS_VER(i915) > 12)
+ if (IS_DGFX(i915) || GRAPHICS_VER_FULL(i915) > IP_VER(12, 0))
return -EOPNOTSUPP;
if (args->flags & ~(I915_MMAP_WC))
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 747ac65e060f..06b1b188ce5a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -606,6 +606,9 @@ bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj,
if (!mr)
return false;
+ if (!IS_ALIGNED(obj->base.size, mr->min_page_size))
+ return false;
+
if (obj->mm.region == mr)
return true;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index fd54eb8f4826..2c88bdb8ff7c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -631,6 +631,8 @@ struct drm_i915_gem_object {
struct drm_mm_node *stolen;
+ resource_size_t bo_offset;
+
unsigned long scratch;
u64 encode;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c
index 5866c88e4d02..f46ee16a323a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_region.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c
@@ -29,11 +29,12 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj)
mutex_unlock(&mem->objects.lock);
}
-struct drm_i915_gem_object *
-i915_gem_object_create_region(struct intel_memory_region *mem,
- resource_size_t size,
- resource_size_t page_size,
- unsigned int flags)
+static struct drm_i915_gem_object *
+__i915_gem_object_create_region(struct intel_memory_region *mem,
+ resource_size_t offset,
+ resource_size_t size,
+ resource_size_t page_size,
+ unsigned int flags)
{
struct drm_i915_gem_object *obj;
resource_size_t default_page_size;
@@ -64,6 +65,9 @@ i915_gem_object_create_region(struct intel_memory_region *mem,
size = round_up(size, default_page_size);
+ if (default_page_size == size)
+ flags |= I915_BO_ALLOC_CONTIGUOUS;
+
GEM_BUG_ON(!size);
GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT));
@@ -85,7 +89,7 @@ i915_gem_object_create_region(struct intel_memory_region *mem,
if (default_page_size < mem->min_page_size)
flags |= I915_BO_ALLOC_PM_EARLY;
- err = mem->ops->init_object(mem, obj, size, page_size, flags);
+ err = mem->ops->init_object(mem, obj, offset, size, page_size, flags);
if (err)
goto err_object_free;
@@ -97,6 +101,40 @@ err_object_free:
return ERR_PTR(err);
}
+struct drm_i915_gem_object *
+i915_gem_object_create_region(struct intel_memory_region *mem,
+ resource_size_t size,
+ resource_size_t page_size,
+ unsigned int flags)
+{
+ return __i915_gem_object_create_region(mem, I915_BO_INVALID_OFFSET,
+ size, page_size, flags);
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_region_at(struct intel_memory_region *mem,
+ resource_size_t offset,
+ resource_size_t size,
+ unsigned int flags)
+{
+ GEM_BUG_ON(offset == I915_BO_INVALID_OFFSET);
+
+ if (GEM_WARN_ON(!IS_ALIGNED(size, mem->min_page_size)) ||
+ GEM_WARN_ON(!IS_ALIGNED(offset, mem->min_page_size)))
+ return ERR_PTR(-EINVAL);
+
+ if (range_overflows(offset, size, resource_size(&mem->region)))
+ return ERR_PTR(-EINVAL);
+
+ if (!(flags & I915_BO_ALLOC_GPU_ONLY) &&
+ offset + size > mem->io_size &&
+ !i915_ggtt_has_aperture(to_gt(mem->i915)->ggtt))
+ return ERR_PTR(-ENOSPC);
+
+ return __i915_gem_object_create_region(mem, offset, size, 0,
+ flags | I915_BO_ALLOC_CONTIGUOUS);
+}
+
/**
* i915_gem_process_region - Iterate over all objects of a region using ops
* to process and optionally skip objects
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h
index fcaa12d657d4..2dfcc41c0170 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_region.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h
@@ -14,6 +14,8 @@ struct sg_table;
struct i915_gem_apply_to_region;
+#define I915_BO_INVALID_OFFSET ((resource_size_t)-1)
+
/**
* struct i915_gem_apply_to_region_ops - ops to use when iterating over all
* region objects.
@@ -56,6 +58,11 @@ i915_gem_object_create_region(struct intel_memory_region *mem,
resource_size_t size,
resource_size_t page_size,
unsigned int flags);
+struct drm_i915_gem_object *
+i915_gem_object_create_region_at(struct intel_memory_region *mem,
+ resource_size_t offset,
+ resource_size_t size,
+ unsigned int flags);
int i915_gem_process_region(struct intel_memory_region *mr,
struct i915_gem_apply_to_region *apply);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index c7541dc687c1..c2a3e388fcb4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -553,6 +553,7 @@ static int __create_shmem(struct drm_i915_private *i915,
static int shmem_object_init(struct intel_memory_region *mem,
struct drm_i915_gem_object *obj,
+ resource_size_t offset,
resource_size_t size,
resource_size_t page_size,
unsigned int flags)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 2a225daa2b12..47b5e0e342ab 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -12,6 +12,8 @@
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_region.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_region_lmem.h"
#include "i915_drv.h"
#include "i915_gem_stolen.h"
#include "i915_reg.h"
@@ -493,7 +495,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
/* Exclude the reserved region from driver use */
mem->region.end = reserved_base - 1;
- mem->io_size = resource_size(&mem->region);
+ mem->io_size = min(mem->io_size, resource_size(&mem->region));
/* It is possible for the reserved area to end before the end of stolen
* memory, so just consider the start. */
@@ -680,6 +682,7 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem,
static int _i915_gem_object_stolen_init(struct intel_memory_region *mem,
struct drm_i915_gem_object *obj,
+ resource_size_t offset,
resource_size_t size,
resource_size_t page_size,
unsigned int flags)
@@ -694,12 +697,32 @@ static int _i915_gem_object_stolen_init(struct intel_memory_region *mem,
if (size == 0)
return -EINVAL;
+ /*
+	 * With discrete devices, where we lack a mappable aperture, there is
+	 * no possible way to ever access this memory on the CPU side.
+ */
+ if (mem->type == INTEL_MEMORY_STOLEN_LOCAL && !mem->io_size &&
+ !(flags & I915_BO_ALLOC_GPU_ONLY))
+ return -ENOSPC;
+
stolen = kzalloc(sizeof(*stolen), GFP_KERNEL);
if (!stolen)
return -ENOMEM;
- ret = i915_gem_stolen_insert_node(i915, stolen, size,
- mem->min_page_size);
+ if (offset != I915_BO_INVALID_OFFSET) {
+ drm_dbg(&i915->drm,
+ "creating preallocated stolen object: stolen_offset=%pa, size=%pa\n",
+ &offset, &size);
+
+ stolen->start = offset;
+ stolen->size = size;
+ mutex_lock(&i915->mm.stolen_lock);
+ ret = drm_mm_reserve_node(&i915->mm.stolen, stolen);
+ mutex_unlock(&i915->mm.stolen_lock);
+ } else {
+ ret = i915_gem_stolen_insert_node(i915, stolen, size,
+ mem->min_page_size);
+ }
if (ret)
goto err_free;
@@ -751,11 +774,6 @@ static int init_stolen_lmem(struct intel_memory_region *mem)
if (GEM_WARN_ON(resource_size(&mem->region) == 0))
return -ENODEV;
- if (!io_mapping_init_wc(&mem->iomap,
- mem->io_start,
- mem->io_size))
- return -EIO;
-
/*
* TODO: For stolen lmem we mostly just care about populating the dsm
* related bits and setting up the drm_mm allocator for the range.
@@ -763,18 +781,26 @@ static int init_stolen_lmem(struct intel_memory_region *mem)
*/
err = i915_gem_init_stolen(mem);
if (err)
- goto err_fini;
+ return err;
+
+ if (mem->io_size && !io_mapping_init_wc(&mem->iomap,
+ mem->io_start,
+ mem->io_size)) {
+ err = -EIO;
+ goto err_cleanup;
+ }
return 0;
-err_fini:
- io_mapping_fini(&mem->iomap);
+err_cleanup:
+ i915_gem_cleanup_stolen(mem->i915);
return err;
}
static int release_stolen_lmem(struct intel_memory_region *mem)
{
- io_mapping_fini(&mem->iomap);
+ if (mem->io_size)
+ io_mapping_fini(&mem->iomap);
i915_gem_cleanup_stolen(mem->i915);
return 0;
}
@@ -791,25 +817,43 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
{
struct intel_uncore *uncore = &i915->uncore;
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+ resource_size_t dsm_size, dsm_base, lmem_size;
struct intel_memory_region *mem;
+ resource_size_t io_start, io_size;
resource_size_t min_page_size;
- resource_size_t io_start;
- resource_size_t lmem_size;
- u64 lmem_base;
- lmem_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
- if (GEM_WARN_ON(lmem_base >= pci_resource_len(pdev, 2)))
+ if (WARN_ON_ONCE(instance))
return ERR_PTR(-ENODEV);
- lmem_size = pci_resource_len(pdev, 2) - lmem_base;
- io_start = pci_resource_start(pdev, 2) + lmem_base;
+ /* Use DSM base address instead for stolen memory */
+ dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
+ if (IS_DG1(uncore->i915)) {
+ lmem_size = pci_resource_len(pdev, 2);
+ if (WARN_ON(lmem_size < dsm_base))
+ return ERR_PTR(-ENODEV);
+ } else {
+ resource_size_t lmem_range;
+
+ lmem_range = intel_gt_read_register(&i915->gt0, XEHPSDV_TILE0_ADDR_RANGE) & 0xFFFF;
+ lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT;
+ lmem_size *= SZ_1G;
+ }
+
+ dsm_size = lmem_size - dsm_base;
+ if (pci_resource_len(pdev, 2) < lmem_size) {
+ io_start = 0;
+ io_size = 0;
+ } else {
+ io_start = pci_resource_start(pdev, 2) + dsm_base;
+ io_size = dsm_size;
+ }
min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
I915_GTT_PAGE_SIZE_4K;
- mem = intel_memory_region_create(i915, lmem_base, lmem_size,
+ mem = intel_memory_region_create(i915, dsm_base, dsm_size,
min_page_size,
- io_start, lmem_size,
+ io_start, io_size,
type, instance,
&i915_region_stolen_lmem_ops);
if (IS_ERR(mem))
@@ -823,6 +867,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n",
&mem->io_start);
+ drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &dsm_base);
intel_memory_region_set_name(mem, "stolen-local");
@@ -851,63 +896,6 @@ i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type,
return mem;
}
-struct drm_i915_gem_object *
-i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *i915,
- resource_size_t stolen_offset,
- resource_size_t size)
-{
- struct intel_memory_region *mem = i915->mm.stolen_region;
- struct drm_i915_gem_object *obj;
- struct drm_mm_node *stolen;
- int ret;
-
- if (!drm_mm_initialized(&i915->mm.stolen))
- return ERR_PTR(-ENODEV);
-
- drm_dbg(&i915->drm,
- "creating preallocated stolen object: stolen_offset=%pa, size=%pa\n",
- &stolen_offset, &size);
-
- /* KISS and expect everything to be page-aligned */
- if (GEM_WARN_ON(size == 0) ||
- GEM_WARN_ON(!IS_ALIGNED(size, mem->min_page_size)) ||
- GEM_WARN_ON(!IS_ALIGNED(stolen_offset, mem->min_page_size)))
- return ERR_PTR(-EINVAL);
-
- stolen = kzalloc(sizeof(*stolen), GFP_KERNEL);
- if (!stolen)
- return ERR_PTR(-ENOMEM);
-
- stolen->start = stolen_offset;
- stolen->size = size;
- mutex_lock(&i915->mm.stolen_lock);
- ret = drm_mm_reserve_node(&i915->mm.stolen, stolen);
- mutex_unlock(&i915->mm.stolen_lock);
- if (ret)
- goto err_free;
-
- obj = i915_gem_object_alloc();
- if (!obj) {
- ret = -ENOMEM;
- goto err_stolen;
- }
-
- ret = __i915_gem_object_create_stolen(mem, obj, stolen);
- if (ret)
- goto err_object_free;
-
- i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
- return obj;
-
-err_object_free:
- i915_gem_object_free(obj);
-err_stolen:
- i915_gem_stolen_remove_node(i915, stolen);
-err_free:
- kfree(stolen);
- return ERR_PTR(ret);
-}
-
bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj)
{
return obj->ops == &i915_gem_object_stolen_ops;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
index ccdf7befc571..d5005a39d130 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
@@ -31,10 +31,6 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
struct drm_i915_gem_object *
i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
resource_size_t size);
-struct drm_i915_gem_object *
-i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv,
- resource_size_t stolen_offset,
- resource_size_t size);
bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index b9ae6b02f304..4c25d9b2f138 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -20,6 +20,7 @@
#include "gem/i915_gem_ttm.h"
#include "gem/i915_gem_ttm_move.h"
#include "gem/i915_gem_ttm_pm.h"
+#include "gt/intel_gpu_commands.h"
#define I915_TTM_PRIO_PURGE 0
#define I915_TTM_PRIO_NO_PAGES 1
@@ -126,14 +127,22 @@ i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj)
static void
i915_ttm_place_from_region(const struct intel_memory_region *mr,
struct ttm_place *place,
+ resource_size_t offset,
+ resource_size_t size,
unsigned int flags)
{
memset(place, 0, sizeof(*place));
place->mem_type = intel_region_to_ttm_type(mr);
+ if (mr->type == INTEL_MEMORY_SYSTEM)
+ return;
+
if (flags & I915_BO_ALLOC_CONTIGUOUS)
place->flags |= TTM_PL_FLAG_CONTIGUOUS;
- if (mr->io_size && mr->io_size < mr->total) {
+ if (offset != I915_BO_INVALID_OFFSET) {
+ place->fpfn = offset >> PAGE_SHIFT;
+ place->lpfn = place->fpfn + (size >> PAGE_SHIFT);
+ } else if (mr->io_size && mr->io_size < mr->total) {
if (flags & I915_BO_ALLOC_GPU_ONLY) {
place->flags |= TTM_PL_FLAG_TOPDOWN;
} else {
@@ -155,12 +164,14 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj,
placement->num_placement = 1;
i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] :
- obj->mm.region, requested, flags);
+ obj->mm.region, requested, obj->bo_offset,
+ obj->base.size, flags);
/* Cache this on object? */
placement->num_busy_placement = num_allowed;
for (i = 0; i < placement->num_busy_placement; ++i)
- i915_ttm_place_from_region(obj->mm.placements[i], busy + i, flags);
+ i915_ttm_place_from_region(obj->mm.placements[i], busy + i,
+ obj->bo_offset, obj->base.size, flags);
if (num_allowed == 0) {
*busy = *requested;
@@ -255,12 +266,33 @@ static const struct i915_refct_sgt_ops tt_rsgt_ops = {
.release = i915_ttm_tt_release
};
+static inline bool
+i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj)
+{
+ bool lmem_placement = false;
+ int i;
+
+ for (i = 0; i < obj->mm.n_placements; i++) {
+		/* Compression is not allowed for objects with smem placement */
+ if (obj->mm.placements[i]->type == INTEL_MEMORY_SYSTEM)
+ return false;
+ if (!lmem_placement &&
+ obj->mm.placements[i]->type == INTEL_MEMORY_LOCAL)
+ lmem_placement = true;
+ }
+
+ return lmem_placement;
+}
+
static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
uint32_t page_flags)
{
+ struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
+ bdev);
struct ttm_resource_manager *man =
ttm_manager_type(bo->bdev, bo->resource->mem_type);
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ unsigned long ccs_pages = 0;
enum ttm_caching caching;
struct i915_ttm_tt *i915_tt;
int ret;
@@ -283,7 +315,12 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
i915_tt->is_shmem = true;
}
- ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, caching, 0);
+ if (HAS_FLAT_CCS(i915) && i915_gem_object_needs_ccs_pages(obj))
+ ccs_pages = DIV_ROUND_UP(DIV_ROUND_UP(bo->base.size,
+ NUM_BYTES_PER_CCS_BYTE),
+ PAGE_SIZE);
+
+ ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, caching, ccs_pages);
if (ret)
goto err_free;
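On FLAT_CCS platforms each byte of compression metadata covers NUM_BYTES_PER_CCS_BYTE bytes of the main surface, so the tt is extended by the metadata size rounded up to whole pages. The same computation as a standalone sketch, assuming the conventional 1:256 ratio and 4 KiB pages (the real constants live in the kernel headers)::

    #define NUM_BYTES_PER_CCS_BYTE  256     /* assumed ratio */
    #define PAGE_SIZE               4096
    #define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

    /* e.g. a 64 MiB object needs 64 MiB / 256 = 256 KiB of CCS = 64 pages */
    static unsigned long ccs_pages_needed(unsigned long obj_size)
    {
            return DIV_ROUND_UP(DIV_ROUND_UP(obj_size, NUM_BYTES_PER_CCS_BYTE),
                                PAGE_SIZE);
    }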
@@ -763,6 +800,7 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
i915_sg_dma_sizes(rsgt->table.sgl));
}
+ GEM_BUG_ON(bo->ttm && ((obj->base.size >> PAGE_SHIFT) < bo->ttm->num_pages));
i915_ttm_adjust_lru(obj);
return ret;
}
@@ -802,7 +840,8 @@ static int __i915_ttm_migrate(struct drm_i915_gem_object *obj,
struct ttm_placement placement;
int ret;
- i915_ttm_place_from_region(mr, &requested, flags);
+ i915_ttm_place_from_region(mr, &requested, obj->bo_offset,
+ obj->base.size, flags);
placement.num_placement = 1;
placement.num_busy_placement = 1;
placement.placement = &requested;
@@ -1142,6 +1181,7 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo)
*/
int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
struct drm_i915_gem_object *obj,
+ resource_size_t offset,
resource_size_t size,
resource_size_t page_size,
unsigned int flags)
@@ -1158,6 +1198,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
drm_gem_private_object_init(&i915->drm, &obj->base, size);
i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags);
+ obj->bo_offset = offset;
+
/* Don't put on a region list until we're either locked or fully initialized. */
obj->mm.region = mem;
INIT_LIST_HEAD(&obj->mm.region_link);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
index 9d698ad00853..73e371aa3850 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
@@ -45,6 +45,7 @@ i915_ttm_to_gem(struct ttm_buffer_object *bo)
int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
struct drm_i915_gem_object *obj,
+ resource_size_t offset,
resource_size_t size,
resource_size_t page_size,
unsigned int flags);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index b4275b55e5b8..62c61af77a42 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -88,7 +88,7 @@ out:
static int igt_dmabuf_import_same_driver_lmem(void *arg)
{
struct drm_i915_private *i915 = arg;
- struct intel_memory_region *lmem = i915->mm.regions[INTEL_REGION_LMEM];
+ struct intel_memory_region *lmem = i915->mm.regions[INTEL_REGION_LMEM_0];
struct drm_i915_gem_object *obj;
struct drm_gem_object *import;
struct dma_buf *dmabuf;
@@ -253,10 +253,10 @@ static int igt_dmabuf_import_same_driver_lmem_smem(void *arg)
struct drm_i915_private *i915 = arg;
struct intel_memory_region *regions[2];
- if (!i915->mm.regions[INTEL_REGION_LMEM])
+ if (!i915->mm.regions[INTEL_REGION_LMEM_0])
return 0;
- regions[0] = i915->mm.regions[INTEL_REGION_LMEM];
+ regions[0] = i915->mm.regions[INTEL_REGION_LMEM_0];
regions[1] = i915->mm.regions[INTEL_REGION_SMEM];
return igt_dmabuf_import_same_driver(i915, regions, 2);
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
index 0ad443a90c8b..801af51aff62 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
@@ -47,14 +47,16 @@ static int igt_create_migrate(struct intel_gt *gt, enum intel_region_id src,
{
struct drm_i915_private *i915 = gt->i915;
struct intel_memory_region *src_mr = i915->mm.regions[src];
+ struct intel_memory_region *dst_mr = i915->mm.regions[dst];
struct drm_i915_gem_object *obj;
struct i915_gem_ww_ctx ww;
int err = 0;
GEM_BUG_ON(!src_mr);
+ GEM_BUG_ON(!dst_mr);
/* Switch object backing-store on create */
- obj = i915_gem_object_create_region(src_mr, PAGE_SIZE, 0, 0);
+ obj = i915_gem_object_create_region(src_mr, dst_mr->min_page_size, 0, 0);
if (IS_ERR(obj))
return PTR_ERR(obj);
@@ -92,17 +94,17 @@ static int igt_create_migrate(struct intel_gt *gt, enum intel_region_id src,
static int igt_smem_create_migrate(void *arg)
{
- return igt_create_migrate(arg, INTEL_REGION_LMEM, INTEL_REGION_SMEM);
+ return igt_create_migrate(arg, INTEL_REGION_LMEM_0, INTEL_REGION_SMEM);
}
static int igt_lmem_create_migrate(void *arg)
{
- return igt_create_migrate(arg, INTEL_REGION_SMEM, INTEL_REGION_LMEM);
+ return igt_create_migrate(arg, INTEL_REGION_SMEM, INTEL_REGION_LMEM_0);
}
static int igt_same_create_migrate(void *arg)
{
- return igt_create_migrate(arg, INTEL_REGION_LMEM, INTEL_REGION_LMEM);
+ return igt_create_migrate(arg, INTEL_REGION_LMEM_0, INTEL_REGION_LMEM_0);
}
static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww,
@@ -152,7 +154,7 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww,
}
} else {
- err = i915_gem_object_migrate(obj, ww, INTEL_REGION_LMEM);
+ err = i915_gem_object_migrate(obj, ww, INTEL_REGION_LMEM_0);
if (err) {
pr_err("Object failed migration to lmem\n");
if (err)
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index 6d6082b5f31f..8ac6726ec16b 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -42,8 +42,7 @@ mock_context(struct drm_i915_private *i915,
if (!ppgtt)
goto err_free;
- ctx->vm = i915_vm_open(&ppgtt->vm);
- i915_vm_put(&ppgtt->vm);
+ ctx->vm = &ppgtt->vm;
}
mutex_init(&ctx->engines_mutex);
@@ -59,7 +58,7 @@ mock_context(struct drm_i915_private *i915,
err_vm:
if (ctx->vm)
- i915_vm_close(ctx->vm);
+ i915_vm_put(ctx->vm);
err_free:
kfree(ctx);
return NULL;
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index 871fe7bda0e0..1bb766c79dcb 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -322,7 +322,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
int err;
- GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));
+ GEM_BUG_ON(!kref_read(&ppgtt->base.vm.ref));
/*
* Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index b1b9c3fd7bf9..9529c5455bc3 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -6,7 +6,6 @@
#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "intel_gpu_commands.h"
-#include "intel_gt_regs.h"
#include "intel_lrc.h"
#include "intel_ring.h"
@@ -165,33 +164,9 @@ static u32 preparser_disable(bool state)
return MI_ARB_CHECK | 1 << 8 | state;
}
-static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
+u32 *gen12_emit_aux_table_inv(u32 *cs, const i915_reg_t inv_reg)
{
- static const i915_reg_t vd[] = {
- GEN12_VD0_AUX_NV,
- GEN12_VD1_AUX_NV,
- GEN12_VD2_AUX_NV,
- GEN12_VD3_AUX_NV,
- };
-
- static const i915_reg_t ve[] = {
- GEN12_VE0_AUX_NV,
- GEN12_VE1_AUX_NV,
- };
-
- if (engine->class == VIDEO_DECODE_CLASS)
- return vd[engine->instance];
-
- if (engine->class == VIDEO_ENHANCEMENT_CLASS)
- return ve[engine->instance];
-
- GEM_BUG_ON("unknown aux_inv reg\n");
- return INVALID_MMIO_REG;
-}
-
-static u32 *gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
-{
- *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
*cs++ = i915_mmio_reg_offset(inv_reg);
*cs++ = AUX_INV;
*cs++ = MI_NOOP;
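
The helper now takes the invalidation register as an argument and always emits exactly four dwords. A minimal, hypothetical caller (ring bookkeeping mirrors gen12_emit_flush_rcs() below; not part of the patch):

/* Hypothetical sketch: stand-alone RCS aux-table invalidation. */
static int emit_rcs_aux_inv(struct i915_request *rq)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4); /* LRI + reg offset + AUX_INV + NOOP */
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cs = gen12_emit_aux_table_inv(cs, GEN12_GFX_CCS_AUX_NV);
	intel_ring_advance(rq, cs);

	return 0;
}
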
@@ -236,7 +211,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
if (mode & EMIT_INVALIDATE) {
u32 flags = 0;
- u32 *cs;
+ u32 *cs, count;
flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_TLB_INVALIDATE;
@@ -254,7 +229,12 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
if (engine->class == COMPUTE_CLASS)
flags &= ~PIPE_CONTROL_3D_FLAGS;
- cs = intel_ring_begin(rq, 8 + 4);
+ if (!HAS_FLAT_CCS(rq->engine->i915))
+ count = 8 + 4;
+ else
+ count = 8;
+
+ cs = intel_ring_begin(rq, count);
if (IS_ERR(cs))
return PTR_ERR(cs);
@@ -267,8 +247,10 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
- /* hsdes: 1809175790 */
- cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
+ if (!HAS_FLAT_CCS(rq->engine->i915)) {
+ /* hsdes: 1809175790 */
+ cs = gen12_emit_aux_table_inv(cs, GEN12_GFX_CCS_AUX_NV);
+ }
*cs++ = preparser_disable(false);
intel_ring_advance(rq, cs);
@@ -283,12 +265,17 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
u32 cmd, *cs;
cmd = 4;
- if (mode & EMIT_INVALIDATE)
+ if (mode & EMIT_INVALIDATE) {
cmd += 2;
- if (mode & EMIT_INVALIDATE)
- aux_inv = rq->engine->mask & ~BIT(BCS0);
- if (aux_inv)
- cmd += 2 * hweight32(aux_inv) + 2;
+
+ if (!HAS_FLAT_CCS(rq->engine->i915) &&
+ (rq->engine->class == VIDEO_DECODE_CLASS ||
+ rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
+ aux_inv = rq->engine->mask & ~BIT(BCS0);
+ if (aux_inv)
+ cmd += 4;
+ }
+ }
cs = intel_ring_begin(rq, cmd);
if (IS_ERR(cs))
@@ -319,15 +306,10 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
*cs++ = 0; /* value */
if (aux_inv) { /* hsdes: 1809175790 */
- struct intel_engine_cs *engine;
- unsigned int tmp;
-
- *cs++ = MI_LOAD_REGISTER_IMM(hweight32(aux_inv));
- for_each_engine_masked(engine, rq->engine->gt, aux_inv, tmp) {
- *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
- *cs++ = AUX_INV;
- }
- *cs++ = MI_NOOP;
+ if (rq->engine->class == VIDEO_DECODE_CLASS)
+ cs = gen12_emit_aux_table_inv(cs, GEN12_VD0_AUX_NV);
+ else
+ cs = gen12_emit_aux_table_inv(cs, GEN12_VE0_AUX_NV);
}
if (mode & EMIT_INVALIDATE)
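
The budget handed to intel_ring_begin() must equal the dwords actually emitted, so the conditional sizing above tallies as follows (a worked count, not new code):

/*
 * MI_FLUSH_DW body               : 4 dwords (cmd = 4)
 * + EMIT_INVALIDATE extras       : 2 dwords (cmd = 6)
 * + VD/VE aux-table invalidation : 4 dwords (cmd = 10), skipped on
 *   FlatCCS platforms; BCS0 is masked out of aux_inv as before
 */
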
@@ -601,6 +583,43 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs)
return cs;
}
+/* Wa_14014475959:dg2 */
+#define CCS_SEMAPHORE_PPHWSP_OFFSET 0x540
+static u32 ccs_semaphore_offset(struct i915_request *rq)
+{
+ return i915_ggtt_offset(rq->context->state) +
+ (LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET;
+}
+
+/* Wa_14014475959:dg2 */
+static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
+{
+ int i;
+
+ *cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL |
+ MI_ATOMIC_MOVE;
+ *cs++ = ccs_semaphore_offset(rq);
+ *cs++ = 0;
+ *cs++ = 1;
+
+ /*
+	 * When MI_ATOMIC_INLINE_DATA is set, this command must be 11 DW + (1 NOP)
+	 * to align: 4 DWs above + 8 filler DWs here.
+ */
+ for (i = 0; i < 8; ++i)
+ *cs++ = 0;
+
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD;
+ *cs++ = 0;
+ *cs++ = ccs_semaphore_offset(rq);
+ *cs++ = 0;
+
+ return cs;
+}
+
static __always_inline u32*
gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
@@ -611,6 +630,10 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
!intel_uc_uses_guc_submission(&rq->engine->gt->uc))
cs = gen12_emit_preempt_busywait(rq, cs);
+ /* Wa_14014475959:dg2 */
+ if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine))
+ cs = ccs_emit_wa_busywait(rq, cs);
+
rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);
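
Taken together, the new helpers implement the hold-off: the inline MI_ATOMIC moves 1 into a dword carved out of the context's PPHWSP, and the following MI_SEMAPHORE_WAIT (SAD_EQ_SDD against 0) parks the CCS engine until another agent (presumably the GuC, given the submission paths this is wired into) clears it at switch-out. The semaphore address arithmetic, spelled out under that reading:

/* Semaphore dword = LRC GGTT base + PPHWSP page + fixed 0x540 offset. */
u32 addr = i915_ggtt_offset(rq->context->state) +
	   (LRC_PPHWSP_PN * PAGE_SIZE) +  /* per-process HWSP page */
	   CCS_SEMAPHORE_PPHWSP_OFFSET;   /* 0x540 within that page */
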
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
index cc6e21d3662a..107ab42539ab 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
@@ -10,7 +10,7 @@
#include <linux/types.h>
#include "i915_gem.h" /* GEM_BUG_ON */
-
+#include "intel_gt_regs.h"
#include "intel_gpu_commands.h"
struct i915_request;
@@ -38,6 +38,8 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
+u32 *gen12_emit_aux_table_inv(u32 *cs, const i915_reg_t inv_reg);
+
static inline u32 *
__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
{
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index f574da00eff1..c7bd5d71b03e 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -454,11 +454,11 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
pd = pdp->entry[gen8_pd_index(idx, 2)];
}
- clflush_cache_range(vaddr, PAGE_SIZE);
+ drm_clflush_virt_range(vaddr, PAGE_SIZE);
vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
}
} while (1);
- clflush_cache_range(vaddr, PAGE_SIZE);
+ drm_clflush_virt_range(vaddr, PAGE_SIZE);
return idx;
}
@@ -631,7 +631,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
}
} while (rem >= page_size && index < I915_PDES);
- clflush_cache_range(vaddr, PAGE_SIZE);
+ drm_clflush_virt_range(vaddr, PAGE_SIZE);
/*
* Is it safe to mark the 2M block as 64K? -- Either we have
@@ -647,7 +647,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
I915_GTT_PAGE_SIZE_2M)))) {
vaddr = px_vaddr(pd);
vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
- clflush_cache_range(vaddr, PAGE_SIZE);
+ drm_clflush_virt_range(vaddr, PAGE_SIZE);
page_size = I915_GTT_PAGE_SIZE_64K;
/*
@@ -668,7 +668,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
for (i = 1; i < index; i += 16)
memset64(vaddr + i, encode, 15);
- clflush_cache_range(vaddr, PAGE_SIZE);
+ drm_clflush_virt_range(vaddr, PAGE_SIZE);
}
}
@@ -722,7 +722,7 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
vaddr = px_vaddr(pt);
vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
- clflush_cache_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
+ drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
}
static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 5d0ec7c49b6a..4070cb5711d8 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -386,7 +386,7 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
ce->ring = NULL;
ce->ring_size = SZ_4K;
- ewma_runtime_init(&ce->runtime.avg);
+ ewma_runtime_init(&ce->stats.runtime.avg);
ce->vm = i915_vm_get(engine->gt->vm);
@@ -400,7 +400,7 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
INIT_LIST_HEAD(&ce->guc_state.fences);
INIT_LIST_HEAD(&ce->guc_state.requests);
- ce->guc_id.id = GUC_INVALID_LRC_ID;
+ ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
INIT_LIST_HEAD(&ce->guc_id.link);
INIT_LIST_HEAD(&ce->destroyed_link);
@@ -576,6 +576,31 @@ void intel_context_bind_parent_child(struct intel_context *parent,
child->parallel.parent = parent;
}
+u64 intel_context_get_total_runtime_ns(const struct intel_context *ce)
+{
+ u64 total, active;
+
+ total = ce->stats.runtime.total;
+ if (ce->ops->flags & COPS_RUNTIME_CYCLES)
+ total *= ce->engine->gt->clock_period_ns;
+
+ active = READ_ONCE(ce->stats.active);
+ if (active)
+ active = intel_context_clock() - active;
+
+ return total + active;
+}
+
+u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
+{
+ u64 avg = ewma_runtime_read(&ce->stats.runtime.avg);
+
+ if (ce->ops->flags & COPS_RUNTIME_CYCLES)
+ avg *= ce->engine->gt->clock_period_ns;
+
+ return avg;
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_context.c"
#endif
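
A worked example of the conversion (figures illustrative, not from any datasheet): with COPS_RUNTIME_CYCLES set, stats.runtime.total counts CS timestamp ticks, so a ~19.2 MHz command-streamer clock gives a clock_period_ns of roughly 52:

/* Illustrative arithmetic only; the real period comes from the GT. */
u64 ticks = 1000000;              /* accumulated CS timestamp ticks */
u32 period_ns = 52;               /* ~19.2 MHz CS clock */
u64 total_ns = ticks * period_ns; /* 52,000,000 ns ~= 52 ms */
/* plus (intel_context_clock() - stats.active) while still running */
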
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index d8c74bbf9aae..b7d3214d2cdd 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -351,18 +351,13 @@ intel_context_clear_nopreempt(struct intel_context *ce)
clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
}
-static inline u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
-{
- const u32 period = ce->engine->gt->clock_period_ns;
-
- return READ_ONCE(ce->runtime.total) * period;
-}
+u64 intel_context_get_total_runtime_ns(const struct intel_context *ce);
+u64 intel_context_get_avg_runtime_ns(struct intel_context *ce);
-static inline u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
+static inline u64 intel_context_clock(void)
{
- const u32 period = ce->engine->gt->clock_period_ns;
-
- return mul_u32_u32(ewma_runtime_read(&ce->runtime.avg), period);
+ /* As we mix CS cycles with CPU clocks, use the raw monotonic clock. */
+ return ktime_get_raw_fast_ns();
}
#endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 30cd81ad8911..09f82545789f 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -35,6 +35,9 @@ struct intel_context_ops {
#define COPS_HAS_INFLIGHT_BIT 0
#define COPS_HAS_INFLIGHT BIT(COPS_HAS_INFLIGHT_BIT)
+#define COPS_RUNTIME_CYCLES_BIT 1
+#define COPS_RUNTIME_CYCLES BIT(COPS_RUNTIME_CYCLES_BIT)
+
int (*alloc)(struct intel_context *ce);
void (*ban)(struct intel_context *ce, struct i915_request *rq);
@@ -134,14 +137,19 @@ struct intel_context {
} lrc;
u32 tag; /* cookie passed to HW to track this context on submission */
- /* Time on GPU as tracked by the hw. */
- struct {
- struct ewma_runtime avg;
- u64 total;
- u32 last;
- I915_SELFTEST_DECLARE(u32 num_underflow);
- I915_SELFTEST_DECLARE(u32 max_underflow);
- } runtime;
+ /** stats: Context GPU engine busyness tracking. */
+ struct intel_context_stats {
+ u64 active;
+
+ /* Time on GPU as tracked by the hw. */
+ struct {
+ struct ewma_runtime avg;
+ u64 total;
+ u32 last;
+ I915_SELFTEST_DECLARE(u32 num_underflow);
+ I915_SELFTEST_DECLARE(u32 max_underflow);
+ } runtime;
+ } stats;
unsigned int active_count; /* protected by timeline->mutex */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 1c0ab05c3c40..1431f1e9dbee 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -4,6 +4,7 @@
#include <asm/cacheflush.h>
#include <drm/drm_util.h>
+#include <drm/drm_cache.h>
#include <linux/hashtable.h>
#include <linux/irq_work.h>
@@ -143,15 +144,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
* of extra paranoia to try and ensure that the HWS takes the value
* we give and that it doesn't end up trapped inside the CPU!
*/
- if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
- mb();
- clflush(&engine->status_page.addr[reg]);
- engine->status_page.addr[reg] = value;
- clflush(&engine->status_page.addr[reg]);
- mb();
- } else {
- WRITE_ONCE(engine->status_page.addr[reg], value);
- }
+ drm_clflush_virt_range(&engine->status_page.addr[reg], sizeof(value));
+ WRITE_ONCE(engine->status_page.addr[reg], value);
+ drm_clflush_virt_range(&engine->status_page.addr[reg], sizeof(value));
}
/*
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 7447411a5b26..14c6ddbbfde8 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -436,6 +436,11 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS)
engine->props.preempt_timeout_ms = 0;
+ if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) &&
+ __ffs(CCS_MASK(engine->gt)) == engine->instance) ||
+ engine->class == RENDER_CLASS)
+ engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE;
+
/* features common between engines sharing EUs */
if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) {
engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE;
@@ -726,12 +731,24 @@ static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
{
- int i;
- u8 map[MAX_ENGINE_INSTANCE + 1];
+ /*
+	 * Logical to physical mapping is needed for proper support
+	 * of the split-frame feature.
+ */
+ if (MEDIA_VER(gt->i915) >= 11 && class == VIDEO_DECODE_CLASS) {
+ const u8 map[] = { 0, 2, 4, 6, 1, 3, 5, 7 };
+
+ populate_logical_ids(gt, logical_ids, class,
+ map, ARRAY_SIZE(map));
+ } else {
+ int i;
+ u8 map[MAX_ENGINE_INSTANCE + 1];
- for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
- map[i] = i;
- populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map));
+ for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
+ map[i] = i;
+ populate_logical_ids(gt, logical_ids, class,
+ map, ARRAY_SIZE(map));
+ }
}
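
Spelled out for a part exposing all eight video decode engines (an assumption for illustration): populate_logical_ids() walks the table in order, so the even instances, the SFC-capable ones, claim the low logical slots:

/*
 * map[] lists physical instances in logical order:
 *   logical : 0 1 2 3 4 5 6 7
 *   physical: 0 2 4 6 1 3 5 7  (even/SFC-capable VCS first)
 */
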
/**
@@ -1263,6 +1280,15 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine,
int err;
intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
+
+ /*
+	 * Wa_22011802037: gen12, prior to doing a reset, ensure CS is
+	 * stopped; set the ring stop bit and prefetch disable bit to halt CS
+ */
+ if (GRAPHICS_VER(engine->i915) == 12)
+ intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base),
+ _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE));
+
err = __intel_wait_for_register_fw(engine->uncore, mode,
MODE_IDLE, MODE_IDLE,
fast_timeout_us,
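
RING_MODE_GEN7 is a masked register, so the write added above touches only the prefetch-disable bit. Worked expansion, using the usual (bit << 16) | bit form of _MASKED_BIT_ENABLE():

/* _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE):
 *   (REG_BIT(10) << 16) | REG_BIT(10) == 0x04000400
 * bits 31:16 = write-enable mask, bits 15:0 = new bit values
 */
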
@@ -1697,9 +1723,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
}
- if (intel_engine_uses_guc(engine)) {
- /* nothing to print yet */
- } else if (HAS_EXECLISTS(dev_priv)) {
+ if (HAS_EXECLISTS(dev_priv) && !intel_engine_uses_guc(engine)) {
struct i915_request * const *port, *rq;
const u32 *hws =
&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index 0bf8b45c9319..594a629cb28f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -181,6 +181,7 @@
#define GFX_SURFACE_FAULT_ENABLE (1 << 12)
#define GFX_REPLAY_MODE (1 << 11)
#define GFX_PSMI_GRANULARITY (1 << 10)
+#define GEN12_GFX_PREFETCH_DISABLE REG_BIT(10)
#define GFX_PPGTT_ENABLE (1 << 9)
#define GEN8_GFX_PPGTT_48B (1 << 7)
#define GFX_FORWARD_VBLANK_MASK (3 << 5)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 19ff8758e34d..298f2cc7a879 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -96,7 +96,9 @@ struct i915_ctx_workarounds {
#define I915_MAX_VCS 8
#define I915_MAX_VECS 4
+#define I915_MAX_SFC (I915_MAX_VCS / 2)
#define I915_MAX_CCS 4
+#define I915_MAX_RCS 1
/*
 * Engine ID definitions.
@@ -526,6 +528,8 @@ struct intel_engine_cs {
#define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8)
#define I915_ENGINE_HAS_RCS_REG_STATE BIT(9)
#define I915_ENGINE_HAS_EU_PRIORITY BIT(10)
+#define I915_ENGINE_FIRST_RENDER_COMPUTE BIT(11)
+#define I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT BIT(12)
unsigned int flags;
/*
@@ -626,6 +630,13 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO;
}
+/* Wa_14014475959:dg2 */
+static inline bool
+intel_engine_uses_wa_hold_ccs_switchout(struct intel_engine_cs *engine)
+{
+ return engine->flags & I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
+}
+
#define instdone_has_slice(dev_priv___, sseu___, slice___) \
((GRAPHICS_VER(dev_priv___) == 7 ? 1 : ((sseu___)->slice_mask)) & BIT(slice___))
@@ -643,7 +654,7 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
#define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \
for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \
- (iter_) < GEN_MAX_SUBSLICES; \
+ (iter_) < GEN_SS_MASK_SIZE; \
(iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \
(dss_) = (iter_) % GEN_DSS_PER_GSLICE) \
for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_)))
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index b8c9b6b89003..0f6cd96b459f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -193,7 +193,6 @@ static void add_legacy_ring(struct legacy_ring *ring,
void intel_engines_driver_register(struct drm_i915_private *i915)
{
struct legacy_ring ring = {};
- u8 uabi_instances[5] = {};
struct list_head *it, *next;
struct rb_node **p, *prev;
LIST_HEAD(engines);
@@ -214,8 +213,10 @@ void intel_engines_driver_register(struct drm_i915_private *i915)
GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes));
engine->uabi_class = uabi_classes[engine->class];
- GEM_BUG_ON(engine->uabi_class >= ARRAY_SIZE(uabi_instances));
- engine->uabi_instance = uabi_instances[engine->uabi_class]++;
+ GEM_BUG_ON(engine->uabi_class >=
+ ARRAY_SIZE(i915->engine_uabi_class_count));
+ engine->uabi_instance =
+ i915->engine_uabi_class_count[engine->uabi_class]++;
/* Replace the internal name with the final user facing name */
memcpy(old, engine->name, sizeof(engine->name));
@@ -245,8 +246,8 @@ void intel_engines_driver_register(struct drm_i915_private *i915)
int class, inst;
int errors = 0;
- for (class = 0; class < ARRAY_SIZE(uabi_instances); class++) {
- for (inst = 0; inst < uabi_instances[class]; inst++) {
+ for (class = 0; class < ARRAY_SIZE(i915->engine_uabi_class_count); class++) {
+ for (inst = 0; inst < i915->engine_uabi_class_count[class]; inst++) {
engine = intel_engine_lookup_user(i915,
class, inst);
if (!engine) {
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 1c602d4ae297..f8749c433b7c 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -625,8 +625,6 @@ static void __execlists_schedule_out(struct i915_request * const rq,
GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
__set_bit(ccid - 1, &engine->context_tag);
}
-
- lrc_update_runtime(ce);
intel_engine_context_out(engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
if (engine->fw_domain && !--engine->fw_active)
@@ -1651,12 +1649,6 @@ cancel_port_requests(struct intel_engine_execlists * const execlists,
return inactive;
}
-static void invalidate_csb_entries(const u64 *first, const u64 *last)
-{
- clflush((void *)first);
- clflush((void *)last);
-}
-
/*
* Starting with Gen12, the status has a new format:
*
@@ -2004,15 +1996,30 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
* the wash as hardware, working or not, will need to do the
* invalidation before.
*/
- invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
+ drm_clflush_virt_range(&buf[0], num_entries * sizeof(buf[0]));
/*
* We assume that any event reflects a change in context flow
* and merits a fresh timeslice. We reinstall the timer after
	 * inspecting the queue to see if we need to resubmit.
*/
- if (*prev != *execlists->active) /* elide lite-restores */
+ if (*prev != *execlists->active) { /* elide lite-restores */
+ /*
+ * Note the inherent discrepancy between the HW runtime,
+ * recorded as part of the context switch, and the CPU
+ * adjustment for active contexts. We have to hope that
+ * the delay in processing the CS event is very small
+ * and consistent. It works to our advantage to have
+ * the CPU adjustment _undershoot_ (i.e. start later than)
+ * the CS timestamp so we never overreport the runtime
+		 * and correct ourselves later when updating from HW.
+ */
+ if (*prev)
+ lrc_runtime_stop((*prev)->context);
+ if (*execlists->active)
+ lrc_runtime_start((*execlists->active)->context);
new_timeslice(execlists);
+ }
return inactive;
}
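
To see the undershoot with small numbers (illustrative): if the hardware switched the context in at CS time 100 but the CPU only records stats.active at 103, a query at time 150 reports total + 47 rather than total + 50. The runtime is never overstated, and the next CS event folds the exact hardware delta into runtime.total, absorbing the shortfall.
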
@@ -2236,11 +2243,11 @@ static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
if (!cap->error)
goto err_cap;
- cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
+ cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp, CORE_DUMP_FLAG_NONE);
if (!cap->error->gt)
goto err_gpu;
- cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
+ cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp, CORE_DUMP_FLAG_NONE);
if (!cap->error->gt->engine)
goto err_gt;
@@ -2644,7 +2651,7 @@ unwind:
}
static const struct intel_context_ops execlists_context_ops = {
- .flags = COPS_HAS_INFLIGHT,
+ .flags = COPS_HAS_INFLIGHT | COPS_RUNTIME_CYCLES,
.alloc = execlists_context_alloc,
@@ -2788,8 +2795,9 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
/* Check that the GPU does indeed update the CSB entries! */
memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
- invalidate_csb_entries(&execlists->csb_status[0],
- &execlists->csb_status[reset_value]);
+ drm_clflush_virt_range(execlists->csb_status,
+ execlists->csb_size *
+			       sizeof(execlists->csb_status[0]));
/* Once more for luck and our trusty paranoia */
ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
@@ -2833,7 +2841,7 @@ static void execlists_sanitize(struct intel_engine_cs *engine)
sanitize_hwsp(engine);
/* And scrub the dirty cachelines for the HWSP */
- clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
+ drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
intel_engine_reset_pinned_contexts(engine);
}
@@ -2912,7 +2920,7 @@ static int execlists_resume(struct intel_engine_cs *engine)
enable_execlists(engine);
- if (engine->class == RENDER_CLASS)
+ if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
xehp_enable_ccs_engines(engine);
return 0;
@@ -2958,9 +2966,8 @@ reset_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
- mb(); /* paranoia: read the CSB pointers from after the reset */
- clflush(execlists->csb_write);
- mb();
+ drm_clflush_virt_range(execlists->csb_write,
+ sizeof(execlists->csb_write[0]));
inactive = process_csb(engine, inactive); /* drain preemption events */
@@ -3702,7 +3709,7 @@ virtual_get_sibling(struct intel_engine_cs *engine, unsigned int sibling)
}
static const struct intel_context_ops virtual_context_ops = {
- .flags = COPS_HAS_INFLIGHT,
+ .flags = COPS_HAS_INFLIGHT | COPS_RUNTIME_CYCLES,
.alloc = virtual_context_alloc,
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index e164f30a900d..e6b2eb122ad7 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -3,18 +3,16 @@
* Copyright © 2020 Intel Corporation
*/
-#include <linux/agp_backend.h>
-#include <linux/stop_machine.h>
-
+#include <linux/types.h>
#include <asm/set_memory.h>
#include <asm/smp.h>
#include <drm/i915_drm.h>
-#include <drm/intel-gtt.h>
#include "gem/i915_gem_lmem.h"
#include "intel_gt.h"
+#include "intel_gt_gmch.h"
#include "intel_gt_regs.h"
#include "i915_drv.h"
#include "i915_scatterlist.h"
@@ -95,28 +93,6 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915)
return 0;
}
-/*
- * Certain Gen5 chipsets require idling the GPU before
- * unmapping anything from the GTT when VT-d is enabled.
- */
-static bool needs_idle_maps(struct drm_i915_private *i915)
-{
- /*
- * Query intel_iommu to see if we need the workaround. Presumably that
- * was loaded first.
- */
- if (!i915_vtd_active(i915))
- return false;
-
- if (GRAPHICS_VER(i915) == 5 && IS_MOBILE(i915))
- return true;
-
- if (GRAPHICS_VER(i915) == 12)
- return true; /* XXX DMAR fault reason 7 */
-
- return false;
-}
-
/**
* i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM
* @vm: The VM to suspend the mappings for
@@ -127,7 +103,7 @@ static bool needs_idle_maps(struct drm_i915_private *i915)
void i915_ggtt_suspend_vm(struct i915_address_space *vm)
{
struct i915_vma *vma, *vn;
- int open;
+ int save_skip_rewrite;
drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
@@ -136,8 +112,12 @@ retry:
mutex_lock(&vm->mutex);
- /* Skip rewriting PTE on VMA unbind. */
- open = atomic_xchg(&vm->open, 0);
+ /*
+ * Skip rewriting PTE on VMA unbind.
+ * FIXME: Use an argument to i915_vma_unbind() instead?
+ */
+ save_skip_rewrite = vm->skip_pte_rewrite;
+ vm->skip_pte_rewrite = true;
list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
struct drm_i915_gem_object *obj = vma->obj;
@@ -155,16 +135,14 @@ retry:
*/
i915_gem_object_get(obj);
- atomic_set(&vm->open, open);
mutex_unlock(&vm->mutex);
i915_gem_object_lock(obj, NULL);
- open = i915_vma_unbind(vma);
+ GEM_WARN_ON(i915_vma_unbind(vma));
i915_gem_object_unlock(obj);
-
- GEM_WARN_ON(open);
-
i915_gem_object_put(obj);
+
+ vm->skip_pte_rewrite = save_skip_rewrite;
goto retry;
}
@@ -180,7 +158,7 @@ retry:
vm->clear_range(vm, 0, vm->total);
- atomic_set(&vm->open, open);
+ vm->skip_pte_rewrite = save_skip_rewrite;
mutex_unlock(&vm->mutex);
}
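
The flag is saved and restored rather than simply cleared because the retry path drops vm->mutex mid-walk. A condensed sketch of the pattern above (locking and the retry loop elided):

save_skip_rewrite = vm->skip_pte_rewrite;
vm->skip_pte_rewrite = true;   /* unbinds below must not rewrite PTEs */
/* ... unbind bound VMAs, possibly unlocking and retrying ... */
vm->skip_pte_rewrite = save_skip_rewrite;
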
@@ -203,7 +181,7 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
spin_unlock_irq(&uncore->lock);
}
-static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
+void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
{
struct intel_uncore *uncore = ggtt->vm.gt->uncore;
@@ -228,11 +206,6 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
}
-static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
-{
- intel_gtt_chipset_flush();
-}
-
u64 gen8_ggtt_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
u32 flags)
@@ -245,258 +218,7 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
return pte;
}
-static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
-{
- writeq(pte, addr);
-}
-
-static void gen8_ggtt_insert_page(struct i915_address_space *vm,
- dma_addr_t addr,
- u64 offset,
- enum i915_cache_level level,
- u32 flags)
-{
- struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
- gen8_pte_t __iomem *pte =
- (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
-
- gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
-
- ggtt->invalidate(ggtt);
-}
-
-static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
- struct i915_vma_resource *vma_res,
- enum i915_cache_level level,
- u32 flags)
-{
- const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
- struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
- gen8_pte_t __iomem *gte;
- gen8_pte_t __iomem *end;
- struct sgt_iter iter;
- dma_addr_t addr;
-
- /*
- * Note that we ignore PTE_READ_ONLY here. The caller must be careful
- * not to allow the user to override access to a read only page.
- */
-
- gte = (gen8_pte_t __iomem *)ggtt->gsm;
- gte += vma_res->start / I915_GTT_PAGE_SIZE;
- end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
-
- for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
- gen8_set_pte(gte++, pte_encode | addr);
- GEM_BUG_ON(gte > end);
-
- /* Fill the allocated but "unused" space beyond the end of the buffer */
- while (gte < end)
- gen8_set_pte(gte++, vm->scratch[0]->encode);
-
- /*
- * We want to flush the TLBs only after we're certain all the PTE
- * updates have finished.
- */
- ggtt->invalidate(ggtt);
-}
-
-static void gen6_ggtt_insert_page(struct i915_address_space *vm,
- dma_addr_t addr,
- u64 offset,
- enum i915_cache_level level,
- u32 flags)
-{
- struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
- gen6_pte_t __iomem *pte =
- (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
-
- iowrite32(vm->pte_encode(addr, level, flags), pte);
-
- ggtt->invalidate(ggtt);
-}
-
-/*
- * Binds an object into the global gtt with the specified cache level.
- * The object will be accessible to the GPU via commands whose operands
- * reference offsets within the global GTT as well as accessible by the GPU
- * through the GMADR mapped BAR (i915->mm.gtt->gtt).
- */
-static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
- struct i915_vma_resource *vma_res,
- enum i915_cache_level level,
- u32 flags)
-{
- struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
- gen6_pte_t __iomem *gte;
- gen6_pte_t __iomem *end;
- struct sgt_iter iter;
- dma_addr_t addr;
-
- gte = (gen6_pte_t __iomem *)ggtt->gsm;
- gte += vma_res->start / I915_GTT_PAGE_SIZE;
- end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
-
- for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
- iowrite32(vm->pte_encode(addr, level, flags), gte++);
- GEM_BUG_ON(gte > end);
-
- /* Fill the allocated but "unused" space beyond the end of the buffer */
- while (gte < end)
- iowrite32(vm->scratch[0]->encode, gte++);
-
- /*
- * We want to flush the TLBs only after we're certain all the PTE
- * updates have finished.
- */
- ggtt->invalidate(ggtt);
-}
-
-static void nop_clear_range(struct i915_address_space *vm,
- u64 start, u64 length)
-{
-}
-
-static void gen8_ggtt_clear_range(struct i915_address_space *vm,
- u64 start, u64 length)
-{
- struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
- unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
- unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
- const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
- gen8_pte_t __iomem *gtt_base =
- (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
- const int max_entries = ggtt_total_entries(ggtt) - first_entry;
- int i;
-
- if (WARN(num_entries > max_entries,
- "First entry = %d; Num entries = %d (max=%d)\n",
- first_entry, num_entries, max_entries))
- num_entries = max_entries;
-
- for (i = 0; i < num_entries; i++)
- gen8_set_pte(&gtt_base[i], scratch_pte);
-}
-
-static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
-{
- /*
- * Make sure the internal GAM fifo has been cleared of all GTT
- * writes before exiting stop_machine(). This guarantees that
- * any aperture accesses waiting to start in another process
- * cannot back up behind the GTT writes causing a hang.
- * The register can be any arbitrary GAM register.
- */
- intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
-}
-
-struct insert_page {
- struct i915_address_space *vm;
- dma_addr_t addr;
- u64 offset;
- enum i915_cache_level level;
-};
-
-static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
-{
- struct insert_page *arg = _arg;
-
- gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
- bxt_vtd_ggtt_wa(arg->vm);
-
- return 0;
-}
-
-static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
- dma_addr_t addr,
- u64 offset,
- enum i915_cache_level level,
- u32 unused)
-{
- struct insert_page arg = { vm, addr, offset, level };
-
- stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
-}
-
-struct insert_entries {
- struct i915_address_space *vm;
- struct i915_vma_resource *vma_res;
- enum i915_cache_level level;
- u32 flags;
-};
-
-static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
-{
- struct insert_entries *arg = _arg;
-
- gen8_ggtt_insert_entries(arg->vm, arg->vma_res, arg->level, arg->flags);
- bxt_vtd_ggtt_wa(arg->vm);
-
- return 0;
-}
-
-static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
- struct i915_vma_resource *vma_res,
- enum i915_cache_level level,
- u32 flags)
-{
- struct insert_entries arg = { vm, vma_res, level, flags };
-
- stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
-}
-
-static void gen6_ggtt_clear_range(struct i915_address_space *vm,
- u64 start, u64 length)
-{
- struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
- unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
- unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
- gen6_pte_t scratch_pte, __iomem *gtt_base =
- (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
- const int max_entries = ggtt_total_entries(ggtt) - first_entry;
- int i;
-
- if (WARN(num_entries > max_entries,
- "First entry = %d; Num entries = %d (max=%d)\n",
- first_entry, num_entries, max_entries))
- num_entries = max_entries;
-
- scratch_pte = vm->scratch[0]->encode;
- for (i = 0; i < num_entries; i++)
- iowrite32(scratch_pte, &gtt_base[i]);
-}
-
-static void i915_ggtt_insert_page(struct i915_address_space *vm,
- dma_addr_t addr,
- u64 offset,
- enum i915_cache_level cache_level,
- u32 unused)
-{
- unsigned int flags = (cache_level == I915_CACHE_NONE) ?
- AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
-
- intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
-}
-
-static void i915_ggtt_insert_entries(struct i915_address_space *vm,
- struct i915_vma_resource *vma_res,
- enum i915_cache_level cache_level,
- u32 unused)
-{
- unsigned int flags = (cache_level == I915_CACHE_NONE) ?
- AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
-
- intel_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT,
- flags);
-}
-
-static void i915_ggtt_clear_range(struct i915_address_space *vm,
- u64 start, u64 length)
-{
- intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
-}
-
-static void ggtt_bind_vma(struct i915_address_space *vm,
+void intel_ggtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma_resource *vma_res,
enum i915_cache_level cache_level,
@@ -520,7 +242,7 @@ static void ggtt_bind_vma(struct i915_address_space *vm,
vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
}
-static void ggtt_unbind_vma(struct i915_address_space *vm,
+void intel_ggtt_unbind_vma(struct i915_address_space *vm,
struct i915_vma_resource *vma_res)
{
vm->clear_range(vm, vma_res->start, vma_res->vma_size);
@@ -723,10 +445,10 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
ggtt->alias = ppgtt;
ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
- GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
+ GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != intel_ggtt_bind_vma);
ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
- GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
+ GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != intel_ggtt_unbind_vma);
ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
i915_vm_free_pt_stash(&ppgtt->vm, &stash);
@@ -749,8 +471,8 @@ static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
i915_vm_put(&ppgtt->vm);
- ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
- ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
+ ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
+ ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
}
int i915_init_ggtt(struct drm_i915_private *i915)
@@ -774,13 +496,13 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
{
struct i915_vma *vma, *vn;
- atomic_set(&ggtt->vm.open, 0);
-
flush_workqueue(ggtt->vm.i915->wq);
i915_gem_drain_freed_objects(ggtt->vm.i915);
mutex_lock(&ggtt->vm.mutex);
+ ggtt->vm.skip_pte_rewrite = true;
+
list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
struct drm_i915_gem_object *obj = vma->obj;
bool trylock;
@@ -838,364 +560,12 @@ void i915_ggtt_driver_late_release(struct drm_i915_private *i915)
dma_resv_fini(&ggtt->vm._resv);
}
-static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
-{
- snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
- snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
- return snb_gmch_ctl << 20;
-}
-
-static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
-{
- bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
- bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
- if (bdw_gmch_ctl)
- bdw_gmch_ctl = 1 << bdw_gmch_ctl;
-
-#ifdef CONFIG_X86_32
- /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
- if (bdw_gmch_ctl > 4)
- bdw_gmch_ctl = 4;
-#endif
-
- return bdw_gmch_ctl << 20;
-}
-
-static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
-{
- gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
- gmch_ctrl &= SNB_GMCH_GGMS_MASK;
-
- if (gmch_ctrl)
- return 1 << (20 + gmch_ctrl);
-
- return 0;
-}
-
-static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915)
-{
- /*
- * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset
- * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset
- */
- GEM_BUG_ON(GRAPHICS_VER(i915) < 6);
- return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M;
-}
-
-static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
-{
- return gen6_gttmmadr_size(i915) / 2;
-}
-
-static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
-{
- struct drm_i915_private *i915 = ggtt->vm.i915;
- struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
- phys_addr_t phys_addr;
- u32 pte_flags;
- int ret;
-
- GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915));
- phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915);
-
- /*
- * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
- * will be dropped. For WC mappings in general we have 64 byte burst
- * writes when the WC buffer is flushed, so we can't use it, but have to
- * resort to an uncached mapping. The WC issue is easily caught by the
- * readback check when writing GTT PTE entries.
- */
- if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11)
- ggtt->gsm = ioremap(phys_addr, size);
- else
- ggtt->gsm = ioremap_wc(phys_addr, size);
- if (!ggtt->gsm) {
- drm_err(&i915->drm, "Failed to map the ggtt page table\n");
- return -ENOMEM;
- }
-
- kref_init(&ggtt->vm.resv_ref);
- ret = setup_scratch_page(&ggtt->vm);
- if (ret) {
- drm_err(&i915->drm, "Scratch setup failed\n");
- /* iounmap will also get called at remove, but meh */
- iounmap(ggtt->gsm);
- return ret;
- }
-
- pte_flags = 0;
- if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
- pte_flags |= PTE_LM;
-
- ggtt->vm.scratch[0]->encode =
- ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
- I915_CACHE_NONE, pte_flags);
-
- return 0;
-}
-
-static void gen6_gmch_remove(struct i915_address_space *vm)
-{
- struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-
- iounmap(ggtt->gsm);
- free_scratch(vm);
-}
-
-static struct resource pci_resource(struct pci_dev *pdev, int bar)
+struct resource intel_pci_resource(struct pci_dev *pdev, int bar)
{
return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
pci_resource_len(pdev, bar));
}
-static int gen8_gmch_probe(struct i915_ggtt *ggtt)
-{
- struct drm_i915_private *i915 = ggtt->vm.i915;
- struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
- unsigned int size;
- u16 snb_gmch_ctl;
-
- /* TODO: We're not aware of mappable constraints on gen8 yet */
- if (!HAS_LMEM(i915)) {
- ggtt->gmadr = pci_resource(pdev, 2);
- ggtt->mappable_end = resource_size(&ggtt->gmadr);
- }
-
- pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
- if (IS_CHERRYVIEW(i915))
- size = chv_get_total_gtt_size(snb_gmch_ctl);
- else
- size = gen8_get_total_gtt_size(snb_gmch_ctl);
-
- ggtt->vm.alloc_pt_dma = alloc_pt_dma;
- ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
- ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;
-
- ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
- ggtt->vm.cleanup = gen6_gmch_remove;
- ggtt->vm.insert_page = gen8_ggtt_insert_page;
- ggtt->vm.clear_range = nop_clear_range;
- if (intel_scanout_needs_vtd_wa(i915))
- ggtt->vm.clear_range = gen8_ggtt_clear_range;
-
- ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
-
- /*
- * Serialize GTT updates with aperture access on BXT if VT-d is on,
- * and always on CHV.
- */
- if (intel_vm_no_concurrent_access_wa(i915)) {
- ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
- ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
- ggtt->vm.bind_async_flags =
- I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
- }
-
- ggtt->invalidate = gen8_ggtt_invalidate;
-
- ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
- ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
-
- ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
-
- setup_private_pat(ggtt->vm.gt->uncore);
-
- return ggtt_probe_common(ggtt, size);
-}
-
-static u64 snb_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
- u32 flags)
-{
- gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
-
- switch (level) {
- case I915_CACHE_L3_LLC:
- case I915_CACHE_LLC:
- pte |= GEN6_PTE_CACHE_LLC;
- break;
- case I915_CACHE_NONE:
- pte |= GEN6_PTE_UNCACHED;
- break;
- default:
- MISSING_CASE(level);
- }
-
- return pte;
-}
-
-static u64 ivb_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
- u32 flags)
-{
- gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
-
- switch (level) {
- case I915_CACHE_L3_LLC:
- pte |= GEN7_PTE_CACHE_L3_LLC;
- break;
- case I915_CACHE_LLC:
- pte |= GEN6_PTE_CACHE_LLC;
- break;
- case I915_CACHE_NONE:
- pte |= GEN6_PTE_UNCACHED;
- break;
- default:
- MISSING_CASE(level);
- }
-
- return pte;
-}
-
-static u64 byt_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
- u32 flags)
-{
- gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
-
- if (!(flags & PTE_READ_ONLY))
- pte |= BYT_PTE_WRITEABLE;
-
- if (level != I915_CACHE_NONE)
- pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
-
- return pte;
-}
-
-static u64 hsw_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
- u32 flags)
-{
- gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
-
- if (level != I915_CACHE_NONE)
- pte |= HSW_WB_LLC_AGE3;
-
- return pte;
-}
-
-static u64 iris_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
- u32 flags)
-{
- gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
-
- switch (level) {
- case I915_CACHE_NONE:
- break;
- case I915_CACHE_WT:
- pte |= HSW_WT_ELLC_LLC_AGE3;
- break;
- default:
- pte |= HSW_WB_ELLC_LLC_AGE3;
- break;
- }
-
- return pte;
-}
-
-static int gen6_gmch_probe(struct i915_ggtt *ggtt)
-{
- struct drm_i915_private *i915 = ggtt->vm.i915;
- struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
- unsigned int size;
- u16 snb_gmch_ctl;
-
- ggtt->gmadr = pci_resource(pdev, 2);
- ggtt->mappable_end = resource_size(&ggtt->gmadr);
-
- /*
- * 64/512MB is the current min/max we actually know of, but this is
- * just a coarse sanity check.
- */
- if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
- drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
- &ggtt->mappable_end);
- return -ENXIO;
- }
-
- pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
-
- size = gen6_get_total_gtt_size(snb_gmch_ctl);
- ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
-
- ggtt->vm.alloc_pt_dma = alloc_pt_dma;
- ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
-
- ggtt->vm.clear_range = nop_clear_range;
- if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
- ggtt->vm.clear_range = gen6_ggtt_clear_range;
- ggtt->vm.insert_page = gen6_ggtt_insert_page;
- ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
- ggtt->vm.cleanup = gen6_gmch_remove;
-
- ggtt->invalidate = gen6_ggtt_invalidate;
-
- if (HAS_EDRAM(i915))
- ggtt->vm.pte_encode = iris_pte_encode;
- else if (IS_HASWELL(i915))
- ggtt->vm.pte_encode = hsw_pte_encode;
- else if (IS_VALLEYVIEW(i915))
- ggtt->vm.pte_encode = byt_pte_encode;
- else if (GRAPHICS_VER(i915) >= 7)
- ggtt->vm.pte_encode = ivb_pte_encode;
- else
- ggtt->vm.pte_encode = snb_pte_encode;
-
- ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
- ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
-
- return ggtt_probe_common(ggtt, size);
-}
-
-static void i915_gmch_remove(struct i915_address_space *vm)
-{
- intel_gmch_remove();
-}
-
-static int i915_gmch_probe(struct i915_ggtt *ggtt)
-{
- struct drm_i915_private *i915 = ggtt->vm.i915;
- phys_addr_t gmadr_base;
- int ret;
-
- ret = intel_gmch_probe(i915->bridge_dev, to_pci_dev(i915->drm.dev), NULL);
- if (!ret) {
- drm_err(&i915->drm, "failed to set up gmch\n");
- return -EIO;
- }
-
- intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
-
- ggtt->gmadr =
- (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
-
- ggtt->vm.alloc_pt_dma = alloc_pt_dma;
- ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
-
- if (needs_idle_maps(i915)) {
- drm_notice(&i915->drm,
- "Flushing DMA requests before IOMMU unmaps; performance may be degraded\n");
- ggtt->do_idle_maps = true;
- }
-
- ggtt->vm.insert_page = i915_ggtt_insert_page;
- ggtt->vm.insert_entries = i915_ggtt_insert_entries;
- ggtt->vm.clear_range = i915_ggtt_clear_range;
- ggtt->vm.cleanup = i915_gmch_remove;
-
- ggtt->invalidate = gmch_ggtt_invalidate;
-
- ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
- ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
-
- if (unlikely(ggtt->do_idle_maps))
- drm_notice(&i915->drm,
- "Applying Ironlake quirks for intel_iommu\n");
-
- return 0;
-}
-
static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
@@ -1207,11 +577,11 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
dma_resv_init(&ggtt->vm._resv);
if (GRAPHICS_VER(i915) <= 5)
- ret = i915_gmch_probe(ggtt);
+ ret = intel_gt_gmch_gen5_probe(ggtt);
else if (GRAPHICS_VER(i915) < 8)
- ret = gen6_gmch_probe(ggtt);
+ ret = intel_gt_gmch_gen6_probe(ggtt);
else
- ret = gen8_gmch_probe(ggtt);
+ ret = intel_gt_gmch_gen8_probe(ggtt);
if (ret) {
dma_resv_fini(&ggtt->vm._resv);
return ret;
@@ -1265,10 +635,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
int i915_ggtt_enable_hw(struct drm_i915_private *i915)
{
- if (GRAPHICS_VER(i915) < 6 && !intel_enable_gtt())
- return -EIO;
-
- return 0;
+ return intel_gt_gmch_gen5_enable_hw(i915);
}
void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
@@ -1308,16 +675,12 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm)
{
struct i915_vma *vma;
bool write_domain_objs = false;
- int open;
drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
/* First fill our portion of the GTT with scratch pages */
vm->clear_range(vm, 0, vm->total);
- /* Skip rewriting PTE on VMA unbind. */
- open = atomic_xchg(&vm->open, 0);
-
/* clflush objects bound into the GGTT and rebind them. */
list_for_each_entry(vma, &vm->bound_list, vm_link) {
struct drm_i915_gem_object *obj = vma->obj;
@@ -1334,8 +697,6 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm)
}
}
- atomic_set(&vm->open, open);
-
return write_domain_objs;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index d112ffd56418..e52718a87f14 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -134,6 +134,13 @@
#define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */
#define MI_USE_GGTT (1 << 22) /* g4x+ */
#define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1)
+#define MI_ATOMIC MI_INSTR(0x2f, 1)
+#define MI_ATOMIC_INLINE (MI_INSTR(0x2f, 9) | MI_ATOMIC_INLINE_DATA)
+#define MI_ATOMIC_GLOBAL_GTT (1 << 22)
+#define MI_ATOMIC_INLINE_DATA (1 << 18)
+#define MI_ATOMIC_CS_STALL (1 << 17)
+#define MI_ATOMIC_MOVE (0x4 << 8)
+
/*
* Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM:
* - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw
@@ -144,6 +151,7 @@
#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
/* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
#define MI_LRI_LRM_CS_MMIO REG_BIT(19)
+#define MI_LRI_MMIO_REMAP_EN REG_BIT(17)
#define MI_LRI_FORCE_POSTED (1<<12)
#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
@@ -153,8 +161,10 @@
#define MI_FLUSH_DW_PROTECTED_MEM_EN (1 << 22)
#define MI_FLUSH_DW_STORE_INDEX (1<<21)
#define MI_INVALIDATE_TLB (1<<18)
+#define MI_FLUSH_DW_CCS (1<<16)
#define MI_FLUSH_DW_OP_STOREDW (1<<14)
#define MI_FLUSH_DW_OP_MASK (3<<14)
+#define MI_FLUSH_DW_LLC (1<<9)
#define MI_FLUSH_DW_NOTIFY (1<<8)
#define MI_INVALIDATE_BSD (1<<7)
#define MI_FLUSH_DW_USE_GTT (1<<2)
@@ -203,8 +213,27 @@
#define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3))
#define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2)
+#define XY_CTRL_SURF_INSTR_SIZE 5
+#define MI_FLUSH_DW_SIZE 3
+#define XY_CTRL_SURF_COPY_BLT ((2 << 29) | (0x48 << 22) | 3)
+#define SRC_ACCESS_TYPE_SHIFT 21
+#define DST_ACCESS_TYPE_SHIFT 20
+#define CCS_SIZE_MASK 0x3FF
+#define CCS_SIZE_SHIFT 8
+#define XY_CTRL_SURF_MOCS_MASK GENMASK(31, 25)
+#define NUM_CCS_BYTES_PER_BLOCK 256
+#define NUM_BYTES_PER_CCS_BYTE 256
+#define NUM_CCS_BLKS_PER_XFER 1024
+#define INDIRECT_ACCESS 0
+#define DIRECT_ACCESS 1
+
#define COLOR_BLT_CMD (2 << 29 | 0x40 << 22 | (5 - 2))
#define XY_COLOR_BLT_CMD (2 << 29 | 0x50 << 22)
+#define XY_FAST_COLOR_BLT_CMD (2 << 29 | 0x44 << 22)
+#define XY_FAST_COLOR_BLT_DEPTH_32 (2 << 19)
+#define XY_FAST_COLOR_BLT_DW 16
+#define XY_FAST_COLOR_BLT_MOCS_MASK GENMASK(27, 21)
+#define XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
#define SRC_COPY_BLT_CMD (2 << 29 | 0x43 << 22)
#define GEN9_XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22)
#define XY_SRC_COPY_BLT_CMD (2 << 29 | 0x53 << 22)
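
The CCS constants above encode a fixed 1:256 ratio between compression control state and main surface, transferred 256 bytes at a time. A worked sizing example (illustrative figures only):

u64 surface   = 64ull << 20;                          /* 64 MiB surface */
u64 ccs_bytes = surface / NUM_BYTES_PER_CCS_BYTE;     /* 256 KiB of CCS */
u64 blocks    = ccs_bytes / NUM_CCS_BYTES_PER_BLOCK;  /* 1024 blocks */
/* 1024 == NUM_CCS_BLKS_PER_XFER: one XY_CTRL_SURF_COPY_BLT transfer */
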
diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c
new file mode 100644
index 000000000000..0e494028b81d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gsc.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright(c) 2019-2022, Intel Corporation. All rights reserved.
+ */
+
+#include <linux/irq.h>
+#include <linux/mei_aux.h>
+#include "i915_drv.h"
+#include "i915_reg.h"
+#include "gt/intel_gsc.h"
+#include "gt/intel_gt.h"
+
+#define GSC_BAR_LENGTH 0x00000FFC
+
+static void gsc_irq_mask(struct irq_data *d)
+{
+ /* generic irq handling */
+}
+
+static void gsc_irq_unmask(struct irq_data *d)
+{
+ /* generic irq handling */
+}
+
+static struct irq_chip gsc_irq_chip = {
+ .name = "gsc_irq_chip",
+ .irq_mask = gsc_irq_mask,
+ .irq_unmask = gsc_irq_unmask,
+};
+
+static int gsc_irq_init(int irq)
+{
+ irq_set_chip_and_handler_name(irq, &gsc_irq_chip,
+ handle_simple_irq, "gsc_irq_handler");
+
+ return irq_set_chip_data(irq, NULL);
+}
+
+struct gsc_def {
+ const char *name;
+ unsigned long bar;
+ size_t bar_size;
+};
+
+/* gsc resources and definitions (HECI1 and HECI2) */
+static const struct gsc_def gsc_def_dg1[] = {
+ {
+ /* HECI1 not yet implemented. */
+ },
+ {
+ .name = "mei-gscfi",
+ .bar = DG1_GSC_HECI2_BASE,
+ .bar_size = GSC_BAR_LENGTH,
+ }
+};
+
+static const struct gsc_def gsc_def_dg2[] = {
+ {
+ .name = "mei-gsc",
+ .bar = DG2_GSC_HECI1_BASE,
+ .bar_size = GSC_BAR_LENGTH,
+ },
+ {
+ .name = "mei-gscfi",
+ .bar = DG2_GSC_HECI2_BASE,
+ .bar_size = GSC_BAR_LENGTH,
+ }
+};
+
+static void gsc_release_dev(struct device *dev)
+{
+ struct auxiliary_device *aux_dev = to_auxiliary_dev(dev);
+ struct mei_aux_device *adev = auxiliary_dev_to_mei_aux_dev(aux_dev);
+
+ kfree(adev);
+}
+
+static void gsc_destroy_one(struct intel_gsc_intf *intf)
+{
+ if (intf->adev) {
+ auxiliary_device_delete(&intf->adev->aux_dev);
+ auxiliary_device_uninit(&intf->adev->aux_dev);
+ intf->adev = NULL;
+ }
+ if (intf->irq >= 0)
+ irq_free_desc(intf->irq);
+ intf->irq = -1;
+}
+
+static void gsc_init_one(struct drm_i915_private *i915,
+ struct intel_gsc_intf *intf,
+ unsigned int intf_id)
+{
+ struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+ struct mei_aux_device *adev;
+ struct auxiliary_device *aux_dev;
+ const struct gsc_def *def;
+ int ret;
+
+ intf->irq = -1;
+ intf->id = intf_id;
+
+ if (intf_id == 0 && !HAS_HECI_PXP(i915))
+ return;
+
+ if (IS_DG1(i915)) {
+ def = &gsc_def_dg1[intf_id];
+ } else if (IS_DG2(i915)) {
+ def = &gsc_def_dg2[intf_id];
+ } else {
+ drm_warn_once(&i915->drm, "Unknown platform\n");
+ return;
+ }
+
+ if (!def->name) {
+ drm_warn_once(&i915->drm, "HECI%d is not implemented!\n", intf_id + 1);
+ return;
+ }
+
+ intf->irq = irq_alloc_desc(0);
+ if (intf->irq < 0) {
+ drm_err(&i915->drm, "gsc irq error %d\n", intf->irq);
+ return;
+ }
+
+ ret = gsc_irq_init(intf->irq);
+ if (ret < 0) {
+ drm_err(&i915->drm, "gsc irq init failed %d\n", ret);
+ goto fail;
+ }
+
+ adev = kzalloc(sizeof(*adev), GFP_KERNEL);
+ if (!adev)
+ goto fail;
+
+ adev->irq = intf->irq;
+ adev->bar.parent = &pdev->resource[0];
+ adev->bar.start = def->bar + pdev->resource[0].start;
+ adev->bar.end = adev->bar.start + def->bar_size - 1;
+ adev->bar.flags = IORESOURCE_MEM;
+ adev->bar.desc = IORES_DESC_NONE;
+
+ aux_dev = &adev->aux_dev;
+ aux_dev->name = def->name;
+ aux_dev->id = (pci_domain_nr(pdev->bus) << 16) |
+ PCI_DEVID(pdev->bus->number, pdev->devfn);
+ aux_dev->dev.parent = &pdev->dev;
+ aux_dev->dev.release = gsc_release_dev;
+
+ ret = auxiliary_device_init(aux_dev);
+ if (ret < 0) {
+ drm_err(&i915->drm, "gsc aux init failed %d\n", ret);
+ kfree(adev);
+ goto fail;
+ }
+
+ ret = auxiliary_device_add(aux_dev);
+ if (ret < 0) {
+ drm_err(&i915->drm, "gsc aux add failed %d\n", ret);
+ /* adev will be freed with the put_device() and .release sequence */
+ auxiliary_device_uninit(aux_dev);
+ goto fail;
+ }
+ intf->adev = adev;
+
+ return;
+fail:
+ gsc_destroy_one(intf);
+}
+
+static void gsc_irq_handler(struct intel_gt *gt, unsigned int intf_id)
+{
+ int ret;
+
+ if (intf_id >= INTEL_GSC_NUM_INTERFACES) {
+ drm_warn_once(&gt->i915->drm, "GSC irq: intf_id %d is out of range", intf_id);
+ return;
+ }
+
+ if (!HAS_HECI_GSC(gt->i915)) {
+ drm_warn_once(&gt->i915->drm, "GSC irq: not supported");
+ return;
+ }
+
+ if (gt->gsc.intf[intf_id].irq < 0) {
+ drm_err_ratelimited(&gt->i915->drm, "GSC irq: irq not set");
+ return;
+ }
+
+ ret = generic_handle_irq(gt->gsc.intf[intf_id].irq);
+ if (ret)
+ drm_err_ratelimited(&gt->i915->drm, "error handling GSC irq: %d\n", ret);
+}
+
+void intel_gsc_irq_handler(struct intel_gt *gt, u32 iir)
+{
+ if (iir & GSC_IRQ_INTF(0))
+ gsc_irq_handler(gt, 0);
+ if (iir & GSC_IRQ_INTF(1))
+ gsc_irq_handler(gt, 1);
+}
+
+void intel_gsc_init(struct intel_gsc *gsc, struct drm_i915_private *i915)
+{
+ unsigned int i;
+
+ if (!HAS_HECI_GSC(i915))
+ return;
+
+ for (i = 0; i < INTEL_GSC_NUM_INTERFACES; i++)
+ gsc_init_one(i915, &gsc->intf[i], i);
+}
+
+void intel_gsc_fini(struct intel_gsc *gsc)
+{
+ struct intel_gt *gt = gsc_to_gt(gsc);
+ unsigned int i;
+
+ if (!HAS_HECI_GSC(gt->i915))
+ return;
+
+ for (i = 0; i < INTEL_GSC_NUM_INTERFACES; i++)
+ gsc_destroy_one(&gsc->intf[i]);
+}
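
The auxiliary-device id built in gsc_init_one() packs the card's PCI location so several GSC instances can coexist in one system. A worked encoding, assuming a device at 0000:03:00.0:

/* aux_dev->id = (domain << 16) | PCI_DEVID(bus, devfn)
 *             = (0 << 16) | (0x03 << 8 | 0x00) == 0x0300
 */
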
diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.h b/drivers/gpu/drm/i915/gt/intel_gsc.h
new file mode 100644
index 000000000000..68582f912b21
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gsc.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright(c) 2019-2022, Intel Corporation. All rights reserved.
+ */
+#ifndef __INTEL_GSC_DEV_H__
+#define __INTEL_GSC_DEV_H__
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct intel_gt;
+struct mei_aux_device;
+
+#define INTEL_GSC_NUM_INTERFACES 2
+/*
+ * The HECI1 bit corresponds to bit15 and HECI2 to bit14.
+ * This ordering leaves room to add more interfaces in the future.
+ */
+#define GSC_IRQ_INTF(_x) BIT(15 - (_x))
+
+/**
+ * struct intel_gsc - graphics security controller
+ * @intf: gsc interface
+ */
+struct intel_gsc {
+ struct intel_gsc_intf {
+ struct mei_aux_device *adev;
+ int irq;
+ unsigned int id;
+ } intf[INTEL_GSC_NUM_INTERFACES];
+};
+
+void intel_gsc_init(struct intel_gsc *gsc, struct drm_i915_private *dev_priv);
+void intel_gsc_fini(struct intel_gsc *gsc);
+void intel_gsc_irq_handler(struct intel_gt *gt, u32 iir);
+
+#endif /* __INTEL_GSC_DEV_H__ */
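
So GSC_IRQ_INTF(0) evaluates to BIT(15) for HECI1 and GSC_IRQ_INTF(1) to BIT(14) for HECI2, matching the two checks in intel_gsc_irq_handler(); a hypothetical third interface would land on bit13 without renumbering the existing ones.
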
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 8a2483ccbfb9..92394f13b42f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -4,7 +4,6 @@
*/
#include <drm/drm_managed.h>
-#include <drm/intel-gtt.h>
#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
@@ -17,6 +16,7 @@
#include "intel_gt_buffer_pool.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_debugfs.h"
+#include "intel_gt_gmch.h"
#include "intel_gt_pm.h"
#include "intel_gt_regs.h"
#include "intel_gt_requests.h"
@@ -26,10 +26,11 @@
#include "intel_rc6.h"
#include "intel_renderstate.h"
#include "intel_rps.h"
+#include "intel_gt_sysfs.h"
#include "intel_uncore.h"
#include "shmem_utils.h"
-void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
+static void __intel_gt_init_early(struct intel_gt *gt)
{
spin_lock_init(&gt->irq_lock);
@@ -51,17 +52,23 @@ void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
intel_rps_init_early(&gt->rps);
}
-void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
+/* Preliminary initialization of Tile 0 */
+void intel_root_gt_init_early(struct drm_i915_private *i915)
{
+ struct intel_gt *gt = to_gt(i915);
+
gt->i915 = i915;
gt->uncore = &i915->uncore;
+
+ __intel_gt_init_early(gt);
}
-int intel_gt_probe_lmem(struct intel_gt *gt)
+static int intel_gt_probe_lmem(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
+ unsigned int instance = gt->info.id;
+ int id = INTEL_REGION_LMEM_0 + instance;
struct intel_memory_region *mem;
- int id;
int err;
mem = intel_gt_setup_lmem(gt);
@@ -76,9 +83,8 @@ int intel_gt_probe_lmem(struct intel_gt *gt)
return err;
}
- id = INTEL_REGION_LMEM;
-
mem->id = id;
+ mem->instance = instance;
intel_memory_region_set_name(mem, "local%u", mem->instance);
@@ -96,6 +102,12 @@ int intel_gt_assign_ggtt(struct intel_gt *gt)
return gt->ggtt ? 0 : -ENOMEM;
}
+static const char * const intel_steering_types[] = {
+ "L3BANK",
+ "MSLICE",
+ "LNCF",
+};
+
static const struct intel_mmio_range icl_l3bank_steering_table[] = {
{ 0x00B100, 0x00B3FF },
{},
@@ -439,14 +451,17 @@ void intel_gt_chipset_flush(struct intel_gt *gt)
{
wmb();
if (GRAPHICS_VER(gt->i915) < 6)
- intel_gtt_chipset_flush();
+ intel_gt_gmch_gen5_chipset_flush(gt);
}
void intel_gt_driver_register(struct intel_gt *gt)
{
+ intel_gsc_init(&gt->gsc, gt->i915);
+
intel_rps_driver_register(&gt->rps);
intel_gt_debugfs_register(gt);
+ intel_gt_sysfs_register(gt);
}
static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
@@ -712,6 +727,11 @@ int intel_gt_init(struct intel_gt *gt)
if (err)
goto err_uc_init;
+ err = intel_gt_init_hwconfig(gt);
+ if (err)
+ drm_err(&gt->i915->drm, "Failed to retrieve hwconfig table: %pe\n",
+ ERR_PTR(err));
+
err = __engines_record_defaults(gt);
if (err)
goto err_gt;
@@ -766,6 +786,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
intel_wakeref_t wakeref;
intel_rps_driver_unregister(&gt->rps);
+ intel_gsc_fini(&gt->gsc);
intel_pxp_fini(&gt->pxp);
@@ -793,18 +814,24 @@ void intel_gt_driver_release(struct intel_gt *gt)
intel_gt_pm_fini(gt);
intel_gt_fini_scratch(gt);
intel_gt_fini_buffer_pool(gt);
+ intel_gt_fini_hwconfig(gt);
}
-void intel_gt_driver_late_release(struct intel_gt *gt)
+void intel_gt_driver_late_release_all(struct drm_i915_private *i915)
{
+ struct intel_gt *gt;
+ unsigned int id;
+
/* We need to wait for inflight RCU frees to release their grip */
rcu_barrier();
- intel_uc_driver_late_release(&gt->uc);
- intel_gt_fini_requests(gt);
- intel_gt_fini_reset(gt);
- intel_gt_fini_timelines(gt);
- intel_engines_free(gt);
+ for_each_gt(gt, i915, id) {
+ intel_uc_driver_late_release(&gt->uc);
+ intel_gt_fini_requests(gt);
+ intel_gt_fini_reset(gt);
+ intel_gt_fini_timelines(gt);
+ intel_engines_free(gt);
+ }
}
/**
@@ -913,6 +940,35 @@ u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg)
return intel_uncore_read_fw(gt->uncore, reg);
}
+/**
+ * intel_gt_get_valid_steering_for_reg - get a valid steering for a register
+ * @gt: GT structure
+ * @reg: register for which the steering is required
+ * @sliceid: return variable for slice steering
+ * @subsliceid: return variable for subslice steering
+ *
+ * This function returns a slice/subslice pair that is guaranteed to work for
+ * read steering of the given register. Note that a value will be returned even
+ * if the register is not replicated and therefore does not actually require
+ * steering.
+ */
+void intel_gt_get_valid_steering_for_reg(struct intel_gt *gt, i915_reg_t reg,
+ u8 *sliceid, u8 *subsliceid)
+{
+ int type;
+
+ for (type = 0; type < NUM_STEERING_TYPES; type++) {
+ if (intel_gt_reg_needs_read_steering(gt, reg, type)) {
+ intel_gt_get_valid_steering(gt, type, sliceid,
+ subsliceid);
+ return;
+ }
+ }
+
+ *sliceid = gt->default_steering.groupid;
+ *subsliceid = gt->default_steering.instanceid;
+}
+
u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg)
{
int type;
@@ -932,6 +988,145 @@ u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg)
return intel_uncore_read(gt->uncore, reg);
}
+static void report_steering_type(struct drm_printer *p,
+ struct intel_gt *gt,
+ enum intel_steering_type type,
+ bool dump_table)
+{
+ const struct intel_mmio_range *entry;
+ u8 slice, subslice;
+
+ BUILD_BUG_ON(ARRAY_SIZE(intel_steering_types) != NUM_STEERING_TYPES);
+
+ if (!gt->steering_table[type]) {
+ drm_printf(p, "%s steering: uses default steering\n",
+ intel_steering_types[type]);
+ return;
+ }
+
+ intel_gt_get_valid_steering(gt, type, &slice, &subslice);
+ drm_printf(p, "%s steering: sliceid=0x%x, subsliceid=0x%x\n",
+ intel_steering_types[type], slice, subslice);
+
+ if (!dump_table)
+ return;
+
+ for (entry = gt->steering_table[type]; entry->end; entry++)
+ drm_printf(p, "\t0x%06x - 0x%06x\n", entry->start, entry->end);
+}
+
+void intel_gt_report_steering(struct drm_printer *p, struct intel_gt *gt,
+ bool dump_table)
+{
+ drm_printf(p, "Default steering: sliceid=0x%x, subsliceid=0x%x\n",
+ gt->default_steering.groupid,
+ gt->default_steering.instanceid);
+
+ if (HAS_MSLICES(gt->i915)) {
+ report_steering_type(p, gt, MSLICE, dump_table);
+ report_steering_type(p, gt, LNCF, dump_table);
+ }
+}
+
+static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr)
+{
+ int ret;
+
+ if (!gt_is_root(gt)) {
+ struct intel_uncore_mmio_debug *mmio_debug;
+ struct intel_uncore *uncore;
+
+ uncore = kzalloc(sizeof(*uncore), GFP_KERNEL);
+ if (!uncore)
+ return -ENOMEM;
+
+ mmio_debug = kzalloc(sizeof(*mmio_debug), GFP_KERNEL);
+ if (!mmio_debug) {
+ kfree(uncore);
+ return -ENOMEM;
+ }
+
+ gt->uncore = uncore;
+ gt->uncore->debug = mmio_debug;
+
+ __intel_gt_init_early(gt);
+ }
+
+ intel_uncore_init_early(gt->uncore, gt);
+
+ ret = intel_uncore_setup_mmio(gt->uncore, phys_addr);
+ if (ret)
+ return ret;
+
+ gt->phys_addr = phys_addr;
+
+ return 0;
+}
+
+static void
+intel_gt_tile_cleanup(struct intel_gt *gt)
+{
+ intel_uncore_cleanup_mmio(gt->uncore);
+
+ if (!gt_is_root(gt)) {
+ kfree(gt->uncore->debug);
+ kfree(gt->uncore);
+ kfree(gt);
+ }
+}
+
+int intel_gt_probe_all(struct drm_i915_private *i915)
+{
+ struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+ struct intel_gt *gt = &i915->gt0;
+ phys_addr_t phys_addr;
+ unsigned int mmio_bar;
+ int ret;
+
+ mmio_bar = GRAPHICS_VER(i915) == 2 ? 1 : 0;
+ phys_addr = pci_resource_start(pdev, mmio_bar);
+
+ /*
+ * We always have at least one primary GT on any device
+ * and it has already been initialized early during probe
+ * in i915_driver_probe().
+ */
+ ret = intel_gt_tile_setup(gt, phys_addr);
+ if (ret)
+ return ret;
+
+ i915->gt[0] = gt;
+
+ /* TODO: add more tiles */
+ return 0;
+}
+
+int intel_gt_tiles_init(struct drm_i915_private *i915)
+{
+ struct intel_gt *gt;
+ unsigned int id;
+ int ret;
+
+ for_each_gt(gt, i915, id) {
+ ret = intel_gt_probe_lmem(gt);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+void intel_gt_release_all(struct drm_i915_private *i915)
+{
+ struct intel_gt *gt;
+ unsigned int id;
+
+ for_each_gt(gt, i915, id) {
+ intel_gt_tile_cleanup(gt);
+ i915->gt[id] = NULL;
+ }
+}
+
void intel_gt_info_print(const struct intel_gt_info *info,
struct drm_printer *p)
{
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 0f571c8ee22b..44c6cb63ccbc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -13,12 +13,24 @@
struct drm_i915_private;
struct drm_printer;
+struct insert_entries {
+ struct i915_address_space *vm;
+ struct i915_vma_resource *vma_res;
+ enum i915_cache_level level;
+ u32 flags;
+};
+
#define GT_TRACE(gt, fmt, ...) do { \
const struct intel_gt *gt__ __maybe_unused = (gt); \
GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \
##__VA_ARGS__); \
} while (0)
+static inline bool gt_is_root(struct intel_gt *gt)
+{
+ return !gt->info.id;
+}
+
static inline struct intel_gt *uc_to_gt(struct intel_uc *uc)
{
return container_of(uc, struct intel_gt, uc);
@@ -34,10 +46,13 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc)
return container_of(huc, struct intel_gt, uc.huc);
}
-void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915);
-void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915);
+static inline struct intel_gt *gsc_to_gt(struct intel_gsc *gsc)
+{
+ return container_of(gsc, struct intel_gt, gsc);
+}
+
+void intel_root_gt_init_early(struct drm_i915_private *i915);
int intel_gt_assign_ggtt(struct intel_gt *gt);
-int intel_gt_probe_lmem(struct intel_gt *gt);
int intel_gt_init_mmio(struct intel_gt *gt);
int __must_check intel_gt_init_hw(struct intel_gt *gt);
int intel_gt_init(struct intel_gt *gt);
@@ -47,7 +62,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt);
void intel_gt_driver_remove(struct intel_gt *gt);
void intel_gt_driver_release(struct intel_gt *gt);
-void intel_gt_driver_late_release(struct intel_gt *gt);
+void intel_gt_driver_late_release_all(struct drm_i915_private *i915);
int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
@@ -84,9 +99,25 @@ static inline bool intel_gt_needs_read_steering(struct intel_gt *gt,
return gt->steering_table[type];
}
+void intel_gt_get_valid_steering_for_reg(struct intel_gt *gt, i915_reg_t reg,
+ u8 *sliceid, u8 *subsliceid);
+
u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg);
u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg);
+void intel_gt_report_steering(struct drm_printer *p, struct intel_gt *gt,
+ bool dump_table);
+
+int intel_gt_probe_all(struct drm_i915_private *i915);
+int intel_gt_tiles_init(struct drm_i915_private *i915);
+void intel_gt_release_all(struct drm_i915_private *i915);
+
+#define for_each_gt(gt__, i915__, id__) \
+ for ((id__) = 0; \
+ (id__) < I915_MAX_GT; \
+ (id__)++) \
+ for_each_if(((gt__) = (i915__)->gt[(id__)]))
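+
+/*
+ * Usage sketch (assuming an i915 device with i915->gt[] populated and a
+ * struct drm_printer *p from the caller):
+ *
+ *	struct intel_gt *gt;
+ *	unsigned int i;
+ *
+ *	for_each_gt(gt, i915, i)
+ *		intel_gt_info_print(&gt->info, p);
+ *
+ * NULL slots in i915->gt[] are skipped by the for_each_if() guard.
+ */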
+
void intel_gt_info_print(const struct intel_gt_info *info,
struct drm_printer *p);
@@ -94,4 +125,6 @@ void intel_gt_watchdog_work(struct work_struct *work);
void intel_gt_invalidate_tlbs(struct intel_gt *gt);
+struct resource intel_pci_resource(struct pci_dev *pdev, int bar);
+
#endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
index 0db822c3b7e5..d5d1b04dbcad 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
@@ -161,6 +161,10 @@ void intel_gt_init_clock_frequency(struct intel_gt *gt)
if (gt->clock_frequency)
gt->clock_period_ns = intel_gt_clock_interval_to_ns(gt, 1);
+ /* Icelake appears to use another fixed frequency for CTX_TIMESTAMP */
+ if (GRAPHICS_VER(gt->i915) == 11)
+ gt->clock_period_ns = NSEC_PER_SEC / 13750000;
+
GT_TRACE(gt,
"Using clock frequency: %dkHz, period: %dns, wrap: %lldms\n",
gt->clock_frequency / 1000,
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
index f103664b71d4..d886fdc2c694 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
@@ -6,6 +6,7 @@
#include <linux/debugfs.h>
#include "i915_drv.h"
+#include "intel_gt.h"
#include "intel_gt_debugfs.h"
#include "intel_gt_engines_debugfs.h"
#include "intel_gt_pm_debugfs.h"
@@ -29,7 +30,7 @@ int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val)
}
}
-int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val)
+void intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val)
{
/* Flush any previous reset before applying for a new one */
wait_event(gt->reset.queue,
@@ -37,7 +38,6 @@ int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val)
intel_gt_handle_error(gt, val, I915_ERROR_CAPTURE,
"Manually reset engine mask to %llx", val);
- return 0;
}
/*
@@ -51,16 +51,30 @@ static int __intel_gt_debugfs_reset_show(void *data, u64 *val)
static int __intel_gt_debugfs_reset_store(void *data, u64 val)
{
- return intel_gt_debugfs_reset_store(data, val);
+ intel_gt_debugfs_reset_store(data, val);
+
+ return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(reset_fops, __intel_gt_debugfs_reset_show,
__intel_gt_debugfs_reset_store, "%llu\n");
+static int steering_show(struct seq_file *m, void *data)
+{
+ struct drm_printer p = drm_seq_file_printer(m);
+ struct intel_gt *gt = m->private;
+
+ intel_gt_report_steering(&p, gt, true);
+
+ return 0;
+}
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(steering);
+
static void gt_debugfs_register(struct intel_gt *gt, struct dentry *root)
{
static const struct intel_gt_debugfs_file files[] = {
{ "reset", &reset_fops, NULL },
+ { "steering", &steering_fops },
};
intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h
index 17e79b735cfe..e4110eebf093 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h
@@ -48,6 +48,6 @@ void intel_gt_debugfs_register_files(struct dentry *root,
/* functions that need to be accessed by the upper level non-gt interfaces */
int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val);
-int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val);
+void intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val);
#endif /* INTEL_GT_DEBUGFS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c
new file mode 100644
index 000000000000..18e488672d1b
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c
@@ -0,0 +1,654 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/intel-gtt.h>
+#include <drm/i915_drm.h>
+
+#include <linux/agp_backend.h>
+#include <linux/stop_machine.h>
+
+#include "i915_drv.h"
+#include "intel_gt_gmch.h"
+#include "intel_gt_regs.h"
+#include "intel_gt.h"
+#include "i915_utils.h"
+
+#include "gen8_ppgtt.h"
+
+struct insert_page {
+ struct i915_address_space *vm;
+ dma_addr_t addr;
+ u64 offset;
+ enum i915_cache_level level;
+};
+
+static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
+{
+ writeq(pte, addr);
+}
+
+static void nop_clear_range(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+}
+
+static u64 snb_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+ switch (level) {
+ case I915_CACHE_L3_LLC:
+ case I915_CACHE_LLC:
+ pte |= GEN6_PTE_CACHE_LLC;
+ break;
+ case I915_CACHE_NONE:
+ pte |= GEN6_PTE_UNCACHED;
+ break;
+ default:
+ MISSING_CASE(level);
+ }
+
+ return pte;
+}
+
+static u64 ivb_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+ switch (level) {
+ case I915_CACHE_L3_LLC:
+ pte |= GEN7_PTE_CACHE_L3_LLC;
+ break;
+ case I915_CACHE_LLC:
+ pte |= GEN6_PTE_CACHE_LLC;
+ break;
+ case I915_CACHE_NONE:
+ pte |= GEN6_PTE_UNCACHED;
+ break;
+ default:
+ MISSING_CASE(level);
+ }
+
+ return pte;
+}
+
+static u64 byt_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+ if (!(flags & PTE_READ_ONLY))
+ pte |= BYT_PTE_WRITEABLE;
+
+ if (level != I915_CACHE_NONE)
+ pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
+
+ return pte;
+}
+
+static u64 hsw_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+ if (level != I915_CACHE_NONE)
+ pte |= HSW_WB_LLC_AGE3;
+
+ return pte;
+}
+
+static u64 iris_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+ switch (level) {
+ case I915_CACHE_NONE:
+ break;
+ case I915_CACHE_WT:
+ pte |= HSW_WT_ELLC_LLC_AGE3;
+ break;
+ default:
+ pte |= HSW_WB_ELLC_LLC_AGE3;
+ break;
+ }
+
+ return pte;
+}
+
+static void gen5_ggtt_insert_page(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ enum i915_cache_level cache_level,
+ u32 unused)
+{
+ unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+ AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+
+ intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
+}
+
+static void gen6_ggtt_insert_page(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ gen6_pte_t __iomem *pte =
+ (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
+
+ iowrite32(vm->pte_encode(addr, level, flags), pte);
+
+ ggtt->invalidate(ggtt);
+}
+
+static void gen8_ggtt_insert_page(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ gen8_pte_t __iomem *pte =
+ (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
+
+ gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
+
+ ggtt->invalidate(ggtt);
+}
+
+static void gen5_ggtt_insert_entries(struct i915_address_space *vm,
+ struct i915_vma_resource *vma_res,
+ enum i915_cache_level cache_level,
+ u32 unused)
+{
+ unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+ AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+
+ intel_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT,
+ flags);
+}
+
+/*
+ * Binds an object into the global gtt with the specified cache level.
+ * The object will be accessible to the GPU via commands whose operands
+ * reference offsets within the global GTT, as well as to the CPU through
+ * the GMADR mapped aperture BAR (i915->mm.gtt->gtt).
+ */
+static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
+ struct i915_vma_resource *vma_res,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ gen6_pte_t __iomem *gte;
+ gen6_pte_t __iomem *end;
+ struct sgt_iter iter;
+ dma_addr_t addr;
+
+ gte = (gen6_pte_t __iomem *)ggtt->gsm;
+ gte += vma_res->start / I915_GTT_PAGE_SIZE;
+ end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
+
+ for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
+ iowrite32(vm->pte_encode(addr, level, flags), gte++);
+ GEM_BUG_ON(gte > end);
+
+ /* Fill the allocated but "unused" space beyond the end of the buffer */
+ while (gte < end)
+ iowrite32(vm->scratch[0]->encode, gte++);
+
+ /*
+ * We want to flush the TLBs only after we're certain all the PTE
+ * updates have finished.
+ */
+ ggtt->invalidate(ggtt);
+}
+
+static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
+ struct i915_vma_resource *vma_res,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ gen8_pte_t __iomem *gte;
+ gen8_pte_t __iomem *end;
+ struct sgt_iter iter;
+ dma_addr_t addr;
+
+ /*
+ * Note that we ignore PTE_READ_ONLY here. The caller must be careful
+ * not to allow the user to override access to a read only page.
+ */
+
+ gte = (gen8_pte_t __iomem *)ggtt->gsm;
+ gte += vma_res->start / I915_GTT_PAGE_SIZE;
+ end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
+
+ for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
+ gen8_set_pte(gte++, pte_encode | addr);
+ GEM_BUG_ON(gte > end);
+
+ /* Fill the allocated but "unused" space beyond the end of the buffer */
+ while (gte < end)
+ gen8_set_pte(gte++, vm->scratch[0]->encode);
+
+ /*
+ * We want to flush the TLBs only after we're certain all the PTE
+ * updates have finished.
+ */
+ ggtt->invalidate(ggtt);
+}
+
+static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
+{
+ /*
+ * Make sure the internal GAM fifo has been cleared of all GTT
+ * writes before exiting stop_machine(). This guarantees that
+ * any aperture accesses waiting to start in another process
+ * cannot back up behind the GTT writes causing a hang.
+ * The register can be any arbitrary GAM register.
+ */
+ intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
+}
+
+static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
+{
+ struct insert_page *arg = _arg;
+
+ gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
+ bxt_vtd_ggtt_wa(arg->vm);
+
+ return 0;
+}
+
+static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ enum i915_cache_level level,
+ u32 unused)
+{
+ struct insert_page arg = { vm, addr, offset, level };
+
+ stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
+}
+
+static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
+{
+ struct insert_entries *arg = _arg;
+
+ gen8_ggtt_insert_entries(arg->vm, arg->vma_res, arg->level, arg->flags);
+ bxt_vtd_ggtt_wa(arg->vm);
+
+ return 0;
+}
+
+static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
+ struct i915_vma_resource *vma_res,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ struct insert_entries arg = { vm, vma_res, level, flags };
+
+ stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
+}
+
+void intel_gt_gmch_gen5_chipset_flush(struct intel_gt *gt)
+{
+ intel_gtt_chipset_flush();
+}
+
+static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
+{
+ intel_gtt_chipset_flush();
+}
+
+static void gen5_ggtt_clear_range(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
+}
+
+static void gen6_ggtt_clear_range(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+ unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+ gen6_pte_t scratch_pte, __iomem *gtt_base =
+ (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
+ const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+ int i;
+
+ if (WARN(num_entries > max_entries,
+ "First entry = %d; Num entries = %d (max=%d)\n",
+ first_entry, num_entries, max_entries))
+ num_entries = max_entries;
+
+ scratch_pte = vm->scratch[0]->encode;
+ for (i = 0; i < num_entries; i++)
+ iowrite32(scratch_pte, &gtt_base[i]);
+}
+
+static void gen8_ggtt_clear_range(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+ unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+ const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
+ gen8_pte_t __iomem *gtt_base =
+ (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
+ const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+ int i;
+
+ if (WARN(num_entries > max_entries,
+ "First entry = %d; Num entries = %d (max=%d)\n",
+ first_entry, num_entries, max_entries))
+ num_entries = max_entries;
+
+ for (i = 0; i < num_entries; i++)
+ gen8_set_pte(&gtt_base[i], scratch_pte);
+}
+
+static void gen5_gmch_remove(struct i915_address_space *vm)
+{
+ intel_gmch_remove();
+}
+
+static void gen6_gmch_remove(struct i915_address_space *vm)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+
+ iounmap(ggtt->gsm);
+ free_scratch(vm);
+}
+
+/*
+ * Certain Gen5 chipsets require idling the GPU before
+ * unmapping anything from the GTT when VT-d is enabled.
+ */
+static bool needs_idle_maps(struct drm_i915_private *i915)
+{
+ /*
+ * Query intel_iommu to see if we need the workaround. Presumably that
+ * was loaded first.
+ */
+ if (!i915_vtd_active(i915))
+ return false;
+
+ if (GRAPHICS_VER(i915) == 5 && IS_MOBILE(i915))
+ return true;
+
+ if (GRAPHICS_VER(i915) == 12)
+ return true; /* XXX DMAR fault reason 7 */
+
+ return false;
+}
+
+static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915)
+{
+ /*
+ * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset
+ * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset
+ */
+ GEM_BUG_ON(GRAPHICS_VER(i915) < 6);
+ return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M;
+}
+
+static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
+{
+ snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
+ snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
+ return snb_gmch_ctl << 20;
+}
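+
+/*
+ * Worked example: a GGMS field of 2 in SNB_GMCH_CTRL yields 2 << 20 = 2MB
+ * of GTT, i.e. 512K gen6 PTEs of 4 bytes each, mapping 512K * 4KB = 2GB
+ * of address space.
+ */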
+
+static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
+{
+ bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
+ bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
+ if (bdw_gmch_ctl)
+ bdw_gmch_ctl = 1 << bdw_gmch_ctl;
+
+#ifdef CONFIG_X86_32
+ /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
+ if (bdw_gmch_ctl > 4)
+ bdw_gmch_ctl = 4;
+#endif
+
+ return bdw_gmch_ctl << 20;
+}
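+
+/*
+ * Worked example: on BDW+ the GGMS field is an exponent, so a raw value
+ * of 3 becomes 1 << 3 = 8MB of GTT, i.e. 1M gen8 PTEs of 8 bytes each,
+ * mapping 4GB of address space. The clamp above caps 32-bit builds at
+ * 4MB of GTT, i.e. a 2GB GGTT.
+ */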
+
+static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
+{
+ return gen6_gttmmadr_size(i915) / 2;
+}
+
+static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
+{
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+ struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+ phys_addr_t phys_addr;
+ u32 pte_flags;
+ int ret;
+
+ GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915));
+ phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915);
+
+ /*
+ * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
+ * will be dropped. For WC mappings in general we have 64 byte burst
+ * writes when the WC buffer is flushed, so we can't use it, but have to
+ * resort to an uncached mapping. The WC issue is easily caught by the
+ * readback check when writing GTT PTE entries.
+ */
+ if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11)
+ ggtt->gsm = ioremap(phys_addr, size);
+ else
+ ggtt->gsm = ioremap_wc(phys_addr, size);
+ if (!ggtt->gsm) {
+ drm_err(&i915->drm, "Failed to map the ggtt page table\n");
+ return -ENOMEM;
+ }
+
+ kref_init(&ggtt->vm.resv_ref);
+ ret = setup_scratch_page(&ggtt->vm);
+ if (ret) {
+ drm_err(&i915->drm, "Scratch setup failed\n");
+ /* iounmap will also get called at remove, but meh */
+ iounmap(ggtt->gsm);
+ return ret;
+ }
+
+ pte_flags = 0;
+ if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
+ pte_flags |= PTE_LM;
+
+ ggtt->vm.scratch[0]->encode =
+ ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
+ I915_CACHE_NONE, pte_flags);
+
+ return 0;
+}
+
+int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt)
+{
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+ phys_addr_t gmadr_base;
+ int ret;
+
+ ret = intel_gmch_probe(i915->bridge_dev, to_pci_dev(i915->drm.dev), NULL);
+ if (!ret) {
+ drm_err(&i915->drm, "failed to set up gmch\n");
+ return -EIO;
+ }
+
+ intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
+
+ ggtt->gmadr =
+ (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
+
+ ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+ ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
+
+ if (needs_idle_maps(i915)) {
+ drm_notice(&i915->drm,
+ "Flushing DMA requests before IOMMU unmaps; performance may be degraded\n");
+ ggtt->do_idle_maps = true;
+ }
+
+ ggtt->vm.insert_page = gen5_ggtt_insert_page;
+ ggtt->vm.insert_entries = gen5_ggtt_insert_entries;
+ ggtt->vm.clear_range = gen5_ggtt_clear_range;
+ ggtt->vm.cleanup = gen5_gmch_remove;
+
+ ggtt->invalidate = gmch_ggtt_invalidate;
+
+ ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
+ ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
+
+ if (unlikely(ggtt->do_idle_maps))
+ drm_notice(&i915->drm,
+ "Applying Ironlake quirks for intel_iommu\n");
+
+ return 0;
+}
+
+int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt)
+{
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+ struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+ unsigned int size;
+ u16 snb_gmch_ctl;
+
+ ggtt->gmadr = intel_pci_resource(pdev, 2);
+ ggtt->mappable_end = resource_size(&ggtt->gmadr);
+
+ /*
+ * 64/512MB is the current min/max we actually know of, but this is
+ * just a coarse sanity check.
+ */
+ if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
+ drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
+ &ggtt->mappable_end);
+ return -ENXIO;
+ }
+
+ pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
+
+ size = gen6_get_total_gtt_size(snb_gmch_ctl);
+ ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
+
+ ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+ ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
+
+ ggtt->vm.clear_range = nop_clear_range;
+ if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
+ ggtt->vm.clear_range = gen6_ggtt_clear_range;
+ ggtt->vm.insert_page = gen6_ggtt_insert_page;
+ ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
+ ggtt->vm.cleanup = gen6_gmch_remove;
+
+ ggtt->invalidate = gen6_ggtt_invalidate;
+
+ if (HAS_EDRAM(i915))
+ ggtt->vm.pte_encode = iris_pte_encode;
+ else if (IS_HASWELL(i915))
+ ggtt->vm.pte_encode = hsw_pte_encode;
+ else if (IS_VALLEYVIEW(i915))
+ ggtt->vm.pte_encode = byt_pte_encode;
+ else if (GRAPHICS_VER(i915) >= 7)
+ ggtt->vm.pte_encode = ivb_pte_encode;
+ else
+ ggtt->vm.pte_encode = snb_pte_encode;
+
+ ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
+ ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
+
+ return ggtt_probe_common(ggtt, size);
+}
+
+static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
+{
+ gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
+ gmch_ctrl &= SNB_GMCH_GGMS_MASK;
+
+ if (gmch_ctrl)
+ return 1 << (20 + gmch_ctrl);
+
+ return 0;
+}
+
+int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt)
+{
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+ struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+ unsigned int size;
+ u16 snb_gmch_ctl;
+
+ /* TODO: We're not aware of mappable constraints on gen8 yet */
+ if (!HAS_LMEM(i915)) {
+ ggtt->gmadr = intel_pci_resource(pdev, 2);
+ ggtt->mappable_end = resource_size(&ggtt->gmadr);
+ }
+
+ pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
+ if (IS_CHERRYVIEW(i915))
+ size = chv_get_total_gtt_size(snb_gmch_ctl);
+ else
+ size = gen8_get_total_gtt_size(snb_gmch_ctl);
+
+ ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+ ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
+ ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;
+
+ ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
+ ggtt->vm.cleanup = gen6_gmch_remove;
+ ggtt->vm.insert_page = gen8_ggtt_insert_page;
+ ggtt->vm.clear_range = nop_clear_range;
+ if (intel_scanout_needs_vtd_wa(i915))
+ ggtt->vm.clear_range = gen8_ggtt_clear_range;
+
+ ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
+
+ /*
+ * Serialize GTT updates with aperture access on BXT if VT-d is on,
+ * and always on CHV.
+ */
+ if (intel_vm_no_concurrent_access_wa(i915)) {
+ ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
+ ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
+ ggtt->vm.bind_async_flags =
+ I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
+ }
+
+ ggtt->invalidate = gen8_ggtt_invalidate;
+
+ ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
+ ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
+
+ ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
+
+ setup_private_pat(ggtt->vm.gt->uncore);
+
+ return ggtt_probe_common(ggtt, size);
+}
+
+int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915)
+{
+ if (GRAPHICS_VER(i915) < 6 && !intel_enable_gtt())
+ return -EIO;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.h b/drivers/gpu/drm/i915/gt/intel_gt_gmch.h
new file mode 100644
index 000000000000..75ed55c1f30a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_gmch.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_GMCH_H__
+#define __INTEL_GT_GMCH_H__
+
+#include "intel_gtt.h"
+
+/* For x86 platforms */
+#if IS_ENABLED(CONFIG_X86)
+void intel_gt_gmch_gen5_chipset_flush(struct intel_gt *gt);
+int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt);
+int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt);
+int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt);
+int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915);
+
+/* Stubs for non-x86 platforms */
+#else
+static inline void intel_gt_gmch_gen5_chipset_flush(struct intel_gt *gt)
+{
+}
+static inline int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt)
+{
+ /* No HW should be probed for this case yet, return fail */
+ return -ENODEV;
+}
+static inline int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt)
+{
+ /* No HW should be probed for this case yet, return fail */
+ return -ENODEV;
+}
+static inline int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt)
+{
+ /* No HW should be probed for this case yet, return fail */
+ return -ENODEV;
+}
+static inline int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915)
+{
+ /* No HW should be enabled for this case yet, return fail */
+ return -ENODEV;
+}
+#endif
+
+#endif /* __INTEL_GT_GMCH_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index e443ac4c8059..88b4becfcb17 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -68,6 +68,9 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance,
if (instance == OTHER_KCR_INSTANCE)
return intel_pxp_irq_handler(&gt->pxp, iir);
+ if (instance == OTHER_GSC_INSTANCE)
+ return intel_gsc_irq_handler(gt, iir);
+
WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n",
instance, iir);
}
@@ -184,6 +187,8 @@ void gen11_gt_irq_reset(struct intel_gt *gt)
intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0);
if (CCS_MASK(gt))
intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, 0);
+ if (HAS_HECI_GSC(gt->i915))
+ intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_ENABLE, 0);
/* Restore masks irqs on RCS, BCS, VCS and VECS engines. */
intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~0);
@@ -201,6 +206,8 @@ void gen11_gt_irq_reset(struct intel_gt *gt)
intel_uncore_write(uncore, GEN12_CCS0_CCS1_INTR_MASK, ~0);
if (HAS_ENGINE(gt, CCS2) || HAS_ENGINE(gt, CCS3))
intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~0);
+ if (HAS_HECI_GSC(gt->i915))
+ intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_MASK, ~0);
intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0);
intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK, ~0);
@@ -215,6 +222,7 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
{
struct intel_uncore *uncore = gt->uncore;
u32 irqs = GT_RENDER_USER_INTERRUPT;
+ const u32 gsc_mask = GSC_IRQ_INTF(0) | GSC_IRQ_INTF(1);
u32 dmask;
u32 smask;
@@ -233,6 +241,9 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask);
if (CCS_MASK(gt))
intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, smask);
+ if (HAS_HECI_GSC(gt->i915))
+ intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_ENABLE,
+ gsc_mask);
/* Unmask irqs on RCS, BCS, VCS and VECS engines. */
intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~smask);
@@ -250,6 +261,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
intel_uncore_write(uncore, GEN12_CCS0_CCS1_INTR_MASK, ~dmask);
if (HAS_ENGINE(gt, CCS2) || HAS_ENGINE(gt, CCS3))
intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~dmask);
+ if (HAS_HECI_GSC(gt->i915))
+ intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_MASK, ~gsc_mask);
/*
* RPS interrupts will get enabled/disabled on demand when RPS itself
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index e4ecc17889d3..f553e2173bda 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -129,7 +129,14 @@ static const struct intel_wakeref_ops wf_ops = {
void intel_gt_pm_init_early(struct intel_gt *gt)
{
- intel_wakeref_init(&gt->wakeref, gt->uncore->rpm, &wf_ops);
+ /*
+ * We access the runtime_pm structure via gt->i915 here rather than
+ * gt->uncore as we do elsewhere in the file because gt->uncore is not
+ * yet initialized for all tiles at this point in the driver startup.
+ * runtime_pm is per-device rather than per-tile, so this is still the
+ * correct structure.
+ */
+ intel_wakeref_init(&gt->wakeref, &gt->i915->runtime_pm, &wf_ops);
seqcount_mutex_init(&gt->stats.lock, &gt->wakeref.mutex);
}
@@ -175,15 +182,16 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
if (intel_gt_is_wedged(gt))
intel_gt_unset_wedged(gt);
- for_each_engine(engine, gt, id)
+ /* For GuC mode, ensure submission is disabled before stopping ring */
+ intel_uc_reset_prepare(&gt->uc);
+
+ for_each_engine(engine, gt, id) {
if (engine->reset.prepare)
engine->reset.prepare(engine);
- intel_uc_reset_prepare(&gt->uc);
-
- for_each_engine(engine, gt, id)
if (engine->sanitize)
engine->sanitize(engine);
+ }
if (reset_engines(gt) || force) {
for_each_engine(engine, gt, id)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 437e96bb3b93..0c6b9eb724ae 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -24,38 +24,38 @@
#include "intel_uncore.h"
#include "vlv_sideband.h"
-int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt)
+void intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt)
{
atomic_inc(&gt->user_wakeref);
intel_gt_pm_get(gt);
if (GRAPHICS_VER(gt->i915) >= 6)
intel_uncore_forcewake_user_get(gt->uncore);
-
- return 0;
}
-int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt)
+void intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt)
{
if (GRAPHICS_VER(gt->i915) >= 6)
intel_uncore_forcewake_user_put(gt->uncore);
intel_gt_pm_put(gt);
atomic_dec(&gt->user_wakeref);
-
- return 0;
}
static int forcewake_user_open(struct inode *inode, struct file *file)
{
struct intel_gt *gt = inode->i_private;
- return intel_gt_pm_debugfs_forcewake_user_open(gt);
+ intel_gt_pm_debugfs_forcewake_user_open(gt);
+
+ return 0;
}
static int forcewake_user_release(struct inode *inode, struct file *file)
{
struct intel_gt *gt = inode->i_private;
- return intel_gt_pm_debugfs_forcewake_user_release(gt);
+ intel_gt_pm_debugfs_forcewake_user_release(gt);
+
+ return 0;
}
static const struct file_operations forcewake_user_fops = {
@@ -342,17 +342,16 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p)
} else if (GRAPHICS_VER(i915) >= 6) {
u32 rp_state_limits;
u32 gt_perf_status;
- u32 rp_state_cap;
+ struct intel_rps_freq_caps caps;
u32 rpmodectl, rpinclimit, rpdeclimit;
u32 rpstat, cagf, reqf;
u32 rpcurupei, rpcurup, rpprevup;
u32 rpcurdownei, rpcurdown, rpprevdown;
u32 rpupei, rpupt, rpdownei, rpdownt;
u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
- int max_freq;
rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
- rp_state_cap = intel_rps_read_state_cap(rps);
+ gen6_rps_get_freq_caps(rps, &caps);
if (IS_GEN9_LP(i915))
gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
else
@@ -474,25 +473,12 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p)
drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n",
rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
- max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 :
- rp_state_cap >> 16) & 0xff;
- max_freq *= (IS_GEN9_BC(i915) ||
- GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1);
drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
- intel_gpu_freq(rps, max_freq));
-
- max_freq = (rp_state_cap & 0xff00) >> 8;
- max_freq *= (IS_GEN9_BC(i915) ||
- GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1);
+ intel_gpu_freq(rps, caps.min_freq));
drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
- intel_gpu_freq(rps, max_freq));
-
- max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 :
- rp_state_cap >> 0) & 0xff;
- max_freq *= (IS_GEN9_BC(i915) ||
- GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1);
+ intel_gpu_freq(rps, caps.rp1_freq));
drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
- intel_gpu_freq(rps, max_freq));
+ intel_gpu_freq(rps, caps.rp0_freq));
drm_printf(p, "Max overclocked frequency: %dMHz\n",
intel_gpu_freq(rps, rps->max_freq));
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h
index a8457887ec65..0ace8c2da0ac 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h
@@ -14,7 +14,7 @@ void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root);
void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *m);
/* functions that need to be accessed by the upper level non-gt interfaces */
-int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt);
-int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt);
+void intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt);
+void intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt);
#endif /* INTEL_GT_PM_DEBUGFS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 201b507c9dde..a39718a40cc3 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -46,6 +46,7 @@
#define GEN8_MCR_SLICE_MASK GEN8_MCR_SLICE(3)
#define GEN8_MCR_SUBSLICE(subslice) (((subslice) & 3) << 24)
#define GEN8_MCR_SUBSLICE_MASK GEN8_MCR_SUBSLICE(3)
+#define GEN11_MCR_MULTICAST REG_BIT(31)
#define GEN11_MCR_SLICE(slice) (((slice) & 0xf) << 27)
#define GEN11_MCR_SLICE_MASK GEN11_MCR_SLICE(0xf)
#define GEN11_MCR_SUBSLICE(subslice) (((subslice) & 0x7) << 24)
@@ -840,6 +841,24 @@
#define CTC_SHIFT_PARAMETER_SHIFT 1
#define CTC_SHIFT_PARAMETER_MASK (0x3 << CTC_SHIFT_PARAMETER_SHIFT)
+/* GPM MSG_IDLE */
+#define MSG_IDLE_CS _MMIO(0x8000)
+#define MSG_IDLE_VCS0 _MMIO(0x8004)
+#define MSG_IDLE_VCS1 _MMIO(0x8008)
+#define MSG_IDLE_BCS _MMIO(0x800C)
+#define MSG_IDLE_VECS0 _MMIO(0x8010)
+#define MSG_IDLE_VCS2 _MMIO(0x80C0)
+#define MSG_IDLE_VCS3 _MMIO(0x80C4)
+#define MSG_IDLE_VCS4 _MMIO(0x80C8)
+#define MSG_IDLE_VCS5 _MMIO(0x80CC)
+#define MSG_IDLE_VCS6 _MMIO(0x80D0)
+#define MSG_IDLE_VCS7 _MMIO(0x80D4)
+#define MSG_IDLE_VECS1 _MMIO(0x80D8)
+#define MSG_IDLE_VECS2 _MMIO(0x80DC)
+#define MSG_IDLE_VECS3 _MMIO(0x80E0)
+#define MSG_IDLE_FW_MASK REG_GENMASK(13, 9)
+#define MSG_IDLE_FW_SHIFT 9
+
#define FORCEWAKE_MEDIA_GEN9 _MMIO(0xa270)
#define FORCEWAKE_RENDER_GEN9 _MMIO(0xa278)
@@ -1087,6 +1106,7 @@
#define EU_PERF_CNTL3 _MMIO(0xe758)
#define LSC_CHICKEN_BIT_0 _MMIO(0xe7c8)
+#define DISABLE_D8_D16_COASLESCE REG_BIT(30)
#define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15)
#define LSC_CHICKEN_BIT_0_UDW _MMIO(0xe7c8 + 4)
#define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32)
@@ -1482,6 +1502,7 @@
#define OTHER_GUC_INSTANCE 0
#define OTHER_GTPM_INSTANCE 1
#define OTHER_KCR_INSTANCE 4
+#define OTHER_GSC_INSTANCE 6
#define GEN11_IIR_REG_SELECTOR(x) _MMIO(0x190070 + ((x) * 4))
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
new file mode 100644
index 000000000000..8ec8bc660c8c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_device.h>
+#include <linux/device.h>
+#include <linux/kobject.h>
+#include <linux/printk.h>
+#include <linux/sysfs.h>
+
+#include "i915_drv.h"
+#include "i915_sysfs.h"
+#include "intel_gt.h"
+#include "intel_gt_sysfs.h"
+#include "intel_gt_sysfs_pm.h"
+#include "intel_gt_types.h"
+#include "intel_rc6.h"
+
+bool is_object_gt(struct kobject *kobj)
+{
+ return !strncmp(kobj->name, "gt", 2);
+}
+
+static struct intel_gt *kobj_to_gt(struct kobject *kobj)
+{
+ return container_of(kobj, struct kobj_gt, base)->gt;
+}
+
+struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
+ const char *name)
+{
+ struct kobject *kobj = &dev->kobj;
+
+ /*
+ * We need to know where the interface was called from: the gt/
+ * subdirectory or the parent directory. The type of the private
+ * data depends on that position: if the interface is called from
+ * gt/, the private data is a "struct intel_gt *"; otherwise it is
+ * a "struct drm_i915_private *".
+ */
+ if (!is_object_gt(kobj)) {
+ struct drm_i915_private *i915 = kdev_minor_to_i915(dev);
+
+ return to_gt(i915);
+ }
+
+ return kobj_to_gt(kobj);
+}
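+
+/*
+ * Sketch of the two lookup paths, assuming the usual sysfs layout: a read
+ * through a per-gt directory (e.g. .../card0/gt/gt0/rps_act_freq_mhz)
+ * resolves the gt kobject directly, while the legacy file in the parent
+ * directory (e.g. .../card0/gt_act_freq_mhz) falls back to the root GT
+ * via to_gt(i915).
+ */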
+
+static struct kobject *gt_get_parent_obj(struct intel_gt *gt)
+{
+ return &gt->i915->drm.primary->kdev->kobj;
+}
+
+static ssize_t id_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+
+ return sysfs_emit(buf, "%u\n", gt->info.id);
+}
+static DEVICE_ATTR_RO(id);
+
+static struct attribute *id_attrs[] = {
+ &dev_attr_id.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(id);
+
+static void kobj_gt_release(struct kobject *kobj)
+{
+ kfree(kobj);
+}
+
+static struct kobj_type kobj_gt_type = {
+ .release = kobj_gt_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = id_groups,
+};
+
+void intel_gt_sysfs_register(struct intel_gt *gt)
+{
+ struct kobj_gt *kg;
+
+ /*
+ * For ABI compatibility the files must keep being generated
+ * under the parent directory, where they were originally
+ * created.
+ *
+ * We generate them only for gt 0 to avoid duplicates.
+ */
+ if (gt_is_root(gt))
+ intel_gt_sysfs_pm_init(gt, gt_get_parent_obj(gt));
+
+ kg = kzalloc(sizeof(*kg), GFP_KERNEL);
+ if (!kg)
+ goto exit_fail;
+
+ kobject_init(&kg->base, &kobj_gt_type);
+ kg->gt = gt;
+
+ /* xfer ownership to sysfs tree */
+ if (kobject_add(&kg->base, gt->i915->sysfs_gt, "gt%d", gt->info.id))
+ goto exit_kobj_put;
+
+ intel_gt_sysfs_pm_init(gt, &kg->base);
+
+ return;
+
+exit_kobj_put:
+ kobject_put(&kg->base);
+
+exit_fail:
+ drm_warn(&gt->i915->drm,
+ "failed to initialize gt%d sysfs root\n", gt->info.id);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
new file mode 100644
index 000000000000..9471b26752cf
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __SYSFS_GT_H__
+#define __SYSFS_GT_H__
+
+#include <linux/ctype.h>
+#include <linux/kobject.h>
+
+#include "i915_gem.h" /* GEM_BUG_ON() */
+
+struct intel_gt;
+
+struct kobj_gt {
+ struct kobject base;
+ struct intel_gt *gt;
+};
+
+bool is_object_gt(struct kobject *kobj);
+
+struct drm_i915_private *kobj_to_i915(struct kobject *kobj);
+
+struct kobject *
+intel_gt_create_kobj(struct intel_gt *gt,
+ struct kobject *dir,
+ const char *name);
+
+void intel_gt_sysfs_register(struct intel_gt *gt);
+struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
+ const char *name);
+
+#endif /* __SYSFS_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
new file mode 100644
index 000000000000..26cbfa6477d1
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
@@ -0,0 +1,601 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_device.h>
+#include <linux/sysfs.h>
+#include <linux/printk.h>
+
+#include "i915_drv.h"
+#include "i915_reg.h"
+#include "i915_sysfs.h"
+#include "intel_gt.h"
+#include "intel_gt_regs.h"
+#include "intel_gt_sysfs.h"
+#include "intel_gt_sysfs_pm.h"
+#include "intel_rc6.h"
+#include "intel_rps.h"
+
+#ifdef CONFIG_PM
+enum intel_gt_sysfs_op {
+ INTEL_GT_SYSFS_MIN = 0,
+ INTEL_GT_SYSFS_MAX,
+};
+
+static int
+sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr,
+ int (func)(struct intel_gt *gt, u32 val), u32 val)
+{
+ struct intel_gt *gt;
+ int ret;
+
+ if (!is_object_gt(&dev->kobj)) {
+ int i;
+ struct drm_i915_private *i915 = kdev_minor_to_i915(dev);
+
+ for_each_gt(gt, i915, i) {
+ ret = func(gt, val);
+ if (ret)
+ break;
+ }
+ } else {
+ gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+ ret = func(gt, val);
+ }
+
+ return ret;
+}
+
+static u32
+sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr,
+ u32 (func)(struct intel_gt *gt),
+ enum intel_gt_sysfs_op op)
+{
+ struct intel_gt *gt;
+ u32 ret;
+
+ ret = (op == INTEL_GT_SYSFS_MAX) ? 0 : (u32) -1;
+
+ if (!is_object_gt(&dev->kobj)) {
+ int i;
+ struct drm_i915_private *i915 = kdev_minor_to_i915(dev);
+
+ for_each_gt(gt, i915, i) {
+ u32 val = func(gt);
+
+ switch (op) {
+ case INTEL_GT_SYSFS_MIN:
+ if (val < ret)
+ ret = val;
+ break;
+
+ case INTEL_GT_SYSFS_MAX:
+ if (val > ret)
+ ret = val;
+ break;
+ }
+ }
+ } else {
+ gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+ ret = func(gt);
+ }
+
+ return ret;
+}
+
+/* RC6 interfaces will show the minimum RC6 residency value */
+#define sysfs_gt_attribute_r_min_func(d, a, f) \
+ sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MIN)
+
+/* Frequency interfaces will show the maximum frequency value */
+#define sysfs_gt_attribute_r_max_func(d, a, f) \
+ sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX)
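+
+/*
+ * The net effect: reading one of these attributes from the parent device
+ * aggregates over all GTs (minimum for RC6 residency, maximum for
+ * frequencies), while reading it from a gt/gtN/ directory reports that
+ * GT alone.
+ */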
+
+static u32 get_residency(struct intel_gt *gt, i915_reg_t reg)
+{
+ intel_wakeref_t wakeref;
+ u64 res = 0;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ res = intel_rc6_residency_us(&gt->rc6, reg);
+
+ return DIV_ROUND_CLOSEST_ULL(res, 1000);
+}
+
+static ssize_t rc6_enable_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buff)
+{
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+ u8 mask = 0;
+
+ if (HAS_RC6(gt->i915))
+ mask |= BIT(0);
+ if (HAS_RC6p(gt->i915))
+ mask |= BIT(1);
+ if (HAS_RC6pp(gt->i915))
+ mask |= BIT(2);
+
+ return sysfs_emit(buff, "%x\n", mask);
+}
+
+static u32 __rc6_residency_ms_show(struct intel_gt *gt)
+{
+ return get_residency(gt, GEN6_GT_GFX_RC6);
+}
+
+static ssize_t rc6_residency_ms_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buff)
+{
+ u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr,
+ __rc6_residency_ms_show);
+
+ return sysfs_emit(buff, "%u\n", rc6_residency);
+}
+
+static u32 __rc6p_residency_ms_show(struct intel_gt *gt)
+{
+ return get_residency(gt, GEN6_GT_GFX_RC6p);
+}
+
+static ssize_t rc6p_residency_ms_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buff)
+{
+ u32 rc6p_residency = sysfs_gt_attribute_r_min_func(dev, attr,
+ __rc6p_residency_ms_show);
+
+ return sysfs_emit(buff, "%u\n", rc6p_residency);
+}
+
+static u32 __rc6pp_residency_ms_show(struct intel_gt *gt)
+{
+ return get_residency(gt, GEN6_GT_GFX_RC6pp);
+}
+
+static ssize_t rc6pp_residency_ms_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buff)
+{
+ u32 rc6pp_residency = sysfs_gt_attribute_r_min_func(dev, attr,
+ __rc6pp_residency_ms_show);
+
+ return sysfs_emit(buff, "%u\n", rc6pp_residency);
+}
+
+static u32 __media_rc6_residency_ms_show(struct intel_gt *gt)
+{
+ return get_residency(gt, VLV_GT_MEDIA_RC6);
+}
+
+static ssize_t media_rc6_residency_ms_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buff)
+{
+ u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr,
+ __media_rc6_residency_ms_show);
+
+ return sysfs_emit(buff, "%u\n", rc6_residency);
+}
+
+static DEVICE_ATTR_RO(rc6_enable);
+static DEVICE_ATTR_RO(rc6_residency_ms);
+static DEVICE_ATTR_RO(rc6p_residency_ms);
+static DEVICE_ATTR_RO(rc6pp_residency_ms);
+static DEVICE_ATTR_RO(media_rc6_residency_ms);
+
+static struct attribute *rc6_attrs[] = {
+ &dev_attr_rc6_enable.attr,
+ &dev_attr_rc6_residency_ms.attr,
+ NULL
+};
+
+static struct attribute *rc6p_attrs[] = {
+ &dev_attr_rc6p_residency_ms.attr,
+ &dev_attr_rc6pp_residency_ms.attr,
+ NULL
+};
+
+static struct attribute *media_rc6_attrs[] = {
+ &dev_attr_media_rc6_residency_ms.attr,
+ NULL
+};
+
+static const struct attribute_group rc6_attr_group[] = {
+ { .attrs = rc6_attrs, },
+ { .name = power_group_name, .attrs = rc6_attrs, },
+};
+
+static const struct attribute_group rc6p_attr_group[] = {
+ { .attrs = rc6p_attrs, },
+ { .name = power_group_name, .attrs = rc6p_attrs, },
+};
+
+static const struct attribute_group media_rc6_attr_group[] = {
+ { .attrs = media_rc6_attrs, },
+ { .name = power_group_name, .attrs = media_rc6_attrs, },
+};
+
+static int __intel_gt_sysfs_create_group(struct kobject *kobj,
+ const struct attribute_group *grp)
+{
+ return is_object_gt(kobj) ?
+ sysfs_create_group(kobj, &grp[0]) :
+ sysfs_merge_group(kobj, &grp[1]);
+}
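+
+/*
+ * Note on the group pairs above: entry [0] is an unnamed group created at
+ * the top level of a per-gt kobject, while entry [1] carries
+ * power_group_name and is merged into the parent device's existing
+ * power/ directory to preserve the legacy layout.
+ */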
+
+static void intel_sysfs_rc6_init(struct intel_gt *gt, struct kobject *kobj)
+{
+ int ret;
+
+ if (!HAS_RC6(gt->i915))
+ return;
+
+ ret = __intel_gt_sysfs_create_group(kobj, rc6_attr_group);
+ if (ret)
+ drm_warn(&gt->i915->drm,
+ "failed to create gt%u RC6 sysfs files (%pe)\n",
+ gt->info.id, ERR_PTR(ret));
+
+ /*
+ * We cannot use the is_visible() callback here because the upper
+ * object inherits the attributes from the parent group.
+ */
+ if (HAS_RC6p(gt->i915)) {
+ ret = __intel_gt_sysfs_create_group(kobj, rc6p_attr_group);
+ if (ret)
+ drm_warn(&gt->i915->drm,
+ "failed to create gt%u RC6p sysfs files (%pe)\n",
+ gt->info.id, ERR_PTR(ret));
+ }
+
+ if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915)) {
+ ret = __intel_gt_sysfs_create_group(kobj, media_rc6_attr_group);
+ if (ret)
+ drm_warn(&gt->i915->drm,
+ "failed to create media %u RC6 sysfs files (%pe)\n",
+ gt->info.id, ERR_PTR(ret));
+ }
+}
+#else
+static void intel_sysfs_rc6_init(struct intel_gt *gt, struct kobject *kobj)
+{
+}
+#endif /* CONFIG_PM */
+
+static u32 __act_freq_mhz_show(struct intel_gt *gt)
+{
+ return intel_rps_read_actual_frequency(&gt->rps);
+}
+
+static ssize_t act_freq_mhz_show(struct device *dev,
+ struct device_attribute *attr, char *buff)
+{
+ u32 actual_freq = sysfs_gt_attribute_r_max_func(dev, attr,
+ __act_freq_mhz_show);
+
+ return sysfs_emit(buff, "%u\n", actual_freq);
+}
+
+static u32 __cur_freq_mhz_show(struct intel_gt *gt)
+{
+ return intel_rps_get_requested_frequency(&gt->rps);
+}
+
+static ssize_t cur_freq_mhz_show(struct device *dev,
+ struct device_attribute *attr, char *buff)
+{
+ u32 cur_freq = sysfs_gt_attribute_r_max_func(dev, attr,
+ __cur_freq_mhz_show);
+
+ return sysfs_emit(buff, "%u\n", cur_freq);
+}
+
+static u32 __boost_freq_mhz_show(struct intel_gt *gt)
+{
+ return intel_rps_get_boost_frequency(&gt->rps);
+}
+
+static ssize_t boost_freq_mhz_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buff)
+{
+ u32 boost_freq = sysfs_gt_attribute_r_max_func(dev, attr,
+ __boost_freq_mhz_show);
+
+ return sysfs_emit(buff, "%u\n", boost_freq);
+}
+
+static int __boost_freq_mhz_store(struct intel_gt *gt, u32 val)
+{
+ return intel_rps_set_boost_frequency(&gt->rps, val);
+}
+
+static ssize_t boost_freq_mhz_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buff, size_t count)
+{
+ ssize_t ret;
+ u32 val;
+
+ ret = kstrtou32(buff, 0, &val);
+ if (ret)
+ return ret;
+
+ return sysfs_gt_attribute_w_func(dev, attr,
+ __boost_freq_mhz_store, val) ?: count;
+}
+
+static u32 __rp0_freq_mhz_show(struct intel_gt *gt)
+{
+ return intel_rps_get_rp0_frequency(&gt->rps);
+}
+
+static ssize_t RP0_freq_mhz_show(struct device *dev,
+ struct device_attribute *attr, char *buff)
+{
+ u32 rp0_freq = sysfs_gt_attribute_r_max_func(dev, attr,
+ __rp0_freq_mhz_show);
+
+ return sysfs_emit(buff, "%u\n", rp0_freq);
+}
+
+static u32 __rp1_freq_mhz_show(struct intel_gt *gt)
+{
+ return intel_rps_get_rp1_frequency(&gt->rps);
+}
+
+static ssize_t RP1_freq_mhz_show(struct device *dev,
+ struct device_attribute *attr, char *buff)
+{
+ u32 rp1_freq = sysfs_gt_attribute_r_max_func(dev, attr,
+ __rp1_freq_mhz_show);
+
+ return sysfs_emit(buff, "%u\n", rp1_freq);
+}
+
+static u32 __rpn_freq_mhz_show(struct intel_gt *gt)
+{
+ return intel_rps_get_rpn_frequency(&gt->rps);
+}
+
+static ssize_t RPn_freq_mhz_show(struct device *dev,
+ struct device_attribute *attr, char *buff)
+{
+ u32 rpn_freq = sysfs_gt_attribute_r_max_func(dev, attr,
+ __rpn_freq_mhz_show);
+
+ return sysfs_emit(buff, "%u\n", rpn_freq);
+}
+
+static u32 __max_freq_mhz_show(struct intel_gt *gt)
+{
+ return intel_rps_get_max_frequency(&gt->rps);
+}
+
+static ssize_t max_freq_mhz_show(struct device *dev,
+ struct device_attribute *attr, char *buff)
+{
+ u32 max_freq = sysfs_gt_attribute_r_max_func(dev, attr,
+ __max_freq_mhz_show);
+
+ return sysfs_emit(buff, "%u\n", max_freq);
+}
+
+static int __set_max_freq(struct intel_gt *gt, u32 val)
+{
+ return intel_rps_set_max_frequency(&gt->rps, val);
+}
+
+static ssize_t max_freq_mhz_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buff, size_t count)
+{
+ int ret;
+ u32 val;
+
+ ret = kstrtou32(buff, 0, &val);
+ if (ret)
+ return ret;
+
+ ret = sysfs_gt_attribute_w_func(dev, attr, __set_max_freq, val);
+
+ return ret ?: count;
+}
+
+static u32 __min_freq_mhz_show(struct intel_gt *gt)
+{
+ return intel_rps_get_min_frequency(&gt->rps);
+}
+
+static ssize_t min_freq_mhz_show(struct device *dev,
+ struct device_attribute *attr, char *buff)
+{
+ u32 min_freq = sysfs_gt_attribute_r_min_func(dev, attr,
+ __min_freq_mhz_show);
+
+ return sysfs_emit(buff, "%u\n", min_freq);
+}
+
+static int __set_min_freq(struct intel_gt *gt, u32 val)
+{
+ return intel_rps_set_min_frequency(&gt->rps, val);
+}
+
+static ssize_t min_freq_mhz_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buff, size_t count)
+{
+ int ret;
+ u32 val;
+
+ ret = kstrtou32(buff, 0, &val);
+ if (ret)
+ return ret;
+
+ ret = sysfs_gt_attribute_w_func(dev, attr, __set_min_freq, val);
+
+ return ret ?: count;
+}
+
+static u32 __vlv_rpe_freq_mhz_show(struct intel_gt *gt)
+{
+ struct intel_rps *rps = &gt->rps;
+
+ return intel_gpu_freq(rps, rps->efficient_freq);
+}
+
+static ssize_t vlv_rpe_freq_mhz_show(struct device *dev,
+ struct device_attribute *attr, char *buff)
+{
+ u32 rpe_freq = sysfs_gt_attribute_r_max_func(dev, attr,
+ __vlv_rpe_freq_mhz_show);
+
+ return sysfs_emit(buff, "%u\n", rpe_freq);
+}
+
+#define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store) \
+ struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, _show, _store); \
+ struct device_attribute dev_attr_rps_##_name = __ATTR(rps_##_name, _mode, _show, _store)
+
+#define INTEL_GT_RPS_SYSFS_ATTR_RO(_name) \
+ INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL)
+#define INTEL_GT_RPS_SYSFS_ATTR_RW(_name) \
+ INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store)
+
+static INTEL_GT_RPS_SYSFS_ATTR_RO(act_freq_mhz);
+static INTEL_GT_RPS_SYSFS_ATTR_RO(cur_freq_mhz);
+static INTEL_GT_RPS_SYSFS_ATTR_RW(boost_freq_mhz);
+static INTEL_GT_RPS_SYSFS_ATTR_RO(RP0_freq_mhz);
+static INTEL_GT_RPS_SYSFS_ATTR_RO(RP1_freq_mhz);
+static INTEL_GT_RPS_SYSFS_ATTR_RO(RPn_freq_mhz);
+static INTEL_GT_RPS_SYSFS_ATTR_RW(max_freq_mhz);
+static INTEL_GT_RPS_SYSFS_ATTR_RW(min_freq_mhz);
+
+static DEVICE_ATTR_RO(vlv_rpe_freq_mhz);
+
+#define GEN6_ATTR(s) { \
+ &dev_attr_##s##_act_freq_mhz.attr, \
+ &dev_attr_##s##_cur_freq_mhz.attr, \
+ &dev_attr_##s##_boost_freq_mhz.attr, \
+ &dev_attr_##s##_max_freq_mhz.attr, \
+ &dev_attr_##s##_min_freq_mhz.attr, \
+ &dev_attr_##s##_RP0_freq_mhz.attr, \
+ &dev_attr_##s##_RP1_freq_mhz.attr, \
+ &dev_attr_##s##_RPn_freq_mhz.attr, \
+ NULL, \
+ }
+
+#define GEN6_RPS_ATTR GEN6_ATTR(rps)
+#define GEN6_GT_ATTR GEN6_ATTR(gt)
+
+static const struct attribute * const gen6_rps_attrs[] = GEN6_RPS_ATTR;
+static const struct attribute * const gen6_gt_attrs[] = GEN6_GT_ATTR;
+
+static ssize_t punit_req_freq_mhz_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buff)
+{
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+ u32 preq = intel_rps_read_punit_req_frequency(&gt->rps);
+
+ return sysfs_emit(buff, "%u\n", preq);
+}
+
+struct intel_gt_bool_throttle_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ i915_reg_t reg32;
+ u32 mask;
+};
+
+static ssize_t throttle_reason_bool_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buff)
+{
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+ struct intel_gt_bool_throttle_attr *t_attr =
+ (struct intel_gt_bool_throttle_attr *) attr;
+ bool val = rps_read_mask_mmio(&gt->rps, t_attr->reg32, t_attr->mask);
+
+ return sysfs_emit(buff, "%u\n", val);
+}
+
+#define INTEL_GT_RPS_BOOL_ATTR_RO(sysfs_func__, mask__) \
+struct intel_gt_bool_throttle_attr attr_##sysfs_func__ = { \
+ .attr = { .name = __stringify(sysfs_func__), .mode = 0444 }, \
+ .show = throttle_reason_bool_show, \
+ .reg32 = GT0_PERF_LIMIT_REASONS, \
+ .mask = mask__, \
+}
+
+static DEVICE_ATTR_RO(punit_req_freq_mhz);
+static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_status, GT0_PERF_LIMIT_REASONS_MASK);
+static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl1, POWER_LIMIT_1_MASK);
+static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl2, POWER_LIMIT_2_MASK);
+static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl4, POWER_LIMIT_4_MASK);
+static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_thermal, THERMAL_LIMIT_MASK);
+static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_prochot, PROCHOT_MASK);
+static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_ratl, RATL_MASK);
+static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_vr_thermalert, VR_THERMALERT_MASK);
+static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_vr_tdc, VR_TDC_MASK);
+
+static const struct attribute *freq_attrs[] = {
+ &dev_attr_punit_req_freq_mhz.attr,
+ &attr_throttle_reason_status.attr,
+ &attr_throttle_reason_pl1.attr,
+ &attr_throttle_reason_pl2.attr,
+ &attr_throttle_reason_pl4.attr,
+ &attr_throttle_reason_thermal.attr,
+ &attr_throttle_reason_prochot.attr,
+ &attr_throttle_reason_ratl.attr,
+ &attr_throttle_reason_vr_thermalert.attr,
+ &attr_throttle_reason_vr_tdc.attr,
+ NULL
+};
+
+static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj,
+ const struct attribute * const *attrs)
+{
+ int ret;
+
+ if (GRAPHICS_VER(gt->i915) < 6)
+ return 0;
+
+ ret = sysfs_create_files(kobj, attrs);
+ if (ret)
+ return ret;
+
+ if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915))
+ ret = sysfs_create_file(kobj, &dev_attr_vlv_rpe_freq_mhz.attr);
+
+ return ret;
+}
+
+void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj)
+{
+ int ret;
+
+ intel_sysfs_rc6_init(gt, kobj);
+
+ ret = is_object_gt(kobj) ?
+ intel_sysfs_rps_init(gt, kobj, gen6_rps_attrs) :
+ intel_sysfs_rps_init(gt, kobj, gen6_gt_attrs);
+ if (ret)
+ drm_warn(&gt->i915->drm,
+ "failed to create gt%u RPS sysfs files (%pe)",
+ gt->info.id, ERR_PTR(ret));
+
+ /* end of the legacy interfaces */
+ if (!is_object_gt(kobj))
+ return;
+
+ ret = sysfs_create_files(kobj, freq_attrs);
+ if (ret)
+ drm_warn(&gt->i915->drm,
+ "failed to create gt%u throttle sysfs files (%pe)",
+ gt->info.id, ERR_PTR(ret));
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.h
new file mode 100644
index 000000000000..f567105a4a89
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __SYSFS_GT_PM_H__
+#define __SYSFS_GT_PM_H__
+
+#include <linux/kobject.h>
+
+#include "intel_gt_types.h"
+
+void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj);
+
+#endif /* __SYSFS_GT_PM_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index f20687796490..b06611c1d4ad 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -16,10 +16,12 @@
#include <linux/workqueue.h>
#include "uc/intel_uc.h"
+#include "intel_gsc.h"
#include "i915_vma.h"
#include "intel_engine_types.h"
#include "intel_gt_buffer_pool_types.h"
+#include "intel_hwconfig.h"
#include "intel_llc_types.h"
#include "intel_reset_types.h"
#include "intel_rc6_types.h"
@@ -72,6 +74,7 @@ struct intel_gt {
struct i915_ggtt *ggtt;
struct intel_uc uc;
+ struct intel_gsc gsc;
struct mutex tlb_invalidate_lock;
@@ -182,7 +185,19 @@ struct intel_gt {
const struct intel_mmio_range *steering_table[NUM_STEERING_TYPES];
+ struct {
+ u8 groupid;
+ u8 instanceid;
+ } default_steering;
+
+ /*
+ * Base of per-tile GTTMMADR, from which we derive the MMIO and GGTT bases.
+ */
+ phys_addr_t phys_addr;
+
struct intel_gt_info {
+ unsigned int id;
+
intel_engine_mask_t engine_mask;
u32 l3bank_mask;
@@ -199,6 +214,9 @@ struct intel_gt {
struct sseu_dev_info sseu;
unsigned long mslice_mask;
+
+ /** @hwconfig: hardware configuration data */
+ struct intel_hwconfig hwconfig;
} info;
struct {
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index cc53b481a1c5..b67831833c9a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -109,32 +109,52 @@ int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object
return 0;
}
-void __i915_vm_close(struct i915_address_space *vm)
+static void clear_vm_list(struct list_head *list)
{
struct i915_vma *vma, *vn;
- if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex))
- return;
-
- list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
+ list_for_each_entry_safe(vma, vn, list, vm_link) {
struct drm_i915_gem_object *obj = vma->obj;
- if (!kref_get_unless_zero(&obj->base.refcount)) {
+ if (!i915_gem_object_get_rcu(obj)) {
/*
- * Unbind the dying vma to ensure the bound_list
+ * Object is dying, but has not yet cleared its
+ * vma list.
+ * Unbind the dying vma to ensure our list
* is completely drained. We leave the destruction to
- * the object destructor.
+ * the object destructor to avoid the vma
+ * disappearing under it.
*/
atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
WARN_ON(__i915_vma_unbind(vma));
- continue;
+
+ /* Remove from the unbound list */
+ list_del_init(&vma->vm_link);
+
+ /*
+ * Delay the vm and vm mutex freeing until the
+ * object is done with destruction.
+ */
+ i915_vm_resv_get(vma->vm);
+ vma->vm_ddestroy = true;
+ } else {
+ i915_vma_destroy_locked(vma);
+ i915_gem_object_put(obj);
}
- /* Keep the obj (and hence the vma) alive as _we_ destroy it */
- i915_vma_destroy_locked(vma);
- i915_gem_object_put(obj);
}
+}
+
+static void __i915_vm_close(struct i915_address_space *vm)
+{
+ mutex_lock(&vm->mutex);
+
+ clear_vm_list(&vm->bound_list);
+ clear_vm_list(&vm->unbound_list);
+
+ /* Check for must-fix unanticipated side-effects */
GEM_BUG_ON(!list_empty(&vm->bound_list));
+ GEM_BUG_ON(!list_empty(&vm->unbound_list));
mutex_unlock(&vm->mutex);
}
@@ -156,7 +176,6 @@ int i915_vm_lock_objects(struct i915_address_space *vm,
void i915_address_space_fini(struct i915_address_space *vm)
{
drm_mm_takedown(&vm->mm);
- mutex_destroy(&vm->mutex);
}
/**
@@ -164,7 +183,8 @@ void i915_address_space_fini(struct i915_address_space *vm)
* @kref: Pointer to the &i915_address_space.resv_ref member.
*
* This function is called when the last lock sharer no longer shares the
- * &i915_address_space._resv lock.
+ * &i915_address_space._resv lock, and also if we raced when
+ * destroying a vma by the vma destruction
*/
void i915_vm_resv_release(struct kref *kref)
{
@@ -172,6 +192,8 @@ void i915_vm_resv_release(struct kref *kref)
container_of(kref, typeof(*vm), resv_ref);
dma_resv_fini(&vm->_resv);
+ mutex_destroy(&vm->mutex);
+
kfree(vm);
}
@@ -180,6 +202,8 @@ static void __i915_vm_release(struct work_struct *work)
struct i915_address_space *vm =
container_of(work, struct i915_address_space, release_work);
+ __i915_vm_close(vm);
+
/* Synchronize async unbinds. */
i915_vma_resource_bind_dep_sync_all(vm);
@@ -213,7 +237,6 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
vm->pending_unbind = RB_ROOT_CACHED;
INIT_WORK(&vm->release_work, __i915_vm_release);
- atomic_set(&vm->open, 1);
/*
* The vm->mutex must be reclaim safe (for use in the shrinker).
@@ -258,6 +281,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
INIT_LIST_HEAD(&vm->bound_list);
+ INIT_LIST_HEAD(&vm->unbound_list);
}
void *__px_vaddr(struct drm_i915_gem_object *p)
@@ -286,7 +310,7 @@ fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
void *vaddr = __px_vaddr(p);
memset64(vaddr, val, count);
- clflush_cache_range(vaddr, PAGE_SIZE);
+ drm_clflush_virt_range(vaddr, PAGE_SIZE);
}
static void poison_scratch_page(struct drm_i915_gem_object *scratch)
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index ba7025388a5e..a40d928b3888 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -240,15 +240,6 @@ struct i915_address_space {
unsigned int bind_async_flags;
- /*
- * Each active user context has its own address space (in full-ppgtt).
- * Since the vm may be shared between multiple contexts, we count how
- * many contexts keep us "open". Once open hits zero, we are closed
- * and do not allow any new attachments, and proceed to shutdown our
- * vma and page directories.
- */
- atomic_t open;
-
struct mutex mutex; /* protects vma and our lists */
struct kref resv_ref; /* kref to keep the reservation lock alive. */
@@ -263,6 +254,11 @@ struct i915_address_space {
*/
struct list_head bound_list;
+ /**
+ * List of vmas not yet bound or evicted.
+ */
+ struct list_head unbound_list;
+
/* Global GTT */
bool is_ggtt:1;
@@ -272,6 +268,9 @@ struct i915_address_space {
/* Some systems support read-only mappings for GGTT and/or PPGTT */
bool has_read_only:1;
+ /* Skip pte rewrite on unbind for suspend. Protected by @mutex */
+ bool skip_pte_rewrite:1;
+
u8 top;
u8 pd_shift;
u8 scratch_order;
@@ -448,6 +447,17 @@ i915_vm_get(struct i915_address_space *vm)
return vm;
}
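+/*
+ * Try to grab a vm reference; returns NULL if the final reference has
+ * already been dropped and the vm is being destroyed.
+ */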
+static inline struct i915_address_space *
+i915_vm_tryget(struct i915_address_space *vm)
+{
+ return kref_get_unless_zero(&vm->ref) ? vm : NULL;
+}
+
+static inline void assert_vm_alive(struct i915_address_space *vm)
+{
+ GEM_BUG_ON(!kref_read(&vm->ref));
+}
+
/**
* i915_vm_resv_get - Obtain a reference on the vm's reservation lock
* @vm: The vm whose reservation lock we want to share.
@@ -478,34 +488,6 @@ static inline void i915_vm_resv_put(struct i915_address_space *vm)
kref_put(&vm->resv_ref, i915_vm_resv_release);
}
-static inline struct i915_address_space *
-i915_vm_open(struct i915_address_space *vm)
-{
- GEM_BUG_ON(!atomic_read(&vm->open));
- atomic_inc(&vm->open);
- return i915_vm_get(vm);
-}
-
-static inline bool
-i915_vm_tryopen(struct i915_address_space *vm)
-{
- if (atomic_add_unless(&vm->open, 1, 0))
- return i915_vm_get(vm);
-
- return false;
-}
-
-void __i915_vm_close(struct i915_address_space *vm);
-
-static inline void
-i915_vm_close(struct i915_address_space *vm)
-{
- GEM_BUG_ON(!atomic_read(&vm->open));
- __i915_vm_close(vm);
-
- i915_vm_put(vm);
-}
-
void i915_address_space_init(struct i915_address_space *vm, int subclass);
void i915_address_space_fini(struct i915_address_space *vm);
@@ -567,6 +549,14 @@ i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt,
unsigned long lmem_pt_obj_flags);
+void intel_ggtt_bind_vma(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_vma_resource *vma_res,
+ enum i915_cache_level cache_level,
+ u32 flags);
+void intel_ggtt_unbind_vma(struct i915_address_space *vm,
+ struct i915_vma_resource *vma_res);
+
int i915_ggtt_probe_hw(struct drm_i915_private *i915);
int i915_ggtt_init_hw(struct drm_i915_private *i915);
int i915_ggtt_enable_hw(struct drm_i915_private *i915);
@@ -637,6 +627,7 @@ release_pd_entry(struct i915_page_directory * const pd,
struct i915_page_table * const pt,
const struct drm_i915_gem_object * const scratch);
void gen6_ggtt_invalidate(struct i915_ggtt *ggtt);
+void gen8_ggtt_invalidate(struct i915_ggtt *ggtt);
void ppgtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
diff --git a/drivers/gpu/drm/i915/gt/intel_hwconfig.h b/drivers/gpu/drm/i915/gt/intel_hwconfig.h
new file mode 100644
index 000000000000..322290780b67
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_hwconfig.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _INTEL_HWCONFIG_H_
+#define _INTEL_HWCONFIG_H_
+
+#include <linux/types.h>
+
+struct intel_gt;
+
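+/*
+ * Raw hwconfig table blob, as retrieved from the GuC and exposed to
+ * userspace via the i915 query uAPI.
+ */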
+struct intel_hwconfig {
+ u32 size;
+ void *ptr;
+};
+
+int intel_gt_init_hwconfig(struct intel_gt *gt);
+void intel_gt_fini_hwconfig(struct intel_gt *gt);
+
+#endif /* _INTEL_HWCONFIG_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 07bef7128fdb..3f83a9038e13 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -778,7 +778,7 @@ static void init_common_regs(u32 * const regs,
CTX_CTRL_RS_CTX_ENABLE);
regs[CTX_CONTEXT_CONTROL] = ctl;
- regs[CTX_TIMESTAMP] = ce->runtime.last;
+ regs[CTX_TIMESTAMP] = ce->stats.runtime.last;
}
static void init_wa_bb_regs(u32 * const regs,
@@ -1208,6 +1208,10 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
IS_DG2_G11(ce->engine->i915))
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
+ /* hsdes: 1809175790 */
+ if (!HAS_FLAT_CCS(ce->engine->i915))
+ cs = gen12_emit_aux_table_inv(cs, GEN12_GFX_CCS_AUX_NV);
+
return cs;
}
@@ -1225,6 +1229,14 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
0);
+ /* hsdes: 1809175790 */
+ if (!HAS_FLAT_CCS(ce->engine->i915)) {
+ if (ce->engine->class == VIDEO_DECODE_CLASS)
+ cs = gen12_emit_aux_table_inv(cs, GEN12_VD0_AUX_NV);
+ else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS)
+ cs = gen12_emit_aux_table_inv(cs, GEN12_VE0_AUX_NV);
+ }
+
return cs;
}
@@ -1722,11 +1734,12 @@ err:
}
}
-static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
+static void st_runtime_underflow(struct intel_context_stats *stats, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
- ce->runtime.num_underflow++;
- ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
+ stats->runtime.num_underflow++;
+ stats->runtime.max_underflow =
+ max_t(u32, stats->runtime.max_underflow, -dt);
#endif
}
@@ -1743,25 +1756,25 @@ static u32 lrc_get_runtime(const struct intel_context *ce)
void lrc_update_runtime(struct intel_context *ce)
{
+ struct intel_context_stats *stats = &ce->stats;
u32 old;
s32 dt;
- if (intel_context_is_barrier(ce))
+ old = stats->runtime.last;
+ stats->runtime.last = lrc_get_runtime(ce);
+ dt = stats->runtime.last - old;
+ if (!dt)
return;
- old = ce->runtime.last;
- ce->runtime.last = lrc_get_runtime(ce);
- dt = ce->runtime.last - old;
-
if (unlikely(dt < 0)) {
CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
- old, ce->runtime.last, dt);
- st_update_runtime_underflow(ce, dt);
+ old, stats->runtime.last, dt);
+ st_runtime_underflow(stats, dt);
return;
}
- ewma_runtime_add(&ce->runtime.avg, dt);
- ce->runtime.total += dt;
+ ewma_runtime_add(&stats->runtime.avg, dt);
+ stats->runtime.total += dt;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 6e4f9f58fca5..7371bb5c8129 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -11,9 +11,10 @@
#include <linux/bitfield.h>
#include <linux/types.h>
+#include "intel_context.h"
+
struct drm_i915_gem_object;
struct i915_gem_ww_ctx;
-struct intel_context;
struct intel_engine_cs;
struct intel_ring;
struct kref;
@@ -120,4 +121,28 @@ static inline u32 lrc_desc_priority(int prio)
return GEN12_CTX_PRIORITY_NORMAL;
}
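+/*
+ * Record when the context went active so that the busyness accounting can
+ * credit time spent on the hardware until lrc_runtime_stop() is called.
+ */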
+static inline void lrc_runtime_start(struct intel_context *ce)
+{
+ struct intel_context_stats *stats = &ce->stats;
+
+ if (intel_context_is_barrier(ce))
+ return;
+
+ if (stats->active)
+ return;
+
+ WRITE_ONCE(stats->active, intel_context_clock());
+}
+
+static inline void lrc_runtime_stop(struct intel_context *ce)
+{
+ struct intel_context_stats *stats = &ce->stats;
+
+ if (!stats->active)
+ return;
+
+ lrc_update_runtime(ce);
+ WRITE_ONCE(stats->active, 0);
+}
+
#endif /* __INTEL_LRC_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 20444d6ceb3c..9d552f30b627 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -17,6 +17,8 @@ struct insert_pte_data {
#define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */
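+/* CCS is 1/256th of the main-memory size, e.g. 32K of CCS for an 8M chunk. */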
+#define GET_CCS_BYTES(i915, size) (HAS_FLAT_CCS(i915) ? \
+ DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0)
static bool engine_supports_migration(struct intel_engine_cs *engine)
{
if (!engine)
@@ -467,6 +469,123 @@ static bool wa_1209644611_applies(int ver, u32 size)
return height % 4 == 3 && height <= 8;
}
+/**
+ * DOC: Flat-CCS - Memory compression for Local memory
+ *
+ * On Xe-HP and later devices, we use dedicated compression control state (CCS)
+ * stored in local memory for each surface, to support the 3D and media
+ * compression formats.
+ *
+ * The memory required for the CCS of the entire local memory is 1/256 of the
+ * local memory size. So before the kernel boot, the required memory is reserved
+ * for the CCS data and a secure register will be programmed with the CCS base
+ * address.
+ *
+ * Flat CCS data needs to be cleared when an lmem object is allocated,
+ * and CCS data can be copied in and out of the CCS region through
+ * XY_CTRL_SURF_COPY_BLT. The CPU can't access the CCS data directly.
+ *
+ * When lmem is exhausted and the object's placements allow smem, we can
+ * decompress the compressed lmem object directly into smem and start
+ * using it from smem itself.
+ *
+ * But when we need to swap out the compressed lmem object into an smem
+ * region even though the object's placement doesn't support smem, we copy
+ * the lmem content as-is into the smem region along with the ccs data
+ * (using XY_CTRL_SURF_COPY_BLT). When the object is used again, the lmem
+ * content is swapped back in, along with restoration of the CCS data
+ * (using XY_CTRL_SURF_COPY_BLT) at the corresponding location.
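+ *
+ * For example, with 16 GiB of local memory (an illustrative figure, not
+ * any specific SKU), the reserved CCS region is 16 GiB / 256 = 64 MiB.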
+ */
+
+static inline u32 *i915_flush_dw(u32 *cmd, u32 flags)
+{
+ *cmd++ = MI_FLUSH_DW | flags;
+ *cmd++ = 0;
+ *cmd++ = 0;
+
+ return cmd;
+}
+
+static u32 calc_ctrl_surf_instr_size(struct drm_i915_private *i915, int size)
+{
+ u32 num_cmds, num_blks, total_size;
+
+ if (!GET_CCS_BYTES(i915, size))
+ return 0;
+
+ /*
+ * XY_CTRL_SURF_COPY_BLT transfers CCS in 256-byte
+ * blocks. One XY_CTRL_SURF_COPY_BLT command can
+ * transfer up to 1024 blocks.
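+ * E.g. (illustrative figures): 128M of lmem needs 512K of
+ * CCS, i.e. 2048 blocks, i.e. two XY_CTRL_SURF_COPY_BLT
+ * commands.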
+ */
+ num_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size),
+ NUM_CCS_BYTES_PER_BLOCK);
+ num_cmds = DIV_ROUND_UP(num_blks, NUM_CCS_BLKS_PER_XFER);
+ total_size = XY_CTRL_SURF_INSTR_SIZE * num_cmds;
+
+ /*
+ * Add a flush before and after the XY_CTRL_SURF_COPY_BLT commands.
+ */
+ total_size += 2 * MI_FLUSH_DW_SIZE;
+
+ return total_size;
+}
+
+static int emit_copy_ccs(struct i915_request *rq,
+ u32 dst_offset, u8 dst_access,
+ u32 src_offset, u8 src_access, int size)
+{
+ struct drm_i915_private *i915 = rq->engine->i915;
+ int mocs = rq->engine->gt->mocs.uc_index << 1;
+ u32 num_ccs_blks, ccs_ring_size;
+ u32 *cs;
+
+ ccs_ring_size = calc_ctrl_surf_instr_size(i915, size);
+ WARN_ON(!ccs_ring_size);
+
+ cs = intel_ring_begin(rq, round_up(ccs_ring_size, 2));
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ num_ccs_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size),
+ NUM_CCS_BYTES_PER_BLOCK);
+ GEM_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER);
+ cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS);
+
+ /*
+ * The XY_CTRL_SURF_COPY_BLT instruction is used to copy the CCS
+ * data in and out of the CCS region.
+ *
+ * We can copy at most 1024 blocks of 256 bytes using one
+ * XY_CTRL_SURF_COPY_BLT instruction.
+ *
+ * In case we need to copy more than 1024 blocks, we need to add
+ * another instruction to the same batch buffer.
+ *
+ * 1024 blocks of 256 bytes of CCS represent a total 256KB of CCS.
+ *
+ * 256 KB of CCS represents 256 * 256 KB = 64 MB of LMEM.
+ */
+ *cs++ = XY_CTRL_SURF_COPY_BLT |
+ src_access << SRC_ACCESS_TYPE_SHIFT |
+ dst_access << DST_ACCESS_TYPE_SHIFT |
+ ((num_ccs_blks - 1) & CCS_SIZE_MASK) << CCS_SIZE_SHIFT;
+ *cs++ = src_offset;
+ *cs++ = rq->engine->instance |
+ FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
+ *cs++ = dst_offset;
+ *cs++ = rq->engine->instance |
+ FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
+
+ cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS);
+ if (ccs_ring_size & 1)
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
static int emit_copy(struct i915_request *rq,
u32 dst_offset, u32 src_offset, int size)
{
@@ -514,6 +633,65 @@ static int emit_copy(struct i915_request *rq,
return 0;
}
+static int scatter_list_length(struct scatterlist *sg)
+{
+ int len = 0;
+
+ while (sg && sg_dma_len(sg)) {
+ len += sg_dma_len(sg);
+ sg = sg_next(sg);
+ }
+
+ return len;
+}
+
+static void
+calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem,
+ int *src_sz, int *ccs_sz, u32 bytes_to_cpy,
+ u32 ccs_bytes_to_cpy)
+{
+ if (ccs_bytes_to_cpy) {
+ /*
+ * We can only copy the ccs data corresponding to
+ * one CHUNK_SZ of lmem, which is
+ * GET_CCS_BYTES(i915, CHUNK_SZ).
+ */
+ *ccs_sz = min_t(int, ccs_bytes_to_cpy, GET_CCS_BYTES(i915, CHUNK_SZ));
+
+ if (!src_is_lmem)
+ /*
+ * When CHUNK_SZ is passed, all the pages up to CHUNK_SZ
+ * will be taken for the blt. On Flat-ccs capable
+ * platforms an smem object has more pages than required
+ * for the main memory, so limit the copy to the size
+ * required for the main memory.
+ */
+ *src_sz = min_t(int, bytes_to_cpy, CHUNK_SZ);
+ } else { /* ccs handling is not required */
+ *src_sz = CHUNK_SZ;
+ }
+}
+
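+/*
+ * Advance the sg iterator past the first @bytes_to_cpy bytes of main-memory
+ * backing store so that it points at the trailing ccs pages.
+ */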
+static void get_ccs_sg_sgt(struct sgt_dma *it, u32 bytes_to_cpy)
+{
+ u32 len;
+
+ do {
+ GEM_BUG_ON(!it->sg || !sg_dma_len(it->sg));
+ len = it->max - it->dma;
+ if (len > bytes_to_cpy) {
+ it->dma += bytes_to_cpy;
+ break;
+ }
+
+ bytes_to_cpy -= len;
+
+ it->sg = __sg_next(it->sg);
+ it->dma = sg_dma_address(it->sg);
+ it->max = it->dma + sg_dma_len(it->sg);
+ } while (bytes_to_cpy);
+}
+
int
intel_context_migrate_copy(struct intel_context *ce,
const struct i915_deps *deps,
@@ -525,17 +703,67 @@ intel_context_migrate_copy(struct intel_context *ce,
bool dst_is_lmem,
struct i915_request **out)
{
- struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst);
+ struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst), it_ccs;
+ struct drm_i915_private *i915 = ce->engine->i915;
+ u32 ccs_bytes_to_cpy = 0, bytes_to_cpy;
+ enum i915_cache_level ccs_cache_level;
+ int src_sz, dst_sz, ccs_sz;
+ u32 src_offset, dst_offset;
+ u8 src_access, dst_access;
struct i915_request *rq;
+ bool ccs_is_src;
int err;
GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
+ GEM_BUG_ON(IS_DGFX(ce->engine->i915) && (!src_is_lmem && !dst_is_lmem));
*out = NULL;
GEM_BUG_ON(ce->ring->size < SZ_64K);
+ src_sz = scatter_list_length(src);
+ bytes_to_cpy = src_sz;
+
+ if (HAS_FLAT_CCS(i915) && src_is_lmem ^ dst_is_lmem) {
+ src_access = !src_is_lmem && dst_is_lmem;
+ dst_access = !src_access;
+
+ dst_sz = scatter_list_length(dst);
+ if (src_is_lmem) {
+ it_ccs = it_dst;
+ ccs_cache_level = dst_cache_level;
+ ccs_is_src = false;
+ } else if (dst_is_lmem) {
+ bytes_to_cpy = dst_sz;
+ it_ccs = it_src;
+ ccs_cache_level = src_cache_level;
+ ccs_is_src = true;
+ }
+
+ /*
+ * When an eviction needs the ccs data preserved, smem will
+ * have the extra pages for the ccs data.
+ *
+ * TODO: We want to turn the size mismatch check into a WARN_ON,
+ * but we still see some smem->lmem requests with matching sizes.
+ * That needs to be fixed first.
+ */
+ ccs_bytes_to_cpy = src_sz != dst_sz ? GET_CCS_BYTES(i915, bytes_to_cpy) : 0;
+ if (ccs_bytes_to_cpy)
+ get_ccs_sg_sgt(&it_ccs, bytes_to_cpy);
+ }
+
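+ /*
+ * The migrate vm exposes CHUNK_SZ-sized mapping windows; keep src and
+ * dst in separate windows. On 64K-page platforms the lmem mappings get
+ * their own windows, while smem stays in the first one.
+ */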
+ src_offset = 0;
+ dst_offset = CHUNK_SZ;
+ if (HAS_64K_PAGES(ce->engine->i915)) {
+ src_offset = 0;
+ dst_offset = 0;
+ if (src_is_lmem)
+ src_offset = CHUNK_SZ;
+ if (dst_is_lmem)
+ dst_offset = 2 * CHUNK_SZ;
+ }
+
do {
- u32 src_offset, dst_offset;
int len;
rq = i915_request_create(ce);
@@ -563,22 +791,16 @@ intel_context_migrate_copy(struct intel_context *ce,
if (err)
goto out_rq;
- src_offset = 0;
- dst_offset = CHUNK_SZ;
- if (HAS_64K_PAGES(ce->engine->i915)) {
- GEM_BUG_ON(!src_is_lmem && !dst_is_lmem);
-
- src_offset = 0;
- dst_offset = 0;
- if (src_is_lmem)
- src_offset = CHUNK_SZ;
- if (dst_is_lmem)
- dst_offset = 2 * CHUNK_SZ;
- }
+ calculate_chunk_sz(i915, src_is_lmem, &src_sz, &ccs_sz,
+ bytes_to_cpy, ccs_bytes_to_cpy);
len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem,
- src_offset, CHUNK_SZ);
- if (len <= 0) {
+ src_offset, src_sz);
+ if (!len) {
+ err = -EINVAL;
+ goto out_rq;
+ }
+ if (len < 0) {
err = len;
goto out_rq;
}
@@ -596,7 +818,46 @@ intel_context_migrate_copy(struct intel_context *ce,
if (err)
goto out_rq;
- err = emit_copy(rq, dst_offset, src_offset, len);
+ err = emit_copy(rq, dst_offset, src_offset, len);
+ if (err)
+ goto out_rq;
+
+ bytes_to_cpy -= len;
+
+ if (ccs_bytes_to_cpy) {
+ err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ goto out_rq;
+
+ err = emit_pte(rq, &it_ccs, ccs_cache_level, false,
+ ccs_is_src ? src_offset : dst_offset,
+ ccs_sz);
+
+ err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ goto out_rq;
+
+ /*
+ * Using max of src_sz and dst_sz, as we need to
+ * pass the lmem size corresponding to the ccs
+ * blocks we need to handle.
+ */
+ ccs_sz = max_t(int, ccs_is_src ? ccs_sz : src_sz,
+ ccs_is_src ? dst_sz : ccs_sz);
+
+ err = emit_copy_ccs(rq, dst_offset, dst_access,
+ src_offset, src_access, ccs_sz);
+ if (err)
+ goto out_rq;
+
+ err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ goto out_rq;
+
+ /* Converting back to ccs bytes */
+ ccs_sz = GET_CCS_BYTES(rq->engine->i915, ccs_sz);
+ ccs_bytes_to_cpy -= ccs_sz;
+ }
/* Arbitration is re-enabled between requests. */
out_rq:
@@ -604,9 +865,26 @@ out_rq:
i915_request_put(*out);
*out = i915_request_get(rq);
i915_request_add(rq);
- if (err || !it_src.sg || !sg_dma_len(it_src.sg))
+
+ if (err)
break;
+ if (!bytes_to_cpy && !ccs_bytes_to_cpy) {
+ if (src_is_lmem)
+ WARN_ON(it_src.sg && sg_dma_len(it_src.sg));
+ else
+ WARN_ON(it_dst.sg && sg_dma_len(it_dst.sg));
+ break;
+ }
+
+ if (WARN_ON(!it_src.sg || !sg_dma_len(it_src.sg) ||
+ !it_dst.sg || !sg_dma_len(it_dst.sg) ||
+ (ccs_bytes_to_cpy && (!it_ccs.sg ||
+ !sg_dma_len(it_ccs.sg))))) {
+ err = -EINVAL;
+ break;
+ }
+
cond_resched();
} while (1);
@@ -614,35 +892,65 @@ out_ce:
return err;
}
-static int emit_clear(struct i915_request *rq, u64 offset, int size, u32 value)
+static int emit_clear(struct i915_request *rq, u32 offset, int size,
+ u32 value, bool is_lmem)
{
- const int ver = GRAPHICS_VER(rq->engine->i915);
+ struct drm_i915_private *i915 = rq->engine->i915;
+ int mocs = rq->engine->gt->mocs.uc_index << 1;
+ const int ver = GRAPHICS_VER(i915);
+ int ring_sz;
u32 *cs;
GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
- offset += (u64)rq->engine->instance << 32;
+ if (HAS_FLAT_CCS(i915) && ver >= 12)
+ ring_sz = XY_FAST_COLOR_BLT_DW;
+ else if (ver >= 8)
+ ring_sz = 8;
+ else
+ ring_sz = 6;
- cs = intel_ring_begin(rq, ver >= 8 ? 8 : 6);
+ cs = intel_ring_begin(rq, ring_sz);
if (IS_ERR(cs))
return PTR_ERR(cs);
- if (ver >= 8) {
+ if (HAS_FLAT_CCS(i915) && ver >= 12) {
+ *cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
+ (XY_FAST_COLOR_BLT_DW - 2);
+ *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
+ (PAGE_SIZE - 1);
+ *cs++ = 0;
+ *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+ *cs++ = offset;
+ *cs++ = rq->engine->instance;
+ *cs++ = !is_lmem << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;
+ /* BG7 */
+ *cs++ = value;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ /* BG11 */
+ *cs++ = 0;
+ *cs++ = 0;
+ /* BG13 */
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ } else if (ver >= 8) {
*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
*cs++ = 0;
*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
- *cs++ = lower_32_bits(offset);
- *cs++ = upper_32_bits(offset);
+ *cs++ = offset;
+ *cs++ = rq->engine->instance;
*cs++ = value;
*cs++ = MI_NOOP;
} else {
- GEM_BUG_ON(upper_32_bits(offset));
*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
*cs++ = 0;
*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
- *cs++ = lower_32_bits(offset);
+ *cs++ = offset;
*cs++ = value;
}
@@ -659,8 +967,10 @@ intel_context_migrate_clear(struct intel_context *ce,
u32 value,
struct i915_request **out)
{
+ struct drm_i915_private *i915 = ce->engine->i915;
struct sgt_dma it = sg_sgt(sg);
struct i915_request *rq;
+ u32 offset;
int err;
GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
@@ -668,8 +978,11 @@ intel_context_migrate_clear(struct intel_context *ce,
GEM_BUG_ON(ce->ring->size < SZ_64K);
+ offset = 0;
+ if (HAS_64K_PAGES(i915) && is_lmem)
+ offset = CHUNK_SZ;
+
do {
- u32 offset;
int len;
rq = i915_request_create(ce);
@@ -697,10 +1010,6 @@ intel_context_migrate_clear(struct intel_context *ce,
if (err)
goto out_rq;
- offset = 0;
- if (HAS_64K_PAGES(ce->engine->i915) && is_lmem)
- offset = CHUNK_SZ;
-
len = emit_pte(rq, &it, cache_level, is_lmem, offset, CHUNK_SZ);
if (len <= 0) {
err = len;
@@ -711,7 +1020,22 @@ intel_context_migrate_clear(struct intel_context *ce,
if (err)
goto out_rq;
- err = emit_clear(rq, offset, len, value);
+ err = emit_clear(rq, offset, len, value, is_lmem);
+ if (err)
+ goto out_rq;
+
+ if (HAS_FLAT_CCS(i915) && is_lmem && !value) {
+ /*
+ * Copy the just-cleared main-memory content into the
+ * corresponding ccs surface to clear the compression state.
+ */
+ err = emit_copy_ccs(rq, offset, INDIRECT_ACCESS, offset,
+ DIRECT_ACCESS, len);
+ if (err)
+ goto out_rq;
+ }
+
+ err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
/* Arbitration is re-enabled between requests. */
out_rq:
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index d91e2beb7517..d8b94d638559 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -91,7 +91,7 @@ write_dma_entry(struct drm_i915_gem_object * const pdma,
u64 * const vaddr = __px_vaddr(pdma);
vaddr[idx] = encoded_entry;
- clflush_cache_range(&vaddr[idx], sizeof(u64));
+ drm_clflush_virt_range(&vaddr[idx], sizeof(u64));
}
void
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 63db136cbc27..b4770690e794 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -6,6 +6,7 @@
#include <linux/pm_runtime.h>
#include <linux/string_helpers.h>
+#include "gem/i915_gem_region.h"
#include "i915_drv.h"
#include "i915_reg.h"
#include "i915_vgpu.h"
@@ -325,9 +326,10 @@ static int vlv_rc6_init(struct intel_rc6 *rc6)
resource_size_t pcbr_offset;
pcbr_offset = (pcbr & ~4095) - i915->dsm.start;
- pctx = i915_gem_object_create_stolen_for_preallocated(i915,
- pcbr_offset,
- pctx_size);
+ pctx = i915_gem_object_create_region_at(i915->mm.stolen_region,
+ pcbr_offset,
+ pctx_size,
+ 0);
if (IS_ERR(pctx))
return PTR_ERR(pctx);
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index 6cecfdae07ad..f5111c0a0060 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -93,6 +93,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
struct intel_memory_region *mem;
resource_size_t min_page_size;
resource_size_t io_start;
+ resource_size_t io_size;
resource_size_t lmem_size;
int err;
@@ -122,9 +123,14 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
lmem_size = intel_uncore_read64(&i915->uncore, GEN12_GSMBASE);
}
+ if (i915->params.lmem_size > 0) {
+ lmem_size = min_t(resource_size_t, lmem_size,
+ mul_u32_u32(i915->params.lmem_size, SZ_1M));
+ }
io_start = pci_resource_start(pdev, 2);
- if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2)))
+ io_size = min(pci_resource_len(pdev, 2), lmem_size);
+ if (!io_size)
return ERR_PTR(-ENODEV);
min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
@@ -134,7 +140,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
lmem_size,
min_page_size,
io_start,
- lmem_size,
+ io_size,
INTEL_MEMORY_LOCAL,
0,
&intel_region_lmem_ops);
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index a6ae213c7d89..5422a3b84bd4 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -772,14 +772,15 @@ static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
intel_engine_mask_t awake = 0;
enum intel_engine_id id;
+ /* For GuC mode, ensure submission is disabled before stopping ring */
+ intel_uc_reset_prepare(&gt->uc);
+
for_each_engine(engine, gt, id) {
if (intel_engine_pm_get_if_awake(engine))
awake |= engine->mask;
reset_prepare_engine(engine);
}
- intel_uc_reset_prepare(&gt->uc);
-
return awake;
}
@@ -1319,7 +1320,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
engine_mask &= gt->info.engine_mask;
if (flags & I915_ERROR_CAPTURE) {
- i915_capture_error_state(gt, engine_mask);
+ i915_capture_error_state(gt, engine_mask, CORE_DUMP_FLAG_NONE);
intel_gt_clear_error_registers(gt, engine_mask);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 6d7ec3bf1f32..5423bfd301ad 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -767,7 +767,7 @@ static int mi_set_context(struct i915_request *rq,
if (GRAPHICS_VER(i915) == 7) {
if (num_engines) {
struct intel_engine_cs *signaller;
- i915_reg_t last_reg = {}; /* keep gcc quiet */
+ i915_reg_t last_reg = INVALID_MMIO_REG; /* keep gcc quiet */
*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
for_each_engine(signaller, engine->gt, id) {
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index a9c13b1a3018..3476a11f294c 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1070,24 +1070,67 @@ int intel_rps_set(struct intel_rps *rps, u8 val)
return 0;
}
-static void gen6_rps_init(struct intel_rps *rps)
+static u32 intel_rps_read_state_cap(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
- u32 rp_state_cap = intel_rps_read_state_cap(rps);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
- /* All of these values are in units of 50MHz */
+ if (IS_XEHPSDV(i915))
+ return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP);
+ else if (IS_GEN9_LP(i915))
+ return intel_uncore_read(uncore, BXT_RP_STATE_CAP);
+ else
+ return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
+}
+
+/**
+ * gen6_rps_get_freq_caps - Get freq caps exposed by HW
+ * @rps: the intel_rps structure
+ * @caps: returned freq caps
+ *
+ * Returned "caps" frequencies should be converted to MHz using
+ * intel_gpu_freq().
+ */
+void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 rp_state_cap;
+
+ rp_state_cap = intel_rps_read_state_cap(rps);
/* static values from HW: RP0 > RP1 > RPn (min_freq) */
if (IS_GEN9_LP(i915)) {
- rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
- rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
- rps->min_freq = (rp_state_cap >> 0) & 0xff;
+ caps->rp0_freq = (rp_state_cap >> 16) & 0xff;
+ caps->rp1_freq = (rp_state_cap >> 8) & 0xff;
+ caps->min_freq = (rp_state_cap >> 0) & 0xff;
} else {
- rps->rp0_freq = (rp_state_cap >> 0) & 0xff;
- rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
- rps->min_freq = (rp_state_cap >> 16) & 0xff;
+ caps->rp0_freq = (rp_state_cap >> 0) & 0xff;
+ caps->rp1_freq = (rp_state_cap >> 8) & 0xff;
+ caps->min_freq = (rp_state_cap >> 16) & 0xff;
}
+ if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
+ /*
+ * In this case rp_state_cap register reports frequencies in
+ * units of 50 MHz. Convert these to the actual "hw unit", i.e.
+ * units of 16.67 MHz
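+ * (e.g. an illustrative raw value of 22, i.e. 1100 MHz, becomes
+ * 22 * GEN9_FREQ_SCALER = 66 units of 16.67 MHz).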
+ */
+ caps->rp0_freq *= GEN9_FREQ_SCALER;
+ caps->rp1_freq *= GEN9_FREQ_SCALER;
+ caps->min_freq *= GEN9_FREQ_SCALER;
+ }
+}
+
+static void gen6_rps_init(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_rps_freq_caps caps;
+
+ gen6_rps_get_freq_caps(rps, &caps);
+ rps->rp0_freq = caps.rp0_freq;
+ rps->rp1_freq = caps.rp1_freq;
+ rps->min_freq = caps.min_freq;
+
/* hw_max = RP0 until we check for overclocking */
rps->max_freq = rps->rp0_freq;
@@ -1095,26 +1138,18 @@ static void gen6_rps_init(struct intel_rps *rps)
if (IS_HASWELL(i915) || IS_BROADWELL(i915) ||
IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
u32 ddcc_status = 0;
+ u32 mult = 1;
+ if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11)
+ mult = GEN9_FREQ_SCALER;
if (snb_pcode_read(i915, HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
&ddcc_status, NULL) == 0)
rps->efficient_freq =
- clamp_t(u8,
- (ddcc_status >> 8) & 0xff,
+ clamp_t(u32,
+ ((ddcc_status >> 8) & 0xff) * mult,
rps->min_freq,
rps->max_freq);
}
-
- if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
- /* Store the frequency values in 16.66 MHZ units, which is
- * the natural hardware unit for SKL
- */
- rps->rp0_freq *= GEN9_FREQ_SCALER;
- rps->rp1_freq *= GEN9_FREQ_SCALER;
- rps->min_freq *= GEN9_FREQ_SCALER;
- rps->max_freq *= GEN9_FREQ_SCALER;
- rps->efficient_freq *= GEN9_FREQ_SCALER;
- }
}
static bool rps_reset(struct intel_rps *rps)
@@ -2219,19 +2254,6 @@ int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val)
return set_min_freq(rps, val);
}
-u32 intel_rps_read_state_cap(struct intel_rps *rps)
-{
- struct drm_i915_private *i915 = rps_to_i915(rps);
- struct intel_uncore *uncore = rps_to_uncore(rps);
-
- if (IS_XEHPSDV(i915))
- return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP);
- else if (IS_GEN9_LP(i915))
- return intel_uncore_read(uncore, BXT_RP_STATE_CAP);
- else
- return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
-}
-
static void intel_rps_set_manual(struct intel_rps *rps, bool enable)
{
struct intel_uncore *uncore = rps_to_uncore(rps);
@@ -2244,18 +2266,18 @@ static void intel_rps_set_manual(struct intel_rps *rps, bool enable)
void intel_rps_raise_unslice(struct intel_rps *rps)
{
struct intel_uncore *uncore = rps_to_uncore(rps);
- u32 rp0_unslice_req;
mutex_lock(&rps->lock);
if (rps_uses_slpc(rps)) {
/* RP limits have not been initialized yet for SLPC path */
- rp0_unslice_req = ((intel_rps_read_state_cap(rps) >> 0)
- & 0xff) * GEN9_FREQ_SCALER;
+ struct intel_rps_freq_caps caps;
+
+ gen6_rps_get_freq_caps(rps, &caps);
intel_rps_set_manual(rps, true);
intel_uncore_write(uncore, GEN6_RPNSWREQ,
- ((rp0_unslice_req <<
+ ((caps.rp0_freq <<
GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
GEN9_IGNORE_SLICE_RATIO));
intel_rps_set_manual(rps, false);
@@ -2269,18 +2291,18 @@ void intel_rps_raise_unslice(struct intel_rps *rps)
void intel_rps_lower_unslice(struct intel_rps *rps)
{
struct intel_uncore *uncore = rps_to_uncore(rps);
- u32 rpn_unslice_req;
mutex_lock(&rps->lock);
if (rps_uses_slpc(rps)) {
/* RP limits have not been initialized yet for SLPC path */
- rpn_unslice_req = ((intel_rps_read_state_cap(rps) >> 16)
- & 0xff) * GEN9_FREQ_SCALER;
+ struct intel_rps_freq_caps caps;
+
+ gen6_rps_get_freq_caps(rps, &caps);
intel_rps_set_manual(rps, true);
intel_uncore_write(uncore, GEN6_RPNSWREQ,
- ((rpn_unslice_req <<
+ ((caps.min_freq <<
GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
GEN9_IGNORE_SLICE_RATIO));
intel_rps_set_manual(rps, false);
@@ -2291,6 +2313,24 @@ void intel_rps_lower_unslice(struct intel_rps *rps)
mutex_unlock(&rps->lock);
}
+static u32 rps_read_mmio(struct intel_rps *rps, i915_reg_t reg32)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+ intel_wakeref_t wakeref;
+ u32 val;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ val = intel_uncore_read(gt->uncore, reg32);
+
+ return val;
+}
+
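+/* Read @reg32 and report whether any bit of @mask is set. */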
+bool rps_read_mask_mmio(struct intel_rps *rps,
+ i915_reg_t reg32, u32 mask)
+{
+ return rps_read_mmio(rps, reg32) & mask;
+}
+
/* External interface for intel_ips.ko */
static struct drm_i915_private __rcu *ips_mchdev;
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h
index c6d76a3d1331..1e8d56491308 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -7,6 +7,7 @@
#define INTEL_RPS_H
#include "intel_rps_types.h"
+#include "i915_reg_defs.h"
struct i915_request;
@@ -44,10 +45,13 @@ u32 intel_rps_get_rp1_frequency(struct intel_rps *rps);
u32 intel_rps_get_rpn_frequency(struct intel_rps *rps);
u32 intel_rps_read_punit_req(struct intel_rps *rps);
u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps);
-u32 intel_rps_read_state_cap(struct intel_rps *rps);
+void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps);
void intel_rps_raise_unslice(struct intel_rps *rps);
void intel_rps_lower_unslice(struct intel_rps *rps);
+u32 intel_rps_read_throttle_reason(struct intel_rps *rps);
+bool rps_read_mask_mmio(struct intel_rps *rps, i915_reg_t reg32, u32 mask);
+
void gen5_rps_irq_handler(struct intel_rps *rps);
void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h
index 3941d8551f52..9173ec75f2b8 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h
@@ -37,6 +37,21 @@ enum {
INTEL_RPS_TIMER,
};
+/**
+ * struct intel_rps_freq_caps - rps freq capabilities
+ * @rp0_freq: non-overclocked max frequency
+ * @rp1_freq: "less than" RP0 power/freqency
+ * @min_freq: aka RPn, minimum frequency
+ *
+ * Freq caps exposed by HW. Values are in "hw units"; intel_gpu_freq()
+ * should be used to convert to MHz.
+ */
+struct intel_rps_freq_caps {
+ u8 rp0_freq;
+ u8 rp1_freq;
+ u8 min_freq;
+};
+
struct intel_rps {
struct mutex lock; /* protects enabling and the worker */
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 614915ffbd37..9881a6790574 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -10,6 +10,8 @@
#include "intel_gt_regs.h"
#include "intel_sseu.h"
+#include "linux/string_helpers.h"
+
void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
u8 max_subslices, u8 max_eus_per_subslice)
{
@@ -35,8 +37,8 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
}
static u32
-_intel_sseu_get_subslices(const struct sseu_dev_info *sseu,
- const u8 *subslice_mask, u8 slice)
+sseu_get_subslices(const struct sseu_dev_info *sseu,
+ const u8 *subslice_mask, u8 slice)
{
int i, offset = slice * sseu->ss_stride;
u32 mask = 0;
@@ -51,12 +53,17 @@ _intel_sseu_get_subslices(const struct sseu_dev_info *sseu,
u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
{
- return _intel_sseu_get_subslices(sseu, sseu->subslice_mask, slice);
+ return sseu_get_subslices(sseu, sseu->subslice_mask, slice);
+}
+
+static u32 sseu_get_geometry_subslices(const struct sseu_dev_info *sseu)
+{
+ return sseu_get_subslices(sseu, sseu->geometry_subslice_mask, 0);
}
u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu)
{
- return _intel_sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0);
+ return sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0);
}
void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
@@ -720,16 +727,11 @@ void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
str_yes_no(sseu->has_eu_pg));
}
-void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
- struct drm_printer *p)
+static void sseu_print_hsw_topology(const struct sseu_dev_info *sseu,
+ struct drm_printer *p)
{
int s, ss;
- if (sseu->max_slices == 0) {
- drm_printf(p, "Unavailable\n");
- return;
- }
-
for (s = 0; s < sseu->max_slices; s++) {
drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
s, intel_sseu_subslices_per_slice(sseu, s),
@@ -744,6 +746,36 @@ void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
}
}
+static void sseu_print_xehp_topology(const struct sseu_dev_info *sseu,
+ struct drm_printer *p)
+{
+ u32 g_dss_mask = sseu_get_geometry_subslices(sseu);
+ u32 c_dss_mask = intel_sseu_get_compute_subslices(sseu);
+ int dss;
+
+ for (dss = 0; dss < sseu->max_subslices; dss++) {
+ u16 enabled_eus = sseu_get_eus(sseu, 0, dss);
+
+ drm_printf(p, "DSS_%02d: G:%3s C:%3s, %2u EUs (0x%04hx)\n", dss,
+ str_yes_no(g_dss_mask & BIT(dss)),
+ str_yes_no(c_dss_mask & BIT(dss)),
+ hweight16(enabled_eus), enabled_eus);
+ }
+}
+
+void intel_sseu_print_topology(struct drm_i915_private *i915,
+ const struct sseu_dev_info *sseu,
+ struct drm_printer *p)
+{
+ if (sseu->max_slices == 0) {
+ drm_printf(p, "Unavailable\n");
+ } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+ sseu_print_xehp_topology(sseu, p);
+ } else {
+ sseu_print_hsw_topology(sseu, p);
+ }
+}
+
u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice)
{
u16 slice_mask = 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index 8a79cd8eaab4..5c078df4729c 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -15,26 +15,49 @@ struct drm_i915_private;
struct intel_gt;
struct drm_printer;
-#define GEN_MAX_SLICES (3) /* SKL upper bound */
-#define GEN_MAX_SUBSLICES (32) /* XEHPSDV upper bound */
-#define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
-#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
-#define GEN_MAX_EUS (16) /* TGL upper bound */
-#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
+/*
+ * Maximum number of slices on older platforms. Slices no longer exist
+ * starting on Xe_HP ("gslices," "cslices," etc. are a different concept and
+ * are not expressed through fusing).
+ */
+#define GEN_MAX_HSW_SLICES 3
+
+/*
+ * Maximum number of subslices that can exist within a HSW-style slice. This
+ * is only relevant to pre-Xe_HP platforms (Xe_HP and beyond use the
+ * GEN_MAX_DSS value below).
+ */
+#define GEN_MAX_SS_PER_HSW_SLICE 6
+
+/* Maximum number of DSS on newer platforms (Xe_HP and beyond). */
+#define GEN_MAX_DSS 32
+
+/* Maximum number of EUs that can exist within a subslice or DSS. */
+#define GEN_MAX_EUS_PER_SS 16
+
+#define SSEU_MAX(a, b) ((a) > (b) ? (a) : (b))
+
+/* The maximum number of bits needed to express each subslice/DSS independently */
+#define GEN_SS_MASK_SIZE SSEU_MAX(GEN_MAX_DSS, \
+ GEN_MAX_HSW_SLICES * GEN_MAX_SS_PER_HSW_SLICE)
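+/* i.e. max(32, 3 * 6) = 32 with the limits above. */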
+
+#define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
+#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_SS_MASK_SIZE)
+#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS_PER_SS)
#define GEN_DSS_PER_GSLICE 4
#define GEN_DSS_PER_CSLICE 8
#define GEN_DSS_PER_MSLICE 8
-#define GEN_MAX_GSLICES (GEN_MAX_SUBSLICES / GEN_DSS_PER_GSLICE)
-#define GEN_MAX_CSLICES (GEN_MAX_SUBSLICES / GEN_DSS_PER_CSLICE)
+#define GEN_MAX_GSLICES (GEN_MAX_DSS / GEN_DSS_PER_GSLICE)
+#define GEN_MAX_CSLICES (GEN_MAX_DSS / GEN_DSS_PER_CSLICE)
struct sseu_dev_info {
u8 slice_mask;
- u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
- u8 geometry_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
- u8 compute_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
- u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
+ u8 subslice_mask[GEN_SS_MASK_SIZE];
+ u8 geometry_subslice_mask[GEN_SS_MASK_SIZE];
+ u8 compute_subslice_mask[GEN_SS_MASK_SIZE];
+ u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE];
u16 eu_total;
u8 eu_per_subslice;
u8 min_eu_in_pool;
@@ -116,7 +139,8 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
const struct intel_sseu *req_sseu);
void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p);
-void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
+void intel_sseu_print_topology(struct drm_i915_private *i915,
+ const struct sseu_dev_info *sseu,
struct drm_printer *p);
u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice);
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
index a9d5bc49f361..2d5d011e01db 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
@@ -248,7 +248,7 @@ int intel_sseu_status(struct seq_file *m, struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
const struct intel_gt_info *info = &gt->info;
- struct sseu_dev_info sseu;
+ struct sseu_dev_info *sseu;
intel_wakeref_t wakeref;
if (GRAPHICS_VER(i915) < 8)
@@ -258,23 +258,29 @@ int intel_sseu_status(struct seq_file *m, struct intel_gt *gt)
i915_print_sseu_info(m, true, HAS_POOLED_EU(i915), &info->sseu);
seq_puts(m, "SSEU Device Status\n");
- memset(&sseu, 0, sizeof(sseu));
- intel_sseu_set_info(&sseu, info->sseu.max_slices,
+
+ sseu = kzalloc(sizeof(*sseu), GFP_KERNEL);
+ if (!sseu)
+ return -ENOMEM;
+
+ intel_sseu_set_info(sseu, info->sseu.max_slices,
info->sseu.max_subslices,
info->sseu.max_eus_per_subslice);
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
if (IS_CHERRYVIEW(i915))
- cherryview_sseu_device_status(gt, &sseu);
+ cherryview_sseu_device_status(gt, sseu);
else if (IS_BROADWELL(i915))
- bdw_sseu_device_status(gt, &sseu);
+ bdw_sseu_device_status(gt, sseu);
else if (GRAPHICS_VER(i915) == 9)
- gen9_sseu_device_status(gt, &sseu);
+ gen9_sseu_device_status(gt, sseu);
else if (GRAPHICS_VER(i915) >= 11)
- gen11_sseu_device_status(gt, &sseu);
+ gen11_sseu_device_status(gt, sseu);
}
- i915_print_sseu_info(m, false, HAS_POOLED_EU(i915), &sseu);
+ i915_print_sseu_info(m, false, HAS_POOLED_EU(i915), sseu);
+
+ kfree(sseu);
return 0;
}
@@ -287,22 +293,22 @@ static int sseu_status_show(struct seq_file *m, void *unused)
}
DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(sseu_status);
-static int rcs_topology_show(struct seq_file *m, void *unused)
+static int sseu_topology_show(struct seq_file *m, void *unused)
{
struct intel_gt *gt = m->private;
struct drm_printer p = drm_seq_file_printer(m);
- intel_sseu_print_topology(&gt->info.sseu, &p);
+ intel_sseu_print_topology(gt->i915, &gt->info.sseu, &p);
return 0;
}
-DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rcs_topology);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(sseu_topology);
void intel_sseu_debugfs_register(struct intel_gt *gt, struct dentry *root)
{
static const struct intel_gt_debugfs_file files[] = {
{ "sseu_status", &sseu_status_fops, NULL },
- { "rcs_topology", &rcs_topology_fops, NULL },
+ { "sseu_topology", &sseu_topology_fops, NULL },
};
intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index c014b40d2e9f..a05c4b99b3fb 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1072,9 +1072,15 @@ static void __set_mcr_steering(struct i915_wa_list *wal,
static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal,
unsigned int slice, unsigned int subslice)
{
- drm_dbg(&gt->i915->drm, "MCR slice=0x%x, subslice=0x%x\n", slice, subslice);
+ struct drm_printer p = drm_debug_printer("MCR Steering:");
__set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice);
+
+ gt->default_steering.groupid = slice;
+ gt->default_steering.instanceid = subslice;
+
+ if (drm_debug_enabled(DRM_UT_DRIVER))
+ intel_gt_report_steering(&p, gt, false);
}
static void
@@ -2188,11 +2194,15 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
*/
wa_write_or(wal, GEN7_FF_THREAD_MODE,
GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
+ }
+ if (IS_ALDERLAKE_P(i915) || IS_DG2(i915) || IS_ALDERLAKE_S(i915) ||
+ IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
/*
* Wa_1606700617:tgl,dg1,adl-p
* Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
* Wa_14010826681:tgl,dg1,rkl,adl-p
+ * Wa_18019627453:dg2
*/
wa_masked_en(wal,
GEN9_CS_DEBUG_MODE1,
@@ -2310,7 +2320,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
FF_DOP_CLOCK_GATE_DISABLE);
}
- if (IS_GRAPHICS_VER(i915, 9, 12)) {
+ if (HAS_PERCTX_PREEMPT_CTRL(i915)) {
/* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
wa_masked_en(wal,
GEN7_FF_SLICE_CS_CHICKEN1,
@@ -2618,6 +2628,11 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
GLOBAL_INVALIDATION_MODE);
}
+
+ if (IS_DG2(i915)) {
+ /* Wa_22014226127:dg2 */
+ wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
+ }
}
static void
@@ -2633,7 +2648,7 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
* to a single RCS/CCS engine's workaround list since
* they're reset as part of the general render domain reset.
*/
- if (engine->class == RENDER_CLASS)
+ if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
general_render_compute_wa_init(engine, wal);
if (engine->class == RENDER_CLASS)
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 72d5faab8f9a..09f8cd2d0e2c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -1736,15 +1736,9 @@ static int live_preempt(void *arg)
enum intel_engine_id id;
int err = -ENOMEM;
- if (igt_spinner_init(&spin_hi, gt))
- return -ENOMEM;
-
- if (igt_spinner_init(&spin_lo, gt))
- goto err_spin_hi;
-
ctx_hi = kernel_context(gt->i915, NULL);
if (!ctx_hi)
- goto err_spin_lo;
+ return -ENOMEM;
ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
ctx_lo = kernel_context(gt->i915, NULL);
@@ -1752,6 +1746,12 @@ static int live_preempt(void *arg)
goto err_ctx_hi;
ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
+ if (igt_spinner_init(&spin_hi, gt))
+ goto err_ctx_lo;
+
+ if (igt_spinner_init(&spin_lo, gt))
+ goto err_spin_hi;
+
for_each_engine(engine, gt, id) {
struct igt_live_test t;
struct i915_request *rq;
@@ -1761,14 +1761,14 @@ static int live_preempt(void *arg)
if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
- goto err_ctx_lo;
+ goto err_spin_lo;
}
rq = spinner_create_request(&spin_lo, ctx_lo, engine,
MI_ARB_CHECK);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- goto err_ctx_lo;
+ goto err_spin_lo;
}
i915_request_add(rq);
@@ -1777,7 +1777,7 @@ static int live_preempt(void *arg)
GEM_TRACE_DUMP();
intel_gt_set_wedged(gt);
err = -EIO;
- goto err_ctx_lo;
+ goto err_spin_lo;
}
rq = spinner_create_request(&spin_hi, ctx_hi, engine,
@@ -1785,7 +1785,7 @@ static int live_preempt(void *arg)
if (IS_ERR(rq)) {
igt_spinner_end(&spin_lo);
err = PTR_ERR(rq);
- goto err_ctx_lo;
+ goto err_spin_lo;
}
i915_request_add(rq);
@@ -1794,7 +1794,7 @@ static int live_preempt(void *arg)
GEM_TRACE_DUMP();
intel_gt_set_wedged(gt);
err = -EIO;
- goto err_ctx_lo;
+ goto err_spin_lo;
}
igt_spinner_end(&spin_hi);
@@ -1802,19 +1802,19 @@ static int live_preempt(void *arg)
if (igt_live_test_end(&t)) {
err = -EIO;
- goto err_ctx_lo;
+ goto err_spin_lo;
}
}
err = 0;
-err_ctx_lo:
- kernel_context_close(ctx_lo);
-err_ctx_hi:
- kernel_context_close(ctx_hi);
err_spin_lo:
igt_spinner_fini(&spin_lo);
err_spin_hi:
igt_spinner_fini(&spin_hi);
+err_ctx_lo:
+ kernel_context_close(ctx_lo);
+err_ctx_hi:
+ kernel_context_close(ctx_hi);
return err;
}
@@ -1828,20 +1828,20 @@ static int live_late_preempt(void *arg)
enum intel_engine_id id;
int err = -ENOMEM;
- if (igt_spinner_init(&spin_hi, gt))
- return -ENOMEM;
-
- if (igt_spinner_init(&spin_lo, gt))
- goto err_spin_hi;
-
ctx_hi = kernel_context(gt->i915, NULL);
if (!ctx_hi)
- goto err_spin_lo;
+ return -ENOMEM;
ctx_lo = kernel_context(gt->i915, NULL);
if (!ctx_lo)
goto err_ctx_hi;
+ if (igt_spinner_init(&spin_hi, gt))
+ goto err_ctx_lo;
+
+ if (igt_spinner_init(&spin_lo, gt))
+ goto err_spin_hi;
+
/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
ctx_lo->sched.priority = 1;
@@ -1854,14 +1854,14 @@ static int live_late_preempt(void *arg)
if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
- goto err_ctx_lo;
+ goto err_spin_lo;
}
rq = spinner_create_request(&spin_lo, ctx_lo, engine,
MI_ARB_CHECK);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- goto err_ctx_lo;
+ goto err_spin_lo;
}
i915_request_add(rq);
@@ -1875,7 +1875,7 @@ static int live_late_preempt(void *arg)
if (IS_ERR(rq)) {
igt_spinner_end(&spin_lo);
err = PTR_ERR(rq);
- goto err_ctx_lo;
+ goto err_spin_lo;
}
i915_request_add(rq);
@@ -1898,19 +1898,19 @@ static int live_late_preempt(void *arg)
if (igt_live_test_end(&t)) {
err = -EIO;
- goto err_ctx_lo;
+ goto err_spin_lo;
}
}
err = 0;
-err_ctx_lo:
- kernel_context_close(ctx_lo);
-err_ctx_hi:
- kernel_context_close(ctx_hi);
err_spin_lo:
igt_spinner_fini(&spin_lo);
err_spin_hi:
igt_spinner_fini(&spin_hi);
+err_ctx_lo:
+ kernel_context_close(ctx_lo);
+err_ctx_hi:
+ kernel_context_close(ctx_hi);
return err;
err_wedged:
@@ -1918,7 +1918,7 @@ err_wedged:
igt_spinner_end(&spin_lo);
intel_gt_set_wedged(gt);
err = -EIO;
- goto err_ctx_lo;
+ goto err_spin_lo;
}
struct preempt_client {
@@ -3382,12 +3382,9 @@ static int live_preempt_timeout(void *arg)
if (!intel_has_reset_engine(gt))
return 0;
- if (igt_spinner_init(&spin_lo, gt))
- return -ENOMEM;
-
ctx_hi = kernel_context(gt->i915, NULL);
if (!ctx_hi)
- goto err_spin_lo;
+ return -ENOMEM;
ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
ctx_lo = kernel_context(gt->i915, NULL);
@@ -3395,6 +3392,9 @@ static int live_preempt_timeout(void *arg)
goto err_ctx_hi;
ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
+ if (igt_spinner_init(&spin_lo, gt))
+ goto err_ctx_lo;
+
for_each_engine(engine, gt, id) {
unsigned long saved_timeout;
struct i915_request *rq;
@@ -3406,21 +3406,21 @@ static int live_preempt_timeout(void *arg)
MI_NOOP); /* preemption disabled */
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- goto err_ctx_lo;
+ goto err_spin_lo;
}
i915_request_add(rq);
if (!igt_wait_for_spinner(&spin_lo, rq)) {
intel_gt_set_wedged(gt);
err = -EIO;
- goto err_ctx_lo;
+ goto err_spin_lo;
}
rq = igt_request_alloc(ctx_hi, engine);
if (IS_ERR(rq)) {
igt_spinner_end(&spin_lo);
err = PTR_ERR(rq);
- goto err_ctx_lo;
+ goto err_spin_lo;
}
/* Flush the previous CS ack before changing timeouts */
@@ -3440,7 +3440,7 @@ static int live_preempt_timeout(void *arg)
intel_gt_set_wedged(gt);
i915_request_put(rq);
err = -ETIME;
- goto err_ctx_lo;
+ goto err_spin_lo;
}
igt_spinner_end(&spin_lo);
@@ -3448,12 +3448,12 @@ static int live_preempt_timeout(void *arg)
}
err = 0;
+err_spin_lo:
+ igt_spinner_fini(&spin_lo);
err_ctx_lo:
kernel_context_close(ctx_lo);
err_ctx_hi:
kernel_context_close(ctx_hi);
-err_spin_lo:
- igt_spinner_fini(&spin_lo);
return err;
}
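
The selftest changes above are all one fix: the spinners are now initialised after the kernel contexts they depend on, and the unwind labels were reordered so that teardown runs in the reverse order of setup. A minimal, self-contained sketch of the goto-unwind idiom (the resources here are illustrative stand-ins, not the patch's types):

    #include <stdlib.h>

    static int run_body(void) { return 0; }  /* stand-in for the test body */

    static int example(void)
    {
        void *ctx, *spin;
        int err = -1;

        ctx = malloc(64);          /* acquired first */
        if (!ctx)
            return -1;

        spin = malloc(64);         /* acquired second */
        if (!spin)
            goto err_ctx;

        err = run_body();          /* success and failure both fall through */

        free(spin);                /* released first: reverse of acquisition */
    err_ctx:
        free(ctx);                 /* released last */
        return err;
    }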
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 21c29d315cc0..6ba52ef1acb8 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1753,8 +1753,8 @@ static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
if (IS_ERR(ce))
return PTR_ERR(ce);
- ce->runtime.num_underflow = 0;
- ce->runtime.max_underflow = 0;
+ ce->stats.runtime.num_underflow = 0;
+ ce->stats.runtime.max_underflow = 0;
do {
unsigned int loop = 1024;
@@ -1792,11 +1792,11 @@ static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
intel_context_get_avg_runtime_ns(ce));
err = 0;
- if (ce->runtime.num_underflow) {
+ if (ce->stats.runtime.num_underflow) {
pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
engine->name,
- ce->runtime.num_underflow,
- ce->runtime.max_underflow);
+ ce->stats.runtime.num_underflow,
+ ce->stats.runtime.max_underflow);
GEM_TRACE_DUMP();
err = -EOVERFLOW;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c
index c9c4f391c5cc..2b0c87999949 100644
--- a/drivers/gpu/drm/i915/gt/selftest_migrate.c
+++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c
@@ -132,6 +132,124 @@ err_free_src:
return err;
}
+static int intel_context_copy_ccs(struct intel_context *ce,
+ const struct i915_deps *deps,
+ struct scatterlist *sg,
+ enum i915_cache_level cache_level,
+ bool write_to_ccs,
+ struct i915_request **out)
+{
+ u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS;
+ u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS;
+ struct sgt_dma it = sg_sgt(sg);
+ struct i915_request *rq;
+ u32 offset;
+ int err;
+
+ GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
+ *out = NULL;
+
+ GEM_BUG_ON(ce->ring->size < SZ_64K);
+
+ offset = 0;
+ if (HAS_64K_PAGES(ce->engine->i915))
+ offset = CHUNK_SZ;
+
+ do {
+ int len;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_ce;
+ }
+
+ if (deps) {
+ err = i915_request_await_deps(rq, deps);
+ if (err)
+ goto out_rq;
+
+ if (rq->engine->emit_init_breadcrumb) {
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (err)
+ goto out_rq;
+ }
+
+ deps = NULL;
+ }
+
+ /* The PTE updates + clear must not be interrupted. */
+ err = emit_no_arbitration(rq);
+ if (err)
+ goto out_rq;
+
+ len = emit_pte(rq, &it, cache_level, true, offset, CHUNK_SZ);
+ if (len <= 0) {
+ err = len;
+ goto out_rq;
+ }
+
+ err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ goto out_rq;
+
+ err = emit_copy_ccs(rq, offset, dst_access,
+ offset, src_access, len);
+ if (err)
+ goto out_rq;
+
+ err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+
+ /* Arbitration is re-enabled between requests. */
+out_rq:
+ if (*out)
+ i915_request_put(*out);
+ *out = i915_request_get(rq);
+ i915_request_add(rq);
+ if (err || !it.sg || !sg_dma_len(it.sg))
+ break;
+
+ cond_resched();
+ } while (1);
+
+out_ce:
+ return err;
+}
+
+static int
+intel_migrate_ccs_copy(struct intel_migrate *m,
+ struct i915_gem_ww_ctx *ww,
+ const struct i915_deps *deps,
+ struct scatterlist *sg,
+ enum i915_cache_level cache_level,
+ bool write_to_ccs,
+ struct i915_request **out)
+{
+ struct intel_context *ce;
+ int err;
+
+ *out = NULL;
+ if (!m->context)
+ return -ENODEV;
+
+ ce = intel_migrate_create_context(m);
+ if (IS_ERR(ce))
+ ce = intel_context_get(m->context);
+ GEM_BUG_ON(IS_ERR(ce));
+
+ err = intel_context_pin_ww(ce, ww);
+ if (err)
+ goto out;
+
+ err = intel_context_copy_ccs(ce, deps, sg, cache_level,
+ write_to_ccs, out);
+
+ intel_context_unpin(ce);
+out:
+ intel_context_put(ce);
+ return err;
+}
+
static int clear(struct intel_migrate *migrate,
int (*fn)(struct intel_migrate *migrate,
struct i915_gem_ww_ctx *ww,
@@ -144,7 +262,8 @@ static int clear(struct intel_migrate *migrate,
struct drm_i915_gem_object *obj;
struct i915_request *rq;
struct i915_gem_ww_ctx ww;
- u32 *vaddr;
+ u32 *vaddr, val = 0;
+ bool ccs_cap = false;
int err = 0;
int i;
@@ -152,7 +271,15 @@ static int clear(struct intel_migrate *migrate,
if (IS_ERR(obj))
return 0;
+ /* Consider the rounded-up object size too */
+ sz = obj->base.size;
+
+ if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj))
+ ccs_cap = true;
+
for_i915_gem_ww(&ww, err, true) {
+ int ccs_bytes, ccs_bytes_per_chunk;
+
err = i915_gem_object_lock(obj, &ww);
if (err)
continue;
@@ -167,44 +294,114 @@ static int clear(struct intel_migrate *migrate,
vaddr[i] = ~i;
i915_gem_object_flush_map(obj);
- err = fn(migrate, &ww, obj, sz, &rq);
- if (!err)
- continue;
+ if (ccs_cap && !val) {
+ /* Write the obj data into the CCS surface */
+ err = intel_migrate_ccs_copy(migrate, &ww, NULL,
+ obj->mm.pages->sgl,
+ obj->cache_level,
+ true, &rq);
+ if (rq && !err) {
+ if (i915_request_wait(rq, 0, HZ) < 0) {
+ pr_err("%ps timed out, size: %u\n",
+ fn, sz);
+ err = -ETIME;
+ }
+ i915_request_put(rq);
+ rq = NULL;
+ }
+ if (err)
+ continue;
+ }
- if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
- pr_err("%ps failed, size: %u\n", fn, sz);
- if (rq) {
- i915_request_wait(rq, 0, HZ);
+ err = fn(migrate, &ww, obj, val, &rq);
+ if (rq && !err) {
+ if (i915_request_wait(rq, 0, HZ) < 0) {
+ pr_err("%ps timed out, size: %u\n", fn, sz);
+ err = -ETIME;
+ }
i915_request_put(rq);
+ rq = NULL;
}
- i915_gem_object_unpin_map(obj);
- }
- if (err)
- goto err_out;
+ if (err)
+ continue;
- if (rq) {
- if (i915_request_wait(rq, 0, HZ) < 0) {
- pr_err("%ps timed out, size: %u\n", fn, sz);
- err = -ETIME;
+ i915_gem_object_flush_map(obj);
+
+ /* Verify the set/clear of the obj mem */
+ for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
+ int x = i * 1024 +
+ i915_prandom_u32_max_state(1024, prng);
+
+ if (vaddr[x] != val) {
+ pr_err("%ps failed, (%u != %u), offset: %zu\n",
+ fn, vaddr[x], val, x * sizeof(u32));
+ igt_hexdump(vaddr + i * 1024, 4096);
+ err = -EINVAL;
+ }
}
- i915_request_put(rq);
- }
+ if (err)
+ continue;
- for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
- int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);
+ if (ccs_cap && !val) {
+ for (i = 0; i < sz / sizeof(u32); i++)
+ vaddr[i] = ~i;
+ i915_gem_object_flush_map(obj);
+
+ err = intel_migrate_ccs_copy(migrate, &ww, NULL,
+ obj->mm.pages->sgl,
+ obj->cache_level,
+ false, &rq);
+ if (rq && !err) {
+ if (i915_request_wait(rq, 0, HZ) < 0) {
+ pr_err("%ps timed out, size: %u\n",
+ fn, sz);
+ err = -ETIME;
+ }
+ i915_request_put(rq);
+ rq = NULL;
+ }
+ if (err)
+ continue;
+
+ ccs_bytes = GET_CCS_BYTES(i915, sz);
+ ccs_bytes_per_chunk = GET_CCS_BYTES(i915, CHUNK_SZ);
+ i915_gem_object_flush_map(obj);
+
+ for (i = 0; !err && i < DIV_ROUND_UP(ccs_bytes, PAGE_SIZE); i++) {
+ int offset = ((i * PAGE_SIZE) /
+ ccs_bytes_per_chunk) * CHUNK_SZ / sizeof(u32);
+ int ccs_bytes_left = (ccs_bytes - i * PAGE_SIZE) / sizeof(u32);
+ int x = i915_prandom_u32_max_state(min_t(int, 1024,
+ ccs_bytes_left), prng);
+
+ if (vaddr[offset + x]) {
+ pr_err("%ps ccs clearing failed, offset: %ld/%d\n",
+ fn, i * PAGE_SIZE + x * sizeof(u32), ccs_bytes);
+ igt_hexdump(vaddr + offset,
+ min_t(int, 4096,
+ ccs_bytes_left * sizeof(u32)));
+ err = -EINVAL;
+ }
+ }
+
+ if (err)
+ continue;
+ }
+ i915_gem_object_unpin_map(obj);
+ }
- if (vaddr[x] != sz) {
- pr_err("%ps failed, size: %u, offset: %zu\n",
- fn, sz, x * sizeof(u32));
- igt_hexdump(vaddr + i * 1024, 4096);
- err = -EINVAL;
+ if (err) {
+ if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
+ pr_err("%ps failed, size: %u\n", fn, sz);
+ if (rq && err != -EINVAL) {
+ i915_request_wait(rq, 0, HZ);
+ i915_request_put(rq);
}
+
+ i915_gem_object_unpin_map(obj);
}
- i915_gem_object_unpin_map(obj);
-err_out:
i915_gem_object_put(obj);
-
return err;
}
@@ -621,13 +818,15 @@ static int perf_copy_blt(void *arg)
for (i = 0; i < ARRAY_SIZE(sizes); i++) {
struct drm_i915_gem_object *src, *dst;
+ size_t sz;
int err;
src = create_init_lmem_internal(gt, sizes[i], true);
if (IS_ERR(src))
return PTR_ERR(src);
- dst = create_init_lmem_internal(gt, sizes[i], false);
+ sz = src->base.size;
+ dst = create_init_lmem_internal(gt, sz, false);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto err_src;
@@ -640,7 +839,7 @@ static int perf_copy_blt(void *arg)
dst->mm.pages->sgl,
I915_CACHE_NONE,
i915_gem_object_is_lmem(dst),
- sizes[i]);
+ sz);
i915_gem_object_unlock(dst);
i915_gem_object_put(dst);
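
The reworked clear() selftest exercises flat-CCS objects in phases: seed the main surface, write it into the CCS surface (write_to_ccs = true), run the operation under test, then read the CCS back out (write_to_ccs = false) and expect zeroed state. The bookkeeping uses GET_CCS_BYTES(); a hedged, stand-alone sketch of that arithmetic, assuming the flat-CCS ratio of one CCS byte per 256 bytes of local memory:

    #include <stdio.h>

    /* Assumed flat-CCS ratio: 1 CCS byte covers 256 bytes of LMEM. */
    #define NUM_BYTES_PER_CCS_BYTE 256

    static unsigned int ccs_bytes(unsigned int size)
    {
        return (size + NUM_BYTES_PER_CCS_BYTE - 1) / NUM_BYTES_PER_CCS_BYTE;
    }

    int main(void)
    {
        /* A 64 KiB object would need 256 bytes of CCS state. */
        printf("%u\n", ccs_bytes(64 * 1024));
        return 0;
    }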
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index 7afdadc7656f..be9ac47fa9d0 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -122,17 +122,14 @@ enum intel_guc_action {
INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
INTEL_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003,
INTEL_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004,
- INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY = 0x1005,
- INTEL_GUC_ACTION_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006,
- INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007,
INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008,
INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
+ INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B,
INTEL_GUC_ACTION_SETUP_PC_GUCRC = 0x3004,
INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
+ INTEL_GUC_ACTION_GET_HWCONFIG = 0x4100,
INTEL_GUC_ACTION_REGISTER_CONTEXT = 0x4502,
INTEL_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503,
- INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
- INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
INTEL_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
@@ -173,4 +170,11 @@ enum intel_guc_sleep_state_status {
#define GUC_LOG_CONTROL_VERBOSITY_MASK (0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT)
#define GUC_LOG_CONTROL_DEFAULT_LOGGING (1 << 8)
+enum intel_guc_state_capture_event_status {
+ INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_SUCCESS = 0x0,
+ INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE = 0x1,
+};
+
+#define INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK 0x000000FF
+
#endif /* _ABI_GUC_ACTIONS_ABI_H */
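
The new enum and mask describe the low byte of the G2H state-capture notification's status dword. A small stand-alone decoding sketch (the payload value is made up):

    #include <stdint.h>
    #include <stdio.h>

    #define INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK 0x000000FFu

    int main(void)
    {
        uint32_t msg0 = 0xABCD0001;  /* hypothetical first payload dword */
        uint32_t status = msg0 & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;

        /* 0x1 == STATUS_NOSPACE: GuC ran out of capture-buffer space. */
        printf("capture status = %#x\n", status);
        return 0;
    }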
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
index c20658ee85a5..8085fb181274 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
@@ -8,6 +8,10 @@
enum intel_guc_response_status {
INTEL_GUC_RESPONSE_STATUS_SUCCESS = 0x0,
+ INTEL_GUC_RESPONSE_NOT_SUPPORTED = 0x20,
+ INTEL_GUC_RESPONSE_NO_ATTRIBUTE_TABLE = 0x201,
+ INTEL_GUC_RESPONSE_NO_DECRYPTION_KEY = 0x202,
+ INTEL_GUC_RESPONSE_DECRYPTION_FAILED = 0x204,
INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000,
};
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
index f0814a57c191..4a59478c3b5c 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
@@ -6,6 +6,8 @@
#ifndef _ABI_GUC_KLVS_ABI_H
#define _ABI_GUC_KLVS_ABI_H
+#include <linux/types.h>
+
/**
* DOC: GuC KLV
*
@@ -79,4 +81,17 @@
#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY 0x0907
#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN 1u
+/*
+ * Per context scheduling policy update keys.
+ */
+enum {
+ GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM = 0x2001,
+ GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT = 0x2002,
+ GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY = 0x2003,
+ GUC_CONTEXT_POLICIES_KLV_ID_PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY = 0x2004,
+ GUC_CONTEXT_POLICIES_KLV_ID_SLPM_GT_FREQUENCY = 0x2005,
+
+ GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5,
+};
+
#endif /* _ABI_GUC_KLVS_ABI_H */
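
These keys replace the dedicated SET_CONTEXT_PRIORITY / SET_CONTEXT_EXECUTION_QUANTUM / SET_CONTEXT_PREEMPTION_TIMEOUT actions removed above: per-context scheduling policy is now carried as KLV (key-length-value) entries in a HOST2GUC_UPDATE_CONTEXT_POLICIES request. A hedged sketch of packing one entry, assuming the GuC KLV header layout of key in bits 31:16 and value length (in dwords) in bits 15:0:

    #include <stdint.h>

    #define GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM 0x2001u

    /* Pack one KLV: a header dword (key | len) followed by the value. */
    static int klv_add(uint32_t *buf, uint32_t key, uint32_t value)
    {
        buf[0] = (key << 16) | 1;  /* one dword of value */
        buf[1] = value;
        return 2;                  /* dwords consumed */
    }

    int main(void)
    {
        uint32_t klvs[2];

        /* e.g. an execution quantum of 1000 (units assumed to be us) */
        klv_add(klvs, GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM, 1000);
        return 0;
    }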
diff --git a/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h
new file mode 100644
index 000000000000..3624abfd22d1
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021-2022 Intel Corporation
+ */
+
+#ifndef _INTEL_GUC_CAPTURE_FWIF_H
+#define _INTEL_GUC_CAPTURE_FWIF_H
+
+#include <linux/types.h>
+#include "intel_guc_fwif.h"
+
+struct intel_guc;
+struct file;
+
+/**
+ * struct __guc_capture_bufstate
+ *
+ * Book-keeping structure used to track read and write pointers
+ * as we extract error capture data from the GuC-log-buffer's
+ * error-capture region as a stream of dwords.
+ */
+struct __guc_capture_bufstate {
+ u32 size;
+ void *data;
+ u32 rd;
+ u32 wr;
+};
+
+/**
+ * struct __guc_capture_parsed_output - extracted error capture node
+ *
+ * A single unit of extracted error-capture output data grouped together
+ * at an engine-instance level. We keep these nodes in a linked list.
+ * See cachelist and outlist below.
+ */
+struct __guc_capture_parsed_output {
+ /*
+ * A single set of 3 capture lists: a global-list,
+ * an engine-class-list and an engine-instance list.
+ * outlist in intel_guc_state_capture will keep
+ * a linked list of these nodes that will eventually
+ * be detached from outlist and attached to
+ * i915_gpu_coredump in response to a context reset
+ */
+ struct list_head link;
+ bool is_partial;
+ u32 eng_class;
+ u32 eng_inst;
+ u32 guc_id;
+ u32 lrca;
+ struct gcap_reg_list_info {
+ u32 vfid;
+ u32 num_regs;
+ struct guc_mmio_reg *regs;
+ } reginfo[GUC_CAPTURE_LIST_TYPE_MAX];
+#define GCAP_PARSED_REGLIST_INDEX_GLOBAL BIT(GUC_CAPTURE_LIST_TYPE_GLOBAL)
+#define GCAP_PARSED_REGLIST_INDEX_ENGCLASS BIT(GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS)
+#define GCAP_PARSED_REGLIST_INDEX_ENGINST BIT(GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE)
+};
+
+/**
+ * struct guc_debug_capture_list_header / struct guc_debug_capture_list
+ *
+ * As part of ADS registration, these header structures (followed by
+ * an array of 'struct guc_mmio_reg' entries) are used to register with
+ * the GuC microkernel the list of registers we want it to dump out prior
+ * to an engine reset.
+ */
+struct guc_debug_capture_list_header {
+ u32 info;
+#define GUC_CAPTURELISTHDR_NUMDESCR GENMASK(15, 0)
+} __packed;
+
+struct guc_debug_capture_list {
+ struct guc_debug_capture_list_header header;
+ struct guc_mmio_reg regs[0];
+} __packed;
+
+/**
+ * struct __guc_mmio_reg_descr / struct __guc_mmio_reg_descr_group
+ *
+ * The intel_guc_capture module uses these structures to maintain static
+ * tables (per unique platform) that consist of lists of registers
+ * (offsets, names, flags, ...) that are used at ADS registration
+ * time as well as during runtime processing and reporting of error-
+ * capture states generated by GuC just prior to engine reset events.
+ */
+struct __guc_mmio_reg_descr {
+ i915_reg_t reg;
+ u32 flags;
+ u32 mask;
+ const char *regname;
+};
+
+struct __guc_mmio_reg_descr_group {
+ const struct __guc_mmio_reg_descr *list;
+ u32 num_regs;
+ u32 owner; /* see enum guc_capture_owner */
+ u32 type; /* see enum guc_capture_type */
+ u32 engine; /* as per MAX_ENGINE_CLASS */
+ struct __guc_mmio_reg_descr *extlist; /* only used for steered registers */
+};
+
+/**
+ * struct guc_state_capture_header_t / struct guc_state_capture_t /
+ * guc_state_capture_group_header_t / guc_state_capture_group_t
+ *
+ * Prior to resetting engines that have hung or faulted, the GuC microkernel
+ * reports the engine error-state (the register values that were read) by
+ * logging them into the shared GuC log buffer using this hierarchy
+ * of structures.
+ */
+struct guc_state_capture_header_t {
+ u32 owner;
+#define CAP_HDR_CAPTURE_VFID GENMASK(7, 0)
+ u32 info;
+#define CAP_HDR_CAPTURE_TYPE GENMASK(3, 0) /* see enum guc_capture_type */
+#define CAP_HDR_ENGINE_CLASS GENMASK(7, 4) /* see GUC_MAX_ENGINE_CLASSES */
+#define CAP_HDR_ENGINE_INSTANCE GENMASK(11, 8)
+ u32 lrca; /* if type-instance, LRCA (address) that hung, else set to ~0 */
+ u32 guc_id; /* if type-instance, context index of hung context, else set to ~0 */
+ u32 num_mmios;
+#define CAP_HDR_NUM_MMIOS GENMASK(9, 0)
+} __packed;
+
+struct guc_state_capture_t {
+ struct guc_state_capture_header_t header;
+ struct guc_mmio_reg mmio_entries[0];
+} __packed;
+
+enum guc_capture_group_types {
+ GUC_STATE_CAPTURE_GROUP_TYPE_FULL,
+ GUC_STATE_CAPTURE_GROUP_TYPE_PARTIAL,
+ GUC_STATE_CAPTURE_GROUP_TYPE_MAX,
+};
+
+struct guc_state_capture_group_header_t {
+ u32 owner;
+#define CAP_GRP_HDR_CAPTURE_VFID GENMASK(7, 0)
+ u32 info;
+#define CAP_GRP_HDR_NUM_CAPTURES GENMASK(7, 0)
+#define CAP_GRP_HDR_CAPTURE_TYPE GENMASK(15, 8) /* guc_capture_group_types */
+} __packed;
+
+/* this is the top-level structure where an error-capture dump starts */
+struct guc_state_capture_group_t {
+ struct guc_state_capture_group_header_t grp_header;
+ struct guc_state_capture_t capture_entries[0];
+} __packed;
+
+/**
+ * struct __guc_capture_ads_cache
+ *
+ * A structure to cache register lists that were populated and registered
+ * with GuC at startup during ADS registration. This allows much quicker
+ * GuC resets without re-parsing all the tables for the given gt.
+ */
+struct __guc_capture_ads_cache {
+ bool is_valid;
+ void *ptr;
+ size_t size;
+ int status;
+};
+
+/**
+ * struct intel_guc_state_capture
+ *
+ * Internal context of the intel_guc_capture module.
+ */
+struct intel_guc_state_capture {
+ /**
+ * @reglists: static table of register lists used for error-capture state.
+ */
+ const struct __guc_mmio_reg_descr_group *reglists;
+
+ /**
+ * @extlists: allocated table of steered register lists used for error-capture state.
+ *
+ * NOTE: steered registers have multiple instances depending on the HW configuration
+ * (slices or dual-sub-slices) and thus depend on the HW fuses discovered at startup
+ */
+ struct __guc_mmio_reg_descr_group *extlists;
+
+ /**
+ * @ads_cache: cached register lists that are ADS-format ready
+ */
+ struct __guc_capture_ads_cache ads_cache[GUC_CAPTURE_LIST_INDEX_MAX]
+ [GUC_CAPTURE_LIST_TYPE_MAX]
+ [GUC_MAX_ENGINE_CLASSES];
+ void *ads_null_cache;
+
+ /**
+ * @cachelist: Pool of pre-allocated nodes for error capture output
+ *
+ * We need this pool of pre-allocated nodes because we cannot
+ * dynamically allocate new nodes when receiving the G2H notification:
+ * the handlers for all G2H events are called from the CT processing
+ * work queue, and while that queue is being processed there is no
+ * absolute guarantee that we are not in the midst of a GT reset
+ * operation (which doesn't allow allocations).
+ */
+ struct list_head cachelist;
+#define PREALLOC_NODES_MAX_COUNT (3 * GUC_MAX_ENGINE_CLASSES * GUC_MAX_INSTANCES_PER_CLASS)
+#define PREALLOC_NODES_DEFAULT_NUMREGS 64
+ int max_mmio_per_node;
+
+ /**
+ * @outlist: Linked list of parsed error-capture output nodes
+ *
+ * A linked list of parsed GuC error-capture output data awaiting
+ * formatted reporting via i915_gpu_coredump. Each node in this list
+ * contains a single engine-capture, including the global, engine-class
+ * and engine-instance register dumps, as per __guc_capture_parsed_output.
+ */
+ struct list_head outlist;
+};
+
+#endif /* _INTEL_GUC_CAPTURE_FWIF_H */
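
The cachelist/outlist pairing above exists because nodes cannot be allocated while the G2H event is being processed. A self-contained sketch of the pattern, with a toy singly-linked list standing in for struct list_head:

    #include <stdlib.h>

    struct node {
        struct node *next;
        /* parsed register dumps would live here */
    };

    static struct node *pop(struct node **list)
    {
        struct node *n = *list;

        if (n)
            *list = n->next;
        return n;
    }

    static void push(struct node **list, struct node *n)
    {
        n->next = *list;
        *list = n;
    }

    int main(void)
    {
        struct node *cachelist = NULL, *outlist = NULL, *n;
        int i;

        for (i = 0; i < 4; i++) {    /* init time: allocation is allowed */
            n = calloc(1, sizeof(*n));
            if (n)
                push(&cachelist, n);
        }

        n = pop(&cachelist);         /* event time: no allocation needed */
        if (n)
            push(&outlist, n);       /* queue for i915_gpu_coredump */
        return 0;
    }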
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 447a976c9f25..2c4ad4a65089 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -9,8 +9,9 @@
#include "gt/intel_gt_pm_irq.h"
#include "gt/intel_gt_regs.h"
#include "intel_guc.h"
-#include "intel_guc_slpc.h"
#include "intel_guc_ads.h"
+#include "intel_guc_capture.h"
+#include "intel_guc_slpc.h"
#include "intel_guc_submission.h"
#include "i915_drv.h"
#include "i915_irq.h"
@@ -291,6 +292,41 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50))
flags |= GUC_WA_POLLCS;
+ /* Wa_16011759253:dg2_g10:a0 */
+ if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
+ flags |= GUC_WA_GAM_CREDITS;
+
+ /* Wa_14014475959:dg2 */
+ if (IS_DG2(gt->i915))
+ flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
+
+ /*
+ * Wa_14012197797:dg2_g10:a0,dg2_g11:a0
+ * Wa_22011391025:dg2_g10,dg2_g11,dg2_g12
+ *
+ * The same WA bit is used for both workarounds, and 22011391025 is
+ * applicable to all DG2 variants.
+ */
+ if (IS_DG2(gt->i915))
+ flags |= GUC_WA_DUAL_QUEUE;
+
+ /* Wa_22011802037: graphics version 12 */
+ if (GRAPHICS_VER(gt->i915) == 12)
+ flags |= GUC_WA_PRE_PARSER;
+
+ /* Wa_16011777198:dg2 */
+ if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
+ IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))
+ flags |= GUC_WA_RCS_RESET_BEFORE_RC6;
+
+ /*
+ * Wa_22012727170:dg2_g10[a0-c0), dg2_g11[a0..)
+ * Wa_22012727685:dg2_g11[a0..)
+ */
+ if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
+ IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER))
+ flags |= GUC_WA_CONTEXT_ISOLATION;
+
return flags;
}
@@ -362,9 +398,14 @@ int intel_guc_init(struct intel_guc *guc)
if (ret)
goto err_fw;
- ret = intel_guc_ads_create(guc);
+ ret = intel_guc_capture_init(guc);
if (ret)
goto err_log;
+
+ ret = intel_guc_ads_create(guc);
+ if (ret)
+ goto err_capture;
+
GEM_BUG_ON(!guc->ads_vma);
ret = intel_guc_ct_init(&guc->ct);
@@ -403,6 +444,8 @@ err_ct:
intel_guc_ct_fini(&guc->ct);
err_ads:
intel_guc_ads_destroy(guc);
+err_capture:
+ intel_guc_capture_destroy(guc);
err_log:
intel_guc_log_destroy(&guc->log);
err_fw:
@@ -430,6 +473,7 @@ void intel_guc_fini(struct intel_guc *guc)
intel_guc_ct_fini(&guc->ct);
intel_guc_ads_destroy(guc);
+ intel_guc_capture_destroy(guc);
intel_guc_log_destroy(&guc->log);
intel_uc_fw_fini(&guc->fw);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index bf7079480d47..3f3373f68123 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -10,18 +10,19 @@
#include <linux/iosys-map.h>
#include <linux/xarray.h>
-#include "intel_uncore.h"
+#include "intel_guc_ct.h"
#include "intel_guc_fw.h"
#include "intel_guc_fwif.h"
-#include "intel_guc_ct.h"
#include "intel_guc_log.h"
#include "intel_guc_reg.h"
#include "intel_guc_slpc_types.h"
#include "intel_uc_fw.h"
+#include "intel_uncore.h"
#include "i915_utils.h"
#include "i915_vma.h"
struct __guc_ads_blob;
+struct intel_guc_state_capture;
/**
* struct intel_guc - Top level structure of GuC.
@@ -38,6 +39,8 @@ struct intel_guc {
struct intel_guc_ct ct;
/** @slpc: sub-structure containing SLPC related data and objects */
struct intel_guc_slpc slpc;
+ /** @capture: the error-state-capture module's data and objects */
+ struct intel_guc_state_capture *capture;
/** @sched_engine: Global engine used to submit requests to GuC */
struct i915_sched_engine *sched_engine;
@@ -138,6 +141,8 @@ struct intel_guc {
bool submission_supported;
/** @submission_selected: tracks whether the user enabled GuC submission */
bool submission_selected;
+ /** @submission_initialized: tracks whether GuC submission has been initialised */
+ bool submission_initialized;
/**
* @rc_supported: tracks whether we support GuC rc on the current platform
*/
@@ -160,14 +165,11 @@ struct intel_guc {
struct guc_mmio_reg *ads_regset;
/** @ads_golden_ctxt_size: size of the golden contexts in the ADS */
u32 ads_golden_ctxt_size;
+ /** @ads_capture_size: size of register lists in the ADS used for error capture */
+ u32 ads_capture_size;
/** @ads_engine_usage_size: size of engine usage in the ADS */
u32 ads_engine_usage_size;
- /** @lrc_desc_pool: object allocated to hold the GuC LRC descriptor pool */
- struct i915_vma *lrc_desc_pool;
- /** @lrc_desc_pool_vaddr: contents of the GuC LRC descriptor pool */
- void *lrc_desc_pool_vaddr;
-
/**
* @context_lookup: used to resolve intel_context from guc_id, if a
* context is present in this structure it is registered with the GuC
@@ -431,6 +433,9 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
int intel_guc_error_capture_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len);
+struct intel_engine_cs *
+intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance);
+
void intel_guc_find_hung_context(struct intel_engine_cs *engine);
int intel_guc_global_policies_update(struct intel_guc *guc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 92cb88248391..3eabf4cf8eec 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -11,6 +11,7 @@
#include "gt/intel_lrc.h"
#include "gt/shmem_utils.h"
#include "intel_guc_ads.h"
+#include "intel_guc_capture.h"
#include "intel_guc_fwif.h"
#include "intel_uc.h"
#include "i915_drv.h"
@@ -86,8 +87,7 @@ static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc)
static u32 guc_ads_capture_size(struct intel_guc *guc)
{
- /* FIXME: Allocate a proper capture list */
- return PAGE_ALIGN(PAGE_SIZE);
+ return PAGE_ALIGN(guc->ads_capture_size);
}
static u32 guc_ads_private_data_size(struct intel_guc *guc)
@@ -276,15 +276,24 @@ __mmio_reg_add(struct temp_regset *regset, struct guc_mmio_reg *reg)
return slot;
}
-static long __must_check guc_mmio_reg_add(struct temp_regset *regset,
- u32 offset, u32 flags)
+#define GUC_REGSET_STEERING(group, instance) ( \
+ FIELD_PREP(GUC_REGSET_STEERING_GROUP, (group)) | \
+ FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, (instance)) | \
+ GUC_REGSET_NEEDS_STEERING \
+)
+
+static long __must_check guc_mmio_reg_add(struct intel_gt *gt,
+ struct temp_regset *regset,
+ i915_reg_t reg, u32 flags)
{
u32 count = regset->storage_used - (regset->registers - regset->storage);
- struct guc_mmio_reg reg = {
+ u32 offset = i915_mmio_reg_offset(reg);
+ struct guc_mmio_reg entry = {
.offset = offset,
.flags = flags,
};
struct guc_mmio_reg *slot;
+ u8 group, inst;
/*
* The mmio list is built using separate lists within the driver.
@@ -292,11 +301,22 @@ static long __must_check guc_mmio_reg_add(struct temp_regset *regset,
* register more than once. Do not consider this an error; silently
* move on if the register is already in the list.
*/
- if (bsearch(&reg, regset->registers, count,
- sizeof(reg), guc_mmio_reg_cmp))
+ if (bsearch(&entry, regset->registers, count,
+ sizeof(entry), guc_mmio_reg_cmp))
return 0;
- slot = __mmio_reg_add(regset, &reg);
+ /*
+ * The GuC doesn't have a default steering, so we need to explicitly
+ * steer all registers that need steering. However, we do not keep track
+ * of all the steering ranges, only of those that have a chance of using
+ * a non-default steering from the i915 pov. Instead of adding such
+ * tracking, it is easier to just program the default steering for all
+ * regs that don't need a non-default one.
+ */
+ intel_gt_get_valid_steering_for_reg(gt, reg, &group, &inst);
+ entry.flags |= GUC_REGSET_STEERING(group, inst);
+
+ slot = __mmio_reg_add(regset, &entry);
if (IS_ERR(slot))
return PTR_ERR(slot);
@@ -311,14 +331,16 @@ static long __must_check guc_mmio_reg_add(struct temp_regset *regset,
return 0;
}
-#define GUC_MMIO_REG_ADD(regset, reg, masked) \
- guc_mmio_reg_add(regset, \
- i915_mmio_reg_offset((reg)), \
+#define GUC_MMIO_REG_ADD(gt, regset, reg, masked) \
+ guc_mmio_reg_add(gt, \
+ regset, \
+ (reg), \
(masked) ? GUC_REGSET_MASKED : 0)
static int guc_mmio_regset_init(struct temp_regset *regset,
struct intel_engine_cs *engine)
{
+ struct intel_gt *gt = engine->gt;
const u32 base = engine->mmio_base;
struct i915_wa_list *wal = &engine->wa_list;
struct i915_wa *wa;
@@ -331,26 +353,26 @@ static int guc_mmio_regset_init(struct temp_regset *regset,
*/
regset->registers = regset->storage + regset->storage_used;
- ret |= GUC_MMIO_REG_ADD(regset, RING_MODE_GEN7(base), true);
- ret |= GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false);
- ret |= GUC_MMIO_REG_ADD(regset, RING_IMR(base), false);
+ ret |= GUC_MMIO_REG_ADD(gt, regset, RING_MODE_GEN7(base), true);
+ ret |= GUC_MMIO_REG_ADD(gt, regset, RING_HWS_PGA(base), false);
+ ret |= GUC_MMIO_REG_ADD(gt, regset, RING_IMR(base), false);
- if (engine->class == RENDER_CLASS &&
+ if ((engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) &&
CCS_MASK(engine->gt))
- ret |= GUC_MMIO_REG_ADD(regset, GEN12_RCU_MODE, true);
+ ret |= GUC_MMIO_REG_ADD(gt, regset, GEN12_RCU_MODE, true);
for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
- ret |= GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg);
+ ret |= GUC_MMIO_REG_ADD(gt, regset, wa->reg, wa->masked_reg);
/* Be extra paranoid and include all whitelist registers. */
for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++)
- ret |= GUC_MMIO_REG_ADD(regset,
+ ret |= GUC_MMIO_REG_ADD(gt, regset,
RING_FORCE_TO_NONPRIV(base, i),
false);
/* add in local MOCS registers */
for (i = 0; i < GEN9_LNCFCMOCS_REG_COUNT; i++)
- ret |= GUC_MMIO_REG_ADD(regset, GEN9_LNCFCMOCS(i), false);
+ ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false);
return ret ? -1 : 0;
}
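
Every regset entry now carries explicit steering in its flags, since GuC applies no default steering of its own. A stand-alone sketch of composing such a flags word; the bit positions are hypothetical stand-ins for the real GUC_REGSET_STEERING_GROUP/INSTANCE fields:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical field layout, for illustration only. */
    #define STEERING_GROUP_SHIFT    12
    #define STEERING_INSTANCE_SHIFT 8
    #define NEEDS_STEERING          (1u << 16)

    static uint32_t regset_steering(unsigned int group, unsigned int inst)
    {
        return ((uint32_t)group << STEERING_GROUP_SHIFT) |
               ((uint32_t)inst << STEERING_INSTANCE_SHIFT) |
               NEEDS_STEERING;
    }

    int main(void)
    {
        /* e.g. the default steering of group 0, instance 2 */
        printf("entry.flags |= %#x\n", regset_steering(0, 2));
        return 0;
    }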
@@ -433,7 +455,7 @@ static void guc_mmio_reg_state_init(struct intel_guc *guc)
static void fill_engine_enable_masks(struct intel_gt *gt,
struct iosys_map *info_map)
{
- info_map_write(info_map, engine_enabled_masks[GUC_RENDER_CLASS], 1);
+ info_map_write(info_map, engine_enabled_masks[GUC_RENDER_CLASS], RCS_MASK(gt));
info_map_write(info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], CCS_MASK(gt));
info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], 1);
info_map_write(info_map, engine_enabled_masks[GUC_VIDEO_CLASS], VDBOX_MASK(gt));
@@ -589,24 +611,119 @@ static void guc_init_golden_context(struct intel_guc *guc)
GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size);
}
-static void guc_capture_list_init(struct intel_guc *guc)
+static int
+guc_capture_prep_lists(struct intel_guc *guc)
{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ u32 ads_ggtt, capture_offset, null_ggtt, total_size = 0;
+ struct guc_gt_system_info local_info;
+ struct iosys_map info_map;
+ bool ads_is_mapped;
+ size_t size = 0;
+ void *ptr;
int i, j;
- u32 addr_ggtt, offset;
- offset = guc_ads_capture_offset(guc);
- addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
+ ads_is_mapped = !iosys_map_is_null(&guc->ads_map);
+ if (ads_is_mapped) {
+ capture_offset = guc_ads_capture_offset(guc);
+ ads_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma);
+ info_map = IOSYS_MAP_INIT_OFFSET(&guc->ads_map,
+ offsetof(struct __guc_ads_blob, system_info));
+ } else {
+ memset(&local_info, 0, sizeof(local_info));
+ iosys_map_set_vaddr(&info_map, &local_info);
+ fill_engine_enable_masks(gt, &info_map);
+ }
- /* FIXME: Populate a proper capture list */
+ /* first, set aside the first page for a capture_list with zero descriptors */
+ total_size = PAGE_SIZE;
+ if (ads_is_mapped) {
+ if (!intel_guc_capture_getnullheader(guc, &ptr, &size))
+ iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size);
+ null_ggtt = ads_ggtt + capture_offset;
+ capture_offset += PAGE_SIZE;
+ }
for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) {
- ads_blob_write(guc, ads.capture_instance[i][j], addr_ggtt);
- ads_blob_write(guc, ads.capture_class[i][j], addr_ggtt);
- }
- ads_blob_write(guc, ads.capture_global[i], addr_ggtt);
+ /* null list if we don't have said engine or list */
+ if (!info_map_read(&info_map, engine_enabled_masks[j])) {
+ if (ads_is_mapped) {
+ ads_blob_write(guc, ads.capture_class[i][j], null_ggtt);
+ ads_blob_write(guc, ads.capture_instance[i][j], null_ggtt);
+ }
+ continue;
+ }
+ if (intel_guc_capture_getlistsize(guc, i,
+ GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
+ j, &size)) {
+ if (ads_is_mapped)
+ ads_blob_write(guc, ads.capture_class[i][j], null_ggtt);
+ goto engine_instance_list;
+ }
+ total_size += size;
+ if (ads_is_mapped) {
+ if (total_size > guc->ads_capture_size ||
+ intel_guc_capture_getlist(guc, i,
+ GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
+ j, &ptr)) {
+ ads_blob_write(guc, ads.capture_class[i][j], null_ggtt);
+ continue;
+ }
+ ads_blob_write(guc, ads.capture_class[i][j], ads_ggtt +
+ capture_offset);
+ iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size);
+ capture_offset += size;
+ }
+engine_instance_list:
+ if (intel_guc_capture_getlistsize(guc, i,
+ GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
+ j, &size)) {
+ if (ads_is_mapped)
+ ads_blob_write(guc, ads.capture_instance[i][j], null_ggtt);
+ continue;
+ }
+ total_size += size;
+ if (ads_is_mapped) {
+ if (total_size > guc->ads_capture_size ||
+ intel_guc_capture_getlist(guc, i,
+ GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
+ j, &ptr)) {
+ ads_blob_write(guc, ads.capture_instance[i][j], null_ggtt);
+ continue;
+ }
+ ads_blob_write(guc, ads.capture_instance[i][j], ads_ggtt +
+ capture_offset);
+ iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size);
+ capture_offset += size;
+ }
+ }
+ if (intel_guc_capture_getlistsize(guc, i, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &size)) {
+ if (ads_is_mapped)
+ ads_blob_write(guc, ads.capture_global[i], null_ggtt);
+ continue;
+ }
+ total_size += size;
+ if (ads_is_mapped) {
+ if (total_size > guc->ads_capture_size ||
+ intel_guc_capture_getlist(guc, i, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0,
+ &ptr)) {
+ ads_blob_write(guc, ads.capture_global[i], null_ggtt);
+ continue;
+ }
+ ads_blob_write(guc, ads.capture_global[i], ads_ggtt + capture_offset);
+ iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size);
+ capture_offset += size;
+ }
}
+
+ if (guc->ads_capture_size && guc->ads_capture_size != PAGE_ALIGN(total_size))
+ drm_warn(&i915->drm, "GuC->ADS->Capture alloc size changed from %d to %d\n",
+ guc->ads_capture_size, PAGE_ALIGN(total_size));
+
+ return PAGE_ALIGN(total_size);
}
static void __guc_ads_init(struct intel_guc *guc)
@@ -644,8 +761,8 @@ static void __guc_ads_init(struct intel_guc *guc)
base = intel_guc_ggtt_offset(guc, guc->ads_vma);
- /* Capture list for hang debug */
- guc_capture_list_init(guc);
+ /* Lists for error capture debug */
+ guc_capture_prep_lists(guc);
/* ADS */
ads_blob_write(guc, ads.scheduler_policies, base +
@@ -693,6 +810,12 @@ int intel_guc_ads_create(struct intel_guc *guc)
return ret;
guc->ads_golden_ctxt_size = ret;
+ /* Likewise the capture lists: */
+ ret = guc_capture_prep_lists(guc);
+ if (ret < 0)
+ return ret;
+ guc->ads_capture_size = ret;
+
/* Now the total size can be determined: */
size = guc_ads_blob_size(guc);
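
Note the two-pass use of guc_capture_prep_lists(): intel_guc_ads_create() calls it before the ADS blob exists, purely to learn ads_capture_size, and __guc_ads_init() calls it again once the blob is mapped, to actually copy the lists and write the GGTT pointers. A self-contained sketch of this size-then-fill idiom:

    #include <stdlib.h>
    #include <string.h>

    /* Pass 1: buf == NULL, only measure. Pass 2: buf mapped, copy data. */
    static size_t prep_lists(char *buf, const char *const *lists, int n)
    {
        size_t total = 0;
        int i;

        for (i = 0; i < n; i++) {
            size_t sz = strlen(lists[i]) + 1;

            if (buf)
                memcpy(buf + total, lists[i], sz);
            total += sz;
        }
        return total;
    }

    int main(void)
    {
        const char *lists[] = { "global", "class", "instance" };
        size_t sz = prep_lists(NULL, lists, 3);  /* sizing pass */
        char *blob = malloc(sz);

        if (blob)
            prep_lists(blob, lists, 3);          /* fill pass */
        free(blob);
        return 0;
    }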
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
new file mode 100644
index 000000000000..c4e25966d3e9
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
@@ -0,0 +1,1657 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2022 Intel Corporation
+ */
+
+#include <linux/types.h>
+
+#include <drm/drm_print.h>
+
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_lrc.h"
+#include "guc_capture_fwif.h"
+#include "intel_guc_capture.h"
+#include "intel_guc_fwif.h"
+#include "i915_drv.h"
+#include "i915_gpu_error.h"
+#include "i915_irq.h"
+#include "i915_memcpy.h"
+#include "i915_reg.h"
+
+/*
+ * Define all device tables of GuC error capture register lists
+ * NOTE: For engine-registers, GuC only needs the register offsets
+ * from the engine-mmio-base
+ */
+#define COMMON_BASE_GLOBAL \
+ { FORCEWAKE_MT, 0, 0, "FORCEWAKE" }
+
+#define COMMON_GEN9BASE_GLOBAL \
+ { GEN8_FAULT_TLB_DATA0, 0, 0, "GEN8_FAULT_TLB_DATA0" }, \
+ { GEN8_FAULT_TLB_DATA1, 0, 0, "GEN8_FAULT_TLB_DATA1" }, \
+ { ERROR_GEN6, 0, 0, "ERROR_GEN6" }, \
+ { DONE_REG, 0, 0, "DONE_REG" }, \
+ { HSW_GTT_CACHE_EN, 0, 0, "HSW_GTT_CACHE_EN" }
+
+#define COMMON_GEN12BASE_GLOBAL \
+ { GEN12_FAULT_TLB_DATA0, 0, 0, "GEN12_FAULT_TLB_DATA0" }, \
+ { GEN12_FAULT_TLB_DATA1, 0, 0, "GEN12_FAULT_TLB_DATA1" }, \
+ { GEN12_AUX_ERR_DBG, 0, 0, "AUX_ERR_DBG" }, \
+ { GEN12_GAM_DONE, 0, 0, "GAM_DONE" }, \
+ { GEN12_RING_FAULT_REG, 0, 0, "FAULT_REG" }
+
+#define COMMON_BASE_ENGINE_INSTANCE \
+ { RING_PSMI_CTL(0), 0, 0, "RC PSMI" }, \
+ { RING_ESR(0), 0, 0, "ESR" }, \
+ { RING_DMA_FADD(0), 0, 0, "RING_DMA_FADD_LDW" }, \
+ { RING_DMA_FADD_UDW(0), 0, 0, "RING_DMA_FADD_UDW" }, \
+ { RING_IPEIR(0), 0, 0, "IPEIR" }, \
+ { RING_IPEHR(0), 0, 0, "IPEHR" }, \
+ { RING_INSTPS(0), 0, 0, "INSTPS" }, \
+ { RING_BBADDR(0), 0, 0, "RING_BBADDR_LOW32" }, \
+ { RING_BBADDR_UDW(0), 0, 0, "RING_BBADDR_UP32" }, \
+ { RING_BBSTATE(0), 0, 0, "BB_STATE" }, \
+ { CCID(0), 0, 0, "CCID" }, \
+ { RING_ACTHD(0), 0, 0, "ACTHD_LDW" }, \
+ { RING_ACTHD_UDW(0), 0, 0, "ACTHD_UDW" }, \
+ { RING_INSTPM(0), 0, 0, "INSTPM" }, \
+ { RING_INSTDONE(0), 0, 0, "INSTDONE" }, \
+ { RING_NOPID(0), 0, 0, "RING_NOPID" }, \
+ { RING_START(0), 0, 0, "START" }, \
+ { RING_HEAD(0), 0, 0, "HEAD" }, \
+ { RING_TAIL(0), 0, 0, "TAIL" }, \
+ { RING_CTL(0), 0, 0, "CTL" }, \
+ { RING_MI_MODE(0), 0, 0, "MODE" }, \
+ { RING_CONTEXT_CONTROL(0), 0, 0, "RING_CONTEXT_CONTROL" }, \
+ { RING_HWS_PGA(0), 0, 0, "HWS" }, \
+ { RING_MODE_GEN7(0), 0, 0, "GFX_MODE" }, \
+ { GEN8_RING_PDP_LDW(0, 0), 0, 0, "PDP0_LDW" }, \
+ { GEN8_RING_PDP_UDW(0, 0), 0, 0, "PDP0_UDW" }, \
+ { GEN8_RING_PDP_LDW(0, 1), 0, 0, "PDP1_LDW" }, \
+ { GEN8_RING_PDP_UDW(0, 1), 0, 0, "PDP1_UDW" }, \
+ { GEN8_RING_PDP_LDW(0, 2), 0, 0, "PDP2_LDW" }, \
+ { GEN8_RING_PDP_UDW(0, 2), 0, 0, "PDP2_UDW" }, \
+ { GEN8_RING_PDP_LDW(0, 3), 0, 0, "PDP3_LDW" }, \
+ { GEN8_RING_PDP_UDW(0, 3), 0, 0, "PDP3_UDW" }
+
+#define COMMON_BASE_HAS_EU \
+ { EIR, 0, 0, "EIR" }
+
+#define COMMON_BASE_RENDER \
+ { GEN7_SC_INSTDONE, 0, 0, "GEN7_SC_INSTDONE" }
+
+#define COMMON_GEN12BASE_RENDER \
+ { GEN12_SC_INSTDONE_EXTRA, 0, 0, "GEN12_SC_INSTDONE_EXTRA" }, \
+ { GEN12_SC_INSTDONE_EXTRA2, 0, 0, "GEN12_SC_INSTDONE_EXTRA2" }
+
+#define COMMON_GEN12BASE_VEC \
+ { GEN12_SFC_DONE(0), 0, 0, "SFC_DONE[0]" }, \
+ { GEN12_SFC_DONE(1), 0, 0, "SFC_DONE[1]" }, \
+ { GEN12_SFC_DONE(2), 0, 0, "SFC_DONE[2]" }, \
+ { GEN12_SFC_DONE(3), 0, 0, "SFC_DONE[3]" }
+
+/* XE_LPD - Global */
+static const struct __guc_mmio_reg_descr xe_lpd_global_regs[] = {
+ COMMON_BASE_GLOBAL,
+ COMMON_GEN9BASE_GLOBAL,
+ COMMON_GEN12BASE_GLOBAL,
+};
+
+/* XE_LPD - Render / Compute Per-Class */
+static const struct __guc_mmio_reg_descr xe_lpd_rc_class_regs[] = {
+ COMMON_BASE_HAS_EU,
+ COMMON_BASE_RENDER,
+ COMMON_GEN12BASE_RENDER,
+};
+
+/* GEN9/XE_LPD - Render / Compute Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_lpd_rc_inst_regs[] = {
+ COMMON_BASE_ENGINE_INSTANCE,
+};
+
+/* GEN9/XE_LPD - Media Decode/Encode Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_lpd_vd_inst_regs[] = {
+ COMMON_BASE_ENGINE_INSTANCE,
+};
+
+/* XE_LPD - Video Enhancement Per-Class */
+static const struct __guc_mmio_reg_descr xe_lpd_vec_class_regs[] = {
+ COMMON_GEN12BASE_VEC,
+};
+
+/* GEN9/XE_LPD - Video Enhancement Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_lpd_vec_inst_regs[] = {
+ COMMON_BASE_ENGINE_INSTANCE,
+};
+
+/* GEN9/XE_LPD - Blitter Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe_lpd_blt_inst_regs[] = {
+ COMMON_BASE_ENGINE_INSTANCE,
+};
+
+/* GEN9 - Global */
+static const struct __guc_mmio_reg_descr default_global_regs[] = {
+ COMMON_BASE_GLOBAL,
+ COMMON_GEN9BASE_GLOBAL,
+};
+
+static const struct __guc_mmio_reg_descr default_rc_class_regs[] = {
+ COMMON_BASE_HAS_EU,
+ COMMON_BASE_RENDER,
+};
+
+/*
+ * Empty lists:
+ * GEN9/XE_LPD - Blitter Per-Class
+ * GEN9/XE_LPD - Media Decode/Encode Per-Class
+ * GEN9 - VEC Class
+ */
+static const struct __guc_mmio_reg_descr empty_regs_list[] = {
+};
+
+#define TO_GCAP_DEF_OWNER(x) (GUC_CAPTURE_LIST_INDEX_##x)
+#define TO_GCAP_DEF_TYPE(x) (GUC_CAPTURE_LIST_TYPE_##x)
+#define MAKE_REGLIST(regslist, regsowner, regstype, class) \
+ { \
+ regslist, \
+ ARRAY_SIZE(regslist), \
+ TO_GCAP_DEF_OWNER(regsowner), \
+ TO_GCAP_DEF_TYPE(regstype), \
+ class, \
+ NULL, \
+ }
+
+/* List of lists */
+static struct __guc_mmio_reg_descr_group default_lists[] = {
+ MAKE_REGLIST(default_global_regs, PF, GLOBAL, 0),
+ MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS),
+ MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS),
+ MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS),
+ MAKE_REGLIST(xe_lpd_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEOENHANCE_CLASS),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_BLITTER_CLASS),
+ MAKE_REGLIST(xe_lpd_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_BLITTER_CLASS),
+ {}
+};
+
+static const struct __guc_mmio_reg_descr_group xe_lpd_lists[] = {
+ MAKE_REGLIST(xe_lpd_global_regs, PF, GLOBAL, 0),
+ MAKE_REGLIST(xe_lpd_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS),
+ MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS),
+ MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS),
+ MAKE_REGLIST(xe_lpd_vec_class_regs, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS),
+ MAKE_REGLIST(xe_lpd_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEOENHANCE_CLASS),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_BLITTER_CLASS),
+ MAKE_REGLIST(xe_lpd_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_BLITTER_CLASS),
+ {}
+};
+
+static const struct __guc_mmio_reg_descr_group *
+guc_capture_get_one_list(const struct __guc_mmio_reg_descr_group *reglists,
+ u32 owner, u32 type, u32 id)
+{
+ int i;
+
+ if (!reglists)
+ return NULL;
+
+ for (i = 0; reglists[i].list; ++i) {
+ if (reglists[i].owner == owner && reglists[i].type == type &&
+ (reglists[i].engine == id || reglists[i].type == GUC_CAPTURE_LIST_TYPE_GLOBAL))
+ return &reglists[i];
+ }
+
+ return NULL;
+}
+
+static struct __guc_mmio_reg_descr_group *
+guc_capture_get_one_ext_list(struct __guc_mmio_reg_descr_group *reglists,
+ u32 owner, u32 type, u32 id)
+{
+ int i;
+
+ if (!reglists)
+ return NULL;
+
+ for (i = 0; reglists[i].extlist; ++i) {
+ if (reglists[i].owner == owner && reglists[i].type == type &&
+ (reglists[i].engine == id || reglists[i].type == GUC_CAPTURE_LIST_TYPE_GLOBAL))
+ return &reglists[i];
+ }
+
+ return NULL;
+}
+
+static void guc_capture_free_extlists(struct __guc_mmio_reg_descr_group *reglists)
+{
+ int i = 0;
+
+ if (!reglists)
+ return;
+
+ while (reglists[i].extlist)
+ kfree(reglists[i++].extlist);
+}
+
+struct __ext_steer_reg {
+ const char *name;
+ i915_reg_t reg;
+};
+
+static const struct __ext_steer_reg xe_extregs[] = {
+ {"GEN7_SAMPLER_INSTDONE", GEN7_SAMPLER_INSTDONE},
+ {"GEN7_ROW_INSTDONE", GEN7_ROW_INSTDONE}
+};
+
+static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
+ const struct __ext_steer_reg *extlist,
+ int slice_id, int subslice_id)
+{
+ ext->reg = extlist->reg;
+ ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id);
+ ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id);
+ ext->regname = extlist->name;
+}
+
+static int
+__alloc_ext_regs(struct __guc_mmio_reg_descr_group *newlist,
+ const struct __guc_mmio_reg_descr_group *rootlist, int num_regs)
+{
+ struct __guc_mmio_reg_descr *list;
+
+ list = kcalloc(num_regs, sizeof(struct __guc_mmio_reg_descr), GFP_KERNEL);
+ if (!list)
+ return -ENOMEM;
+
+ newlist->extlist = list;
+ newlist->num_regs = num_regs;
+ newlist->owner = rootlist->owner;
+ newlist->engine = rootlist->engine;
+ newlist->type = rootlist->type;
+
+ return 0;
+}
+
+static void
+guc_capture_alloc_steered_lists_xe_lpd(struct intel_guc *guc,
+ const struct __guc_mmio_reg_descr_group *lists)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ int slice, subslice, i, num_steer_regs, num_tot_regs = 0;
+ const struct __guc_mmio_reg_descr_group *list;
+ struct __guc_mmio_reg_descr_group *extlists;
+ struct __guc_mmio_reg_descr *extarray;
+ struct sseu_dev_info *sseu;
+
+ /* In XE_LPD we only have steered registers for the render-class */
+ list = guc_capture_get_one_list(lists, GUC_CAPTURE_LIST_INDEX_PF,
+ GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, GUC_RENDER_CLASS);
+ /* skip if extlists was previously allocated */
+ if (!list || guc->capture->extlists)
+ return;
+
+ num_steer_regs = ARRAY_SIZE(xe_extregs);
+
+ sseu = &gt->info.sseu;
+ for_each_instdone_slice_subslice(i915, sseu, slice, subslice)
+ num_tot_regs += num_steer_regs;
+
+ if (!num_tot_regs)
+ return;
+
+ /* allocate an extra for an end marker */
+ extlists = kcalloc(2, sizeof(struct __guc_mmio_reg_descr_group), GFP_KERNEL);
+ if (!extlists)
+ return;
+
+ if (__alloc_ext_regs(&extlists[0], list, num_tot_regs)) {
+ kfree(extlists);
+ return;
+ }
+
+ extarray = extlists[0].extlist;
+ for_each_instdone_slice_subslice(i915, sseu, slice, subslice) {
+ for (i = 0; i < num_steer_regs; ++i) {
+ __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice);
+ ++extarray;
+ }
+ }
+
+ guc->capture->extlists = extlists;
+}
+
+static const struct __ext_steer_reg xehpg_extregs[] = {
+ {"XEHPG_INSTDONE_GEOM_SVG", XEHPG_INSTDONE_GEOM_SVG}
+};
+
+static bool __has_xehpg_extregs(u32 ipver)
+{
+ return (ipver >= IP_VER(12, 55));
+}
+
+static void
+guc_capture_alloc_steered_lists_xe_hpg(struct intel_guc *guc,
+ const struct __guc_mmio_reg_descr_group *lists,
+ u32 ipver)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ struct sseu_dev_info *sseu;
+ int slice, subslice, i, iter, num_steer_regs, num_tot_regs = 0;
+ const struct __guc_mmio_reg_descr_group *list;
+ struct __guc_mmio_reg_descr_group *extlists;
+ struct __guc_mmio_reg_descr *extarray;
+
+ /* In XE_LP / HPG we only have render-class steering registers during error-capture */
+ list = guc_capture_get_one_list(lists, GUC_CAPTURE_LIST_INDEX_PF,
+ GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, GUC_RENDER_CLASS);
+ /* skip if extlists was previously allocated */
+ if (!list || guc->capture->extlists)
+ return;
+
+ num_steer_regs = ARRAY_SIZE(xe_extregs);
+ if (__has_xehpg_extregs(ipver))
+ num_steer_regs += ARRAY_SIZE(xehpg_extregs);
+
+ sseu = &gt->info.sseu;
+ for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) {
+ num_tot_regs += num_steer_regs;
+ }
+
+ if (!num_tot_regs)
+ return;
+
+ /* allocate an extra for an end marker */
+ extlists = kcalloc(2, sizeof(struct __guc_mmio_reg_descr_group), GFP_KERNEL);
+ if (!extlists)
+ return;
+
+ if (__alloc_ext_regs(&extlists[0], list, num_tot_regs)) {
+ kfree(extlists);
+ return;
+ }
+
+ extarray = extlists[0].extlist;
+ for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) {
+ for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) {
+ __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice);
+ ++extarray;
+ }
+ if (__has_xehpg_extregs(ipver)) {
+ for (i = 0; i < ARRAY_SIZE(xehpg_extregs); ++i) {
+ __fill_ext_reg(extarray, &xehpg_extregs[i], slice, subslice);
+ ++extarray;
+ }
+ }
+ }
+
+ drm_dbg(&i915->drm, "GuC-capture found %d-ext-regs.\n", num_tot_regs);
+ guc->capture->extlists = extlists;
+}
+
+static const struct __guc_mmio_reg_descr_group *
+guc_capture_get_device_reglist(struct intel_guc *guc)
+{
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+
+ if (GRAPHICS_VER(i915) > 11) {
+ /*
+ * For certain engine classes, there are slice and subslice
+ * level registers requiring steering. We allocate and populate
+ * these at init time based on the HW config and add them as an
+ * extension list at the end of the pre-populated render list.
+ */
+ if (IS_DG2(i915))
+ guc_capture_alloc_steered_lists_xe_hpg(guc, xe_lpd_lists, IP_VER(12, 55));
+ else if (IS_XEHPSDV(i915))
+ guc_capture_alloc_steered_lists_xe_hpg(guc, xe_lpd_lists, IP_VER(12, 50));
+ else
+ guc_capture_alloc_steered_lists_xe_lpd(guc, xe_lpd_lists);
+
+ return xe_lpd_lists;
+ }
+
+ /* if GuC submission is enabled on a non-POR platform, just use a common baseline */
+ return default_lists;
+}
+
+static const char *
+__stringify_owner(u32 owner)
+{
+ switch (owner) {
+ case GUC_CAPTURE_LIST_INDEX_PF:
+ return "PF";
+ case GUC_CAPTURE_LIST_INDEX_VF:
+ return "VF";
+ default:
+ return "unknown";
+ }
+
+ return "";
+}
+
+static const char *
+__stringify_type(u32 type)
+{
+ switch (type) {
+ case GUC_CAPTURE_LIST_TYPE_GLOBAL:
+ return "Global";
+ case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS:
+ return "Class";
+ case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE:
+ return "Instance";
+ default:
+ return "unknown";
+ }
+
+ return "";
+}
+
+static const char *
+__stringify_engclass(u32 class)
+{
+ switch (class) {
+ case GUC_RENDER_CLASS:
+ return "Render";
+ case GUC_VIDEO_CLASS:
+ return "Video";
+ case GUC_VIDEOENHANCE_CLASS:
+ return "VideoEnhance";
+ case GUC_BLITTER_CLASS:
+ return "Blitter";
+ case GUC_COMPUTE_CLASS:
+ return "Compute";
+ default:
+ return "unknown";
+ }
+
+ return "";
+}
+
+static void
+guc_capture_warn_with_list_info(struct drm_i915_private *i915, char *msg,
+ u32 owner, u32 type, u32 classid)
+{
+ if (type == GUC_CAPTURE_LIST_TYPE_GLOBAL)
+ drm_dbg(&i915->drm, "GuC-capture: %s for %s %s-Registers.\n", msg,
+ __stringify_owner(owner), __stringify_type(type));
+ else
+ drm_dbg(&i915->drm, "GuC-capture: %s for %s %s-Registers on %s-Engine\n", msg,
+ __stringify_owner(owner), __stringify_type(type),
+ __stringify_engclass(classid));
+}
+
+static int
+guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
+ struct guc_mmio_reg *ptr, u16 num_entries)
+{
+ u32 i = 0, j = 0;
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ const struct __guc_mmio_reg_descr_group *reglists = guc->capture->reglists;
+ struct __guc_mmio_reg_descr_group *extlists = guc->capture->extlists;
+ const struct __guc_mmio_reg_descr_group *match;
+ struct __guc_mmio_reg_descr_group *matchext;
+
+ if (!reglists)
+ return -ENODEV;
+
+ match = guc_capture_get_one_list(reglists, owner, type, classid);
+ if (!match) {
+ guc_capture_warn_with_list_info(i915, "Missing register list init", owner, type,
+ classid);
+ return -ENODATA;
+ }
+
+ for (i = 0; i < num_entries && i < match->num_regs; ++i) {
+ ptr[i].offset = match->list[i].reg.reg;
+ ptr[i].value = 0xDEADF00D;
+ ptr[i].flags = match->list[i].flags;
+ ptr[i].mask = match->list[i].mask;
+ }
+
+ matchext = guc_capture_get_one_ext_list(extlists, owner, type, classid);
+ if (matchext) {
+ for (i = match->num_regs, j = 0; i < num_entries &&
+ i < (match->num_regs + matchext->num_regs) &&
+ j < matchext->num_regs; ++i, ++j) {
+ ptr[i].offset = matchext->extlist[j].reg.reg;
+ ptr[i].value = 0xDEADF00D;
+ ptr[i].flags = matchext->extlist[j].flags;
+ ptr[i].mask = matchext->extlist[j].mask;
+ }
+ }
+ if (i < num_entries)
+ drm_dbg(&i915->drm, "GuC-capture: Init reglist short %d out %d.\n",
+ (int)i, (int)num_entries);
+
+ return 0;
+}
+
+static int
+guc_cap_list_num_regs(struct intel_guc_state_capture *gc, u32 owner, u32 type, u32 classid)
+{
+ const struct __guc_mmio_reg_descr_group *match;
+ struct __guc_mmio_reg_descr_group *matchext;
+ int num_regs;
+
+ match = guc_capture_get_one_list(gc->reglists, owner, type, classid);
+ if (!match)
+ return 0;
+
+ num_regs = match->num_regs;
+
+ matchext = guc_capture_get_one_ext_list(gc->extlists, owner, type, classid);
+ if (matchext)
+ num_regs += matchext->num_regs;
+
+ return num_regs;
+}
+
+int
+intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
+ size_t *size)
+{
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ struct intel_guc_state_capture *gc = guc->capture;
+ struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][classid];
+ int num_regs;
+
+ if (!gc->reglists)
+ return -ENODEV;
+
+ if (cache->is_valid) {
+ *size = cache->size;
+ return cache->status;
+ }
+
+ num_regs = guc_cap_list_num_regs(gc, owner, type, classid);
+ if (!num_regs) {
+ guc_capture_warn_with_list_info(i915, "Missing register list size",
+ owner, type, classid);
+ return -ENODATA;
+ }
+
+ *size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) +
+ (num_regs * sizeof(struct guc_mmio_reg)));
+
+ return 0;
+}
+
+static void guc_capture_create_prealloc_nodes(struct intel_guc *guc);
+
+int
+intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
+ void **outptr)
+{
+ struct intel_guc_state_capture *gc = guc->capture;
+ struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][classid];
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ struct guc_debug_capture_list *listnode;
+ int ret, num_regs;
+ u8 *caplist, *tmp;
+ size_t size = 0;
+
+ if (!gc->reglists)
+ return -ENODEV;
+
+ if (cache->is_valid) {
+ *outptr = cache->ptr;
+ return cache->status;
+ }
+
+ /*
+ * ADS population of input registers is a good
+ * time to pre-allocate cachelist output nodes
+ */
+ guc_capture_create_prealloc_nodes(guc);
+
+ ret = intel_guc_capture_getlistsize(guc, owner, type, classid, &size);
+ if (ret) {
+ cache->is_valid = true;
+ cache->ptr = NULL;
+ cache->size = 0;
+ cache->status = ret;
+ return ret;
+ }
+
+ caplist = kzalloc(size, GFP_KERNEL);
+ if (!caplist) {
+ drm_dbg(&i915->drm, "GuC-capture: failed to alloc cached caplist");
+ return -ENOMEM;
+ }
+
+ /* populate capture list header */
+ tmp = caplist;
+ num_regs = guc_cap_list_num_regs(guc->capture, owner, type, classid);
+ listnode = (struct guc_debug_capture_list *)tmp;
+ listnode->header.info = FIELD_PREP(GUC_CAPTURELISTHDR_NUMDESCR, (u32)num_regs);
+
+ /* populate list of register descriptor */
+ tmp += sizeof(struct guc_debug_capture_list);
+ guc_capture_list_init(guc, owner, type, classid, (struct guc_mmio_reg *)tmp, num_regs);
+
+ /* cache this list */
+ cache->is_valid = true;
+ cache->ptr = caplist;
+ cache->size = size;
+ cache->status = 0;
+
+ *outptr = caplist;
+
+ return 0;
+}
+
+int
+intel_guc_capture_getnullheader(struct intel_guc *guc,
+ void **outptr, size_t *size)
+{
+ struct intel_guc_state_capture *gc = guc->capture;
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ int tmp = sizeof(u32) * 4;
+ void *null_header;
+
+ if (gc->ads_null_cache) {
+ *outptr = gc->ads_null_cache;
+ *size = tmp;
+ return 0;
+ }
+
+ null_header = kzalloc(tmp, GFP_KERNEL);
+ if (!null_header) {
+ drm_dbg(&i915->drm, "GuC-capture: failed to alloc cached nulllist");
+ return -ENOMEM;
+ }
+
+ gc->ads_null_cache = null_header;
+ *outptr = null_header;
+ *size = tmp;
+
+ return 0;
+}
+
+#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3
+
+int
+intel_guc_capture_output_min_size_est(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int worst_min_size = 0, num_regs = 0;
+ size_t tmp = 0;
+
+ if (!guc->capture)
+ return -ENODEV;
+
+ /*
+ * If every single engine-instance suffered a failure in quick succession and
+ * the failures were all unrelated, a burst of multiple error-capture events
+ * would dump registers for each engine instance, one at a time. In this case,
+ * GuC would even dump the global registers repeatedly.
+ *
+ * For each engine instance, there would be 1 x guc_state_capture_group_t output
+ * followed by 3 x guc_state_capture_t lists. The latter is how the register
+ * dumps are split across the different register types (the '3' covers global,
+ * class and instance). Finally, multiply the whole thing by 3 so that we are
+ * not limited to a single round of data in a worst-case full register dump.
+ *
+ * NOTE: intel_guc_log, which allocates the log buffer, rounds this size up
+ * to a power of two.
+ */
+
+ for_each_engine(engine, gt, id) {
+ worst_min_size += sizeof(struct guc_state_capture_group_header_t) +
+ (3 * sizeof(struct guc_state_capture_header_t));
+
+ if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp))
+ num_regs += tmp;
+
+ if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
+ engine->class, &tmp)) {
+ num_regs += tmp;
+ }
+ if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
+ engine->class, &tmp)) {
+ num_regs += tmp;
+ }
+ }
+
+ worst_min_size += (num_regs * sizeof(struct guc_mmio_reg));
+
+ return (worst_min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER);
+}
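+
+/*
+ * Worked example of the estimate above (hypothetical counts, not from any
+ * platform spec): with 8 engine instances and num_regs summing to 200, the
+ * worst case is 8 * (sizeof(group header) + 3 * sizeof(capture header)) +
+ * 200 * sizeof(struct guc_mmio_reg), and the returned estimate is that
+ * value times GUC_CAPTURE_OVERBUFFER_MULTIPLIER (i.e. x3).
+ */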
+
+/*
+ * KMD Init time flows:
+ * --------------------
+ * --> alloc A: GuC input capture regs lists (registered to GuC via ADS).
+ * intel_guc_ads acquires the register lists by calling
+ * intel_guc_capture_getlistsize and intel_guc_capture_getlist 'n' times,
+ * where n = 1 for global-reg-list +
+ * num_engine_classes for class-reg-list +
+ * num_engine_classes for instance-reg-list
+ * (since all instances of the same engine-class type
+ * have an identical engine-instance register-list).
+ * ADS module also calls separately for PF vs VF.
+ *
+ * --> alloc B: GuC output capture buf (registered via guc_init_params(log_param))
+ * Size = #define CAPTURE_BUFFER_SIZE (warns if too small)
+ * Note: sized at 3x to hold multiple capture groups
+ *
+ * GUC Runtime notify capture:
+ * --------------------------
+ * --> G2H STATE_CAPTURE_NOTIFICATION
+ * L--> intel_guc_capture_process
+ * L--> Loop through B (head..tail) and for each engine instance's
+ * err-state-captured register-list we find, we alloc 'C':
+ * --> alloc C: A capture-output-node structure that includes misc capture info along
+ * with 3 register list dumps (global, engine-class and engine-instance)
+ * This node is created from a pre-allocated list of blank nodes in
+ * guc->capture->cachelist and populated with the error-capture
+ * data from GuC and then it's added into guc->capture->outlist linked
+ * list. This list is used for matchup and printout by i915_gpu_coredump
+ * and err_print_gt (when the user invokes the error-capture sysfs).
+ *
+ * GUC --> notify context reset:
+ * -----------------------------
+ * --> G2H CONTEXT RESET
+ * L--> guc_handle_context_reset --> i915_capture_error_state
+ * L--> i915_gpu_coredump(..IS_GUC_CAPTURE) --> gt_record_engines
+ * --> capture_engine(..IS_GUC_CAPTURE)
+ * L--> intel_guc_capture_get_matching_node is where we
+ * detach C from the internal linked list and add it into
+ * the intel_engine_coredump struct (if the context and
+ * engine of the event notification match a node
+ * in the linked list).
+ *
+ * User Sysfs / Debugfs
+ * --------------------
+ * --> i915_gpu_coredump_copy_to_buffer->
+ * L--> err_print_to_sgl --> err_print_gt
+ * L--> error_print_guc_captures
+ * L--> intel_guc_capture_print_node prints the
+ * register lists values of the attached node
+ * on the error-engine-dump being reported.
+ * L--> i915_reset_error_state ... -->__i915_gpu_coredump_free
+ * L--> ... cleanup_gt -->
+ * L--> intel_guc_capture_free_node returns the
+ * capture-output-node back to the internal
+ * cachelist for reuse.
+ *
+ */
+
+static int guc_capture_buf_cnt(struct __guc_capture_bufstate *buf)
+{
+ if (buf->wr >= buf->rd)
+ return (buf->wr - buf->rd);
+ return (buf->size - buf->rd) + buf->wr;
+}
+
+static int guc_capture_buf_cnt_to_end(struct __guc_capture_bufstate *buf)
+{
+ if (buf->rd > buf->wr)
+ return (buf->size - buf->rd);
+ return (buf->wr - buf->rd);
+}
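+
+/*
+ * Hypothetical example of the two helpers above, with buf->size = 0x1000:
+ * if buf->rd = 0xff0 and buf->wr = 0x10 (the write pointer has wrapped),
+ * guc_capture_buf_cnt() returns (0x1000 - 0xff0) + 0x10 = 0x20 bytes
+ * pending, while guc_capture_buf_cnt_to_end() returns 0x1000 - 0xff0 =
+ * 0x10 bytes before the ring wraps.
+ */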
+
+/*
+ * GuC's error-capture output is a ring buffer populated in a byte-stream fashion:
+ *
+ * The GuC Log buffer region for error-capture is managed like a ring buffer.
+ * The GuC firmware dumps error capture logs into this ring in a byte-stream flow.
+ * Additionally, for the current and foreseeable future, all packed error-
+ * capture output structures are dword-aligned.
+ *
+ * That said, if the GuC firmware is in the midst of writing a structure that is
+ * larger than one dword but the tail end of the err-capture buffer-region has
+ * less space left, we would need to extract that structure one dword at a time,
+ * straddling the end of the ring onto its start.
+ *
+ * The function below, guc_capture_log_remove_dw, is a helper for that. Callers
+ * typically do a straight-up memcpy from the ring contents and only call this
+ * helper when their structure extraction straddles the end of the ring. The GuC
+ * firmware does not add any padding; avoiding padding eases scalability for
+ * future expansion of output data types without requiring a redesign of the
+ * flow controls.
+ */
+static int
+guc_capture_log_remove_dw(struct intel_guc *guc, struct __guc_capture_bufstate *buf,
+ u32 *dw)
+{
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ int tries = 2;
+ int avail = 0;
+ u32 *src_data;
+
+ if (!guc_capture_buf_cnt(buf))
+ return 0;
+
+ while (tries--) {
+ avail = guc_capture_buf_cnt_to_end(buf);
+ if (avail >= sizeof(u32)) {
+ src_data = (u32 *)(buf->data + buf->rd);
+ *dw = *src_data;
+ buf->rd += 4;
+ return 4;
+ }
+ if (avail)
+ drm_dbg(&i915->drm, "GuC-Cap-Logs not dword aligned, skipping.\n");
+ buf->rd = 0;
+ }
+
+ return 0;
+}
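+
+/*
+ * Continuing the hypothetical example above: when a multi-dword structure
+ * has only one dword left before the end of the ring, the first call to
+ * guc_capture_log_remove_dw() consumes that last dword and a subsequent
+ * call finds no bytes to the end, resets buf->rd to 0 via the retry loop
+ * and returns the dword that wrapped onto the start of the ring.
+ */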
+
+static bool
+guc_capture_data_extracted(struct __guc_capture_bufstate *b,
+ int size, void *dest)
+{
+ if (guc_capture_buf_cnt_to_end(b) >= size) {
+ memcpy(dest, (b->data + b->rd), size);
+ b->rd += size;
+ return true;
+ }
+ return false;
+}
+
+static int
+guc_capture_log_get_group_hdr(struct intel_guc *guc, struct __guc_capture_bufstate *buf,
+ struct guc_state_capture_group_header_t *ghdr)
+{
+ int read = 0;
+ int fullsize = sizeof(struct guc_state_capture_group_header_t);
+
+ if (fullsize > guc_capture_buf_cnt(buf))
+ return -1;
+
+ if (guc_capture_data_extracted(buf, fullsize, (void *)ghdr))
+ return 0;
+
+ read += guc_capture_log_remove_dw(guc, buf, &ghdr->owner);
+ read += guc_capture_log_remove_dw(guc, buf, &ghdr->info);
+ if (read != fullsize)
+ return -1;
+
+ return 0;
+}
+
+static int
+guc_capture_log_get_data_hdr(struct intel_guc *guc, struct __guc_capture_bufstate *buf,
+ struct guc_state_capture_header_t *hdr)
+{
+ int read = 0;
+ int fullsize = sizeof(struct guc_state_capture_header_t);
+
+ if (fullsize > guc_capture_buf_cnt(buf))
+ return -1;
+
+ if (guc_capture_data_extracted(buf, fullsize, (void *)hdr))
+ return 0;
+
+ read += guc_capture_log_remove_dw(guc, buf, &hdr->owner);
+ read += guc_capture_log_remove_dw(guc, buf, &hdr->info);
+ read += guc_capture_log_remove_dw(guc, buf, &hdr->lrca);
+ read += guc_capture_log_remove_dw(guc, buf, &hdr->guc_id);
+ read += guc_capture_log_remove_dw(guc, buf, &hdr->num_mmios);
+ if (read != fullsize)
+ return -1;
+
+ return 0;
+}
+
+static int
+guc_capture_log_get_register(struct intel_guc *guc, struct __guc_capture_bufstate *buf,
+ struct guc_mmio_reg *reg)
+{
+ int read = 0;
+ int fullsize = sizeof(struct guc_mmio_reg);
+
+ if (fullsize > guc_capture_buf_cnt(buf))
+ return -1;
+
+ if (guc_capture_data_extracted(buf, fullsize, (void *)reg))
+ return 0;
+
+ read += guc_capture_log_remove_dw(guc, buf, &reg->offset);
+ read += guc_capture_log_remove_dw(guc, buf, &reg->value);
+ read += guc_capture_log_remove_dw(guc, buf, &reg->flags);
+ read += guc_capture_log_remove_dw(guc, buf, &reg->mask);
+ if (read != fullsize)
+ return -1;
+
+ return 0;
+}
+
+static void
+guc_capture_delete_one_node(struct intel_guc *guc, struct __guc_capture_parsed_output *node)
+{
+ int i;
+
+ for (i = 0; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i)
+ kfree(node->reginfo[i].regs);
+ list_del(&node->link);
+ kfree(node);
+}
+
+static void
+guc_capture_delete_prealloc_nodes(struct intel_guc *guc)
+{
+ struct __guc_capture_parsed_output *n, *ntmp;
+
+ /*
+ * NOTE: At the end of driver operation, we must assume that we
+ * have prealloc nodes in both the cachelist and the outlist
+ * if unclaimed error capture events occurred prior to shutdown.
+ */
+ list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link)
+ guc_capture_delete_one_node(guc, n);
+
+ list_for_each_entry_safe(n, ntmp, &guc->capture->cachelist, link)
+ guc_capture_delete_one_node(guc, n);
+}
+
+static void
+guc_capture_add_node_to_list(struct __guc_capture_parsed_output *node,
+ struct list_head *list)
+{
+ list_add_tail(&node->link, list);
+}
+
+static void
+guc_capture_add_node_to_outlist(struct intel_guc_state_capture *gc,
+ struct __guc_capture_parsed_output *node)
+{
+ guc_capture_add_node_to_list(node, &gc->outlist);
+}
+
+static void
+guc_capture_add_node_to_cachelist(struct intel_guc_state_capture *gc,
+ struct __guc_capture_parsed_output *node)
+{
+ guc_capture_add_node_to_list(node, &gc->cachelist);
+}
+
+static void
+guc_capture_init_node(struct intel_guc *guc, struct __guc_capture_parsed_output *node)
+{
+ struct guc_mmio_reg *tmp[GUC_CAPTURE_LIST_TYPE_MAX];
+ int i;
+
+ for (i = 0; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) {
+ tmp[i] = node->reginfo[i].regs;
+ memset(tmp[i], 0, sizeof(struct guc_mmio_reg) *
+ guc->capture->max_mmio_per_node);
+ }
+ memset(node, 0, sizeof(*node));
+ for (i = 0; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i)
+ node->reginfo[i].regs = tmp[i];
+
+ INIT_LIST_HEAD(&node->link);
+}
+
+static struct __guc_capture_parsed_output *
+guc_capture_get_prealloc_node(struct intel_guc *guc)
+{
+ struct __guc_capture_parsed_output *found = NULL;
+
+ if (!list_empty(&guc->capture->cachelist)) {
+ struct __guc_capture_parsed_output *n, *ntmp;
+
+ /* get first avail node from the cache list */
+ list_for_each_entry_safe(n, ntmp, &guc->capture->cachelist, link) {
+ found = n;
+ list_del(&n->link);
+ break;
+ }
+ } else {
+ struct __guc_capture_parsed_output *n, *ntmp;
+
+ /* traverse down and steal back the oldest node already allocated */
+ list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link) {
+ found = n;
+ }
+ if (found)
+ list_del(&found->link);
+ }
+ if (found)
+ guc_capture_init_node(guc, found);
+
+ return found;
+}
+
+static struct __guc_capture_parsed_output *
+guc_capture_alloc_one_node(struct intel_guc *guc)
+{
+ struct __guc_capture_parsed_output *new;
+ int i;
+
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ for (i = 0; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) {
+ new->reginfo[i].regs = kcalloc(guc->capture->max_mmio_per_node,
+ sizeof(struct guc_mmio_reg), GFP_KERNEL);
+ if (!new->reginfo[i].regs) {
+ while (i)
+ kfree(new->reginfo[--i].regs);
+ kfree(new);
+ return NULL;
+ }
+ }
+ guc_capture_init_node(guc, new);
+
+ return new;
+}
+
+static struct __guc_capture_parsed_output *
+guc_capture_clone_node(struct intel_guc *guc, struct __guc_capture_parsed_output *original,
+ u32 keep_reglist_mask)
+{
+ struct __guc_capture_parsed_output *new;
+ int i;
+
+ new = guc_capture_get_prealloc_node(guc);
+ if (!new)
+ return NULL;
+ if (!original)
+ return new;
+
+ new->is_partial = original->is_partial;
+
+ /* copy reg-lists that we want to clone */
+ for (i = 0; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) {
+ if (keep_reglist_mask & BIT(i)) {
+ GEM_BUG_ON(original->reginfo[i].num_regs >
+ guc->capture->max_mmio_per_node);
+
+ memcpy(new->reginfo[i].regs, original->reginfo[i].regs,
+ original->reginfo[i].num_regs * sizeof(struct guc_mmio_reg));
+
+ new->reginfo[i].num_regs = original->reginfo[i].num_regs;
+ new->reginfo[i].vfid = original->reginfo[i].vfid;
+
+ if (i == GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS) {
+ new->eng_class = original->eng_class;
+ } else if (i == GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE) {
+ new->eng_inst = original->eng_inst;
+ new->guc_id = original->guc_id;
+ new->lrca = original->lrca;
+ }
+ }
+ }
+
+ return new;
+}
+
+static void
+__guc_capture_create_prealloc_nodes(struct intel_guc *guc)
+{
+ struct __guc_capture_parsed_output *node = NULL;
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ int i;
+
+ for (i = 0; i < PREALLOC_NODES_MAX_COUNT; ++i) {
+ node = guc_capture_alloc_one_node(guc);
+ if (!node) {
+ drm_warn(&i915->drm, "GuC Capture pre-alloc-cache failure\n");
+ /* don't free the prior allocations; use what we got and clean up at shutdown */
+ return;
+ }
+ guc_capture_add_node_to_cachelist(guc->capture, node);
+ }
+}
+
+static int
+guc_get_max_reglist_count(struct intel_guc *guc)
+{
+ int i, j, k, tmp, maxregcount = 0;
+
+ for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; ++i) {
+ for (j = 0; j < GUC_CAPTURE_LIST_TYPE_MAX; ++j) {
+ for (k = 0; k < GUC_MAX_ENGINE_CLASSES; ++k) {
+ if (j == GUC_CAPTURE_LIST_TYPE_GLOBAL && k > 0)
+ continue;
+
+ tmp = guc_cap_list_num_regs(guc->capture, i, j, k);
+ if (tmp > maxregcount)
+ maxregcount = tmp;
+ }
+ }
+ }
+ if (!maxregcount)
+ maxregcount = PREALLOC_NODES_DEFAULT_NUMREGS;
+
+ return maxregcount;
+}
+
+static void
+guc_capture_create_prealloc_nodes(struct intel_guc *guc)
+{
+ /* skip if we've already done the pre-alloc */
+ if (guc->capture->max_mmio_per_node)
+ return;
+
+ guc->capture->max_mmio_per_node = guc_get_max_reglist_count(guc);
+ __guc_capture_create_prealloc_nodes(guc);
+}
+
+static int
+guc_capture_extract_reglists(struct intel_guc *guc, struct __guc_capture_bufstate *buf)
+{
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ struct guc_state_capture_group_header_t ghdr = {0};
+ struct guc_state_capture_header_t hdr = {0};
+ struct __guc_capture_parsed_output *node = NULL;
+ struct guc_mmio_reg *regs = NULL;
+ int i, numlists, numregs, ret = 0;
+ enum guc_capture_type datatype;
+ struct guc_mmio_reg tmp;
+ bool is_partial = false;
+
+ i = guc_capture_buf_cnt(buf);
+ if (!i)
+ return -ENODATA;
+ if (i % sizeof(u32)) {
+ drm_warn(&i915->drm, "GuC Capture new entries unaligned\n");
+ ret = -EIO;
+ goto bailout;
+ }
+
+ /* first get the capture group header */
+ if (guc_capture_log_get_group_hdr(guc, buf, &ghdr)) {
+ ret = -EIO;
+ goto bailout;
+ }
+ /*
+ * We would typically expect a layout as below, where the number of captures
+ * is at least 3 and can be larger when multiple dependent engine instances
+ * are reset together.
+ * ____________________________________________
+ * | Capture Group |
+ * | ________________________________________ |
+ * | | Capture Group Header: | |
+ * | | - num_captures = 5 | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture1: | |
+ * | | Hdr: GLOBAL, numregs=a | |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... rega | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture2: | |
+ * | | Hdr: CLASS=RENDER/COMPUTE, numregs=b| |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... regb | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture3: | |
+ * | | Hdr: INSTANCE=RCS, numregs=c | |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... regc | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture4: | |
+ * | | Hdr: CLASS=RENDER/COMPUTE, numregs=d| |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... regd | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * | ________________________________________ |
+ * | | Capture5: | |
+ * | | Hdr: INSTANCE=CCS0, numregs=e | |
+ * | | ____________________________________ | |
+ * | | | Reglist | | |
+ * | | | - reg1, reg2, ... rege | | |
+ * | | |__________________________________| | |
+ * | |______________________________________| |
+ * |__________________________________________|
+ */
+ is_partial = FIELD_GET(CAP_GRP_HDR_CAPTURE_TYPE, ghdr.info);
+ numlists = FIELD_GET(CAP_GRP_HDR_NUM_CAPTURES, ghdr.info);
+
+ while (numlists--) {
+ if (guc_capture_log_get_data_hdr(guc, buf, &hdr)) {
+ ret = -EIO;
+ break;
+ }
+
+ datatype = FIELD_GET(CAP_HDR_CAPTURE_TYPE, hdr.info);
+ if (datatype > GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE) {
+ /* unknown capture type - skip over to next capture set */
+ numregs = FIELD_GET(CAP_HDR_NUM_MMIOS, hdr.num_mmios);
+ while (numregs--) {
+ if (guc_capture_log_get_register(guc, buf, &tmp)) {
+ ret = -EIO;
+ break;
+ }
+ }
+ continue;
+ } else if (node) {
+ /*
+ * Based on the current capture type and what we have so far,
+ * decide whether to add the current node to the internal linked
+ * list for match-up when i915_gpu_coredump calls later (allocating
+ * a blank node for the next set of reglists), continue with the
+ * same node, or clone the current node but retain only the global
+ * or class registers (as in the case of dependent engine resets).
+ */
+ if (datatype == GUC_CAPTURE_LIST_TYPE_GLOBAL) {
+ guc_capture_add_node_to_outlist(guc->capture, node);
+ node = NULL;
+ } else if (datatype == GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS &&
+ node->reginfo[GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS].num_regs) {
+ /* Add to list, clone node and duplicate global list */
+ guc_capture_add_node_to_outlist(guc->capture, node);
+ node = guc_capture_clone_node(guc, node,
+ GCAP_PARSED_REGLIST_INDEX_GLOBAL);
+ } else if (datatype == GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE &&
+ node->reginfo[GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE].num_regs) {
+ /* Add to list, clone node and duplicate global + class lists */
+ guc_capture_add_node_to_outlist(guc->capture, node);
+ node = guc_capture_clone_node(guc, node,
+ (GCAP_PARSED_REGLIST_INDEX_GLOBAL |
+ GCAP_PARSED_REGLIST_INDEX_ENGCLASS));
+ }
+ }
+
+ if (!node) {
+ node = guc_capture_get_prealloc_node(guc);
+ if (!node) {
+ ret = -ENOMEM;
+ break;
+ }
+ if (datatype != GUC_CAPTURE_LIST_TYPE_GLOBAL)
+ drm_dbg(&i915->drm, "GuC Capture missing global dump: %08x!\n",
+ datatype);
+ }
+ node->is_partial = is_partial;
+ node->reginfo[datatype].vfid = FIELD_GET(CAP_HDR_CAPTURE_VFID, hdr.owner);
+ switch (datatype) {
+ case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE:
+ node->eng_class = FIELD_GET(CAP_HDR_ENGINE_CLASS, hdr.info);
+ node->eng_inst = FIELD_GET(CAP_HDR_ENGINE_INSTANCE, hdr.info);
+ node->lrca = hdr.lrca;
+ node->guc_id = hdr.guc_id;
+ break;
+ case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS:
+ node->eng_class = FIELD_GET(CAP_HDR_ENGINE_CLASS, hdr.info);
+ break;
+ default:
+ break;
+ }
+
+ numregs = FIELD_GET(CAP_HDR_NUM_MMIOS, hdr.num_mmios);
+ if (numregs > guc->capture->max_mmio_per_node) {
+ drm_dbg(&i915->drm, "GuC Capture list extraction clipped by prealloc!\n");
+ numregs = guc->capture->max_mmio_per_node;
+ }
+ node->reginfo[datatype].num_regs = numregs;
+ regs = node->reginfo[datatype].regs;
+ i = 0;
+ while (numregs--) {
+ if (guc_capture_log_get_register(guc, buf, &regs[i++])) {
+ ret = -EIO;
+ break;
+ }
+ }
+ }
+
+bailout:
+ if (node) {
+ /* If we have data, add to linked list for match-up when i915_gpu_coredump calls */
+ for (i = GUC_CAPTURE_LIST_TYPE_GLOBAL; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) {
+ if (node->reginfo[i].regs) {
+ guc_capture_add_node_to_outlist(guc->capture, node);
+ node = NULL;
+ break;
+ }
+ }
+ if (node) /* else return it back to cache list */
+ guc_capture_add_node_to_cachelist(guc->capture, node);
+ }
+ return ret;
+}
+
+static int __guc_capture_flushlog_complete(struct intel_guc *guc)
+{
+ u32 action[] = {
+ INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE,
+ GUC_CAPTURE_LOG_BUFFER
+ };
+
+ return intel_guc_send(guc, action, ARRAY_SIZE(action));
+}
+
+static void __guc_capture_process_output(struct intel_guc *guc)
+{
+ unsigned int buffer_size, read_offset, write_offset, full_count;
+ struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ struct guc_log_buffer_state log_buf_state_local;
+ struct guc_log_buffer_state *log_buf_state;
+ struct __guc_capture_bufstate buf;
+ void *src_data = NULL;
+ bool new_overflow;
+ int ret;
+
+ log_buf_state = guc->log.buf_addr +
+ (sizeof(struct guc_log_buffer_state) * GUC_CAPTURE_LOG_BUFFER);
+ src_data = guc->log.buf_addr + intel_guc_get_log_buffer_offset(GUC_CAPTURE_LOG_BUFFER);
+
+ /*
+ * Make a copy of the state structure, inside the GuC log buffer
+ * (which is mapped uncached), on the stack to avoid reading
+ * from it multiple times.
+ */
+ memcpy(&log_buf_state_local, log_buf_state, sizeof(struct guc_log_buffer_state));
+ buffer_size = intel_guc_get_log_buffer_size(GUC_CAPTURE_LOG_BUFFER);
+ read_offset = log_buf_state_local.read_ptr;
+ write_offset = log_buf_state_local.sampled_write_ptr;
+ full_count = log_buf_state_local.buffer_full_cnt;
+
+ /* Bookkeeping stuff */
+ guc->log.stats[GUC_CAPTURE_LOG_BUFFER].flush += log_buf_state_local.flush_to_file;
+ new_overflow = intel_guc_check_log_buf_overflow(&guc->log, GUC_CAPTURE_LOG_BUFFER,
+ full_count);
+
+ /* Now copy the actual logs. */
+ if (unlikely(new_overflow)) {
+ /* copy the whole buffer in case of overflow */
+ read_offset = 0;
+ write_offset = buffer_size;
+ } else if (unlikely((read_offset > buffer_size) ||
+ (write_offset > buffer_size))) {
+ drm_err(&i915->drm, "invalid GuC log capture buffer state!\n");
+ /* copy whole buffer as offsets are unreliable */
+ read_offset = 0;
+ write_offset = buffer_size;
+ }
+
+ buf.size = buffer_size;
+ buf.rd = read_offset;
+ buf.wr = write_offset;
+ buf.data = src_data;
+
+ if (!uc->reset_in_progress) {
+ do {
+ ret = guc_capture_extract_reglists(guc, &buf);
+ } while (ret >= 0);
+ }
+
+ /* Update the err-capture state of the shared log buffer */
+ log_buf_state->read_ptr = write_offset;
+ log_buf_state->flush_to_file = 0;
+ __guc_capture_flushlog_complete(guc);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+
+static const char *
+guc_capture_reg_to_str(const struct intel_guc *guc, u32 owner, u32 type,
+ u32 class, u32 id, u32 offset, u32 *is_ext)
+{
+ const struct __guc_mmio_reg_descr_group *reglists = guc->capture->reglists;
+ struct __guc_mmio_reg_descr_group *extlists = guc->capture->extlists;
+ const struct __guc_mmio_reg_descr_group *match;
+ struct __guc_mmio_reg_descr_group *matchext;
+ int j;
+
+ *is_ext = 0;
+ if (!reglists)
+ return NULL;
+
+ match = guc_capture_get_one_list(reglists, owner, type, id);
+ if (!match)
+ return NULL;
+
+ for (j = 0; j < match->num_regs; ++j) {
+ if (offset == match->list[j].reg.reg)
+ return match->list[j].regname;
+ }
+ if (extlists) {
+ matchext = guc_capture_get_one_ext_list(extlists, owner, type, id);
+ if (!matchext)
+ return NULL;
+ for (j = 0; j < matchext->num_regs; ++j) {
+ if (offset == matchext->extlist[j].reg.reg) {
+ *is_ext = 1;
+ return matchext->extlist[j].regname;
+ }
+ }
+ }
+
+ return NULL;
+}
+
+#ifdef CONFIG_DRM_I915_DEBUG_GUC
+#define __out(a, ...) \
+ do { \
+ drm_warn((&(a)->i915->drm), __VA_ARGS__); \
+ i915_error_printf((a), __VA_ARGS__); \
+ } while (0)
+#else
+#define __out(a, ...) \
+ i915_error_printf(a, __VA_ARGS__)
+#endif
+
+#define GCAP_PRINT_INTEL_ENG_INFO(ebuf, eng) \
+ do { \
+ __out(ebuf, " i915-Eng-Name: %s command stream\n", \
+ (eng)->name); \
+ __out(ebuf, " i915-Eng-Inst-Class: 0x%02x\n", (eng)->class); \
+ __out(ebuf, " i915-Eng-Inst-Id: 0x%02x\n", (eng)->instance); \
+ __out(ebuf, " i915-Eng-LogicalMask: 0x%08x\n", \
+ (eng)->logical_mask); \
+ } while (0)
+
+#define GCAP_PRINT_GUC_INST_INFO(ebuf, node) \
+ do { \
+ __out(ebuf, " GuC-Engine-Inst-Id: 0x%08x\n", \
+ (node)->eng_inst); \
+ __out(ebuf, " GuC-Context-Id: 0x%08x\n", (node)->guc_id); \
+ __out(ebuf, " LRCA: 0x%08x\n", (node)->lrca); \
+ } while (0)
+
+int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *ebuf,
+ const struct intel_engine_coredump *ee)
+{
+ const char *grptype[GUC_STATE_CAPTURE_GROUP_TYPE_MAX] = {
+ "full-capture",
+ "partial-capture"
+ };
+ const char *datatype[GUC_CAPTURE_LIST_TYPE_MAX] = {
+ "Global",
+ "Engine-Class",
+ "Engine-Instance"
+ };
+ struct intel_guc_state_capture *cap;
+ struct __guc_capture_parsed_output *node;
+ struct intel_engine_cs *eng;
+ struct guc_mmio_reg *regs;
+ struct intel_guc *guc;
+ const char *str;
+ int numregs, i, j;
+ u32 is_ext;
+
+ if (!ebuf || !ee)
+ return -EINVAL;
+ cap = ee->capture;
+ if (!cap || !ee->engine)
+ return -ENODEV;
+
+ guc = &ee->engine->gt->uc.guc;
+
+ __out(ebuf, "global --- GuC Error Capture on %s command stream:\n",
+ ee->engine->name);
+
+ node = ee->guc_capture_node;
+ if (!node) {
+ __out(ebuf, " No matching ee-node\n");
+ return 0;
+ }
+
+ __out(ebuf, "Coverage: %s\n", grptype[node->is_partial]);
+
+ for (i = GUC_CAPTURE_LIST_TYPE_GLOBAL; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) {
+ __out(ebuf, " RegListType: %s\n",
+ datatype[i % GUC_CAPTURE_LIST_TYPE_MAX]);
+ __out(ebuf, " Owner-Id: %d\n", node->reginfo[i].vfid);
+
+ switch (i) {
+ case GUC_CAPTURE_LIST_TYPE_GLOBAL:
+ default:
+ break;
+ case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS:
+ __out(ebuf, " GuC-Eng-Class: %d\n", node->eng_class);
+ __out(ebuf, " i915-Eng-Class: %d\n",
+ guc_class_to_engine_class(node->eng_class));
+ break;
+ case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE:
+ eng = intel_guc_lookup_engine(guc, node->eng_class, node->eng_inst);
+ if (eng)
+ GCAP_PRINT_INTEL_ENG_INFO(ebuf, eng);
+ else
+ __out(ebuf, " i915-Eng-Lookup Fail!\n");
+ GCAP_PRINT_GUC_INST_INFO(ebuf, node);
+ break;
+ }
+
+ numregs = node->reginfo[i].num_regs;
+ __out(ebuf, " NumRegs: %d\n", numregs);
+ j = 0;
+ while (numregs--) {
+ regs = node->reginfo[i].regs;
+ str = guc_capture_reg_to_str(guc, GUC_CAPTURE_LIST_INDEX_PF, i,
+ node->eng_class, 0, regs[j].offset, &is_ext);
+ if (!str)
+ __out(ebuf, " REG-0x%08x", regs[j].offset);
+ else
+ __out(ebuf, " %s", str);
+ if (is_ext)
+ __out(ebuf, "[%ld][%ld]",
+ FIELD_GET(GUC_REGSET_STEERING_GROUP, regs[j].flags),
+ FIELD_GET(GUC_REGSET_STEERING_INSTANCE, regs[j].flags));
+ __out(ebuf, ": 0x%08x\n", regs[j].value);
+ ++j;
+ }
+ }
+ return 0;
+}
+
+#endif //CONFIG_DRM_I915_CAPTURE_ERROR
+
+void intel_guc_capture_free_node(struct intel_engine_coredump *ee)
+{
+ if (!ee || !ee->guc_capture_node)
+ return;
+
+ guc_capture_add_node_to_cachelist(ee->capture, ee->guc_capture_node);
+ ee->capture = NULL;
+ ee->guc_capture_node = NULL;
+}
+
+void intel_guc_capture_get_matching_node(struct intel_gt *gt,
+ struct intel_engine_coredump *ee,
+ struct intel_context *ce)
+{
+ struct __guc_capture_parsed_output *n, *ntmp;
+ struct drm_i915_private *i915;
+ struct intel_guc *guc;
+
+ if (!gt || !ee || !ce)
+ return;
+
+ i915 = gt->i915;
+ guc = &gt->uc.guc;
+ if (!guc->capture)
+ return;
+
+ GEM_BUG_ON(ee->guc_capture_node);
+ /*
+ * Look for a matching GuC-reported error capture node in
+ * the internal output linked list, based on lrca, guc-id and engine
+ * identification.
+ */
+ list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link) {
+ if (n->eng_inst == GUC_ID_TO_ENGINE_INSTANCE(ee->engine->guc_id) &&
+ n->eng_class == GUC_ID_TO_ENGINE_CLASS(ee->engine->guc_id) &&
+ n->guc_id && n->guc_id == ce->guc_id.id &&
+ (n->lrca & CTX_GTT_ADDRESS_MASK) && (n->lrca & CTX_GTT_ADDRESS_MASK) ==
+ (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) {
+ list_del(&n->link);
+ ee->guc_capture_node = n;
+ ee->capture = guc->capture;
+ return;
+ }
+ }
+ drm_dbg(&i915->drm, "GuC capture can't match ee to node\n");
+}
+
+void intel_guc_capture_process(struct intel_guc *guc)
+{
+ if (guc->capture)
+ __guc_capture_process_output(guc);
+}
+
+static void
+guc_capture_free_ads_cache(struct intel_guc_state_capture *gc)
+{
+ int i, j, k;
+ struct __guc_capture_ads_cache *cache;
+
+ for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; ++i) {
+ for (j = 0; j < GUC_CAPTURE_LIST_TYPE_MAX; ++j) {
+ for (k = 0; k < GUC_MAX_ENGINE_CLASSES; ++k) {
+ cache = &gc->ads_cache[i][j][k];
+ if (cache->is_valid)
+ kfree(cache->ptr);
+ }
+ }
+ }
+ kfree(gc->ads_null_cache);
+}
+
+void intel_guc_capture_destroy(struct intel_guc *guc)
+{
+ if (!guc->capture)
+ return;
+
+ guc_capture_free_ads_cache(guc->capture);
+
+ guc_capture_delete_prealloc_nodes(guc);
+
+ guc_capture_free_extlists(guc->capture->extlists);
+ kfree(guc->capture->extlists);
+
+ kfree(guc->capture);
+ guc->capture = NULL;
+}
+
+int intel_guc_capture_init(struct intel_guc *guc)
+{
+ guc->capture = kzalloc(sizeof(*guc->capture), GFP_KERNEL);
+ if (!guc->capture)
+ return -ENOMEM;
+
+ guc->capture->reglists = guc_capture_get_device_reglist(guc);
+
+ INIT_LIST_HEAD(&guc->capture->outlist);
+ INIT_LIST_HEAD(&guc->capture->cachelist);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
new file mode 100644
index 000000000000..d3d7bd0b6db6
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021-2022 Intel Corporation
+ */
+
+#ifndef _INTEL_GUC_CAPTURE_H
+#define _INTEL_GUC_CAPTURE_H
+
+#include <linux/types.h>
+
+struct drm_i915_error_state_buf;
+struct guc_gt_system_info;
+struct intel_engine_coredump;
+struct intel_context;
+struct intel_gt;
+struct intel_guc;
+
+void intel_guc_capture_free_node(struct intel_engine_coredump *ee);
+int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *m,
+ const struct intel_engine_coredump *ee);
+void intel_guc_capture_get_matching_node(struct intel_gt *gt, struct intel_engine_coredump *ee,
+ struct intel_context *ce);
+void intel_guc_capture_process(struct intel_guc *guc);
+int intel_guc_capture_output_min_size_est(struct intel_guc *guc);
+int intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
+ void **outptr);
+int intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
+ size_t *size);
+int intel_guc_capture_getnullheader(struct intel_guc *guc, void **outptr, size_t *size);
+void intel_guc_capture_destroy(struct intel_guc *guc);
+int intel_guc_capture_init(struct intel_guc *guc);
+
+#endif /* _INTEL_GUC_CAPTURE_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 4b300b6cc0f9..42cb7a9a6199 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -32,8 +32,8 @@
#define GUC_CLIENT_PRIORITY_NORMAL 3
#define GUC_CLIENT_PRIORITY_NUM 4
-#define GUC_MAX_LRC_DESCRIPTORS 65535
-#define GUC_INVALID_LRC_ID GUC_MAX_LRC_DESCRIPTORS
+#define GUC_MAX_CONTEXT_ID 65535
+#define GUC_INVALID_CONTEXT_ID GUC_MAX_CONTEXT_ID
#define GUC_RENDER_ENGINE 0
#define GUC_VIDEO_ENGINE 1
@@ -98,7 +98,13 @@
#define GUC_LOG_BUF_ADDR_SHIFT 12
#define GUC_CTL_WA 1
-#define GUC_WA_POLLCS BIT(18)
+#define GUC_WA_GAM_CREDITS BIT(10)
+#define GUC_WA_DUAL_QUEUE BIT(11)
+#define GUC_WA_RCS_RESET_BEFORE_RC6 BIT(13)
+#define GUC_WA_CONTEXT_ISOLATION BIT(15)
+#define GUC_WA_PRE_PARSER BIT(14)
+#define GUC_WA_HOLD_CCS_SWITCHOUT BIT(17)
+#define GUC_WA_POLLCS BIT(18)
#define GUC_CTL_FEATURE 2
#define GUC_CTL_ENABLE_SLPC BIT(2)
@@ -197,54 +203,45 @@ struct guc_wq_item {
u32 fence_id;
} __packed;
-struct guc_process_desc {
- u32 stage_id;
- u64 db_base_addr;
+struct guc_sched_wq_desc {
u32 head;
u32 tail;
u32 error_offset;
- u64 wq_base_addr;
- u32 wq_size_bytes;
u32 wq_status;
- u32 engine_presence;
- u32 priority;
- u32 reserved[36];
+ u32 reserved[28];
} __packed;
+/* Helper for context registration H2G */
+struct guc_ctxt_registration_info {
+ u32 flags;
+ u32 context_idx;
+ u32 engine_class;
+ u32 engine_submit_mask;
+ u32 wq_desc_lo;
+ u32 wq_desc_hi;
+ u32 wq_base_lo;
+ u32 wq_base_hi;
+ u32 wq_size;
+ u32 hwlrca_lo;
+ u32 hwlrca_hi;
+};
#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0)
-#define CONTEXT_POLICY_DEFAULT_EXECUTION_QUANTUM_US 1000000
-#define CONTEXT_POLICY_DEFAULT_PREEMPTION_TIME_US 500000
+/* 32-bit KLV structure as used by policy updates and others */
+struct guc_klv_generic_dw_t {
+ u32 kl;
+ u32 value;
+} __packed;
-/* Preempt to idle on quantum expiry */
-#define CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE BIT(0)
+/* Format of the UPDATE_CONTEXT_POLICIES H2G data packet */
+struct guc_update_context_policy_header {
+ u32 action;
+ u32 ctx_id;
+} __packed;
-/*
- * GuC Context registration descriptor.
- * FIXME: This is only required to exist during context registration.
- * The current 1:1 between guc_lrc_desc and LRCs for the lifetime of the LRC
- * is not required.
- */
-struct guc_lrc_desc {
- u32 hw_context_desc;
- u32 slpm_perf_mode_hint; /* SPLC v1 only */
- u32 slpm_freq_hint;
- u32 engine_submit_mask; /* In logical space */
- u8 engine_class;
- u8 reserved0[3];
- u32 priority;
- u32 process_desc;
- u32 wq_addr;
- u32 wq_size;
- u32 context_flags; /* CONTEXT_REGISTRATION_* */
- /* Time for one workload to execute. (in micro seconds) */
- u32 execution_quantum;
- /* Time to wait for a preemption request to complete before issuing a
- * reset. (in micro seconds).
- */
- u32 preemption_timeout;
- u32 policy_flags; /* CONTEXT_POLICY_* */
- u32 reserved1[19];
+struct guc_update_context_policy {
+ struct guc_update_context_policy_header header;
+ struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS];
} __packed;
#define GUC_POWER_UNSPECIFIED 0
@@ -285,10 +282,13 @@ struct guc_mmio_reg {
u32 offset;
u32 value;
u32 flags;
- u32 mask;
#define GUC_REGSET_MASKED BIT(0)
+#define GUC_REGSET_NEEDS_STEERING BIT(1)
#define GUC_REGSET_MASKED_WITH_VALUE BIT(2)
#define GUC_REGSET_RESTORE_ONLY BIT(3)
+#define GUC_REGSET_STEERING_GROUP GENMASK(15, 12)
+#define GUC_REGSET_STEERING_INSTANCE GENMASK(23, 20)
+ u32 mask;
} __packed;
/* GuC register sets */
@@ -311,6 +311,14 @@ enum {
GUC_CAPTURE_LIST_INDEX_MAX = 2,
};
+/* Register-types of GuC capture register lists */
+enum guc_capture_type {
+ GUC_CAPTURE_LIST_TYPE_GLOBAL = 0,
+ GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
+ GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
+ GUC_CAPTURE_LIST_TYPE_MAX,
+};
+
/* GuC Additional Data Struct */
struct guc_ads {
struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
new file mode 100644
index 000000000000..79c66b6b51a3
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "gt/intel_gt.h"
+#include "gt/intel_hwconfig.h"
+#include "i915_drv.h"
+#include "i915_memcpy.h"
+
+/*
+ * GuC has a blob containing hardware configuration information (HWConfig).
+ * This is formatted as a simple and flexible KLV (Key/Length/Value) table.
+ *
+ * For example, a minimal version could be:
+ * enum device_attr {
+ * ATTR_SOME_VALUE = 0,
+ * ATTR_SOME_MASK = 1,
+ * };
+ *
+ * static const u32 hwconfig[] = {
+ * ATTR_SOME_VALUE,
+ * 1, // Value Length in DWords
+ * 8, // Value
+ *
+ * ATTR_SOME_MASK,
+ * 3,
+ * 0x00FFFFFFFF, 0xFFFFFFFF, 0xFF000000,
+ * };
+ *
+ * The attribute ids are defined in a hardware spec.
+ */
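+
+/*
+ * A minimal consumer might walk the blob as sketched below (illustrative
+ * only; 'consume_attribute' is a hypothetical callback and the attribute
+ * ids come from the hardware spec):
+ *
+ *	for (i = 0; i < size_in_dwords;) {
+ *		u32 key = hwconfig[i++];
+ *		u32 len = hwconfig[i++];
+ *
+ *		consume_attribute(key, &hwconfig[i], len);
+ *		i += len;
+ *	}
+ */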
+
+static int __guc_action_get_hwconfig(struct intel_guc *guc,
+ u32 ggtt_offset, u32 ggtt_size)
+{
+ u32 action[] = {
+ INTEL_GUC_ACTION_GET_HWCONFIG,
+ lower_32_bits(ggtt_offset),
+ upper_32_bits(ggtt_offset),
+ ggtt_size,
+ };
+ int ret;
+
+ ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
+ if (ret == -ENXIO)
+ return -ENOENT;
+
+ return ret;
+}
+
+static int guc_hwconfig_discover_size(struct intel_guc *guc, struct intel_hwconfig *hwconfig)
+{
+ int ret;
+
+ /*
+ * Sending a query with zero offset and size will return the
+ * size of the blob.
+ */
+ ret = __guc_action_get_hwconfig(guc, 0, 0);
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0)
+ return -EINVAL;
+
+ hwconfig->size = ret;
+ return 0;
+}
+
+static int guc_hwconfig_fill_buffer(struct intel_guc *guc, struct intel_hwconfig *hwconfig)
+{
+ struct i915_vma *vma;
+ u32 ggtt_offset;
+ void *vaddr;
+ int ret;
+
+ GEM_BUG_ON(!hwconfig->size);
+
+ ret = intel_guc_allocate_and_map_vma(guc, hwconfig->size, &vma, &vaddr);
+ if (ret)
+ return ret;
+
+ ggtt_offset = intel_guc_ggtt_offset(guc, vma);
+
+ ret = __guc_action_get_hwconfig(guc, ggtt_offset, hwconfig->size);
+ if (ret >= 0)
+ memcpy(hwconfig->ptr, vaddr, hwconfig->size);
+
+ i915_vma_unpin_and_release(&vma, I915_VMA_RELEASE_MAP);
+
+ return ret;
+}
+
+static bool has_table(struct drm_i915_private *i915)
+{
+ if (IS_ALDERLAKE_P(i915))
+ return true;
+ if (IS_DG2(i915))
+ return true;
+
+ return false;
+}
+
+/**
+ * guc_hwconfig_init - Initialize the HWConfig
+ *
+ * Retrieve the HWConfig table from the GuC and save it locally.
+ * It can then be queried on demand by other users later on.
+ */
+static int guc_hwconfig_init(struct intel_gt *gt)
+{
+ struct intel_hwconfig *hwconfig = &gt->info.hwconfig;
+ struct intel_guc *guc = &gt->uc.guc;
+ int ret;
+
+ if (!has_table(gt->i915))
+ return 0;
+
+ ret = guc_hwconfig_discover_size(guc, hwconfig);
+ if (ret)
+ return ret;
+
+ hwconfig->ptr = kmalloc(hwconfig->size, GFP_KERNEL);
+ if (!hwconfig->ptr) {
+ hwconfig->size = 0;
+ return -ENOMEM;
+ }
+
+ ret = guc_hwconfig_fill_buffer(guc, hwconfig);
+ if (ret < 0) {
+ intel_gt_fini_hwconfig(gt);
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * intel_gt_init_hwconfig - Initialize the HWConfig if available
+ *
+ * Retrieve the HWConfig table if available on the current platform.
+ */
+int intel_gt_init_hwconfig(struct intel_gt *gt)
+{
+ if (!intel_uc_uses_guc(&gt->uc))
+ return 0;
+
+ return guc_hwconfig_init(gt);
+}
+
+/**
+ * intel_gt_fini_hwconfig - Finalize the HWConfig
+ *
+ * Free up the memory allocation holding the table.
+ */
+void intel_gt_fini_hwconfig(struct intel_gt *gt)
+{
+ struct intel_hwconfig *hwconfig = &gt->info.hwconfig;
+
+ kfree(hwconfig->ptr);
+ hwconfig->size = 0;
+ hwconfig->ptr = NULL;
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index a24dc6441872..78d2989fe917 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -10,9 +10,10 @@
#include "i915_drv.h"
#include "i915_irq.h"
#include "i915_memcpy.h"
+#include "intel_guc_capture.h"
#include "intel_guc_log.h"
-static void guc_log_capture_logs(struct intel_guc_log *log);
+static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log);
/**
* DOC: GuC firmware log
@@ -26,7 +27,8 @@ static void guc_log_capture_logs(struct intel_guc_log *log);
static int guc_action_flush_log_complete(struct intel_guc *guc)
{
u32 action[] = {
- INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE
+ INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE,
+ GUC_DEBUG_LOG_BUFFER
};
return intel_guc_send(guc, action, ARRAY_SIZE(action));
@@ -137,7 +139,7 @@ static void guc_move_to_next_buf(struct intel_guc_log *log)
smp_wmb();
/* All data has been written, so now move the offset of sub buffer. */
- relay_reserve(log->relay.channel, log->vma->obj->base.size);
+ relay_reserve(log->relay.channel, log->vma->obj->base.size - CAPTURE_BUFFER_SIZE);
/* Switch to the next sub buffer */
relay_flush(log->relay.channel);
@@ -157,9 +159,9 @@ static void *guc_get_write_buffer(struct intel_guc_log *log)
return relay_reserve(log->relay.channel, 0);
}
-static bool guc_check_log_buf_overflow(struct intel_guc_log *log,
- enum guc_log_buffer_type type,
- unsigned int full_cnt)
+bool intel_guc_check_log_buf_overflow(struct intel_guc_log *log,
+ enum guc_log_buffer_type type,
+ unsigned int full_cnt)
{
unsigned int prev_full_cnt = log->stats[type].sampled_overflow;
bool overflow = false;
@@ -182,7 +184,7 @@ static bool guc_check_log_buf_overflow(struct intel_guc_log *log,
return overflow;
}
-static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type)
+unsigned int intel_guc_get_log_buffer_size(enum guc_log_buffer_type type)
{
switch (type) {
case GUC_DEBUG_LOG_BUFFER:
@@ -198,7 +200,21 @@ static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type)
return 0;
}
-static void guc_read_update_log_buffer(struct intel_guc_log *log)
+size_t intel_guc_get_log_buffer_offset(enum guc_log_buffer_type type)
+{
+ enum guc_log_buffer_type i;
+ size_t offset = PAGE_SIZE; /* for the log_buffer_states */
+
+ for (i = GUC_DEBUG_LOG_BUFFER; i < GUC_MAX_LOG_BUFFER; ++i) {
+ if (i == type)
+ break;
+ offset += intel_guc_get_log_buffer_size(i);
+ }
+
+ return offset;
+}
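+
+/*
+ * For example, with the buffer layout documented in intel_guc_log_create(),
+ * intel_guc_get_log_buffer_offset(GUC_CAPTURE_LOG_BUFFER) resolves to
+ * PAGE_SIZE + DEBUG_BUFFER_SIZE + CRASH_BUFFER_SIZE, i.e. the capture
+ * region sits after the state page and the debug and crash-dump regions.
+ */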
+
+static void _guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log)
{
unsigned int buffer_size, read_offset, write_offset, bytes_to_copy, full_cnt;
struct guc_log_buffer_state *log_buf_state, *log_buf_snapshot_state;
@@ -213,7 +229,8 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log)
goto out_unlock;
/* Get the pointer to shared GuC log buffer */
- log_buf_state = src_data = log->relay.buf_addr;
+ src_data = log->buf_addr;
+ log_buf_state = src_data;
/* Get the pointer to local buffer to store the logs */
log_buf_snapshot_state = dst_data = guc_get_write_buffer(log);
@@ -223,7 +240,7 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log)
* Used rate limited to avoid deluge of messages, logs might be
* getting consumed by User at a slow rate.
*/
- DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n");
+ DRM_ERROR_RATELIMITED("no sub-buffer to copy general logs\n");
log->relay.full_count++;
goto out_unlock;
@@ -233,7 +250,8 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log)
src_data += PAGE_SIZE;
dst_data += PAGE_SIZE;
- for (type = GUC_DEBUG_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) {
+ /* For relay logging, we exclude error state capture */
+ for (type = GUC_DEBUG_LOG_BUFFER; type <= GUC_CRASH_DUMP_LOG_BUFFER; type++) {
/*
* Make a copy of the state structure, inside GuC log buffer
* (which is uncached mapped), on the stack to avoid reading
@@ -241,14 +259,14 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log)
*/
memcpy(&log_buf_state_local, log_buf_state,
sizeof(struct guc_log_buffer_state));
- buffer_size = guc_get_log_buffer_size(type);
+ buffer_size = intel_guc_get_log_buffer_size(type);
read_offset = log_buf_state_local.read_ptr;
write_offset = log_buf_state_local.sampled_write_ptr;
full_cnt = log_buf_state_local.buffer_full_cnt;
/* Bookkeeping stuff */
log->stats[type].flush += log_buf_state_local.flush_to_file;
- new_overflow = guc_check_log_buf_overflow(log, type, full_cnt);
+ new_overflow = intel_guc_check_log_buf_overflow(log, type, full_cnt);
/* Update the state of shared log buffer */
log_buf_state->read_ptr = write_offset;
@@ -301,49 +319,43 @@ out_unlock:
mutex_unlock(&log->relay.lock);
}
-static void capture_logs_work(struct work_struct *work)
+static void copy_debug_logs_work(struct work_struct *work)
{
struct intel_guc_log *log =
container_of(work, struct intel_guc_log, relay.flush_work);
- guc_log_capture_logs(log);
+ guc_log_copy_debuglogs_for_relay(log);
}
-static int guc_log_map(struct intel_guc_log *log)
+static int guc_log_relay_map(struct intel_guc_log *log)
{
- void *vaddr;
-
lockdep_assert_held(&log->relay.lock);
- if (!log->vma)
+ if (!log->vma || !log->buf_addr)
return -ENODEV;
/*
- * Create a WC (Uncached for read) vmalloc mapping of log
- * buffer pages, so that we can directly get the data
- * (up-to-date) from memory.
+ * The WC vmalloc mapping of the log buffer pages was done at
+ * GuC log init time, but let's keep a reference for book-keeping
*/
- vaddr = i915_gem_object_pin_map_unlocked(log->vma->obj, I915_MAP_WC);
- if (IS_ERR(vaddr))
- return PTR_ERR(vaddr);
-
- log->relay.buf_addr = vaddr;
+ i915_gem_object_get(log->vma->obj);
+ log->relay.buf_in_use = true;
return 0;
}
-static void guc_log_unmap(struct intel_guc_log *log)
+static void guc_log_relay_unmap(struct intel_guc_log *log)
{
lockdep_assert_held(&log->relay.lock);
- i915_gem_object_unpin_map(log->vma->obj);
- log->relay.buf_addr = NULL;
+ i915_gem_object_put(log->vma->obj);
+ log->relay.buf_in_use = false;
}
void intel_guc_log_init_early(struct intel_guc_log *log)
{
mutex_init(&log->relay.lock);
- INIT_WORK(&log->relay.flush_work, capture_logs_work);
+ INIT_WORK(&log->relay.flush_work, copy_debug_logs_work);
log->relay.started = false;
}
@@ -358,8 +370,11 @@ static int guc_log_relay_create(struct intel_guc_log *log)
lockdep_assert_held(&log->relay.lock);
GEM_BUG_ON(!log->vma);
- /* Keep the size of sub buffers same as shared log buffer */
- subbuf_size = log->vma->size;
+ /*
+ * Keep the size of sub buffers the same as the shared log buffer,
+ * but exclude the error-state-capture region from relay
+ */
+ subbuf_size = log->vma->size - CAPTURE_BUFFER_SIZE;
/*
* Store up to 8 snapshots, which is large enough to buffer sufficient
@@ -394,13 +409,13 @@ static void guc_log_relay_destroy(struct intel_guc_log *log)
log->relay.channel = NULL;
}
-static void guc_log_capture_logs(struct intel_guc_log *log)
+static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log)
{
struct intel_guc *guc = log_to_guc(log);
struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915;
intel_wakeref_t wakeref;
- guc_read_update_log_buffer(log);
+ _guc_log_copy_debuglogs_for_relay(log);
/*
* Generally device is expected to be active only at this
@@ -440,6 +455,7 @@ int intel_guc_log_create(struct intel_guc_log *log)
{
struct intel_guc *guc = log_to_guc(log);
struct i915_vma *vma;
+ void *vaddr;
u32 guc_log_size;
int ret;
@@ -447,23 +463,28 @@ int intel_guc_log_create(struct intel_guc_log *log)
/*
* GuC Log buffer Layout
+ * (this ordering must follow "enum guc_log_buffer_type" definition)
*
* +===============================+ 00B
- * | Crash dump state header |
- * +-------------------------------+ 32B
* | Debug state header |
+ * +-------------------------------+ 32B
+ * | Crash dump state header |
* +-------------------------------+ 64B
* | Capture state header |
* +-------------------------------+ 96B
* | |
* +===============================+ PAGE_SIZE (4KB)
- * | Crash Dump logs |
- * +===============================+ + CRASH_SIZE
* | Debug logs |
* +===============================+ + DEBUG_SIZE
+ * | Crash Dump logs |
+ * +===============================+ + CRASH_SIZE
* | Capture logs |
* +===============================+ + CAPTURE_SIZE
*/
+ if (intel_guc_capture_output_min_size_est(guc) > CAPTURE_BUFFER_SIZE)
+ DRM_WARN("GuC log buffer for state_capture may be too small. %d < %d\n",
+ CAPTURE_BUFFER_SIZE, intel_guc_capture_output_min_size_est(guc));
+
guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE +
CAPTURE_BUFFER_SIZE;
@@ -474,6 +495,17 @@ int intel_guc_log_create(struct intel_guc_log *log)
}
log->vma = vma;
+ /*
+ * Create a WC (Uncached for read) vmalloc mapping up front for immediate
+ * access to data from memory during critical events such as error capture
+ */
+ vaddr = i915_gem_object_pin_map_unlocked(log->vma->obj, I915_MAP_WC);
+ if (IS_ERR(vaddr)) {
+ ret = PTR_ERR(vaddr);
+ i915_vma_unpin_and_release(&log->vma, 0);
+ goto err;
+ }
+ log->buf_addr = vaddr;
log->level = __get_default_log_level(log);
DRM_DEBUG_DRIVER("guc_log_level=%d (%s, verbose:%s, verbosity:%d)\n",
@@ -484,13 +516,14 @@ int intel_guc_log_create(struct intel_guc_log *log)
return 0;
err:
- DRM_ERROR("Failed to allocate GuC log buffer. %d\n", ret);
+ DRM_ERROR("Failed to allocate or map GuC log buffer. %d\n", ret);
return ret;
}
void intel_guc_log_destroy(struct intel_guc_log *log)
{
- i915_vma_unpin_and_release(&log->vma, 0);
+ log->buf_addr = NULL;
+ i915_vma_unpin_and_release(&log->vma, I915_VMA_RELEASE_MAP);
}
int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
@@ -535,7 +568,7 @@ out_unlock:
bool intel_guc_log_relay_created(const struct intel_guc_log *log)
{
- return log->relay.buf_addr;
+ return log->buf_addr;
}
int intel_guc_log_relay_open(struct intel_guc_log *log)
@@ -566,7 +599,7 @@ int intel_guc_log_relay_open(struct intel_guc_log *log)
if (ret)
goto out_unlock;
- ret = guc_log_map(log);
+ ret = guc_log_relay_map(log);
if (ret)
goto out_relay;
@@ -616,8 +649,8 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
with_intel_runtime_pm(guc_to_gt(guc)->uncore->rpm, wakeref)
guc_action_flush_log(guc);
- /* GuC would have updated log buffer by now, so capture it */
- guc_log_capture_logs(log);
+ /* GuC would have updated log buffer by now, so copy it */
+ guc_log_copy_debuglogs_for_relay(log);
}
/*
@@ -646,7 +679,7 @@ void intel_guc_log_relay_close(struct intel_guc_log *log)
mutex_lock(&log->relay.lock);
GEM_BUG_ON(!intel_guc_log_relay_created(log));
- guc_log_unmap(log);
+ guc_log_relay_unmap(log);
guc_log_relay_destroy(log);
mutex_unlock(&log->relay.lock);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
index d7e1b6471fed..18007e639be9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
@@ -49,8 +49,9 @@ struct intel_guc;
struct intel_guc_log {
u32 level;
struct i915_vma *vma;
+ void *buf_addr;
struct {
- void *buf_addr;
+ bool buf_in_use;
bool started;
struct work_struct flush_work;
struct rchan *channel;
@@ -66,6 +67,10 @@ struct intel_guc_log {
};
void intel_guc_log_init_early(struct intel_guc_log *log);
+bool intel_guc_check_log_buf_overflow(struct intel_guc_log *log, enum guc_log_buffer_type type,
+ unsigned int full_cnt);
+unsigned int intel_guc_get_log_buffer_size(enum guc_log_buffer_type type);
+size_t intel_guc_get_log_buffer_offset(enum guc_log_buffer_type type);
int intel_guc_log_create(struct intel_guc_log *log);
void intel_guc_log_destroy(struct intel_guc_log *log);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 9f032c65a488..1db833da42df 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -153,8 +153,8 @@ static int slpc_query_task_state(struct intel_guc_slpc *slpc)
ret = guc_action_slpc_query(guc, offset);
if (unlikely(ret))
- drm_err(&i915->drm, "Failed to query task state (%pe)\n",
- ERR_PTR(ret));
+ i915_probe_error(i915, "Failed to query task state (%pe)\n",
+ ERR_PTR(ret));
drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES);
@@ -171,8 +171,8 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value)
ret = guc_action_slpc_set_param(guc, id, value);
if (ret)
- drm_err(&i915->drm, "Failed to set param %d to %u (%pe)\n",
- id, value, ERR_PTR(ret));
+ i915_probe_error(i915, "Failed to set param %d to %u (%pe)\n",
+ id, value, ERR_PTR(ret));
return ret;
}
@@ -212,8 +212,8 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
freq);
if (ret)
- drm_err(&i915->drm, "Unable to force min freq to %u: %d",
- freq, ret);
+ i915_probe_error(i915, "Unable to force min freq to %u: %d",
+ freq, ret);
}
return ret;
@@ -248,9 +248,9 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr);
if (unlikely(err)) {
- drm_err(&i915->drm,
- "Failed to allocate SLPC struct (err=%pe)\n",
- ERR_PTR(err));
+ i915_probe_error(i915,
+ "Failed to allocate SLPC struct (err=%pe)\n",
+ ERR_PTR(err));
return err;
}
@@ -317,15 +317,15 @@ static int slpc_reset(struct intel_guc_slpc *slpc)
ret = guc_action_slpc_reset(guc, offset);
if (unlikely(ret < 0)) {
- drm_err(&i915->drm, "SLPC reset action failed (%pe)\n",
- ERR_PTR(ret));
+ i915_probe_error(i915, "SLPC reset action failed (%pe)\n",
+ ERR_PTR(ret));
return ret;
}
if (!ret) {
if (wait_for(slpc_is_running(slpc), SLPC_RESET_TIMEOUT_MS)) {
- drm_err(&i915->drm, "SLPC not enabled! State = %s\n",
- slpc_get_state_string(slpc));
+ i915_probe_error(i915, "SLPC not enabled! State = %s\n",
+ slpc_get_state_string(slpc));
return -EIO;
}
}
@@ -582,16 +582,12 @@ static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc)
static void slpc_get_rp_values(struct intel_guc_slpc *slpc)
{
struct intel_rps *rps = &slpc_to_gt(slpc)->rps;
- u32 rp_state_cap;
+ struct intel_rps_freq_caps caps;
- rp_state_cap = intel_rps_read_state_cap(rps);
-
- slpc->rp0_freq = REG_FIELD_GET(RP0_CAP_MASK, rp_state_cap) *
- GT_FREQUENCY_MULTIPLIER;
- slpc->rp1_freq = REG_FIELD_GET(RP1_CAP_MASK, rp_state_cap) *
- GT_FREQUENCY_MULTIPLIER;
- slpc->min_freq = REG_FIELD_GET(RPN_CAP_MASK, rp_state_cap) *
- GT_FREQUENCY_MULTIPLIER;
+ gen6_rps_get_freq_caps(rps, &caps);
+ slpc->rp0_freq = intel_gpu_freq(rps, caps.rp0_freq);
+ slpc->rp1_freq = intel_gpu_freq(rps, caps.rp1_freq);
+ slpc->min_freq = intel_gpu_freq(rps, caps.min_freq);
if (!slpc->boost_freq)
slpc->boost_freq = slpc->rp0_freq;
@@ -621,8 +617,8 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
ret = slpc_reset(slpc);
if (unlikely(ret < 0)) {
- drm_err(&i915->drm, "SLPC Reset event returned (%pe)\n",
- ERR_PTR(ret));
+ i915_probe_error(i915, "SLPC Reset event returned (%pe)\n",
+ ERR_PTR(ret));
return ret;
}
@@ -637,24 +633,24 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
/* Ignore efficient freq and set min to platform min */
ret = slpc_ignore_eff_freq(slpc, true);
if (unlikely(ret)) {
- drm_err(&i915->drm, "Failed to set SLPC min to RPn (%pe)\n",
- ERR_PTR(ret));
+ i915_probe_error(i915, "Failed to set SLPC min to RPn (%pe)\n",
+ ERR_PTR(ret));
return ret;
}
/* Set SLPC max limit to RP0 */
ret = slpc_use_fused_rp0(slpc);
if (unlikely(ret)) {
- drm_err(&i915->drm, "Failed to set SLPC max to RP0 (%pe)\n",
- ERR_PTR(ret));
+ i915_probe_error(i915, "Failed to set SLPC max to RP0 (%pe)\n",
+ ERR_PTR(ret));
return ret;
}
/* Revert SLPC min/max to softlimits if necessary */
ret = slpc_set_softlimits(slpc);
if (unlikely(ret)) {
- drm_err(&i915->drm, "Failed to set SLPC softlimits (%pe)\n",
- ERR_PTR(ret));
+ i915_probe_error(i915, "Failed to set SLPC softlimits (%pe)\n",
+ ERR_PTR(ret));
return ret;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 1ce7e04aa837..61a6f2424e24 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -25,6 +25,7 @@
#include "gt/intel_ring.h"
#include "intel_guc_ads.h"
+#include "intel_guc_capture.h"
#include "intel_guc_submission.h"
#include "i915_drv.h"
@@ -161,7 +162,8 @@ guc_create_parallel(struct intel_engine_cs **engines,
#define SCHED_STATE_ENABLED BIT(4)
#define SCHED_STATE_PENDING_ENABLE BIT(5)
#define SCHED_STATE_REGISTERED BIT(6)
-#define SCHED_STATE_BLOCKED_SHIFT 7
+#define SCHED_STATE_POLICY_REQUIRED BIT(7)
+#define SCHED_STATE_BLOCKED_SHIFT 8
#define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT)
#define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT)
@@ -300,6 +302,23 @@ static inline void clr_context_registered(struct intel_context *ce)
ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
}
+static inline bool context_policy_required(struct intel_context *ce)
+{
+ return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED;
+}
+
+static inline void set_context_policy_required(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED;
+}
+
+static inline void clr_context_policy_required(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
+}
+
static inline u32 context_blocked(struct intel_context *ce)
{
return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
@@ -351,12 +370,12 @@ request_to_scheduling_context(struct i915_request *rq)
static inline bool context_guc_id_invalid(struct intel_context *ce)
{
- return ce->guc_id.id == GUC_INVALID_LRC_ID;
+ return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
}
static inline void set_context_guc_id_invalid(struct intel_context *ce)
{
- ce->guc_id.id = GUC_INVALID_LRC_ID;
+ ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
}
static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
@@ -395,12 +414,12 @@ struct sync_semaphore {
};
struct parent_scratch {
- struct guc_process_desc pdesc;
+ struct guc_sched_wq_desc wq_desc;
struct sync_semaphore go;
struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
- u8 unused[WQ_OFFSET - sizeof(struct guc_process_desc) -
+ u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
u32 wq[WQ_SIZE / sizeof(u32)];
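/*
 * Illustrative note (not part of the patch): the unused[] pad above
 * keeps wq[] at the fixed WQ_OFFSET within the parent scratch area;
 * only the descriptor type changes in this patch, so swapping
 * guc_process_desc for guc_sched_wq_desc in the pad arithmetic is
 * enough to preserve the work queue placement.
 */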
@@ -437,15 +456,15 @@ __get_parent_scratch(struct intel_context *ce)
LRC_STATE_OFFSET) / sizeof(u32)));
}
-static struct guc_process_desc *
-__get_process_desc(struct intel_context *ce)
+static struct guc_sched_wq_desc *
+__get_wq_desc(struct intel_context *ce)
{
struct parent_scratch *ps = __get_parent_scratch(ce);
- return &ps->pdesc;
+ return &ps->wq_desc;
}
-static u32 *get_wq_pointer(struct guc_process_desc *desc,
+static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc,
struct intel_context *ce,
u32 wqi_size)
{
@@ -457,7 +476,7 @@ static u32 *get_wq_pointer(struct guc_process_desc *desc,
#define AVAILABLE_SPACE \
CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
if (wqi_size > AVAILABLE_SPACE) {
- ce->parallel.guc.wqi_head = READ_ONCE(desc->head);
+ ce->parallel.guc.wqi_head = READ_ONCE(wq_desc->head);
if (wqi_size > AVAILABLE_SPACE)
return NULL;
@@ -467,75 +486,27 @@ static u32 *get_wq_pointer(struct guc_process_desc *desc,
return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
}
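/*
 * Illustrative note (not part of the patch): AVAILABLE_SPACE above is
 * the stock linux/circ_buf.h space computation; assuming WQ_SIZE is a
 * power of two it evaluates to (head - tail - 1) & (WQ_SIZE - 1), the
 * number of bytes writable before the tail would overrun the head.
 * The shared wq_desc->head is re-read (READ_ONCE) only when the cached
 * head shows too little room, so the common case avoids touching
 * memory the GuC writes.
 */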
-static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index)
-{
- struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr;
-
- GEM_BUG_ON(index >= GUC_MAX_LRC_DESCRIPTORS);
-
- return &base[index];
-}
-
static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
{
struct intel_context *ce = xa_load(&guc->context_lookup, id);
- GEM_BUG_ON(id >= GUC_MAX_LRC_DESCRIPTORS);
+ GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID);
return ce;
}
-static int guc_lrc_desc_pool_create(struct intel_guc *guc)
-{
- u32 size;
- int ret;
-
- size = PAGE_ALIGN(sizeof(struct guc_lrc_desc) *
- GUC_MAX_LRC_DESCRIPTORS);
- ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool,
- (void **)&guc->lrc_desc_pool_vaddr);
- if (ret)
- return ret;
-
- return 0;
-}
-
-static void guc_lrc_desc_pool_destroy(struct intel_guc *guc)
-{
- guc->lrc_desc_pool_vaddr = NULL;
- i915_vma_unpin_and_release(&guc->lrc_desc_pool, I915_VMA_RELEASE_MAP);
-}
-
static inline bool guc_submission_initialized(struct intel_guc *guc)
{
- return !!guc->lrc_desc_pool_vaddr;
+ return guc->submission_initialized;
}
-static inline void reset_lrc_desc(struct intel_guc *guc, u32 id)
-{
- if (likely(guc_submission_initialized(guc))) {
- struct guc_lrc_desc *desc = __get_lrc_desc(guc, id);
- unsigned long flags;
-
- memset(desc, 0, sizeof(*desc));
-
- /*
- * xarray API doesn't have xa_erase_irqsave wrapper, so calling
- * the lower level functions directly.
- */
- xa_lock_irqsave(&guc->context_lookup, flags);
- __xa_erase(&guc->context_lookup, id);
- xa_unlock_irqrestore(&guc->context_lookup, flags);
- }
-}
-
-static inline bool lrc_desc_registered(struct intel_guc *guc, u32 id)
+static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
{
return __get_context(guc, id);
}
-static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id,
- struct intel_context *ce)
+static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
+ struct intel_context *ce)
{
unsigned long flags;
@@ -548,6 +519,22 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id,
xa_unlock_irqrestore(&guc->context_lookup, flags);
}
+static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
+{
+ unsigned long flags;
+
+ if (unlikely(!guc_submission_initialized(guc)))
+ return;
+
+ /*
+ * xarray API doesn't have xa_erase_irqsave wrapper, so calling
+ * the lower level functions directly.
+ */
+ xa_lock_irqsave(&guc->context_lookup, flags);
+ __xa_erase(&guc->context_lookup, id);
+ xa_unlock_irqrestore(&guc->context_lookup, flags);
+}
+
static void decr_outstanding_submission_g2h(struct intel_guc *guc)
{
if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
@@ -624,7 +611,8 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
true, timeout);
}
-static int guc_lrc_desc_pin(struct intel_context *ce, bool loop);
+static int guc_context_policy_init(struct intel_context *ce, bool loop);
+static int try_context_registration(struct intel_context *ce, bool loop);
static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
@@ -650,6 +638,12 @@ static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
GEM_BUG_ON(context_guc_id_invalid(ce));
+ if (context_policy_required(ce)) {
+ err = guc_context_policy_init(ce, false);
+ if (err)
+ return err;
+ }
+
spin_lock(&ce->guc_state.lock);
/*
@@ -743,7 +737,7 @@ static u32 wq_space_until_wrap(struct intel_context *ce)
return (WQ_SIZE - ce->parallel.guc.wqi_tail);
}
-static void write_wqi(struct guc_process_desc *desc,
+static void write_wqi(struct guc_sched_wq_desc *wq_desc,
struct intel_context *ce,
u32 wqi_size)
{
@@ -756,13 +750,13 @@ static void write_wqi(struct guc_process_desc *desc,
ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
(WQ_SIZE - 1);
- WRITE_ONCE(desc->tail, ce->parallel.guc.wqi_tail);
+ WRITE_ONCE(wq_desc->tail, ce->parallel.guc.wqi_tail);
}
static int guc_wq_noop_append(struct intel_context *ce)
{
- struct guc_process_desc *desc = __get_process_desc(ce);
- u32 *wqi = get_wq_pointer(desc, ce, wq_space_until_wrap(ce));
+ struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
+ u32 *wqi = get_wq_pointer(wq_desc, ce, wq_space_until_wrap(ce));
u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
if (!wqi)
@@ -781,7 +775,7 @@ static int __guc_wq_item_append(struct i915_request *rq)
{
struct intel_context *ce = request_to_scheduling_context(rq);
struct intel_context *child;
- struct guc_process_desc *desc = __get_process_desc(ce);
+ struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
unsigned int wqi_size = (ce->parallel.number_children + 4) *
sizeof(u32);
u32 *wqi;
@@ -792,7 +786,7 @@ static int __guc_wq_item_append(struct i915_request *rq)
GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
GEM_BUG_ON(context_guc_id_invalid(ce));
GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
- GEM_BUG_ON(!lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id));
+ GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id));
/* Insert NOOP if this work queue item will wrap the tail pointer. */
if (wqi_size > wq_space_until_wrap(ce)) {
@@ -801,7 +795,7 @@ static int __guc_wq_item_append(struct i915_request *rq)
return ret;
}
- wqi = get_wq_pointer(desc, ce, wqi_size);
+ wqi = get_wq_pointer(wq_desc, ce, wqi_size);
if (!wqi)
return -EBUSY;
@@ -816,7 +810,7 @@ static int __guc_wq_item_append(struct i915_request *rq)
for_each_child(ce, child)
*wqi++ = child->ring->tail / sizeof(u64);
- write_wqi(desc, ce, wqi_size);
+ write_wqi(wq_desc, ce, wqi_size);
return 0;
}
@@ -920,9 +914,9 @@ register_context:
if (submit) {
struct intel_context *ce = request_to_scheduling_context(last);
- if (unlikely(!lrc_desc_registered(guc, ce->guc_id.id) &&
+ if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) &&
!intel_context_is_banned(ce))) {
- ret = guc_lrc_desc_pin(ce, false);
+ ret = try_context_registration(ce, false);
if (unlikely(ret == -EPIPE)) {
goto deadlk;
} else if (ret == -EBUSY) {
@@ -1546,6 +1540,89 @@ static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
lrc_update_regs(ce, engine, head);
}
+static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
+{
+ static const i915_reg_t _reg[I915_NUM_ENGINES] = {
+ [RCS0] = MSG_IDLE_CS,
+ [BCS0] = MSG_IDLE_BCS,
+ [VCS0] = MSG_IDLE_VCS0,
+ [VCS1] = MSG_IDLE_VCS1,
+ [VCS2] = MSG_IDLE_VCS2,
+ [VCS3] = MSG_IDLE_VCS3,
+ [VCS4] = MSG_IDLE_VCS4,
+ [VCS5] = MSG_IDLE_VCS5,
+ [VCS6] = MSG_IDLE_VCS6,
+ [VCS7] = MSG_IDLE_VCS7,
+ [VECS0] = MSG_IDLE_VECS0,
+ [VECS1] = MSG_IDLE_VECS1,
+ [VECS2] = MSG_IDLE_VECS2,
+ [VECS3] = MSG_IDLE_VECS3,
+ [CCS0] = MSG_IDLE_CS,
+ [CCS1] = MSG_IDLE_CS,
+ [CCS2] = MSG_IDLE_CS,
+ [CCS3] = MSG_IDLE_CS,
+ };
+ u32 val;
+
+ if (!_reg[engine->id].reg)
+ return 0;
+
+ val = intel_uncore_read(engine->uncore, _reg[engine->id]);
+
+ /* bits[29:25] & bits[13:9] >> shift */
+ return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT;
+}
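/*
 * Worked example (values are illustrative, not from the patch): for
 * val = 0x3e001200, bits[29:25] = 0x1f is the valid mask and
 * bits[13:9] = 0x09 are the pending bits. val >> 16 re-aligns the
 * mask from [29:25] onto [13:9], so val & (val >> 16) keeps only
 * pending bits whose mask bit is set; masking with MSG_IDLE_FW_MASK
 * and shifting by MSG_IDLE_FW_SHIFT then yields 0x09.
 */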
+
+static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask)
+{
+ int ret;
+
+ /* Ensure GPM receives fw up/down after CS is stopped */
+ udelay(1);
+
+ /* Wait for forcewake request to complete in GPM */
+ ret = __intel_wait_for_register_fw(gt->uncore,
+ GEN9_PWRGT_DOMAIN_STATUS,
+ fw_mask, fw_mask, 5000, 0, NULL);
+
+ /* Ensure CS receives fw ack from GPM */
+ udelay(1);
+
+ if (ret)
+ GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret);
+}
+
+/*
+ * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any
+ * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The
+ * pending status is indicated by bits[13:9] (masked by bits[29:25]) in the
+ * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we
+ * are concerned only with the gt reset here, we use a logical OR of pending
+ * forcewakeups from all reset domains and then wait for them to complete by
+ * querying PWRGT_DOMAIN_STATUS.
+ */
+static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
+{
+ u32 fw_pending;
+
+ if (GRAPHICS_VER(engine->i915) != 12)
+ return;
+
+ /*
+ * Wa_22011802037
+ * TODO: Occasionally trying to stop the cs times out, but does not
+ * adversely affect functionality. The timeout is set as a config
+ * parameter that defaults to 100ms. Assuming that this timeout is
+ * sufficient for any pending MI_FORCEWAKEs to complete, ignore the
+ * timeout returned here until it is root caused.
+ */
+ intel_engine_stop_cs(engine);
+
+ fw_pending = __cs_pending_mi_force_wakes(engine);
+ if (fw_pending)
+ __gpm_wait_for_fw_complete(engine->gt, fw_pending);
+}
+
static void guc_reset_nop(struct intel_engine_cs *engine)
{
}
@@ -1804,20 +1881,10 @@ static void reset_fail_worker_func(struct work_struct *w);
int intel_guc_submission_init(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
- int ret;
- if (guc->lrc_desc_pool)
+ if (guc->submission_initialized)
return 0;
- ret = guc_lrc_desc_pool_create(guc);
- if (ret)
- return ret;
- /*
- * Keep static analysers happy, let them know that we allocated the
- * vma after testing that it didn't exist earlier.
- */
- GEM_BUG_ON(!guc->lrc_desc_pool);
-
guc->submission_state.guc_ids_bitmap =
bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
if (!guc->submission_state.guc_ids_bitmap)
@@ -1825,19 +1892,20 @@ int intel_guc_submission_init(struct intel_guc *guc)
guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
guc->timestamp.shift = gpm_timestamp_shift(gt);
+ guc->submission_initialized = true;
return 0;
}
void intel_guc_submission_fini(struct intel_guc *guc)
{
- if (!guc->lrc_desc_pool)
+ if (!guc->submission_initialized)
return;
guc_flush_destroyed_contexts(guc);
- guc_lrc_desc_pool_destroy(guc);
i915_sched_engine_put(guc->sched_engine);
bitmap_free(guc->submission_state.guc_ids_bitmap);
+ guc->submission_initialized = false;
}
static inline void queue_request(struct i915_sched_engine *sched_engine,
@@ -1884,7 +1952,7 @@ static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
return submission_disabled(guc) || guc->stalled_request ||
!i915_sched_engine_is_empty(sched_engine) ||
- !lrc_desc_registered(guc, ce->guc_id.id);
+ !ctx_id_mapped(guc, ce->guc_id.id);
}
static void guc_submit_request(struct i915_request *rq)
@@ -1941,7 +2009,7 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
else
ida_simple_remove(&guc->submission_state.guc_ids,
ce->guc_id.id);
- reset_lrc_desc(guc, ce->guc_id.id);
+ clr_ctx_id_mapping(guc, ce->guc_id.id);
set_context_guc_id_invalid(ce);
}
if (!list_empty(&ce->guc_id.link))
@@ -2094,65 +2162,96 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
static int __guc_action_register_multi_lrc(struct intel_guc *guc,
struct intel_context *ce,
- u32 guc_id,
- u32 offset,
+ struct guc_ctxt_registration_info *info,
bool loop)
{
struct intel_context *child;
- u32 action[4 + MAX_ENGINE_INSTANCE];
+ u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
int len = 0;
+ u32 next_id;
GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
- action[len++] = guc_id;
+ action[len++] = info->flags;
+ action[len++] = info->context_idx;
+ action[len++] = info->engine_class;
+ action[len++] = info->engine_submit_mask;
+ action[len++] = info->wq_desc_lo;
+ action[len++] = info->wq_desc_hi;
+ action[len++] = info->wq_base_lo;
+ action[len++] = info->wq_base_hi;
+ action[len++] = info->wq_size;
action[len++] = ce->parallel.number_children + 1;
- action[len++] = offset;
+ action[len++] = info->hwlrca_lo;
+ action[len++] = info->hwlrca_hi;
+
+ next_id = info->context_idx + 1;
for_each_child(ce, child) {
- offset += sizeof(struct guc_lrc_desc);
- action[len++] = offset;
+ GEM_BUG_ON(next_id++ != child->guc_id.id);
+
+ /*
+ * NB: GuC interface supports 64 bit LRCA even though i915/HW
+ * only supports 32 bit currently.
+ */
+ action[len++] = lower_32_bits(child->lrc.lrca);
+ action[len++] = upper_32_bits(child->lrc.lrca);
}
+ GEM_BUG_ON(len > ARRAY_SIZE(action));
+
return guc_submission_send_busy_loop(guc, action, len, 0, loop);
}
static int __guc_action_register_context(struct intel_guc *guc,
- u32 guc_id,
- u32 offset,
+ struct guc_ctxt_registration_info *info,
bool loop)
{
u32 action[] = {
INTEL_GUC_ACTION_REGISTER_CONTEXT,
- guc_id,
- offset,
+ info->flags,
+ info->context_idx,
+ info->engine_class,
+ info->engine_submit_mask,
+ info->wq_desc_lo,
+ info->wq_desc_hi,
+ info->wq_base_lo,
+ info->wq_base_hi,
+ info->wq_size,
+ info->hwlrca_lo,
+ info->hwlrca_hi,
};
return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
0, loop);
}
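/*
 * Illustrative note: assumed dword layout of the REGISTER_CONTEXT H2G
 * message, reconstructed from the action[] array above (indices shown
 * for orientation only):
 *   [0] action  [1] flags  [2] context_idx  [3] engine_class
 *   [4] engine_submit_mask  [5:6] wq_desc lo/hi  [7:8] wq_base lo/hi
 *   [9] wq_size  [10:11] hwlrca lo/hi
 * The multi-LRC variant above inserts a context count before the HW
 * LRCA and appends one 64-bit LRCA per child context.
 */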
+static void prepare_context_registration_info(struct intel_context *ce,
+ struct guc_ctxt_registration_info *info);
+
static int register_context(struct intel_context *ce, bool loop)
{
+ struct guc_ctxt_registration_info info;
struct intel_guc *guc = ce_to_guc(ce);
- u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) +
- ce->guc_id.id * sizeof(struct guc_lrc_desc);
int ret;
GEM_BUG_ON(intel_context_is_child(ce));
trace_intel_context_register(ce);
+ prepare_context_registration_info(ce, &info);
+
if (intel_context_is_parent(ce))
- ret = __guc_action_register_multi_lrc(guc, ce, ce->guc_id.id,
- offset, loop);
+ ret = __guc_action_register_multi_lrc(guc, ce, &info, loop);
else
- ret = __guc_action_register_context(guc, ce->guc_id.id, offset,
- loop);
+ ret = __guc_action_register_context(guc, &info, loop);
if (likely(!ret)) {
unsigned long flags;
spin_lock_irqsave(&ce->guc_state.lock, flags);
set_context_registered(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+
+ guc_context_policy_init(ce, loop);
}
return ret;
@@ -2202,33 +2301,120 @@ static inline u32 get_children_join_value(struct intel_context *ce,
return __get_parent_scratch(ce)->join[child_index].semaphore;
}
-static void guc_context_policy_init(struct intel_engine_cs *engine,
- struct guc_lrc_desc *desc)
+struct context_policy {
+ u32 count;
+ struct guc_update_context_policy h2g;
+};
+
+static u32 __guc_context_policy_action_size(struct context_policy *policy)
{
- desc->policy_flags = 0;
+ size_t bytes = sizeof(policy->h2g.header) +
+ (sizeof(policy->h2g.klv[0]) * policy->count);
- if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
- desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE;
+ return bytes / sizeof(u32);
+}
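/*
 * Worked example (assuming a 2-dword header and 2-dword KLV entries,
 * per the klv[].kl / klv[].value pairs used below): a policy carrying
 * 3 KLVs is (2 + 3 * 2) * 4 bytes, which the division by sizeof(u32)
 * turns into the 8-dword length expected by the H2G send helper.
 */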
+
+static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id)
+{
+ policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
+ policy->h2g.header.ctx_id = guc_id;
+ policy->count = 0;
+}
+
+#define MAKE_CONTEXT_POLICY_ADD(func, id) \
+static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \
+{ \
+ GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
+ policy->h2g.klv[policy->count].kl = \
+ FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
+ FIELD_PREP(GUC_KLV_0_LEN, 1); \
+ policy->h2g.klv[policy->count].value = data; \
+ policy->count++; \
+}
+
+MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
+MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
+MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
+MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
+
+#undef MAKE_CONTEXT_POLICY_ADD
+
+static int __guc_context_set_context_policies(struct intel_guc *guc,
+ struct context_policy *policy,
+ bool loop)
+{
+ return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g,
+ __guc_context_policy_action_size(policy),
+ 0, loop);
+}
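/*
 * Sketch of the usage pattern the KLV helpers above are built for
 * (hypothetical caller, not part of the patch; it mirrors
 * __guc_context_set_preemption_timeout() later in this file).
 */
static int example_set_execution_quantum(struct intel_guc *guc,
					 u16 guc_id, u32 quantum_us)
{
	struct context_policy policy;

	__guc_context_policy_start_klv(&policy, guc_id);
	__guc_context_policy_add_execution_quantum(&policy, quantum_us);

	return __guc_context_set_context_policies(guc, &policy, true);
}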
+
+static int guc_context_policy_init(struct intel_context *ce, bool loop)
+{
+ struct intel_engine_cs *engine = ce->engine;
+ struct intel_guc *guc = &engine->gt->uc.guc;
+ struct context_policy policy;
+ u32 execution_quantum;
+ u32 preemption_timeout;
+ bool missing = false;
+ unsigned long flags;
+ int ret;
/* NB: For both of these, zero means disabled. */
- desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
- desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
+ execution_quantum = engine->props.timeslice_duration_ms * 1000;
+ preemption_timeout = engine->props.preempt_timeout_ms * 1000;
+
+ __guc_context_policy_start_klv(&policy, ce->guc_id.id);
+
+ __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
+ __guc_context_policy_add_execution_quantum(&policy, execution_quantum);
+ __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
+
+ if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
+ __guc_context_policy_add_preempt_to_idle(&policy, 1);
+
+ ret = __guc_context_set_context_policies(guc, &policy, loop);
+ missing = ret != 0;
+
+ if (!missing && intel_context_is_parent(ce)) {
+ struct intel_context *child;
+
+ for_each_child(ce, child) {
+ __guc_context_policy_start_klv(&policy, child->guc_id.id);
+
+ if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
+ __guc_context_policy_add_preempt_to_idle(&policy, 1);
+
+ child->guc_state.prio = ce->guc_state.prio;
+ __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
+ __guc_context_policy_add_execution_quantum(&policy, execution_quantum);
+ __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
+
+ ret = __guc_context_set_context_policies(guc, &policy, loop);
+ if (ret) {
+ missing = true;
+ break;
+ }
+ }
+ }
+
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
+ if (missing)
+ set_context_policy_required(ce);
+ else
+ clr_context_policy_required(ce);
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+
+ return ret;
}
-static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
+static void prepare_context_registration_info(struct intel_context *ce,
+ struct guc_ctxt_registration_info *info)
{
struct intel_engine_cs *engine = ce->engine;
- struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
struct intel_guc *guc = &engine->gt->uc.guc;
- u32 desc_idx = ce->guc_id.id;
- struct guc_lrc_desc *desc;
- bool context_registered;
- intel_wakeref_t wakeref;
- struct intel_context *child;
- int ret = 0;
+ u32 ctx_id = ce->guc_id.id;
GEM_BUG_ON(!engine->mask);
- GEM_BUG_ON(!sched_state_is_init(ce));
/*
* Ensure LRC + CT vmas are in same region as write barrier is done
@@ -2237,55 +2423,63 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
i915_gem_object_is_lmem(ce->ring->vma->obj));
- context_registered = lrc_desc_registered(guc, desc_idx);
-
- reset_lrc_desc(guc, desc_idx);
- set_lrc_desc_registered(guc, desc_idx, ce);
-
- desc = __get_lrc_desc(guc, desc_idx);
- desc->engine_class = engine_class_to_guc_class(engine->class);
- desc->engine_submit_mask = engine->logical_mask;
- desc->hw_context_desc = ce->lrc.lrca;
- desc->priority = ce->guc_state.prio;
- desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
- guc_context_policy_init(engine, desc);
+ memset(info, 0, sizeof(*info));
+ info->context_idx = ctx_id;
+ info->engine_class = engine_class_to_guc_class(engine->class);
+ info->engine_submit_mask = engine->logical_mask;
+ /*
+ * NB: GuC interface supports 64 bit LRCA even though i915/HW
+ * only supports 32 bit currently.
+ */
+ info->hwlrca_lo = lower_32_bits(ce->lrc.lrca);
+ info->hwlrca_hi = upper_32_bits(ce->lrc.lrca);
+ info->flags = CONTEXT_REGISTRATION_FLAG_KMD;
/*
* If context is a parent, we need to register a work queue descriptor
* and register all child contexts.
*/
if (intel_context_is_parent(ce)) {
- struct guc_process_desc *pdesc;
+ struct guc_sched_wq_desc *wq_desc;
+ u64 wq_desc_offset, wq_base_offset;
ce->parallel.guc.wqi_tail = 0;
ce->parallel.guc.wqi_head = 0;
- desc->process_desc = i915_ggtt_offset(ce->state) +
- __get_parent_scratch_offset(ce);
- desc->wq_addr = i915_ggtt_offset(ce->state) +
- __get_wq_offset(ce);
- desc->wq_size = WQ_SIZE;
+ wq_desc_offset = i915_ggtt_offset(ce->state) +
+ __get_parent_scratch_offset(ce);
+ wq_base_offset = i915_ggtt_offset(ce->state) +
+ __get_wq_offset(ce);
+ info->wq_desc_lo = lower_32_bits(wq_desc_offset);
+ info->wq_desc_hi = upper_32_bits(wq_desc_offset);
+ info->wq_base_lo = lower_32_bits(wq_base_offset);
+ info->wq_base_hi = upper_32_bits(wq_base_offset);
+ info->wq_size = WQ_SIZE;
- pdesc = __get_process_desc(ce);
- memset(pdesc, 0, sizeof(*(pdesc)));
- pdesc->stage_id = ce->guc_id.id;
- pdesc->wq_base_addr = desc->wq_addr;
- pdesc->wq_size_bytes = desc->wq_size;
- pdesc->wq_status = WQ_STATUS_ACTIVE;
-
- for_each_child(ce, child) {
- desc = __get_lrc_desc(guc, child->guc_id.id);
-
- desc->engine_class =
- engine_class_to_guc_class(engine->class);
- desc->hw_context_desc = child->lrc.lrca;
- desc->priority = ce->guc_state.prio;
- desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
- guc_context_policy_init(engine, desc);
- }
+ wq_desc = __get_wq_desc(ce);
+ memset(wq_desc, 0, sizeof(*wq_desc));
+ wq_desc->wq_status = WQ_STATUS_ACTIVE;
clear_children_join_go_memory(ce);
}
+}
+
+static int try_context_registration(struct intel_context *ce, bool loop)
+{
+ struct intel_engine_cs *engine = ce->engine;
+ struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
+ struct intel_guc *guc = &engine->gt->uc.guc;
+ intel_wakeref_t wakeref;
+ u32 ctx_id = ce->guc_id.id;
+ bool context_registered;
+ int ret = 0;
+
+ GEM_BUG_ON(!sched_state_is_init(ce));
+
+ context_registered = ctx_id_mapped(guc, ctx_id);
+
+ clr_ctx_id_mapping(guc, ctx_id);
+ set_ctx_id_mapping(guc, ctx_id, ce);
/*
* The context_lookup xarray is used to determine if the hardware
@@ -2311,7 +2505,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
}
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
if (unlikely(disabled)) {
- reset_lrc_desc(guc, desc_idx);
+ clr_ctx_id_mapping(guc, ctx_id);
return 0; /* Will get registered later */
}
@@ -2327,9 +2521,9 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
with_intel_runtime_pm(runtime_pm, wakeref)
ret = register_context(ce, loop);
if (unlikely(ret == -EBUSY)) {
- reset_lrc_desc(guc, desc_idx);
+ clr_ctx_id_mapping(guc, ctx_id);
} else if (unlikely(ret == -ENODEV)) {
- reset_lrc_desc(guc, desc_idx);
+ clr_ctx_id_mapping(guc, ctx_id);
ret = 0; /* Will get registered later */
}
}
@@ -2419,7 +2613,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc,
GUC_CONTEXT_DISABLE
};
- GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID);
+ GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID);
GEM_BUG_ON(intel_context_is_child(ce));
trace_intel_context_sched_disable(ce);
@@ -2516,7 +2710,7 @@ static bool context_cant_unblock(struct intel_context *ce)
return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) ||
context_guc_id_invalid(ce) ||
- !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id) ||
+ !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) ||
!intel_context_is_pinned(ce);
}
@@ -2580,13 +2774,11 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
u16 guc_id,
u32 preemption_timeout)
{
- u32 action[] = {
- INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT,
- guc_id,
- preemption_timeout
- };
+ struct context_policy policy;
- intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
+ __guc_context_policy_start_klv(&policy, guc_id);
+ __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
+ __guc_context_set_context_policies(guc, &policy, true);
}
static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
@@ -2686,7 +2878,7 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
bool disabled;
GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
- GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id));
+ GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id));
GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
GEM_BUG_ON(context_enabled(ce));
@@ -2803,7 +2995,7 @@ static void guc_context_destroy(struct kref *kref)
*/
spin_lock_irqsave(&guc->submission_state.lock, flags);
destroy = submission_disabled(guc) || context_guc_id_invalid(ce) ||
- !lrc_desc_registered(guc, ce->guc_id.id);
+ !ctx_id_mapped(guc, ce->guc_id.id);
if (likely(!destroy)) {
if (!list_empty(&ce->guc_id.link))
list_del_init(&ce->guc_id.link);
@@ -2831,16 +3023,20 @@ static int guc_context_alloc(struct intel_context *ce)
return lrc_alloc(ce, ce->engine);
}
+static void __guc_context_set_prio(struct intel_guc *guc,
+ struct intel_context *ce)
+{
+ struct context_policy policy;
+
+ __guc_context_policy_start_klv(&policy, ce->guc_id.id);
+ __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
+ __guc_context_set_context_policies(guc, &policy, true);
+}
+
static void guc_context_set_prio(struct intel_guc *guc,
struct intel_context *ce,
u8 prio)
{
- u32 action[] = {
- INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY,
- ce->guc_id.id,
- prio,
- };
-
GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH ||
prio > GUC_CLIENT_PRIORITY_NORMAL);
lockdep_assert_held(&ce->guc_state.lock);
@@ -2851,9 +3047,9 @@ static void guc_context_set_prio(struct intel_guc *guc,
return;
}
- guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
-
ce->guc_state.prio = prio;
+ __guc_context_set_prio(guc, ce);
+
trace_intel_context_set_prio(ce);
}
@@ -3046,7 +3242,7 @@ static void guc_signal_context_fence(struct intel_context *ce)
static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
{
return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
- !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)) &&
+ !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) &&
!submission_disabled(ce_to_guc(ce));
}
@@ -3123,7 +3319,7 @@ static int guc_request_alloc(struct i915_request *rq)
if (unlikely(ret < 0))
return ret;
if (context_needs_register(ce, !!ret)) {
- ret = guc_lrc_desc_pin(ce, true);
+ ret = try_context_registration(ce, true);
if (unlikely(ret)) { /* unwind */
if (ret == -EPIPE) {
disable_submission(guc);
@@ -3560,7 +3756,7 @@ static void guc_sanitize(struct intel_engine_cs *engine)
sanitize_hwsp(engine);
/* And scrub the dirty cachelines for the HWSP */
- clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
+ drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
intel_engine_reset_pinned_contexts(engine);
}
@@ -3595,7 +3791,7 @@ static int guc_resume(struct intel_engine_cs *engine)
setup_hwsp(engine);
start_engine(engine);
- if (engine->class == RENDER_CLASS)
+ if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
xehp_enable_ccs_engines(engine);
return 0;
@@ -3614,9 +3810,17 @@ static void guc_set_default_submission(struct intel_engine_cs *engine)
static inline void guc_kernel_context_pin(struct intel_guc *guc,
struct intel_context *ce)
{
+ /*
+ * Note: we purposefully do not check the returns below because
+ * the registration can only fail if a reset is just starting.
+ * This is called at the end of reset so presumably another reset
+ * isn't happening and even if it did, this code would be run again.
+ */
+
if (context_guc_id_invalid(ce))
pin_guc_id(guc, ce);
- guc_lrc_desc_pin(ce, true);
+
+ try_context_registration(ce, true);
}
static inline void guc_init_lrc_mapping(struct intel_guc *guc)
@@ -3634,13 +3838,7 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc)
* Also, after a reset of the GuC we want to make sure that the
* information shared with GuC is properly reset. The kernel LRCs are
* not attached to the gem_context, so they need to be added separately.
- *
- * Note: we purposefully do not check the return of guc_lrc_desc_pin,
- * because that function can only fail if a reset is just starting. This
- * is at the end of reset so presumably another reset isn't happening
- * and even it did this code would be run again.
*/
-
for_each_engine(engine, gt, id) {
struct intel_context *ce;
@@ -3680,7 +3878,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
engine->sched_engine->schedule = i915_schedule;
- engine->reset.prepare = guc_reset_nop;
+ engine->reset.prepare = guc_engine_reset_prepare;
engine->reset.rewind = guc_rewind_nop;
engine->reset.cancel = guc_reset_nop;
engine->reset.finish = guc_reset_nop;
@@ -3699,6 +3897,10 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
engine->flags |= I915_ENGINE_HAS_TIMESLICES;
+ /* Wa_14014475959:dg2 */
+ if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS)
+ engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
+
/*
* TODO: GuC supports timeslicing and semaphores as well, but they're
* handled by the firmware so some minor tweaks are required before
@@ -3835,32 +4037,32 @@ void intel_guc_submission_init_early(struct intel_guc *guc)
spin_lock_init(&guc->timestamp.lock);
INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
- guc->submission_state.num_guc_ids = GUC_MAX_LRC_DESCRIPTORS;
+ guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID;
guc->submission_supported = __guc_submission_supported(guc);
guc->submission_selected = __guc_submission_selected(guc);
}
static inline struct intel_context *
-g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
+g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
{
struct intel_context *ce;
- if (unlikely(desc_idx >= GUC_MAX_LRC_DESCRIPTORS)) {
+ if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) {
drm_err(&guc_to_gt(guc)->i915->drm,
- "Invalid desc_idx %u", desc_idx);
+ "Invalid ctx_id %u\n", ctx_id);
return NULL;
}
- ce = __get_context(guc, desc_idx);
+ ce = __get_context(guc, ctx_id);
if (unlikely(!ce)) {
drm_err(&guc_to_gt(guc)->i915->drm,
- "Context is NULL, desc_idx %u", desc_idx);
+ "Context is NULL, ctx_id %u\n", ctx_id);
return NULL;
}
if (unlikely(intel_context_is_child(ce))) {
drm_err(&guc_to_gt(guc)->i915->drm,
- "Context is child, desc_idx %u", desc_idx);
+ "Context is child, ctx_id %u\n", ctx_id);
return NULL;
}
@@ -3872,14 +4074,15 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
u32 len)
{
struct intel_context *ce;
- u32 desc_idx = msg[0];
+ u32 ctx_id;
if (unlikely(len < 1)) {
- drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
+ drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
return -EPROTO;
}
+ ctx_id = msg[0];
- ce = g2h_context_lookup(guc, desc_idx);
+ ce = g2h_context_lookup(guc, ctx_id);
if (unlikely(!ce))
return -EPROTO;
@@ -3923,14 +4126,15 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
{
struct intel_context *ce;
unsigned long flags;
- u32 desc_idx = msg[0];
+ u32 ctx_id;
if (unlikely(len < 2)) {
- drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
+ drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
return -EPROTO;
}
+ ctx_id = msg[0];
- ce = g2h_context_lookup(guc, desc_idx);
+ ce = g2h_context_lookup(guc, ctx_id);
if (unlikely(!ce))
return -EPROTO;
@@ -3938,8 +4142,8 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
(!context_pending_enable(ce) &&
!context_pending_disable(ce)))) {
drm_err(&guc_to_gt(guc)->i915->drm,
- "Bad context sched_state 0x%x, desc_idx %u",
- ce->guc_state.sched_state, desc_idx);
+ "Bad context sched_state 0x%x, ctx_id %u\n",
+ ce->guc_state.sched_state, ctx_id);
return -EPROTO;
}
@@ -4005,7 +4209,7 @@ static void capture_error_state(struct intel_guc *guc,
intel_engine_set_hung_context(engine, ce);
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
- i915_capture_error_state(gt, engine->mask);
+ i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE);
atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]);
}
@@ -4037,14 +4241,14 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
{
struct intel_context *ce;
unsigned long flags;
- int desc_idx;
+ int ctx_id;
if (unlikely(len != 1)) {
drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
return -EPROTO;
}
- desc_idx = msg[0];
+ ctx_id = msg[0];
/*
* The context lookup uses the xarray but lookups only require an RCU lock
@@ -4053,7 +4257,7 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
* asynchronously until the reset is done.
*/
xa_lock_irqsave(&guc->context_lookup, flags);
- ce = g2h_context_lookup(guc, desc_idx);
+ ce = g2h_context_lookup(guc, ctx_id);
if (ce)
intel_context_get(ce);
xa_unlock_irqrestore(&guc->context_lookup, flags);
@@ -4070,23 +4274,24 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
int intel_guc_error_capture_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len)
{
- int status;
+ u32 status;
if (unlikely(len != 1)) {
drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
return -EPROTO;
}
- status = msg[0];
- drm_info(&guc_to_gt(guc)->i915->drm, "Got error capture: status = %d", status);
+ status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
+ if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
+ drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space");
- /* FIXME: Do something with the capture */
+ intel_guc_capture_process(guc);
return 0;
}
-static struct intel_engine_cs *
-guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
+struct intel_engine_cs *
+intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
{
struct intel_gt *gt = guc_to_gt(guc);
u8 engine_class = guc_class_to_engine_class(guc_class);
@@ -4135,7 +4340,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
instance = msg[1];
reason = msg[2];
- engine = guc_lookup_engine(guc, guc_class, instance);
+ engine = intel_guc_lookup_engine(guc, guc_class, instance);
if (unlikely(!engine)) {
drm_err(&gt->i915->drm,
"Invalid engine %d:%d", guc_class, instance);
@@ -4333,17 +4538,17 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
guc_log_context_priority(p, ce);
if (intel_context_is_parent(ce)) {
- struct guc_process_desc *desc = __get_process_desc(ce);
+ struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
struct intel_context *child;
drm_printf(p, "\t\tNumber children: %u\n",
ce->parallel.number_children);
drm_printf(p, "\t\tWQI Head: %u\n",
- READ_ONCE(desc->head));
+ READ_ONCE(wq_desc->head));
drm_printf(p, "\t\tWQI Tail: %u\n",
- READ_ONCE(desc->tail));
+ READ_ONCE(wq_desc->tail));
drm_printf(p, "\t\tWQI Status: %u\n\n",
- READ_ONCE(desc->wq_status));
+ READ_ONCE(wq_desc->wq_status));
if (ce->engine->emit_bb_start ==
emit_bb_start_parent_no_preempt_mid_batch) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 541f180aaa29..a876d39e6bcf 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -53,21 +53,21 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
* firmware as TGL.
*/
#define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
- fw_def(ALDERLAKE_P, 0, guc_def(adlp, 69, 0, 3)) \
- fw_def(ALDERLAKE_S, 0, guc_def(tgl, 69, 0, 3)) \
- fw_def(DG1, 0, guc_def(dg1, 69, 0, 3)) \
- fw_def(ROCKETLAKE, 0, guc_def(tgl, 69, 0, 3)) \
- fw_def(TIGERLAKE, 0, guc_def(tgl, 69, 0, 3)) \
- fw_def(JASPERLAKE, 0, guc_def(ehl, 69, 0, 3)) \
- fw_def(ELKHARTLAKE, 0, guc_def(ehl, 69, 0, 3)) \
- fw_def(ICELAKE, 0, guc_def(icl, 69, 0, 3)) \
- fw_def(COMETLAKE, 5, guc_def(cml, 69, 0, 3)) \
- fw_def(COMETLAKE, 0, guc_def(kbl, 69, 0, 3)) \
- fw_def(COFFEELAKE, 0, guc_def(kbl, 69, 0, 3)) \
- fw_def(GEMINILAKE, 0, guc_def(glk, 69, 0, 3)) \
- fw_def(KABYLAKE, 0, guc_def(kbl, 69, 0, 3)) \
- fw_def(BROXTON, 0, guc_def(bxt, 69, 0, 3)) \
- fw_def(SKYLAKE, 0, guc_def(skl, 69, 0, 3))
+ fw_def(ALDERLAKE_P, 0, guc_def(adlp, 70, 1, 1)) \
+ fw_def(ALDERLAKE_S, 0, guc_def(tgl, 70, 1, 1)) \
+ fw_def(DG1, 0, guc_def(dg1, 70, 1, 1)) \
+ fw_def(ROCKETLAKE, 0, guc_def(tgl, 70, 1, 1)) \
+ fw_def(TIGERLAKE, 0, guc_def(tgl, 70, 1, 1)) \
+ fw_def(JASPERLAKE, 0, guc_def(ehl, 70, 1, 1)) \
+ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 70, 1, 1)) \
+ fw_def(ICELAKE, 0, guc_def(icl, 70, 1, 1)) \
+ fw_def(COMETLAKE, 5, guc_def(cml, 70, 1, 1)) \
+ fw_def(COMETLAKE, 0, guc_def(kbl, 70, 1, 1)) \
+ fw_def(COFFEELAKE, 0, guc_def(kbl, 70, 1, 1)) \
+ fw_def(GEMINILAKE, 0, guc_def(glk, 70, 1, 1)) \
+ fw_def(KABYLAKE, 0, guc_def(kbl, 70, 1, 1)) \
+ fw_def(BROXTON, 0, guc_def(bxt, 70, 1, 1)) \
+ fw_def(SKYLAKE, 0, guc_def(skl, 70, 1, 1))
#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \
fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index a115894d5896..1df71d0796ae 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -148,7 +148,7 @@ static int intel_guc_steal_guc_ids(void *arg)
struct i915_request *spin_rq = NULL, *rq, *last = NULL;
int number_guc_id_stolen = guc->number_guc_id_stolen;
- ce = kzalloc(sizeof(*ce) * GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL);
+ ce = kcalloc(GUC_MAX_CONTEXT_ID, sizeof(*ce), GFP_KERNEL);
if (!ce) {
pr_err("Context array allocation failed\n");
return -ENOMEM;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 02239395ce81..94e5c29d2ee3 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -309,7 +309,8 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file)
gpu = NULL;
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
- gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES);
+ gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES, CORE_DUMP_FLAG_NONE);
+
if (IS_ERR(gpu))
return PTR_ERR(gpu);
@@ -582,8 +583,9 @@ static int i915_wedged_get(void *data, u64 *val)
static int i915_wedged_set(void *data, u64 val)
{
struct drm_i915_private *i915 = data;
+ intel_gt_debugfs_reset_store(to_gt(i915), val);
- return intel_gt_debugfs_reset_store(to_gt(i915), val);
+ return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
@@ -731,15 +733,17 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
static int i915_forcewake_open(struct inode *inode, struct file *file)
{
struct drm_i915_private *i915 = inode->i_private;
+ intel_gt_pm_debugfs_forcewake_user_open(to_gt(i915));
- return intel_gt_pm_debugfs_forcewake_user_open(to_gt(i915));
+ return 0;
}
static int i915_forcewake_release(struct inode *inode, struct file *file)
{
struct drm_i915_private *i915 = inode->i_private;
+ intel_gt_pm_debugfs_forcewake_user_release(to_gt(i915));
- return intel_gt_pm_debugfs_forcewake_user_release(to_gt(i915));
+ return 0;
}
static const struct file_operations i915_forcewake_fops = {
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 09de45d3e274..3ffb617d75c9 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -77,6 +77,7 @@
#include "i915_file_private.h"
#include "i915_debugfs.h"
#include "i915_driver.h"
+#include "i915_drm_client.h"
#include "i915_drv.h"
#include "i915_getparam.h"
#include "i915_ioc32.h"
@@ -322,9 +323,7 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
intel_device_info_subplatform_init(dev_priv);
intel_step_init(dev_priv);
- intel_gt_init_early(to_gt(dev_priv), dev_priv);
intel_uncore_mmio_debug_init_early(&dev_priv->mmio_debug);
- intel_uncore_init_early(&dev_priv->uncore, to_gt(dev_priv));
spin_lock_init(&dev_priv->irq_lock);
spin_lock_init(&dev_priv->gpu_error.lock);
@@ -355,7 +354,9 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
intel_wopcm_init_early(&dev_priv->wopcm);
- __intel_gt_init_early(to_gt(dev_priv), dev_priv);
+ intel_root_gt_init_early(dev_priv);
+
+ i915_drm_clients_init(&dev_priv->clients, dev_priv);
i915_gem_init_early(dev_priv);
@@ -376,7 +377,8 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
err_gem:
i915_gem_cleanup_early(dev_priv);
- intel_gt_driver_late_release(to_gt(dev_priv));
+ intel_gt_driver_late_release_all(dev_priv);
+ i915_drm_clients_fini(&dev_priv->clients);
intel_region_ttm_device_fini(dev_priv);
err_ttm:
vlv_suspend_cleanup(dev_priv);
@@ -395,7 +397,8 @@ static void i915_driver_late_release(struct drm_i915_private *dev_priv)
intel_irq_fini(dev_priv);
intel_power_domains_cleanup(dev_priv);
i915_gem_cleanup_early(dev_priv);
- intel_gt_driver_late_release(to_gt(dev_priv));
+ intel_gt_driver_late_release_all(dev_priv);
+ i915_drm_clients_fini(&dev_priv->clients);
intel_region_ttm_device_fini(dev_priv);
vlv_suspend_cleanup(dev_priv);
i915_workqueues_cleanup(dev_priv);
@@ -426,13 +429,9 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv)
if (ret < 0)
return ret;
- ret = intel_uncore_setup_mmio(&dev_priv->uncore);
- if (ret < 0)
- goto err_bridge;
-
ret = intel_uncore_init_mmio(&dev_priv->uncore);
if (ret)
- goto err_mmio;
+ return ret;
/* Try to make sure MCHBAR is enabled before poking at it */
intel_setup_mchbar(dev_priv);
@@ -450,9 +449,6 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv)
err_uncore:
intel_teardown_mchbar(dev_priv);
intel_uncore_fini_mmio(&dev_priv->uncore);
-err_mmio:
- intel_uncore_cleanup_mmio(&dev_priv->uncore);
-err_bridge:
pci_dev_put(dev_priv->bridge_dev);
return ret;
@@ -466,7 +462,6 @@ static void i915_driver_mmio_release(struct drm_i915_private *dev_priv)
{
intel_teardown_mchbar(dev_priv);
intel_uncore_fini_mmio(&dev_priv->uncore);
- intel_uncore_cleanup_mmio(&dev_priv->uncore);
pci_dev_put(dev_priv->bridge_dev);
}
@@ -599,7 +594,7 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
if (ret)
goto err_ggtt;
- ret = intel_gt_probe_lmem(to_gt(dev_priv));
+ ret = intel_gt_tiles_init(dev_priv);
if (ret)
goto err_mem_regions;
@@ -850,10 +845,14 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
intel_vgpu_detect(i915);
- ret = i915_driver_mmio_probe(i915);
+ ret = intel_gt_probe_all(i915);
if (ret < 0)
goto out_runtime_pm_put;
+ ret = i915_driver_mmio_probe(i915);
+ if (ret < 0)
+ goto out_tiles_cleanup;
+
ret = i915_driver_hw_probe(i915);
if (ret < 0)
goto out_cleanup_mmio;
@@ -910,6 +909,8 @@ out_cleanup_hw:
i915_ggtt_driver_late_release(i915);
out_cleanup_mmio:
i915_driver_mmio_release(i915);
+out_tiles_cleanup:
+ intel_gt_release_all(i915);
out_runtime_pm_put:
enable_rpm_wakeref_asserts(&i915->runtime_pm);
i915_driver_late_release(i915);
@@ -1013,6 +1014,7 @@ static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file)
struct drm_i915_file_private *file_priv = file->driver_priv;
i915_gem_context_close(file);
+ i915_drm_client_put(file_priv->client);
kfree_rcu(file_priv, rcu);
@@ -1743,6 +1745,9 @@ static const struct file_operations i915_driver_fops = {
.read = drm_read,
.compat_ioctl = i915_ioc32_compat_ioctl,
.llseek = noop_llseek,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = i915_drm_client_fdinfo,
+#endif
};
static int
diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
new file mode 100644
index 000000000000..475a6f824cad
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include <uapi/drm/i915_drm.h>
+
+#include <drm/drm_print.h>
+
+#include "gem/i915_gem_context.h"
+#include "i915_drm_client.h"
+#include "i915_file_private.h"
+#include "i915_gem.h"
+#include "i915_utils.h"
+
+void i915_drm_clients_init(struct i915_drm_clients *clients,
+ struct drm_i915_private *i915)
+{
+ clients->i915 = i915;
+ clients->next_id = 0;
+
+ xa_init_flags(&clients->xarray, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
+}
+
+struct i915_drm_client *i915_drm_client_add(struct i915_drm_clients *clients)
+{
+ struct i915_drm_client *client;
+ struct xarray *xa = &clients->xarray;
+ int ret;
+
+ client = kzalloc(sizeof(*client), GFP_KERNEL);
+ if (!client)
+ return ERR_PTR(-ENOMEM);
+
+ xa_lock_irq(xa);
+ ret = __xa_alloc_cyclic(xa, &client->id, client, xa_limit_32b,
+ &clients->next_id, GFP_KERNEL);
+ xa_unlock_irq(xa);
+ if (ret < 0)
+ goto err;
+
+ kref_init(&client->kref);
+ spin_lock_init(&client->ctx_lock);
+ INIT_LIST_HEAD(&client->ctx_list);
+ client->clients = clients;
+
+ return client;
+
+err:
+ kfree(client);
+
+ return ERR_PTR(ret);
+}
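/*
 * Sketch of the intended lifetime of a client entry, pairing the
 * helpers in this file (hypothetical caller, not part of the patch).
 */
static int example_client_lifetime(struct i915_drm_clients *clients)
{
	struct i915_drm_client *client;

	client = i915_drm_client_add(clients);	/* cyclic id from the xarray */
	if (IS_ERR(client))
		return PTR_ERR(client);

	/* ... hold extra references with i915_drm_client_get() ... */

	i915_drm_client_put(client);	/* final put erases the id and frees */
	return 0;
}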
+
+void __i915_drm_client_free(struct kref *kref)
+{
+ struct i915_drm_client *client =
+ container_of(kref, typeof(*client), kref);
+ struct xarray *xa = &client->clients->xarray;
+ unsigned long flags;
+
+ xa_lock_irqsave(xa, flags);
+ __xa_erase(xa, client->id);
+ xa_unlock_irqrestore(xa, flags);
+ kfree(client);
+}
+
+void i915_drm_clients_fini(struct i915_drm_clients *clients)
+{
+ GEM_BUG_ON(!xa_empty(&clients->xarray));
+ xa_destroy(&clients->xarray);
+}
+
+#ifdef CONFIG_PROC_FS
+static const char * const uabi_class_names[] = {
+ [I915_ENGINE_CLASS_RENDER] = "render",
+ [I915_ENGINE_CLASS_COPY] = "copy",
+ [I915_ENGINE_CLASS_VIDEO] = "video",
+ [I915_ENGINE_CLASS_VIDEO_ENHANCE] = "video-enhance",
+};
+
+static u64 busy_add(struct i915_gem_context *ctx, unsigned int class)
+{
+ struct i915_gem_engines_iter it;
+ struct intel_context *ce;
+ u64 total = 0;
+
+ for_each_gem_engine(ce, rcu_dereference(ctx->engines), it) {
+ if (ce->engine->uabi_class != class)
+ continue;
+
+ total += intel_context_get_total_runtime_ns(ce);
+ }
+
+ return total;
+}
+
+static void
+show_client_class(struct seq_file *m,
+ struct i915_drm_client *client,
+ unsigned int class)
+{
+ const struct list_head *list = &client->ctx_list;
+ u64 total = atomic64_read(&client->past_runtime[class]);
+ const unsigned int capacity =
+ client->clients->i915->engine_uabi_class_count[class];
+ struct i915_gem_context *ctx;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ctx, list, client_link)
+ total += busy_add(ctx, class);
+ rcu_read_unlock();
+
+ seq_printf(m, "drm-engine-%s:\t%llu ns\n",
+ uabi_class_names[class], total);
+
+ if (capacity > 1)
+ seq_printf(m, "drm-engine-capacity-%s:\t%u\n",
+ uabi_class_names[class],
+ capacity);
+}
+
+void i915_drm_client_fdinfo(struct seq_file *m, struct file *f)
+{
+ struct drm_file *file = f->private_data;
+ struct drm_i915_file_private *file_priv = file->driver_priv;
+ struct drm_i915_private *i915 = file_priv->dev_priv;
+ struct i915_drm_client *client = file_priv->client;
+ struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+ unsigned int i;
+
+ /*
+ * ******************************************************************
+ * For text output format description please see drm-usage-stats.rst!
+ * ******************************************************************
+ */
+
+ seq_printf(m, "drm-driver:\t%s\n", i915->drm.driver->name);
+ seq_printf(m, "drm-pdev:\t%04x:%02x:%02x.%d\n",
+ pci_domain_nr(pdev->bus), pdev->bus->number,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ seq_printf(m, "drm-client-id:\t%u\n", client->id);
+
+ /*
+ * Temporarily skip showing client engine information with GuC submission until
+ * fetching engine busyness is implemented in the GuC submission backend
+ */
+ if (GRAPHICS_VER(i915) < 8 || intel_uc_uses_guc_submission(&i915->gt0.uc))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++)
+ show_client_class(m, client, i);
+}
+#endif
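/*
 * Illustrative fdinfo output produced by the function above (values
 * are made up; the key set follows drm-usage-stats.rst):
 *
 *   drm-driver:	i915
 *   drm-pdev:	0000:00:02.0
 *   drm-client-id:	42
 *   drm-engine-render:	123456789 ns
 *   drm-engine-copy:	0 ns
 *   drm-engine-video:	0 ns
 *   drm-engine-capacity-video:	2
 *   drm-engine-video-enhance:	0 ns
 */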
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
new file mode 100644
index 000000000000..5f5b02b01ba0
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __I915_DRM_CLIENT_H__
+#define __I915_DRM_CLIENT_H__
+
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/xarray.h>
+
+#include "gt/intel_engine_types.h"
+
+#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE
+
+struct drm_i915_private;
+
+struct i915_drm_clients {
+ struct drm_i915_private *i915;
+
+ struct xarray xarray;
+ u32 next_id;
+};
+
+struct i915_drm_client {
+ struct kref kref;
+
+ unsigned int id;
+
+ spinlock_t ctx_lock; /* For add/remove from ctx_list. */
+ struct list_head ctx_list; /* List of contexts belonging to client. */
+
+ struct i915_drm_clients *clients;
+
+ /**
+ * @past_runtime: Accumulation of pphwsp runtimes from closed contexts.
+ */
+ atomic64_t past_runtime[I915_LAST_UABI_ENGINE_CLASS + 1];
+};
+
+void i915_drm_clients_init(struct i915_drm_clients *clients,
+ struct drm_i915_private *i915);
+
+static inline struct i915_drm_client *
+i915_drm_client_get(struct i915_drm_client *client)
+{
+ kref_get(&client->kref);
+ return client;
+}
+
+void __i915_drm_client_free(struct kref *kref);
+
+static inline void i915_drm_client_put(struct i915_drm_client *client)
+{
+ kref_put(&client->kref, __i915_drm_client_free);
+}
+
+struct i915_drm_client *i915_drm_client_add(struct i915_drm_clients *clients);
+
+#ifdef CONFIG_PROC_FS
+void i915_drm_client_fdinfo(struct seq_file *m, struct file *f);
+#endif
+
+void i915_drm_clients_fini(struct i915_drm_clients *clients);
+
+#endif /* !__I915_DRM_CLIENT_H__ */
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ce2cd6491d6d..a6cf9716d6aa 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -61,6 +61,7 @@
#include "gt/intel_workarounds.h"
#include "gt/uc/intel_uc.h"
+#include "i915_drm_client.h"
#include "i915_gem.h"
#include "i915_gpu_error.h"
#include "i915_params.h"
@@ -500,6 +501,7 @@ struct drm_i915_private {
struct pci_dev *bridge_dev;
struct rb_root uabi_engines;
+ unsigned int engine_uabi_class_count[I915_LAST_UABI_ENGINE_CLASS + 1];
struct resource mch_res;
@@ -766,6 +768,14 @@ struct drm_i915_private {
/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
struct intel_gt gt0;
+ /*
+ * i915->gt[0] == &i915->gt0
+ */
+#define I915_MAX_GT 4
+ struct intel_gt *gt[I915_MAX_GT];
+
+ struct kobject *sysfs_gt;
+
struct {
struct i915_gem_contexts {
spinlock_t lock; /* locks list */
@@ -810,6 +820,8 @@ struct drm_i915_private {
struct i915_pmu pmu;
+ struct i915_drm_clients clients;
+
struct i915_hdcp_comp_master *hdcp_master;
bool hdcp_comp_added;
@@ -1201,6 +1213,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
((gt)->info.engine_mask & \
GENMASK(first__ + count__ - 1, first__)) >> first__; \
})
+#define RCS_MASK(gt) \
+ ENGINE_INSTANCES_MASK(gt, RCS0, I915_MAX_RCS)
#define VDBOX_MASK(gt) \
ENGINE_INSTANCES_MASK(gt, VCS0, I915_MAX_VCS)
#define VEBOX_MASK(gt) \
@@ -1294,6 +1308,14 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define HAS_DMC(dev_priv) (INTEL_INFO(dev_priv)->display.has_dmc)
+#define HAS_HECI_PXP(dev_priv) \
+ (INTEL_INFO(dev_priv)->has_heci_pxp)
+
+#define HAS_HECI_GSCFI(dev_priv) \
+ (INTEL_INFO(dev_priv)->has_heci_gscfi)
+
+#define HAS_HECI_GSC(dev_priv) (HAS_HECI_PXP(dev_priv) || HAS_HECI_GSCFI(dev_priv))
+
#define HAS_MSO(i915) (DISPLAY_VER(i915) >= 12)
#define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm)
@@ -1363,6 +1385,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define HAS_GUC_DEPRIVILEGE(dev_priv) \
(INTEL_INFO(dev_priv)->has_guc_deprivilege)
+#define HAS_PERCTX_PREEMPT_CTRL(i915) \
+ ((GRAPHICS_VER(i915) >= 9) && GRAPHICS_VER_FULL(i915) < IP_VER(12, 55))
+
#define HAS_D12_PLANE_MINIMIZATION(dev_priv) (IS_ROCKETLAKE(dev_priv) || \
IS_ALDERLAKE_S(dev_priv))
diff --git a/drivers/gpu/drm/i915/i915_file_private.h b/drivers/gpu/drm/i915/i915_file_private.h
index fb16cc431b2a..f42877869692 100644
--- a/drivers/gpu/drm/i915/i915_file_private.h
+++ b/drivers/gpu/drm/i915/i915_file_private.h
@@ -12,6 +12,7 @@
struct drm_i915_private;
struct drm_file;
+struct i915_drm_client;
struct drm_i915_file_private {
struct drm_i915_private *dev_priv;
@@ -103,6 +104,8 @@ struct drm_i915_file_private {
/** ban_score: Accumulated score of all ctx bans and fast hangs. */
atomic_t ban_score;
unsigned long hang_timestamp;
+
+ struct i915_drm_client *client;
};
#endif /* __I915_FILE_PRIVATE_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2e10187cd0a0..702e5b89be22 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -118,6 +118,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
unsigned long flags)
{
struct intel_runtime_pm *rpm = &to_i915(obj->base.dev)->runtime_pm;
+ bool vm_trylock = !!(flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK);
LIST_HEAD(still_in_list);
intel_wakeref_t wakeref;
struct i915_vma *vma;
@@ -142,8 +143,6 @@ try_again:
while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
struct i915_vma,
obj_link))) {
- struct i915_address_space *vm = vma->vm;
-
list_move_tail(&vma->obj_link, &still_in_list);
if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK))
continue;
@@ -153,40 +152,44 @@ try_again:
break;
}
+ /*
+ * Requiring the vm destructor to take the object lock
+ * before destroying a vma would help us eliminate the
+ * i915_vm_tryget() here, AND thus also the barrier stuff
+ * at the end. That's an easy fix, but sleeping locks in
+ * a kthread should generally be avoided.
+ */
ret = -EAGAIN;
- if (!i915_vm_tryopen(vm))
+ if (!i915_vm_tryget(vma->vm))
break;
- /* Prevent vma being freed by i915_vma_parked as we unbind */
- vma = __i915_vma_get(vma);
spin_unlock(&obj->vma.lock);
- if (vma) {
- bool vm_trylock = !!(flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK);
- ret = -EBUSY;
- if (flags & I915_GEM_OBJECT_UNBIND_ASYNC) {
- assert_object_held(vma->obj);
- ret = i915_vma_unbind_async(vma, vm_trylock);
- }
+ /*
+ * Since i915_vma_parked() takes the object lock
+ * before vma destruction, it won't race us here
+ * and destroy the vma from under us.
+ */
- if (ret == -EBUSY && (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
- !i915_vma_is_active(vma))) {
- if (vm_trylock) {
- if (mutex_trylock(&vma->vm->mutex)) {
- ret = __i915_vma_unbind(vma);
- mutex_unlock(&vma->vm->mutex);
- } else {
- ret = -EBUSY;
- }
- } else {
- ret = i915_vma_unbind(vma);
+ ret = -EBUSY;
+ if (flags & I915_GEM_OBJECT_UNBIND_ASYNC) {
+ assert_object_held(vma->obj);
+ ret = i915_vma_unbind_async(vma, vm_trylock);
+ }
+
+ if (ret == -EBUSY && (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
+ !i915_vma_is_active(vma))) {
+ if (vm_trylock) {
+ if (mutex_trylock(&vma->vm->mutex)) {
+ ret = __i915_vma_unbind(vma);
+ mutex_unlock(&vma->vm->mutex);
}
+ } else {
+ ret = i915_vma_unbind(vma);
}
-
- __i915_vma_put(vma);
}
- i915_vm_close(vm);
+ i915_vm_put(vma->vm);
spin_lock(&obj->vma.lock);
}
list_splice_init(&still_in_list, &obj->vma.list);
@@ -936,8 +939,19 @@ new_vma:
if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
return ERR_PTR(-ENOSPC);
+ /*
+ * If this misplaced vma is too big (i.e., at least
+ * half the size of the aperture) or hasn't been pinned
+ * mappable before, we ignore the misplacement when
+ * PIN_NONBLOCK is set in order to avoid the ping-pong
+ * issue described above. In other words, we try to
+ * avoid the costly operation of unbinding this vma
+ * from the GGTT and rebinding it back because there
+ * may not be enough space for this vma in the aperture.
+ */
if (flags & PIN_MAPPABLE &&
- vma->fence_size > ggtt->mappable_end / 2)
+ (vma->fence_size > ggtt->mappable_end / 2 ||
+ !i915_vma_is_map_and_fenceable(vma)))
return ERR_PTR(-ENOSPC);
}
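
The widened bail-out, written as a standalone predicate for clarity (names as in the diff, purely illustrative):

	/* Under PIN_MAPPABLE | PIN_NONBLOCK, give up on correcting the
	 * misplacement rather than risk an unbind/rebind ping-pong.
	 */
	static bool ignore_misplacement(const struct i915_vma *vma,
					const struct i915_ggtt *ggtt)
	{
		return vma->fence_size > ggtt->mappable_end / 2 ||
		       !i915_vma_is_map_and_fenceable(vma);
	}
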
@@ -1213,25 +1227,40 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
struct drm_i915_file_private *file_priv;
- int ret;
+ struct i915_drm_client *client;
+ int ret = -ENOMEM;
DRM_DEBUG("\n");
file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
if (!file_priv)
- return -ENOMEM;
+ goto err_alloc;
+
+ client = i915_drm_client_add(&i915->clients);
+ if (IS_ERR(client)) {
+ ret = PTR_ERR(client);
+ goto err_client;
+ }
file->driver_priv = file_priv;
file_priv->dev_priv = i915;
file_priv->file = file;
+ file_priv->client = client;
file_priv->bsd_engine = -1;
file_priv->hang_timestamp = jiffies;
ret = i915_gem_context_open(i915, file);
if (ret)
- kfree(file_priv);
+ goto err_context;
+
+ return 0;
+err_context:
+ i915_drm_client_put(client);
+err_client:
+ kfree(file_priv);
+err_alloc:
return ret;
}
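
The open path now unwinds with the conventional goto ladder: each label undoes exactly the steps that succeeded before the failure, in reverse order, so the new client allocation only cost one extra label. The generic shape, with illustrative helpers rather than driver API:

	static int example_open_path(void)
	{
		struct thing_a *a;
		struct thing_b *b;
		int ret = -ENOMEM;

		a = alloc_a();
		if (!a)
			goto err_alloc;

		b = alloc_b();
		if (IS_ERR(b)) {
			ret = PTR_ERR(b);
			goto err_a;
		}

		/* publish a and b */
		return 0;

	err_a:
		kfree(a);
	err_alloc:
		return ret;
	}
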
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index d0e7ee7b07df..0512c66fa4f3 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -48,6 +48,7 @@
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
+#include "gt/uc/intel_guc_capture.h"
#include "i915_driver.h"
#include "i915_drv.h"
@@ -511,13 +512,10 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
const char *header,
const struct i915_gem_context_coredump *ctx)
{
- const u32 period = to_gt(m->i915)->clock_period_ns;
-
err_printf(m, "%s%s[%d] prio %d, guilty %d active %d, runtime total %lluns, avg %lluns\n",
header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
ctx->guilty, ctx->active,
- ctx->total_runtime * period,
- mul_u32_u32(ctx->avg_runtime, period));
+ ctx->total_runtime, ctx->avg_runtime);
}
static struct i915_vma_coredump *
@@ -532,8 +530,8 @@ __find_vma(struct i915_vma_coredump *vma, const char *name)
return NULL;
}
-static struct i915_vma_coredump *
-find_batch(const struct intel_engine_coredump *ee)
+struct i915_vma_coredump *
+intel_gpu_error_find_batch(const struct intel_engine_coredump *ee)
{
return __find_vma(ee->vma, "batch");
}
@@ -561,7 +559,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
error_print_instdone(m, ee);
- batch = find_batch(ee);
+ batch = intel_gpu_error_find_batch(ee);
if (batch) {
u64 start = batch->gtt_offset;
u64 end = start + batch->gtt_size;
@@ -596,15 +594,11 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
ee->vm_info.pp_dir_base);
}
}
- err_printf(m, " hung: %u\n", ee->hung);
- err_printf(m, " engine reset count: %u\n", ee->reset_count);
for (n = 0; n < ee->num_ports; n++) {
err_printf(m, " ELSP[%d]:", n);
error_print_request(m, " ", &ee->execlist[n]);
}
-
- error_print_context(m, " Active context: ", &ee->context);
}
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
@@ -616,9 +610,9 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
va_end(args);
}
-static void print_error_vma(struct drm_i915_error_state_buf *m,
- const struct intel_engine_cs *engine,
- const struct i915_vma_coredump *vma)
+void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
+ const struct intel_engine_cs *engine,
+ const struct i915_vma_coredump *vma)
{
char out[ASCII85_BUFSZ];
struct page *page;
@@ -687,7 +681,7 @@ static void err_print_uc(struct drm_i915_error_state_buf *m,
intel_uc_fw_dump(&error_uc->guc_fw, &p);
intel_uc_fw_dump(&error_uc->huc_fw, &p);
- print_error_vma(m, NULL, error_uc->guc_log);
+ intel_gpu_error_print_vma(m, NULL, error_uc->guc_log);
}
static void err_free_sgl(struct scatterlist *sgl)
@@ -713,26 +707,33 @@ static void err_print_gt_info(struct drm_i915_error_state_buf *m,
struct drm_printer p = i915_error_printer(m);
intel_gt_info_print(&gt->info, &p);
- intel_sseu_print_topology(&gt->info.sseu, &p);
+ intel_sseu_print_topology(gt->_gt->i915, &gt->info.sseu, &p);
}
-static void err_print_gt(struct drm_i915_error_state_buf *m,
- struct intel_gt_coredump *gt)
+static void err_print_gt_display(struct drm_i915_error_state_buf *m,
+ struct intel_gt_coredump *gt)
+{
+ err_printf(m, "IER: 0x%08x\n", gt->ier);
+ err_printf(m, "DERRMR: 0x%08x\n", gt->derrmr);
+}
+
+static void err_print_gt_global_nonguc(struct drm_i915_error_state_buf *m,
+ struct intel_gt_coredump *gt)
{
- const struct intel_engine_coredump *ee;
int i;
err_printf(m, "GT awake: %s\n", str_yes_no(gt->awake));
err_printf(m, "EIR: 0x%08x\n", gt->eir);
- err_printf(m, "IER: 0x%08x\n", gt->ier);
+ err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er);
+
for (i = 0; i < gt->ngtier; i++)
err_printf(m, "GTIER[%d]: 0x%08x\n", i, gt->gtier[i]);
- err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er);
- err_printf(m, "FORCEWAKE: 0x%08x\n", gt->forcewake);
- err_printf(m, "DERRMR: 0x%08x\n", gt->derrmr);
+}
- for (i = 0; i < gt->nfence; i++)
- err_printf(m, " fence[%d] = %08llx\n", i, gt->fence[i]);
+static void err_print_gt_global(struct drm_i915_error_state_buf *m,
+ struct intel_gt_coredump *gt)
+{
+ err_printf(m, "FORCEWAKE: 0x%08x\n", gt->forcewake);
if (IS_GRAPHICS_VER(m->i915, 6, 11)) {
err_printf(m, "ERROR: 0x%08x\n", gt->error);
@@ -755,7 +756,7 @@ static void err_print_gt(struct drm_i915_error_state_buf *m,
if (GRAPHICS_VER(m->i915) >= 12) {
int i;
- for (i = 0; i < GEN12_SFC_DONE_MAX; i++) {
+ for (i = 0; i < I915_MAX_SFC; i++) {
/*
* SFC_DONE resides in the VD forcewake domain, so it
* only exists if the corresponding VCS engine is
@@ -771,19 +772,38 @@ static void err_print_gt(struct drm_i915_error_state_buf *m,
err_printf(m, " GAM_DONE: 0x%08x\n", gt->gam_done);
}
+}
+
+static void err_print_gt_fences(struct drm_i915_error_state_buf *m,
+ struct intel_gt_coredump *gt)
+{
+ int i;
+
+ for (i = 0; i < gt->nfence; i++)
+ err_printf(m, " fence[%d] = %08llx\n", i, gt->fence[i]);
+}
+
+static void err_print_gt_engines(struct drm_i915_error_state_buf *m,
+ struct intel_gt_coredump *gt)
+{
+ const struct intel_engine_coredump *ee;
for (ee = gt->engine; ee; ee = ee->next) {
const struct i915_vma_coredump *vma;
- error_print_engine(m, ee);
+ if (ee->guc_capture_node)
+ intel_guc_capture_print_engine_node(m, ee);
+ else
+ error_print_engine(m, ee);
+
+ err_printf(m, " hung: %u\n", ee->hung);
+ err_printf(m, " engine reset count: %u\n", ee->reset_count);
+ error_print_context(m, " Active context: ", &ee->context);
+
for (vma = ee->vma; vma; vma = vma->next)
- print_error_vma(m, ee->engine, vma);
+ intel_gpu_error_print_vma(m, ee->engine, vma);
}
- if (gt->uc)
- err_print_uc(m, gt->uc);
-
- err_print_gt_info(m, gt);
}
static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
@@ -831,8 +851,30 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
err_printf(m, "RPM wakelock: %s\n", str_yes_no(error->wakelock));
err_printf(m, "PM suspended: %s\n", str_yes_no(error->suspended));
- if (error->gt)
- err_print_gt(m, error->gt);
+ if (error->gt) {
+ bool print_guc_capture = false;
+
+ if (error->gt->uc && error->gt->uc->is_guc_capture)
+ print_guc_capture = true;
+
+ err_print_gt_display(m, error->gt);
+ err_print_gt_global_nonguc(m, error->gt);
+ err_print_gt_fences(m, error->gt);
+
+ /*
+ * GuC dumps global, engine-class and engine-instance registers
+ * together as part of the engine state dump, so we print them
+ * from err_print_gt_engines() instead.
+ */
+ if (!print_guc_capture)
+ err_print_gt_global(m, error->gt);
+
+ err_print_gt_engines(m, error->gt);
+
+ if (error->gt->uc)
+ err_print_uc(m, error->gt->uc);
+
+ err_print_gt_info(m, error->gt);
+ }
if (error->overlay)
intel_overlay_print_error_state(m, error->overlay);
@@ -980,6 +1022,7 @@ static void cleanup_gt(struct intel_gt_coredump *gt)
gt->engine = ee->next;
i915_vma_coredump_free(ee->vma);
+ intel_guc_capture_free_node(ee);
kfree(ee);
}
@@ -1313,8 +1356,8 @@ static bool record_context(struct i915_gem_context_coredump *e,
e->guilty = atomic_read(&ctx->guilty_count);
e->active = atomic_read(&ctx->active_count);
- e->total_runtime = rq->context->runtime.total;
- e->avg_runtime = ewma_runtime_read(&rq->context->runtime.avg);
+ e->total_runtime = intel_context_get_total_runtime_ns(rq->context);
+ e->avg_runtime = intel_context_get_avg_runtime_ns(rq->context);
simulated = i915_gem_context_no_error_capture(ctx);
@@ -1431,7 +1474,7 @@ static void add_vma_coredump(struct intel_engine_coredump *ee,
}
struct intel_engine_coredump *
-intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
+intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags)
{
struct intel_engine_coredump *ee;
@@ -1441,8 +1484,10 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
ee->engine = engine;
- engine_record_registers(ee);
- engine_record_execlists(ee);
+ if (!(dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)) {
+ engine_record_registers(ee);
+ engine_record_execlists(ee);
+ }
return ee;
}
@@ -1506,7 +1551,8 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
static struct intel_engine_coredump *
capture_engine(struct intel_engine_cs *engine,
- struct i915_vma_compress *compress)
+ struct i915_vma_compress *compress,
+ u32 dump_flags)
{
struct intel_engine_capture_vma *capture = NULL;
struct intel_engine_coredump *ee;
@@ -1514,7 +1560,7 @@ capture_engine(struct intel_engine_cs *engine,
struct i915_request *rq = NULL;
unsigned long flags;
- ee = intel_engine_coredump_alloc(engine, ALLOW_FAIL);
+ ee = intel_engine_coredump_alloc(engine, ALLOW_FAIL, dump_flags);
if (!ee)
return NULL;
@@ -1547,6 +1593,8 @@ capture_engine(struct intel_engine_cs *engine,
i915_request_put(rq);
goto no_request_capture;
}
+ if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
+ intel_guc_capture_get_matching_node(engine->gt, ee, ce);
intel_engine_coredump_add_vma(ee, capture, compress);
i915_request_put(rq);
@@ -1561,7 +1609,8 @@ no_request_capture:
static void
gt_record_engines(struct intel_gt_coredump *gt,
intel_engine_mask_t engine_mask,
- struct i915_vma_compress *compress)
+ struct i915_vma_compress *compress,
+ u32 dump_flags)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -1572,7 +1621,7 @@ gt_record_engines(struct intel_gt_coredump *gt,
/* Refill our page pool before entering atomic section */
pool_refill(&compress->pool, ALLOW_FAIL);
- ee = capture_engine(engine, compress);
+ ee = capture_engine(engine, compress, dump_flags);
if (!ee)
continue;
@@ -1580,6 +1629,8 @@ gt_record_engines(struct intel_gt_coredump *gt,
gt->simulated |= ee->simulated;
if (ee->simulated) {
+ if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
+ intel_guc_capture_free_node(ee);
kfree(ee);
continue;
}
@@ -1615,8 +1666,74 @@ gt_record_uc(struct intel_gt_coredump *gt,
return error_uc;
}
-/* Capture all registers which don't fit into another category. */
-static void gt_record_regs(struct intel_gt_coredump *gt)
+/* Capture display registers. */
+static void gt_record_display_regs(struct intel_gt_coredump *gt)
+{
+ struct intel_uncore *uncore = gt->_gt->uncore;
+ struct drm_i915_private *i915 = uncore->i915;
+
+ if (GRAPHICS_VER(i915) >= 6)
+ gt->derrmr = intel_uncore_read(uncore, DERRMR);
+
+ if (GRAPHICS_VER(i915) >= 8)
+ gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
+ else if (IS_VALLEYVIEW(i915))
+ gt->ier = intel_uncore_read(uncore, VLV_IER);
+ else if (HAS_PCH_SPLIT(i915))
+ gt->ier = intel_uncore_read(uncore, DEIER);
+ else if (GRAPHICS_VER(i915) == 2)
+ gt->ier = intel_uncore_read16(uncore, GEN2_IER);
+ else
+ gt->ier = intel_uncore_read(uncore, GEN2_IER);
+}
+
+/* Capture all other registers that GuC doesn't capture. */
+static void gt_record_global_nonguc_regs(struct intel_gt_coredump *gt)
+{
+ struct intel_uncore *uncore = gt->_gt->uncore;
+ struct drm_i915_private *i915 = uncore->i915;
+ int i;
+
+ if (IS_VALLEYVIEW(i915)) {
+ gt->gtier[0] = intel_uncore_read(uncore, GTIER);
+ gt->ngtier = 1;
+ } else if (GRAPHICS_VER(i915) >= 11) {
+ gt->gtier[0] =
+ intel_uncore_read(uncore,
+ GEN11_RENDER_COPY_INTR_ENABLE);
+ gt->gtier[1] =
+ intel_uncore_read(uncore, GEN11_VCS_VECS_INTR_ENABLE);
+ gt->gtier[2] =
+ intel_uncore_read(uncore, GEN11_GUC_SG_INTR_ENABLE);
+ gt->gtier[3] =
+ intel_uncore_read(uncore,
+ GEN11_GPM_WGBOXPERF_INTR_ENABLE);
+ gt->gtier[4] =
+ intel_uncore_read(uncore,
+ GEN11_CRYPTO_RSVD_INTR_ENABLE);
+ gt->gtier[5] =
+ intel_uncore_read(uncore,
+ GEN11_GUNIT_CSME_INTR_ENABLE);
+ gt->ngtier = 6;
+ } else if (GRAPHICS_VER(i915) >= 8) {
+ for (i = 0; i < 4; i++)
+ gt->gtier[i] =
+ intel_uncore_read(uncore, GEN8_GT_IER(i));
+ gt->ngtier = 4;
+ } else if (HAS_PCH_SPLIT(i915)) {
+ gt->gtier[0] = intel_uncore_read(uncore, GTIER);
+ gt->ngtier = 1;
+ }
+
+ gt->eir = intel_uncore_read(uncore, EIR);
+ gt->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER);
+}
+
+/*
+ * Capture all registers that relate to workload submission.
+ * NOTE: with GuC submission, when GuC resets an engine it can dump
+ * these registers for us.
+ */
+static void gt_record_global_regs(struct intel_gt_coredump *gt)
{
struct intel_uncore *uncore = gt->_gt->uncore;
struct drm_i915_private *i915 = uncore->i915;
@@ -1632,11 +1749,8 @@ static void gt_record_regs(struct intel_gt_coredump *gt)
*/
/* 1: Registers specific to a single generation */
- if (IS_VALLEYVIEW(i915)) {
- gt->gtier[0] = intel_uncore_read(uncore, GTIER);
- gt->ier = intel_uncore_read(uncore, VLV_IER);
+ if (IS_VALLEYVIEW(i915))
gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_VLV);
- }
if (GRAPHICS_VER(i915) == 7)
gt->err_int = intel_uncore_read(uncore, GEN7_ERR_INT);
@@ -1664,7 +1778,6 @@ static void gt_record_regs(struct intel_gt_coredump *gt)
gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_MT);
if (GRAPHICS_VER(i915) >= 6) {
- gt->derrmr = intel_uncore_read(uncore, DERRMR);
if (GRAPHICS_VER(i915) < 12) {
gt->error = intel_uncore_read(uncore, ERROR_GEN6);
gt->done_reg = intel_uncore_read(uncore, DONE_REG);
@@ -1684,7 +1797,7 @@ static void gt_record_regs(struct intel_gt_coredump *gt)
gt->aux_err = intel_uncore_read(uncore, GEN12_AUX_ERR_DBG);
if (GRAPHICS_VER(i915) >= 12) {
- for (i = 0; i < GEN12_SFC_DONE_MAX; i++) {
+ for (i = 0; i < I915_MAX_SFC; i++) {
/*
* SFC_DONE resides in the VD forcewake domain, so it
* only exists if the corresponding VCS engine is
@@ -1700,44 +1813,6 @@ static void gt_record_regs(struct intel_gt_coredump *gt)
gt->gam_done = intel_uncore_read(uncore, GEN12_GAM_DONE);
}
-
- /* 4: Everything else */
- if (GRAPHICS_VER(i915) >= 11) {
- gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
- gt->gtier[0] =
- intel_uncore_read(uncore,
- GEN11_RENDER_COPY_INTR_ENABLE);
- gt->gtier[1] =
- intel_uncore_read(uncore, GEN11_VCS_VECS_INTR_ENABLE);
- gt->gtier[2] =
- intel_uncore_read(uncore, GEN11_GUC_SG_INTR_ENABLE);
- gt->gtier[3] =
- intel_uncore_read(uncore,
- GEN11_GPM_WGBOXPERF_INTR_ENABLE);
- gt->gtier[4] =
- intel_uncore_read(uncore,
- GEN11_CRYPTO_RSVD_INTR_ENABLE);
- gt->gtier[5] =
- intel_uncore_read(uncore,
- GEN11_GUNIT_CSME_INTR_ENABLE);
- gt->ngtier = 6;
- } else if (GRAPHICS_VER(i915) >= 8) {
- gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
- for (i = 0; i < 4; i++)
- gt->gtier[i] =
- intel_uncore_read(uncore, GEN8_GT_IER(i));
- gt->ngtier = 4;
- } else if (HAS_PCH_SPLIT(i915)) {
- gt->ier = intel_uncore_read(uncore, DEIER);
- gt->gtier[0] = intel_uncore_read(uncore, GTIER);
- gt->ngtier = 1;
- } else if (GRAPHICS_VER(i915) == 2) {
- gt->ier = intel_uncore_read16(uncore, GEN2_IER);
- } else if (!IS_VALLEYVIEW(i915)) {
- gt->ier = intel_uncore_read(uncore, GEN2_IER);
- }
- gt->eir = intel_uncore_read(uncore, EIR);
- gt->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER);
}
static void gt_record_info(struct intel_gt_coredump *gt)
@@ -1849,7 +1924,7 @@ i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))
struct intel_gt_coredump *
-intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
+intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags)
{
struct intel_gt_coredump *gc;
@@ -1860,7 +1935,21 @@ intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
gc->_gt = gt;
gc->awake = intel_gt_pm_is_awake(gt);
- gt_record_regs(gc);
+ gt_record_display_regs(gc);
+ gt_record_global_nonguc_regs(gc);
+
+ /*
+ * GuC dumps global, engine-class and engine-instance registers
+ * (those that can change as part of engine state during
+ * execution) before an engine is reset due to a hung context,
+ * and reports all three groups together as a single set.
+ * Thus, if GuC triggered the context reset, we retrieve the
+ * register values as part of gt_record_engines().
+ */
+ if (!(dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE))
+ gt_record_global_regs(gc);
+
gt_record_fences(gc);
return gc;
@@ -1894,7 +1983,7 @@ void i915_vma_capture_finish(struct intel_gt_coredump *gt,
}
static struct i915_gpu_coredump *
-__i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+__i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags)
{
struct drm_i915_private *i915 = gt->i915;
struct i915_gpu_coredump *error;
@@ -1908,7 +1997,7 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
if (!error)
return ERR_PTR(-ENOMEM);
- error->gt = intel_gt_coredump_alloc(gt, ALLOW_FAIL);
+ error->gt = intel_gt_coredump_alloc(gt, ALLOW_FAIL, dump_flags);
if (error->gt) {
struct i915_vma_compress *compress;
@@ -1919,11 +2008,19 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
return ERR_PTR(-ENOMEM);
}
+ if (INTEL_INFO(i915)->has_gt_uc) {
+ error->gt->uc = gt_record_uc(error->gt, compress);
+ if (error->gt->uc) {
+ if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
+ error->gt->uc->is_guc_capture = true;
+ else
+ GEM_BUG_ON(error->gt->uc->is_guc_capture);
+ }
+ }
+
gt_record_info(error->gt);
- gt_record_engines(error->gt, engine_mask, compress);
+ gt_record_engines(error->gt, engine_mask, compress, dump_flags);
- if (INTEL_INFO(i915)->has_gt_uc)
- error->gt->uc = gt_record_uc(error->gt, compress);
i915_vma_capture_finish(error->gt, compress);
@@ -1936,7 +2033,7 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
}
struct i915_gpu_coredump *
-i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags)
{
static DEFINE_MUTEX(capture_mutex);
int ret = mutex_lock_interruptible(&capture_mutex);
@@ -1945,7 +2042,7 @@ i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
if (ret)
return ERR_PTR(ret);
- dump = __i915_gpu_coredump(gt, engine_mask);
+ dump = __i915_gpu_coredump(gt, engine_mask, dump_flags);
mutex_unlock(&capture_mutex);
return dump;
@@ -1992,11 +2089,11 @@ void i915_error_state_store(struct i915_gpu_coredump *error)
* to pick up.
*/
void i915_capture_error_state(struct intel_gt *gt,
- intel_engine_mask_t engine_mask)
+ intel_engine_mask_t engine_mask, u32 dump_flags)
{
struct i915_gpu_coredump *error;
- error = i915_gpu_coredump(gt, engine_mask);
+ error = i915_gpu_coredump(gt, engine_mask, dump_flags);
if (IS_ERR(error)) {
cmpxchg(&gt->i915->gpu_error.first_error, NULL, error);
return;
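
All capture entry points now take dump_flags so the GuC-triggered reset path can indicate that GuC has already captured the submission registers. A hypothetical caller (guc_did_reset is an assumed parameter, not from this diff):

	static void example_capture(struct intel_gt *gt,
				    intel_engine_mask_t engine_mask,
				    bool guc_did_reset)
	{
		u32 dump_flags = guc_did_reset ? CORE_DUMP_FLAG_IS_GUC_CAPTURE
					       : CORE_DUMP_FLAG_NONE;

		/* With the flag set, gt_record_global_regs() is skipped
		 * and each engine dump is printed from its GuC capture
		 * node instead.
		 */
		i915_capture_error_state(gt, engine_mask, dump_flags);
	}
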
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 88ce3a08f555..a611abacd9c2 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -53,6 +53,8 @@ struct i915_request_coredump {
struct i915_sched_attr sched_attr;
};
+struct __guc_capture_parsed_output;
+
struct intel_engine_coredump {
const struct intel_engine_cs *engine;
@@ -84,11 +86,15 @@ struct intel_engine_coredump {
u32 rc_psmi; /* sleep state */
struct intel_instdone instdone;
+ /* GuC matched capture-lists info */
+ struct intel_guc_state_capture *capture;
+ struct __guc_capture_parsed_output *guc_capture_node;
+
struct i915_gem_context_coredump {
char comm[TASK_COMM_LEN];
u64 total_runtime;
- u32 avg_runtime;
+ u64 avg_runtime;
pid_t pid;
int active;
@@ -124,7 +130,6 @@ struct intel_gt_coredump {
u32 pgtbl_er;
u32 ier;
u32 gtier[6], ngtier;
- u32 derrmr;
u32 forcewake;
u32 error; /* gen6+ */
u32 err_int; /* gen7 */
@@ -137,9 +142,12 @@ struct intel_gt_coredump {
u32 gfx_mode;
u32 gtt_cache;
u32 aux_err; /* gen12 */
- u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */
u32 gam_done; /* gen12 */
+ /* Display related */
+ u32 derrmr;
+ u32 sfc_done[I915_MAX_SFC]; /* gen12 */
+
u32 nfence;
u64 fence[I915_MAX_NUM_FENCES];
@@ -149,6 +157,7 @@ struct intel_gt_coredump {
struct intel_uc_fw guc_fw;
struct intel_uc_fw huc_fw;
struct i915_vma_coredump *guc_log;
+ bool is_guc_capture;
} *uc;
struct intel_gt_coredump *next;
@@ -221,24 +230,32 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
return atomic_read(&error->reset_engine_count[engine->uabi_class]);
}
+#define CORE_DUMP_FLAG_NONE 0x0
+#define CORE_DUMP_FLAG_IS_GUC_CAPTURE BIT(0)
+
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
__printf(2, 3)
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
+void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
+ const struct intel_engine_cs *engine,
+ const struct i915_vma_coredump *vma);
+struct i915_vma_coredump *
+intel_gpu_error_find_batch(const struct intel_engine_coredump *ee);
struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt,
- intel_engine_mask_t engine_mask);
+ intel_engine_mask_t engine_mask, u32 dump_flags);
void i915_capture_error_state(struct intel_gt *gt,
- intel_engine_mask_t engine_mask);
+ intel_engine_mask_t engine_mask, u32 dump_flags);
struct i915_gpu_coredump *
i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
struct intel_gt_coredump *
-intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);
+intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags);
struct intel_engine_coredump *
-intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
+intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags);
struct intel_engine_capture_vma *
intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
@@ -288,7 +305,7 @@ i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
}
static inline void
-i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags)
{
}
@@ -299,13 +316,13 @@ i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
}
static inline struct intel_gt_coredump *
-intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
+intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags)
{
return NULL;
}
static inline struct intel_engine_coredump *
-intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
+intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags)
{
return NULL;
}
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 5ad071e09301..701fbc98afa0 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -202,6 +202,9 @@ i915_param_named_unsafe(request_timeout_ms, uint, 0600,
"Default request/fence/batch buffer expiration timeout.");
#endif
+i915_param_named_unsafe(lmem_size, uint, 0400,
+ "Set the lmem size(in MiB) for each region. (default: 0, all memory)");
+
static __always_inline void _print_param(struct drm_printer *p,
const char *name,
const char *type,
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index c779a6f85c7e..b5e7ea45d191 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -73,6 +73,7 @@ struct drm_printer;
param(int, enable_dpcd_backlight, -1, 0600) \
param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE, 0400) \
param(unsigned int, request_timeout_ms, CONFIG_DRM_I915_REQUEST_TIMEOUT, CONFIG_DRM_I915_REQUEST_TIMEOUT ? 0600 : 0) \
+ param(unsigned int, lmem_size, 0, 0400) \
/* leave bools at the end to not create holes */ \
param(bool, enable_hangcheck, true, 0600) \
param(bool, load_detect_test, false, 0600) \
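
As with the other unsafe parameters, the value is supplied at module load or on the kernel command line, e.g. "modprobe i915 lmem_size=4096" (hypothetical value) to cap each local-memory region at 4 GiB; the default of 0 keeps the full region.
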
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 9e077929ed67..38f7de778914 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -901,7 +901,8 @@ static const struct intel_device_info rkl_info = {
.has_llc = 0, \
.has_pxp = 0, \
.has_snoop = 1, \
- .is_dgfx = 1
+ .is_dgfx = 1, \
+ .has_heci_gscfi = 1
static const struct intel_device_info dg1_info = {
GEN12_FEATURES,
@@ -1050,6 +1051,7 @@ static const struct intel_device_info xehpsdv_info = {
.has_4tile = 1, \
.has_64k_pages = 1, \
.has_guc_deprivilege = 1, \
+ .has_heci_pxp = 1, \
.needs_compact_pt = 1, \
.platform_engine_mask = \
BIT(RCS0) | BIT(BCS0) | \
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index 2dfbc22857a3..7584cec53d5d 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -9,6 +9,7 @@
#include "i915_drv.h"
#include "i915_perf.h"
#include "i915_query.h"
+#include "gt/intel_engine_user.h"
#include <uapi/drm/i915_drm.h>
static int copy_query_item(void *query_hdr, size_t query_sz,
@@ -28,36 +29,30 @@ static int copy_query_item(void *query_hdr, size_t query_sz,
return 0;
}
-static int query_topology_info(struct drm_i915_private *dev_priv,
- struct drm_i915_query_item *query_item)
+static int fill_topology_info(const struct sseu_dev_info *sseu,
+ struct drm_i915_query_item *query_item,
+ const u8 *subslice_mask)
{
- const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
struct drm_i915_query_topology_info topo;
u32 slice_length, subslice_length, eu_length, total_length;
int ret;
- if (query_item->flags != 0)
- return -EINVAL;
+ BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
if (sseu->max_slices == 0)
return -ENODEV;
- BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
-
slice_length = sizeof(sseu->slice_mask);
subslice_length = sseu->max_slices * sseu->ss_stride;
eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride;
total_length = sizeof(topo) + slice_length + subslice_length +
eu_length;
- ret = copy_query_item(&topo, sizeof(topo), total_length,
- query_item);
+ ret = copy_query_item(&topo, sizeof(topo), total_length, query_item);
+
if (ret != 0)
return ret;
- if (topo.flags != 0)
- return -EINVAL;
-
memset(&topo, 0, sizeof(topo));
topo.max_slices = sseu->max_slices;
topo.max_subslices = sseu->max_subslices;
@@ -69,27 +64,64 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
topo.eu_stride = sseu->eu_stride;
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr),
- &topo, sizeof(topo)))
+ &topo, sizeof(topo)))
return -EFAULT;
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr + sizeof(topo)),
- &sseu->slice_mask, slice_length))
+ &sseu->slice_mask, slice_length))
return -EFAULT;
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
- sizeof(topo) + slice_length),
- sseu->subslice_mask, subslice_length))
+ sizeof(topo) + slice_length),
+ subslice_mask, subslice_length))
return -EFAULT;
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
- sizeof(topo) +
- slice_length + subslice_length),
- sseu->eu_mask, eu_length))
+ sizeof(topo) +
+ slice_length + subslice_length),
+ sseu->eu_mask, eu_length))
return -EFAULT;
return total_length;
}
+static int query_topology_info(struct drm_i915_private *dev_priv,
+ struct drm_i915_query_item *query_item)
+{
+ const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
+
+ if (query_item->flags != 0)
+ return -EINVAL;
+
+ return fill_topology_info(sseu, query_item, sseu->subslice_mask);
+}
+
+static int query_geometry_subslices(struct drm_i915_private *i915,
+ struct drm_i915_query_item *query_item)
+{
+ const struct sseu_dev_info *sseu;
+ struct intel_engine_cs *engine;
+ struct i915_engine_class_instance classinstance;
+
+ if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
+ return -ENODEV;
+
+ classinstance = *((struct i915_engine_class_instance *)&query_item->flags);
+
+ engine = intel_engine_lookup_user(i915, (u8)classinstance.engine_class,
+ (u8)classinstance.engine_instance);
+
+ if (!engine)
+ return -EINVAL;
+
+ if (engine->class != RENDER_CLASS)
+ return -EINVAL;
+
+ sseu = &engine->gt->info.sseu;
+
+ return fill_topology_info(sseu, query_item, sseu->geometry_subslice_mask);
+}
+
static int
query_engine_info(struct drm_i915_private *i915,
struct drm_i915_query_item *query_item)
@@ -479,12 +511,36 @@ static int query_memregion_info(struct drm_i915_private *i915,
return total_length;
}
+static int query_hwconfig_blob(struct drm_i915_private *i915,
+ struct drm_i915_query_item *query_item)
+{
+ struct intel_gt *gt = to_gt(i915);
+ struct intel_hwconfig *hwconfig = &gt->info.hwconfig;
+
+ if (!hwconfig->size || !hwconfig->ptr)
+ return -ENODEV;
+
+ if (query_item->length == 0)
+ return hwconfig->size;
+
+ if (query_item->length < hwconfig->size)
+ return -EINVAL;
+
+ if (copy_to_user(u64_to_user_ptr(query_item->data_ptr),
+ hwconfig->ptr, hwconfig->size))
+ return -EFAULT;
+
+ return hwconfig->size;
+}
+
static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
struct drm_i915_query_item *query_item) = {
query_topology_info,
query_engine_info,
query_perf_config,
query_memregion_info,
+ query_hwconfig_blob,
+ query_geometry_subslices,
};
int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
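
From userspace, the new blob query follows the usual two-pass DRM_IOCTL_I915_QUERY convention that the length == 0 branch above enables: query once to learn the size, then again with a buffer. A minimal sketch with error handling elided (assumes the DRM_I915_QUERY_HWCONFIG_BLOB id added by this series):

	#include <stdint.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	static void *read_hwconfig(int drm_fd, int32_t *size)
	{
		struct drm_i915_query_item item = {
			.query_id = DRM_I915_QUERY_HWCONFIG_BLOB,
		};
		struct drm_i915_query q = {
			.num_items = 1,
			.items_ptr = (uintptr_t)&item,
		};
		void *buf;

		ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &q); /* item.length = blob size */
		buf = malloc(item.length);
		item.data_ptr = (uintptr_t)buf;
		ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &q); /* copies the blob */

		*size = item.length;
		return buf;
	}
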
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 51f46fe45c72..efcfe32cd8eb 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -976,6 +976,10 @@
#define GEN12_COMPUTE2_RING_BASE 0x1e000
#define GEN12_COMPUTE3_RING_BASE 0x26000
#define BLT_RING_BASE 0x22000
+#define DG1_GSC_HECI1_BASE 0x00258000
+#define DG1_GSC_HECI2_BASE 0x00259000
+#define DG2_GSC_HECI1_BASE 0x00373000
+#define DG2_GSC_HECI2_BASE 0x00374000
@@ -1842,6 +1846,17 @@
#define GEN9_RP_STATE_LIMITS _MMIO(0x138148)
#define XEHPSDV_RP_STATE_CAP _MMIO(0x250014)
+#define GT0_PERF_LIMIT_REASONS _MMIO(0x1381a8)
+#define GT0_PERF_LIMIT_REASONS_MASK 0xde3
+#define PROCHOT_MASK REG_BIT(1)
+#define THERMAL_LIMIT_MASK REG_BIT(2)
+#define RATL_MASK REG_BIT(6)
+#define VR_THERMALERT_MASK REG_BIT(7)
+#define VR_TDC_MASK REG_BIT(8)
+#define POWER_LIMIT_4_MASK REG_BIT(9)
+#define POWER_LIMIT_1_MASK REG_BIT(11)
+#define POWER_LIMIT_2_MASK REG_BIT(12)
+
#define CHV_CLK_CTL1 _MMIO(0x101100)
#define VLV_CLK_CTL2 _MMIO(0x101104)
#define CLK_CTL2_CZCOUNT_30NS_SHIFT 28
@@ -8460,6 +8475,9 @@ enum skl_power_gate {
#define SGGI_DIS REG_BIT(15)
#define SGR_DIS REG_BIT(13)
+#define XEHPSDV_TILE0_ADDR_RANGE _MMIO(0x4900)
+#define XEHPSDV_TILE_LMEM_RANGE_SHIFT 8
+
#define XEHPSDV_FLAT_CCS_BASE_ADDR _MMIO(0x4910)
#define XEHPSDV_CCS_BASE_SHIFT 8
diff --git a/drivers/gpu/drm/i915/i915_reg_defs.h b/drivers/gpu/drm/i915/i915_reg_defs.h
index d78d78fce431..8f486f77609f 100644
--- a/drivers/gpu/drm/i915/i915_reg_defs.h
+++ b/drivers/gpu/drm/i915/i915_reg_defs.h
@@ -123,6 +123,4 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define VLV_DISPLAY_BASE 0x180000
-#define GEN12_SFC_DONE_MAX 4
-
#endif /* __I915_REG_DEFS__ */
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index a4d1759375b9..8521daba212a 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -39,113 +39,12 @@
#include "i915_sysfs.h"
#include "intel_pm.h"
-static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev)
+struct drm_i915_private *kdev_minor_to_i915(struct device *kdev)
{
struct drm_minor *minor = dev_get_drvdata(kdev);
return to_i915(minor->dev);
}
-#ifdef CONFIG_PM
-static u32 calc_residency(struct drm_i915_private *dev_priv,
- i915_reg_t reg)
-{
- intel_wakeref_t wakeref;
- u64 res = 0;
-
- with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref)
- res = intel_rc6_residency_us(&to_gt(dev_priv)->rc6, reg);
-
- return DIV_ROUND_CLOSEST_ULL(res, 1000);
-}
-
-static ssize_t rc6_enable_show(struct device *kdev,
- struct device_attribute *attr, char *buf)
-{
- struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
- unsigned int mask;
-
- mask = 0;
- if (HAS_RC6(dev_priv))
- mask |= BIT(0);
- if (HAS_RC6p(dev_priv))
- mask |= BIT(1);
- if (HAS_RC6pp(dev_priv))
- mask |= BIT(2);
-
- return sysfs_emit(buf, "%x\n", mask);
-}
-
-static ssize_t rc6_residency_ms_show(struct device *kdev,
- struct device_attribute *attr, char *buf)
-{
- struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
- u32 rc6_residency = calc_residency(dev_priv, GEN6_GT_GFX_RC6);
- return sysfs_emit(buf, "%u\n", rc6_residency);
-}
-
-static ssize_t rc6p_residency_ms_show(struct device *kdev,
- struct device_attribute *attr, char *buf)
-{
- struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
- u32 rc6p_residency = calc_residency(dev_priv, GEN6_GT_GFX_RC6p);
- return sysfs_emit(buf, "%u\n", rc6p_residency);
-}
-
-static ssize_t rc6pp_residency_ms_show(struct device *kdev,
- struct device_attribute *attr, char *buf)
-{
- struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
- u32 rc6pp_residency = calc_residency(dev_priv, GEN6_GT_GFX_RC6pp);
- return sysfs_emit(buf, "%u\n", rc6pp_residency);
-}
-
-static ssize_t media_rc6_residency_ms_show(struct device *kdev,
- struct device_attribute *attr, char *buf)
-{
- struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
- u32 rc6_residency = calc_residency(dev_priv, VLV_GT_MEDIA_RC6);
- return sysfs_emit(buf, "%u\n", rc6_residency);
-}
-
-static DEVICE_ATTR_RO(rc6_enable);
-static DEVICE_ATTR_RO(rc6_residency_ms);
-static DEVICE_ATTR_RO(rc6p_residency_ms);
-static DEVICE_ATTR_RO(rc6pp_residency_ms);
-static DEVICE_ATTR_RO(media_rc6_residency_ms);
-
-static struct attribute *rc6_attrs[] = {
- &dev_attr_rc6_enable.attr,
- &dev_attr_rc6_residency_ms.attr,
- NULL
-};
-
-static const struct attribute_group rc6_attr_group = {
- .name = power_group_name,
- .attrs = rc6_attrs
-};
-
-static struct attribute *rc6p_attrs[] = {
- &dev_attr_rc6p_residency_ms.attr,
- &dev_attr_rc6pp_residency_ms.attr,
- NULL
-};
-
-static const struct attribute_group rc6p_attr_group = {
- .name = power_group_name,
- .attrs = rc6p_attrs
-};
-
-static struct attribute *media_rc6_attrs[] = {
- &dev_attr_media_rc6_residency_ms.attr,
- NULL
-};
-
-static const struct attribute_group media_rc6_attr_group = {
- .name = power_group_name,
- .attrs = media_rc6_attrs
-};
-#endif
-
static int l3_access_valid(struct drm_i915_private *i915, loff_t offset)
{
if (!HAS_L3_DPF(i915))
@@ -257,171 +156,6 @@ static const struct bin_attribute dpf_attrs_1 = {
.private = (void *)1
};
-static ssize_t gt_act_freq_mhz_show(struct device *kdev,
- struct device_attribute *attr, char *buf)
-{
- struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
- struct intel_rps *rps = &to_gt(i915)->rps;
-
- return sysfs_emit(buf, "%d\n", intel_rps_read_actual_frequency(rps));
-}
-
-static ssize_t gt_cur_freq_mhz_show(struct device *kdev,
- struct device_attribute *attr, char *buf)
-{
- struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
- struct intel_rps *rps = &to_gt(i915)->rps;
-
- return sysfs_emit(buf, "%d\n", intel_rps_get_requested_frequency(rps));
-}
-
-static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
-{
- struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
- struct intel_rps *rps = &to_gt(i915)->rps;
-
- return sysfs_emit(buf, "%d\n", intel_rps_get_boost_frequency(rps));
-}
-
-static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
- struct intel_rps *rps = &to_gt(dev_priv)->rps;
- ssize_t ret;
- u32 val;
-
- ret = kstrtou32(buf, 0, &val);
- if (ret)
- return ret;
-
- ret = intel_rps_set_boost_frequency(rps, val);
-
- return ret ?: count;
-}
-
-static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev,
- struct device_attribute *attr, char *buf)
-{
- struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
- struct intel_rps *rps = &to_gt(dev_priv)->rps;
-
- return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->efficient_freq));
-}
-
-static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
-{
- struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
- struct intel_gt *gt = to_gt(dev_priv);
- struct intel_rps *rps = &gt->rps;
-
- return sysfs_emit(buf, "%d\n", intel_rps_get_max_frequency(rps));
-}
-
-static ssize_t gt_max_freq_mhz_store(struct device *kdev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
- struct intel_gt *gt = to_gt(dev_priv);
- struct intel_rps *rps = &gt->rps;
- ssize_t ret;
- u32 val;
-
- ret = kstrtou32(buf, 0, &val);
- if (ret)
- return ret;
-
- ret = intel_rps_set_max_frequency(rps, val);
-
- return ret ?: count;
-}
-
-static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
-{
- struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
- struct intel_gt *gt = to_gt(i915);
- struct intel_rps *rps = &gt->rps;
-
- return sysfs_emit(buf, "%d\n", intel_rps_get_min_frequency(rps));
-}
-
-static ssize_t gt_min_freq_mhz_store(struct device *kdev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
- struct intel_rps *rps = &to_gt(i915)->rps;
- ssize_t ret;
- u32 val;
-
- ret = kstrtou32(buf, 0, &val);
- if (ret)
- return ret;
-
- ret = intel_rps_set_min_frequency(rps, val);
-
- return ret ?: count;
-}
-
-static DEVICE_ATTR_RO(gt_act_freq_mhz);
-static DEVICE_ATTR_RO(gt_cur_freq_mhz);
-static DEVICE_ATTR_RW(gt_boost_freq_mhz);
-static DEVICE_ATTR_RW(gt_max_freq_mhz);
-static DEVICE_ATTR_RW(gt_min_freq_mhz);
-
-static DEVICE_ATTR_RO(vlv_rpe_freq_mhz);
-
-static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf);
-static DEVICE_ATTR(gt_RP0_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL);
-static DEVICE_ATTR(gt_RP1_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL);
-static DEVICE_ATTR(gt_RPn_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL);
-
-/* For now we have a static number of RP states */
-static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
-{
- struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
- struct intel_rps *rps = &to_gt(dev_priv)->rps;
- u32 val;
-
- if (attr == &dev_attr_gt_RP0_freq_mhz)
- val = intel_rps_get_rp0_frequency(rps);
- else if (attr == &dev_attr_gt_RP1_freq_mhz)
- val = intel_rps_get_rp1_frequency(rps);
- else if (attr == &dev_attr_gt_RPn_freq_mhz)
- val = intel_rps_get_rpn_frequency(rps);
- else
- BUG();
-
- return sysfs_emit(buf, "%d\n", val);
-}
-
-static const struct attribute * const gen6_attrs[] = {
- &dev_attr_gt_act_freq_mhz.attr,
- &dev_attr_gt_cur_freq_mhz.attr,
- &dev_attr_gt_boost_freq_mhz.attr,
- &dev_attr_gt_max_freq_mhz.attr,
- &dev_attr_gt_min_freq_mhz.attr,
- &dev_attr_gt_RP0_freq_mhz.attr,
- &dev_attr_gt_RP1_freq_mhz.attr,
- &dev_attr_gt_RPn_freq_mhz.attr,
- NULL,
-};
-
-static const struct attribute * const vlv_attrs[] = {
- &dev_attr_gt_act_freq_mhz.attr,
- &dev_attr_gt_cur_freq_mhz.attr,
- &dev_attr_gt_boost_freq_mhz.attr,
- &dev_attr_gt_max_freq_mhz.attr,
- &dev_attr_gt_min_freq_mhz.attr,
- &dev_attr_gt_RP0_freq_mhz.attr,
- &dev_attr_gt_RP1_freq_mhz.attr,
- &dev_attr_gt_RPn_freq_mhz.attr,
- &dev_attr_vlv_rpe_freq_mhz.attr,
- NULL,
-};
-
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
@@ -492,29 +226,6 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
struct device *kdev = dev_priv->drm.primary->kdev;
int ret;
-#ifdef CONFIG_PM
- if (HAS_RC6(dev_priv)) {
- ret = sysfs_merge_group(&kdev->kobj,
- &rc6_attr_group);
- if (ret)
- drm_err(&dev_priv->drm,
- "RC6 residency sysfs setup failed\n");
- }
- if (HAS_RC6p(dev_priv)) {
- ret = sysfs_merge_group(&kdev->kobj,
- &rc6p_attr_group);
- if (ret)
- drm_err(&dev_priv->drm,
- "RC6p residency sysfs setup failed\n");
- }
- if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
- ret = sysfs_merge_group(&kdev->kobj,
- &media_rc6_attr_group);
- if (ret)
- drm_err(&dev_priv->drm,
- "Media RC6 residency sysfs setup failed\n");
- }
-#endif
if (HAS_L3_DPF(dev_priv)) {
ret = device_create_bin_file(kdev, &dpf_attrs);
if (ret)
@@ -530,13 +241,10 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
}
}
- ret = 0;
- if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
- ret = sysfs_create_files(&kdev->kobj, vlv_attrs);
- else if (GRAPHICS_VER(dev_priv) >= 6)
- ret = sysfs_create_files(&kdev->kobj, gen6_attrs);
- if (ret)
- drm_err(&dev_priv->drm, "RPS sysfs setup failed\n");
+ dev_priv->sysfs_gt = kobject_create_and_add("gt", &kdev->kobj);
+ if (!dev_priv->sysfs_gt)
+ drm_warn(&dev_priv->drm,
+ "failed to register GT sysfs directory\n");
i915_setup_error_capture(kdev);
@@ -549,14 +257,6 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
i915_teardown_error_capture(kdev);
- if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
- sysfs_remove_files(&kdev->kobj, vlv_attrs);
- else
- sysfs_remove_files(&kdev->kobj, gen6_attrs);
device_remove_bin_file(kdev, &dpf_attrs_1);
device_remove_bin_file(kdev, &dpf_attrs);
-#ifdef CONFIG_PM
- sysfs_unmerge_group(&kdev->kobj, &rc6_attr_group);
- sysfs_unmerge_group(&kdev->kobj, &rc6p_attr_group);
-#endif
}
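
The RC6 and RPS frequency attributes removed above are not lost: the multi-tile sysfs work in this series re-registers them under the per-GT directory created here, one subdirectory per tile, so each GT exposes its own copies. The wiring is assumed to look roughly like this (helper names taken on trust from the rest of the series):

	/* Sketch: populate gt/gt<N>/ for every tile. */
	static void example_register_gts(struct drm_i915_private *i915)
	{
		struct intel_gt *gt;
		unsigned int i;

		for_each_gt(gt, i915, i)
			intel_gt_sysfs_register(gt);
	}
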
diff --git a/drivers/gpu/drm/i915/i915_sysfs.h b/drivers/gpu/drm/i915/i915_sysfs.h
index 41afd4366416..243a17741e3f 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.h
+++ b/drivers/gpu/drm/i915/i915_sysfs.h
@@ -6,8 +6,11 @@
#ifndef __I915_SYSFS_H__
#define __I915_SYSFS_H__
+struct device;
struct drm_i915_private;
+struct drm_i915_private *kdev_minor_to_i915(struct device *kdev);
+
void i915_setup_sysfs(struct drm_i915_private *i915);
void i915_teardown_sysfs(struct drm_i915_private *i915);
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index 129f668f21ff..a5109548abc0 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -70,8 +70,10 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
min_page_size = bo->page_alignment << PAGE_SHIFT;
GEM_BUG_ON(min_page_size < mm->chunk_size);
+ GEM_BUG_ON(!IS_ALIGNED(size, min_page_size));
- if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+ if (place->fpfn + bman_res->base.num_pages != place->lpfn &&
+ place->flags & TTM_PL_FLAG_CONTIGUOUS) {
unsigned long pages;
size = roundup_pow_of_two(size);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index d077f7b9eaad..162e8d83691b 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -47,7 +47,7 @@ static inline void assert_vma_held_evict(const struct i915_vma *vma)
* This is the only exception to the requirement of the object lock
* being held.
*/
- if (atomic_read(&vma->vm->open))
+ if (kref_read(&vma->vm->ref))
assert_object_held_shared(vma->obj);
}
@@ -113,6 +113,7 @@ vma_create(struct drm_i915_gem_object *obj,
struct i915_vma *pos = ERR_PTR(-E2BIG);
struct i915_vma *vma;
struct rb_node *rb, **p;
+ int err;
/* The aliasing_ppgtt should never be used directly! */
GEM_BUG_ON(vm == &vm->gt->ggtt->alias->vm);
@@ -121,8 +122,6 @@ vma_create(struct drm_i915_gem_object *obj,
if (vma == NULL)
return ERR_PTR(-ENOMEM);
- kref_init(&vma->ref);
- vma->vm = i915_vm_get(vm);
vma->ops = &vm->vma_ops;
vma->obj = obj;
vma->size = obj->base.size;
@@ -138,6 +137,8 @@ vma_create(struct drm_i915_gem_object *obj,
}
INIT_LIST_HEAD(&vma->closed_link);
+ INIT_LIST_HEAD(&vma->obj_link);
+ RB_CLEAR_NODE(&vma->obj_node);
if (view && view->type != I915_GGTT_VIEW_NORMAL) {
vma->ggtt_view = *view;
@@ -163,8 +164,16 @@ vma_create(struct drm_i915_gem_object *obj,
GEM_BUG_ON(!IS_ALIGNED(vma->size, I915_GTT_PAGE_SIZE));
- spin_lock(&obj->vma.lock);
+ err = mutex_lock_interruptible(&vm->mutex);
+ if (err) {
+ pos = ERR_PTR(err);
+ goto err_vma;
+ }
+
+ vma->vm = vm;
+ list_add_tail(&vma->vm_link, &vm->unbound_list);
+ spin_lock(&obj->vma.lock);
if (i915_is_ggtt(vm)) {
if (unlikely(overflows_type(vma->size, u32)))
goto err_unlock;
@@ -222,13 +231,15 @@ vma_create(struct drm_i915_gem_object *obj,
list_add_tail(&vma->obj_link, &obj->vma.list);
spin_unlock(&obj->vma.lock);
+ mutex_unlock(&vm->mutex);
return vma;
err_unlock:
spin_unlock(&obj->vma.lock);
+ list_del_init(&vma->vm_link);
+ mutex_unlock(&vm->mutex);
err_vma:
- i915_vm_put(vm);
i915_vma_free(vma);
return pos;
}
@@ -279,7 +290,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
struct i915_vma *vma;
GEM_BUG_ON(view && !i915_is_ggtt_or_dpt(vm));
- GEM_BUG_ON(!atomic_read(&vm->open));
+ GEM_BUG_ON(!kref_read(&vm->ref));
spin_lock(&obj->vma.lock);
vma = i915_vma_lookup(obj, vm, view);
@@ -322,7 +333,6 @@ static void __vma_release(struct dma_fence_work *work)
i915_gem_object_put(vw->pinned);
i915_vm_free_pt_stash(vw->vm, &vw->stash);
- i915_vm_put(vw->vm);
if (vw->vma_res)
i915_vma_resource_put(vw->vma_res);
}
@@ -515,21 +525,18 @@ int i915_vma_bind(struct i915_vma *vma,
if (!work->vma_res->bi.pages_rsgt)
work->pinned = i915_gem_object_get(vma->obj);
} else {
- if (vma->obj) {
- ret = i915_gem_object_wait_moving_fence(vma->obj, true);
- if (ret) {
- i915_vma_resource_free(vma->resource);
- vma->resource = NULL;
+ ret = i915_gem_object_wait_moving_fence(vma->obj, true);
+ if (ret) {
+ i915_vma_resource_free(vma->resource);
+ vma->resource = NULL;
- return ret;
- }
+ return ret;
}
vma->ops->bind_vma(vma->vm, NULL, vma->resource, cache_level,
bind_flags);
}
- if (vma->obj)
- set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
+ set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
atomic_or(bind_flags, &vma->flags);
return 0;
@@ -841,7 +848,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));
- list_add_tail(&vma->vm_link, &vma->vm->bound_list);
+ list_move_tail(&vma->vm_link, &vma->vm->bound_list);
return 0;
}
@@ -857,7 +864,7 @@ i915_vma_detach(struct i915_vma *vma)
* vma, we can drop its hold on the backing storage and allow
* it to be reaped by the shrinker.
*/
- list_del(&vma->vm_link);
+ list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
}
static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
@@ -1357,18 +1364,10 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
if (err)
return err;
- if (vma->obj) {
- err = i915_gem_object_get_moving_fence(vma->obj, &moving);
- if (err)
- return err;
- } else {
- moving = NULL;
- }
-
if (flags & PIN_GLOBAL)
wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
- if (flags & vma->vm->bind_async_flags || moving) {
+ if (flags & vma->vm->bind_async_flags) {
/* lock VM */
err = i915_vm_lock_objects(vma->vm, ww);
if (err)
@@ -1380,7 +1379,11 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
goto err_rpm;
}
- work->vm = i915_vm_get(vma->vm);
+ work->vm = vma->vm;
+
+ err = i915_gem_object_get_moving_fence(vma->obj, &moving);
+ if (err)
+ goto err_rpm;
dma_fence_work_chain(&work->base, moving);
@@ -1625,16 +1628,6 @@ void i915_vma_reopen(struct i915_vma *vma)
__i915_vma_remove_closed(vma);
}
-void i915_vma_release(struct kref *ref)
-{
- struct i915_vma *vma = container_of(ref, typeof(*vma), ref);
-
- i915_vm_put(vma->vm);
- i915_active_fini(&vma->active);
- GEM_WARN_ON(vma->resource);
- i915_vma_free(vma);
-}
-
static void force_unbind(struct i915_vma *vma)
{
if (!drm_mm_node_allocated(&vma->node))
@@ -1645,7 +1638,7 @@ static void force_unbind(struct i915_vma *vma)
GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
}
-static void release_references(struct i915_vma *vma)
+static void release_references(struct i915_vma *vma, bool vm_ddestroy)
{
struct drm_i915_gem_object *obj = vma->obj;
@@ -1655,11 +1648,17 @@ static void release_references(struct i915_vma *vma)
list_del(&vma->obj_link);
if (!RB_EMPTY_NODE(&vma->obj_node))
rb_erase(&vma->obj_node, &obj->vma.tree);
+
spin_unlock(&obj->vma.lock);
__i915_vma_remove_closed(vma);
- __i915_vma_put(vma);
+ if (vm_ddestroy)
+ i915_vm_resv_put(vma->vm);
+
+ i915_active_fini(&vma->active);
+ GEM_WARN_ON(vma->resource);
+ i915_vma_free(vma);
}
/**
@@ -1674,8 +1673,12 @@ static void release_references(struct i915_vma *vma)
* - __i915_gem_object_pages_fini()
* - __i915_vm_close() - Blocks the above function by taking a reference on
* the object.
- * - __i915_vma_parked() - Blocks the above functions by taking an open-count on
- * the vm and a reference on the object.
+ * - __i915_vma_parked() - Blocks the above functions by taking a reference
+ * on the vm and a reference on the object. Also takes the object lock so
+ * destruction from __i915_vma_parked() can be blocked by holding the
+ * object lock. Since the object lock is only allowed from within i915 with
+ * an object refcount, holding the object lock also implicitly blocks the
+ * vma freeing from __i915_gem_object_pages_fini().
*
* Because of locks taken during destruction, a vma is also guaranteed to
* stay alive while the following locks are held if it was looked up while
@@ -1683,24 +1686,27 @@ static void release_references(struct i915_vma *vma)
* - vm->mutex
* - obj->vma.lock
* - gt->closed_lock
- *
- * A vma user can also temporarily keep the vma alive while holding a vma
- * reference.
*/
void i915_vma_destroy_locked(struct i915_vma *vma)
{
lockdep_assert_held(&vma->vm->mutex);
force_unbind(vma);
- release_references(vma);
+ list_del_init(&vma->vm_link);
+ release_references(vma, false);
}
void i915_vma_destroy(struct i915_vma *vma)
{
+ bool vm_ddestroy;
+
mutex_lock(&vma->vm->mutex);
force_unbind(vma);
+ list_del_init(&vma->vm_link);
+ vm_ddestroy = vma->vm_ddestroy;
+ vma->vm_ddestroy = false;
mutex_unlock(&vma->vm->mutex);
- release_references(vma);
+ release_references(vma, vm_ddestroy);
}
void i915_vma_parked(struct intel_gt *gt)
@@ -1718,7 +1724,7 @@ void i915_vma_parked(struct intel_gt *gt)
if (!kref_get_unless_zero(&obj->base.refcount))
continue;
- if (!i915_vm_tryopen(vm)) {
+ if (!i915_vm_tryget(vm)) {
i915_gem_object_put(obj);
continue;
}
@@ -1744,7 +1750,7 @@ void i915_vma_parked(struct intel_gt *gt)
}
i915_gem_object_put(obj);
- i915_vm_close(vm);
+ i915_vm_put(vm);
}
}
@@ -1903,7 +1909,9 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
/* If vm is not open, unbind is a nop. */
vma_res->needs_wakeref = i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND) &&
- atomic_read(&vma->vm->open);
+ kref_read(&vma->vm->ref);
+ vma_res->skip_pte_rewrite = !kref_read(&vma->vm->ref) ||
+ vma->vm->skip_pte_rewrite;
trace_i915_vma_unbind(vma);
unbind_fence = i915_vma_resource_unbind(vma_res);
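
Net effect in i915_vma.c: a vma no longer carries its own kref; its lifetime is bounded by the object and the vm, and the only thing keeping a doomed vm's memory valid until its vmas are reaped is the vm dma_resv reference tracked by vm_ddestroy. As a comment-level summary of the two destroy paths above:

	/*
	 * i915_vma_destroy_locked(): caller already holds vm->mutex
	 *     (vm teardown); never drops the dma_resv reference.
	 * i915_vma_destroy(): takes vm->mutex itself, samples and
	 *     clears vma->vm_ddestroy, then drops the vm dma_resv
	 *     reference via i915_vm_resv_put() if it was set.
	 */
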
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 67ae7341c7e0..6034991d89fe 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -222,20 +222,6 @@ void i915_vma_unlink_ctx(struct i915_vma *vma);
void i915_vma_close(struct i915_vma *vma);
void i915_vma_reopen(struct i915_vma *vma);
-static inline struct i915_vma *__i915_vma_get(struct i915_vma *vma)
-{
- if (kref_get_unless_zero(&vma->ref))
- return vma;
-
- return NULL;
-}
-
-void i915_vma_release(struct kref *ref);
-static inline void __i915_vma_put(struct i915_vma *vma)
-{
- kref_put(&vma->ref, i915_vma_release);
-}
-
void i915_vma_destroy_locked(struct i915_vma *vma);
void i915_vma_destroy(struct i915_vma *vma);
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.c b/drivers/gpu/drm/i915/i915_vma_resource.c
index 57ae92ba8af1..27c55027387a 100644
--- a/drivers/gpu/drm/i915/i915_vma_resource.c
+++ b/drivers/gpu/drm/i915/i915_vma_resource.c
@@ -178,7 +178,7 @@ static void i915_vma_resource_unbind_work(struct work_struct *work)
bool lockdep_cookie;
lockdep_cookie = dma_fence_begin_signalling();
- if (likely(atomic_read(&vm->open)))
+ if (likely(!vma_res->skip_pte_rewrite))
vma_res->ops->unbind_vma(vm, vma_res);
dma_fence_end_signalling(lockdep_cookie);
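
The unbind worker runs in the dma-fence signalling path and can no longer consult a vm open count there, so the decision is snapshotted into the resource when the unbind is scheduled (skip_pte_rewrite, set in __i915_vma_evict() earlier in this diff); by the time the worker runs, the vm refcount may legitimately have reached zero.
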
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h
index 25913913baa6..5d8427caa2ba 100644
--- a/drivers/gpu/drm/i915/i915_vma_resource.h
+++ b/drivers/gpu/drm/i915/i915_vma_resource.h
@@ -62,6 +62,11 @@ struct i915_page_sizes {
* deferred to a work item awaiting unsignaled fences. This is a hack.
* (dma_fence_work uses a fence flag for this, but this seems slightly
* cleaner).
+ * @needs_wakeref: Whether a wakeref is needed during unbind. Since we can't
+ * take a wakeref in the dma-fence signalling critical path, it needs to be
+ * taken when the unbind is scheduled.
+ * @skip_pte_rewrite: During ggtt suspend and vm takedown, rewriting the
+ * PTEs on unbind needs to be skipped.
*
* The lifetime of a struct i915_vma_resource is from a binding request to
* the actual possible asynchronous unbind has completed.
@@ -113,6 +118,7 @@ struct i915_vma_resource {
bool allocated:1;
bool immediate_unbind:1;
bool needs_wakeref:1;
+ bool skip_pte_rewrite:1;
};
bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
index 88370dadca82..be6e028c3b57 100644
--- a/drivers/gpu/drm/i915/i915_vma_types.h
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -211,7 +211,6 @@ struct i915_vma {
* handles (but same file) for execbuf, i.e. the number of aliases
* that exist in the ctx->handle_vmas LUT for this vma.
*/
- struct kref ref;
atomic_t open_count;
atomic_t flags;
/**
@@ -272,6 +271,13 @@ struct i915_vma {
atomic_t pages_count; /* number of active binds to the pages */
/**
+ * Whether we hold a reference on the vm's dma_resv, to temporarily
+ * block vm freeing until the vma is destroyed.
+ * Protected by the vm mutex.
+ */
+ bool vm_ddestroy;
+
+ /**
* Support different GGTT views into the same object.
* This means there can be multiple VMA mappings per object and per VM.
* i915_ggtt_view_type is used to distinguish between those entries.
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index f9b955810593..576d15a04c9e 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -141,6 +141,8 @@ enum intel_ppgtt_type {
func(has_flat_ccs); \
func(has_global_mocs); \
func(has_gt_uc); \
+ func(has_heci_pxp); \
+ func(has_heci_gscfi); \
func(has_guc_deprivilege); \
func(has_l3_dpf); \
func(has_llc); \
diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c
index ded78b83e0b5..e38d2db1c3e3 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/intel_memory_region.c
@@ -19,7 +19,7 @@ static const struct {
.class = INTEL_MEMORY_SYSTEM,
.instance = 0,
},
- [INTEL_REGION_LMEM] = {
+ [INTEL_REGION_LMEM_0] = {
.class = INTEL_MEMORY_LOCAL,
.instance = 0,
},
diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h
index bbc35ec5c090..3d8378c1b447 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.h
+++ b/drivers/gpu/drm/i915/intel_memory_region.h
@@ -29,14 +29,17 @@ enum intel_memory_type {
enum intel_region_id {
INTEL_REGION_SMEM = 0,
- INTEL_REGION_LMEM,
+ INTEL_REGION_LMEM_0,
+ INTEL_REGION_LMEM_1,
+ INTEL_REGION_LMEM_2,
+ INTEL_REGION_LMEM_3,
INTEL_REGION_STOLEN_SMEM,
INTEL_REGION_STOLEN_LMEM,
INTEL_REGION_UNKNOWN, /* Should be last */
};
#define REGION_SMEM BIT(INTEL_REGION_SMEM)
-#define REGION_LMEM BIT(INTEL_REGION_LMEM)
+#define REGION_LMEM BIT(INTEL_REGION_LMEM_0)
#define REGION_STOLEN_SMEM BIT(INTEL_REGION_STOLEN_SMEM)
#define REGION_STOLEN_LMEM BIT(INTEL_REGION_STOLEN_LMEM)
@@ -54,6 +57,7 @@ struct intel_memory_region_ops {
int (*init_object)(struct intel_memory_region *mem,
struct drm_i915_gem_object *obj,
+ resource_size_t offset,
resource_size_t size,
resource_size_t page_size,
unsigned int flags);
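
With several LMEM instances enumerated, per-region code indexes the driver's region array by these ids, and REGION_LMEM deliberately stays an alias for instance 0 so existing single-region checks keep working. A small iteration sketch (assuming the i915->mm.regions[] layout the driver uses):

	/* Sketch: walk every populated local-memory region. */
	static void example_for_each_lmem(struct drm_i915_private *i915)
	{
		enum intel_region_id id;

		for (id = INTEL_REGION_LMEM_0; id <= INTEL_REGION_LMEM_3; id++) {
			struct intel_memory_region *mr = i915->mm.regions[id];

			if (!mr)
				continue;
			/* ... use mr ... */
		}
	}
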
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c
index 737ef3f4ab54..62ff77445b01 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -12,6 +12,7 @@
#include "intel_region_ttm.h"
+#include "gem/i915_gem_region.h"
#include "gem/i915_gem_ttm.h" /* For the funcs/ops export only */
/**
* DOC: TTM support structure
@@ -191,6 +192,7 @@ intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
*/
struct ttm_resource *
intel_region_ttm_resource_alloc(struct intel_memory_region *mem,
+ resource_size_t offset,
resource_size_t size,
unsigned int flags)
{
@@ -202,7 +204,10 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem,
if (flags & I915_BO_ALLOC_CONTIGUOUS)
place.flags |= TTM_PL_FLAG_CONTIGUOUS;
- if (mem->io_size && mem->io_size < mem->total) {
+ if (offset != I915_BO_INVALID_OFFSET) {
+ place.fpfn = offset >> PAGE_SHIFT;
+ place.lpfn = place.fpfn + (size >> PAGE_SHIFT);
+ } else if (mem->io_size && mem->io_size < mem->total) {
if (flags & I915_BO_ALLOC_GPU_ONLY) {
place.flags |= TTM_PL_FLAG_TOPDOWN;
} else {
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h
index fdee5e7bd46c..cf9d86dcf409 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.h
+++ b/drivers/gpu/drm/i915/intel_region_ttm.h
@@ -36,6 +36,7 @@ struct ttm_device_funcs *i915_ttm_driver(void);
#ifdef CONFIG_DRM_I915_SELFTEST
struct ttm_resource *
intel_region_ttm_resource_alloc(struct intel_memory_region *mem,
+ resource_size_t offset,
resource_size_t size,
unsigned int flags);
#endif
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 24cd3c3729ca..83517a703eb6 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -2050,14 +2050,11 @@ static int i915_pmic_bus_access_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}
-int intel_uncore_setup_mmio(struct intel_uncore *uncore)
+int intel_uncore_setup_mmio(struct intel_uncore *uncore, phys_addr_t phys_addr)
{
struct drm_i915_private *i915 = uncore->i915;
- struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
- int mmio_bar;
int mmio_size;
- mmio_bar = GRAPHICS_VER(i915) == 2 ? 1 : 0;
/*
* Before gen4, the registers and the GTT are behind different BARs.
* However, from gen4 onwards, the registers and the GTT are shared
@@ -2074,7 +2071,7 @@ int intel_uncore_setup_mmio(struct intel_uncore *uncore)
else
mmio_size = 2 * 1024 * 1024;
- uncore->regs = pci_iomap(pdev, mmio_bar, mmio_size);
+ uncore->regs = ioremap(phys_addr, mmio_size);
if (uncore->regs == NULL) {
drm_err(&i915->drm, "failed to map registers\n");
return -EIO;
@@ -2085,9 +2082,7 @@ int intel_uncore_setup_mmio(struct intel_uncore *uncore)
void intel_uncore_cleanup_mmio(struct intel_uncore *uncore)
{
- struct pci_dev *pdev = to_pci_dev(uncore->i915->drm.dev);
-
- pci_iounmap(pdev, uncore->regs);
+ iounmap(uncore->regs);
}
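
For context, a hedged caller sketch (not part of this hunk): with the BAR lookup hoisted out of intel_uncore_setup_mmio(), a probe path derives the physical address itself, which is what lets a multi-tile caller add a per-tile offset before mapping. The function name is illustrative.

	static int example_setup_mmio(struct drm_i915_private *i915)
	{
		struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
		int mmio_bar = GRAPHICS_VER(i915) == 2 ? 1 : 0;
		phys_addr_t phys_addr = pci_resource_start(pdev, mmio_bar);

		/* A multi-tile caller could add a per-tile offset here. */
		return intel_uncore_setup_mmio(&i915->uncore, phys_addr);
	}
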
void intel_uncore_init_early(struct intel_uncore *uncore,
@@ -2475,17 +2470,46 @@ intel_uncore_forcewake_for_reg(struct intel_uncore *uncore,
return fw_domains;
}
-u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore,
- i915_reg_t reg,
- int slice, int subslice)
+/**
+ * uncore_rw_with_mcr_steering_fw - Access a register after programming
+ * the MCR selector register.
+ * @uncore: pointer to struct intel_uncore
+ * @reg: register being accessed
+ * @rw_flag: FW_REG_READ for read access or FW_REG_WRITE for write access
+ * @slice: slice number (ignored for multi-cast write)
+ * @subslice: sub-slice number (ignored for multi-cast write)
+ * @value: register value to be written (ignored for read)
+ *
+ * Return: 0 for write access, register value for read access.
+ *
+ * Caller needs to make sure the relevant forcewake wells are up.
+ */
+static u32 uncore_rw_with_mcr_steering_fw(struct intel_uncore *uncore,
+ i915_reg_t reg, u8 rw_flag,
+ int slice, int subslice, u32 value)
{
- u32 mcr_mask, mcr_ss, mcr, old_mcr, val;
+ u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0;
lockdep_assert_held(&uncore->lock);
if (GRAPHICS_VER(uncore->i915) >= 11) {
mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
+
+ /*
+ * Wa_22013088509
+ *
+ * The setting of the multicast/unicast bit usually wouldn't
+ * matter for read operations (which always return the value
+ * from a single register instance regardless of how that bit
+ * is set), but some platforms have a workaround requiring us
+ * to remain in multicast mode for reads. There's no real
+ * downside to this, so we'll just go ahead and do so on all
+ * platforms; we'll only clear the multicast bit from the mask
+ * when explicitly doing a write operation.
+ */
+ if (rw_flag == FW_REG_WRITE)
+ mcr_mask |= GEN11_MCR_MULTICAST;
} else {
mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
@@ -2497,7 +2521,10 @@ u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore,
mcr |= mcr_ss;
intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
- val = intel_uncore_read_fw(uncore, reg);
+ if (rw_flag == FW_REG_READ)
+ val = intel_uncore_read_fw(uncore, reg);
+ else
+ intel_uncore_write_fw(uncore, reg, value);
mcr &= ~mcr_mask;
mcr |= old_mcr & mcr_mask;
@@ -2507,14 +2534,16 @@ u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore,
return val;
}
-u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore,
- i915_reg_t reg, int slice, int subslice)
+static u32 uncore_rw_with_mcr_steering(struct intel_uncore *uncore,
+ i915_reg_t reg, u8 rw_flag,
+ int slice, int subslice,
+ u32 value)
{
enum forcewake_domains fw_domains;
u32 val;
fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
- FW_REG_READ);
+ rw_flag);
fw_domains |= intel_uncore_forcewake_for_reg(uncore,
GEN8_MCR_SELECTOR,
FW_REG_READ | FW_REG_WRITE);
@@ -2522,7 +2551,8 @@ u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore,
spin_lock_irq(&uncore->lock);
intel_uncore_forcewake_get__locked(uncore, fw_domains);
- val = intel_uncore_read_with_mcr_steering_fw(uncore, reg, slice, subslice);
+ val = uncore_rw_with_mcr_steering_fw(uncore, reg, rw_flag,
+ slice, subslice, value);
intel_uncore_forcewake_put__locked(uncore, fw_domains);
spin_unlock_irq(&uncore->lock);
@@ -2530,6 +2560,28 @@ u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore,
return val;
}
+u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore,
+ i915_reg_t reg, int slice, int subslice)
+{
+ return uncore_rw_with_mcr_steering_fw(uncore, reg, FW_REG_READ,
+ slice, subslice, 0);
+}
+
+u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore,
+ i915_reg_t reg, int slice, int subslice)
+{
+ return uncore_rw_with_mcr_steering(uncore, reg, FW_REG_READ,
+ slice, subslice, 0);
+}
+
+void intel_uncore_write_with_mcr_steering(struct intel_uncore *uncore,
+ i915_reg_t reg, u32 value,
+ int slice, int subslice)
+{
+ uncore_rw_with_mcr_steering(uncore, reg, FW_REG_WRITE,
+ slice, subslice, value);
+}
+
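
For context, a hedged usage sketch of the new exported write wrapper; the register and steering target below are placeholders. Forcewake and uncore->lock handling happen inside uncore_rw_with_mcr_steering(), so a caller only names the instance it wants.

	static void write_one_steered_instance(struct intel_uncore *uncore,
					       i915_reg_t reg, u32 val)
	{
		int slice = 0, subslice = 0;	/* hypothetical valid target */

		/* Programs GEN8_MCR_SELECTOR, writes reg, restores the MCR. */
		intel_uncore_write_with_mcr_steering(uncore, reg, val,
						     slice, subslice);
	}
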
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_uncore.c"
#include "selftests/intel_uncore.c"
diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
index 6ff56d673e2b..52fe3d89dd2b 100644
--- a/drivers/gpu/drm/i915/intel_uncore.h
+++ b/drivers/gpu/drm/i915/intel_uncore.h
@@ -29,6 +29,7 @@
#include <linux/notifier.h>
#include <linux/hrtimer.h>
#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/types.h>
#include "i915_reg_defs.h"
@@ -214,12 +215,14 @@ u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore,
int slice, int subslice);
u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore,
i915_reg_t reg, int slice, int subslice);
-
+void intel_uncore_write_with_mcr_steering(struct intel_uncore *uncore,
+ i915_reg_t reg, u32 value,
+ int slice, int subslice);
void
intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug);
void intel_uncore_init_early(struct intel_uncore *uncore,
struct intel_gt *gt);
-int intel_uncore_setup_mmio(struct intel_uncore *uncore);
+int intel_uncore_setup_mmio(struct intel_uncore *uncore, phys_addr_t phys_addr);
int intel_uncore_init_mmio(struct intel_uncore *uncore);
void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore,
struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index ab751192eb3b..8633bec18fa7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1112,10 +1112,16 @@ static int misaligned_case(struct i915_address_space *vm, struct intel_memory_re
expected_vma_size = round_up(size, 1 << (ffs(vma->resource->page_sizes_gtt) - 1));
expected_node_size = expected_vma_size;
- if (NEEDS_COMPACT_PT(vm->i915) && i915_gem_object_is_lmem(obj)) {
- /* compact-pt should expand lmem node to 2MB */
+ if (HAS_64K_PAGES(vm->i915) && i915_gem_object_is_lmem(obj)) {
+ /*
+	 * The compact-pt should expand the lmem node to 2MB for the ppGTT;
+	 * for all other cases we should only expect 64K.
+ */
expected_vma_size = round_up(size, I915_GTT_PAGE_SIZE_64K);
- expected_node_size = round_up(size, I915_GTT_PAGE_SIZE_2M);
+ if (NEEDS_COMPACT_PT(vm->i915) && !i915_is_ggtt(vm))
+ expected_node_size = round_up(size, I915_GTT_PAGE_SIZE_2M);
+ else
+ expected_node_size = round_up(size, I915_GTT_PAGE_SIZE_64K);
}
if (vma->size != expected_vma_size || vma->node.size != expected_node_size) {
@@ -1150,7 +1156,7 @@ static int misaligned_pin(struct i915_address_space *vm,
flags |= PIN_GLOBAL;
for_each_memory_region(mr, vm->i915, id) {
- u64 min_alignment = i915_vm_min_alignment(vm, (enum intel_memory_type)id);
+ u64 min_alignment = i915_vm_min_alignment(vm, mr->type);
u64 size = min_alignment;
u64 addr = round_down(hole_start + (hole_size / 2), min_alignment);
@@ -1205,7 +1211,7 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
goto out_free;
}
GEM_BUG_ON(offset_in_page(ppgtt->vm.total));
- GEM_BUG_ON(!atomic_read(&ppgtt->vm.open));
+ assert_vm_alive(&ppgtt->vm);
err = func(&ppgtt->vm, 0, ppgtt->vm.total, end_time);
@@ -1438,7 +1444,7 @@ static void track_vma_bind(struct i915_vma *vma)
vma->resource->bi.pages = vma->pages;
mutex_lock(&vma->vm->mutex);
- list_add_tail(&vma->vm_link, &vma->vm->bound_list);
+ list_move_tail(&vma->vm_link, &vma->vm->bound_list);
mutex_unlock(&vma->vm->mutex);
}
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 573d9b2e1a4a..9c31a16f8380 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -73,7 +73,7 @@ static void mock_device_release(struct drm_device *dev)
destroy_workqueue(i915->wq);
intel_region_ttm_device_fini(i915);
- intel_gt_driver_late_release(to_gt(i915));
+ intel_gt_driver_late_release_all(i915);
intel_memory_regions_driver_release(i915);
drm_mode_config_cleanup(&i915->drm);
@@ -112,6 +112,11 @@ static struct dev_pm_domain pm_domain = {
},
};
+static void mock_gt_probe(struct drm_i915_private *i915)
+{
+ i915->gt[0] = &i915->gt0;
+}
+
struct drm_i915_private *mock_gem_device(void)
{
#if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU)
@@ -180,11 +185,11 @@ struct drm_i915_private *mock_gem_device(void)
spin_lock_init(&i915->gpu_error.lock);
i915_gem_init__mm(i915);
- intel_gt_init_early(to_gt(i915), i915);
- __intel_gt_init_early(to_gt(i915), i915);
+ intel_root_gt_init_early(i915);
mock_uncore_init(&i915->uncore, i915);
atomic_inc(&to_gt(i915)->wakeref.count); /* disable; no hw support */
to_gt(i915)->awake = -ENODEV;
+ mock_gt_probe(i915);
ret = intel_region_ttm_device_init(i915);
if (ret)
@@ -229,7 +234,7 @@ err_unlock:
err_drv:
intel_region_ttm_device_fini(i915);
err_ttm:
- intel_gt_driver_late_release(to_gt(i915));
+ intel_gt_driver_late_release_all(i915);
intel_memory_regions_driver_release(i915);
drm_mode_config_cleanup(&i915->drm);
mock_destroy_device(i915);
diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c
index f64325491f35..670557ce1024 100644
--- a/drivers/gpu/drm/i915/selftests/mock_region.c
+++ b/drivers/gpu/drm/i915/selftests/mock_region.c
@@ -26,6 +26,7 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj)
int err;
obj->mm.res = intel_region_ttm_resource_alloc(obj->mm.region,
+ obj->bo_offset,
obj->base.size,
obj->flags);
if (IS_ERR(obj->mm.res))
@@ -57,6 +58,7 @@ static const struct drm_i915_gem_object_ops mock_region_obj_ops = {
static int mock_object_init(struct intel_memory_region *mem,
struct drm_i915_gem_object *obj,
+ resource_size_t offset,
resource_size_t size,
resource_size_t page_size,
unsigned int flags)
@@ -70,6 +72,8 @@ static int mock_object_init(struct intel_memory_region *mem,
drm_gem_private_object_init(&i915->drm, &obj->base, size);
i915_gem_object_init(obj, &mock_region_obj_ops, &lock_class, flags);
+ obj->bo_offset = offset;
+
obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
diff --git a/drivers/misc/mei/Kconfig b/drivers/misc/mei/Kconfig
index 0e0bcd0da852..d21486d69df2 100644
--- a/drivers/misc/mei/Kconfig
+++ b/drivers/misc/mei/Kconfig
@@ -46,6 +46,20 @@ config INTEL_MEI_TXE
Supported SoCs:
Intel Bay Trail
+config INTEL_MEI_GSC
+ tristate "Intel MEI GSC embedded device"
+ depends on INTEL_MEI
+ depends on INTEL_MEI_ME
+ depends on X86 && PCI
+ depends on DRM_I915
+ help
+ Intel auxiliary driver for GSC devices embedded in Intel graphics devices.
+
+	  An MEI device, here called GSC, can be embedded in an
+	  Intel graphics device to support a range of chassis
+	  tasks such as graphics card firmware updates and security
+	  tasks.
+
source "drivers/misc/mei/hdcp/Kconfig"
source "drivers/misc/mei/pxp/Kconfig"
diff --git a/drivers/misc/mei/Makefile b/drivers/misc/mei/Makefile
index d8e5165917f2..fb740d754900 100644
--- a/drivers/misc/mei/Makefile
+++ b/drivers/misc/mei/Makefile
@@ -18,6 +18,9 @@ obj-$(CONFIG_INTEL_MEI_ME) += mei-me.o
mei-me-objs := pci-me.o
mei-me-objs += hw-me.o
+obj-$(CONFIG_INTEL_MEI_GSC) += mei-gsc.o
+mei-gsc-objs := gsc-me.o
+
obj-$(CONFIG_INTEL_MEI_TXE) += mei-txe.o
mei-txe-objs := pci-txe.o
mei-txe-objs += hw-txe.o
diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c
index 67844089db21..59506ba6fc48 100644
--- a/drivers/misc/mei/bus-fixup.c
+++ b/drivers/misc/mei/bus-fixup.c
@@ -30,6 +30,12 @@ static const uuid_le mei_nfc_info_guid = MEI_UUID_NFC_INFO;
#define MEI_UUID_MKHIF_FIX UUID_LE(0x55213584, 0x9a29, 0x4916, \
0xba, 0xdf, 0xf, 0xb7, 0xed, 0x68, 0x2a, 0xeb)
+#define MEI_UUID_IGSC_MKHI UUID_LE(0xE2C2AFA2, 0x3817, 0x4D19, \
+ 0x9D, 0x95, 0x06, 0xB1, 0x6B, 0x58, 0x8A, 0x5D)
+
+#define MEI_UUID_IGSC_MKHI_FIX UUID_LE(0x46E0C1FB, 0xA546, 0x414F, \
+ 0x91, 0x70, 0xB7, 0xF4, 0x6D, 0x57, 0xB4, 0xAD)
+
#define MEI_UUID_HDCP UUID_LE(0xB638AB7E, 0x94E2, 0x4EA2, \
0xA5, 0x52, 0xD1, 0xC5, 0x4B, 0x62, 0x7F, 0x04)
@@ -241,6 +247,23 @@ static void mei_mkhi_fix(struct mei_cl_device *cldev)
mei_cldev_disable(cldev);
}
+static void mei_gsc_mkhi_ver(struct mei_cl_device *cldev)
+{
+ int ret;
+
+ /* No need to enable the client if nothing is needed from it */
+ if (!cldev->bus->fw_f_fw_ver_supported)
+ return;
+
+ ret = mei_cldev_enable(cldev);
+ if (ret)
+ return;
+
+ ret = mei_fwver(cldev);
+ if (ret < 0)
+ dev_err(&cldev->dev, "FW version command failed %d\n", ret);
+ mei_cldev_disable(cldev);
+}
/**
* mei_wd - wd client on the bus, change protocol version
* as the API has changed.
@@ -492,6 +515,8 @@ static struct mei_fixup {
MEI_FIXUP(MEI_UUID_NFC_HCI, mei_nfc),
MEI_FIXUP(MEI_UUID_WD, mei_wd),
MEI_FIXUP(MEI_UUID_MKHIF_FIX, mei_mkhi_fix),
+ MEI_FIXUP(MEI_UUID_IGSC_MKHI, mei_gsc_mkhi_ver),
+ MEI_FIXUP(MEI_UUID_IGSC_MKHI_FIX, mei_gsc_mkhi_ver),
MEI_FIXUP(MEI_UUID_HDCP, whitelist),
MEI_FIXUP(MEI_UUID_ANY, vt_support),
MEI_FIXUP(MEI_UUID_PAVP, whitelist),
diff --git a/drivers/misc/mei/gsc-me.c b/drivers/misc/mei/gsc-me.c
new file mode 100644
index 000000000000..c8145e9b62b6
--- /dev/null
+++ b/drivers/misc/mei/gsc-me.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(c) 2019-2022, Intel Corporation. All rights reserved.
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ */
+
+#include <linux/module.h>
+#include <linux/mei_aux.h>
+#include <linux/device.h>
+#include <linux/irqreturn.h>
+#include <linux/jiffies.h>
+#include <linux/ktime.h>
+#include <linux/delay.h>
+#include <linux/pm_runtime.h>
+
+#include "mei_dev.h"
+#include "hw-me.h"
+#include "hw-me-regs.h"
+
+#include "mei-trace.h"
+
+#define MEI_GSC_RPM_TIMEOUT 500
+
+static int mei_gsc_read_hfs(const struct mei_device *dev, int where, u32 *val)
+{
+ struct mei_me_hw *hw = to_me_hw(dev);
+
+ *val = ioread32(hw->mem_addr + where + 0xC00);
+
+ return 0;
+}
+
+static int mei_gsc_probe(struct auxiliary_device *aux_dev,
+ const struct auxiliary_device_id *aux_dev_id)
+{
+ struct mei_aux_device *adev = auxiliary_dev_to_mei_aux_dev(aux_dev);
+ struct mei_device *dev;
+ struct mei_me_hw *hw;
+ struct device *device;
+ const struct mei_cfg *cfg;
+ int ret;
+
+ cfg = mei_me_get_cfg(aux_dev_id->driver_data);
+ if (!cfg)
+ return -ENODEV;
+
+ device = &aux_dev->dev;
+
+ dev = mei_me_dev_init(device, cfg);
+ if (!dev) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ hw = to_me_hw(dev);
+ hw->mem_addr = devm_ioremap_resource(device, &adev->bar);
+ if (IS_ERR(hw->mem_addr)) {
+ dev_err(device, "mmio not mapped\n");
+ ret = PTR_ERR(hw->mem_addr);
+ goto err;
+ }
+
+ hw->irq = adev->irq;
+ hw->read_fws = mei_gsc_read_hfs;
+
+ dev_set_drvdata(device, dev);
+
+ ret = devm_request_threaded_irq(device, hw->irq,
+ mei_me_irq_quick_handler,
+ mei_me_irq_thread_handler,
+ IRQF_ONESHOT, KBUILD_MODNAME, dev);
+ if (ret) {
+ dev_err(device, "irq register failed %d\n", ret);
+ goto err;
+ }
+
+ pm_runtime_get_noresume(device);
+ pm_runtime_set_active(device);
+ pm_runtime_enable(device);
+
+	/*
+	 * Continue to char device setup in spite of firmware handshake
+	 * failure, in order to provide user space access to the firmware
+	 * status registers via sysfs.
+	 */
+ if (mei_start(dev))
+ dev_warn(device, "init hw failure.\n");
+
+ pm_runtime_set_autosuspend_delay(device, MEI_GSC_RPM_TIMEOUT);
+ pm_runtime_use_autosuspend(device);
+
+ ret = mei_register(dev, device);
+ if (ret)
+ goto register_err;
+
+ pm_runtime_put_noidle(device);
+ return 0;
+
+register_err:
+ mei_stop(dev);
+ devm_free_irq(device, hw->irq, dev);
+
+err:
+ dev_err(device, "probe failed: %d\n", ret);
+ dev_set_drvdata(device, NULL);
+ return ret;
+}
+
+static void mei_gsc_remove(struct auxiliary_device *aux_dev)
+{
+ struct mei_device *dev;
+ struct mei_me_hw *hw;
+
+ dev = dev_get_drvdata(&aux_dev->dev);
+ if (!dev)
+ return;
+
+ hw = to_me_hw(dev);
+
+ mei_stop(dev);
+
+ mei_deregister(dev);
+
+ pm_runtime_disable(&aux_dev->dev);
+
+ mei_disable_interrupts(dev);
+ devm_free_irq(&aux_dev->dev, hw->irq, dev);
+}
+
+static int __maybe_unused mei_gsc_pm_suspend(struct device *device)
+{
+ struct mei_device *dev = dev_get_drvdata(device);
+
+ if (!dev)
+ return -ENODEV;
+
+ mei_stop(dev);
+
+ mei_disable_interrupts(dev);
+
+ return 0;
+}
+
+static int __maybe_unused mei_gsc_pm_resume(struct device *device)
+{
+ struct mei_device *dev = dev_get_drvdata(device);
+ int err;
+
+ if (!dev)
+ return -ENODEV;
+
+ err = mei_restart(dev);
+ if (err)
+ return err;
+
+ /* Start timer if stopped in suspend */
+ schedule_delayed_work(&dev->timer_work, HZ);
+
+ return 0;
+}
+
+static int __maybe_unused mei_gsc_pm_runtime_idle(struct device *device)
+{
+ struct mei_device *dev = dev_get_drvdata(device);
+
+ if (!dev)
+ return -ENODEV;
+ if (mei_write_is_idle(dev))
+ pm_runtime_autosuspend(device);
+
+ return -EBUSY;
+}
+
+static int __maybe_unused mei_gsc_pm_runtime_suspend(struct device *device)
+{
+ struct mei_device *dev = dev_get_drvdata(device);
+ struct mei_me_hw *hw;
+ int ret;
+
+ if (!dev)
+ return -ENODEV;
+
+ mutex_lock(&dev->device_lock);
+
+ if (mei_write_is_idle(dev)) {
+ hw = to_me_hw(dev);
+ hw->pg_state = MEI_PG_ON;
+ ret = 0;
+ } else {
+ ret = -EAGAIN;
+ }
+
+ mutex_unlock(&dev->device_lock);
+
+ return ret;
+}
+
+static int __maybe_unused mei_gsc_pm_runtime_resume(struct device *device)
+{
+ struct mei_device *dev = dev_get_drvdata(device);
+ struct mei_me_hw *hw;
+ irqreturn_t irq_ret;
+
+ if (!dev)
+ return -ENODEV;
+
+ mutex_lock(&dev->device_lock);
+
+ hw = to_me_hw(dev);
+ hw->pg_state = MEI_PG_OFF;
+
+ mutex_unlock(&dev->device_lock);
+
+ irq_ret = mei_me_irq_thread_handler(1, dev);
+ if (irq_ret != IRQ_HANDLED)
+ dev_err(dev->dev, "thread handler fail %d\n", irq_ret);
+
+ return 0;
+}
+
+static const struct dev_pm_ops mei_gsc_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(mei_gsc_pm_suspend,
+ mei_gsc_pm_resume)
+ SET_RUNTIME_PM_OPS(mei_gsc_pm_runtime_suspend,
+ mei_gsc_pm_runtime_resume,
+ mei_gsc_pm_runtime_idle)
+};
+
+static const struct auxiliary_device_id mei_gsc_id_table[] = {
+ {
+ .name = "i915.mei-gsc",
+ .driver_data = MEI_ME_GSC_CFG,
+ },
+ {
+ .name = "i915.mei-gscfi",
+ .driver_data = MEI_ME_GSCFI_CFG,
+ },
+ {
+ /* sentinel */
+ }
+};
+MODULE_DEVICE_TABLE(auxiliary, mei_gsc_id_table);
+
+static struct auxiliary_driver mei_gsc_driver = {
+ .probe = mei_gsc_probe,
+ .remove = mei_gsc_remove,
+ .driver = {
+ /* auxiliary_driver_register() sets .name to be the modname */
+ .pm = &mei_gsc_pm_ops,
+ },
+ .id_table = mei_gsc_id_table
+};
+module_auxiliary_driver(mei_gsc_driver);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_ALIAS("auxiliary:i915.mei-gsc");
+MODULE_ALIAS("auxiliary:i915.mei-gscfi");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
index 719fee9af156..9870bf717979 100644
--- a/drivers/misc/mei/hw-me.c
+++ b/drivers/misc/mei/hw-me.c
@@ -1226,6 +1226,7 @@ irqreturn_t mei_me_irq_quick_handler(int irq, void *dev_id)
me_intr_disable(dev, hcsr);
return IRQ_WAKE_THREAD;
}
+EXPORT_SYMBOL_GPL(mei_me_irq_quick_handler);
/**
* mei_me_irq_thread_handler - function called after ISR to handle the interrupt
@@ -1326,6 +1327,7 @@ end:
mutex_unlock(&dev->device_lock);
return IRQ_HANDLED;
}
+EXPORT_SYMBOL_GPL(mei_me_irq_thread_handler);
static const struct mei_hw_ops mei_me_hw_ops = {
@@ -1440,6 +1442,12 @@ static bool mei_me_fw_type_sps_ign(const struct pci_dev *pdev)
#define MEI_CFG_KIND_ITOUCH \
.kind = "itouch"
+#define MEI_CFG_TYPE_GSC \
+ .kind = "gsc"
+
+#define MEI_CFG_TYPE_GSCFI \
+ .kind = "gscfi"
+
#define MEI_CFG_FW_SPS_IGN \
.quirk_probe = mei_me_fw_type_sps_ign
@@ -1572,6 +1580,20 @@ static const struct mei_cfg mei_me_pch15_sps_cfg = {
MEI_CFG_FW_SPS_IGN,
};
+/* Graphics System Controller */
+static const struct mei_cfg mei_me_gsc_cfg = {
+ MEI_CFG_TYPE_GSC,
+ MEI_CFG_PCH8_HFS,
+ MEI_CFG_FW_VER_SUPP,
+};
+
+/* Graphics System Controller Firmware Interface */
+static const struct mei_cfg mei_me_gscfi_cfg = {
+ MEI_CFG_TYPE_GSCFI,
+ MEI_CFG_PCH8_HFS,
+ MEI_CFG_FW_VER_SUPP,
+};
+
/*
 * mei_cfg_list - A list of platform specific configurations.
* Note: has to be synchronized with enum mei_cfg_idx.
@@ -1592,6 +1614,8 @@ static const struct mei_cfg *const mei_cfg_list[] = {
[MEI_ME_PCH12_SPS_ITOUCH_CFG] = &mei_me_pch12_itouch_sps_cfg,
[MEI_ME_PCH15_CFG] = &mei_me_pch15_cfg,
[MEI_ME_PCH15_SPS_CFG] = &mei_me_pch15_sps_cfg,
+ [MEI_ME_GSC_CFG] = &mei_me_gsc_cfg,
+ [MEI_ME_GSCFI_CFG] = &mei_me_gscfi_cfg,
};
const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx)
@@ -1602,7 +1626,8 @@ const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx)
return NULL;
return mei_cfg_list[idx];
-};
+}
+EXPORT_SYMBOL_GPL(mei_me_get_cfg);
/**
* mei_me_dev_init - allocates and initializes the mei device structure
@@ -1637,4 +1662,4 @@ struct mei_device *mei_me_dev_init(struct device *parent,
return dev;
}
-
+EXPORT_SYMBOL_GPL(mei_me_dev_init);
diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h
index 00a7132ac7a2..a071c645e905 100644
--- a/drivers/misc/mei/hw-me.h
+++ b/drivers/misc/mei/hw-me.h
@@ -112,6 +112,8 @@ enum mei_cfg_idx {
MEI_ME_PCH12_SPS_ITOUCH_CFG,
MEI_ME_PCH15_CFG,
MEI_ME_PCH15_SPS_CFG,
+ MEI_ME_GSC_CFG,
+ MEI_ME_GSCFI_CFG,
MEI_ME_NUM_CFG,
};
diff --git a/include/linux/mei_aux.h b/include/linux/mei_aux.h
new file mode 100644
index 000000000000..587f25128848
--- /dev/null
+++ b/include/linux/mei_aux.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2022, Intel Corporation. All rights reserved.
+ */
+#ifndef _LINUX_MEI_AUX_H
+#define _LINUX_MEI_AUX_H
+
+#include <linux/auxiliary_bus.h>
+
+struct mei_aux_device {
+ struct auxiliary_device aux_dev;
+ int irq;
+ struct resource bar;
+};
+
+#define auxiliary_dev_to_mei_aux_dev(auxiliary_dev) \
+ container_of(auxiliary_dev, struct mei_aux_device, aux_dev)
+
+#endif /* _LINUX_MEI_AUX_H */
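
For orientation, a hedged sketch of the parent side of this contract: a driver built as module "i915" publishing one of these devices so that the "i915.mei-gsc" id_table entry elsewhere in this series matches it. The function names publish_gsc_device() and mei_aux_release() are illustrative, not taken from the series.

	#include <linux/auxiliary_bus.h>
	#include <linux/mei_aux.h>
	#include <linux/slab.h>

	static void mei_aux_release(struct device *dev)
	{
		struct auxiliary_device *aux_dev = to_auxiliary_dev(dev);

		kfree(auxiliary_dev_to_mei_aux_dev(aux_dev));
	}

	static int publish_gsc_device(struct device *parent, int irq,
				      struct resource *bar)
	{
		struct mei_aux_device *adev;
		int ret;

		adev = kzalloc(sizeof(*adev), GFP_KERNEL);
		if (!adev)
			return -ENOMEM;

		adev->irq = irq;
		adev->bar = *bar;
		adev->aux_dev.name = "mei-gsc"; /* matched as "<modname>.mei-gsc" */
		adev->aux_dev.dev.parent = parent;
		adev->aux_dev.dev.release = mei_aux_release;

		ret = auxiliary_device_init(&adev->aux_dev);
		if (ret) {
			kfree(adev);
			return ret;
		}

		ret = auxiliary_device_add(&adev->aux_dev);
		if (ret)
			auxiliary_device_uninit(&adev->aux_dev);

		return ret;
	}
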
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 05c3642aaece..35ca528803fd 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -172,7 +172,9 @@ enum drm_i915_gem_engine_class {
I915_ENGINE_CLASS_INVALID = -1
};
-/*
+/**
+ * struct i915_engine_class_instance - Engine class/instance identifier
+ *
* There may be more than one engine fulfilling any role within the system.
* Each engine of a class is given a unique instance number and therefore
 * any engine can be specified by its class:instance tuple. APIs that allow
@@ -180,10 +182,21 @@ enum drm_i915_gem_engine_class {
* for this identification.
*/
struct i915_engine_class_instance {
- __u16 engine_class; /* see enum drm_i915_gem_engine_class */
- __u16 engine_instance;
+ /**
+ * @engine_class:
+ *
+ * Engine class from enum drm_i915_gem_engine_class
+ */
+ __u16 engine_class;
#define I915_ENGINE_CLASS_INVALID_NONE -1
#define I915_ENGINE_CLASS_INVALID_VIRTUAL -2
+
+ /**
+ * @engine_instance:
+ *
+ * Engine instance.
+ */
+ __u16 engine_instance;
};
/**
@@ -2657,24 +2670,65 @@ enum drm_i915_perf_record_type {
DRM_I915_PERF_RECORD_MAX /* non-ABI */
};
-/*
+/**
+ * struct drm_i915_perf_oa_config
+ *
* Structure to upload perf dynamic configuration into the kernel.
*/
struct drm_i915_perf_oa_config {
- /** String formatted like "%08x-%04x-%04x-%04x-%012x" */
+ /**
+ * @uuid:
+ *
+ * String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x"
+ */
char uuid[36];
+ /**
+ * @n_mux_regs:
+ *
+ * Number of mux regs in &mux_regs_ptr.
+ */
__u32 n_mux_regs;
+
+ /**
+ * @n_boolean_regs:
+ *
+ * Number of boolean regs in &boolean_regs_ptr.
+ */
__u32 n_boolean_regs;
+
+ /**
+ * @n_flex_regs:
+ *
+ * Number of flex regs in &flex_regs_ptr.
+ */
__u32 n_flex_regs;
- /*
- * These fields are pointers to tuples of u32 values (register address,
- * value). For example the expected length of the buffer pointed by
- * mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs).
+ /**
+ * @mux_regs_ptr:
+ *
+ * Pointer to tuples of u32 values (register address, value) for mux
+ * registers. Expected length of buffer is (2 * sizeof(u32) *
+ * &n_mux_regs).
*/
__u64 mux_regs_ptr;
+
+ /**
+ * @boolean_regs_ptr:
+ *
+	 * Pointer to tuples of u32 values (register address, value) for
+	 * boolean registers. Expected length of buffer is (2 * sizeof(u32) *
+ * &n_boolean_regs).
+ */
__u64 boolean_regs_ptr;
+
+ /**
+ * @flex_regs_ptr:
+ *
+	 * Pointer to tuples of u32 values (register address, value) for
+	 * flex registers. Expected length of buffer is (2 * sizeof(u32) *
+ * &n_flex_regs).
+ */
__u64 flex_regs_ptr;
};
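
For reference, a hedged userspace sketch of filling this structure and uploading it; the register pair and UUID are placeholders, and DRM_IOCTL_I915_PERF_ADD_CONFIG is the pre-existing upload ioctl. Assumes an open, sufficiently privileged DRM fd.

	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	/* Hypothetical (address, value) pair; real configs come from metric sets. */
	static __u32 mux_regs[][2] = {
		{ 0x9888, 0x10000000 },
	};

	static int add_oa_config(int drm_fd)
	{
		struct drm_i915_perf_oa_config cfg = {
			.n_mux_regs = 1,
			.mux_regs_ptr = (uintptr_t)mux_regs,
			/* n_boolean_regs/n_flex_regs stay 0: no such registers here */
		};

		/* uuid[] is exactly 36 chars and not NUL-terminated. */
		memcpy(cfg.uuid, "01234567-89ab-cdef-0123-456789abcdef",
		       sizeof(cfg.uuid));

		/* New config id on success, -1 with errno set on failure. */
		return ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &cfg);
	}
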
@@ -2685,12 +2739,24 @@ struct drm_i915_perf_oa_config {
 * @data_ptr also depends on the specific @query_id.
*/
struct drm_i915_query_item {
- /** @query_id: The id for this query */
+ /**
+ * @query_id:
+ *
+ * The id for this query. Currently accepted query IDs are:
+ * - %DRM_I915_QUERY_TOPOLOGY_INFO (see struct drm_i915_query_topology_info)
+ * - %DRM_I915_QUERY_ENGINE_INFO (see struct drm_i915_engine_info)
+ * - %DRM_I915_QUERY_PERF_CONFIG (see struct drm_i915_query_perf_config)
+ * - %DRM_I915_QUERY_MEMORY_REGIONS (see struct drm_i915_query_memory_regions)
+ * - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`)
+ * - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct drm_i915_query_topology_info)
+ */
__u64 query_id;
-#define DRM_I915_QUERY_TOPOLOGY_INFO 1
-#define DRM_I915_QUERY_ENGINE_INFO 2
-#define DRM_I915_QUERY_PERF_CONFIG 3
-#define DRM_I915_QUERY_MEMORY_REGIONS 4
+#define DRM_I915_QUERY_TOPOLOGY_INFO 1
+#define DRM_I915_QUERY_ENGINE_INFO 2
+#define DRM_I915_QUERY_PERF_CONFIG 3
+#define DRM_I915_QUERY_MEMORY_REGIONS 4
+#define DRM_I915_QUERY_HWCONFIG_BLOB 5
+#define DRM_I915_QUERY_GEOMETRY_SUBSLICES 6
/* Must be kept compact -- no holes and well documented */
/**
@@ -2706,14 +2772,17 @@ struct drm_i915_query_item {
/**
* @flags:
*
- * When query_id == DRM_I915_QUERY_TOPOLOGY_INFO, must be 0.
+ * When &query_id == %DRM_I915_QUERY_TOPOLOGY_INFO, must be 0.
*
- * When query_id == DRM_I915_QUERY_PERF_CONFIG, must be one of the
+ * When &query_id == %DRM_I915_QUERY_PERF_CONFIG, must be one of the
* following:
*
- * - DRM_I915_QUERY_PERF_CONFIG_LIST
- * - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID
- * - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID
+ * - %DRM_I915_QUERY_PERF_CONFIG_LIST
+ * - %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID
+ * - %DRM_I915_QUERY_PERF_CONFIG_FOR_UUID
+ *
+	 * When &query_id == %DRM_I915_QUERY_GEOMETRY_SUBSLICES, this field
+	 * must contain a struct i915_engine_class_instance that references
+	 * a render engine.
*/
__u32 flags;
#define DRM_I915_QUERY_PERF_CONFIG_LIST 1
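
For reference, a hedged sketch of the %DRM_I915_QUERY_GEOMETRY_SUBSLICES case just described: the two 16-bit fields of struct i915_engine_class_instance fit exactly into the 32-bit flags field, so userspace can copy the tuple in directly. The helper name is illustrative.

	#include <string.h>
	#include <drm/i915_drm.h>

	static void prepare_geometry_query(struct drm_i915_query_item *item)
	{
		struct i915_engine_class_instance rcs0 = {
			.engine_class = I915_ENGINE_CLASS_RENDER,
			.engine_instance = 0,
		};

		memset(item, 0, sizeof(*item));
		item->query_id = DRM_I915_QUERY_GEOMETRY_SUBSLICES;
		/* The 2 x __u16 tuple fills the 32-bit flags field exactly. */
		memcpy(&item->flags, &rcs0, sizeof(item->flags));
	}
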
@@ -2771,66 +2840,112 @@ struct drm_i915_query {
__u64 items_ptr;
};
-/*
- * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO :
- *
- * data: contains the 3 pieces of information :
- *
- * - the slice mask with one bit per slice telling whether a slice is
- * available. The availability of slice X can be queried with the following
- * formula :
- *
- * (data[X / 8] >> (X % 8)) & 1
- *
- * - the subslice mask for each slice with one bit per subslice telling
- * whether a subslice is available. Gen12 has dual-subslices, which are
- * similar to two gen11 subslices. For gen12, this array represents dual-
- * subslices. The availability of subslice Y in slice X can be queried
- * with the following formula :
- *
- * (data[subslice_offset +
- * X * subslice_stride +
- * Y / 8] >> (Y % 8)) & 1
- *
- * - the EU mask for each subslice in each slice with one bit per EU telling
- * whether an EU is available. The availability of EU Z in subslice Y in
- * slice X can be queried with the following formula :
+/**
+ * struct drm_i915_query_topology_info
*
- * (data[eu_offset +
- * (X * max_subslices + Y) * eu_stride +
- * Z / 8] >> (Z % 8)) & 1
+ * Describes slice/subslice/EU information queried by
+ * %DRM_I915_QUERY_TOPOLOGY_INFO and %DRM_I915_QUERY_GEOMETRY_SUBSLICES
*/
struct drm_i915_query_topology_info {
- /*
+ /**
+ * @flags:
+ *
* Unused for now. Must be cleared to zero.
*/
__u16 flags;
+ /**
+ * @max_slices:
+ *
+ * The number of bits used to express the slice mask.
+ */
__u16 max_slices;
+
+ /**
+ * @max_subslices:
+ *
+ * The number of bits used to express the subslice mask.
+ */
__u16 max_subslices;
+
+ /**
+ * @max_eus_per_subslice:
+ *
+ * The number of bits in the EU mask that correspond to a single
+ * subslice's EUs.
+ */
__u16 max_eus_per_subslice;
- /*
+ /**
+ * @subslice_offset:
+ *
* Offset in data[] at which the subslice masks are stored.
*/
__u16 subslice_offset;
- /*
+ /**
+ * @subslice_stride:
+ *
* Stride at which each of the subslice masks for each slice are
* stored.
*/
__u16 subslice_stride;
- /*
+ /**
+ * @eu_offset:
+ *
* Offset in data[] at which the EU masks are stored.
*/
__u16 eu_offset;
- /*
+ /**
+ * @eu_stride:
+ *
* Stride at which each of the EU masks for each subslice are stored.
*/
__u16 eu_stride;
+ /**
+ * @data:
+ *
+	 * Contains 3 pieces of information:
+ *
+ * - The slice mask with one bit per slice telling whether a slice is
+ * available. The availability of slice X can be queried with the
+	 *   following formula:
+ *
+ * .. code:: c
+ *
+ * (data[X / 8] >> (X % 8)) & 1
+ *
+ * Starting with Xe_HP platforms, Intel hardware no longer has
+ * traditional slices so i915 will always report a single slice
+ * (hardcoded slicemask = 0x1) which contains all of the platform's
+ * subslices. I.e., the mask here does not reflect any of the newer
+ * hardware concepts such as "gslices" or "cslices" since userspace
+ * is capable of inferring those from the subslice mask.
+ *
+ * - The subslice mask for each slice with one bit per subslice telling
+ * whether a subslice is available. Starting with Gen12 we use the
+ * term "subslice" to refer to what the hardware documentation
+	 *   describes as a "dual-subslice". The availability of subslice Y
+	 *   in slice X can be queried with the following formula:
+ *
+ * .. code:: c
+ *
+ * (data[subslice_offset + X * subslice_stride + Y / 8] >> (Y % 8)) & 1
+ *
+ * - The EU mask for each subslice in each slice, with one bit per EU
+ * telling whether an EU is available. The availability of EU Z in
+	 *   subslice Y in slice X can be queried with the following formula:
+ *
+ * .. code:: c
+ *
+ * (data[eu_offset +
+ * (X * max_subslices + Y) * eu_stride +
+ * Z / 8
+ * ] >> (Z % 8)) & 1
+ */
__u8 data[];
};
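
For reference, a hedged userspace sketch of retrieving and decoding this blob with the standard two-call query flow (the first call sizes the buffer, the second fills it). Assumes an open DRM fd; error handling is minimal and the helper names are illustrative.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	static struct drm_i915_query_topology_info *query_topology(int drm_fd)
	{
		struct drm_i915_query_item item = {
			.query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
		};
		struct drm_i915_query query = {
			.num_items = 1,
			.items_ptr = (uintptr_t)&item,
		};
		struct drm_i915_query_topology_info *info;

		/* First pass: item.length == 0, so i915 reports the blob size. */
		if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query) || item.length <= 0)
			return NULL;

		info = calloc(1, item.length);
		if (!info)
			return NULL;
		item.data_ptr = (uintptr_t)info;

		/* Second pass: i915 writes the topology blob into data_ptr. */
		if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query)) {
			free(info);
			return NULL;
		}

		return info;
	}

	/* Decode a subslice bit using the formula from the kerneldoc above. */
	static bool subslice_available(const struct drm_i915_query_topology_info *info,
				       int slice, int subslice)
	{
		const __u8 *mask = &info->data[info->subslice_offset +
					       slice * info->subslice_stride];

		return (mask[subslice / 8] >> (subslice % 8)) & 1;
	}
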
@@ -2951,52 +3066,68 @@ struct drm_i915_query_engine_info {
struct drm_i915_engine_info engines[];
};
-/*
- * Data written by the kernel with query DRM_I915_QUERY_PERF_CONFIG.
+/**
+ * struct drm_i915_query_perf_config
+ *
+ * Data written by the kernel with query %DRM_I915_QUERY_PERF_CONFIG.
*/
struct drm_i915_query_perf_config {
union {
- /*
- * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets
- * this fields to the number of configurations available.
+ /**
+ * @n_configs:
+ *
+ * When &drm_i915_query_item.flags ==
+		 * %DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets this field to
+ * the number of configurations available.
*/
__u64 n_configs;
- /*
- * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID,
- * i915 will use the value in this field as configuration
- * identifier to decide what data to write into config_ptr.
+ /**
+ * @config:
+ *
+ * When &drm_i915_query_item.flags ==
+ * %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID, i915 will use the
+ * value in this field as configuration identifier to decide
+		 * what data to write into the @data buffer.
*/
__u64 config;
- /*
- * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID,
- * i915 will use the value in this field as configuration
- * identifier to decide what data to write into config_ptr.
+ /**
+ * @uuid:
+ *
+ * When &drm_i915_query_item.flags ==
+ * %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID, i915 will use the
+ * value in this field as configuration identifier to decide
+		 * what data to write into the @data buffer.
*
* String formatted like "%08x-%04x-%04x-%04x-%012x"
*/
char uuid[36];
};
- /*
+ /**
+ * @flags:
+ *
* Unused for now. Must be cleared to zero.
*/
__u32 flags;
- /*
- * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 will
- * write an array of __u64 of configuration identifiers.
+ /**
+ * @data:
*
- * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_DATA, i915 will
- * write a struct drm_i915_perf_oa_config. If the following fields of
- * drm_i915_perf_oa_config are set not set to 0, i915 will write into
- * the associated pointers the values of submitted when the
+ * When &drm_i915_query_item.flags == %DRM_I915_QUERY_PERF_CONFIG_LIST,
+ * i915 will write an array of __u64 of configuration identifiers.
+ *
+ * When &drm_i915_query_item.flags == %DRM_I915_QUERY_PERF_CONFIG_DATA,
+ * i915 will write a struct drm_i915_perf_oa_config. If the following
+ * fields of struct drm_i915_perf_oa_config are not set to 0, i915 will
+	 * write into the associated pointers the values submitted when the
+	 * configuration was created:
*
- * - n_mux_regs
- * - n_boolean_regs
- * - n_flex_regs
+ * - &drm_i915_perf_oa_config.n_mux_regs
+ * - &drm_i915_perf_oa_config.n_boolean_regs
+ * - &drm_i915_perf_oa_config.n_flex_regs
*/
__u8 data[];
};
@@ -3135,6 +3266,16 @@ struct drm_i915_query_memory_regions {
};
/**
+ * DOC: GuC HWCONFIG blob uAPI
+ *
+ * The GuC produces a blob with information about the current device.
+ * i915 reads this blob from GuC and makes it available via this uAPI.
+ *
+ * The format and meaning of the blob content are documented in the
+ * Programmer's Reference Manual.
+ */
+
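
The blob is fetched through the same two-call drm_i915_query flow shown in the topology sketch above; a hedged fragment (error handling elided, drm_fd assumed open):

	struct drm_i915_query_item item = {
		.query_id = DRM_I915_QUERY_HWCONFIG_BLOB,
	};
	struct drm_i915_query query = {
		.num_items = 1,
		.items_ptr = (uintptr_t)&item,
	};

	ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query);  /* sizes the blob */
	void *blob = malloc(item.length);
	item.data_ptr = (uintptr_t)blob;
	ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query);  /* copies it out */
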
+/**
* struct drm_i915_gem_create_ext - Existing gem_create behaviour, with added
* extension support using struct i915_user_extension.
*