summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-03-25 13:16:16 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-03-25 13:16:16 -0700
commit2d09a9449ecd9a2b9fdac62408c12ee20b6307d2 (patch)
treedabca7097e2da5a6233173ca4eb943a803a00d2f
parent317a76a996043d336b85197aed5f44184b36ac4a (diff)
parent64fa6b9322a904198589c0479dca6f2ed7f2eb04 (diff)
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas: "Nothing major this time around. Apart from the usual perf/PMU updates, some page table cleanups, the notable features are average CPU frequency based on the AMUv1 counters, CONFIG_HOTPLUG_SMT and MOPS instructions (memcpy/memset) in the uaccess routines. Perf and PMUs: - Support for the 'Rainier' CPU PMU from Arm - Preparatory driver changes and cleanups that pave the way for BRBE support - Support for partial virtualisation of the Apple-M1 PMU - Support for the second event filter in Arm CSPMU designs - Minor fixes and cleanups (CMN and DWC PMUs) - Enable EL2 requirements for FEAT_PMUv3p9 Power, CPU topology: - Support for AMUv1-based average CPU frequency - Run-time SMT control wired up for arm64 (CONFIG_HOTPLUG_SMT). It adds a generic topology_is_primary_thread() function overridden by x86 and powerpc New(ish) features: - MOPS (memcpy/memset) support for the uaccess routines Security/confidential compute: - Fix the DMA address for devices used in Realms with Arm CCA. The CCA architecture uses the address bit to differentiate between shared and private addresses - Spectre-BHB: assume CPUs Linux doesn't know about vulnerable by default Memory management clean-ups: - Drop the P*D_TABLE_BIT definition in preparation for 128-bit PTEs - Some minor page table accessor clean-ups - PIE/POE (permission indirection/overlay) helpers clean-up Kselftests: - MTE: skip hugetlb tests if MTE is not supported on such mappings and user correct naming for sync/async tag checking modes Miscellaneous: - Add a PKEY_UNRESTRICTED definition as 0 to uapi (toolchain people request) - Sysreg updates for new register fields - CPU type info for some Qualcomm Kryo cores" * tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (72 commits) arm64: mm: Don't use %pK through printk perf/arm_cspmu: Fix missing io.h include arm64: errata: Add newer ARM cores to the spectre_bhb_loop_affected() lists arm64: cputype: Add MIDR_CORTEX_A76AE arm64: errata: Add KRYO 2XX/3XX/4XX silver cores to Spectre BHB safe list arm64: errata: Assume that unknown CPUs _are_ vulnerable to Spectre BHB arm64: errata: Add QCOM_KRYO_4XX_GOLD to the spectre_bhb_k24_list arm64/sysreg: Enforce whole word match for open/close tokens arm64/sysreg: Fix unbalanced closing block arm64: Kconfig: Enable HOTPLUG_SMT arm64: topology: Support SMT control on ACPI based system arch_topology: Support SMT control for OF based system cpu/SMT: Provide a default topology_is_primary_thread() arm64/mm: Define PTDESC_ORDER perf/arm_cspmu: Add PMEVFILT2R support perf/arm_cspmu: Generalise event filtering perf/arm_cspmu: Move register definitons to header arm64/kernel: Always use level 2 or higher for early mappings arm64/mm: Drop PXD_TABLE_BIT arm64/mm: Check pmd_table() in pmd_trans_huge() ...
-rw-r--r--Documentation/admin-guide/pm/cpufreq.rst17
-rw-r--r--Documentation/arch/arm64/booting.rst22
-rw-r--r--arch/arm64/Kconfig3
-rw-r--r--arch/arm64/include/asm/apple_m1_pmu.h1
-rw-r--r--arch/arm64/include/asm/asm-extable.h10
-rw-r--r--arch/arm64/include/asm/asm-uaccess.h4
-rw-r--r--arch/arm64/include/asm/cputype.h14
-rw-r--r--arch/arm64/include/asm/el2_setup.h25
-rw-r--r--arch/arm64/include/asm/extable.h4
-rw-r--r--arch/arm64/include/asm/fpsimd.h1
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h8
-rw-r--r--arch/arm64/include/asm/mem_encrypt.h11
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h35
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h36
-rw-r--r--arch/arm64/include/asm/pgtable.h80
-rw-r--r--arch/arm64/include/asm/por.h11
-rw-r--r--arch/arm64/include/asm/spectre.h1
-rw-r--r--arch/arm64/include/asm/sysreg.h15
-rw-r--r--arch/arm64/kernel/pi/map_range.c6
-rw-r--r--arch/arm64/kernel/proton-pack.c218
-rw-r--r--arch/arm64/kernel/signal.c2
-rw-r--r--arch/arm64/kernel/topology.c182
-rw-r--r--arch/arm64/kvm/at.c8
-rw-r--r--arch/arm64/kvm/ptdump.c4
-rw-r--r--arch/arm64/lib/clear_user.S25
-rw-r--r--arch/arm64/lib/copy_from_user.S10
-rw-r--r--arch/arm64/lib/copy_template.S10
-rw-r--r--arch/arm64/lib/copy_to_user.S10
-rw-r--r--arch/arm64/mm/extable.c40
-rw-r--r--arch/arm64/mm/fault.c4
-rw-r--r--arch/arm64/mm/hugetlbpage.c20
-rw-r--r--arch/arm64/mm/kasan_init.c6
-rw-r--r--arch/arm64/mm/mmu.c10
-rw-r--r--arch/arm64/mm/physaddr.c2
-rw-r--r--arch/arm64/mm/ptdump.c4
-rwxr-xr-xarch/arm64/tools/gen-sysreg.awk31
-rw-r--r--arch/arm64/tools/sysreg105
-rw-r--r--arch/powerpc/include/asm/topology.h1
-rw-r--r--arch/x86/include/asm/topology.h2
-rw-r--r--arch/x86/kernel/cpu/aperfmperf.c2
-rw-r--r--arch/x86/kernel/cpu/proc.c7
-rw-r--r--drivers/base/arch_topology.c26
-rw-r--r--drivers/cpufreq/Kconfig.x8612
-rw-r--r--drivers/cpufreq/cpufreq.c38
-rw-r--r--drivers/perf/apple_m1_cpu_pmu.c70
-rw-r--r--drivers/perf/arm-cmn.c5
-rw-r--r--drivers/perf/arm_cspmu/ampere_cspmu.c32
-rw-r--r--drivers/perf/arm_cspmu/arm_cspmu.c81
-rw-r--r--drivers/perf/arm_cspmu/arm_cspmu.h57
-rw-r--r--drivers/perf/arm_cspmu/nvidia_cspmu.c22
-rw-r--r--drivers/perf/arm_pmu.c8
-rw-r--r--drivers/perf/arm_pmuv3.c11
-rw-r--r--drivers/perf/arm_v7_pmu.c50
-rw-r--r--drivers/perf/dwc_pcie_pmu.c51
-rw-r--r--include/linux/cpufreq.h2
-rw-r--r--include/linux/dma-direct.h13
-rw-r--r--include/linux/mem_encrypt.h23
-rw-r--r--include/linux/perf/arm_pmu.h13
-rw-r--r--include/linux/topology.h23
-rw-r--r--include/uapi/asm-generic/mman-common.h1
-rw-r--r--tools/testing/selftests/arm64/mte/check_hugetlb_options.c19
-rw-r--r--tools/testing/selftests/mm/mseal_test.c6
-rw-r--r--tools/testing/selftests/mm/pkey-helpers.h3
-rw-r--r--tools/testing/selftests/mm/pkey_sighandler_tests.c4
-rw-r--r--tools/testing/selftests/mm/protection_keys.c2
-rw-r--r--tools/testing/selftests/powerpc/include/pkeys.h5
-rw-r--r--tools/testing/selftests/powerpc/mm/pkey_exec_prot.c2
-rw-r--r--tools/testing/selftests/powerpc/mm/pkey_siginfo.c2
-rw-r--r--tools/testing/selftests/powerpc/ptrace/core-pkey.c6
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c6
70 files changed, 1116 insertions, 484 deletions
diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst
index a21369eba034..3950583f2b15 100644
--- a/Documentation/admin-guide/pm/cpufreq.rst
+++ b/Documentation/admin-guide/pm/cpufreq.rst
@@ -248,6 +248,20 @@ are the following:
If that frequency cannot be determined, this attribute should not
be present.
+``cpuinfo_avg_freq``
+ An average frequency (in KHz) of all CPUs belonging to a given policy,
+ derived from a hardware provided feedback and reported on a time frame
+ spanning at most few milliseconds.
+
+ This is expected to be based on the frequency the hardware actually runs
+ at and, as such, might require specialised hardware support (such as AMU
+ extension on ARM). If one cannot be determined, this attribute should
+ not be present.
+
+ Note, that failed attempt to retrieve current frequency for a given
+ CPU(s) will result in an appropriate error, i.e: EAGAIN for CPU that
+ remains idle (raised on ARM).
+
``cpuinfo_max_freq``
Maximum possible operating frequency the CPUs belonging to this policy
can run at (in kHz).
@@ -293,7 +307,8 @@ are the following:
Some architectures (e.g. ``x86``) may attempt to provide information
more precisely reflecting the current CPU frequency through this
attribute, but that still may not be the exact current CPU frequency as
- seen by the hardware at the moment.
+ seen by the hardware at the moment. This behavior though, is only
+ available via c:macro:``CPUFREQ_ARCH_CUR_FREQ`` option.
``scaling_driver``
The scaling driver currently in use.
diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst
index cad6fdc96b98..dee7b6de864f 100644
--- a/Documentation/arch/arm64/booting.rst
+++ b/Documentation/arch/arm64/booting.rst
@@ -288,6 +288,12 @@ Before jumping into the kernel, the following conditions must be met:
- SCR_EL3.FGTEn (bit 27) must be initialised to 0b1.
+ For CPUs with the Fine Grained Traps 2 (FEAT_FGT2) extension present:
+
+ - If EL3 is present and the kernel is entered at EL2:
+
+ - SCR_EL3.FGTEn2 (bit 59) must be initialised to 0b1.
+
For CPUs with support for HCRX_EL2 (FEAT_HCX) present:
- If EL3 is present and the kernel is entered at EL2:
@@ -382,6 +388,22 @@ Before jumping into the kernel, the following conditions must be met:
- SMCR_EL2.EZT0 (bit 30) must be initialised to 0b1.
+ For CPUs with the Performance Monitors Extension (FEAT_PMUv3p9):
+
+ - If EL3 is present:
+
+ - MDCR_EL3.EnPM2 (bit 7) must be initialised to 0b1.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - HDFGRTR2_EL2.nPMICNTR_EL0 (bit 2) must be initialised to 0b1.
+ - HDFGRTR2_EL2.nPMICFILTR_EL0 (bit 3) must be initialised to 0b1.
+ - HDFGRTR2_EL2.nPMUACR_EL1 (bit 4) must be initialised to 0b1.
+
+ - HDFGWTR2_EL2.nPMICNTR_EL0 (bit 2) must be initialised to 0b1.
+ - HDFGWTR2_EL2.nPMICFILTR_EL0 (bit 3) must be initialised to 0b1.
+ - HDFGWTR2_EL2.nPMUACR_EL1 (bit 4) must be initialised to 0b1.
+
For CPUs with Memory Copy and Memory Set instructions (FEAT_MOPS):
- If the kernel is entered at EL1 and EL2 is present:
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c9fe3e7566a6..f2d724c1dfd2 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -251,6 +251,7 @@ config ARM64
select HAVE_KRETPROBES
select HAVE_GENERIC_VDSO
select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
+ select HOTPLUG_SMT if HOTPLUG_CPU
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
select KASAN_VMALLOC if KASAN
@@ -324,7 +325,7 @@ config ARCH_MMAP_RND_BITS_MIN
default 18
# max bits determined by the following formula:
-# VA_BITS - PAGE_SHIFT - 3
+# VA_BITS - PTDESC_TABLE_SHIFT
config ARCH_MMAP_RND_BITS_MAX
default 19 if ARM64_VA_BITS=36
default 24 if ARM64_VA_BITS=39
diff --git a/arch/arm64/include/asm/apple_m1_pmu.h b/arch/arm64/include/asm/apple_m1_pmu.h
index 99483b19b99f..02e05d05851f 100644
--- a/arch/arm64/include/asm/apple_m1_pmu.h
+++ b/arch/arm64/include/asm/apple_m1_pmu.h
@@ -37,6 +37,7 @@
#define PMCR0_PMI_ENABLE_8_9 GENMASK(45, 44)
#define SYS_IMP_APL_PMCR1_EL1 sys_reg(3, 1, 15, 1, 0)
+#define SYS_IMP_APL_PMCR1_EL12 sys_reg(3, 1, 15, 7, 2)
#define PMCR1_COUNT_A64_EL0_0_7 GENMASK(15, 8)
#define PMCR1_COUNT_A64_EL1_0_7 GENMASK(23, 16)
#define PMCR1_COUNT_A64_EL0_8_9 GENMASK(41, 40)
diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h
index b8a5861dc7b7..292f2687a12e 100644
--- a/arch/arm64/include/asm/asm-extable.h
+++ b/arch/arm64/include/asm/asm-extable.h
@@ -9,7 +9,8 @@
#define EX_TYPE_BPF 1
#define EX_TYPE_UACCESS_ERR_ZERO 2
#define EX_TYPE_KACCESS_ERR_ZERO 3
-#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4
+#define EX_TYPE_UACCESS_CPY 4
+#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 5
/* Data fields for EX_TYPE_UACCESS_ERR_ZERO */
#define EX_DATA_REG_ERR_SHIFT 0
@@ -23,6 +24,9 @@
#define EX_DATA_REG_ADDR_SHIFT 5
#define EX_DATA_REG_ADDR GENMASK(9, 5)
+/* Data fields for EX_TYPE_UACCESS_CPY */
+#define EX_DATA_UACCESS_WRITE BIT(0)
+
#ifdef __ASSEMBLY__
#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
@@ -69,6 +73,10 @@
.endif
.endm
+ .macro _asm_extable_uaccess_cpy, insn, fixup, uaccess_is_write
+ __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_UACCESS_CPY, \uaccess_is_write)
+ .endm
+
#else /* __ASSEMBLY__ */
#include <linux/stringify.h>
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index 5b6efe8abeeb..9148f5a31968 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -61,6 +61,10 @@ alternative_else_nop_endif
9999: x; \
_asm_extable_uaccess 9999b, l
+#define USER_CPY(l, uaccess_is_write, x...) \
+9999: x; \
+ _asm_extable_uaccess_cpy 9999b, l, uaccess_is_write
+
/*
* Generate the assembly for LDTR/STTR with exception table entries.
* This is complicated as there is no post-increment or pair versions of the
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 6f3f4142e214..20284bf34722 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -75,6 +75,7 @@
#define ARM_CPU_PART_CORTEX_A76 0xD0B
#define ARM_CPU_PART_NEOVERSE_N1 0xD0C
#define ARM_CPU_PART_CORTEX_A77 0xD0D
+#define ARM_CPU_PART_CORTEX_A76AE 0xD0E
#define ARM_CPU_PART_NEOVERSE_V1 0xD40
#define ARM_CPU_PART_CORTEX_A78 0xD41
#define ARM_CPU_PART_CORTEX_A78AE 0xD42
@@ -119,6 +120,7 @@
#define QCOM_CPU_PART_KRYO 0x200
#define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800
#define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801
+#define QCOM_CPU_PART_KRYO_3XX_GOLD 0x802
#define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803
#define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804
#define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805
@@ -159,6 +161,7 @@
#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
#define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1)
#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
+#define MIDR_CORTEX_A76AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76AE)
#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1)
#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
@@ -196,10 +199,21 @@
#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
#define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD)
#define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER)
+#define MIDR_QCOM_KRYO_3XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_GOLD)
#define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER)
#define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD)
#define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER)
#define MIDR_QCOM_ORYON_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_ORYON_X1)
+
+/*
+ * NOTES:
+ * - Qualcomm Kryo 5XX Prime / Gold ID themselves as MIDR_CORTEX_A77
+ * - Qualcomm Kryo 5XX Silver IDs itself as MIDR_QCOM_KRYO_4XX_SILVER
+ * - Qualcomm Kryo 6XX Prime IDs itself as MIDR_CORTEX_X1
+ * - Qualcomm Kryo 6XX Gold IDs itself as ARM_CPU_PART_CORTEX_A78
+ * - Qualcomm Kryo 6XX Silver IDs itself as MIDR_CORTEX_A55
+ */
+
#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 555c613fd232..ebceaae3c749 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -259,6 +259,30 @@
.Lskip_fgt_\@:
.endm
+.macro __init_el2_fgt2
+ mrs x1, id_aa64mmfr0_el1
+ ubfx x1, x1, #ID_AA64MMFR0_EL1_FGT_SHIFT, #4
+ cmp x1, #ID_AA64MMFR0_EL1_FGT_FGT2
+ b.lt .Lskip_fgt2_\@
+
+ mov x0, xzr
+ mrs x1, id_aa64dfr0_el1
+ ubfx x1, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
+ cmp x1, #ID_AA64DFR0_EL1_PMUVer_V3P9
+ b.lt .Lskip_pmuv3p9_\@
+
+ orr x0, x0, #HDFGRTR2_EL2_nPMICNTR_EL0
+ orr x0, x0, #HDFGRTR2_EL2_nPMICFILTR_EL0
+ orr x0, x0, #HDFGRTR2_EL2_nPMUACR_EL1
+.Lskip_pmuv3p9_\@:
+ msr_s SYS_HDFGRTR2_EL2, x0
+ msr_s SYS_HDFGWTR2_EL2, x0
+ msr_s SYS_HFGRTR2_EL2, xzr
+ msr_s SYS_HFGWTR2_EL2, xzr
+ msr_s SYS_HFGITR2_EL2, xzr
+.Lskip_fgt2_\@:
+.endm
+
.macro __init_el2_gcs
mrs_s x1, SYS_ID_AA64PFR1_EL1
ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4
@@ -304,6 +328,7 @@
__init_el2_nvhe_idregs
__init_el2_cptr
__init_el2_fgt
+ __init_el2_fgt2
__init_el2_gcs
.endm
diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h
index 72b0e71cc3de..9dc39612bdf5 100644
--- a/arch/arm64/include/asm/extable.h
+++ b/arch/arm64/include/asm/extable.h
@@ -33,6 +33,8 @@ do { \
(b)->data = (tmp).data; \
} while (0)
+bool insn_may_access_user(unsigned long addr, unsigned long esr);
+
#ifdef CONFIG_BPF_JIT
bool ex_handler_bpf(const struct exception_table_entry *ex,
struct pt_regs *regs);
@@ -45,5 +47,5 @@ bool ex_handler_bpf(const struct exception_table_entry *ex,
}
#endif /* !CONFIG_BPF_JIT */
-bool fixup_exception(struct pt_regs *regs);
+bool fixup_exception(struct pt_regs *regs, unsigned long esr);
#endif
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index f2a84efc3618..564bc09b3e06 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -80,7 +80,6 @@ extern void fpsimd_signal_preserve_current_state(void);
extern void fpsimd_preserve_current_state(void);
extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
-extern void fpsimd_kvm_prepare(void);
struct cpu_fp_state {
struct user_fpsimd_state *st;
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index fd5a08450b12..9e93733523f6 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -45,11 +45,11 @@
#define SPAN_NR_ENTRIES(vstart, vend, shift) \
((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1)
-#define EARLY_ENTRIES(vstart, vend, shift, add) \
- (SPAN_NR_ENTRIES(vstart, vend, shift) + (add))
+#define EARLY_ENTRIES(lvl, vstart, vend) \
+ SPAN_NR_ENTRIES(vstart, vend, SWAPPER_BLOCK_SHIFT + lvl * PTDESC_TABLE_SHIFT)
-#define EARLY_LEVEL(lvl, lvls, vstart, vend, add) \
- (lvls > lvl ? EARLY_ENTRIES(vstart, vend, SWAPPER_BLOCK_SHIFT + lvl * (PAGE_SHIFT - 3), add) : 0)
+#define EARLY_LEVEL(lvl, lvls, vstart, vend, add) \
+ ((lvls) > (lvl) ? EARLY_ENTRIES(lvl, vstart, vend) + (add) : 0)
#define EARLY_PAGES(lvls, vstart, vend, add) (1 /* PGDIR page */ \
+ EARLY_LEVEL(3, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \
diff --git a/arch/arm64/include/asm/mem_encrypt.h b/arch/arm64/include/asm/mem_encrypt.h
index f8f78f622dd2..a2a1eeb36d4b 100644
--- a/arch/arm64/include/asm/mem_encrypt.h
+++ b/arch/arm64/include/asm/mem_encrypt.h
@@ -21,4 +21,15 @@ static inline bool force_dma_unencrypted(struct device *dev)
return is_realm_world();
}
+/*
+ * For Arm CCA guests, canonical addresses are "encrypted", so no changes
+ * required for dma_addr_encrypted().
+ * The unencrypted DMA buffers must be accessed via the unprotected IPA,
+ * "top IPA bit" set.
+ */
+#define dma_addr_unencrypted(x) ((x) | PROT_NS_SHARED)
+
+/* Clear the "top" IPA bit while converting back */
+#define dma_addr_canonical(x) ((x) & ~PROT_NS_SHARED)
+
#endif /* __ASM_MEM_ENCRYPT_H */
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index a9136cc551cc..f3b77deedfa2 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -7,40 +7,46 @@
#include <asm/memory.h>
+#define PTDESC_ORDER 3
+
+/* Number of VA bits resolved by a single translation table level */
+#define PTDESC_TABLE_SHIFT (PAGE_SHIFT - PTDESC_ORDER)
+
/*
* Number of page-table levels required to address 'va_bits' wide
* address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT)
- * bits with (PAGE_SHIFT - 3) bits at each page table level. Hence:
+ * bits with PTDESC_TABLE_SHIFT bits at each page table level. Hence:
*
- * levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), (PAGE_SHIFT - 3))
+ * levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), PTDESC_TABLE_SHIFT)
*
* where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d))
*
* We cannot include linux/kernel.h which defines DIV_ROUND_UP here
* due to build issues. So we open code DIV_ROUND_UP here:
*
- * ((((va_bits) - PAGE_SHIFT) + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3))
+ * ((((va_bits) - PAGE_SHIFT) + PTDESC_TABLE_SHIFT - 1) / PTDESC_TABLE_SHIFT)
*
* which gets simplified as :
*/
-#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3))
+#define ARM64_HW_PGTABLE_LEVELS(va_bits) \
+ (((va_bits) - PTDESC_ORDER - 1) / PTDESC_TABLE_SHIFT)
/*
* Size mapped by an entry at level n ( -1 <= n <= 3)
- * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
+ * We map PTDESC_TABLE_SHIFT at all translation levels and PAGE_SHIFT bits
* in the final page. The maximum number of translation levels supported by
* the architecture is 5. Hence, starting at level n, we have further
* ((4 - n) - 1) levels of translation excluding the offset within the page.
* So, the total number of bits mapped by an entry at level n is :
*
- * ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT
+ * ((4 - n) - 1) * PTDESC_TABLE_SHIFT + PAGE_SHIFT
*
* Rearranging it a bit we get :
- * (4 - n) * (PAGE_SHIFT - 3) + 3
+ * (4 - n) * PTDESC_TABLE_SHIFT + PTDESC_ORDER
*/
-#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3)
+#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) (PTDESC_TABLE_SHIFT * (4 - (n)) + PTDESC_ORDER)
-#define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3))
+#define PTRS_PER_PTE (1 << PTDESC_TABLE_SHIFT)
/*
* PMD_SHIFT determines the size a level 2 page table entry can map.
@@ -49,7 +55,7 @@
#define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
-#define PTRS_PER_PMD (1 << (PAGE_SHIFT - 3))
+#define PTRS_PER_PMD (1 << PTDESC_TABLE_SHIFT)
#endif
/*
@@ -59,14 +65,14 @@
#define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE-1))
-#define PTRS_PER_PUD (1 << (PAGE_SHIFT - 3))
+#define PTRS_PER_PUD (1 << PTDESC_TABLE_SHIFT)
#endif
#if CONFIG_PGTABLE_LEVELS > 4
#define P4D_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(0)
#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
#define P4D_MASK (~(P4D_SIZE-1))
-#define PTRS_PER_P4D (1 << (PAGE_SHIFT - 3))
+#define PTRS_PER_P4D (1 << PTDESC_TABLE_SHIFT)
#endif
/*
@@ -97,7 +103,6 @@
* Level -1 descriptor (PGD).
*/
#define PGD_TYPE_TABLE (_AT(pgdval_t, 3) << 0)
-#define PGD_TABLE_BIT (_AT(pgdval_t, 1) << 1)
#define PGD_TYPE_MASK (_AT(pgdval_t, 3) << 0)
#define PGD_TABLE_AF (_AT(pgdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */
#define PGD_TABLE_PXN (_AT(pgdval_t, 1) << 59)
@@ -107,7 +112,6 @@
* Level 0 descriptor (P4D).
*/
#define P4D_TYPE_TABLE (_AT(p4dval_t, 3) << 0)
-#define P4D_TABLE_BIT (_AT(p4dval_t, 1) << 1)
#define P4D_TYPE_MASK (_AT(p4dval_t, 3) << 0)
#define P4D_TYPE_SECT (_AT(p4dval_t, 1) << 0)
#define P4D_SECT_RDONLY (_AT(p4dval_t, 1) << 7) /* AP[2] */
@@ -119,7 +123,6 @@
* Level 1 descriptor (PUD).
*/
#define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0)
-#define PUD_TABLE_BIT (_AT(pudval_t, 1) << 1)
#define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0)
#define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0)
#define PUD_SECT_RDONLY (_AT(pudval_t, 1) << 7) /* AP[2] */
@@ -133,7 +136,6 @@
#define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0)
-#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1)
#define PMD_TABLE_AF (_AT(pmdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */
/*
@@ -162,7 +164,6 @@
#define PTE_VALID (_AT(pteval_t, 1) << 0)
#define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0)
#define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0)
-#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1)
#define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
#define PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */
#define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index a95f1f77bb39..7830d031742e 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -169,25 +169,25 @@ static inline bool __pure lpa2_is_enabled(void)
#define PAGE_GCS_RO __pgprot(_PAGE_GCS_RO)
#define PIE_E0 ( \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS), PIE_GCS) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX_O) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX_O) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R_O) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW_O))
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_GCS) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY), PIE_R_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED), PIE_RW_O))
#define PIE_E1 ( \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS), PIE_NONE_O) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS_RO), PIE_NONE_O) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_R) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RW) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_ROX), PIE_RX) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_EXEC), PIE_RWX) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_RO), PIE_R) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL), PIE_RW))
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_NONE_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_NONE_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY_EXEC), PIE_R) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RW) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY), PIE_R) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED), PIE_RW) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL_ROX), PIE_RX) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL_EXEC), PIE_RWX) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL_RO), PIE_R) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL), PIE_RW))
#endif /* __ASM_PGTABLE_PROT_H */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 0b2a2ad1b9e8..84f05f781a70 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -68,10 +68,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#define pte_ERROR(e) \
pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e))
-/*
- * Macros to convert between a physical address and its placement in a
- * page table entry, taking care of 52-bit addresses.
- */
#ifdef CONFIG_ARM64_PA_BITS_52
static inline phys_addr_t __pte_to_phys(pte_t pte)
{
@@ -84,8 +80,15 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PHYS_TO_PTE_ADDR_MASK;
}
#else
-#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_LOW)
-#define __phys_to_pte_val(phys) (phys)
+static inline phys_addr_t __pte_to_phys(pte_t pte)
+{
+ return pte_val(pte) & PTE_ADDR_LOW;
+}
+
+static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
+{
+ return phys;
+}
#endif
#define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT)
@@ -483,12 +486,12 @@ static inline pmd_t pte_pmd(pte_t pte)
static inline pgprot_t mk_pud_sect_prot(pgprot_t prot)
{
- return __pgprot((pgprot_val(prot) & ~PUD_TABLE_BIT) | PUD_TYPE_SECT);
+ return __pgprot((pgprot_val(prot) & ~PUD_TYPE_MASK) | PUD_TYPE_SECT);
}
static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
{
- return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT);
+ return __pgprot((pgprot_val(prot) & ~PMD_TYPE_MASK) | PMD_TYPE_SECT);
}
static inline pte_t pte_swp_mkexclusive(pte_t pte)
@@ -548,18 +551,6 @@ static inline int pmd_protnone(pmd_t pmd)
#endif
#define pmd_present(pmd) pte_present(pmd_pte(pmd))
-
-/*
- * THP definitions.
- */
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline int pmd_trans_huge(pmd_t pmd)
-{
- return pmd_val(pmd) && pmd_present(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
#define pmd_valid(pmd) pte_valid(pmd_pte(pmd))
@@ -585,7 +576,18 @@ static inline int pmd_trans_huge(pmd_t pmd)
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
-#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+ /*
+ * It's possible that the pmd is present-invalid on entry
+ * and in that case it needs to remain present-invalid on
+ * exit. So ensure the VALID bit does not get modified.
+ */
+ pmdval_t mask = PMD_TYPE_MASK & ~PTE_VALID;
+ pmdval_t val = PMD_TYPE_SECT & ~PTE_VALID;
+
+ return __pmd((pmd_val(pmd) & ~mask) | val);
+}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_devmap(pmd) pte_devmap(pmd_pte(pmd))
@@ -613,7 +615,18 @@ static inline pmd_t pmd_mkspecial(pmd_t pmd)
#define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud)))
#define pud_write(pud) pte_write(pud_pte(pud))
-#define pud_mkhuge(pud) (__pud(pud_val(pud) & ~PUD_TABLE_BIT))
+static inline pud_t pud_mkhuge(pud_t pud)
+{
+ /*
+ * It's possible that the pud is present-invalid on entry
+ * and in that case it needs to remain present-invalid on
+ * exit. So ensure the VALID bit does not get modified.
+ */
+ pudval_t mask = PUD_TYPE_MASK & ~PTE_VALID;
+ pudval_t val = PUD_TYPE_SECT & ~PTE_VALID;
+
+ return __pud((pud_val(pud) & ~mask) | val);
+}
#define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud))
#define __phys_to_pud_val(phys) __phys_to_pte_val(phys)
@@ -724,6 +737,18 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
#define pmd_leaf_size(pmd) (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
#define pte_leaf_size(pte) (pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+ /*
+ * If pmd is present-invalid, pmd_table() won't detect it
+ * as a table, so force the valid bit for the comparison.
+ */
+ return pmd_val(pmd) && pmd_present(pmd) &&
+ !pmd_table(__pmd(pmd_val(pmd) | PTE_VALID));
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
static inline bool pud_sect(pud_t pud) { return false; }
static inline bool pud_table(pud_t pud) { return true; }
@@ -805,7 +830,8 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))
#define pud_none(pud) (!pud_val(pud))
-#define pud_bad(pud) (!pud_table(pud))
+#define pud_bad(pud) ((pud_val(pud) & PUD_TYPE_MASK) != \
+ PUD_TYPE_TABLE)
#define pud_present(pud) pte_present(pud_pte(pud))
#ifndef __PAGETABLE_PMD_FOLDED
#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
@@ -896,7 +922,9 @@ static inline bool mm_pud_folded(const struct mm_struct *mm)
pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e))
#define p4d_none(p4d) (pgtable_l4_enabled() && !p4d_val(p4d))
-#define p4d_bad(p4d) (pgtable_l4_enabled() && !(p4d_val(p4d) & P4D_TABLE_BIT))
+#define p4d_bad(p4d) (pgtable_l4_enabled() && \
+ ((p4d_val(p4d) & P4D_TYPE_MASK) != \
+ P4D_TYPE_TABLE))
#define p4d_present(p4d) (!p4d_none(p4d))
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
@@ -1023,7 +1051,9 @@ static inline bool mm_p4d_folded(const struct mm_struct *mm)
pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e))
#define pgd_none(pgd) (pgtable_l5_enabled() && !pgd_val(pgd))
-#define pgd_bad(pgd) (pgtable_l5_enabled() && !(pgd_val(pgd) & PGD_TABLE_BIT))
+#define pgd_bad(pgd) (pgtable_l5_enabled() && \
+ ((pgd_val(pgd) & PGD_TYPE_MASK) != \
+ PGD_TYPE_TABLE))
#define pgd_present(pgd) (!pgd_none(pgd))
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
diff --git a/arch/arm64/include/asm/por.h b/arch/arm64/include/asm/por.h
index e06e9f473675..d913d5b529e4 100644
--- a/arch/arm64/include/asm/por.h
+++ b/arch/arm64/include/asm/por.h
@@ -6,26 +6,27 @@
#ifndef _ASM_ARM64_POR_H
#define _ASM_ARM64_POR_H
-#define POR_BITS_PER_PKEY 4
-#define POR_ELx_IDX(por_elx, idx) (((por_elx) >> ((idx) * POR_BITS_PER_PKEY)) & 0xf)
+#include <asm/sysreg.h>
+
+#define POR_EL0_INIT POR_ELx_PERM_PREP(0, POE_RWX)
static inline bool por_elx_allows_read(u64 por, u8 pkey)
{
- u8 perm = POR_ELx_IDX(por, pkey);
+ u8 perm = POR_ELx_PERM_GET(pkey, por);
return perm & POE_R;
}
static inline bool por_elx_allows_write(u64 por, u8 pkey)
{
- u8 perm = POR_ELx_IDX(por, pkey);
+ u8 perm = POR_ELx_PERM_GET(pkey, por);
return perm & POE_W;
}
static inline bool por_elx_allows_exec(u64 por, u8 pkey)
{
- u8 perm = POR_ELx_IDX(por, pkey);
+ u8 perm = POR_ELx_PERM_GET(pkey, por);
return perm & POE_X;
}
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
index 0c4d9045c31f..f1524cdeacf1 100644
--- a/arch/arm64/include/asm/spectre.h
+++ b/arch/arm64/include/asm/spectre.h
@@ -97,7 +97,6 @@ enum mitigation_state arm64_get_meltdown_state(void);
enum mitigation_state arm64_get_spectre_bhb_state(void);
bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope);
-u8 spectre_bhb_loop_affected(int scope);
void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 05ea5223d2d5..e3252f8bb465 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1062,8 +1062,11 @@
#define PIE_RX UL(0xa)
#define PIE_RW UL(0xc)
#define PIE_RWX UL(0xe)
+#define PIE_MASK UL(0xf)
-#define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4))
+#define PIRx_ELx_BITS_PER_IDX 4
+#define PIRx_ELx_PERM_SHIFT(idx) ((idx) * PIRx_ELx_BITS_PER_IDX)
+#define PIRx_ELx_PERM_PREP(idx, perm) (((perm) & PIE_MASK) << PIRx_ELx_PERM_SHIFT(idx))
/*
* Permission Overlay Extension (POE) permission encodings.
@@ -1074,12 +1077,14 @@
#define POE_RX UL(0x3)
#define POE_W UL(0x4)
#define POE_RW UL(0x5)
-#define POE_XW UL(0x6)
-#define POE_RXW UL(0x7)
+#define POE_WX UL(0x6)
+#define POE_RWX UL(0x7)
#define POE_MASK UL(0xf)
-/* Initial value for Permission Overlay Extension for EL0 */
-#define POR_EL0_INIT POE_RXW
+#define POR_ELx_BITS_PER_IDX 4
+#define POR_ELx_PERM_SHIFT(idx) ((idx) * POR_ELx_BITS_PER_IDX)
+#define POR_ELx_PERM_GET(idx, reg) (((reg) >> POR_ELx_PERM_SHIFT(idx)) & POE_MASK)
+#define POR_ELx_PERM_PREP(idx, perm) (((perm) & POE_MASK) << POR_ELx_PERM_SHIFT(idx))
/*
* Definitions for Guarded Control Stack
diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
index 2b69e3beeef8..81345f68f9fc 100644
--- a/arch/arm64/kernel/pi/map_range.c
+++ b/arch/arm64/kernel/pi/map_range.c
@@ -31,7 +31,7 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
{
u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX;
pteval_t protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
- int lshift = (3 - level) * (PAGE_SHIFT - 3);
+ int lshift = (3 - level) * PTDESC_TABLE_SHIFT;
u64 lmask = (PAGE_SIZE << lshift) - 1;
start &= PAGE_MASK;
@@ -45,12 +45,12 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
* clearing the mapping
*/
if (protval)
- protval |= (level < 3) ? PMD_TYPE_SECT : PTE_TYPE_PAGE;
+ protval |= (level == 2) ? PMD_TYPE_SECT : PTE_TYPE_PAGE;
while (start < end) {
u64 next = min((start | lmask) + 1, PAGE_ALIGN(end));
- if (level < 3 && (start | next | pa) & lmask) {
+ if (level < 2 || (level == 2 && (start | next | pa) & lmask)) {
/*
* This chunk needs a finer grained mapping. Create a
* table mapping if necessary and recurse.
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index da53722f95d4..0f51fd10b4b0 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -845,52 +845,86 @@ static unsigned long system_bhb_mitigations;
* This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any
* SCOPE_SYSTEM call will give the right answer.
*/
-u8 spectre_bhb_loop_affected(int scope)
+static bool is_spectre_bhb_safe(int scope)
+{
+ static const struct midr_range spectre_bhb_safe_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A510),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A520),
+ MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),
+ {},
+ };
+ static bool all_safe = true;
+
+ if (scope != SCOPE_LOCAL_CPU)
+ return all_safe;
+
+ if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_safe_list))
+ return true;
+
+ all_safe = false;
+
+ return false;
+}
+
+static u8 spectre_bhb_loop_affected(void)
{
u8 k = 0;
- static u8 max_bhb_k;
-
- if (scope == SCOPE_LOCAL_CPU) {
- static const struct midr_range spectre_bhb_k32_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
- MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
- MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
- {},
- };
- static const struct midr_range spectre_bhb_k24_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
- MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
- {},
- };
- static const struct midr_range spectre_bhb_k11_list[] = {
- MIDR_ALL_VERSIONS(MIDR_AMPERE1),
- {},
- };
- static const struct midr_range spectre_bhb_k8_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
- {},
- };
-
- if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list))
- k = 32;
- else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list))
- k = 24;
- else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list))
- k = 11;
- else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list))
- k = 8;
-
- max_bhb_k = max(max_bhb_k, k);
- } else {
- k = max_bhb_k;
- }
+
+ static const struct midr_range spectre_bhb_k132_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
+ };
+ static const struct midr_range spectre_bhb_k38_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A715),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
+ };
+ static const struct midr_range spectre_bhb_k32_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k24_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A76AE),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_GOLD),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k11_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k8_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ {},
+ };
+
+ if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k132_list))
+ k = 132;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k38_list))
+ k = 38;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list))
+ k = 32;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list))
+ k = 24;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list))
+ k = 11;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list))
+ k = 8;
return k;
}
@@ -916,29 +950,13 @@ static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void)
}
}
-static bool is_spectre_bhb_fw_affected(int scope)
+static bool has_spectre_bhb_fw_mitigation(void)
{
- static bool system_affected;
enum mitigation_state fw_state;
bool has_smccc = arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_NONE;
- static const struct midr_range spectre_bhb_firmware_mitigated_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
- {},
- };
- bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(),
- spectre_bhb_firmware_mitigated_list);
-
- if (scope != SCOPE_LOCAL_CPU)
- return system_affected;
fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
- if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) {
- system_affected = true;
- return true;
- }
-
- return false;
+ return has_smccc && fw_state == SPECTRE_MITIGATED;
}
static bool supports_ecbhb(int scope)
@@ -954,6 +972,8 @@ static bool supports_ecbhb(int scope)
ID_AA64MMFR1_EL1_ECBHB_SHIFT);
}
+static u8 max_bhb_k;
+
bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
int scope)
{
@@ -962,16 +982,18 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
if (supports_csv2p3(scope))
return false;
- if (supports_clearbhb(scope))
- return true;
-
- if (spectre_bhb_loop_affected(scope))
- return true;
+ if (is_spectre_bhb_safe(scope))
+ return false;
- if (is_spectre_bhb_fw_affected(scope))
- return true;
+ /*
+ * At this point the core isn't known to be "safe" so we're going to
+ * assume it's vulnerable. We still need to update `max_bhb_k` though,
+ * but only if we aren't mitigating with clearbhb though.
+ */
+ if (scope == SCOPE_LOCAL_CPU && !supports_clearbhb(SCOPE_LOCAL_CPU))
+ max_bhb_k = max(max_bhb_k, spectre_bhb_loop_affected());
- return false;
+ return true;
}
static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
@@ -1002,7 +1024,7 @@ early_param("nospectre_bhb", parse_spectre_bhb_param);
void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
{
bp_hardening_cb_t cpu_cb;
- enum mitigation_state fw_state, state = SPECTRE_VULNERABLE;
+ enum mitigation_state state = SPECTRE_VULNERABLE;
struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
if (!is_spectre_bhb_affected(entry, SCOPE_LOCAL_CPU))
@@ -1028,7 +1050,7 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN);
state = SPECTRE_MITIGATED;
set_bit(BHB_INSN, &system_bhb_mitigations);
- } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) {
+ } else if (spectre_bhb_loop_affected()) {
/*
* Ensure KVM uses the indirect vector which will have the
* branchy-loop added. A57/A72-r0 will already have selected
@@ -1041,32 +1063,29 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP);
state = SPECTRE_MITIGATED;
set_bit(BHB_LOOP, &system_bhb_mitigations);
- } else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) {
- fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
- if (fw_state == SPECTRE_MITIGATED) {
- /*
- * Ensure KVM uses one of the spectre bp_hardening
- * vectors. The indirect vector doesn't include the EL3
- * call, so needs upgrading to
- * HYP_VECTOR_SPECTRE_INDIRECT.
- */
- if (!data->slot || data->slot == HYP_VECTOR_INDIRECT)
- data->slot += 1;
-
- this_cpu_set_vectors(EL1_VECTOR_BHB_FW);
-
- /*
- * The WA3 call in the vectors supersedes the WA1 call
- * made during context-switch. Uninstall any firmware
- * bp_hardening callback.
- */
- cpu_cb = spectre_v2_get_sw_mitigation_cb();
- if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb)
- __this_cpu_write(bp_hardening_data.fn, NULL);
-
- state = SPECTRE_MITIGATED;
- set_bit(BHB_FW, &system_bhb_mitigations);
- }
+ } else if (has_spectre_bhb_fw_mitigation()) {
+ /*
+ * Ensure KVM uses one of the spectre bp_hardening
+ * vectors. The indirect vector doesn't include the EL3
+ * call, so needs upgrading to
+ * HYP_VECTOR_SPECTRE_INDIRECT.
+ */
+ if (!data->slot || data->slot == HYP_VECTOR_INDIRECT)
+ data->slot += 1;
+
+ this_cpu_set_vectors(EL1_VECTOR_BHB_FW);
+
+ /*
+ * The WA3 call in the vectors supersedes the WA1 call
+ * made during context-switch. Uninstall any firmware
+ * bp_hardening callback.
+ */
+ cpu_cb = spectre_v2_get_sw_mitigation_cb();
+ if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb)
+ __this_cpu_write(bp_hardening_data.fn, NULL);
+
+ state = SPECTRE_MITIGATED;
+ set_bit(BHB_FW, &system_bhb_mitigations);
}
update_mitigation_state(&spectre_bhb_state, state);
@@ -1100,7 +1119,6 @@ void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt,
{
u8 rd;
u32 insn;
- u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM);
BUG_ON(nr_inst != 1); /* MOV -> MOV */
@@ -1109,7 +1127,7 @@ void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt,
insn = le32_to_cpu(*origptr);
rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn);
- insn = aarch64_insn_gen_movewide(rd, loop_count, 0,
+ insn = aarch64_insn_gen_movewide(rd, max_bhb_k, 0,
AARCH64_INSN_VARIANT_64BIT,
AARCH64_INSN_MOVEWIDE_ZERO);
*updptr++ = cpu_to_le32(insn);
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 99ea26d400ff..a7c37afb4ebe 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -91,7 +91,7 @@ static void save_reset_user_access_state(struct user_access_state *ua_state)
u64 por_enable_all = 0;
for (int pkey = 0; pkey < arch_max_pkey(); pkey++)
- por_enable_all |= POE_RXW << (pkey * POR_BITS_PER_PKEY);
+ por_enable_all |= POR_ELx_PERM_PREP(pkey, POE_RWX);
ua_state->por_el0 = read_sysreg_s(SYS_POR_EL0);
write_sysreg_s(por_enable_all, SYS_POR_EL0);
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index cb180684d10d..5d07ee85bdae 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -15,8 +15,11 @@
#include <linux/arch_topology.h>
#include <linux/cacheinfo.h>
#include <linux/cpufreq.h>
+#include <linux/cpu_smt.h>
#include <linux/init.h>
#include <linux/percpu.h>
+#include <linux/sched/isolation.h>
+#include <linux/xarray.h>
#include <asm/cpu.h>
#include <asm/cputype.h>
@@ -37,17 +40,28 @@ static bool __init acpi_cpu_is_threaded(int cpu)
return !!is_threaded;
}
+struct cpu_smt_info {
+ unsigned int thread_num;
+ int core_id;
+};
+
/*
* Propagate the topology information of the processor_topology_node tree to the
* cpu_topology array.
*/
int __init parse_acpi_topology(void)
{
+ unsigned int max_smt_thread_num = 1;
+ struct cpu_smt_info *entry;
+ struct xarray hetero_cpu;
+ unsigned long hetero_id;
int cpu, topology_id;
if (acpi_disabled)
return 0;
+ xa_init(&hetero_cpu);
+
for_each_possible_cpu(cpu) {
topology_id = find_acpi_cpu_topology(cpu, 0);
if (topology_id < 0)
@@ -57,6 +71,34 @@ int __init parse_acpi_topology(void)
cpu_topology[cpu].thread_id = topology_id;
topology_id = find_acpi_cpu_topology(cpu, 1);
cpu_topology[cpu].core_id = topology_id;
+
+ /*
+ * In the PPTT, CPUs below a node with the 'identical
+ * implementation' flag have the same number of threads.
+ * Count the number of threads for only one CPU (i.e.
+ * one core_id) among those with the same hetero_id.
+ * See the comment of find_acpi_cpu_topology_hetero_id()
+ * for more details.
+ *
+ * One entry is created for each node having:
+ * - the 'identical implementation' flag
+ * - its parent not having the flag
+ */
+ hetero_id = find_acpi_cpu_topology_hetero_id(cpu);
+ entry = xa_load(&hetero_cpu, hetero_id);
+ if (!entry) {
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ WARN_ON_ONCE(!entry);
+
+ if (entry) {
+ entry->core_id = topology_id;
+ entry->thread_num = 1;
+ xa_store(&hetero_cpu, hetero_id,
+ entry, GFP_KERNEL);
+ }
+ } else if (entry->core_id == topology_id) {
+ entry->thread_num++;
+ }
} else {
cpu_topology[cpu].thread_id = -1;
cpu_topology[cpu].core_id = topology_id;
@@ -67,6 +109,19 @@ int __init parse_acpi_topology(void)
cpu_topology[cpu].package_id = topology_id;
}
+ /*
+ * This is a short loop since the number of XArray elements is the
+ * number of heterogeneous CPU clusters. On a homogeneous system
+ * there's only one entry in the XArray.
+ */
+ xa_for_each(&hetero_cpu, hetero_id, entry) {
+ max_smt_thread_num = max(max_smt_thread_num, entry->thread_num);
+ xa_erase(&hetero_cpu, hetero_id);
+ kfree(entry);
+ }
+
+ cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num);
+ xa_destroy(&hetero_cpu);
return 0;
}
#endif
@@ -88,18 +143,28 @@ int __init parse_acpi_topology(void)
* initialized.
*/
static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) = 1UL << (2 * SCHED_CAPACITY_SHIFT);
-static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
-static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
static cpumask_var_t amu_fie_cpus;
+struct amu_cntr_sample {
+ u64 arch_const_cycles_prev;
+ u64 arch_core_cycles_prev;
+ unsigned long last_scale_update;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct amu_cntr_sample, cpu_amu_samples);
+
void update_freq_counters_refs(void)
{
- this_cpu_write(arch_core_cycles_prev, read_corecnt());
- this_cpu_write(arch_const_cycles_prev, read_constcnt());
+ struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples);
+
+ amu_sample->arch_core_cycles_prev = read_corecnt();
+ amu_sample->arch_const_cycles_prev = read_constcnt();
}
static inline bool freq_counters_valid(int cpu)
{
+ struct amu_cntr_sample *amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu);
+
if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask))
return false;
@@ -108,8 +173,8 @@ static inline bool freq_counters_valid(int cpu)
return false;
}
- if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) ||
- !per_cpu(arch_core_cycles_prev, cpu))) {
+ if (unlikely(!amu_sample->arch_const_cycles_prev ||
+ !amu_sample->arch_core_cycles_prev)) {
pr_debug("CPU%d: cycle counters are not enabled.\n", cpu);
return false;
}
@@ -152,17 +217,22 @@ void freq_inv_set_max_ratio(int cpu, u64 max_rate)
static void amu_scale_freq_tick(void)
{
+ struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples);
u64 prev_core_cnt, prev_const_cnt;
u64 core_cnt, const_cnt, scale;
- prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
- prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
+ prev_const_cnt = amu_sample->arch_const_cycles_prev;
+ prev_core_cnt = amu_sample->arch_core_cycles_prev;
update_freq_counters_refs();
- const_cnt = this_cpu_read(arch_const_cycles_prev);
- core_cnt = this_cpu_read(arch_core_cycles_prev);
+ const_cnt = amu_sample->arch_const_cycles_prev;
+ core_cnt = amu_sample->arch_core_cycles_prev;
+ /*
+ * This should not happen unless the AMUs have been reset and the
+ * counter values have not been restored - unlikely
+ */
if (unlikely(core_cnt <= prev_core_cnt ||
const_cnt <= prev_const_cnt))
return;
@@ -182,6 +252,8 @@ static void amu_scale_freq_tick(void)
scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
this_cpu_write(arch_freq_scale, (unsigned long)scale);
+
+ amu_sample->last_scale_update = jiffies;
}
static struct scale_freq_data amu_sfd = {
@@ -189,6 +261,96 @@ static struct scale_freq_data amu_sfd = {
.set_freq_scale = amu_scale_freq_tick,
};
+static __always_inline bool amu_fie_cpu_supported(unsigned int cpu)
+{
+ return cpumask_available(amu_fie_cpus) &&
+ cpumask_test_cpu(cpu, amu_fie_cpus);
+}
+
+void arch_cpu_idle_enter(void)
+{
+ unsigned int cpu = smp_processor_id();
+
+ if (!amu_fie_cpu_supported(cpu))
+ return;
+
+ /* Kick in AMU update but only if one has not happened already */
+ if (housekeeping_cpu(cpu, HK_TYPE_TICK) &&
+ time_is_before_jiffies(per_cpu(cpu_amu_samples.last_scale_update, cpu)))
+ amu_scale_freq_tick();
+}
+
+#define AMU_SAMPLE_EXP_MS 20
+
+int arch_freq_get_on_cpu(int cpu)
+{
+ struct amu_cntr_sample *amu_sample;
+ unsigned int start_cpu = cpu;
+ unsigned long last_update;
+ unsigned int freq = 0;
+ u64 scale;
+
+ if (!amu_fie_cpu_supported(cpu) || !arch_scale_freq_ref(cpu))
+ return -EOPNOTSUPP;
+
+ while (1) {
+
+ amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu);
+
+ last_update = amu_sample->last_scale_update;
+
+ /*
+ * For those CPUs that are in full dynticks mode, or those that have
+ * not seen tick for a while, try an alternative source for the counters
+ * (and thus freq scale), if available, for given policy: this boils
+ * down to identifying an active cpu within the same freq domain, if any.
+ */
+ if (!housekeeping_cpu(cpu, HK_TYPE_TICK) ||
+ time_is_before_jiffies(last_update + msecs_to_jiffies(AMU_SAMPLE_EXP_MS))) {
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ int ref_cpu;
+
+ if (!policy)
+ return -EINVAL;
+
+ if (!cpumask_intersects(policy->related_cpus,
+ housekeeping_cpumask(HK_TYPE_TICK))) {
+ cpufreq_cpu_put(policy);
+ return -EOPNOTSUPP;
+ }
+
+ for_each_cpu_wrap(ref_cpu, policy->cpus, cpu + 1) {
+ if (ref_cpu == start_cpu) {
+ /* Prevent verifying same CPU twice */
+ ref_cpu = nr_cpu_ids;
+ break;
+ }
+ if (!idle_cpu(ref_cpu))
+ break;
+ }
+
+ cpufreq_cpu_put(policy);
+
+ if (ref_cpu >= nr_cpu_ids)
+ /* No alternative to pull info from */
+ return -EAGAIN;
+
+ cpu = ref_cpu;
+ } else {
+ break;
+ }
+ }
+ /*
+ * Reversed computation to the one used to determine
+ * the arch_freq_scale value
+ * (see amu_scale_freq_tick for details)
+ */
+ scale = arch_scale_freq_capacity(cpu);
+ freq = scale * arch_scale_freq_ref(cpu);
+ freq >>= SCHED_CAPACITY_SHIFT;
+ return freq;
+}
+
static void amu_fie_setup(const struct cpumask *cpus)
{
int cpu;
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index 3a96c96816e9..f74a66ce3064 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -1090,22 +1090,22 @@ static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
break;
}
- if (pov_perms & ~POE_RXW)
+ if (pov_perms & ~POE_RWX)
pov_perms = POE_NONE;
if (wi->poe && wr->pov) {
wr->pr &= pov_perms & POE_R;
- wr->px &= pov_perms & POE_X;
wr->pw &= pov_perms & POE_W;
+ wr->px &= pov_perms & POE_X;
}
- if (uov_perms & ~POE_RXW)
+ if (uov_perms & ~POE_RWX)
uov_perms = POE_NONE;
if (wi->e0poe && wr->uov) {
wr->ur &= uov_perms & POE_R;
- wr->ux &= uov_perms & POE_X;
wr->uw &= uov_perms & POE_W;
+ wr->ux &= uov_perms & POE_X;
}
}
diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
index e4a342e903e2..098416d7e5c2 100644
--- a/arch/arm64/kvm/ptdump.c
+++ b/arch/arm64/kvm/ptdump.c
@@ -52,8 +52,8 @@ static const struct ptdump_prot_bits stage2_pte_bits[] = {
.set = "AF",
.clear = " ",
}, {
- .mask = PTE_TABLE_BIT | PTE_VALID,
- .val = PTE_VALID,
+ .mask = PMD_TYPE_MASK,
+ .val = PMD_TYPE_SECT,
.set = "BLK",
.clear = " ",
},
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index a5a5f5b97b17..de9a303b6ad0 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -17,14 +17,27 @@
* Alignment fixed up by hardware.
*/
- .p2align 4
- // Alignment is for the loop, but since the prologue (including BTI)
- // is also 16 bytes we can keep any padding outside the function
SYM_FUNC_START(__arch_clear_user)
add x2, x0, x1
+
+#ifdef CONFIG_AS_HAS_MOPS
+ .arch_extension mops
+alternative_if_not ARM64_HAS_MOPS
+ b .Lno_mops
+alternative_else_nop_endif
+
+USER(9f, setpt [x0]!, x1!, xzr)
+USER(6f, setmt [x0]!, x1!, xzr)
+USER(6f, setet [x0]!, x1!, xzr)
+ mov x0, #0
+ ret
+.Lno_mops:
+#endif
+
subs x1, x1, #8
b.mi 2f
-1:
+
+1: .p2align 4
USER(9f, sttr xzr, [x0])
add x0, x0, #8
subs x1, x1, #8
@@ -47,6 +60,10 @@ USER(7f, sttrb wzr, [x2, #-1])
ret
// Exception fixups
+6: b.cs 9f
+ // Registers are in Option A format
+ add x0, x0, x1
+ b 9f
7: sub x0, x2, #5 // Adjust for faulting on the final byte...
8: add x0, x0, #4 // ...or the second word of the 4-7 byte case
9: sub x0, x2, x0
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 34e317907524..400057d607ec 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -52,6 +52,13 @@
stp \reg1, \reg2, [\ptr], \val
.endm
+ .macro cpy1 dst, src, count
+ .arch_extension mops
+ USER_CPY(9997f, 0, cpyfprt [\dst]!, [\src]!, \count!)
+ USER_CPY(9996f, 0, cpyfmrt [\dst]!, [\src]!, \count!)
+ USER_CPY(9996f, 0, cpyfert [\dst]!, [\src]!, \count!)
+ .endm
+
end .req x5
srcin .req x15
SYM_FUNC_START(__arch_copy_from_user)
@@ -62,6 +69,9 @@ SYM_FUNC_START(__arch_copy_from_user)
ret
// Exception fixups
+9996: b.cs 9997f
+ // Registers are in Option A format
+ add dst, dst, count
9997: cmp dst, dstin
b.ne 9998f
// Before being absolutely sure we couldn't copy anything, try harder
diff --git a/arch/arm64/lib/copy_template.S b/arch/arm64/lib/copy_template.S
index 488df234c49a..7f2f5a0e2fb9 100644
--- a/arch/arm64/lib/copy_template.S
+++ b/arch/arm64/lib/copy_template.S
@@ -40,6 +40,16 @@ D_l .req x13
D_h .req x14
mov dst, dstin
+
+#ifdef CONFIG_AS_HAS_MOPS
+alternative_if_not ARM64_HAS_MOPS
+ b .Lno_mops
+alternative_else_nop_endif
+ cpy1 dst, src, count
+ b .Lexitfunc
+.Lno_mops:
+#endif
+
cmp count, #16
/*When memory length is less than 16, the accessed are not aligned.*/
b.lo .Ltiny15
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 802231772608..819f2e3fc7a9 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -51,6 +51,13 @@
user_stp 9997f, \reg1, \reg2, \ptr, \val
.endm
+ .macro cpy1 dst, src, count
+ .arch_extension mops
+ USER_CPY(9997f, 1, cpyfpwt [\dst]!, [\src]!, \count!)
+ USER_CPY(9996f, 1, cpyfmwt [\dst]!, [\src]!, \count!)
+ USER_CPY(9996f, 1, cpyfewt [\dst]!, [\src]!, \count!)
+ .endm
+
end .req x5
srcin .req x15
SYM_FUNC_START(__arch_copy_to_user)
@@ -61,6 +68,9 @@ SYM_FUNC_START(__arch_copy_to_user)
ret
// Exception fixups
+9996: b.cs 9997f
+ // Registers are in Option A format
+ add dst, dst, count
9997: cmp dst, dstin
b.ne 9998f
// Before being absolutely sure we couldn't copy anything, try harder
diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
index 228d681a8715..6e0528831cd3 100644
--- a/arch/arm64/mm/extable.c
+++ b/arch/arm64/mm/extable.c
@@ -8,8 +8,33 @@
#include <linux/uaccess.h>
#include <asm/asm-extable.h>
+#include <asm/esr.h>
#include <asm/ptrace.h>
+static bool cpy_faulted_on_uaccess(const struct exception_table_entry *ex,
+ unsigned long esr)
+{
+ bool uaccess_is_write = FIELD_GET(EX_DATA_UACCESS_WRITE, ex->data);
+ bool fault_on_write = esr & ESR_ELx_WNR;
+
+ return uaccess_is_write == fault_on_write;
+}
+
+bool insn_may_access_user(unsigned long addr, unsigned long esr)
+{
+ const struct exception_table_entry *ex = search_exception_tables(addr);
+
+ if (!ex)
+ return false;
+
+ switch (ex->type) {
+ case EX_TYPE_UACCESS_CPY:
+ return cpy_faulted_on_uaccess(ex, esr);
+ default:
+ return true;
+ }
+}
+
static inline unsigned long
get_ex_fixup(const struct exception_table_entry *ex)
{
@@ -29,6 +54,17 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
return true;
}
+static bool ex_handler_uaccess_cpy(const struct exception_table_entry *ex,
+ struct pt_regs *regs, unsigned long esr)
+{
+ /* Do not fix up faults on kernel memory accesses */
+ if (!cpy_faulted_on_uaccess(ex, esr))
+ return false;
+
+ regs->pc = get_ex_fixup(ex);
+ return true;
+}
+
static bool
ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
struct pt_regs *regs)
@@ -56,7 +92,7 @@ ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
return true;
}
-bool fixup_exception(struct pt_regs *regs)
+bool fixup_exception(struct pt_regs *regs, unsigned long esr)
{
const struct exception_table_entry *ex;
@@ -70,6 +106,8 @@ bool fixup_exception(struct pt_regs *regs)
case EX_TYPE_UACCESS_ERR_ZERO:
case EX_TYPE_KACCESS_ERR_ZERO:
return ex_handler_uaccess_err_zero(ex, regs);
+ case EX_TYPE_UACCESS_CPY:
+ return ex_handler_uaccess_cpy(ex, regs, esr);
case EX_TYPE_LOAD_UNALIGNED_ZEROPAD:
return ex_handler_load_unaligned_zeropad(ex, regs);
}
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index ef63651099a9..ec0a337891dd 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -375,7 +375,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr,
* Are we prepared to handle this kernel fault?
* We are almost certainly not prepared to handle instruction faults.
*/
- if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
+ if (!is_el1_instruction_abort(esr) && fixup_exception(regs, esr))
return;
if (WARN_RATELIMIT(is_spurious_el1_translation_fault(addr, esr, regs),
@@ -606,7 +606,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
die_kernel_fault("execution of user memory",
addr, esr, regs);
- if (!search_exception_tables(regs->pc))
+ if (!insn_may_access_user(regs->pc, esr))
die_kernel_fault("access to user memory outside uaccess routines",
addr, esr, regs);
}
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index b3a7fafe8892..cfe8cb8ba1cc 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -334,7 +334,9 @@ unsigned long hugetlb_mask_last_page(struct hstate *h)
switch (hp_size) {
#ifndef __PAGETABLE_PMD_FOLDED
case PUD_SIZE:
- return PGDIR_SIZE - PUD_SIZE;
+ if (pud_sect_supported())
+ return PGDIR_SIZE - PUD_SIZE;
+ break;
#endif
case CONT_PMD_SIZE:
return PUD_SIZE - CONT_PMD_SIZE;
@@ -356,23 +358,21 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
switch (pagesize) {
#ifndef __PAGETABLE_PMD_FOLDED
case PUD_SIZE:
- entry = pud_pte(pud_mkhuge(pte_pud(entry)));
+ if (pud_sect_supported())
+ return pud_pte(pud_mkhuge(pte_pud(entry)));
break;
#endif
case CONT_PMD_SIZE:
- entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
- fallthrough;
+ return pmd_pte(pmd_mkhuge(pmd_mkcont(pte_pmd(entry))));
case PMD_SIZE:
- entry = pmd_pte(pmd_mkhuge(pte_pmd(entry)));
- break;
+ return pmd_pte(pmd_mkhuge(pte_pmd(entry)));
case CONT_PTE_SIZE:
- entry = pte_mkcont(entry);
- break;
+ return pte_mkcont(entry);
default:
- pr_warn("%s: unrecognized huge page size 0x%lx\n",
- __func__, pagesize);
break;
}
+ pr_warn("%s: unrecognized huge page size 0x%lx\n",
+ __func__, pagesize);
return entry;
}
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index b65a29440a0c..d541ce45daeb 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -190,7 +190,7 @@ static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
*/
static bool __init root_level_aligned(u64 addr)
{
- int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 1) * (PAGE_SHIFT - 3);
+ int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 1) * PTDESC_TABLE_SHIFT;
return (addr % (PAGE_SIZE << shift)) == 0;
}
@@ -245,7 +245,7 @@ static int __init root_level_idx(u64 addr)
*/
u64 vabits = IS_ENABLED(CONFIG_ARM64_64K_PAGES) ? VA_BITS
: vabits_actual;
- int shift = (ARM64_HW_PGTABLE_LEVELS(vabits) - 1) * (PAGE_SHIFT - 3);
+ int shift = (ARM64_HW_PGTABLE_LEVELS(vabits) - 1) * PTDESC_TABLE_SHIFT;
return (addr & ~_PAGE_OFFSET(vabits)) >> (shift + PAGE_SHIFT);
}
@@ -269,7 +269,7 @@ static void __init clone_next_level(u64 addr, pgd_t *tmp_pg_dir, pud_t *pud)
*/
static int __init next_level_idx(u64 addr)
{
- int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 2) * (PAGE_SHIFT - 3);
+ int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 2) * PTDESC_TABLE_SHIFT;
return (addr >> (shift + PAGE_SHIFT)) % PTRS_PER_PTE;
}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 1dfe1a8efdbe..b98f89420713 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1558,9 +1558,8 @@ void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp)
#ifdef CONFIG_ARCH_HAS_PKEYS
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val)
{
- u64 new_por = POE_RXW;
+ u64 new_por;
u64 old_por;
- u64 pkey_shift;
if (!system_supports_poe())
return -ENOSPC;
@@ -1574,7 +1573,7 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long i
return -EINVAL;
/* Set the bits we need in POR: */
- new_por = POE_RXW;
+ new_por = POE_RWX;
if (init_val & PKEY_DISABLE_WRITE)
new_por &= ~POE_W;
if (init_val & PKEY_DISABLE_ACCESS)
@@ -1585,12 +1584,11 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long i
new_por &= ~POE_X;
/* Shift the bits in to the correct place in POR for pkey: */
- pkey_shift = pkey * POR_BITS_PER_PKEY;
- new_por <<= pkey_shift;
+ new_por = POR_ELx_PERM_PREP(pkey, new_por);
/* Get old POR and mask off any old bits in place: */
old_por = read_sysreg_s(SYS_POR_EL0);
- old_por &= ~(POE_MASK << pkey_shift);
+ old_por &= ~(POE_MASK << POR_ELx_PERM_SHIFT(pkey));
/* Write old part along with new part: */
write_sysreg_s(old_por | new_por, SYS_POR_EL0);
diff --git a/arch/arm64/mm/physaddr.c b/arch/arm64/mm/physaddr.c
index cde44c13dda1..7d94e09b01b3 100644
--- a/arch/arm64/mm/physaddr.c
+++ b/arch/arm64/mm/physaddr.c
@@ -10,7 +10,7 @@
phys_addr_t __virt_to_phys(unsigned long x)
{
WARN(!__is_lm_address(__tag_reset(x)),
- "virt_to_phys used for non-linear address: %pK (%pS)\n",
+ "virt_to_phys used for non-linear address: %p (%pS)\n",
(void *)x,
(void *)x);
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 688fbe0271ca..8cec0da4cff2 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -80,8 +80,8 @@ static const struct ptdump_prot_bits pte_bits[] = {
.set = "CON",
.clear = " ",
}, {
- .mask = PTE_TABLE_BIT | PTE_VALID,
- .val = PTE_VALID,
+ .mask = PMD_TYPE_MASK,
+ .val = PMD_TYPE_SECT,
.set = "BLK",
.clear = " ",
}, {
diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk
index 1a2afc9fdd42..f2a1732cb1f6 100755
--- a/arch/arm64/tools/gen-sysreg.awk
+++ b/arch/arm64/tools/gen-sysreg.awk
@@ -111,7 +111,7 @@ END {
/^$/ { next }
/^[\t ]*#/ { next }
-/^SysregFields/ && block_current() == "Root" {
+$1 == "SysregFields" && block_current() == "Root" {
block_push("SysregFields")
expect_fields(2)
@@ -127,7 +127,8 @@ END {
next
}
-/^EndSysregFields/ && block_current() == "SysregFields" {
+$1 == "EndSysregFields" && block_current() == "SysregFields" {
+ expect_fields(1)
if (next_bit > 0)
fatal("Unspecified bits in " reg)
@@ -145,7 +146,7 @@ END {
next
}
-/^Sysreg/ && block_current() == "Root" {
+$1 == "Sysreg" && block_current() == "Root" {
block_push("Sysreg")
expect_fields(7)
@@ -177,7 +178,8 @@ END {
next
}
-/^EndSysreg/ && block_current() == "Sysreg" {
+$1 == "EndSysreg" && block_current() == "Sysreg" {
+ expect_fields(1)
if (next_bit > 0)
fatal("Unspecified bits in " reg)
@@ -206,7 +208,7 @@ END {
# Currently this is effectivey a comment, in future we may want to emit
# defines for the fields.
-(/^Fields/ || /^Mapping/) && block_current() == "Sysreg" {
+($1 == "Fields" || $1 == "Mapping") && block_current() == "Sysreg" {
expect_fields(2)
if (next_bit != 63)
@@ -224,7 +226,7 @@ END {
}
-/^Res0/ && (block_current() == "Sysreg" || block_current() == "SysregFields") {
+$1 == "Res0" && (block_current() == "Sysreg" || block_current() == "SysregFields") {
expect_fields(2)
parse_bitdef(reg, "RES0", $2)
field = "RES0_" msb "_" lsb
@@ -234,7 +236,7 @@ END {
next
}
-/^Res1/ && (block_current() == "Sysreg" || block_current() == "SysregFields") {
+$1 == "Res1" && (block_current() == "Sysreg" || block_current() == "SysregFields") {
expect_fields(2)
parse_bitdef(reg, "RES1", $2)
field = "RES1_" msb "_" lsb
@@ -244,7 +246,7 @@ END {
next
}
-/^Unkn/ && (block_current() == "Sysreg" || block_current() == "SysregFields") {
+$1 == "Unkn" && (block_current() == "Sysreg" || block_current() == "SysregFields") {
expect_fields(2)
parse_bitdef(reg, "UNKN", $2)
field = "UNKN_" msb "_" lsb
@@ -254,7 +256,7 @@ END {
next
}
-/^Field/ && (block_current() == "Sysreg" || block_current() == "SysregFields") {
+$1 == "Field" && (block_current() == "Sysreg" || block_current() == "SysregFields") {
expect_fields(3)
field = $3
parse_bitdef(reg, field, $2)
@@ -265,14 +267,14 @@ END {
next
}
-/^Raz/ && (block_current() == "Sysreg" || block_current() == "SysregFields") {
+$1 == "Raz" && (block_current() == "Sysreg" || block_current() == "SysregFields") {
expect_fields(2)
parse_bitdef(reg, field, $2)
next
}
-/^SignedEnum/ && (block_current() == "Sysreg" || block_current() == "SysregFields") {
+$1 == "SignedEnum" && (block_current() == "Sysreg" || block_current() == "SysregFields") {
block_push("Enum")
expect_fields(3)
@@ -285,7 +287,7 @@ END {
next
}
-/^UnsignedEnum/ && (block_current() == "Sysreg" || block_current() == "SysregFields") {
+$1 == "UnsignedEnum" && (block_current() == "Sysreg" || block_current() == "SysregFields") {
block_push("Enum")
expect_fields(3)
@@ -298,7 +300,7 @@ END {
next
}
-/^Enum/ && (block_current() == "Sysreg" || block_current() == "SysregFields") {
+$1 == "Enum" && (block_current() == "Sysreg" || block_current() == "SysregFields") {
block_push("Enum")
expect_fields(3)
@@ -310,7 +312,8 @@ END {
next
}
-/^EndEnum/ && block_current() == "Enum" {
+$1 == "EndEnum" && block_current() == "Enum" {
+ expect_fields(1)
field = null
msb = null
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 762ee084b37c..10809047fc87 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -1664,6 +1664,7 @@ EndEnum
UnsignedEnum 59:56 FGT
0b0000 NI
0b0001 IMP
+ 0b0010 FGT2
EndEnum
Res0 55:48
UnsignedEnum 47:44 EXS
@@ -1725,6 +1726,7 @@ Enum 3:0 PARANGE
0b0100 44
0b0101 48
0b0110 52
+ 0b0111 56
EndEnum
EndSysreg
@@ -2074,7 +2076,7 @@ EndEnum
Res0 4:2
Field 1 ExTRE
Field 0 E0TRE
-EndSysregFields
+EndSysreg
Sysreg SMPRI_EL1 3 0 1 2 4
Res0 63:4
@@ -2641,6 +2643,101 @@ Field 0 E0HTRE
EndSysreg
+Sysreg HDFGRTR2_EL2 3 4 3 1 0
+Res0 63:25
+Field 24 nPMBMAR_EL1
+Field 23 nMDSTEPOP_EL1
+Field 22 nTRBMPAM_EL1
+Res0 21
+Field 20 nTRCITECR_EL1
+Field 19 nPMSDSFR_EL1
+Field 18 nSPMDEVAFF_EL1
+Field 17 nSPMID
+Field 16 nSPMSCR_EL1
+Field 15 nSPMACCESSR_EL1
+Field 14 nSPMCR_EL0
+Field 13 nSPMOVS
+Field 12 nSPMINTEN
+Field 11 nSPMCNTEN
+Field 10 nSPMSELR_EL0
+Field 9 nSPMEVTYPERn_EL0
+Field 8 nSPMEVCNTRn_EL0
+Field 7 nPMSSCR_EL1
+Field 6 nPMSSDATA
+Field 5 nMDSELR_EL1
+Field 4 nPMUACR_EL1
+Field 3 nPMICFILTR_EL0
+Field 2 nPMICNTR_EL0
+Field 1 nPMIAR_EL1
+Field 0 nPMECR_EL1
+EndSysreg
+
+Sysreg HDFGWTR2_EL2 3 4 3 1 1
+Res0 63:25
+Field 24 nPMBMAR_EL1
+Field 23 nMDSTEPOP_EL1
+Field 22 nTRBMPAM_EL1
+Field 21 nPMZR_EL0
+Field 20 nTRCITECR_EL1
+Field 19 nPMSDSFR_EL1
+Res0 18:17
+Field 16 nSPMSCR_EL1
+Field 15 nSPMACCESSR_EL1
+Field 14 nSPMCR_EL0
+Field 13 nSPMOVS
+Field 12 nSPMINTEN
+Field 11 nSPMCNTEN
+Field 10 nSPMSELR_EL0
+Field 9 nSPMEVTYPERn_EL0
+Field 8 nSPMEVCNTRn_EL0
+Field 7 nPMSSCR_EL1
+Res0 6
+Field 5 nMDSELR_EL1
+Field 4 nPMUACR_EL1
+Field 3 nPMICFILTR_EL0
+Field 2 nPMICNTR_EL0
+Field 1 nPMIAR_EL1
+Field 0 nPMECR_EL1
+EndSysreg
+
+Sysreg HFGRTR2_EL2 3 4 3 1 2
+Res0 63:15
+Field 14 nACTLRALIAS_EL1
+Field 13 nACTLRMASK_EL1
+Field 12 nTCR2ALIAS_EL1
+Field 11 nTCRALIAS_EL1
+Field 10 nSCTLRALIAS2_EL1
+Field 9 nSCTLRALIAS_EL1
+Field 8 nCPACRALIAS_EL1
+Field 7 nTCR2MASK_EL1
+Field 6 nTCRMASK_EL1
+Field 5 nSCTLR2MASK_EL1
+Field 4 nSCTLRMASK_EL1
+Field 3 nCPACRMASK_EL1
+Field 2 nRCWSMASK_EL1
+Field 1 nERXGSR_EL1
+Field 0 nPFAR_EL1
+EndSysreg
+
+Sysreg HFGWTR2_EL2 3 4 3 1 3
+Res0 63:15
+Field 14 nACTLRALIAS_EL1
+Field 13 nACTLRMASK_EL1
+Field 12 nTCR2ALIAS_EL1
+Field 11 nTCRALIAS_EL1
+Field 10 nSCTLRALIAS2_EL1
+Field 9 nSCTLRALIAS_EL1
+Field 8 nCPACRALIAS_EL1
+Field 7 nTCR2MASK_EL1
+Field 6 nTCRMASK_EL1
+Field 5 nSCTLR2MASK_EL1
+Field 4 nSCTLRMASK_EL1
+Field 3 nCPACRMASK_EL1
+Field 2 nRCWSMASK_EL1
+Res0 1
+Field 0 nPFAR_EL1
+EndSysreg
+
Sysreg HDFGRTR_EL2 3 4 3 1 4
Field 63 PMBIDR_EL1
Field 62 nPMSNEVFR_EL1
@@ -2813,6 +2910,12 @@ Field 1 AMEVCNTR00_EL0
Field 0 AMCNTEN0
EndSysreg
+Sysreg HFGITR2_EL2 3 4 3 1 7
+Res0 63:2
+Field 1 nDCCIVAPS
+Field 0 TSBCSYNC
+EndSysreg
+
Sysreg ZCR_EL2 3 4 1 2 0
Fields ZCR_ELx
EndSysreg
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 16bacfe8c7a2..da15b5efe807 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -152,6 +152,7 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
{
return cpu == cpu_first_thread_sibling(cpu);
}
+#define topology_is_primary_thread topology_is_primary_thread
static inline bool topology_smt_thread_allowed(unsigned int cpu)
{
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index ec134b719144..6c79ee7c0957 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -229,11 +229,11 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_primary_thread_mask);
}
+#define topology_is_primary_thread topology_is_primary_thread
#else /* CONFIG_SMP */
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
static inline int topology_max_smt_threads(void) { return 1; }
-static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
#endif /* !CONFIG_SMP */
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index f642de2ebdac..6cf31a1649c4 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -498,7 +498,7 @@ void arch_scale_freq_tick(void)
*/
#define MAX_SAMPLE_AGE ((unsigned long)HZ / 50)
-unsigned int arch_freq_get_on_cpu(int cpu)
+int arch_freq_get_on_cpu(int cpu)
{
struct aperfmperf *s = per_cpu_ptr(&cpu_samples, cpu);
unsigned int seq, freq;
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 41ed01f46bd9..6571d432cbe3 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -86,9 +86,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "microcode\t: 0x%x\n", c->microcode);
if (cpu_has(c, X86_FEATURE_TSC)) {
- unsigned int freq = arch_freq_get_on_cpu(cpu);
+ int freq = arch_freq_get_on_cpu(cpu);
- seq_printf(m, "cpu MHz\t\t: %u.%03u\n", freq / 1000, (freq % 1000));
+ if (freq < 0)
+ seq_puts(m, "cpu MHz\t\t: Unknown\n");
+ else
+ seq_printf(m, "cpu MHz\t\t: %u.%03u\n", freq / 1000, (freq % 1000));
}
/* Cache size */
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 3ebe77566788..af0029d30dbe 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -11,6 +11,7 @@
#include <linux/cleanup.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
+#include <linux/cpu_smt.h>
#include <linux/device.h>
#include <linux/of.h>
#include <linux/slab.h>
@@ -28,7 +29,7 @@
static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
static struct cpumask scale_freq_counters_mask;
static bool scale_freq_invariant;
-DEFINE_PER_CPU(unsigned long, capacity_freq_ref) = 1;
+DEFINE_PER_CPU(unsigned long, capacity_freq_ref) = 0;
EXPORT_PER_CPU_SYMBOL_GPL(capacity_freq_ref);
static bool supports_scale_freq_counters(const struct cpumask *cpus)
@@ -293,13 +294,15 @@ void topology_normalize_cpu_scale(void)
capacity_scale = 1;
for_each_possible_cpu(cpu) {
- capacity = raw_capacity[cpu] * per_cpu(capacity_freq_ref, cpu);
+ capacity = raw_capacity[cpu] *
+ (per_cpu(capacity_freq_ref, cpu) ?: 1);
capacity_scale = max(capacity, capacity_scale);
}
pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale);
for_each_possible_cpu(cpu) {
- capacity = raw_capacity[cpu] * per_cpu(capacity_freq_ref, cpu);
+ capacity = raw_capacity[cpu] *
+ (per_cpu(capacity_freq_ref, cpu) ?: 1);
capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
capacity_scale);
topology_set_cpu_scale(cpu, capacity);
@@ -506,6 +509,10 @@ core_initcall(free_raw_capacity);
#endif
#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
+
+/* Used to enable the SMT control */
+static unsigned int max_smt_thread_num = 1;
+
/*
* This function returns the logic cpu number of the node.
* There are basically three kinds of return values:
@@ -565,6 +572,8 @@ static int __init parse_core(struct device_node *core, int package_id,
i++;
} while (1);
+ max_smt_thread_num = max_t(unsigned int, max_smt_thread_num, i);
+
cpu = get_cpu_for_node(core);
if (cpu >= 0) {
if (!leaf) {
@@ -677,6 +686,17 @@ static int __init parse_socket(struct device_node *socket)
if (!has_socket)
ret = parse_cluster(socket, 0, -1, 0);
+ /*
+ * Reset the max_smt_thread_num to 1 on failure. Since on failure
+ * we need to notify the framework the SMT is not supported, but
+ * max_smt_thread_num can be initialized to the SMT thread number
+ * of the cores which are successfully parsed.
+ */
+ if (ret)
+ max_smt_thread_num = 1;
+
+ cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num);
+
return ret;
}
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index 97c2d4f15d76..2c5c228408bf 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -340,3 +340,15 @@ config X86_SPEEDSTEP_RELAXED_CAP_CHECK
option lets the probing code bypass some of those checks if the
parameter "relaxed_check=1" is passed to the module.
+config CPUFREQ_ARCH_CUR_FREQ
+ default y
+ bool "Current frequency derived from HW provided feedback"
+ help
+ This determines whether the scaling_cur_freq sysfs attribute returns
+ the last requested frequency or a more precise value based on hardware
+ provided feedback (as architected counters).
+ Given that a more precise frequency can now be provided via the
+ cpuinfo_avg_freq attribute, by enabling this option,
+ scaling_cur_freq maintains the provision of a counter based frequency,
+ for compatibility reasons.
+
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 30ffbddc7ece..0ce79fed8e55 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -729,18 +729,26 @@ show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
show_one(scaling_min_freq, min);
show_one(scaling_max_freq, max);
-__weak unsigned int arch_freq_get_on_cpu(int cpu)
+__weak int arch_freq_get_on_cpu(int cpu)
{
- return 0;
+ return -EOPNOTSUPP;
+}
+
+static inline bool cpufreq_avg_freq_supported(struct cpufreq_policy *policy)
+{
+ return arch_freq_get_on_cpu(policy->cpu) != -EOPNOTSUPP;
}
static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf)
{
ssize_t ret;
- unsigned int freq;
+ int freq;
+
+ freq = IS_ENABLED(CONFIG_CPUFREQ_ARCH_CUR_FREQ)
+ ? arch_freq_get_on_cpu(policy->cpu)
+ : 0;
- freq = arch_freq_get_on_cpu(policy->cpu);
- if (freq)
+ if (freq > 0)
ret = sysfs_emit(buf, "%u\n", freq);
else if (cpufreq_driver->setpolicy && cpufreq_driver->get)
ret = sysfs_emit(buf, "%u\n", cpufreq_driver->get(policy->cpu));
@@ -785,6 +793,19 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
}
/*
+ * show_cpuinfo_avg_freq - average CPU frequency as detected by hardware
+ */
+static ssize_t show_cpuinfo_avg_freq(struct cpufreq_policy *policy,
+ char *buf)
+{
+ int avg_freq = arch_freq_get_on_cpu(policy->cpu);
+
+ if (avg_freq > 0)
+ return sysfs_emit(buf, "%u\n", avg_freq);
+ return avg_freq != 0 ? avg_freq : -EINVAL;
+}
+
+/*
* show_scaling_governor - show the current policy for the specified CPU
*/
static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
@@ -946,6 +967,7 @@ static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
}
cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
+cpufreq_freq_attr_ro(cpuinfo_avg_freq);
cpufreq_freq_attr_ro(cpuinfo_min_freq);
cpufreq_freq_attr_ro(cpuinfo_max_freq);
cpufreq_freq_attr_ro(cpuinfo_transition_latency);
@@ -1073,6 +1095,12 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
return ret;
}
+ if (cpufreq_avg_freq_supported(policy)) {
+ ret = sysfs_create_file(&policy->kobj, &cpuinfo_avg_freq.attr);
+ if (ret)
+ return ret;
+ }
+
ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
if (ret)
return ret;
diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c
index 06fd317529fc..9a0d1420ac4f 100644
--- a/drivers/perf/apple_m1_cpu_pmu.c
+++ b/drivers/perf/apple_m1_cpu_pmu.c
@@ -120,6 +120,8 @@ enum m1_pmu_events {
*/
M1_PMU_CFG_COUNT_USER = BIT(8),
M1_PMU_CFG_COUNT_KERNEL = BIT(9),
+ M1_PMU_CFG_COUNT_HOST = BIT(10),
+ M1_PMU_CFG_COUNT_GUEST = BIT(11),
};
/*
@@ -327,11 +329,10 @@ static void m1_pmu_disable_counter_interrupt(unsigned int index)
__m1_pmu_enable_counter_interrupt(index, false);
}
-static void m1_pmu_configure_counter(unsigned int index, u8 event,
- bool user, bool kernel)
+static void __m1_pmu_configure_event_filter(unsigned int index, bool user,
+ bool kernel, bool host)
{
- u64 val, user_bit, kernel_bit;
- int shift;
+ u64 clear, set, user_bit, kernel_bit;
switch (index) {
case 0 ... 7:
@@ -346,19 +347,27 @@ static void m1_pmu_configure_counter(unsigned int index, u8 event,
BUG();
}
- val = read_sysreg_s(SYS_IMP_APL_PMCR1_EL1);
-
+ clear = set = 0;
if (user)
- val |= user_bit;
+ set |= user_bit;
else
- val &= ~user_bit;
+ clear |= user_bit;
if (kernel)
- val |= kernel_bit;
+ set |= kernel_bit;
else
- val &= ~kernel_bit;
+ clear |= kernel_bit;
- write_sysreg_s(val, SYS_IMP_APL_PMCR1_EL1);
+ if (host)
+ sysreg_clear_set_s(SYS_IMP_APL_PMCR1_EL1, clear, set);
+ else if (is_kernel_in_hyp_mode())
+ sysreg_clear_set_s(SYS_IMP_APL_PMCR1_EL12, clear, set);
+}
+
+static void __m1_pmu_configure_eventsel(unsigned int index, u8 event)
+{
+ u64 clear = 0, set = 0;
+ int shift;
/*
* Counters 0 and 1 have fixed events. For anything else,
@@ -371,21 +380,32 @@ static void m1_pmu_configure_counter(unsigned int index, u8 event,
break;
case 2 ... 5:
shift = (index - 2) * 8;
- val = read_sysreg_s(SYS_IMP_APL_PMESR0_EL1);
- val &= ~((u64)0xff << shift);
- val |= (u64)event << shift;
- write_sysreg_s(val, SYS_IMP_APL_PMESR0_EL1);
+ clear |= (u64)0xff << shift;
+ set |= (u64)event << shift;
+ sysreg_clear_set_s(SYS_IMP_APL_PMESR0_EL1, clear, set);
break;
case 6 ... 9:
shift = (index - 6) * 8;
- val = read_sysreg_s(SYS_IMP_APL_PMESR1_EL1);
- val &= ~((u64)0xff << shift);
- val |= (u64)event << shift;
- write_sysreg_s(val, SYS_IMP_APL_PMESR1_EL1);
+ clear |= (u64)0xff << shift;
+ set |= (u64)event << shift;
+ sysreg_clear_set_s(SYS_IMP_APL_PMESR1_EL1, clear, set);
break;
}
}
+static void m1_pmu_configure_counter(unsigned int index, unsigned long config_base)
+{
+ bool kernel = config_base & M1_PMU_CFG_COUNT_KERNEL;
+ bool guest = config_base & M1_PMU_CFG_COUNT_GUEST;
+ bool host = config_base & M1_PMU_CFG_COUNT_HOST;
+ bool user = config_base & M1_PMU_CFG_COUNT_USER;
+ u8 evt = config_base & M1_PMU_CFG_EVENT;
+
+ __m1_pmu_configure_event_filter(index, user && host, kernel && host, true);
+ __m1_pmu_configure_event_filter(index, user && guest, kernel && guest, false);
+ __m1_pmu_configure_eventsel(index, evt);
+}
+
/* arm_pmu backend */
static void m1_pmu_enable_event(struct perf_event *event)
{
@@ -396,11 +416,7 @@ static void m1_pmu_enable_event(struct perf_event *event)
user = event->hw.config_base & M1_PMU_CFG_COUNT_USER;
kernel = event->hw.config_base & M1_PMU_CFG_COUNT_KERNEL;
- m1_pmu_disable_counter_interrupt(event->hw.idx);
- m1_pmu_disable_counter(event->hw.idx);
- isb();
-
- m1_pmu_configure_counter(event->hw.idx, evt, user, kernel);
+ m1_pmu_configure_counter(event->hw.idx, event->hw.config_base);
m1_pmu_enable_counter(event->hw.idx);
m1_pmu_enable_counter_interrupt(event->hw.idx);
isb();
@@ -558,7 +574,7 @@ static int m1_pmu_set_event_filter(struct hw_perf_event *event,
{
unsigned long config_base = 0;
- if (!attr->exclude_guest) {
+ if (!attr->exclude_guest && !is_kernel_in_hyp_mode()) {
pr_debug("ARM performance counters do not support mode exclusion\n");
return -EOPNOTSUPP;
}
@@ -566,6 +582,10 @@ static int m1_pmu_set_event_filter(struct hw_perf_event *event,
config_base |= M1_PMU_CFG_COUNT_KERNEL;
if (!attr->exclude_user)
config_base |= M1_PMU_CFG_COUNT_USER;
+ if (!attr->exclude_host)
+ config_base |= M1_PMU_CFG_COUNT_HOST;
+ if (!attr->exclude_guest)
+ config_base |= M1_PMU_CFG_COUNT_GUEST;
event->config_base = config_base;
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index ef959e66db7c..d4fe30ff225b 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -802,8 +802,6 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
CMN_EVENT_ATTR(_model, ccha_##_name, CMN_TYPE_CCHA, _event)
#define CMN_EVENT_CCLA(_name, _event) \
CMN_EVENT_ATTR(CMN_ANY, ccla_##_name, CMN_TYPE_CCLA, _event)
-#define CMN_EVENT_CCLA_RNI(_name, _event) \
- CMN_EVENT_ATTR(CMN_ANY, ccla_rni_##_name, CMN_TYPE_CCLA_RNI, _event)
#define CMN_EVENT_HNS(_name, _event) \
CMN_EVENT_ATTR(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event)
@@ -1798,6 +1796,9 @@ static int arm_cmn_event_init(struct perf_event *event)
} else if (type == CMN_TYPE_XP &&
(cmn->part == PART_CMN700 || cmn->part == PART_CMN_S3)) {
hw->wide_sel = true;
+ } else if (type == CMN_TYPE_RND) {
+ /* Secretly permit this as an alias for "rnid" events */
+ type = CMN_TYPE_RNI;
}
/* This is sufficiently annoying to recalculate, so cache it */
diff --git a/drivers/perf/arm_cspmu/ampere_cspmu.c b/drivers/perf/arm_cspmu/ampere_cspmu.c
index f72f5689923c..b8ca69fd9d1d 100644
--- a/drivers/perf/arm_cspmu/ampere_cspmu.c
+++ b/drivers/perf/arm_cspmu/ampere_cspmu.c
@@ -10,10 +10,10 @@
#include "arm_cspmu.h"
-#define PMAUXR0 0xD80
-#define PMAUXR1 0xD84
-#define PMAUXR2 0xD88
-#define PMAUXR3 0xD8C
+#define PMAUXR0 PMIMPDEF
+#define PMAUXR1 (PMIMPDEF + 0x4)
+#define PMAUXR2 (PMIMPDEF + 0x8)
+#define PMAUXR3 (PMIMPDEF + 0xC)
#define to_ampere_cspmu_ctx(cspmu) ((struct ampere_cspmu_ctx *)(cspmu->impl.ctx))
@@ -132,32 +132,20 @@ ampere_cspmu_get_name(const struct arm_cspmu *cspmu)
return ctx->name;
}
-static u32 ampere_cspmu_event_filter(const struct perf_event *event)
+static void ampere_cspmu_set_cc_filter(struct arm_cspmu *cspmu,
+ const struct perf_event *event)
{
/*
- * PMEVFILTR or PMCCFILTR aren't used in Ampere SoC PMU but are marked
- * as RES0. Make sure, PMCCFILTR is written zero.
+ * PMCCFILTR is RES0, so this is just a dummy callback to override
+ * the default implementation and avoid writing to it.
*/
- return 0;
}
static void ampere_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
- struct hw_perf_event *hwc,
- u32 filter)
+ const struct perf_event *event)
{
- struct perf_event *event;
- unsigned int idx;
u32 threshold, rank, bank;
- /*
- * At this point, all the events have the same filter settings.
- * Therefore, take the first event and use its configuration.
- */
- idx = find_first_bit(cspmu->hw_events.used_ctrs,
- cspmu->cycle_counter_logical_idx);
-
- event = cspmu->hw_events.events[idx];
-
threshold = get_threshold(event);
rank = get_rank(event);
bank = get_bank(event);
@@ -233,7 +221,7 @@ static int ampere_cspmu_init_ops(struct arm_cspmu *cspmu)
cspmu->impl.ctx = ctx;
- impl_ops->event_filter = ampere_cspmu_event_filter;
+ impl_ops->set_cc_filter = ampere_cspmu_set_cc_filter;
impl_ops->set_ev_filter = ampere_cspmu_set_ev_filter;
impl_ops->validate_event = ampere_cspmu_validate_event;
impl_ops->get_name = ampere_cspmu_get_name;
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
index 81e8b97e9353..efa9b229e701 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.c
+++ b/drivers/perf/arm_cspmu/arm_cspmu.c
@@ -40,51 +40,6 @@
ARM_CSPMU_EXT_ATTR(_name, arm_cspmu_cpumask_show, \
(unsigned long)_config)
-/*
- * CoreSight PMU Arch register offsets.
- */
-#define PMEVCNTR_LO 0x0
-#define PMEVCNTR_HI 0x4
-#define PMEVTYPER 0x400
-#define PMCCFILTR 0x47C
-#define PMEVFILTR 0xA00
-#define PMCNTENSET 0xC00
-#define PMCNTENCLR 0xC20
-#define PMINTENSET 0xC40
-#define PMINTENCLR 0xC60
-#define PMOVSCLR 0xC80
-#define PMOVSSET 0xCC0
-#define PMCFGR 0xE00
-#define PMCR 0xE04
-#define PMIIDR 0xE08
-
-/* PMCFGR register field */
-#define PMCFGR_NCG GENMASK(31, 28)
-#define PMCFGR_HDBG BIT(24)
-#define PMCFGR_TRO BIT(23)
-#define PMCFGR_SS BIT(22)
-#define PMCFGR_FZO BIT(21)
-#define PMCFGR_MSI BIT(20)
-#define PMCFGR_UEN BIT(19)
-#define PMCFGR_NA BIT(17)
-#define PMCFGR_EX BIT(16)
-#define PMCFGR_CCD BIT(15)
-#define PMCFGR_CC BIT(14)
-#define PMCFGR_SIZE GENMASK(13, 8)
-#define PMCFGR_N GENMASK(7, 0)
-
-/* PMCR register field */
-#define PMCR_TRO BIT(11)
-#define PMCR_HDBG BIT(10)
-#define PMCR_FZO BIT(9)
-#define PMCR_NA BIT(8)
-#define PMCR_DP BIT(5)
-#define PMCR_X BIT(4)
-#define PMCR_D BIT(3)
-#define PMCR_C BIT(2)
-#define PMCR_P BIT(1)
-#define PMCR_E BIT(0)
-
/* Each SET/CLR register supports up to 32 counters. */
#define ARM_CSPMU_SET_CLR_COUNTER_SHIFT 5
#define ARM_CSPMU_SET_CLR_COUNTER_NUM \
@@ -111,7 +66,9 @@ static unsigned long arm_cspmu_cpuhp_state;
static DEFINE_MUTEX(arm_cspmu_lock);
static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
- struct hw_perf_event *hwc, u32 filter);
+ const struct perf_event *event);
+static void arm_cspmu_set_cc_filter(struct arm_cspmu *cspmu,
+ const struct perf_event *event);
static struct acpi_apmt_node *arm_cspmu_apmt_node(struct device *dev)
{
@@ -226,6 +183,7 @@ arm_cspmu_event_attr_is_visible(struct kobject *kobj,
static struct attribute *arm_cspmu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
ARM_CSPMU_FORMAT_FILTER_ATTR,
+ ARM_CSPMU_FORMAT_FILTER2_ATTR,
NULL,
};
@@ -250,11 +208,6 @@ static bool arm_cspmu_is_cycle_counter_event(const struct perf_event *event)
return (event->attr.config == ARM_CSPMU_EVT_CYCLES_DEFAULT);
}
-static u32 arm_cspmu_event_filter(const struct perf_event *event)
-{
- return event->attr.config1 & ARM_CSPMU_FILTER_MASK;
-}
-
static ssize_t arm_cspmu_identifier_show(struct device *dev,
struct device_attribute *attr,
char *page)
@@ -416,7 +369,7 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
DEFAULT_IMPL_OP(get_name),
DEFAULT_IMPL_OP(is_cycle_counter_event),
DEFAULT_IMPL_OP(event_type),
- DEFAULT_IMPL_OP(event_filter),
+ DEFAULT_IMPL_OP(set_cc_filter),
DEFAULT_IMPL_OP(set_ev_filter),
DEFAULT_IMPL_OP(event_attr_is_visible),
};
@@ -812,26 +765,28 @@ static inline void arm_cspmu_set_event(struct arm_cspmu *cspmu,
}
static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
- struct hw_perf_event *hwc,
- u32 filter)
+ const struct perf_event *event)
{
- u32 offset = PMEVFILTR + (4 * hwc->idx);
+ u32 filter = event->attr.config1 & ARM_CSPMU_FILTER_MASK;
+ u32 filter2 = event->attr.config2 & ARM_CSPMU_FILTER_MASK;
+ u32 offset = 4 * event->hw.idx;
- writel(filter, cspmu->base0 + offset);
+ writel(filter, cspmu->base0 + PMEVFILTR + offset);
+ writel(filter2, cspmu->base0 + PMEVFILT2R + offset);
}
-static inline void arm_cspmu_set_cc_filter(struct arm_cspmu *cspmu, u32 filter)
+static void arm_cspmu_set_cc_filter(struct arm_cspmu *cspmu,
+ const struct perf_event *event)
{
- u32 offset = PMCCFILTR;
+ u32 filter = event->attr.config1 & ARM_CSPMU_FILTER_MASK;
- writel(filter, cspmu->base0 + offset);
+ writel(filter, cspmu->base0 + PMCCFILTR);
}
static void arm_cspmu_start(struct perf_event *event, int pmu_flags)
{
struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- u32 filter;
/* We always reprogram the counter */
if (pmu_flags & PERF_EF_RELOAD)
@@ -839,13 +794,11 @@ static void arm_cspmu_start(struct perf_event *event, int pmu_flags)
arm_cspmu_set_event_period(event);
- filter = cspmu->impl.ops.event_filter(event);
-
if (event->hw.extra_reg.idx == cspmu->cycle_counter_logical_idx) {
- arm_cspmu_set_cc_filter(cspmu, filter);
+ cspmu->impl.ops.set_cc_filter(cspmu, event);
} else {
arm_cspmu_set_event(cspmu, hwc);
- cspmu->impl.ops.set_ev_filter(cspmu, hwc, filter);
+ cspmu->impl.ops.set_ev_filter(cspmu, event);
}
hwc->state = 0;
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h
index 2621f3111148..19684b76bd96 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.h
+++ b/drivers/perf/arm_cspmu/arm_cspmu.h
@@ -47,6 +47,8 @@
/* Default filter format */
#define ARM_CSPMU_FORMAT_FILTER_ATTR \
ARM_CSPMU_FORMAT_ATTR(filter, "config1:0-31")
+#define ARM_CSPMU_FORMAT_FILTER2_ATTR \
+ ARM_CSPMU_FORMAT_ATTR(filter2, "config2:0-31")
/*
* This is the default event number for cycle count, if supported, since the
@@ -65,6 +67,53 @@
/* The cycle counter, if implemented, is located at counter[31]. */
#define ARM_CSPMU_CYCLE_CNTR_IDX 31
+/*
+ * CoreSight PMU Arch register offsets.
+ */
+#define PMEVCNTR_LO 0x0
+#define PMEVCNTR_HI 0x4
+#define PMEVTYPER 0x400
+#define PMCCFILTR 0x47C
+#define PMEVFILT2R 0x800
+#define PMEVFILTR 0xA00
+#define PMCNTENSET 0xC00
+#define PMCNTENCLR 0xC20
+#define PMINTENSET 0xC40
+#define PMINTENCLR 0xC60
+#define PMOVSCLR 0xC80
+#define PMOVSSET 0xCC0
+#define PMIMPDEF 0xD80
+#define PMCFGR 0xE00
+#define PMCR 0xE04
+#define PMIIDR 0xE08
+
+/* PMCFGR register field */
+#define PMCFGR_NCG GENMASK(31, 28)
+#define PMCFGR_HDBG BIT(24)
+#define PMCFGR_TRO BIT(23)
+#define PMCFGR_SS BIT(22)
+#define PMCFGR_FZO BIT(21)
+#define PMCFGR_MSI BIT(20)
+#define PMCFGR_UEN BIT(19)
+#define PMCFGR_NA BIT(17)
+#define PMCFGR_EX BIT(16)
+#define PMCFGR_CCD BIT(15)
+#define PMCFGR_CC BIT(14)
+#define PMCFGR_SIZE GENMASK(13, 8)
+#define PMCFGR_N GENMASK(7, 0)
+
+/* PMCR register field */
+#define PMCR_TRO BIT(11)
+#define PMCR_HDBG BIT(10)
+#define PMCR_FZO BIT(9)
+#define PMCR_NA BIT(8)
+#define PMCR_DP BIT(5)
+#define PMCR_X BIT(4)
+#define PMCR_D BIT(3)
+#define PMCR_C BIT(2)
+#define PMCR_P BIT(1)
+#define PMCR_E BIT(0)
+
/* PMIIDR register field */
#define ARM_CSPMU_PMIIDR_IMPLEMENTER GENMASK(11, 0)
#define ARM_CSPMU_PMIIDR_PRODUCTID GENMASK(31, 20)
@@ -103,11 +152,11 @@ struct arm_cspmu_impl_ops {
bool (*is_cycle_counter_event)(const struct perf_event *event);
/* Decode event type/id from configs */
u32 (*event_type)(const struct perf_event *event);
- /* Decode filter value from configs */
- u32 (*event_filter)(const struct perf_event *event);
- /* Set event filter */
+ /* Set event filters */
+ void (*set_cc_filter)(struct arm_cspmu *cspmu,
+ const struct perf_event *event);
void (*set_ev_filter)(struct arm_cspmu *cspmu,
- struct hw_perf_event *hwc, u32 filter);
+ const struct perf_event *event);
/* Implementation specific event validation */
int (*validate_event)(struct arm_cspmu *cspmu,
struct perf_event *event);
diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c
index 8116c7846a46..dc6d4e3e2a1b 100644
--- a/drivers/perf/arm_cspmu/nvidia_cspmu.c
+++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c
@@ -6,6 +6,7 @@
/* Support for NVIDIA specific attributes. */
+#include <linux/io.h>
#include <linux/module.h>
#include <linux/topology.h>
@@ -183,6 +184,24 @@ static u32 nv_cspmu_event_filter(const struct perf_event *event)
return filter_val;
}
+static void nv_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
+ const struct perf_event *event)
+{
+ u32 filter = nv_cspmu_event_filter(event);
+ u32 offset = PMEVFILTR + (4 * event->hw.idx);
+
+ writel(filter, cspmu->base0 + offset);
+}
+
+static void nv_cspmu_set_cc_filter(struct arm_cspmu *cspmu,
+ const struct perf_event *event)
+{
+ u32 filter = nv_cspmu_event_filter(event);
+
+ writel(filter, cspmu->base0 + PMCCFILTR);
+}
+
+
enum nv_cspmu_name_fmt {
NAME_FMT_GENERIC,
NAME_FMT_SOCKET
@@ -322,7 +341,8 @@ static int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
cspmu->impl.ctx = ctx;
/* NVIDIA specific callbacks. */
- impl_ops->event_filter = nv_cspmu_event_filter;
+ impl_ops->set_cc_filter = nv_cspmu_set_cc_filter;
+ impl_ops->set_ev_filter = nv_cspmu_set_ev_filter;
impl_ops->get_event_attrs = nv_cspmu_get_event_attrs;
impl_ops->get_format_attrs = nv_cspmu_get_format_attrs;
impl_ops->get_name = nv_cspmu_get_name;
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 398cce3d76fc..2f33e69a8caf 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -342,12 +342,10 @@ armpmu_add(struct perf_event *event, int flags)
if (idx < 0)
return idx;
- /*
- * If there is an event in the counter we are going to use then make
- * sure it is disabled.
- */
+ /* The newly-allocated counter should be empty */
+ WARN_ON_ONCE(hw_events->events[idx]);
+
event->hw.idx = idx;
- armpmu->disable(event);
hw_events->events[idx] = event;
hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index 0e360feb3432..e506d59654e7 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -795,11 +795,6 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu)
static void armv8pmu_enable_event(struct perf_event *event)
{
- /*
- * Enable counter and interrupt, and set the counter to count
- * the event that we're interested in.
- */
- armv8pmu_disable_event_counter(event);
armv8pmu_write_event_type(event);
armv8pmu_enable_event_irq(event);
armv8pmu_enable_event_counter(event);
@@ -825,10 +820,10 @@ static void armv8pmu_start(struct arm_pmu *cpu_pmu)
else
armv8pmu_disable_user_access();
+ kvm_vcpu_pmu_resync_el0();
+
/* Enable all counters */
armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
-
- kvm_vcpu_pmu_resync_el0();
}
static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
@@ -1369,6 +1364,7 @@ PMUV3_INIT_SIMPLE(armv8_neoverse_v1)
PMUV3_INIT_SIMPLE(armv8_neoverse_v2)
PMUV3_INIT_SIMPLE(armv8_neoverse_v3)
PMUV3_INIT_SIMPLE(armv8_neoverse_v3ae)
+PMUV3_INIT_SIMPLE(armv8_rainier)
PMUV3_INIT_SIMPLE(armv8_nvidia_carmel)
PMUV3_INIT_SIMPLE(armv8_nvidia_denver)
@@ -1416,6 +1412,7 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
{.compatible = "arm,neoverse-v2-pmu", .data = armv8_neoverse_v2_pmu_init},
{.compatible = "arm,neoverse-v3-pmu", .data = armv8_neoverse_v3_pmu_init},
{.compatible = "arm,neoverse-v3ae-pmu", .data = armv8_neoverse_v3ae_pmu_init},
+ {.compatible = "arm,rainier-pmu", .data = armv8_rainier_pmu_init},
{.compatible = "cavium,thunder-pmu", .data = armv8_cavium_thunder_pmu_init},
{.compatible = "brcm,vulcan-pmu", .data = armv8_brcm_vulcan_pmu_init},
{.compatible = "nvidia,carmel-pmu", .data = armv8_nvidia_carmel_pmu_init},
diff --git a/drivers/perf/arm_v7_pmu.c b/drivers/perf/arm_v7_pmu.c
index 420cadd108e7..17831e1920bd 100644
--- a/drivers/perf/arm_v7_pmu.c
+++ b/drivers/perf/arm_v7_pmu.c
@@ -858,16 +858,6 @@ static void armv7pmu_enable_event(struct perf_event *event)
}
/*
- * Enable counter and interrupt, and set the counter to count
- * the event that we're interested in.
- */
-
- /*
- * Disable counter
- */
- armv7_pmnc_disable_counter(idx);
-
- /*
* Set event (if destined for PMNx counters)
* We only need to set the event for the cycle counter if we
* have the ability to perform event filtering.
@@ -875,14 +865,7 @@ static void armv7pmu_enable_event(struct perf_event *event)
if (cpu_pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER)
armv7_pmnc_write_evtsel(idx, hwc->config_base);
- /*
- * Enable interrupt for this counter
- */
armv7_pmnc_enable_intens(idx);
-
- /*
- * Enable counter
- */
armv7_pmnc_enable_counter(idx);
}
@@ -898,18 +881,7 @@ static void armv7pmu_disable_event(struct perf_event *event)
return;
}
- /*
- * Disable counter and interrupt
- */
-
- /*
- * Disable counter
- */
armv7_pmnc_disable_counter(idx);
-
- /*
- * Disable interrupt for this counter
- */
armv7_pmnc_disable_intens(idx);
}
@@ -1477,14 +1449,6 @@ static void krait_pmu_enable_event(struct perf_event *event)
int idx = hwc->idx;
/*
- * Enable counter and interrupt, and set the counter to count
- * the event that we're interested in.
- */
-
- /* Disable counter */
- armv7_pmnc_disable_counter(idx);
-
- /*
* Set event (if destined for PMNx counters)
* We set the event for the cycle counter because we
* have the ability to perform event filtering.
@@ -1494,10 +1458,7 @@ static void krait_pmu_enable_event(struct perf_event *event)
else
armv7_pmnc_write_evtsel(idx, hwc->config_base);
- /* Enable interrupt for this counter */
armv7_pmnc_enable_intens(idx);
-
- /* Enable counter */
armv7_pmnc_enable_counter(idx);
}
@@ -1798,14 +1759,6 @@ static void scorpion_pmu_enable_event(struct perf_event *event)
int idx = hwc->idx;
/*
- * Enable counter and interrupt, and set the counter to count
- * the event that we're interested in.
- */
-
- /* Disable counter */
- armv7_pmnc_disable_counter(idx);
-
- /*
* Set event (if destined for PMNx counters)
* We don't set the event for the cycle counter because we
* don't have the ability to perform event filtering.
@@ -1815,10 +1768,7 @@ static void scorpion_pmu_enable_event(struct perf_event *event)
else if (idx != ARMV7_IDX_CYCLE_COUNTER)
armv7_pmnc_write_evtsel(idx, hwc->config_base);
- /* Enable interrupt for this counter */
armv7_pmnc_enable_intens(idx);
-
- /* Enable counter */
armv7_pmnc_enable_counter(idx);
}
diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c
index cccecae9823f..f851e070760c 100644
--- a/drivers/perf/dwc_pcie_pmu.c
+++ b/drivers/perf/dwc_pcie_pmu.c
@@ -565,15 +565,15 @@ static int dwc_pcie_register_dev(struct pci_dev *pdev)
u32 sbdf;
sbdf = (pci_domain_nr(pdev->bus) << 16) | PCI_DEVID(pdev->bus->number, pdev->devfn);
- plat_dev = platform_device_register_data(NULL, "dwc_pcie_pmu", sbdf,
- pdev, sizeof(*pdev));
-
+ plat_dev = platform_device_register_simple("dwc_pcie_pmu", sbdf, NULL, 0);
if (IS_ERR(plat_dev))
return PTR_ERR(plat_dev);
dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL);
- if (!dev_info)
+ if (!dev_info) {
+ platform_device_unregister(plat_dev);
return -ENOMEM;
+ }
/* Cache platform device to handle pci device hotplug */
dev_info->plat_dev = plat_dev;
@@ -614,18 +614,26 @@ static struct notifier_block dwc_pcie_pmu_nb = {
static int dwc_pcie_pmu_probe(struct platform_device *plat_dev)
{
- struct pci_dev *pdev = plat_dev->dev.platform_data;
+ struct pci_dev *pdev;
struct dwc_pcie_pmu *pcie_pmu;
char *name;
u32 sbdf;
u16 vsec;
int ret;
+ sbdf = plat_dev->id;
+ pdev = pci_get_domain_bus_and_slot(sbdf >> 16, PCI_BUS_NUM(sbdf & 0xffff),
+ sbdf & 0xff);
+ if (!pdev) {
+ pr_err("No pdev found for the sbdf 0x%x\n", sbdf);
+ return -ENODEV;
+ }
+
vsec = dwc_pcie_des_cap(pdev);
if (!vsec)
return -ENODEV;
- sbdf = plat_dev->id;
+ pci_dev_put(pdev);
name = devm_kasprintf(&plat_dev->dev, GFP_KERNEL, "dwc_rootport_%x", sbdf);
if (!name)
return -ENOMEM;
@@ -640,7 +648,7 @@ static int dwc_pcie_pmu_probe(struct platform_device *plat_dev)
pcie_pmu->on_cpu = -1;
pcie_pmu->pmu = (struct pmu){
.name = name,
- .parent = &pdev->dev,
+ .parent = &plat_dev->dev,
.module = THIS_MODULE,
.attr_groups = dwc_pcie_attr_groups,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
@@ -730,6 +738,15 @@ static struct platform_driver dwc_pcie_pmu_driver = {
.driver = {.name = "dwc_pcie_pmu",},
};
+static void dwc_pcie_cleanup_devices(void)
+{
+ struct dwc_pcie_dev_info *dev_info, *tmp;
+
+ list_for_each_entry_safe(dev_info, tmp, &dwc_pcie_dev_info_head, dev_node) {
+ dwc_pcie_unregister_dev(dev_info);
+ }
+}
+
static int __init dwc_pcie_pmu_init(void)
{
struct pci_dev *pdev = NULL;
@@ -742,7 +759,7 @@ static int __init dwc_pcie_pmu_init(void)
ret = dwc_pcie_register_dev(pdev);
if (ret) {
pci_dev_put(pdev);
- return ret;
+ goto err_cleanup;
}
}
@@ -751,35 +768,35 @@ static int __init dwc_pcie_pmu_init(void)
dwc_pcie_pmu_online_cpu,
dwc_pcie_pmu_offline_cpu);
if (ret < 0)
- return ret;
+ goto err_cleanup;
dwc_pcie_pmu_hp_state = ret;
ret = platform_driver_register(&dwc_pcie_pmu_driver);
if (ret)
- goto platform_driver_register_err;
+ goto err_remove_cpuhp;
ret = bus_register_notifier(&pci_bus_type, &dwc_pcie_pmu_nb);
if (ret)
- goto platform_driver_register_err;
+ goto err_unregister_driver;
notify = true;
return 0;
-platform_driver_register_err:
+err_unregister_driver:
+ platform_driver_unregister(&dwc_pcie_pmu_driver);
+err_remove_cpuhp:
cpuhp_remove_multi_state(dwc_pcie_pmu_hp_state);
-
+err_cleanup:
+ dwc_pcie_cleanup_devices();
return ret;
}
static void __exit dwc_pcie_pmu_exit(void)
{
- struct dwc_pcie_dev_info *dev_info, *tmp;
-
if (notify)
bus_unregister_notifier(&pci_bus_type, &dwc_pcie_pmu_nb);
- list_for_each_entry_safe(dev_info, tmp, &dwc_pcie_dev_info_head, dev_node)
- dwc_pcie_unregister_dev(dev_info);
+ dwc_pcie_cleanup_devices();
platform_driver_unregister(&dwc_pcie_pmu_driver);
cpuhp_remove_multi_state(dwc_pcie_pmu_hp_state);
}
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 7fe0981a7e46..02fd4746231d 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -1184,7 +1184,7 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_
}
#endif
-extern unsigned int arch_freq_get_on_cpu(int cpu);
+extern int arch_freq_get_on_cpu(int cpu);
#ifndef arch_set_freq_scale
static __always_inline
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index d7e30d4f7503..f3bc0bcd7098 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -78,14 +78,18 @@ static inline dma_addr_t dma_range_map_max(const struct bus_dma_region *map)
#define phys_to_dma_unencrypted phys_to_dma
#endif
#else
-static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev,
- phys_addr_t paddr)
+static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
if (dev->dma_range_map)
return translate_phys_to_dma(dev, paddr);
return paddr;
}
+static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev,
+ phys_addr_t paddr)
+{
+ return dma_addr_unencrypted(__phys_to_dma(dev, paddr));
+}
/*
* If memory encryption is supported, phys_to_dma will set the memory encryption
* bit in the DMA address, and dma_to_phys will clear it.
@@ -94,19 +98,20 @@ static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev,
*/
static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
{
- return __sme_set(phys_to_dma_unencrypted(dev, paddr));
+ return dma_addr_encrypted(__phys_to_dma(dev, paddr));
}
static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr)
{
phys_addr_t paddr;
+ dma_addr = dma_addr_canonical(dma_addr);
if (dev->dma_range_map)
paddr = translate_dma_to_phys(dev, dma_addr);
else
paddr = dma_addr;
- return __sme_clr(paddr);
+ return paddr;
}
#endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */
diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h
index ae4526389261..07584c5e36fb 100644
--- a/include/linux/mem_encrypt.h
+++ b/include/linux/mem_encrypt.h
@@ -26,11 +26,34 @@
*/
#define __sme_set(x) ((x) | sme_me_mask)
#define __sme_clr(x) ((x) & ~sme_me_mask)
+
+#define dma_addr_encrypted(x) __sme_set(x)
+#define dma_addr_canonical(x) __sme_clr(x)
+
#else
#define __sme_set(x) (x)
#define __sme_clr(x) (x)
#endif
+/*
+ * dma_addr_encrypted() and dma_addr_unencrypted() are for converting a given DMA
+ * address to the respective type of addressing.
+ *
+ * dma_addr_canonical() is used to reverse any conversions for encrypted/decrypted
+ * back to the canonical address.
+ */
+#ifndef dma_addr_encrypted
+#define dma_addr_encrypted(x) (x)
+#endif
+
+#ifndef dma_addr_unencrypted
+#define dma_addr_unencrypted(x) (x)
+#endif
+
+#ifndef dma_addr_canonical
+#define dma_addr_canonical(x) (x)
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* __MEM_ENCRYPT_H__ */
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 4b5b83677e3f..c70d528594f2 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -84,7 +84,6 @@ struct arm_pmu {
struct pmu pmu;
cpumask_t supported_cpus;
char *name;
- int pmuver;
irqreturn_t (*handle_irq)(struct arm_pmu *pmu);
void (*enable)(struct perf_event *event);
void (*disable)(struct perf_event *event);
@@ -102,18 +101,20 @@ struct arm_pmu {
int (*map_event)(struct perf_event *event);
DECLARE_BITMAP(cntr_mask, ARMPMU_MAX_HWEVENTS);
bool secure_access; /* 32-bit ARM only */
-#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
- DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
-#define ARMV8_PMUV3_EXT_COMMON_EVENT_BASE 0x4000
- DECLARE_BITMAP(pmceid_ext_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
struct platform_device *plat_device;
struct pmu_hw_events __percpu *hw_events;
struct hlist_node node;
struct notifier_block cpu_pm_nb;
/* the attr_groups array must be NULL-terminated */
const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1];
- /* store the PMMIR_EL1 to expose slots */
+
+ /* PMUv3 only */
+ int pmuver;
u64 reg_pmmir;
+#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
+ DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
+#define ARMV8_PMUV3_EXT_COMMON_EVENT_BASE 0x4000
+ DECLARE_BITMAP(pmceid_ext_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
/* Only to be used by ACPI probing code */
unsigned long acpi_cpuid;
diff --git a/include/linux/topology.h b/include/linux/topology.h
index a1815f4395ab..24e715f0f6d2 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -240,6 +240,29 @@ static inline const struct cpumask *cpu_smt_mask(int cpu)
}
#endif
+#ifndef topology_is_primary_thread
+
+static inline bool topology_is_primary_thread(unsigned int cpu)
+{
+ /*
+ * When disabling SMT, the primary thread of the SMT will remain
+ * enabled/active. Architectures that have a special primary thread
+ * (e.g. x86) need to override this function. Otherwise the first
+ * thread in the SMT can be made the primary thread.
+ *
+ * The sibling cpumask of an offline CPU always contains the CPU
+ * itself on architectures using the implementation of
+ * CONFIG_GENERIC_ARCH_TOPOLOGY for building their topology.
+ * Other architectures not using CONFIG_GENERIC_ARCH_TOPOLOGY for
+ * building their topology have to check whether to use this default
+ * implementation or to override it.
+ */
+ return cpu == cpumask_first(topology_sibling_cpumask(cpu));
+}
+#define topology_is_primary_thread topology_is_primary_thread
+
+#endif
+
static inline const struct cpumask *cpu_cpu_mask(int cpu)
{
return cpumask_of_node(cpu_to_node(cpu));
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index 1ea2c4c33b86..ef1c27fa3c57 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -85,6 +85,7 @@
/* compatibility flags */
#define MAP_FILE 0
+#define PKEY_UNRESTRICTED 0x0
#define PKEY_DISABLE_ACCESS 0x1
#define PKEY_DISABLE_WRITE 0x2
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
diff --git a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
index 303260a6dc65..3bfcd3848432 100644
--- a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
+++ b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
@@ -227,6 +227,8 @@ static int check_child_hugetlb_memory_mapping(int mem_type, int mode, int mappin
int main(int argc, char *argv[])
{
int err;
+ void *map_ptr;
+ unsigned long map_size;
err = mte_default_setup();
if (err)
@@ -243,6 +245,15 @@ int main(int argc, char *argv[])
return KSFT_FAIL;
}
+ /* Check if MTE supports hugetlb mappings */
+ map_size = default_huge_page_size();
+ map_ptr = mmap(NULL, map_size, PROT_READ | PROT_MTE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+ if (map_ptr == MAP_FAILED)
+ ksft_exit_skip("PROT_MTE not supported with MAP_HUGETLB mappings\n");
+ else
+ munmap(map_ptr, map_size);
+
/* Set test plan */
ksft_set_plan(12);
@@ -270,13 +281,13 @@ int main(int argc, char *argv[])
"Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n");
evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
- "Check child hugetlb memory with private mapping, precise mode and mmap memory\n");
+ "Check child hugetlb memory with private mapping, sync error mode and mmap memory\n");
evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
- "Check child hugetlb memory with private mapping, precise mode and mmap memory\n");
+ "Check child hugetlb memory with private mapping, async error mode and mmap memory\n");
evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
- "Check child hugetlb memory with private mapping, precise mode and mmap/mprotect memory\n");
+ "Check child hugetlb memory with private mapping, sync error mode and mmap/mprotect memory\n");
evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
- "Check child hugetlb memory with private mapping, precise mode and mmap/mprotect memory\n");
+ "Check child hugetlb memory with private mapping, async error mode and mmap/mprotect memory\n");
mte_restore_setup();
free_hugetlb();
diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c
index ad17005521a8..005f29c86484 100644
--- a/tools/testing/selftests/mm/mseal_test.c
+++ b/tools/testing/selftests/mm/mseal_test.c
@@ -218,7 +218,7 @@ bool seal_support(void)
bool pkey_supported(void)
{
#if defined(__i386__) || defined(__x86_64__) /* arch */
- int pkey = sys_pkey_alloc(0, 0);
+ int pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
if (pkey > 0)
return true;
@@ -1671,7 +1671,7 @@ static void test_seal_discard_ro_anon_on_pkey(bool seal)
setup_single_address_rw(size, &ptr);
FAIL_TEST_IF_FALSE(ptr != (void *)-1);
- pkey = sys_pkey_alloc(0, 0);
+ pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
FAIL_TEST_IF_FALSE(pkey > 0);
ret = sys_mprotect_pkey((void *)ptr, size, PROT_READ | PROT_WRITE, pkey);
@@ -1683,7 +1683,7 @@ static void test_seal_discard_ro_anon_on_pkey(bool seal)
}
/* sealing doesn't take effect if PKRU allow write. */
- set_pkey(pkey, 0);
+ set_pkey(pkey, PKEY_UNRESTRICTED);
ret = sys_madvise(ptr, size, MADV_DONTNEED);
FAIL_TEST_IF_FALSE(!ret);
diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h
index f080e97b39be..ea404f80e6cb 100644
--- a/tools/testing/selftests/mm/pkey-helpers.h
+++ b/tools/testing/selftests/mm/pkey-helpers.h
@@ -13,6 +13,7 @@
#include <ucontext.h>
#include <sys/mman.h>
+#include <linux/mman.h>
#include <linux/types.h>
#include "../kselftest.h"
@@ -193,7 +194,7 @@ static inline u32 *siginfo_get_pkey_ptr(siginfo_t *si)
static inline int kernel_has_pkeys(void)
{
/* try allocating a key and see if it succeeds */
- int ret = sys_pkey_alloc(0, 0);
+ int ret = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
if (ret <= 0) {
return 0;
}
diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c
index 1ac8c8809880..b5e076a564c9 100644
--- a/tools/testing/selftests/mm/pkey_sighandler_tests.c
+++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c
@@ -311,7 +311,7 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void)
__write_pkey_reg(pkey_reg);
/* Protect the new stack with MPK 1 */
- pkey = sys_pkey_alloc(0, 0);
+ pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
sys_mprotect_pkey(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey);
/* Set up alternate signal stack that will use the default MPK */
@@ -484,7 +484,7 @@ static void test_pkru_sigreturn(void)
__write_pkey_reg(pkey_reg);
/* Protect the stack with MPK 2 */
- pkey = sys_pkey_alloc(0, 0);
+ pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
sys_mprotect_pkey(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey);
/* Set up alternate signal stack that will use the default MPK */
diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c
index 35565af308af..23ebec367015 100644
--- a/tools/testing/selftests/mm/protection_keys.c
+++ b/tools/testing/selftests/mm/protection_keys.c
@@ -463,7 +463,7 @@ static pid_t fork_lazy_child(void)
static int alloc_pkey(void)
{
int ret;
- unsigned long init_val = 0x0;
+ unsigned long init_val = PKEY_UNRESTRICTED;
dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n",
__func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg);
diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h
index 3a0129467de6..d6deb6ffa1b9 100644
--- a/tools/testing/selftests/powerpc/include/pkeys.h
+++ b/tools/testing/selftests/powerpc/include/pkeys.h
@@ -24,6 +24,9 @@
#undef PKEY_DISABLE_EXECUTE
#define PKEY_DISABLE_EXECUTE 0x4
+#undef PKEY_UNRESTRICTED
+#define PKEY_UNRESTRICTED 0x0
+
/* Older versions of libc do not define this */
#ifndef SEGV_PKUERR
#define SEGV_PKUERR 4
@@ -93,7 +96,7 @@ int pkeys_unsupported(void)
SKIP_IF(!hash_mmu);
/* Check if the system call is supported */
- pkey = sys_pkey_alloc(0, 0);
+ pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
SKIP_IF(pkey < 0);
sys_pkey_free(pkey);
diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
index 0af4f02669a1..29b91b7456eb 100644
--- a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
+++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
@@ -72,7 +72,7 @@ static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
switch (fault_type) {
case PKEY_DISABLE_ACCESS:
- pkey_set_rights(fault_pkey, 0);
+ pkey_set_rights(fault_pkey, PKEY_UNRESTRICTED);
break;
case PKEY_DISABLE_EXECUTE:
/*
diff --git a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
index 2db76e56d4cb..e89a164c686b 100644
--- a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
+++ b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
@@ -83,7 +83,7 @@ static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
mprotect(pgstart, pgsize, PROT_EXEC))
_exit(1);
else
- pkey_set_rights(pkey, 0);
+ pkey_set_rights(pkey, PKEY_UNRESTRICTED);
fault_count++;
}
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
index f061434af452..7ff53caeb4aa 100644
--- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -95,16 +95,16 @@ static int child(struct shared_info *info)
/* Get some pkeys so that we can change their bits in the AMR. */
pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
if (pkey1 < 0) {
- pkey1 = sys_pkey_alloc(0, 0);
+ pkey1 = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
FAIL_IF(pkey1 < 0);
disable_execute = false;
}
- pkey2 = sys_pkey_alloc(0, 0);
+ pkey2 = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
FAIL_IF(pkey2 < 0);
- pkey3 = sys_pkey_alloc(0, 0);
+ pkey3 = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
FAIL_IF(pkey3 < 0);
info->amr |= 3ul << pkeyshift(pkey1) | 2ul << pkeyshift(pkey2);
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
index fc633014424f..10f63042cf91 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -57,16 +57,16 @@ static int child(struct shared_info *info)
/* Get some pkeys so that we can change their bits in the AMR. */
pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
if (pkey1 < 0) {
- pkey1 = sys_pkey_alloc(0, 0);
+ pkey1 = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
CHILD_FAIL_IF(pkey1 < 0, &info->child_sync);
disable_execute = false;
}
- pkey2 = sys_pkey_alloc(0, 0);
+ pkey2 = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
CHILD_FAIL_IF(pkey2 < 0, &info->child_sync);
- pkey3 = sys_pkey_alloc(0, 0);
+ pkey3 = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
CHILD_FAIL_IF(pkey3 < 0, &info->child_sync);
info->amr1 |= 3ul << pkeyshift(pkey1);