-rw-r--r--.mailmap7
-rw-r--r--CREDITS4
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu3
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt53
-rw-r--r--Documentation/admin-guide/perf/hns3-pmu.rst136
-rw-r--r--Documentation/admin-guide/perf/index.rst1
-rw-r--r--Documentation/admin-guide/pm/cpuidle.rst15
-rw-r--r--Documentation/arm64/elf_hwcaps.rst4
-rw-r--r--Documentation/arm64/memory.rst10
-rw-r--r--Documentation/arm64/silicon-errata.rst6
-rw-r--r--Documentation/core-api/kernel-api.rst2
-rw-r--r--Documentation/core-api/protection-keys.rst44
-rw-r--r--Documentation/core-api/symbol-namespaces.rst4
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml1
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml134
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml65
-rw-r--r--Documentation/devicetree/bindings/net/ethernet-controller.yaml123
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fec.yaml3
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,rzg2l-pinctrl.yaml15
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml8
-rw-r--r--Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml1
-rw-r--r--Documentation/devicetree/bindings/timer/ingenic,tcu.yaml4
-rw-r--r--Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt6
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,cmt.yaml16
-rw-r--r--Documentation/devicetree/bindings/timer/st,nomadik-mtu.yaml58
-rw-r--r--Documentation/features/vm/ioremap_prot/arch-support.txt2
-rw-r--r--Documentation/filesystems/netfs_library.rst8
-rw-r--r--Documentation/livepatch/module-elf-format.rst10
-rw-r--r--Documentation/memory-barriers.txt11
-rw-r--r--Documentation/networking/dsa/dsa.rst363
-rw-r--r--Documentation/networking/ip-sysctl.rst19
-rw-r--r--Documentation/sound/soc/dai.rst2
-rw-r--r--Documentation/staging/static-keys.rst3
-rw-r--r--Documentation/translations/it_IT/core-api/symbol-namespaces.rst6
-rw-r--r--Documentation/translations/zh_CN/core-api/kernel-api.rst2
-rw-r--r--Documentation/translations/zh_CN/core-api/symbol-namespaces.rst2
-rw-r--r--Documentation/virt/kvm/api.rst19
-rw-r--r--Documentation/virt/kvm/arm/hyp-abi.rst11
-rw-r--r--MAINTAINERS24
-rw-r--r--Makefile2
-rw-r--r--arch/Kconfig10
-rw-r--r--arch/alpha/kernel/irq.c2
-rw-r--r--arch/arc/kernel/jump_label.c13
-rw-r--r--arch/arm/boot/dts/imx6qdl-ts7970.dtsi2
-rw-r--r--arch/arm/boot/dts/imx6ull-colibri.dtsi9
-rw-r--r--arch/arm/boot/dts/lan966x-kontron-kswitch-d10-mmt.dtsi8
-rw-r--r--arch/arm/boot/dts/lan966x.dtsi2
-rw-r--r--arch/arm/boot/dts/qcom-msm8974.dtsi30
-rw-r--r--arch/arm/boot/dts/sama5d2.dtsi2
-rw-r--r--arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts2
-rw-r--r--arch/arm/include/asm/dma.h2
-rw-r--r--arch/arm/include/asm/domain.h13
-rw-r--r--arch/arm/include/asm/io.h4
-rw-r--r--arch/arm/include/asm/mach/map.h1
-rw-r--r--arch/arm/include/asm/ptrace.h26
-rw-r--r--arch/arm/kernel/entry-common.S1
-rw-r--r--arch/arm/kernel/jump_label.c6
-rw-r--r--arch/arm/lib/findbit.S16
-rw-r--r--arch/arm/mach-hisi/Kconfig2
-rw-r--r--arch/arm/mach-mmp/mmp2.h2
-rw-r--r--arch/arm/mach-mmp/pxa168.h2
-rw-r--r--arch/arm/mach-mmp/pxa910.h2
-rw-r--r--arch/arm/mach-omap2/Kconfig2
-rw-r--r--arch/arm/mach-pxa/corgi.c2
-rw-r--r--arch/arm/mach-pxa/hx4700.c2
-rw-r--r--arch/arm/mach-pxa/icontrol.c4
-rw-r--r--arch/arm/mach-pxa/littleton.c2
-rw-r--r--arch/arm/mach-pxa/magician.c2
-rw-r--r--arch/arm/mach-pxa/spitz.c2
-rw-r--r--arch/arm/mach-pxa/z2.c4
-rw-r--r--arch/arm/mach-rockchip/pm.c7
-rw-r--r--arch/arm/mm/Kconfig6
-rw-r--r--arch/arm/mm/alignment.c3
-rw-r--r--arch/arm/mm/ioremap.c9
-rw-r--r--arch/arm/mm/mmu.c15
-rw-r--r--arch/arm/mm/nommu.c9
-rw-r--r--arch/arm/mm/proc-v7-bugs.c9
-rw-r--r--arch/arm/probes/decode.h26
-rw-r--r--arch/arm64/Kconfig37
-rw-r--r--arch/arm64/boot/Makefile5
-rw-r--r--arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi8
-rw-r--r--arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi5
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399.dtsi6
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts2
-rw-r--r--arch/arm64/include/asm/asm-extable.h79
-rw-r--r--arch/arm64/include/asm/asm-uaccess.h12
-rw-r--r--arch/arm64/include/asm/asm_pointer_auth.h4
-rw-r--r--arch/arm64/include/asm/assembler.h35
-rw-r--r--arch/arm64/include/asm/barrier.h12
-rw-r--r--arch/arm64/include/asm/cache.h41
-rw-r--r--arch/arm64/include/asm/cacheflush.h7
-rw-r--r--arch/arm64/include/asm/cpu.h1
-rw-r--r--arch/arm64/include/asm/cpu_ops.h9
-rw-r--r--arch/arm64/include/asm/cpufeature.h7
-rw-r--r--arch/arm64/include/asm/cpuidle.h15
-rw-r--r--arch/arm64/include/asm/el2_setup.h60
-rw-r--r--arch/arm64/include/asm/fixmap.h4
-rw-r--r--arch/arm64/include/asm/hwcap.h3
-rw-r--r--arch/arm64/include/asm/io.h24
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h18
-rw-r--r--arch/arm64/include/asm/memory.h9
-rw-r--r--arch/arm64/include/asm/mmu_context.h16
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h3
-rw-r--r--arch/arm64/include/asm/pgtable.h16
-rw-r--r--arch/arm64/include/asm/processor.h3
-rw-r--r--arch/arm64/include/asm/sysreg.h128
-rw-r--r--arch/arm64/include/asm/uaccess.h94
-rw-r--r--arch/arm64/include/asm/virt.h11
-rw-r--r--arch/arm64/include/uapi/asm/hwcap.h4
-rw-r--r--arch/arm64/kernel/Makefile7
-rw-r--r--arch/arm64/kernel/acpi.c2
-rw-r--r--arch/arm64/kernel/acpi_numa.c2
-rw-r--r--arch/arm64/kernel/alternative.c2
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c9
-rw-r--r--arch/arm64/kernel/cpu_errata.c26
-rw-r--r--arch/arm64/kernel/cpufeature.c376
-rw-r--r--arch/arm64/kernel/cpuidle.c29
-rw-r--r--arch/arm64/kernel/cpuinfo.c51
-rw-r--r--arch/arm64/kernel/entry.S53
-rw-r--r--arch/arm64/kernel/fpsimd.c1
-rw-r--r--arch/arm64/kernel/head.S525
-rw-r--r--arch/arm64/kernel/hibernate.c5
-rw-r--r--arch/arm64/kernel/hyp-stub.S117
-rw-r--r--arch/arm64/kernel/idreg-override.c93
-rw-r--r--arch/arm64/kernel/image-vars.h59
-rw-r--r--arch/arm64/kernel/jump_label.c11
-rw-r--r--arch/arm64/kernel/kaslr.c149
-rw-r--r--arch/arm64/kernel/kuser32.S1
-rw-r--r--arch/arm64/kernel/mte.c9
-rw-r--r--arch/arm64/kernel/pi/Makefile33
-rw-r--r--arch/arm64/kernel/pi/kaslr_early.c112
-rw-r--r--arch/arm64/kernel/signal.c20
-rw-r--r--arch/arm64/kernel/sigreturn32.S1
-rw-r--r--arch/arm64/kernel/sleep.S3
-rw-r--r--arch/arm64/kernel/stacktrace.c99
-rw-r--r--arch/arm64/kernel/suspend.c2
-rw-r--r--arch/arm64/kernel/traps.c6
-rw-r--r--arch/arm64/kernel/vdso/Makefile8
-rw-r--r--arch/arm64/kernel/vdso/vdso.lds.S16
-rw-r--r--arch/arm64/kernel/vdso32/Makefile1
-rw-r--r--arch/arm64/kernel/vdso32/vdso.lds.S27
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S22
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/fixed_config.h32
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sys_regs.c12
-rw-r--r--arch/arm64/kvm/sys_regs.c14
-rw-r--r--arch/arm64/lib/mte.S2
-rw-r--r--arch/arm64/mm/cache.S41
-rw-r--r--arch/arm64/mm/copypage.c9
-rw-r--r--arch/arm64/mm/dma-mapping.c19
-rw-r--r--arch/arm64/mm/extable.c10
-rw-r--r--arch/arm64/mm/fault.c1
-rw-r--r--arch/arm64/mm/hugetlbpage.c10
-rw-r--r--arch/arm64/mm/init.c4
-rw-r--r--arch/arm64/mm/ioremap.c90
-rw-r--r--arch/arm64/mm/kasan_init.c4
-rw-r--r--arch/arm64/mm/mmu.c78
-rw-r--r--arch/arm64/mm/mteswap.c9
-rw-r--r--arch/arm64/mm/proc.S188
-rw-r--r--arch/arm64/tools/cpucaps2
-rwxr-xr-xarch/arm64/tools/gen-sysreg.awk2
-rw-r--r--arch/arm64/tools/sysreg264
-rw-r--r--arch/csky/include/asm/tlb.h15
-rw-r--r--arch/ia64/kernel/iosapic.c2
-rw-r--r--arch/ia64/kernel/irq.c4
-rw-r--r--arch/ia64/kernel/msi_ia64.c4
-rw-r--r--arch/loongarch/Kconfig3
-rw-r--r--arch/loongarch/include/asm/acpi.h142
-rw-r--r--arch/loongarch/include/asm/asmmacro.h12
-rw-r--r--arch/loongarch/include/asm/atomic.h37
-rw-r--r--arch/loongarch/include/asm/barrier.h4
-rw-r--r--arch/loongarch/include/asm/cmpxchg.h4
-rw-r--r--arch/loongarch/include/asm/compiler.h15
-rw-r--r--arch/loongarch/include/asm/elf.h2
-rw-r--r--arch/loongarch/include/asm/futex.h11
-rw-r--r--arch/loongarch/include/asm/irq.h51
-rw-r--r--arch/loongarch/include/asm/irqflags.h1
-rw-r--r--arch/loongarch/include/asm/local.h1
-rw-r--r--arch/loongarch/include/asm/loongson.h16
-rw-r--r--arch/loongarch/include/asm/stacktrace.h12
-rw-r--r--arch/loongarch/include/asm/thread_info.h4
-rw-r--r--arch/loongarch/include/asm/tlb.h10
-rw-r--r--arch/loongarch/include/asm/uaccess.h2
-rw-r--r--arch/loongarch/kernel/acpi.c65
-rw-r--r--arch/loongarch/kernel/cacheinfo.c11
-rw-r--r--arch/loongarch/kernel/entry.S4
-rw-r--r--arch/loongarch/kernel/env.c20
-rw-r--r--arch/loongarch/kernel/fpu.S174
-rw-r--r--arch/loongarch/kernel/genex.S12
-rw-r--r--arch/loongarch/kernel/head.S8
-rw-r--r--arch/loongarch/kernel/irq.c58
-rw-r--r--arch/loongarch/kernel/ptrace.c12
-rw-r--r--arch/loongarch/kernel/reset.c1
-rw-r--r--arch/loongarch/kernel/setup.c2
-rw-r--r--arch/loongarch/kernel/smp.c113
-rw-r--r--arch/loongarch/kernel/switch.S4
-rw-r--r--arch/loongarch/kernel/time.c14
-rw-r--r--arch/loongarch/lib/clear_user.S2
-rw-r--r--arch/loongarch/lib/copy_user.S2
-rw-r--r--arch/loongarch/lib/delay.c1
-rw-r--r--arch/loongarch/mm/page.S118
-rw-r--r--arch/loongarch/mm/tlbex.S98
-rw-r--r--arch/m68k/Kconfig.cpu7
-rw-r--r--arch/m68k/Kconfig.debug6
-rw-r--r--arch/m68k/Kconfig.machine5
-rw-r--r--arch/m68k/configs/amiga_defconfig14
-rw-r--r--arch/m68k/configs/apollo_defconfig14
-rw-r--r--arch/m68k/configs/atari_defconfig14
-rw-r--r--arch/m68k/configs/bvme6000_defconfig14
-rw-r--r--arch/m68k/configs/hp300_defconfig14
-rw-r--r--arch/m68k/configs/mac_defconfig14
-rw-r--r--arch/m68k/configs/multi_defconfig14
-rw-r--r--arch/m68k/configs/mvme147_defconfig14
-rw-r--r--arch/m68k/configs/mvme16x_defconfig14
-rw-r--r--arch/m68k/configs/q40_defconfig14
-rw-r--r--arch/m68k/configs/sun3_defconfig14
-rw-r--r--arch/m68k/configs/sun3x_defconfig14
-rw-r--r--arch/m68k/include/asm/bitops.h2
-rw-r--r--arch/m68k/include/asm/processor.h1
-rw-r--r--arch/m68k/include/uapi/asm/bootinfo-virt.h8
-rw-r--r--arch/m68k/kernel/traps.c1
-rw-r--r--arch/m68k/mac/iop.c4
-rw-r--r--arch/m68k/mac/macints.c35
-rw-r--r--arch/m68k/q40/q40ints.c2
-rw-r--r--arch/m68k/sun3/mmu_emu.c11
-rw-r--r--arch/m68k/virt/config.c11
-rw-r--r--arch/m68k/virt/ints.c3
-rw-r--r--arch/m68k/virt/platform.c58
-rw-r--r--arch/mips/cavium-octeon/octeon-irq.c4
-rw-r--r--arch/mips/include/asm/jump_label.h2
-rw-r--r--arch/mips/include/asm/mach-loongson64/irq.h3
-rw-r--r--arch/mips/kernel/jump_label.c19
-rw-r--r--arch/mips/kernel/module.c5
-rw-r--r--arch/parisc/kernel/irq.c2
-rw-r--r--arch/parisc/kernel/jump_label.c11
-rw-r--r--arch/powerpc/Kconfig5
-rw-r--r--arch/powerpc/include/asm/tlb.h2
-rw-r--r--arch/powerpc/kernel/Makefile1
-rw-r--r--arch/riscv/Kconfig2
-rw-r--r--arch/riscv/Makefile3
-rw-r--r--arch/riscv/boot/dts/canaan/canaan_kd233.dts2
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts2
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts2
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_go.dts6
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maixduino.dts2
-rw-r--r--arch/riscv/boot/dts/microchip/mpfs.dtsi4
-rw-r--r--arch/riscv/errata/sifive/errata.c3
-rw-r--r--arch/riscv/include/asm/pgtable-64.h12
-rw-r--r--arch/riscv/include/asm/pgtable.h6
-rw-r--r--arch/riscv/kernel/Makefile2
-rw-r--r--arch/riscv/kernel/elf_kexec.c2
-rw-r--r--arch/riscv/kernel/jump_label.c12
-rw-r--r--arch/riscv/kvm/mmu.c2
-rw-r--r--arch/riscv/kvm/vcpu.c2
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/Makefile8
-rw-r--r--arch/s390/include/asm/archrandom.h9
-rw-r--r--arch/s390/include/asm/jump_label.h5
-rw-r--r--arch/s390/include/asm/nospec-insn.h2
-rw-r--r--arch/s390/include/asm/tlb.h3
-rw-r--r--arch/s390/kernel/jump_label.c28
-rw-r--r--arch/s390/kernel/module.c1
-rw-r--r--arch/s390/lib/Makefile3
-rw-r--r--arch/s390/lib/expoline/Makefile3
-rw-r--r--arch/s390/lib/expoline/expoline.S (renamed from arch/s390/lib/expoline.S)0
-rw-r--r--arch/sh/include/asm/io.h8
-rw-r--r--arch/sh/kernel/irq.c7
-rw-r--r--arch/sparc/Kconfig2
-rw-r--r--arch/sparc/include/asm/tlb_64.h2
-rw-r--r--arch/sparc/kernel/module.c3
-rw-r--r--arch/um/include/asm/page.h4
-rw-r--r--arch/um/include/shared/mem.h4
-rw-r--r--arch/um/kernel/um_arch.c4
-rw-r--r--arch/um/os-Linux/skas/process.c6
-rw-r--r--arch/x86/.gitignore2
-rw-r--r--arch/x86/Kconfig116
-rw-r--r--arch/x86/Kconfig.debug3
-rw-r--r--arch/x86/Makefile7
-rw-r--r--arch/x86/entry/Makefile2
-rw-r--r--arch/x86/entry/calling.h62
-rw-r--r--arch/x86/entry/entry.S22
-rw-r--r--arch/x86/entry/entry_32.S2
-rw-r--r--arch/x86/entry/entry_64.S63
-rw-r--r--arch/x86/entry/entry_64_compat.S21
-rw-r--r--arch/x86/entry/vdso/Makefile1
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_emu_64.S9
-rw-r--r--arch/x86/events/amd/uncore.c146
-rw-r--r--arch/x86/events/intel/core.c7
-rw-r--r--arch/x86/events/intel/ds.c129
-rw-r--r--arch/x86/events/intel/lbr.c19
-rw-r--r--arch/x86/events/perf_event.h17
-rw-r--r--arch/x86/hyperv/irqdomain.c2
-rw-r--r--arch/x86/include/asm/alternative.h1
-rw-r--r--arch/x86/include/asm/amd-ibs.h16
-rw-r--r--arch/x86/include/asm/cpufeatures.h15
-rw-r--r--arch/x86/include/asm/disabled-features.h21
-rw-r--r--arch/x86/include/asm/fpu/api.h2
-rw-r--r--arch/x86/include/asm/linkage.h8
-rw-r--r--arch/x86/include/asm/msr-index.h13
-rw-r--r--arch/x86/include/asm/mwait.h1
-rw-r--r--arch/x86/include/asm/nospec-branch.h95
-rw-r--r--arch/x86/include/asm/perf_event.h16
-rw-r--r--arch/x86/include/asm/sev.h7
-rw-r--r--arch/x86/include/asm/special_insns.h9
-rw-r--r--arch/x86/include/asm/static_call.h19
-rw-r--r--arch/x86/include/asm/tlb.h3
-rw-r--r--arch/x86/include/asm/tlbflush.h1
-rw-r--r--arch/x86/include/asm/unwind_hints.h14
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h15
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/cppc.c6
-rw-r--r--arch/x86/kernel/alternative.c71
-rw-r--r--arch/x86/kernel/amd_nb.c13
-rw-r--r--arch/x86/kernel/asm-offsets.c6
-rw-r--r--arch/x86/kernel/cpu/amd.c46
-rw-r--r--arch/x86/kernel/cpu/bugs.c488
-rw-r--r--arch/x86/kernel/cpu/common.c61
-rw-r--r--arch/x86/kernel/cpu/cpu.h2
-rw-r--r--arch/x86/kernel/cpu/hygon.c6
-rw-r--r--arch/x86/kernel/cpu/intel.c4
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c47
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h2
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/cpu/vmware.c4
-rw-r--r--arch/x86/kernel/e820.c6
-rw-r--r--arch/x86/kernel/fpu/core.c14
-rw-r--r--arch/x86/kernel/ftrace.c7
-rw-r--r--arch/x86/kernel/head_32.S1
-rw-r--r--arch/x86/kernel/head_64.S5
-rw-r--r--arch/x86/kernel/jump_label.c13
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c74
-rw-r--r--arch/x86/kernel/module.c11
-rw-r--r--arch/x86/kernel/pmem.c7
-rw-r--r--arch/x86/kernel/process.c46
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S25
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S23
-rw-r--r--arch/x86/kernel/setup.c73
-rw-r--r--arch/x86/kernel/sev-shared.c25
-rw-r--r--arch/x86/kernel/sev.c17
-rw-r--r--arch/x86/kernel/static_call.c51
-rw-r--r--arch/x86/kernel/vmlinux.lds.S9
-rw-r--r--arch/x86/kvm/emulate.c35
-rw-r--r--arch/x86/kvm/svm/vmenter.S18
-rw-r--r--arch/x86/kvm/vmx/capabilities.h4
-rw-r--r--arch/x86/kvm/vmx/nested.c3
-rw-r--r--arch/x86/kvm/vmx/run_flags.h8
-rw-r--r--arch/x86/kvm/vmx/vmenter.S194
-rw-r--r--arch/x86/kvm/vmx/vmx.c84
-rw-r--r--arch/x86/kvm/vmx/vmx.h10
-rw-r--r--arch/x86/kvm/vmx/vmx_ops.h2
-rw-r--r--arch/x86/kvm/x86.c32
-rw-r--r--arch/x86/lib/memmove_64.S7
-rw-r--r--arch/x86/lib/retpoline.S79
-rw-r--r--arch/x86/mm/extable.c16
-rw-r--r--arch/x86/mm/init.c16
-rw-r--r--arch/x86/mm/mem_encrypt_boot.S10
-rw-r--r--arch/x86/mm/pkeys.c15
-rw-r--r--arch/x86/mm/tlb.c31
-rw-r--r--arch/x86/net/bpf_jit_comp.c26
-rw-r--r--arch/x86/platform/efi/efi_thunk_64.S5
-rw-r--r--arch/x86/purgatory/Makefile10
-rw-r--r--arch/x86/purgatory/kexec-purgatory.S14
-rw-r--r--arch/x86/xen/setup.c6
-rw-r--r--arch/x86/xen/xen-asm.S30
-rw-r--r--arch/x86/xen/xen-head.S1
-rw-r--r--arch/x86/xen/xen-ops.h6
-rw-r--r--arch/xtensa/kernel/irq.c7
-rw-r--r--block/blk-merge.c1
-rw-r--r--certs/Kconfig1
-rw-r--r--drivers/acpi/acpi_video.c11
-rw-r--r--drivers/acpi/bus.c3
-rw-r--r--drivers/acpi/cppc_acpi.c6
-rw-r--r--drivers/acpi/irq.c58
-rw-r--r--drivers/amba/bus.c8
-rw-r--r--drivers/base/cpu.c8
-rw-r--r--drivers/char/random.c2
-rw-r--r--drivers/clk/clk-lan966x.c2
-rw-r--r--drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c1
-rw-r--r--drivers/clocksource/Kconfig19
-rw-r--r--drivers/clocksource/Makefile3
-rw-r--r--drivers/clocksource/sh_cmt.c8
-rw-r--r--drivers/clocksource/timer-mediatek.c114
-rw-r--r--drivers/clocksource/timer-microchip-pit64b.c64
-rw-r--r--drivers/clocksource/timer-sun4i.c2
-rw-r--r--drivers/clocksource/timer-sun5i.c2
-rw-r--r--drivers/clocksource/timer-tegra186.c514
-rw-r--r--drivers/clocksource/timer-ti-dm.c123
-rw-r--r--drivers/cpufreq/mediatek-cpufreq.c8
-rw-r--r--drivers/cpuidle/Kconfig.arm3
-rw-r--r--drivers/dma-buf/dma-resv.c2
-rw-r--r--drivers/edac/ghes_edac.c11
-rw-r--r--drivers/edac/synopsys_edac.c44
-rw-r--r--drivers/firmware/efi/reboot.c21
-rw-r--r--drivers/gpio/gpio-msc313.c15
-rw-r--r--drivers/gpio/gpio-pca953x.c22
-rw-r--r--drivers/gpio/gpio-sim.c16
-rw-r--r--drivers/gpio/gpio-tegra.c15
-rw-r--r--drivers/gpio/gpio-tegra186.c15
-rw-r--r--drivers/gpio/gpio-thunderx.c17
-rw-r--r--drivers/gpio/gpio-visconti.c15
-rw-r--r--drivers/gpio/gpio-xilinx.c2
-rw-r--r--drivers/gpio/gpiolib-cdev.c4
-rw-r--r--drivers/gpio/gpiolib.c51
-rw-r--r--drivers/gpu/drm/Kconfig1
-rw-r--r--drivers/gpu/drm/amd/display/Kconfig2
-rw-r--r--drivers/gpu/drm/drm_gem_ttm_helper.c9
-rw-r--r--drivers/gpu/drm/drm_panel_orientation_quirks.c15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h11
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h3
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h45
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c352
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c56
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h7
-rw-r--r--drivers/gpu/drm/i915/gvt/cmd_parser.c6
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-dev.c3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dmem.c6
-rw-r--r--drivers/gpu/drm/panel/panel-edp.c2
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_drv.c4
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_mmu.c2
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_drv.c17
-rw-r--r--drivers/gpu/drm/solomon/ssd130x.c2
-rw-r--r--drivers/gpu/drm/tiny/simpledrm.c2
-rw-r--r--drivers/hwmon/k10temp.c12
-rw-r--r--drivers/i2c/busses/i2c-cadence.c30
-rw-r--r--drivers/i2c/busses/i2c-imx.c2
-rw-r--r--drivers/i2c/busses/i2c-mlxcpld.c2
-rw-r--r--drivers/idle/intel_idle.c77
-rw-r--r--drivers/infiniband/hw/irdma/cm.c50
-rw-r--r--drivers/infiniband/hw/irdma/i40iw_hw.c1
-rw-r--r--drivers/infiniband/hw/irdma/icrdma_hw.c1
-rw-r--r--drivers/infiniband/hw/irdma/irdma.h1
-rw-r--r--drivers/infiniband/hw/irdma/verbs.c4
-rw-r--r--drivers/input/touchscreen/goodix.c5
-rw-r--r--drivers/input/touchscreen/usbtouchscreen.c3
-rw-r--r--drivers/input/touchscreen/wm97xx-core.c4
-rw-r--r--drivers/iommu/hyperv-iommu.c2
-rw-r--r--drivers/irqchip/Kconfig60
-rw-r--r--drivers/irqchip/Makefile4
-rw-r--r--drivers/irqchip/irq-bcm6345-l1.c4
-rw-r--r--drivers/irqchip/irq-gic-v3.c20
-rw-r--r--drivers/irqchip/irq-gic.c18
-rw-r--r--drivers/irqchip/irq-loongarch-cpu.c148
-rw-r--r--drivers/irqchip/irq-loongson-eiointc.c395
-rw-r--r--drivers/irqchip/irq-loongson-liointc.c203
-rw-r--r--drivers/irqchip/irq-loongson-pch-lpc.c205
-rw-r--r--drivers/irqchip/irq-loongson-pch-msi.c127
-rw-r--r--drivers/irqchip/irq-loongson-pch-pic.c177
-rw-r--r--drivers/irqchip/irq-mips-gic.c84
-rw-r--r--drivers/irqchip/irq-renesas-rzg2l.c393
-rw-r--r--drivers/irqchip/irq-sifive-plic.c142
-rw-r--r--drivers/irqchip/irq-stm32-exti.c250
-rw-r--r--drivers/md/raid5.c4
-rw-r--r--drivers/misc/lkdtm/Makefile9
-rw-r--r--drivers/mmc/host/sdhci-omap.c14
-rw-r--r--drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c6
-rw-r--r--drivers/net/amt.c243
-rw-r--r--drivers/net/can/rcar/rcar_canfd.c1
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c18
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c5
-rw-r--r--drivers/net/dsa/sja1105/sja1105_main.c16
-rw-r--r--drivers/net/dsa/vitesse-vsc73xx-spi.c10
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c23
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c5
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c8
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c13
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c7
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c10
-rw-r--r--drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c8
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.c10
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.h2
-rw-r--r--drivers/net/ethernet/emulex/benet/be_ethtool.c31
-rw-r--r--drivers/net/ethernet/faraday/ftgmac100.c15
-rw-r--r--drivers/net/ethernet/fungible/funeth/funeth_rx.c5
-rw-r--r--drivers/net/ethernet/fungible/funeth/funeth_tx.c20
-rw-r--r--drivers/net/ethernet/fungible/funeth/funeth_txrx.h6
-rw-r--r--drivers/net/ethernet/intel/e1000e/hw.h1
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.c4
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.h1
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c30
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c17
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf.h14
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_ethtool.c10
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c11
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_txrx.c7
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_virtchnl.c65
-rw-r--r--drivers/net/ethernet/intel/ice/ice_devids.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_devlink.c59
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_fw_update.c96
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c11
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sriov.c40
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl.c3
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c3
-rw-r--r--drivers/net/ethernet/intel/igc/igc_regs.h5
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe.h1
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c3
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c6
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c106
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_flower.c6
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_router.c1
-rw-r--r--drivers/net/ethernet/mediatek/mtk_ppe_offload.c3
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c39
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c9
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_mac.c112
-rw-r--r--drivers/net/ethernet/mscc/ocelot_fdma.c17
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/jit.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/action.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c18
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfdk/dp.c33
-rw-r--r--drivers/net/ethernet/sfc/ef10.c3
-rw-r--r--drivers/net/ethernet/sfc/ef10_sriov.c10
-rw-r--r--drivers/net/ethernet/sfc/ptp.c22
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c25
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c56
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4.h3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c7
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c5
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c22
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c12
-rw-r--r--drivers/net/ethernet/sun/sunhme.c43
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.c17
-rw-r--r--drivers/net/ipa/ipa_qmi_msg.h2
-rw-r--r--drivers/net/macsec.c33
-rw-r--r--drivers/net/pcs/pcs-xpcs.c2
-rw-r--r--drivers/net/sungem_phy.c1
-rw-r--r--drivers/net/usb/r8152.c30
-rw-r--r--drivers/net/virtio_net.c37
-rw-r--r--drivers/net/wireless/ath/ath11k/wmi.c3
-rw-r--r--drivers/net/wireless/mac80211_hwsim.c2
-rw-r--r--drivers/net/wireless/realtek/rtw88/main.h6
-rw-r--r--drivers/net/wireless/realtek/rtw88/rtw8821c.c14
-rw-r--r--drivers/net/xen-netback/rx.c1
-rw-r--r--drivers/nvme/host/core.c6
-rw-r--r--drivers/nvme/host/pci.c11
-rw-r--r--drivers/of/kexec.c13
-rw-r--r--drivers/parisc/iosapic.c2
-rw-r--r--drivers/pci/controller/pci-hyperv.c12
-rw-r--r--drivers/perf/arm-cci.c11
-rw-r--r--drivers/perf/arm-ccn.c6
-rw-r--r--drivers/perf/arm_spe_pmu.c22
-rw-r--r--drivers/perf/fsl_imx8_ddr_perf.c6
-rw-r--r--drivers/perf/hisilicon/Kconfig10
-rw-r--r--drivers/perf/hisilicon/Makefile1
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c16
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_hha_pmu.c16
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c16
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pa_pmu.c16
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pmu.c18
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pmu.h2
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c15
-rw-r--r--drivers/perf/hisilicon/hns3_pmu.c1671
-rw-r--r--drivers/perf/marvell_cn10k_tad_pmu.c12
-rw-r--r--drivers/perf/riscv_pmu.c4
-rw-r--r--drivers/perf/riscv_pmu_sbi.c106
-rw-r--r--drivers/pinctrl/Kconfig2
-rw-r--r--drivers/pinctrl/mvebu/pinctrl-armada-37xx.c65
-rw-r--r--drivers/pinctrl/pinctrl-ocelot.c224
-rw-r--r--drivers/pinctrl/qcom/pinctrl-spmi-gpio.c15
-rw-r--r--drivers/pinctrl/ralink/pinctrl-ralink.c2
-rw-r--r--drivers/pinctrl/renesas/pinctrl-rzg2l.c233
-rw-r--r--drivers/pinctrl/sunplus/sppctl.c3
-rw-r--r--drivers/platform/x86/amd-pmc.c14
-rw-r--r--drivers/platform/x86/asus-nb-wmi.c2
-rw-r--r--drivers/platform/x86/gigabyte-wmi.c1
-rw-r--r--drivers/platform/x86/intel/atomisp2/led.c3
-rw-r--r--drivers/platform/x86/intel/ifs/Kconfig3
-rw-r--r--drivers/platform/x86/x86-android-tablets.c17
-rw-r--r--drivers/power/reset/arm-versatile-reboot.c1
-rw-r--r--drivers/power/supply/ab8500_fg.c9
-rw-r--r--drivers/power/supply/power_supply_core.c24
-rw-r--r--drivers/powercap/dtpm_cpu.c33
-rw-r--r--drivers/ptp/Kconfig1
-rw-r--r--drivers/s390/crypto/ap_bus.c2
-rw-r--r--drivers/s390/net/qeth_core_main.c2
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_base.c3
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_scsih.c1
-rw-r--r--drivers/scsi/pm8001/pm8001_hwi.c19
-rw-r--r--drivers/scsi/pm8001/pm8001_init.c2
-rw-r--r--drivers/scsi/pm8001/pm80xx_hwi.c6
-rw-r--r--drivers/scsi/scsi_ioctl.c2
-rw-r--r--drivers/sh/intc/chip.c2
-rw-r--r--drivers/spi/spi-amd.c8
-rw-r--r--drivers/spi/spi-aspeed-smc.c12
-rw-r--r--drivers/spi/spi-bcm2835.c12
-rw-r--r--drivers/spi/spi-cadence-quadspi.c19
-rw-r--r--drivers/spi/spi-cadence.c2
-rw-r--r--drivers/spi/spi-rspi.c4
-rw-r--r--drivers/target/target_core_file.c3
-rw-r--r--drivers/target/target_core_iblock.c4
-rw-r--r--drivers/target/target_core_sbc.c6
-rw-r--r--drivers/tee/optee/optee_smc.h2
-rw-r--r--drivers/tee/optee/smc_abi.c4
-rw-r--r--drivers/tee/tee_core.c2
-rw-r--r--drivers/thermal/cpufreq_cooling.c6
-rw-r--r--drivers/thermal/rcar_gen3_thermal.c4
-rw-r--r--drivers/tty/pty.c14
-rw-r--r--drivers/tty/serial/8250/8250_core.c4
-rw-r--r--drivers/tty/serial/8250/8250_dma.c6
-rw-r--r--drivers/tty/serial/8250/8250_dw.c26
-rw-r--r--drivers/tty/serial/8250/8250_port.c6
-rw-r--r--drivers/tty/serial/amba-pl011.c23
-rw-r--r--drivers/tty/serial/mvebu-uart.c25
-rw-r--r--drivers/tty/serial/samsung_tty.c5
-rw-r--r--drivers/tty/serial/serial_core.c5
-rw-r--r--drivers/tty/serial/stm32-usart.c2
-rw-r--r--drivers/tty/tty.h3
-rw-r--r--drivers/tty/tty_buffer.c46
-rw-r--r--drivers/tty/vt/vt.c2
-rw-r--r--drivers/ufs/core/ufshcd.c62
-rw-r--r--drivers/ufs/host/ufshcd-pltfrm.c15
-rw-r--r--drivers/usb/dwc3/dwc3-am62.c8
-rw-r--r--drivers/usb/dwc3/gadget.c4
-rw-r--r--drivers/usb/gadget/function/uvc_configfs.c6
-rw-r--r--drivers/usb/host/ehci-fsl.c11
-rw-r--r--drivers/usb/host/fsl-mph-dr-of.c3
-rw-r--r--drivers/usb/serial/ftdi_sio.c3
-rw-r--r--drivers/usb/serial/ftdi_sio_ids.h6
-rw-r--r--drivers/usb/typec/class.c1
-rw-r--r--drivers/vfio/vfio.c17
-rw-r--r--drivers/virt/coco/sev-guest/sev-guest.c9
-rw-r--r--drivers/xen/events/events_base.c7
-rw-r--r--drivers/xen/gntdev.c6
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/attr.c74
-rw-r--r--fs/btrfs/ctree.h18
-rw-r--r--fs/btrfs/delayed-inode.c84
-rw-r--r--fs/btrfs/disk-io.c179
-rw-r--r--fs/btrfs/extent-tree.c2
-rw-r--r--fs/btrfs/extent_io.c122
-rw-r--r--fs/btrfs/inode.c29
-rw-r--r--fs/btrfs/send.c40
-rw-r--r--fs/btrfs/tests/btrfs-tests.c24
-rw-r--r--fs/btrfs/transaction.c112
-rw-r--r--fs/btrfs/zoned.c34
-rw-r--r--fs/ceph/addr.c11
-rw-r--r--fs/cifs/connect.c2
-rw-r--r--fs/cifs/sess.c11
-rw-r--r--fs/cifs/smb2pdu.c13
-rw-r--r--fs/dlm/Kconfig9
-rw-r--r--fs/dlm/Makefile2
-rw-r--r--fs/dlm/ast.c4
-rw-r--r--fs/dlm/config.c21
-rw-r--r--fs/dlm/config.h3
-rw-r--r--fs/dlm/dlm_internal.h32
-rw-r--r--fs/dlm/lock.c143
-rw-r--r--fs/dlm/lock.h17
-rw-r--r--fs/dlm/lockspace.c31
-rw-r--r--fs/dlm/lowcomms.c4
-rw-r--r--fs/dlm/member.c30
-rw-r--r--fs/dlm/plock.c51
-rw-r--r--fs/dlm/recoverd.c35
-rw-r--r--fs/dlm/user.c21
-rw-r--r--fs/erofs/compress.h2
-rw-r--r--fs/erofs/data.c39
-rw-r--r--fs/erofs/decompressor.c18
-rw-r--r--fs/erofs/decompressor_lzma.c1
-rw-r--r--fs/erofs/dir.c20
-rw-r--r--fs/erofs/zdata.c797
-rw-r--r--fs/erofs/zdata.h119
-rw-r--r--fs/erofs/zpvec.h159
-rw-r--r--fs/exec.c2
-rw-r--r--fs/ext2/inode.c8
-rw-r--r--fs/ext2/super.c18
-rw-r--r--fs/ext4/inode.c14
-rw-r--r--fs/f2fs/file.c22
-rw-r--r--fs/f2fs/recovery.c10
-rw-r--r--fs/fat/file.c9
-rw-r--r--fs/io_uring.c14
-rw-r--r--fs/jfs/file.c4
-rw-r--r--fs/ksmbd/vfs.c2
-rw-r--r--fs/ksmbd/vfs.h2
-rw-r--r--fs/lockd/svcsubs.c14
-rw-r--r--fs/locks.c77
-rw-r--r--fs/netfs/buffered_read.c17
-rw-r--r--fs/nfsd/nfs4xdr.c9
-rw-r--r--fs/nfsd/nfsd.h3
-rw-r--r--fs/nilfs2/nilfs.h3
-rw-r--r--fs/notify/fanotify/fanotify.c19
-rw-r--r--fs/notify/fanotify/fanotify.h2
-rw-r--r--fs/notify/fanotify/fanotify_user.c110
-rw-r--r--fs/notify/fdinfo.c6
-rw-r--r--fs/notify/fsnotify.c23
-rw-r--r--fs/notify/inotify/inotify_user.c2
-rw-r--r--fs/ntfs/attrib.c8
-rw-r--r--fs/ocfs2/file.c2
-rw-r--r--fs/ocfs2/ocfs2.h4
-rw-r--r--fs/ocfs2/slot_map.c46
-rw-r--r--fs/ocfs2/super.c21
-rw-r--r--fs/open.c60
-rw-r--r--fs/overlayfs/copy_up.c4
-rw-r--r--fs/overlayfs/inode.c87
-rw-r--r--fs/overlayfs/overlayfs.h15
-rw-r--r--fs/posix_acl.c168
-rw-r--r--fs/quota/dquot.c17
-rw-r--r--fs/read_write.c3
-rw-r--r--fs/reiserfs/inode.c16
-rw-r--r--fs/remap_range.c3
-rw-r--r--fs/userfaultfd.c12
-rw-r--r--fs/xattr.c25
-rw-r--r--fs/xfs/xfs_iops.c14
-rw-r--r--fs/zonefs/super.c2
-rw-r--r--include/asm-generic/barrier.h8
-rw-r--r--include/asm-generic/io.h31
-rw-r--r--include/asm-generic/tlb.h68
-rw-r--r--include/clocksource/timer-ti-dm.h144
-rw-r--r--include/linux/acpi.h4
-rw-r--r--include/linux/cgroup-defs.h7
-rw-r--r--include/linux/cpu.h2
-rw-r--r--include/linux/cpuhotplug.h3
-rw-r--r--include/linux/evm.h6
-rw-r--r--include/linux/fanotify.h14
-rw-r--r--include/linux/fs.h140
-rw-r--r--include/linux/fsnotify_backend.h89
-rw-r--r--include/linux/gfp.h2
-rw-r--r--include/linux/gpio/driver.h42
-rw-r--r--include/linux/highmem.h18
-rw-r--r--include/linux/huge_mm.h12
-rw-r--r--include/linux/ima.h5
-rw-r--r--include/linux/irq.h35
-rw-r--r--include/linux/irqchip/mmp.h3
-rw-r--r--include/linux/irqdesc.h5
-rw-r--r--include/linux/jump_label.h9
-rw-r--r--include/linux/kernel_stat.h7
-rw-r--r--include/linux/kexec.h6
-rw-r--r--include/linux/kvm_host.h13
-rw-r--r--include/linux/lockdep.h30
-rw-r--r--include/linux/mm.h14
-rw-r--r--include/linux/mnt_idmapping.h305
-rw-r--r--include/linux/netfs.h2
-rw-r--r--include/linux/objtool.h9
-rw-r--r--include/linux/of.h2
-rw-r--r--include/linux/once_lite.h20
-rw-r--r--include/linux/pci_ids.h3
-rw-r--r--include/linux/perf/riscv_pmu.h4
-rw-r--r--include/linux/perf_event.h2
-rw-r--r--include/linux/posix_acl.h1
-rw-r--r--include/linux/posix_acl_xattr.h34
-rw-r--r--include/linux/quotaops.h15
-rw-r--r--include/linux/reset.h2
-rw-r--r--include/linux/sched.h2
-rw-r--r--include/linux/sched/rt.h8
-rw-r--r--include/linux/sched/task.h2
-rw-r--r--include/linux/sched/topology.h1
-rw-r--r--include/linux/security.h8
-rw-r--r--include/linux/serial_core.h5
-rw-r--r--include/linux/stmmac.h1
-rw-r--r--include/linux/wait.h9
-rw-r--r--include/linux/xattr.h2
-rw-r--r--include/net/addrconf.h3
-rw-r--r--include/net/amt.h20
-rw-r--r--include/net/bluetooth/l2cap.h1
-rw-r--r--include/net/cfg80211.h5
-rw-r--r--include/net/inet_connection_sock.h10
-rw-r--r--include/net/inet_hashtables.h2
-rw-r--r--include/net/inet_sock.h9
-rw-r--r--include/net/ip.h6
-rw-r--r--include/net/mac80211.h3
-rw-r--r--include/net/netfilter/nf_tables.h14
-rw-r--r--include/net/protocol.h4
-rw-r--r--include/net/raw.h2
-rw-r--r--include/net/route.h2
-rw-r--r--include/net/sock.h10
-rw-r--r--include/net/tcp.h22
-rw-r--r--include/net/tls.h4
-rw-r--r--include/net/udp.h4
-rw-r--r--include/trace/events/dlm.h118
-rw-r--r--include/trace/events/iocost.h2
-rw-r--r--include/trace/events/kmem.h40
-rw-r--r--include/trace/events/sock.h6
-rw-r--r--include/uapi/asm-generic/fcntl.h2
-rw-r--r--include/uapi/linux/bpf.h11
-rw-r--r--include/uapi/linux/fanotify.h8
-rw-r--r--include/uapi/linux/input.h11
-rw-r--r--include/uapi/linux/kvm.h3
-rw-r--r--include/uapi/linux/perf_event.h5
-rw-r--r--include/uapi/linux/tty.h3
-rw-r--r--ipc/namespace.c5
-rw-r--r--kernel/bpf/core.c8
-rw-r--r--kernel/bpf/helpers.c12
-rw-r--r--kernel/cgroup/cgroup.c37
-rw-r--r--kernel/cgroup/rstat.c44
-rw-r--r--kernel/configs/x86_debug.config3
-rw-r--r--kernel/events/core.c66
-rw-r--r--kernel/events/ring_buffer.c5
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/irq/Kconfig2
-rw-r--r--kernel/irq/chip.c11
-rw-r--r--kernel/irq/debugfs.c2
-rw-r--r--kernel/irq/generic-chip.c2
-rw-r--r--kernel/irq/ipi.c16
-rw-r--r--kernel/irq/irqdesc.c2
-rw-r--r--kernel/irq/irqdomain.c14
-rw-r--r--kernel/irq/manage.c10
-rw-r--r--kernel/irq/pm.c2
-rw-r--r--kernel/jump_label.c41
-rw-r--r--kernel/kexec_file.c11
-rw-r--r--kernel/locking/lockdep.c7
-rw-r--r--kernel/locking/rwsem.c30
-rw-r--r--kernel/module/internal.h13
-rw-r--r--kernel/module/kallsyms.c35
-rw-r--r--kernel/module/main.c9
-rw-r--r--kernel/printk/printk.c13
-rw-r--r--kernel/rcu/srcutree.c98
-rw-r--r--kernel/rseq.c23
-rw-r--r--kernel/sched/core.c215
-rw-r--r--kernel/sched/core_sched.c15
-rw-r--r--kernel/sched/cpufreq_schedutil.c5
-rw-r--r--kernel/sched/cputime.c15
-rw-r--r--kernel/sched/deadline.c11
-rw-r--r--kernel/sched/fair.c818
-rw-r--r--kernel/sched/features.h3
-rw-r--r--kernel/sched/pelt.h40
-rw-r--r--kernel/sched/rt.c15
-rw-r--r--kernel/sched/sched.h63
-rw-r--r--kernel/sched/topology.c23
-rw-r--r--kernel/sysctl.c57
-rw-r--r--kernel/time/posix-timers.c19
-rw-r--r--kernel/trace/Kconfig3
-rw-r--r--kernel/trace/trace.c11
-rw-r--r--kernel/trace/trace_events_hist.c2
-rw-r--r--kernel/watch_queue.c103
-rw-r--r--kernel/workqueue.c5
-rw-r--r--lib/Kconfig.ubsan3
-rw-r--r--mm/damon/vaddr.c3
-rw-r--r--mm/gup.c6
-rw-r--r--mm/hmm.c19
-rw-r--r--mm/hugetlb.c10
-rw-r--r--mm/ioremap.c26
-rw-r--r--mm/kasan/common.c3
-rw-r--r--mm/kfence/core.c18
-rw-r--r--mm/memory.c36
-rw-r--r--mm/memremap.c6
-rw-r--r--mm/page_alloc.c31
-rw-r--r--mm/rmap.c27
-rw-r--r--mm/secretmem.c33
-rw-r--r--mm/shmem.c7
-rw-r--r--mm/slab.c20
-rw-r--r--mm/slab.h39
-rw-r--r--mm/slab_common.c36
-rw-r--r--mm/slob.c33
-rw-r--r--mm/slub.c98
-rw-r--r--mm/sparse-vmemmap.c8
-rw-r--r--mm/swap_slots.c2
-rw-r--r--mm/userfaultfd.c5
-rw-r--r--net/8021q/vlan_netlink.c10
-rw-r--r--net/bluetooth/hci_sync.c6
-rw-r--r--net/bluetooth/l2cap_core.c61
-rw-r--r--net/bluetooth/mgmt.c1
-rw-r--r--net/bridge/br_netlink.c8
-rw-r--r--net/caif/caif_socket.c20
-rw-r--r--net/core/dev.c8
-rw-r--r--net/core/filter.c5
-rw-r--r--net/core/secure_seq.c4
-rw-r--r--net/core/sock_reuseport.c4
-rw-r--r--net/decnet/af_decnet.c4
-rw-r--r--net/dsa/port.c7
-rw-r--r--net/dsa/switch.c1
-rw-r--r--net/ipv4/af_inet.c22
-rw-r--r--net/ipv4/ah4.c2
-rw-r--r--net/ipv4/cipso_ipv4.c12
-rw-r--r--net/ipv4/esp4.c2
-rw-r--r--net/ipv4/fib_semantics.c6
-rw-r--r--net/ipv4/fib_trie.c9
-rw-r--r--net/ipv4/icmp.c22
-rw-r--r--net/ipv4/igmp.c49
-rw-r--r--net/ipv4/inet_connection_sock.c5
-rw-r--r--net/ipv4/inet_timewait_sock.c3
-rw-r--r--net/ipv4/inetpeer.c12
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_input.c37
-rw-r--r--net/ipv4/ip_sockglue.c8
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c4
-rw-r--r--net/ipv4/nexthop.c5
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/route.c10
-rw-r--r--net/ipv4/syncookies.c11
-rw-r--r--net/ipv4/sysctl_net_ipv4.c77
-rw-r--r--net/ipv4/tcp.c39
-rw-r--r--net/ipv4/tcp_fastopen.c9
-rw-r--r--net/ipv4/tcp_input.c94
-rw-r--r--net/ipv4/tcp_ipv4.c6
-rw-r--r--net/ipv4/tcp_metrics.c13
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/tcp_output.c60
-rw-r--r--net/ipv4/tcp_recovery.c6
-rw-r--r--net/ipv4/tcp_timer.c30
-rw-r--r--net/ipv6/af_inet6.c2
-rw-r--r--net/ipv6/icmp.c2
-rw-r--r--net/ipv6/ip6_input.c23
-rw-r--r--net/ipv6/mcast.c14
-rw-r--r--net/ipv6/ping.c6
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/ipv6/seg6_iptunnel.c5
-rw-r--r--net/ipv6/seg6_local.c2
-rw-r--r--net/ipv6/syncookies.c3
-rw-r--r--net/ipv6/tcp_ipv6.c13
-rw-r--r--net/ipv6/udp.c9
-rw-r--r--net/mac80211/cfg.c4
-rw-r--r--net/mac80211/iface.c1
-rw-r--r--net/mac80211/rx.c3
-rw-r--r--net/mac80211/tx.c36
-rw-r--r--net/mac80211/util.c3
-rw-r--r--net/mac80211/wme.c4
-rw-r--r--net/mptcp/options.c2
-rw-r--r--net/mptcp/protocol.c12
-rw-r--r--net/mptcp/subflow.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c22
-rw-r--r--net/netfilter/nf_conntrack_netlink.c1
-rw-r--r--net/netfilter/nf_conntrack_standalone.c3
-rw-r--r--net/netfilter/nf_log_syslog.c8
-rw-r--r--net/netfilter/nf_synproxy_core.c2
-rw-r--r--net/netfilter/nf_tables_api.c78
-rw-r--r--net/netfilter/nfnetlink_queue.c7
-rw-r--r--net/netfilter/nft_queue.c27
-rw-r--r--net/sched/cls_api.c16
-rw-r--r--net/sctp/associola.c5
-rw-r--r--net/sctp/protocol.c2
-rw-r--r--net/sctp/stream.c19
-rw-r--r--net/sctp/stream_sched.c2
-rw-r--r--net/smc/smc_llc.c2
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/tls/tls_device.c19
-rw-r--r--net/tls/tls_main.c7
-rw-r--r--net/wireless/sme.c3
-rw-r--r--net/xfrm/xfrm_policy.c5
-rw-r--r--net/xfrm/xfrm_state.c2
-rw-r--r--samples/fprobe/fprobe_example.c2
-rw-r--r--samples/kprobes/kprobe_example.c5
-rw-r--r--samples/kprobes/kretprobe_example.c5
-rw-r--r--scripts/Makefile.lib1
-rw-r--r--scripts/Makefile.vmlinux_o2
-rw-r--r--scripts/gdb/linux/symbols.py2
-rwxr-xr-xscripts/remove-stale-files2
-rw-r--r--security/Kconfig11
-rw-r--r--security/integrity/evm/evm_crypto.c7
-rw-r--r--security/integrity/evm/evm_main.c12
-rw-r--r--security/integrity/ima/ima_appraise.c3
-rw-r--r--security/integrity/ima/ima_crypto.c1
-rw-r--r--security/integrity/ima/ima_efi.c2
-rw-r--r--security/integrity/ima/ima_kexec.c2
-rw-r--r--security/integrity/ima/ima_policy.c4
-rw-r--r--security/integrity/ima/ima_template_lib.c6
-rw-r--r--security/security.c5
-rw-r--r--sound/pci/hda/patch_conexant.c1
-rw-r--r--sound/pci/hda/patch_realtek.c20
-rw-r--r--sound/soc/codecs/arizona.c4
-rw-r--r--sound/soc/codecs/cs47l92.c8
-rw-r--r--sound/soc/codecs/max98396.c10
-rw-r--r--sound/soc/codecs/rt5640.c30
-rw-r--r--sound/soc/codecs/sgtl5000.c9
-rw-r--r--sound/soc/codecs/sgtl5000.h1
-rw-r--r--sound/soc/codecs/tas2764.c46
-rw-r--r--sound/soc/codecs/tas2764.h6
-rw-r--r--sound/soc/codecs/tlv320adcx140.c13
-rw-r--r--sound/soc/codecs/wcd9335.c17
-rw-r--r--sound/soc/codecs/wm5102.c21
-rw-r--r--sound/soc/codecs/wm8998.c21
-rw-r--r--sound/soc/generic/audio-graph-card2.c6
-rw-r--r--sound/soc/intel/boards/sof_rt5682.c10
-rw-r--r--sound/soc/intel/skylake/skl-nhlt.c40
-rw-r--r--sound/soc/qcom/qdsp6/q6apm.c1
-rw-r--r--sound/soc/rockchip/rockchip_i2s.c160
-rw-r--r--sound/soc/ti/omap-mcbsp-priv.h2
-rw-r--r--sound/soc/ti/omap-mcbsp-st.c14
-rw-r--r--sound/soc/ti/omap-mcbsp.c19
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h13
-rw-r--r--tools/arch/x86/include/asm/disabled-features.h21
-rw-r--r--tools/arch/x86/include/asm/msr-index.h13
-rw-r--r--tools/include/linux/objtool.h9
-rw-r--r--tools/include/uapi/asm-generic/fcntl.h11
-rw-r--r--tools/include/uapi/linux/bpf.h11
-rw-r--r--tools/include/uapi/linux/kvm.h3
-rw-r--r--tools/objtool/arch/x86/decode.c5
-rw-r--r--tools/objtool/builtin-check.c13
-rw-r--r--tools/objtool/check.c330
-rw-r--r--tools/objtool/include/objtool/arch.h1
-rw-r--r--tools/objtool/include/objtool/builtin.h2
-rw-r--r--tools/objtool/include/objtool/check.h24
-rw-r--r--tools/objtool/include/objtool/elf.h1
-rw-r--r--tools/objtool/include/objtool/objtool.h1
-rw-r--r--tools/objtool/objtool.c1
-rw-r--r--tools/perf/builtin-trace.c2
-rwxr-xr-xtools/perf/scripts/python/arm-cs-trace-disasm.py34
-rw-r--r--tools/perf/tests/perf-time-to-tsc.c27
-rw-r--r--tools/perf/util/bpf-loader.c18
-rw-r--r--tools/perf/util/symbol-elf.c56
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c10
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c4
-rw-r--r--tools/testing/selftests/gpio/Makefile2
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c8
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile2
-rwxr-xr-xtools/testing/selftests/net/fib_nexthop_nongw.sh119
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile2
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile2
-rw-r--r--tools/testing/selftests/rseq/rseq-riscv.h50
-rw-r--r--tools/testing/selftests/rseq/rseq.c3
-rw-r--r--tools/vm/slabinfo.c26
1021 files changed, 19023 insertions, 8659 deletions
diff --git a/.mailmap b/.mailmap
index 13e4f504e17f..71577c396252 100644
--- a/.mailmap
+++ b/.mailmap
@@ -60,6 +60,10 @@ Arnd Bergmann <arnd@arndb.de>
Atish Patra <atishp@atishpatra.org> <atish.patra@wdc.com>
Axel Dyks <xl@xlsigned.net>
Axel Lin <axel.lin@gmail.com>
+Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@linaro.org>
+Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@spreadtrum.com>
+Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@unisoc.com>
+Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang7@gmail.com>
Bart Van Assche <bvanassche@acm.org> <bart.vanassche@sandisk.com>
Bart Van Assche <bvanassche@acm.org> <bart.vanassche@wdc.com>
Ben Gardner <bgardner@wabtec.com>
@@ -135,6 +139,8 @@ Frank Rowand <frowand.list@gmail.com> <frowand@mvista.com>
Frank Zago <fzago@systemfabricworks.com>
Gao Xiang <xiang@kernel.org> <gaoxiang25@huawei.com>
Gao Xiang <xiang@kernel.org> <hsiangkao@aol.com>
+Gao Xiang <xiang@kernel.org> <hsiangkao@linux.alibaba.com>
+Gao Xiang <xiang@kernel.org> <hsiangkao@redhat.com>
Gerald Schaefer <gerald.schaefer@linux.ibm.com> <geraldsc@de.ibm.com>
Gerald Schaefer <gerald.schaefer@linux.ibm.com> <gerald.schaefer@de.ibm.com>
Gerald Schaefer <gerald.schaefer@linux.ibm.com> <geraldsc@linux.vnet.ibm.com>
@@ -371,6 +377,7 @@ Sean Nyekjaer <sean@geanix.com> <sean.nyekjaer@prevas.dk>
Sebastian Reichel <sre@kernel.org> <sebastian.reichel@collabora.co.uk>
Sebastian Reichel <sre@kernel.org> <sre@debian.org>
Sedat Dilek <sedat.dilek@gmail.com> <sedat.dilek@credativ.de>
+Seth Forshee <sforshee@kernel.org> <seth.forshee@canonical.com>
Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com>
Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com>
Shuah Khan <shuah@kernel.org> <shuah.khan@hp.com>
diff --git a/CREDITS b/CREDITS
index 91a564c17012..1841184c834d 100644
--- a/CREDITS
+++ b/CREDITS
@@ -627,6 +627,10 @@ S: 48287 Sawleaf
S: Fremont, California 94539
S: USA
+N: Tomas Cech
+E: sleep_walker@suse.com
+D: arm/palm treo support
+
N: Florent Chabaud
E: florent.chabaud@polytechnique.org
D: software suspend
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index bcc974d276dc..df79e129d097 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -493,12 +493,13 @@ What: /sys/devices/system/cpu/cpuX/regs/
/sys/devices/system/cpu/cpuX/regs/identification/
/sys/devices/system/cpu/cpuX/regs/identification/midr_el1
/sys/devices/system/cpu/cpuX/regs/identification/revidr_el1
+ /sys/devices/system/cpu/cpuX/regs/identification/smidr_el1
Date: June 2016
Contact: Linux ARM Kernel Mailing list <linux-arm-kernel@lists.infradead.org>
Description: AArch64 CPU registers
'identification' directory exposes the CPU ID registers for
- identifying model and revision of the CPU.
+ identifying model and revision of the CPU and SMCU.
What: /sys/devices/system/cpu/aarch32_el0
Date: May 2021
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2522b11e593f..5e9147fe8968 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -400,6 +400,12 @@
arm64.nomte [ARM64] Unconditionally disable Memory Tagging Extension
support
+ arm64.nosve [ARM64] Unconditionally disable Scalable Vector
+ Extension support
+
+ arm64.nosme [ARM64] Unconditionally disable Scalable Matrix
+ Extension support
+
ataflop= [HW,M68k]
atarimouse= [HW,MOUSE] Atari Mouse
@@ -3161,7 +3167,7 @@
improves system performance, but it may also
expose users to several CPU vulnerabilities.
Equivalent to: nopti [X86,PPC]
- kpti=0 [ARM64]
+ if nokaslr then kpti=0 [ARM64]
nospectre_v1 [X86,PPC]
nobp=0 [S390]
nospectre_v2 [X86,PPC,S390,ARM64]
@@ -3176,6 +3182,7 @@
no_entry_flush [PPC]
no_uaccess_flush [PPC]
mmio_stale_data=off [X86]
+ retbleed=off [X86]
Exceptions:
This does not have any effect on
@@ -3198,6 +3205,7 @@
mds=full,nosmt [X86]
tsx_async_abort=full,nosmt [X86]
mmio_stale_data=full,nosmt [X86]
+ retbleed=auto,nosmt [X86]
mminit_loglevel=
[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
@@ -5197,6 +5205,30 @@
retain_initrd [RAM] Keep initrd memory after extraction
+ retbleed= [X86] Control mitigation of RETBleed (Arbitrary
+ Speculative Code Execution with Return Instructions)
+ vulnerability.
+
+ off - no mitigation
+ auto - automatically select a mitigation
+ auto,nosmt - automatically select a mitigation,
+ disabling SMT if necessary for
+ the full mitigation (only on Zen1
+ and older without STIBP).
+ ibpb - mitigate short speculation windows on
+ basic block boundaries too. Safe, highest
+ perf impact.
+ unret - force enable untrained return thunks,
+ only effective on AMD f15h-f17h
+ based systems.
+ unret,nosmt - like unret, will disable SMT when STIBP
+ is not available.
+
+ Selecting 'auto' will choose a mitigation method at run
+ time according to the CPU.
+
+ Not specifying this option is equivalent to retbleed=auto.
+
rfkill.default_state=
0 "airplane mode". All wifi, bluetooth, wimax, gps, fm,
etc. communication is blocked by default.
@@ -5568,6 +5600,7 @@
eibrs - enhanced IBRS
eibrs,retpoline - enhanced IBRS + Retpolines
eibrs,lfence - enhanced IBRS + LFENCE
+ ibrs - use IBRS to protect kernel
Not specifying this option is equivalent to
spectre_v2=auto.
@@ -5771,6 +5804,24 @@
expediting. Set to zero to disable automatic
expediting.
+ srcutree.srcu_max_nodelay [KNL]
+ Specifies the number of no-delay instances
+ per jiffy for which the SRCU grace period
+ worker thread will be rescheduled with zero
+ delay. Beyond this limit, the worker thread will
+ be rescheduled with a sleep delay of one jiffy.
+
+ srcutree.srcu_max_nodelay_phase [KNL]
+ Specifies, for each grace-period phase, the number of
+ non-sleeping polls of readers. Beyond this limit, the
+ grace period worker thread will be rescheduled with a
+ sleep delay of one jiffy between each rescan of the
+ readers for that grace-period phase.
+
+ srcutree.srcu_retry_check_delay [KNL]
+ Specifies the number of microseconds of non-sleeping
+ delay between each non-sleeping poll of readers.
+
srcutree.small_contention_lim [KNL]
Specifies the number of update-side contention
events per jiffy will be tolerated before
diff --git a/Documentation/admin-guide/perf/hns3-pmu.rst b/Documentation/admin-guide/perf/hns3-pmu.rst
new file mode 100644
index 000000000000..578407e487d6
--- /dev/null
+++ b/Documentation/admin-guide/perf/hns3-pmu.rst
@@ -0,0 +1,136 @@
+======================================
+HNS3 Performance Monitoring Unit (PMU)
+======================================
+
+HNS3 (HiSilicon Network System 3) Performance Monitoring Unit (PMU) is an
+End Point device used to collect performance statistics of the HiSilicon SoC NIC.
+On Hip09, each SICL (Super I/O Cluster) has one PMU device.
+
+HNS3 PMU supports collection of performance statistics such as bandwidth,
+latency, packet rate and interrupt rate.
+
+Each HNS3 PMU supports 8 hardware events.
+
+HNS3 PMU driver
+===============
+
+The HNS3 PMU driver registers a perf PMU with the name of its sicl id.::
+
+ /sys/devices/hns3_pmu_sicl_<sicl_id>
+
+The PMU driver provides descriptions of the available events, filter modes,
+format, identifier and cpumask in sysfs.
+
+The "events" directory describes the event code of all supported events
+shown in perf list.
+
+The "filtermode" directory describes the supported filter modes of each
+event.
+
+The "format" directory describes all formats of the config (events) and
+config1 (filter options) fields of the perf_event_attr structure.
+
+The "identifier" file shows version of PMU hardware device.
+
+The "bdf_min" and "bdf_max" files show the supported bdf range of each
+pmu device.
+
+The "hw_clk_freq" file shows the hardware clock frequency of each pmu
+device.
+
+Example usage of checking event code and subevent code::
+
+ $# cat /sys/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_time
+ config=0x00204
+ $# cat /sys/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_packet_num
+ config=0x10204
+
+Each performance statistic has a pair of events that provide the two values
+needed to calculate the real performance data in userspace.
+
+Bits 0~15 of config (here 0x0204) are the true hardware event code. If two
+events have the same value in bits 0~15 of config, they form an event pair.
+Bit 16 of config selects counter 0 or counter 1 of the hardware event.
+
+After getting the two values of an event pair in userspace, the real
+performance data is calculated as::
+
+ counter 0 / counter 1
+
+Example usage of checking supported filter mode::
+
+ $# cat /sys/devices/hns3_pmu_sicl_0/filtermode/bw_ssu_rpu_byte_num
+ filter mode supported: global/port/port-tc/func/func-queue/
+
+Example usage of perf::
+
+ $# perf list
+ hns3_pmu_sicl_0/bw_ssu_rpu_byte_num/ [kernel PMU event]
+ hns3_pmu_sicl_0/bw_ssu_rpu_time/ [kernel PMU event]
+ ------------------------------------------
+
+ $# perf stat -g -e hns3_pmu_sicl_0/bw_ssu_rpu_byte_num,global=1/ -e hns3_pmu_sicl_0/bw_ssu_rpu_time,global=1/ -I 1000
+ or
+ $# perf stat -g -e hns3_pmu_sicl_0/config=0x00002,global=1/ -e hns3_pmu_sicl_0/config=0x10002,global=1/ -I 1000
+
+
+Filter modes
+--------------
+
+1. global mode
+The PMU collects performance statistics for all HNS3 PCIe functions of the IO die.
+Setting the "global" filter option to 1 enables this mode.
+Example usage of perf::
+
+ $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,global=1/ -I 1000
+
+2. port mode
+The PMU collects performance statistics for one whole physical port. The port id
+is the same as the mac id. The "tc" filter option must be set to 0xF in this mode;
+here tc stands for traffic class.
+
+Example usage of perf::
+
+ $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,port=0,tc=0xF/ -I 1000
+
+3. port-tc mode
+The PMU collects performance statistics for one tc of a physical port. The port
+id is the same as the mac id. The "tc" filter option must be set to 0 ~ 7 in
+this mode.
+Example usage of perf::
+
+ $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,port=0,tc=0/ -I 1000
+
+4. func mode
+The PMU collects performance statistics for one PF/VF. The function id is the
+BDF of the PF/VF; its conversion formula is::
+
+ func = (bus << 8) + (device << 3) + (function)
+
+For example::
+ BDF func
+ 35:00.0 0x3500
+ 35:00.1 0x3501
+ 35:01.0 0x3508
+
+In this mode, the "queue" filter option must be set to 0xFFFF.
+Example usage of perf::
+
+ $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,bdf=0x3500,queue=0xFFFF/ -I 1000
+
+5. func-queue mode
+The PMU collects performance statistics for one queue of a PF/VF. The function
+id is the BDF of the PF/VF; the "queue" filter option must be set to the exact
+queue id of the function.
+Example usage of perf::
+
+ $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,bdf=0x3500,queue=0/ -I 1000
+
+6. func-intr mode
+The PMU collects performance statistics for one interrupt of a PF/VF. The
+function id is the BDF of the PF/VF; the "intr" filter option must be set to
+the exact interrupt id of the function.
+Example usage of perf::
+
+ $# perf stat -a -e hns3_pmu_sicl_0/config=0x00301,bdf=0x3500,intr=0/ -I 1000
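
A quick illustration of the encodings described in the new hns3-pmu.rst above.
This is a minimal userspace C sketch, not part of the patch; the helper names
are invented here, while the bit layout (bits 0~15 event code, bit 16 counter
select) and the func = (bus << 8) + (device << 3) + (function) formula are
taken directly from the documentation text:

    #include <stdint.h>
    #include <stdio.h>

    /* bits 0~15 of config hold the true hardware event code */
    static uint16_t hns3_event_code(uint32_t config)
    {
            return config & 0xffff;
    }

    /* bit 16 of config selects counter 0 or counter 1 of the event pair */
    static unsigned int hns3_counter_index(uint32_t config)
    {
            return (config >> 16) & 0x1;
    }

    /* func = (bus << 8) + (device << 3) + (function), e.g. 35:00.0 -> 0x3500 */
    static uint16_t hns3_bdf_to_func(unsigned int bus, unsigned int dev,
                                     unsigned int fn)
    {
            return (bus << 8) | (dev << 3) | fn;
    }

    int main(void)
    {
            uint32_t config = 0x10204;  /* one half of the example event pair */

            printf("event code 0x%04x, counter %u\n",
                   hns3_event_code(config), hns3_counter_index(config));
            printf("bdf 35:00.0 -> func 0x%04x\n",
                   hns3_bdf_to_func(0x35, 0x00, 0x0));
            return 0;
    }

Fed the example values from the text, this prints event code 0x0204 with
counter 1, and func 0x3500 for BDF 35:00.0.
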
diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
index 69b23f087c05..9c9ece88ce53 100644
--- a/Documentation/admin-guide/perf/index.rst
+++ b/Documentation/admin-guide/perf/index.rst
@@ -9,6 +9,7 @@ Performance monitor support
hisi-pmu
hisi-pcie-pmu
+ hns3-pmu
imx-ddr
qcom_l2_pmu
qcom_l3_pmu
diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst
index aec2cd2aaea7..19754beb5a4e 100644
--- a/Documentation/admin-guide/pm/cpuidle.rst
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@@ -612,8 +612,8 @@ the ``menu`` governor to be used on the systems that use the ``ladder`` governor
by default this way, for example.
The other kernel command line parameters controlling CPU idle time management
-described below are only relevant for the *x86* architecture and some of
-them affect Intel processors only.
+described below are only relevant for the *x86* architecture and references
+to ``intel_idle`` affect Intel processors only.
The *x86* architecture support code recognizes three kernel command line
options related to CPU idle time management: ``idle=poll``, ``idle=halt``,
@@ -635,10 +635,13 @@ idle, so it very well may hurt single-thread computations performance as well as
energy-efficiency. Thus using it for performance reasons may not be a good idea
at all.]
-The ``idle=nomwait`` option disables the ``intel_idle`` driver and causes
-``acpi_idle`` to be used (as long as all of the information needed by it is
-there in the system's ACPI tables), but it is not allowed to use the
-``MWAIT`` instruction of the CPUs to ask the hardware to enter idle states.
+The ``idle=nomwait`` option prevents the use of the ``MWAIT`` instruction of
+the CPU to enter idle states. When this option is used, the ``acpi_idle``
+driver will use the ``HLT`` instruction instead of ``MWAIT``. On systems
+running Intel processors, this option disables the ``intel_idle`` driver
+and forces the use of the ``acpi_idle`` driver instead. Note that in either
+case, the ``acpi_idle`` driver will function only if all the information
+needed by it is in the system's ACPI tables.
In addition to the architecture-level kernel command line options affecting CPU
idle time management, there are parameters affecting individual ``CPUIdle``
diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst
index 3d116fb536c5..31fc10b833dd 100644
--- a/Documentation/arm64/elf_hwcaps.rst
+++ b/Documentation/arm64/elf_hwcaps.rst
@@ -301,6 +301,10 @@ HWCAP2_WFXT
Functionality implied by ID_AA64ISAR2_EL1.WFXT == 0b0010.
+HWCAP2_EBF16
+
+ Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0010.
+
4. Unused AT_HWCAP bits
-----------------------
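
On the HWCAP2_EBF16 entry added above: userspace normally discovers such
capabilities through the auxiliary vector. The following is a small, hedged
sketch; it assumes a libc providing getauxval() and arm64 uapi headers that
already define HWCAP2_EBF16 (the bit is introduced by this series):

    #include <stdio.h>
    #include <sys/auxv.h>    /* getauxval(), AT_HWCAP2 */
    #include <asm/hwcap.h>   /* arm64 HWCAP2_* bits, assumed to include HWCAP2_EBF16 */

    int main(void)
    {
            unsigned long hwcap2 = getauxval(AT_HWCAP2);

            if (hwcap2 & HWCAP2_EBF16)
                    printf("extended BFloat16 (FEAT_EBF16) advertised\n");
            else
                    printf("HWCAP2_EBF16 not set\n");
            return 0;
    }
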
diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
index 901cd094f4ec..2a641ba7be3b 100644
--- a/Documentation/arm64/memory.rst
+++ b/Documentation/arm64/memory.rst
@@ -33,9 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
0000000000000000 0000ffffffffffff 256TB user
ffff000000000000 ffff7fffffffffff 128TB kernel logical memory map
[ffff600000000000 ffff7fffffffffff] 32TB [kasan shadow region]
- ffff800000000000 ffff800007ffffff 128MB bpf jit region
- ffff800008000000 ffff80000fffffff 128MB modules
- ffff800010000000 fffffbffefffffff 124TB vmalloc
+ ffff800000000000 ffff800007ffffff 128MB modules
+ ffff800008000000 fffffbffefffffff 124TB vmalloc
fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down)
fffffbfffe000000 fffffbfffe7fffff 8MB [guard region]
fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space
@@ -51,9 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
0000000000000000 000fffffffffffff 4PB user
fff0000000000000 ffff7fffffffffff ~4PB kernel logical memory map
[fffd800000000000 ffff7fffffffffff] 512TB [kasan shadow region]
- ffff800000000000 ffff800007ffffff 128MB bpf jit region
- ffff800008000000 ffff80000fffffff 128MB modules
- ffff800010000000 fffffbffefffffff 124TB vmalloc
+ ffff800000000000 ffff800007ffffff 128MB modules
+ ffff800008000000 fffffbffefffffff 124TB vmalloc
fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down)
fffffbfffe000000 fffffbfffe7fffff 8MB [guard region]
fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space
diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index d27db84d585e..33b04db8408f 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -82,10 +82,14 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A57 | #1319537 | ARM64_ERRATUM_1319367 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A57 | #1742098 | ARM64_ERRATUM_1742098 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A72 | #853709 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A72 | #1319367 | ARM64_ERRATUM_1319367 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A72 | #1655431 | ARM64_ERRATUM_1742098 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A73 | #858921 | ARM64_ERRATUM_858921 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A76 | #1188873,1418040| ARM64_ERRATUM_1418040 |
@@ -102,6 +106,8 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A510 | #2077057 | ARM64_ERRATUM_2077057 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2441009 | ARM64_ERRATUM_2441009 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 |
diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index d6b3f94b9f1f..0793c400d4b0 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -223,7 +223,7 @@ Module Loading
Inter Module support
--------------------
-Refer to the file kernel/module.c for more information.
+Refer to the files in kernel/module/ for more information.
Hardware Interfaces
===================
diff --git a/Documentation/core-api/protection-keys.rst b/Documentation/core-api/protection-keys.rst
index ec575e72d0b2..bf28ac0401f3 100644
--- a/Documentation/core-api/protection-keys.rst
+++ b/Documentation/core-api/protection-keys.rst
@@ -4,31 +4,29 @@
Memory Protection Keys
======================
-Memory Protection Keys for Userspace (PKU aka PKEYs) is a feature
-which is found on Intel's Skylake (and later) "Scalable Processor"
-Server CPUs. It will be available in future non-server Intel parts
-and future AMD processors.
-
-For anyone wishing to test or use this feature, it is available in
-Amazon's EC2 C5 instances and is known to work there using an Ubuntu
-17.04 image.
-
-Memory Protection Keys provides a mechanism for enforcing page-based
-protections, but without requiring modification of the page tables
-when an application changes protection domains. It works by
-dedicating 4 previously ignored bits in each page table entry to a
-"protection key", giving 16 possible keys.
-
-There is also a new user-accessible register (PKRU) with two separate
-bits (Access Disable and Write Disable) for each key. Being a CPU
-register, PKRU is inherently thread-local, potentially giving each
+Memory Protection Keys provide a mechanism for enforcing page-based
+protections, but without requiring modification of the page tables when an
+application changes protection domains.
+
+Pkeys Userspace (PKU) is a feature which can be found on:
+ * Intel server CPUs, Skylake and later
+ * Intel client CPUs, Tiger Lake (11th Gen Core) and later
+ * Future AMD CPUs
+
+Pkeys work by dedicating 4 previously Reserved bits in each page table entry to
+a "protection key", giving 16 possible keys.
+
+Protections for each key are defined with a per-CPU user-accessible register
+(PKRU). Each of these is a 32-bit register storing two bits (Access Disable
+and Write Disable) for each of 16 keys.
+
+Being a CPU register, PKRU is inherently thread-local, potentially giving each
thread a different set of protections from every other thread.
-There are two new instructions (RDPKRU/WRPKRU) for reading and writing
-to the new register. The feature is only available in 64-bit mode,
-even though there is theoretically space in the PAE PTEs. These
-permissions are enforced on data access only and have no effect on
-instruction fetches.
+There are two instructions (RDPKRU/WRPKRU) for reading and writing to the
+register. The feature is only available in 64-bit mode, even though there is
+theoretically space in the PAE PTEs. These permissions are enforced on data
+access only and have no effect on instruction fetches.
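+
+As a rough illustration (not part of the kernel interface described above), a
+userspace thread can use the glibc >= 2.27 wrappers to tag a page with a key
+and then revoke its own access to that key; this is only a sketch, with error
+handling omitted and pkey-capable hardware and kernel support assumed::
+
+  #define _GNU_SOURCE
+  #include <sys/mman.h>
+
+  int main(void)
+  {
+      void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+                     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+      int pkey = pkey_alloc(0, 0);
+
+      if (p == MAP_FAILED || pkey < 0)
+          return 1;
+
+      /* Tag the page with the new key, then clear this thread's access
+       * rights for that key by writing PKRU via the pkey_set() wrapper. */
+      pkey_mprotect(p, 4096, PROT_READ | PROT_WRITE, pkey);
+      pkey_set(pkey, PKEY_DISABLE_ACCESS);
+
+      /* Loads and stores to p now fault in this thread only; instruction
+       * fetches are unaffected, as described above. */
+
+      pkey_set(pkey, 0);
+      pkey_free(pkey);
+      return 0;
+  }
+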
Syscalls
========
diff --git a/Documentation/core-api/symbol-namespaces.rst b/Documentation/core-api/symbol-namespaces.rst
index 5ad9e0abe42c..12e4aecdae94 100644
--- a/Documentation/core-api/symbol-namespaces.rst
+++ b/Documentation/core-api/symbol-namespaces.rst
@@ -51,8 +51,8 @@ namespace ``USB_STORAGE``, use::
The corresponding ksymtab entry struct ``kernel_symbol`` will have the member
``namespace`` set accordingly. A symbol that is exported without a namespace will
refer to ``NULL``. There is no default namespace if none is defined. ``modpost``
-and kernel/module.c make use the namespace at build time or module load time,
-respectively.
+and kernel/module/main.c make use of the namespace at build time or module load
+time, respectively.
2.2 Using the DEFAULT_SYMBOL_NAMESPACE define
=============================================
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml
index c388ae5da1e4..c9c346e6228e 100644
--- a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml
+++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml
@@ -94,6 +94,7 @@ if:
- allwinner,sun8i-a83t-display-engine
- allwinner,sun8i-r40-display-engine
- allwinner,sun9i-a80-display-engine
+ - allwinner,sun20i-d1-display-engine
- allwinner,sun50i-a64-display-engine
then:
diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml b/Documentation/devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml
new file mode 100644
index 000000000000..33b90e975e33
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml
@@ -0,0 +1,134 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/renesas,rzg2l-irqc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas RZ/G2L (and similar SoCs) Interrupt Controller (IA55)
+
+maintainers:
+ - Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
+ - Geert Uytterhoeven <geert+renesas@glider.be>
+
+description: |
+ IA55 performs various interrupt controls, including synchronization for the
+ external interrupts of NMI, IRQ, and GPIOINT and for the built-in peripheral
+ interrupts output by each IP, and notifies the GIC of the interrupts
+ - IRQ sense select for 8 external interrupts, mapped to 8 GIC SPI interrupts
+ - GPIO pins used as external interrupt input pins, mapped to 32 GIC SPI interrupts
+ - NMI edge select (the NMI is not treated as an NMI exception and supports
+   falling-edge and rising-edge detection interrupts)
+
+allOf:
+ - $ref: /schemas/interrupt-controller.yaml#
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - renesas,r9a07g044-irqc # RZ/G2{L,LC}
+ - renesas,r9a07g054-irqc # RZ/V2L
+ - const: renesas,rzg2l-irqc
+
+ '#interrupt-cells':
+ description: The first cell should contain the external interrupt number (IRQ0-7) and the
+ second cell is used to specify the flags.
+ const: 2
+
+ '#address-cells':
+ const: 0
+
+ interrupt-controller: true
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 41
+
+ clocks:
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: clk
+ - const: pclk
+
+ power-domains:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+required:
+ - compatible
+ - '#interrupt-cells'
+ - '#address-cells'
+ - interrupt-controller
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - power-domains
+ - resets
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/r9a07g044-cpg.h>
+
+ irqc: interrupt-controller@110a0000 {
+ compatible = "renesas,r9a07g044-irqc", "renesas,rzg2l-irqc";
+ reg = <0x110a0000 0x10000>;
+ #interrupt-cells = <2>;
+ #address-cells = <0>;
+ interrupt-controller;
+ interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 444 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 445 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 446 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 447 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 448 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 449 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 450 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 451 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 452 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 453 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 454 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 455 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 456 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 457 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 458 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 459 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 460 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 461 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 462 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 463 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 464 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 465 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 466 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 467 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 468 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 469 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 470 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 471 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 472 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 473 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 474 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 475 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_MOD R9A07G044_IA55_CLK>,
+ <&cpg CPG_MOD R9A07G044_IA55_PCLK>;
+ clock-names = "clk", "pclk";
+ power-domains = <&cpg>;
+ resets = <&cpg R9A07G044_IA55_RESETN>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml b/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml
index 27092c6a86c4..92e0f8c3eff2 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml
@@ -26,9 +26,14 @@ description:
with priority below this threshold will not cause the PLIC to raise its
interrupt line leading to the context.
- While the PLIC supports both edge-triggered and level-triggered interrupts,
- interrupt handlers are oblivious to this distinction and therefore it is not
- specified in the PLIC device-tree binding.
+ The PLIC supports both edge-triggered and level-triggered interrupts. For
+ edge-triggered interrupts, the RISC-V PLIC spec allows two responses to edges
+ seen while an interrupt handler is active; the PLIC may either queue them or
+ ignore them. In the first case, handlers are oblivious to the trigger type, so
+ it is not included in the interrupt specifier. In the second case, software
+ needs to know the trigger type, so it can reorder the interrupt flow to avoid
+ missing interrupts. This special handling is needed by at least the Renesas
+ RZ/Five SoC (AX45MP AndesCore with a NCEPLIC100) and the T-HEAD C900 PLIC.
While the RISC-V ISA doesn't specify a memory layout for the PLIC, the
"sifive,plic-1.0.0" device is a concrete implementation of the PLIC that
@@ -49,6 +54,10 @@ properties:
oneOf:
- items:
- enum:
+ - renesas,r9a07g043-plic
+ - const: andestech,nceplic100
+ - items:
+ - enum:
- sifive,fu540-c000-plic
- starfive,jh7100-plic
- canaan,k210-plic
@@ -64,8 +73,7 @@ properties:
'#address-cells':
const: 0
- '#interrupt-cells':
- const: 1
+ '#interrupt-cells': true
interrupt-controller: true
@@ -82,6 +90,12 @@ properties:
description:
Specifies how many external interrupts are supported by this controller.
+ clocks: true
+
+ power-domains: true
+
+ resets: true
+
required:
- compatible
- '#address-cells'
@@ -91,6 +105,47 @@ required:
- interrupts-extended
- riscv,ndev
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - andestech,nceplic100
+ - thead,c900-plic
+
+ then:
+ properties:
+ '#interrupt-cells':
+ const: 2
+
+ else:
+ properties:
+ '#interrupt-cells':
+ const: 1
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: renesas,r9a07g043-plic
+
+ then:
+ properties:
+ clocks:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+ required:
+ - clocks
+ - power-domains
+ - resets
+
additionalProperties: false
examples:
diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
index 4f15463611f8..170cd201adc2 100644
--- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
@@ -167,70 +167,65 @@ properties:
- in-band-status
fixed-link:
- allOf:
- - if:
- type: array
- then:
- deprecated: true
- items:
- - minimum: 0
- maximum: 31
- description:
- Emulated PHY ID, choose any but unique to the all
- specified fixed-links
-
- - enum: [0, 1]
- description:
- Duplex configuration. 0 for half duplex or 1 for
- full duplex
-
- - enum: [10, 100, 1000, 2500, 10000]
- description:
- Link speed in Mbits/sec.
-
- - enum: [0, 1]
- description:
- Pause configuration. 0 for no pause, 1 for pause
-
- - enum: [0, 1]
- description:
- Asymmetric pause configuration. 0 for no asymmetric
- pause, 1 for asymmetric pause
-
-
- - if:
- type: object
- then:
- properties:
- speed:
- description:
- Link speed.
- $ref: /schemas/types.yaml#/definitions/uint32
- enum: [10, 100, 1000, 2500, 10000]
-
- full-duplex:
- $ref: /schemas/types.yaml#/definitions/flag
- description:
- Indicates that full-duplex is used. When absent, half
- duplex is assumed.
-
- pause:
- $ref: /schemas/types.yaml#definitions/flag
- description:
- Indicates that pause should be enabled.
-
- asym-pause:
- $ref: /schemas/types.yaml#/definitions/flag
- description:
- Indicates that asym_pause should be enabled.
-
- link-gpios:
- maxItems: 1
- description:
- GPIO to determine if the link is up
-
- required:
- - speed
+ oneOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32-array
+ deprecated: true
+ items:
+ - minimum: 0
+ maximum: 31
+ description:
+ Emulated PHY ID; choose any value, but it must be unique among all
+ specified fixed-links
+
+ - enum: [0, 1]
+ description:
+ Duplex configuration. 0 for half duplex or 1 for
+ full duplex
+
+ - enum: [10, 100, 1000, 2500, 10000]
+ description:
+ Link speed in Mbits/sec.
+
+ - enum: [0, 1]
+ description:
+ Pause configuration. 0 for no pause, 1 for pause
+
+ - enum: [0, 1]
+ description:
+ Asymmetric pause configuration. 0 for no asymmetric
+ pause, 1 for asymmetric pause
+ - type: object
+ additionalProperties: false
+ properties:
+ speed:
+ description:
+ Link speed.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [10, 100, 1000, 2500, 10000]
+
+ full-duplex:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ Indicates that full-duplex is used. When absent, half
+ duplex is assumed.
+
+ pause:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ Indicates that pause should be enabled.
+
+ asym-pause:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ Indicates that asym_pause should be enabled.
+
+ link-gpios:
+ maxItems: 1
+ description:
+ GPIO to determine if the link is up
+
+ required:
+ - speed
additionalProperties: true
diff --git a/Documentation/devicetree/bindings/net/fsl,fec.yaml b/Documentation/devicetree/bindings/net/fsl,fec.yaml
index daa2f79a294f..1b1853062cd3 100644
--- a/Documentation/devicetree/bindings/net/fsl,fec.yaml
+++ b/Documentation/devicetree/bindings/net/fsl,fec.yaml
@@ -183,6 +183,7 @@ properties:
Should specify the gpio for phy reset.
phy-reset-duration:
+ $ref: /schemas/types.yaml#/definitions/uint32
deprecated: true
description:
Reset duration in milliseconds. Should present only if property
@@ -191,12 +192,14 @@ properties:
and 1 millisecond will be used instead.
phy-reset-active-high:
+ type: boolean
deprecated: true
description:
If present then the reset sequence using the GPIO specified in the
"phy-reset-gpios" property is reversed (H=reset state, L=operation state).
phy-reset-post-delay:
+ $ref: /schemas/types.yaml#/definitions/uint32
deprecated: true
description:
Post reset delay in milliseconds. If present then a delay of phy-reset-post-delay
diff --git a/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml b/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml
index 8cd0adbf7021..7029cb1f38ff 100644
--- a/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Qualcomm Atheros ath9k wireless devices Generic Binding
maintainers:
- - Kalle Valo <kvalo@codeaurora.org>
+ - Toke Høiland-Jørgensen <toke@toke.dk>
description: |
This node provides properties for configuring the ath9k wireless device.
diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
index 8c01fdba134b..a677b056f112 100644
--- a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
@@ -9,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Qualcomm Technologies ath11k wireless devices Generic Binding
maintainers:
- - Kalle Valo <kvalo@codeaurora.org>
+ - Kalle Valo <kvalo@kernel.org>
description: |
These are dt entries for Qualcomm Technologies, Inc. IEEE 802.11ax
diff --git a/Documentation/devicetree/bindings/pinctrl/renesas,rzg2l-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/renesas,rzg2l-pinctrl.yaml
index 52df1b146174..997b74639112 100644
--- a/Documentation/devicetree/bindings/pinctrl/renesas,rzg2l-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/renesas,rzg2l-pinctrl.yaml
@@ -47,6 +47,17 @@ properties:
gpio-ranges:
maxItems: 1
+ interrupt-controller: true
+
+ '#interrupt-cells':
+ const: 2
+ description:
+ The first cell contains the global GPIO port index, constructed using the
+ RZG2L_GPIO() helper macro in <dt-bindings/pinctrl/rzg2l-pinctrl.h> and the
+ second cell is used to specify the flag.
+ E.g. "interrupts = <RZG2L_GPIO(43, 0) IRQ_TYPE_EDGE_FALLING>;" if P43_0 is
+ being used as an interrupt.
+
clocks:
maxItems: 1
@@ -110,6 +121,8 @@ required:
- gpio-controller
- '#gpio-cells'
- gpio-ranges
+ - interrupt-controller
+ - '#interrupt-cells'
- clocks
- power-domains
- resets
@@ -126,6 +139,8 @@ examples:
gpio-controller;
#gpio-cells = <2>;
gpio-ranges = <&pinctrl 0 0 392>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
clocks = <&cpg CPG_MOD R9A07G044_GPIO_HCLK>;
resets = <&cpg R9A07G044_GPIO_RSTN>,
<&cpg R9A07G044_GPIO_PORT_RESETN>,
diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml
index e9a533080b32..ef18a572a1ff 100644
--- a/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml
@@ -25,12 +25,12 @@ properties:
- qcom,sc7280-lpass-cpu
reg:
- minItems: 2
+ minItems: 1
maxItems: 6
description: LPAIF core registers
reg-names:
- minItems: 2
+ minItems: 1
maxItems: 6
clocks:
@@ -42,12 +42,12 @@ properties:
maxItems: 10
interrupts:
- minItems: 2
+ minItems: 1
maxItems: 4
description: LPAIF DMA buffer interrupt
interrupt-names:
- minItems: 2
+ minItems: 1
maxItems: 4
qcom,adsp:
diff --git a/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml b/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml
index 53fd24bdc34e..3711872b6b99 100644
--- a/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml
+++ b/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml
@@ -20,6 +20,7 @@ properties:
- allwinner,suniv-f1c100s-timer
- items:
- enum:
+ - allwinner,sun20i-d1-timer
- allwinner,sun50i-a64-timer
- allwinner,sun50i-h6-timer
- allwinner,sun50i-h616-timer
diff --git a/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml b/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml
index d541cf2067bc..0a01e4f5eddb 100644
--- a/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml
+++ b/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml
@@ -113,7 +113,7 @@ properties:
patternProperties:
"^watchdog@[a-f0-9]+$":
type: object
- $ref: ../watchdog/watchdog.yaml#
+ $ref: /schemas/watchdog/watchdog.yaml#
properties:
compatible:
oneOf:
@@ -145,7 +145,7 @@ patternProperties:
"^pwm@[a-f0-9]+$":
type: object
- $ref: ../pwm/pwm.yaml#
+ $ref: /schemas/pwm/pwm.yaml#
properties:
compatible:
oneOf:
diff --git a/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt b/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt
index 6f1f9dba6e88..f1c848af91d3 100644
--- a/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt
+++ b/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt
@@ -1,7 +1,8 @@
MediaTek Timers
---------------
-MediaTek SoCs have two different timers on different platforms,
+MediaTek SoCs have different timers on different platforms,
+- CPUX (ARM/ARM64 System Timer)
- GPT (General Purpose Timer)
- SYST (System Timer)
@@ -29,6 +30,9 @@ Required properties:
* "mediatek,mt7629-timer" for MT7629 compatible timers (SYST)
* "mediatek,mt6765-timer" for MT6765 and all above compatible timers (SYST)
+ For those SoCs that use CPUX
+ * "mediatek,mt6795-systimer" for MT6795 compatible timers (CPUX)
+
- reg: Should contain location and length for timer register.
- clocks: Should contain system clock.
diff --git a/Documentation/devicetree/bindings/timer/renesas,cmt.yaml b/Documentation/devicetree/bindings/timer/renesas,cmt.yaml
index 53dd6d9f518f..bde6c9b66bf4 100644
--- a/Documentation/devicetree/bindings/timer/renesas,cmt.yaml
+++ b/Documentation/devicetree/bindings/timer/renesas,cmt.yaml
@@ -80,7 +80,6 @@ properties:
- renesas,r8a77980-cmt0 # 32-bit CMT0 on R-Car V3H
- renesas,r8a77990-cmt0 # 32-bit CMT0 on R-Car E3
- renesas,r8a77995-cmt0 # 32-bit CMT0 on R-Car D3
- - renesas,r8a779a0-cmt0 # 32-bit CMT0 on R-Car V3U
- const: renesas,rcar-gen3-cmt0 # 32-bit CMT0 on R-Car Gen3 and RZ/G2
- items:
@@ -97,9 +96,20 @@ properties:
- renesas,r8a77980-cmt1 # 48-bit CMT on R-Car V3H
- renesas,r8a77990-cmt1 # 48-bit CMT on R-Car E3
- renesas,r8a77995-cmt1 # 48-bit CMT on R-Car D3
- - renesas,r8a779a0-cmt1 # 48-bit CMT on R-Car V3U
- const: renesas,rcar-gen3-cmt1 # 48-bit CMT on R-Car Gen3 and RZ/G2
+ - items:
+ - enum:
+ - renesas,r8a779a0-cmt0 # 32-bit CMT0 on R-Car V3U
+ - renesas,r8a779f0-cmt0 # 32-bit CMT0 on R-Car S4-8
+ - const: renesas,rcar-gen4-cmt0 # 32-bit CMT0 on R-Car Gen4
+
+ - items:
+ - enum:
+ - renesas,r8a779a0-cmt1 # 48-bit CMT on R-Car V3U
+ - renesas,r8a779f0-cmt1 # 48-bit CMT on R-Car S4-8
+ - const: renesas,rcar-gen4-cmt1 # 48-bit CMT on R-Car Gen4
+
reg:
maxItems: 1
@@ -135,6 +145,7 @@ allOf:
enum:
- renesas,rcar-gen2-cmt0
- renesas,rcar-gen3-cmt0
+ - renesas,rcar-gen4-cmt0
then:
properties:
interrupts:
@@ -148,6 +159,7 @@ allOf:
enum:
- renesas,rcar-gen2-cmt1
- renesas,rcar-gen3-cmt1
+ - renesas,rcar-gen4-cmt1
then:
properties:
interrupts:
diff --git a/Documentation/devicetree/bindings/timer/st,nomadik-mtu.yaml b/Documentation/devicetree/bindings/timer/st,nomadik-mtu.yaml
new file mode 100644
index 000000000000..901848d298ec
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/st,nomadik-mtu.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2022 Linaro Ltd.
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/timer/st,nomadik-mtu.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: ST Microelectronics Nomadik Multi-Timer Unit MTU Timer
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: This timer is found in the ST Microelectronics Nomadik
+ SoCs STn8800, STn8810 and STn8815 as well as in ST-Ericsson DB8500.
+
+properties:
+ compatible:
+ items:
+ - const: st,nomadik-mtu
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ description: The first clock named TIMCLK clocks the actual timers and
+ the second clock clocks the digital interface to the interconnect.
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: timclk
+ - const: apb_pclk
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/mfd/dbx500-prcmu.h>
+ timer@a03c6000 {
+ compatible = "st,nomadik-mtu";
+ reg = <0xa03c6000 0x1000>;
+ interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
+
+ clocks = <&prcmu_clk PRCMU_TIMCLK>, <&prcc_pclk 6 6>;
+ clock-names = "timclk", "apb_pclk";
+ };
diff --git a/Documentation/features/vm/ioremap_prot/arch-support.txt b/Documentation/features/vm/ioremap_prot/arch-support.txt
index b01bf7bca3e6..6bd78eb4dc6e 100644
--- a/Documentation/features/vm/ioremap_prot/arch-support.txt
+++ b/Documentation/features/vm/ioremap_prot/arch-support.txt
@@ -9,7 +9,7 @@
| alpha: | TODO |
| arc: | ok |
| arm: | TODO |
- | arm64: | TODO |
+ | arm64: | ok |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst
index 4d19b19bcc08..73a4176144b3 100644
--- a/Documentation/filesystems/netfs_library.rst
+++ b/Documentation/filesystems/netfs_library.rst
@@ -301,7 +301,7 @@ through which it can issue requests and negotiate::
void (*issue_read)(struct netfs_io_subrequest *subreq);
bool (*is_still_valid)(struct netfs_io_request *rreq);
int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
- struct folio *folio, void **_fsdata);
+ struct folio **foliop, void **_fsdata);
void (*done)(struct netfs_io_request *rreq);
};
@@ -381,8 +381,10 @@ The operations are as follows:
allocated/grabbed the folio to be modified to allow the filesystem to flush
conflicting state before allowing it to be modified.
- It should return 0 if everything is now fine, -EAGAIN if the folio should be
- regrabbed and any other error code to abort the operation.
+ It may unlock and discard the folio it was given and set the caller's folio
+ pointer to NULL. It should return 0 if everything is now fine (``*foliop``
+ left set) or if the op should be retried (``*foliop`` cleared), and any other
+ error code to abort the operation.
* ``done``
diff --git a/Documentation/livepatch/module-elf-format.rst b/Documentation/livepatch/module-elf-format.rst
index dbe9b400e39f..7347638895a0 100644
--- a/Documentation/livepatch/module-elf-format.rst
+++ b/Documentation/livepatch/module-elf-format.rst
@@ -210,11 +210,11 @@ module->symtab.
=====================================
Normally, a stripped down copy of a module's symbol table (containing only
"core" symbols) is made available through module->symtab (See layout_symtab()
-in kernel/module.c). For livepatch modules, the symbol table copied into memory
-on module load must be exactly the same as the symbol table produced when the
-patch module was compiled. This is because the relocations in each livepatch
-relocation section refer to their respective symbols with their symbol indices,
-and the original symbol indices (and thus the symtab ordering) must be
+in kernel/module/kallsyms.c). For livepatch modules, the symbol table copied
+into memory on module load must be exactly the same as the symbol table produced
+when the patch module was compiled. This is because the relocations in each
+livepatch relocation section refer to their respective symbols with their symbol
+indices, and the original symbol indices (and thus the symtab ordering) must be
preserved in order for apply_relocate_add() to find the right symbol.
For example, take this particular rela from a livepatch module:::
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index b12df9137e1c..832b5d36e279 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1894,6 +1894,7 @@ There are some more advanced barrier functions:
(*) dma_wmb();
(*) dma_rmb();
+ (*) dma_mb();
These are for use with consistent memory to guarantee the ordering
of writes or reads of shared memory accessible to both the CPU and a
@@ -1925,11 +1926,11 @@ There are some more advanced barrier functions:
The dma_rmb() allows us to guarantee the device has released ownership
before we read the data from the descriptor, and the dma_wmb() allows
us to guarantee the data is written to the descriptor before the device
- can see it now has ownership. Note that, when using writel(), a prior
- wmb() is not needed to guarantee that the cache coherent memory writes
- have completed before writing to the MMIO region. The cheaper
- writel_relaxed() does not provide this guarantee and must not be used
- here.
+ can see it now has ownership. The dma_mb() implies both a dma_rmb() and
+ a dma_wmb(). Note that, when using writel(), a prior wmb() is not needed
+ to guarantee that the cache coherent memory writes have completed before
+ writing to the MMIO region. The cheaper writel_relaxed() does not provide
+ this guarantee and must not be used here.
See the subsection "Kernel I/O barrier effects" for more information on
relaxed I/O accessors and the Documentation/core-api/dma-api.rst file for
diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst
index ed7fa76e7a40..d742ba6bd211 100644
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -503,26 +503,108 @@ per-port PHY specific details: interface connection, MDIO bus location, etc.
Driver development
==================
-DSA switch drivers need to implement a dsa_switch_ops structure which will
+DSA switch drivers need to implement a ``dsa_switch_ops`` structure which will
contain the various members described below.
-``register_switch_driver()`` registers this dsa_switch_ops in its internal list
-of drivers to probe for. ``unregister_switch_driver()`` does the exact opposite.
+Probing, registration and device lifetime
+-----------------------------------------
-Unless requested differently by setting the priv_size member accordingly, DSA
-does not allocate any driver private context space.
+DSA switches are regular ``device`` structures on buses (be they platform, SPI,
+I2C, MDIO or otherwise). The DSA framework is not involved in their probing
+with the device core.
+
+Switch registration from the perspective of a driver means passing a valid
+``struct dsa_switch`` pointer to ``dsa_register_switch()``, usually from the
+switch driver's probing function. The following members must be valid in the
+provided structure:
+
+- ``ds->dev``: will be used to parse the switch's OF node or platform data.
+
+- ``ds->num_ports``: will be used to create the port list for this switch, and
+ to validate the port indices provided in the OF node.
+
+- ``ds->ops``: a pointer to the ``dsa_switch_ops`` structure holding the DSA
+ method implementations.
+
+- ``ds->priv``: backpointer to a driver-private data structure which can be
+ retrieved in all further DSA method callbacks.
+
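+A minimal probe sketch covering the members above, assuming a hypothetical
+MDIO-attached switch with a driver-private ``struct foo_switch`` (all ``foo_*``
+names are illustrative only, not an existing driver)::
+
+  static int foo_switch_probe(struct mdio_device *mdiodev)
+  {
+      struct foo_switch *priv;
+      struct dsa_switch *ds;
+
+      priv = devm_kzalloc(&mdiodev->dev, sizeof(*priv), GFP_KERNEL);
+      ds = devm_kzalloc(&mdiodev->dev, sizeof(*ds), GFP_KERNEL);
+      if (!priv || !ds)
+          return -ENOMEM;
+
+      ds->dev = &mdiodev->dev;          /* OF node / platform data source */
+      ds->num_ports = FOO_NUM_PORTS;    /* hypothetical port count */
+      ds->ops = &foo_switch_ops;        /* dsa_switch_ops implementation */
+      ds->priv = priv;                  /* available in every DSA callback */
+      priv->ds = ds;
+
+      dev_set_drvdata(&mdiodev->dev, priv);
+
+      return dsa_register_switch(ds);
+  }
+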
+In addition, the following flags in the ``dsa_switch`` structure may optionally
+be configured to obtain driver-specific behavior from the DSA core. Their
+behavior when set is documented through comments in ``include/net/dsa.h``.
+
+- ``ds->vlan_filtering_is_global``
+
+- ``ds->needs_standalone_vlan_filtering``
+
+- ``ds->configure_vlan_while_not_filtering``
+
+- ``ds->untag_bridge_pvid``
+
+- ``ds->assisted_learning_on_cpu_port``
+
+- ``ds->mtu_enforcement_ingress``
+
+- ``ds->fdb_isolation``
+
+Internally, DSA keeps an array of switch trees (group of switches) global to
+the kernel, and attaches a ``dsa_switch`` structure to a tree on registration.
+The tree ID to which the switch is attached is determined by the first u32
+number of the ``dsa,member`` property of the switch's OF node (0 if missing).
+The switch ID within the tree is determined by the second u32 number of the
+same OF property (0 if missing). Registering multiple switches with the same
+switch ID and tree ID is illegal and will cause an error. When using platform
+data, only a single switch and a single switch tree are permitted.
+
+In case of a tree with multiple switches, probing takes place asymmetrically.
+The first N-1 callers of ``dsa_register_switch()`` only add their ports to the
+port list of the tree (``dst->ports``), each port having a backpointer to its
+associated switch (``dp->ds``). Then, these switches exit their
+``dsa_register_switch()`` call early, because ``dsa_tree_setup_routing_table()``
+has determined that the tree is not yet complete (not all ports referenced by
+DSA links are present in the tree's port list). The tree becomes complete when
+the last switch calls ``dsa_register_switch()``, and this triggers the effective
+continuation of initialization (including the call to ``ds->ops->setup()``) for
+all switches within that tree, all as part of the calling context of the last
+switch's probe function.
+
+The opposite of registration takes place when calling ``dsa_unregister_switch()``,
+which removes a switch's ports from the port list of the tree. The entire tree
+is torn down when the first switch unregisters.
+
+It is mandatory for DSA switch drivers to implement the ``shutdown()`` callback
+of their respective bus, and call ``dsa_switch_shutdown()`` from it (a minimal
+version of the full teardown performed by ``dsa_unregister_switch()``).
+The reason is that DSA keeps a reference on the master net device, and if the
+driver for the master device decides to unbind on shutdown, DSA's reference
+will block that operation from finalizing.
+
+Either ``dsa_switch_shutdown()`` or ``dsa_unregister_switch()`` must be called,
+but not both, and the device driver model permits the bus' ``remove()`` method
+to be called even if ``shutdown()`` was already called. Therefore, drivers are
+expected to implement a mutual exclusion method between ``remove()`` and
+``shutdown()`` by setting their drvdata to NULL after any of these has run, and
+checking whether the drvdata is NULL before proceeding to take any action.
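+
+A sketch of that mutual exclusion, continuing the hypothetical MDIO-attached
+driver above (``foo_*`` names are illustrative only)::
+
+  static void foo_switch_remove(struct mdio_device *mdiodev)
+  {
+      struct foo_switch *priv = dev_get_drvdata(&mdiodev->dev);
+
+      if (!priv)
+          return;               /* shutdown() already ran */
+
+      dsa_unregister_switch(priv->ds);
+      dev_set_drvdata(&mdiodev->dev, NULL);
+  }
+
+  static void foo_switch_shutdown(struct mdio_device *mdiodev)
+  {
+      struct foo_switch *priv = dev_get_drvdata(&mdiodev->dev);
+
+      if (!priv)
+          return;               /* remove() already ran */
+
+      dsa_switch_shutdown(priv->ds);
+      dev_set_drvdata(&mdiodev->dev, NULL);
+  }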
+
+After ``dsa_switch_shutdown()`` or ``dsa_unregister_switch()`` was called, no
+further callbacks via the provided ``dsa_switch_ops`` may take place, and the
+driver may free the data structures associated with the ``dsa_switch``.
Switch configuration
--------------------
-- ``tag_protocol``: this is to indicate what kind of tagging protocol is supported,
- should be a valid value from the ``dsa_tag_protocol`` enum
+- ``get_tag_protocol``: this is to indicate what kind of tagging protocol is
+ supported, should be a valid value from the ``dsa_tag_protocol`` enum.
+ The returned information does not have to be static; the driver is passed the
+ CPU port number, as well as the tagging protocol of a possibly stacked
+ upstream switch, in case there are hardware limitations in terms of supported
+ tag formats.
-- ``probe``: probe routine which will be invoked by the DSA platform device upon
- registration to test for the presence/absence of a switch device. For MDIO
- devices, it is recommended to issue a read towards internal registers using
- the switch pseudo-PHY and return whether this is a supported device. For other
- buses, return a non-NULL string
+- ``change_tag_protocol``: when the default tagging protocol has compatibility
+ problems with the master or other issues, the driver may support changing it
+ at runtime, either through a device tree property or through sysfs. In that
+ case, further calls to ``get_tag_protocol`` should report the protocol in
+ current use.
- ``setup``: setup function for the switch, this function is responsible for setting
up the ``dsa_switch_ops`` private structure with all it needs: register maps,
@@ -535,7 +617,17 @@ Switch configuration
fully configured and ready to serve any kind of request. It is recommended
to issue a software reset of the switch during this setup function in order to
avoid relying on what a previous software agent such as a bootloader/firmware
- may have previously configured.
+ may have previously configured. The method responsible for undoing any
+ applicable allocations or operations done here is ``teardown``.
+
+- ``port_setup`` and ``port_teardown``: methods for initialization and
+ destruction of per-port data structures. It is mandatory for some operations
+ such as registering and unregistering devlink port regions to be done from
+ these methods, otherwise they are optional. A port will be torn down only if
+ it has been previously set up. It is possible for a port to be set up during
+ probing only to be torn down immediately afterwards, for example in case its
+ PHY cannot be found. In this case, probing of the DSA switch continues
+ without that particular port.
PHY devices and link management
-------------------------------
@@ -635,26 +727,198 @@ Power management
``BR_STATE_DISABLED`` and propagating changes to the hardware if this port is
disabled while being a bridge member
+Address databases
+-----------------
+
+Switching hardware is expected to have a table for FDB entries, however not all
+of them are active at the same time. An address database is the subset (partition)
+of FDB entries that is active (can be matched by address learning on RX, or FDB
+lookup on TX) depending on the state of the port. An address database may
+occasionally be called "FID" (Filtering ID) in this document, although the
+underlying implementation may choose whatever is available to the hardware.
+
+For example, all ports that belong to a VLAN-unaware bridge (which is
+*currently* VLAN-unaware) are expected to learn source addresses in the
+database associated by the driver with that bridge (and not with other
+VLAN-unaware bridges). During forwarding and FDB lookup, a packet received on a
+VLAN-unaware bridge port should be able to find a VLAN-unaware FDB entry having
+the same MAC DA as the packet, which is present on another port member of the
+same bridge. At the same time, the FDB lookup process must be able to not find
+an FDB entry having the same MAC DA as the packet, if that entry points towards
+a port which is a member of a different VLAN-unaware bridge (and is therefore
+associated with a different address database).
+
+Similarly, each VLAN of each offloaded VLAN-aware bridge should have an
+associated address database, which is shared by all ports which are members of
+that VLAN, but not shared by ports belonging to different bridges that are
+members of the same VID.
+
+In this context, a VLAN-unaware database means that all packets are expected to
+match on it irrespective of VLAN ID (only MAC address lookup), whereas a
+VLAN-aware database means that packets are supposed to match based on the VLAN
+ID from the classified 802.1Q header (or the pvid if untagged).
+
+At the bridge layer, VLAN-unaware FDB entries have the special VID value of 0,
+whereas VLAN-aware FDB entries have non-zero VID values. Note that a
+VLAN-unaware bridge may have VLAN-aware (non-zero VID) FDB entries, and a
+VLAN-aware bridge may have VLAN-unaware FDB entries. As in hardware, the
+software bridge keeps separate address databases, and offloads to hardware the
+FDB entries belonging to these databases, through switchdev, asynchronously
+relative to the moment when the databases become active or inactive.
+
+When a user port operates in standalone mode, its driver should configure it to
+use a separate database called a port private database. This is different from
+the databases described above, and should impede operation as standalone port
+(packet in, packet out to the CPU port) as little as possible. For example,
+on ingress, it should not attempt to learn the MAC SA of ingress traffic, since
+learning is a bridging layer service and this is a standalone port; learned
+entries would only waste table space. With no address learning, the port private
+database should be empty in a naive implementation, and in this case, all
+received packets should be trivially flooded to the CPU port.
+
+DSA (cascade) and CPU ports are also called "shared" ports because they service
+multiple address databases, and the database that a packet should be associated
+to is usually embedded in the DSA tag. This means that the CPU port may
+simultaneously transport packets coming from a standalone port (which were
+classified by hardware in one address database), and from a bridge port (which
+were classified to a different address database).
+
+Switch drivers which satisfy certain criteria are able to optimize the naive
+configuration by removing the CPU port from the flooding domain of the switch,
+and just program the hardware with FDB entries pointing towards the CPU port
+for which it is known that software is interested in those MAC addresses.
+Packets which do not match a known FDB entry will not be delivered to the CPU,
+which will save CPU cycles required for creating an skb just to drop it.
+
+DSA is able to perform host address filtering for the following kinds of
+addresses:
+
+- Primary unicast MAC addresses of ports (``dev->dev_addr``). These are
+ associated with the port private database of the respective user port,
+ and the driver is notified to install them through ``port_fdb_add`` towards
+ the CPU port.
+
+- Secondary unicast and multicast MAC addresses of ports (addresses added
+ through ``dev_uc_add()`` and ``dev_mc_add()``). These are also associated
+ with the port private database of the respective user port.
+
+- Local/permanent bridge FDB entries (``BR_FDB_LOCAL``). These are the MAC
+ addresses of the bridge ports, for which packets must be terminated locally
+ and not forwarded. They are associated with the address database for that
+ bridge.
+
+- Static bridge FDB entries installed towards foreign (non-DSA) interfaces
+ present in the same bridge as some DSA switch ports. These are also
+ associated with the address database for that bridge.
+
+- Dynamically learned FDB entries on foreign interfaces present in the same
+ bridge as some DSA switch ports, only if ``ds->assisted_learning_on_cpu_port``
+ is set to true by the driver. These are associated with the address database
+ for that bridge.
+
+For various operations detailed below, DSA provides a ``dsa_db`` structure
+which can be of the following types:
+
+- ``DSA_DB_PORT``: the FDB (or MDB) entry to be installed or deleted belongs to
+ the port private database of user port ``db->dp``.
+- ``DSA_DB_BRIDGE``: the entry belongs to one of the address databases of bridge
+ ``db->bridge``. Separation between the VLAN-unaware database and the per-VID
+ databases of this bridge is expected to be done by the driver.
+- ``DSA_DB_LAG``: the entry belongs to the address database of LAG ``db->lag``.
+ Note: ``DSA_DB_LAG`` is currently unused and may be removed in the future.
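+
+For illustration, a driver acting on the database argument might map it to a
+hardware FID before programming the entry; this is only a sketch and the
+``foo_*`` helpers are hypothetical::
+
+  static int foo_port_fdb_add(struct dsa_switch *ds, int port,
+                              const unsigned char *addr, u16 vid,
+                              struct dsa_db db)
+  {
+      struct foo_switch *priv = ds->priv;
+      u16 fid;
+
+      switch (db.type) {
+      case DSA_DB_PORT:
+          /* standalone port: use its port private database */
+          fid = foo_port_private_fid(priv, db.dp->index);
+          break;
+      case DSA_DB_BRIDGE:
+          /* bridge: pick the VLAN-unaware or per-VID database */
+          fid = foo_bridge_fid(priv, db.bridge.num, vid);
+          break;
+      default:
+          return -EOPNOTSUPP;
+      }
+
+      return foo_hw_fdb_add(priv, fid, addr, vid, port);
+  }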
+
+The drivers which act upon the ``dsa_db`` argument in ``port_fdb_add``,
+``port_mdb_add`` etc should declare ``ds->fdb_isolation`` as true.
+
+DSA associates each offloaded bridge and each offloaded LAG with a one-based ID
+(``struct dsa_bridge :: num``, ``struct dsa_lag :: id``) for the purposes of
+refcounting addresses on shared ports. Drivers may piggyback on DSA's numbering
+scheme (the ID is readable through ``db->bridge.num`` and ``db->lag.id``) or may
+implement their own.
+
+Only the drivers which declare support for FDB isolation are notified of FDB
+entries on the CPU port belonging to ``DSA_DB_PORT`` databases.
+For compatibility/legacy reasons, ``DSA_DB_BRIDGE`` addresses are notified to
+drivers even if they do not support FDB isolation. However, ``db->bridge.num``
+and ``db->lag.id`` are always set to 0 in that case (to denote the lack of
+isolation, for refcounting purposes).
+
+Note that it is not mandatory for a switch driver to implement physically
+separate address databases for each standalone user port. Since FDB entries in
+the port private databases will always point to the CPU port, there is no risk
+for incorrect forwarding decisions. In this case, all standalone ports may
+share the same database, but the reference counting of host-filtered addresses
+(not deleting the FDB entry for a port's MAC address if it's still in use by
+another port) becomes the responsibility of the driver, because DSA is unaware
+that the port databases are in fact shared. This can be achieved by calling
+``dsa_fdb_present_in_other_db()`` and ``dsa_mdb_present_in_other_db()``.
+The down side is that the RX filtering lists of each user port are in fact
+shared, which means that user port A may accept a packet with a MAC DA it
+shouldn't have, only because that MAC address was in the RX filtering list of
+user port B. These packets will still be dropped in software, however.
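+
+A sketch of that reference counting on deletion, under the stated assumption of
+a single shared standalone database (``foo_*`` names are hypothetical)::
+
+  static int foo_port_fdb_del(struct dsa_switch *ds, int port,
+                              const unsigned char *addr, u16 vid,
+                              struct dsa_db db)
+  {
+      struct foo_switch *priv = ds->priv;
+
+      /* Another port's private database still refers to this address in
+       * the shared hardware database, so keep the entry installed. */
+      if (dsa_fdb_present_in_other_db(ds, port, addr, vid, db))
+          return 0;
+
+      return foo_hw_fdb_del(priv, addr, vid);
+  }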
+
Bridge layer
------------
+Offloading the bridge forwarding plane is optional and handled by the methods
+below. They may be absent, return -EOPNOTSUPP, or ``ds->max_num_bridges`` may
+be non-zero and exceeded, and in this case, joining a bridge port is still
+possible, but the packet forwarding will take place in software, and the ports
+under a software bridge must remain configured in the same way as for
+standalone operation, i.e. have all bridging service functions (address
+learning etc) disabled, and send all received packets to the CPU port only.
+
+Concretely, a port starts offloading the forwarding plane of a bridge once it
+returns success to the ``port_bridge_join`` method, and stops doing so after
+``port_bridge_leave`` has been called. Offloading the bridge means autonomously
+learning FDB entries in accordance with the software bridge port's state, and
+autonomously forwarding (or flooding) received packets without CPU intervention.
+This is optional even when offloading a bridge port. Tagging protocol drivers
+are expected to call ``dsa_default_offload_fwd_mark(skb)`` for packets which
+have already been autonomously forwarded in the forwarding domain of the
+ingress switch port. DSA, through ``dsa_port_devlink_setup()``, considers all
+switch ports part of the same tree ID to be part of the same bridge forwarding
+domain (capable of autonomous forwarding to each other).
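+
+A sketch of how a tagging protocol driver might do this in its ``rcv`` hook;
+``foo_tag_parse()`` is a hypothetical helper that strips the hardware tag and
+reports the source port plus whether the switch already forwarded the packet::
+
+  static struct sk_buff *foo_tag_rcv(struct sk_buff *skb, struct net_device *dev)
+  {
+      struct foo_tag_info info = foo_tag_parse(skb);
+
+      skb->dev = dsa_master_find_slave(dev, 0, info.source_port);
+      if (!skb->dev)
+          return NULL;
+
+      if (info.already_forwarded)
+          dsa_default_offload_fwd_mark(skb);
+
+      return skb;
+  }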
+
+Offloading the TX forwarding process of a bridge is a distinct concept from
+simply offloading its forwarding plane, and refers to the ability of certain
+driver and tag protocol combinations to transmit a single skb coming from the
+bridge device's transmit function to potentially multiple egress ports (and
+thereby avoid its cloning in software).
+
+Packets for which the bridge requests this behavior are called data plane
+packets and have ``skb->offload_fwd_mark`` set to true in the tag protocol
+driver's ``xmit`` function. Data plane packets are subject to FDB lookup,
+hardware learning on the CPU port, and do not override the port STP state.
+Additionally, replication of data plane packets (multicast, flooding) is
+handled in hardware and the bridge driver will transmit a single skb for each
+packet that may or may not need replication.
+
+When the TX forwarding offload is enabled, the tag protocol driver is
+responsible for injecting packets into the data plane of the hardware towards the
+correct bridging domain (FID) that the port is a part of. The port may be
+VLAN-unaware, and in this case the FID must be equal to the FID used by the
+driver for its VLAN-unaware address database associated with that bridge.
+Alternatively, the bridge may be VLAN-aware, and in that case, it is guaranteed
+that the packet is also VLAN-tagged with the VLAN ID that the bridge processed
+this packet in. It is the responsibility of the hardware to untag the VID on
+the egress-untagged ports, or keep the tag on the egress-tagged ones.
+
- ``port_bridge_join``: bridge layer function invoked when a given switch port is
added to a bridge, this function should do what's necessary at the switch
level to permit the joining port to be added to the relevant logical
domain for it to ingress/egress traffic with other members of the bridge.
+ By setting the ``tx_fwd_offload`` argument to true, the TX forwarding process
+ of this bridge is also offloaded.
- ``port_bridge_leave``: bridge layer function invoked when a given switch port is
removed from a bridge, this function should do what's necessary at the
switch level to deny the leaving port from ingress/egress traffic from the
- remaining bridge members. When the port leaves the bridge, it should be aged
- out at the switch hardware for the switch to (re) learn MAC addresses behind
- this port.
+ remaining bridge members.
- ``port_stp_state_set``: bridge layer function invoked when a given switch port STP
state is computed by the bridge layer and should be propagated to switch
- hardware to forward/block/learn traffic. The switch driver is responsible for
- computing a STP state change based on current and asked parameters and perform
- the relevant ageing based on the intersection results
+ hardware to forward/block/learn traffic.
- ``port_bridge_flags``: bridge layer function invoked when a port must
configure its settings for e.g. flooding of unknown traffic or source address
@@ -667,21 +931,11 @@ Bridge layer
CPU port, and flooding towards the CPU port should also be enabled, due to a
lack of an explicit address filtering mechanism in the DSA core.
-- ``port_bridge_tx_fwd_offload``: bridge layer function invoked after
- ``port_bridge_join`` when a driver sets ``ds->num_fwd_offloading_bridges`` to
- a non-zero value. Returning success in this function activates the TX
- forwarding offload bridge feature for this port, which enables the tagging
- protocol driver to inject data plane packets towards the bridging domain that
- the port is a part of. Data plane packets are subject to FDB lookup, hardware
- learning on the CPU port, and do not override the port STP state.
- Additionally, replication of data plane packets (multicast, flooding) is
- handled in hardware and the bridge driver will transmit a single skb for each
- packet that needs replication. The method is provided as a configuration
- point for drivers that need to configure the hardware for enabling this
- feature.
-
-- ``port_bridge_tx_fwd_unoffload``: bridge layer function invoked when a driver
- leaves a bridge port which had the TX forwarding offload feature enabled.
+- ``port_fast_age``: bridge layer function invoked when flushing the
+ dynamically learned FDB entries on the port is necessary. This is called when
+ transitioning from an STP state where learning should take place to an STP
+ state where it shouldn't, or when leaving a bridge, or when address learning
+ is turned off via ``port_bridge_flags``.
Bridge VLAN filtering
---------------------
@@ -697,55 +951,44 @@ Bridge VLAN filtering
allowed.
- ``port_vlan_add``: bridge layer function invoked when a VLAN is configured
- (tagged or untagged) for the given switch port. If the operation is not
- supported by the hardware, this function should return ``-EOPNOTSUPP`` to
- inform the bridge code to fallback to a software implementation.
+ (tagged or untagged) for the given switch port. The CPU port becomes a member
+ of a VLAN only if a foreign bridge port is also a member of it (and
+ forwarding needs to take place in software), or the VLAN is installed to the
+ VLAN group of the bridge device itself, for termination purposes
+ (``bridge vlan add dev br0 vid 100 self``). VLANs on shared ports are
+ reference counted and removed when there is no user left. Drivers do not need
+ to manually install a VLAN on the CPU port.
- ``port_vlan_del``: bridge layer function invoked when a VLAN is removed from the
given switch port
-- ``port_vlan_dump``: bridge layer function invoked with a switchdev callback
- function that the driver has to call for each VLAN the given port is a member
- of. A switchdev object is used to carry the VID and bridge flags.
-
- ``port_fdb_add``: bridge layer function invoked when the bridge wants to install a
Forwarding Database entry, the switch hardware should be programmed with the
specified address in the specified VLAN Id in the forwarding database
- associated with this VLAN ID. If the operation is not supported, this
- function should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to
- a software implementation.
-
-.. note:: VLAN ID 0 corresponds to the port private database, which, in the context
- of DSA, would be its port-based VLAN, used by the associated bridge device.
+ associated with this VLAN ID.
- ``port_fdb_del``: bridge layer function invoked when the bridge wants to remove a
Forwarding Database entry, the switch hardware should be programmed to delete
the specified MAC address from the specified VLAN ID if it was mapped into
this port forwarding database
-- ``port_fdb_dump``: bridge layer function invoked with a switchdev callback
- function that the driver has to call for each MAC address known to be behind
- the given port. A switchdev object is used to carry the VID and FDB info.
+- ``port_fdb_dump``: bridge bypass function invoked by ``ndo_fdb_dump`` on the
+ physical DSA port interfaces. Since DSA does not attempt to keep in sync its
+ hardware FDB entries with the software bridge, this method is implemented as
+ a means to view the entries visible on user ports in the hardware database.
+ The entries reported by this function have the ``self`` flag in the output of
+ the ``bridge fdb show`` command.
- ``port_mdb_add``: bridge layer function invoked when the bridge wants to install
- a multicast database entry. If the operation is not supported, this function
- should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to a
- software implementation. The switch hardware should be programmed with the
+ a multicast database entry. The switch hardware should be programmed with the
specified address in the specified VLAN ID in the forwarding database
associated with this VLAN ID.
-.. note:: VLAN ID 0 corresponds to the port private database, which, in the context
- of DSA, would be its port-based VLAN, used by the associated bridge device.
-
- ``port_mdb_del``: bridge layer function invoked when the bridge wants to remove a
multicast database entry, the switch hardware should be programmed to delete
the specified MAC address from the specified VLAN ID if it was mapped into
this port forwarding database.
-- ``port_mdb_dump``: bridge layer function invoked with a switchdev callback
- function that the driver has to call for each MAC address known to be behind
- the given port. A switchdev object is used to carry the VID and MDB info.
-
Link aggregation
----------------
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 9f41961d11d5..d7a1bf1a55b5 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -1052,11 +1052,7 @@ udp_rmem_min - INTEGER
Default: 4K
udp_wmem_min - INTEGER
- Minimal size of send buffer used by UDP sockets in moderation.
- Each UDP socket is able to use the size for sending data, even if
- total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
-
- Default: 4K
+ UDP does not have tx memory accounting and this tunable has no effect.
RAW variables
=============
@@ -1085,7 +1081,7 @@ cipso_cache_enable - BOOLEAN
cipso_cache_bucket_size - INTEGER
The CIPSO label cache consists of a fixed size hash table with each
hash bucket containing a number of cache entries. This variable limits
- the number of entries in each hash bucket; the larger the value the
+ the number of entries in each hash bucket; the larger the value is, the
more CIPSO label mappings that can be cached. When the number of
entries in a given hash bucket reaches this limit adding new entries
causes the oldest entry in the bucket to be removed to make room.
@@ -1179,7 +1175,7 @@ ip_autobind_reuse - BOOLEAN
option should only be set by experts.
Default: 0
-ip_dynaddr - BOOLEAN
+ip_dynaddr - INTEGER
If set non-zero, enables support for dynamic addresses.
If set to a non-zero value larger than 1, a kernel log
message will be printed when dynamic address rewriting
@@ -2870,7 +2866,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max
Default: 4K
sctp_wmem - vector of 3 INTEGERs: min, default, max
- Currently this tunable has no effect.
+ Only the first value ("min") is used; "default" and "max" are
+ ignored.
+
+ min: Minimum size of send buffer that can be used by SCTP sockets.
+ It is guaranteed to each SCTP socket (but not association) even
+ under moderate memory pressure.
+
+ Default: 4K
addr_scope_policy - INTEGER
Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
diff --git a/Documentation/sound/soc/dai.rst b/Documentation/sound/soc/dai.rst
index 009b07e5a0f3..bf8431386d26 100644
--- a/Documentation/sound/soc/dai.rst
+++ b/Documentation/sound/soc/dai.rst
@@ -10,7 +10,7 @@ AC97
====
AC97 is a five wire interface commonly found on many PC sound cards. It is
-now also popular in many portable devices. This DAI has a reset line and time
+now also popular in many portable devices. This DAI has a RESET line and time
multiplexes its data on its SDATA_OUT (playback) and SDATA_IN (capture) lines.
The bit clock (BCLK) is always driven by the CODEC (usually 12.288MHz) and the
frame (FRAME) (usually 48kHz) is always driven by the controller. Each AC97
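(Editorial aside, not part of the patch: the two clock figures above are
consistent with the AC97 frame layout of one 16-bit tag slot plus twelve
20-bit data slots per frame.) ::

  \frac{12.288\,\mathrm{MHz}}{48\,\mathrm{kHz}} = 256\ \mathrm{bits\ per\ frame} = 16 + 12 \times 20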
diff --git a/Documentation/staging/static-keys.rst b/Documentation/staging/static-keys.rst
index 38290b9f25eb..b0a519f456cf 100644
--- a/Documentation/staging/static-keys.rst
+++ b/Documentation/staging/static-keys.rst
@@ -201,9 +201,6 @@ static_key->entry field makes use of the two least significant bits.
* ``void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type)``,
see: arch/x86/kernel/jump_label.c
-* ``__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type)``,
- see: arch/x86/kernel/jump_label.c
-
* ``struct jump_entry``,
see: arch/x86/include/asm/jump_label.h
diff --git a/Documentation/translations/it_IT/core-api/symbol-namespaces.rst b/Documentation/translations/it_IT/core-api/symbol-namespaces.rst
index 42f5d04e38ec..0f6898860d6d 100644
--- a/Documentation/translations/it_IT/core-api/symbol-namespaces.rst
+++ b/Documentation/translations/it_IT/core-api/symbol-namespaces.rst
@@ -50,9 +50,9 @@ Di conseguenza, nella tabella dei simboli del kernel ci sarà una voce
rappresentata dalla struttura ``kernel_symbol`` che avrà il campo
``namespace`` (spazio dei nomi) impostato. Un simbolo esportato senza uno spazio
dei nomi avrà questo campo impostato a ``NULL``. Non esiste uno spazio dei nomi
-di base. Il programma ``modpost`` e il codice in kernel/module.c usano lo spazio
-dei nomi, rispettivamente, durante la compilazione e durante il caricamento
-di un modulo.
+di base. Il programma ``modpost`` e il codice in kernel/module/main.c usano lo
+spazio dei nomi, rispettivamente, durante la compilazione e durante il
+caricamento di un modulo.
2.2 Usare il simbolo di preprocessore DEFAULT_SYMBOL_NAMESPACE
==============================================================
diff --git a/Documentation/translations/zh_CN/core-api/kernel-api.rst b/Documentation/translations/zh_CN/core-api/kernel-api.rst
index e45fe80d1cd8..962d31d019d7 100644
--- a/Documentation/translations/zh_CN/core-api/kernel-api.rst
+++ b/Documentation/translations/zh_CN/core-api/kernel-api.rst
@@ -224,7 +224,7 @@ kernel/kmod.c
模块接口支持
------------
-更多信息请参考文件kernel/module.c。
+更多信息请参阅kernel/module/目录下的文件。
硬件接口
========
diff --git a/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst b/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst
index 6abf7ed534ca..bb16f0611046 100644
--- a/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst
+++ b/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst
@@ -52,7 +52,7 @@
相应的 ksymtab 条目结构体 ``kernel_symbol`` 将有相应的成员 ``命名空间`` 集。
导出时未指明命名空间的符号将指向 ``NULL`` 。如果没有定义命名空间,则默认没有。
-``modpost`` 和kernel/module.c分别在构建时或模块加载时使用名称空间。
+``modpost`` 和kernel/module/main.c分别在构建时或模块加载时使用名称空间。
2.2 使用DEFAULT_SYMBOL_NAMESPACE定义
====================================
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 11e00a46c610..98a283930307 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5657,7 +5657,8 @@ by a string of size ``name_size``.
#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT)
- #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES
+ #define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT)
+ #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN
#define KVM_STATS_BASE_SHIFT 8
#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT)
@@ -5702,14 +5703,13 @@ Bits 0-3 of ``flags`` encode the type:
by the ``hist_param`` field. The range of the Nth bucket (1 <= N < ``size``)
is [``hist_param``*(N-1), ``hist_param``*N), while the range of the last
bucket is [``hist_param``*(``size``-1), +INF). (+INF means positive infinity
- value.) The bucket value indicates how many samples fell in the bucket's range.
+ value.)
* ``KVM_STATS_TYPE_LOG_HIST``
The statistic is reported as a logarithmic histogram. The number of
buckets is specified by the ``size`` field. The range of the first bucket is
[0, 1), while the range of the last bucket is [pow(2, ``size``-2), +INF).
Otherwise, the Nth bucket (1 < N < ``size``) covers
- [pow(2, N-2), pow(2, N-1)). The bucket value indicates how many samples fell
- in the bucket's range.
+ [pow(2, N-2), pow(2, N-1)).
Bits 4-7 of ``flags`` encode the unit:
@@ -5724,6 +5724,15 @@ Bits 4-7 of ``flags`` encode the unit:
It indicates that the statistics data is used to measure time or latency.
* ``KVM_STATS_UNIT_CYCLES``
It indicates that the statistics data is used to measure CPU clock cycles.
+ * ``KVM_STATS_UNIT_BOOLEAN``
+ It indicates that the statistic will always be either 0 or 1. Boolean
+ statistics of "peak" type will never go back from 1 to 0. Boolean
+ statistics can be linear histograms (with two buckets) but not logarithmic
+ histograms.
+
+Note that, in the case of histograms, the unit applies to the bucket
+ranges, while the bucket value indicates how many samples fell in the
+bucket's range.
Bits 8-11 of ``flags``, together with ``exponent``, encode the scale of the
unit:
@@ -5746,7 +5755,7 @@ the corresponding statistics data.
The ``bucket_size`` field is used as a parameter for histogram statistics data.
It is only used by linear histogram statistics data, specifying the size of a
-bucket.
+bucket in the unit expressed by bits 4-11 of ``flags`` together with ``exponent``.
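(Editorial aside, not part of the patch: a userspace consumer could decode
these fields roughly as below. The sketch assumes the ``KVM_STATS_*`` masks
and the ``struct kvm_stats_desc`` layout exported by include/uapi/linux/kvm.h,
and only prints how one descriptor should be interpreted.) ::

  #include <linux/kvm.h>
  #include <math.h>
  #include <stdio.h>

  static void describe_stat(const struct kvm_stats_desc *d)
  {
          unsigned int type = d->flags & KVM_STATS_TYPE_MASK;
          unsigned int unit = d->flags & KVM_STATS_UNIT_MASK;
          unsigned int base = d->flags & KVM_STATS_BASE_MASK;
          /* scale factor: base ^ exponent, applied to values and bucket_size */
          double scale = pow(base == KVM_STATS_BASE_POW2 ? 2.0 : 10.0,
                             d->exponent);

          printf("%s: %u value(s), scaled by %g\n",
                 d->name, (unsigned int)d->size, scale);

          if (type == KVM_STATS_TYPE_LINEAR_HIST)
                  /* bucket N (0-based, except the last) covers
                   * [N * bucket_size, (N + 1) * bucket_size) in the unit above
                   */
                  printf("  linear histogram, bucket width %u\n",
                         d->bucket_size);

          if (unit == KVM_STATS_UNIT_BOOLEAN)
                  printf("  boolean: every value is 0 or 1\n");
  }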
The ``name`` field is the name string of the statistics data. The name string
starts at the end of ``struct kvm_stats_desc``. The maximum length including
diff --git a/Documentation/virt/kvm/arm/hyp-abi.rst b/Documentation/virt/kvm/arm/hyp-abi.rst
index 4d43fbc25195..412b276449d3 100644
--- a/Documentation/virt/kvm/arm/hyp-abi.rst
+++ b/Documentation/virt/kvm/arm/hyp-abi.rst
@@ -60,12 +60,13 @@ these functions (see arch/arm{,64}/include/asm/virt.h):
* ::
- x0 = HVC_VHE_RESTART (arm64 only)
+ x0 = HVC_FINALISE_EL2 (arm64 only)
- Attempt to upgrade the kernel's exception level from EL1 to EL2 by enabling
- the VHE mode. This is conditioned by the CPU supporting VHE, the EL2 MMU
- being off, and VHE not being disabled by any other means (command line
- option, for example).
+ Finish configuring EL2 depending on the command-line options,
+ including an attempt to upgrade the kernel's exception level from
+ EL1 to EL2 by enabling the VHE mode. This is conditioned by the CPU
+ supporting VHE, the EL2 MMU being off, and VHE not being disabled by
+ any other means (command line option, for example).
Any other value of r0/x0 triggers a hypervisor-specific handling,
which is not documented here.
diff --git a/MAINTAINERS b/MAINTAINERS
index 0f9366144d31..eac4d36eab2a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1038,6 +1038,7 @@ F: arch/arm64/boot/dts/amd/
AMD XGBE DRIVER
M: Tom Lendacky <thomas.lendacky@amd.com>
+M: "Shyam Sundar S K" <Shyam-sundar.S-k@amd.com>
L: netdev@vger.kernel.org
S: Supported
F: arch/arm64/boot/dts/amd/amd-seattle-xgbe*.dtsi
@@ -2497,10 +2498,8 @@ F: drivers/power/reset/oxnas-restart.c
N: oxnas
ARM/PALM TREO SUPPORT
-M: Tomas Cech <sleep_walker@suse.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S: Maintained
-W: http://hackndev.com
+S: Orphan
F: arch/arm/mach-pxa/palmtreo.*
ARM/PALMTX,PALMT5,PALMLD,PALMTE2,PALMTC SUPPORT
@@ -7520,6 +7519,8 @@ F: include/video/s1d13xxxfb.h
EROFS FILE SYSTEM
M: Gao Xiang <xiang@kernel.org>
M: Chao Yu <chao@kernel.org>
+R: Yue Hu <huyue2@coolpad.com>
+R: Jeffle Xu <jefflexu@linux.alibaba.com>
L: linux-erofs@lists.ozlabs.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git
@@ -9071,6 +9072,12 @@ F: Documentation/admin-guide/perf/hisi-pcie-pmu.rst
F: Documentation/admin-guide/perf/hisi-pmu.rst
F: drivers/perf/hisilicon
+HISILICON HNS3 PMU DRIVER
+M: Guangbin Huang <huangguangbin2@huawei.com>
+S: Supported
+F: Documentation/admin-guide/perf/hns3-pmu.rst
+F: drivers/perf/hisilicon/hns3_pmu.c
+
HISILICON QM AND ZIP Controller DRIVER
M: Zhou Wang <wangzhou1@hisilicon.com>
L: linux-crypto@vger.kernel.org
@@ -9653,6 +9660,7 @@ F: drivers/input/misc/ideapad_slidebar.c
IDMAPPED MOUNTS
M: Christian Brauner <brauner@kernel.org>
+M: Seth Forshee <sforshee@kernel.org>
L: linux-fsdevel@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git
@@ -14397,7 +14405,8 @@ S: Maintained
F: drivers/net/phy/nxp-c45-tja11xx.c
NXP FSPI DRIVER
-M: Ashish Kumar <ashish.kumar@nxp.com>
+M: Han Xu <han.xu@nxp.com>
+M: Haibo Chen <haibo.chen@nxp.com>
R: Yogesh Gaur <yogeshgaur.83@gmail.com>
L: linux-spi@vger.kernel.org
S: Maintained
@@ -15883,7 +15892,7 @@ PIN CONTROLLER - FREESCALE
M: Dong Aisheng <aisheng.dong@nxp.com>
M: Fabio Estevam <festevam@gmail.com>
M: Shawn Guo <shawnguo@kernel.org>
-M: Stefan Agner <stefan@agner.ch>
+M: Jacky Bai <ping.bai@nxp.com>
R: Pengutronix Kernel Team <kernel@pengutronix.de>
L: linux-gpio@vger.kernel.org
S: Maintained
@@ -17307,12 +17316,15 @@ N: riscv
K: riscv
RISC-V/MICROCHIP POLARFIRE SOC SUPPORT
-M: Lewis Hanly <lewis.hanly@microchip.com>
M: Conor Dooley <conor.dooley@microchip.com>
+M: Daire McNamara <daire.mcnamara@microchip.com>
L: linux-riscv@lists.infradead.org
S: Supported
F: arch/riscv/boot/dts/microchip/
+F: drivers/char/hw_random/mpfs-rng.c
+F: drivers/clk/microchip/clk-mpfs.c
F: drivers/mailbox/mailbox-mpfs.c
+F: drivers/pci/controller/pcie-microchip-host.c
F: drivers/soc/microchip/
F: include/soc/microchip/mpfs.h
diff --git a/Makefile b/Makefile
index faa4880f25f7..df92892325ae 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 19
SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION =
NAME = Superb Owl
# *DOCUMENTATION*
diff --git a/arch/Kconfig b/arch/Kconfig
index fcf9a41a4ef5..5ea3e3838c21 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -223,6 +223,9 @@ config HAVE_FUNCTION_DESCRIPTORS
config TRACE_IRQFLAGS_SUPPORT
bool
+config TRACE_IRQFLAGS_NMI_SUPPORT
+ bool
+
#
# An arch should select this if it provides all these things:
#
@@ -438,6 +441,13 @@ config MMU_GATHER_PAGE_SIZE
config MMU_GATHER_NO_RANGE
bool
+ select MMU_GATHER_MERGE_VMAS
+
+config MMU_GATHER_NO_FLUSH_CACHE
+ bool
+
+config MMU_GATHER_MERGE_VMAS
+ bool
config MMU_GATHER_NO_GATHER
bool
diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index f6d2946edbd2..15f2effd6baf 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -60,7 +60,7 @@ int irq_select_affinity(unsigned int irq)
cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0);
last_cpu = cpu;
- cpumask_copy(irq_data_get_affinity_mask(data), cpumask_of(cpu));
+ irq_data_update_affinity(data, cpumask_of(cpu));
chip->irq_set_affinity(data, cpumask_of(cpu), false);
return 0;
}
diff --git a/arch/arc/kernel/jump_label.c b/arch/arc/kernel/jump_label.c
index b8600dc325b5..70b74a5d047b 100644
--- a/arch/arc/kernel/jump_label.c
+++ b/arch/arc/kernel/jump_label.c
@@ -96,19 +96,6 @@ void arch_jump_label_transform(struct jump_entry *entry,
flush_icache_range(entry->code, entry->code + JUMP_LABEL_NOP_SIZE);
}
-void arch_jump_label_transform_static(struct jump_entry *entry,
- enum jump_label_type type)
-{
- /*
- * We use only one NOP type (1x, 4 byte) in arch_static_branch, so
- * there's no need to patch an identical NOP over the top of it here.
- * The generic code calls 'arch_jump_label_transform' if the NOP needs
- * to be replaced by a branch, so 'arch_jump_label_transform_static' is
- * never called with type other than JUMP_LABEL_NOP.
- */
- BUG_ON(type != JUMP_LABEL_NOP);
-}
-
#ifdef CONFIG_ARC_DBG_JUMP_LABEL
#define SELFTEST_MSG "ARC: instruction generation self-test: "
diff --git a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi
index fded07f370b3..d6ba4b2a60f6 100644
--- a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi
@@ -226,7 +226,7 @@
reg = <0x28>;
#gpio-cells = <2>;
gpio-controller;
- ngpio = <32>;
+ ngpios = <62>;
};
sgtl5000: codec@a {
diff --git a/arch/arm/boot/dts/imx6ull-colibri.dtsi b/arch/arm/boot/dts/imx6ull-colibri.dtsi
index 15621e03fa4d..2c3ae715c683 100644
--- a/arch/arm/boot/dts/imx6ull-colibri.dtsi
+++ b/arch/arm/boot/dts/imx6ull-colibri.dtsi
@@ -166,7 +166,7 @@
atmel_mxt_ts: touchscreen@4a {
compatible = "atmel,maxtouch";
pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_atmel_conn>;
+ pinctrl-0 = <&pinctrl_atmel_conn &pinctrl_atmel_snvs_conn>;
reg = <0x4a>;
interrupt-parent = <&gpio5>;
interrupts = <4 IRQ_TYPE_EDGE_FALLING>; /* SODIMM 107 / INT */
@@ -331,7 +331,6 @@
pinctrl_atmel_conn: atmelconngrp {
fsl,pins = <
MX6UL_PAD_JTAG_MOD__GPIO1_IO10 0xb0a0 /* SODIMM 106 */
- MX6ULL_PAD_SNVS_TAMPER4__GPIO5_IO04 0xb0a0 /* SODIMM 107 */
>;
};
@@ -684,6 +683,12 @@
};
&iomuxc_snvs {
+ pinctrl_atmel_snvs_conn: atmelsnvsconngrp {
+ fsl,pins = <
+ MX6ULL_PAD_SNVS_TAMPER4__GPIO5_IO04 0xb0a0 /* SODIMM 107 */
+ >;
+ };
+
pinctrl_snvs_gpio1: snvsgpio1grp {
fsl,pins = <
MX6ULL_PAD_SNVS_TAMPER6__GPIO5_IO06 0x110a0 /* SODIMM 93 */
diff --git a/arch/arm/boot/dts/lan966x-kontron-kswitch-d10-mmt.dtsi b/arch/arm/boot/dts/lan966x-kontron-kswitch-d10-mmt.dtsi
index 4cab1b3b3b29..725dcf707b31 100644
--- a/arch/arm/boot/dts/lan966x-kontron-kswitch-d10-mmt.dtsi
+++ b/arch/arm/boot/dts/lan966x-kontron-kswitch-d10-mmt.dtsi
@@ -87,22 +87,22 @@
phy4: ethernet-phy@5 {
reg = <5>;
- coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>;
+ coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>;
};
phy5: ethernet-phy@6 {
reg = <6>;
- coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>;
+ coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>;
};
phy6: ethernet-phy@7 {
reg = <7>;
- coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>;
+ coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>;
};
phy7: ethernet-phy@8 {
reg = <8>;
- coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>;
+ coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>;
};
};
diff --git a/arch/arm/boot/dts/lan966x.dtsi b/arch/arm/boot/dts/lan966x.dtsi
index 3cb02fffe716..38e90a31d2dd 100644
--- a/arch/arm/boot/dts/lan966x.dtsi
+++ b/arch/arm/boot/dts/lan966x.dtsi
@@ -38,7 +38,7 @@
sys_clk: sys_clk {
compatible = "fixed-clock";
#clock-cells = <0>;
- clock-frequency = <162500000>;
+ clock-frequency = <165625000>;
};
cpu_clk: cpu_clk {
diff --git a/arch/arm/boot/dts/qcom-msm8974.dtsi b/arch/arm/boot/dts/qcom-msm8974.dtsi
index 814ad0b46232..c3b8a6d63027 100644
--- a/arch/arm/boot/dts/qcom-msm8974.dtsi
+++ b/arch/arm/boot/dts/qcom-msm8974.dtsi
@@ -506,6 +506,8 @@
interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc GCC_BLSP1_UART2_APPS_CLK>, <&gcc GCC_BLSP1_AHB_CLK>;
clock-names = "core", "iface";
+ pinctrl-names = "default";
+ pinctrl-0 = <&blsp1_uart2_default>;
status = "disabled";
};
@@ -581,6 +583,9 @@
interrupts = <GIC_SPI 113 IRQ_TYPE_NONE>;
clocks = <&gcc GCC_BLSP2_UART1_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>;
clock-names = "core", "iface";
+ pinctrl-names = "default", "sleep";
+ pinctrl-0 = <&blsp2_uart1_default>;
+ pinctrl-1 = <&blsp2_uart1_sleep>;
status = "disabled";
};
@@ -599,6 +604,8 @@
interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc GCC_BLSP2_UART4_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>;
clock-names = "core", "iface";
+ pinctrl-names = "default";
+ pinctrl-0 = <&blsp2_uart4_default>;
status = "disabled";
};
@@ -639,6 +646,9 @@
interrupts = <0 106 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc GCC_BLSP2_QUP6_I2C_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>;
clock-names = "core", "iface";
+ pinctrl-names = "default", "sleep";
+ pinctrl-0 = <&blsp2_i2c6_default>;
+ pinctrl-1 = <&blsp2_i2c6_sleep>;
#address-cells = <1>;
#size-cells = <0>;
};
@@ -1256,7 +1266,7 @@
};
};
- blsp1_uart2_active: blsp1-uart2-active {
+ blsp1_uart2_default: blsp1-uart2-default {
rx {
pins = "gpio5";
function = "blsp_uart2";
@@ -1272,7 +1282,7 @@
};
};
- blsp2_uart1_active: blsp2-uart1-active {
+ blsp2_uart1_default: blsp2-uart1-default {
tx-rts {
pins = "gpio41", "gpio44";
function = "blsp_uart7";
@@ -1295,7 +1305,7 @@
bias-pull-down;
};
- blsp2_uart4_active: blsp2-uart4-active {
+ blsp2_uart4_default: blsp2-uart4-default {
tx-rts {
pins = "gpio53", "gpio56";
function = "blsp_uart10";
@@ -1406,7 +1416,19 @@
bias-pull-up;
};
- /* BLSP2_I2C6 info is missing - nobody uses it though? */
+ blsp2_i2c6_default: blsp2-i2c6-default {
+ pins = "gpio87", "gpio88";
+ function = "blsp_i2c12";
+ drive-strength = <2>;
+ bias-disable;
+ };
+
+ blsp2_i2c6_sleep: blsp2-i2c6-sleep {
+ pins = "gpio87", "gpio88";
+ function = "blsp_i2c12";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
spi8_default: spi8_default {
mosi {
diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi
index 89c71d419f82..659a17fc755c 100644
--- a/arch/arm/boot/dts/sama5d2.dtsi
+++ b/arch/arm/boot/dts/sama5d2.dtsi
@@ -1124,7 +1124,7 @@
clocks = <&pmc PMC_TYPE_PERIPHERAL 55>, <&pmc PMC_TYPE_GCK 55>;
clock-names = "pclk", "gclk";
assigned-clocks = <&pmc PMC_TYPE_CORE PMC_I2S1_MUX>;
- assigned-parrents = <&pmc PMC_TYPE_GCK 55>;
+ assigned-clock-parents = <&pmc PMC_TYPE_GCK 55>;
status = "disabled";
};
diff --git a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
index f19ed981da9d..3706216ffb40 100644
--- a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
+++ b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
@@ -169,7 +169,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "mxicy,mx25l1606e", "winbond,w25q128";
+ compatible = "mxicy,mx25l1606e", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <40000000>;
};
diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h
index a81dda65c576..45180a2cc47c 100644
--- a/arch/arm/include/asm/dma.h
+++ b/arch/arm/include/asm/dma.h
@@ -10,7 +10,7 @@
#else
#define MAX_DMA_ADDRESS ({ \
extern phys_addr_t arm_dma_zone_size; \
- arm_dma_zone_size && arm_dma_zone_size < (0x10000000 - PAGE_OFFSET) ? \
+ arm_dma_zone_size && arm_dma_zone_size < (0x100000000ULL - PAGE_OFFSET) ? \
(PAGE_OFFSET + arm_dma_zone_size) : 0xffffffffUL; })
#endif
diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h
index f1d0a7807cd0..41536feb4392 100644
--- a/arch/arm/include/asm/domain.h
+++ b/arch/arm/include/asm/domain.h
@@ -112,19 +112,6 @@ static __always_inline void set_domain(unsigned int val)
}
#endif
-#ifdef CONFIG_CPU_USE_DOMAINS
-#define modify_domain(dom,type) \
- do { \
- unsigned int domain = get_domain(); \
- domain &= ~domain_mask(dom); \
- domain = domain | domain_val(dom, type); \
- set_domain(domain); \
- } while (0)
-
-#else
-static inline void modify_domain(unsigned dom, unsigned type) { }
-#endif
-
/*
* Generate the T (user) versions of the LDR/STR and related
* instructions (inline assembly)
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index eba7cbc93b86..7fcdc785366c 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -139,11 +139,9 @@ extern void __iomem *__arm_ioremap_caller(phys_addr_t, size_t, unsigned int,
extern void __iomem *__arm_ioremap_pfn(unsigned long, unsigned long, size_t, unsigned int);
extern void __iomem *__arm_ioremap_exec(phys_addr_t, size_t, bool cached);
void __arm_iomem_set_ro(void __iomem *ptr, size_t size);
-extern void __iounmap(volatile void __iomem *addr);
extern void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t,
unsigned int, void *);
-extern void (*arch_iounmap)(volatile void __iomem *);
/*
* Bad read/write accesses...
@@ -380,7 +378,7 @@ void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size);
#define ioremap_wc ioremap_wc
#define ioremap_wt ioremap_wc
-void iounmap(volatile void __iomem *iomem_cookie);
+void iounmap(volatile void __iomem *io_addr);
#define iounmap iounmap
void *arch_memremap_wb(phys_addr_t phys_addr, size_t size);
diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h
index 92282558caf7..2b8970d8e5a2 100644
--- a/arch/arm/include/asm/mach/map.h
+++ b/arch/arm/include/asm/mach/map.h
@@ -27,6 +27,7 @@ enum {
MT_HIGH_VECTORS,
MT_MEMORY_RWX,
MT_MEMORY_RW,
+ MT_MEMORY_RO,
MT_ROM,
MT_MEMORY_RWX_NONCACHED,
MT_MEMORY_RW_DTCM,
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index 93051e2f402c..1408a6a15d0e 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -163,5 +163,31 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs)
((current_stack_pointer | (THREAD_SIZE - 1)) - 7) - 1; \
})
+
+/*
+ * Update ITSTATE after normal execution of an IT block instruction.
+ *
+ * The 8 IT state bits are split into two parts in CPSR:
+ * ITSTATE<1:0> are in CPSR<26:25>
+ * ITSTATE<7:2> are in CPSR<15:10>
+ */
+static inline unsigned long it_advance(unsigned long cpsr)
+{
+ if ((cpsr & 0x06000400) == 0) {
+ /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */
+ cpsr &= ~PSR_IT_MASK;
+ } else {
+ /* We need to shift left ITSTATE<4:0> */
+ const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */
+ unsigned long it = cpsr & mask;
+ it <<= 1;
+ it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */
+ it &= mask;
+ cpsr &= ~mask;
+ cpsr |= it;
+ }
+ return cpsr;
+}
+
#endif /* __ASSEMBLY__ */
#endif
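(Editorial aside, not part of the patch: the ITSTATE shuffling above can be
checked with a stand-alone user-space copy of the same logic. The value of
``PSR_IT_MASK`` is assumed to match arch/arm's uapi ptrace.h.) ::

  #include <stdio.h>

  #define PSR_IT_MASK 0x0600fc00UL /* assumed: ITSTATE<7:0> bits within CPSR */

  static unsigned long it_advance(unsigned long cpsr)
  {
          if ((cpsr & 0x06000400) == 0) {
                  /* ITSTATE<2:0> == 0: end of IT block, clear IT state */
                  cpsr &= ~PSR_IT_MASK;
          } else {
                  /* shift ITSTATE<4:0> left by one, as in the hunk above */
                  const unsigned long mask = 0x06001c00;
                  unsigned long it = cpsr & mask;

                  it <<= 1;
                  it |= it >> (27 - 10);
                  it &= mask;
                  cpsr &= ~mask;
                  cpsr |= it;
          }
          return cpsr;
  }

  int main(void)
  {
          /* ITSTATE 0b00000110: bit 26 holds ITSTATE<1>, bit 10 holds ITSTATE<2> */
          unsigned long cpsr = 0x04000400UL;

          /* prints 0x04000400 -> 0x00000c00, i.e. ITSTATE advanced to 0b00001100 */
          printf("0x%08lx -> 0x%08lx\n", cpsr, it_advance(cpsr));
          return 0;
  }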
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 7aa3ded4af92..6a447ac67d80 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -302,6 +302,7 @@ local_restart:
b ret_fast_syscall
#endif
ENDPROC(vector_swi)
+ .ltorg
/*
* This is the really slow path. We're going to be doing
diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c
index 303b3ab87f7e..eb9c24b6e8e2 100644
--- a/arch/arm/kernel/jump_label.c
+++ b/arch/arm/kernel/jump_label.c
@@ -27,9 +27,3 @@ void arch_jump_label_transform(struct jump_entry *entry,
{
__arch_jump_label_transform(entry, type, false);
}
-
-void arch_jump_label_transform_static(struct jump_entry *entry,
- enum jump_label_type type)
-{
- __arch_jump_label_transform(entry, type, true);
-}
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
index b5e8b9ae4c7d..7fd3600db8ef 100644
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -40,8 +40,8 @@ ENDPROC(_find_first_zero_bit_le)
* Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
*/
ENTRY(_find_next_zero_bit_le)
- teq r1, #0
- beq 3b
+ cmp r2, r1
+ bhs 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
ARM( ldrb r3, [r0, r2, lsr #3] )
@@ -81,8 +81,8 @@ ENDPROC(_find_first_bit_le)
* Prototype: int find_next_bit(void *addr, unsigned int maxbit, int offset)
*/
ENTRY(_find_next_bit_le)
- teq r1, #0
- beq 3b
+ cmp r2, r1
+ bhs 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
ARM( ldrb r3, [r0, r2, lsr #3] )
@@ -115,8 +115,8 @@ ENTRY(_find_first_zero_bit_be)
ENDPROC(_find_first_zero_bit_be)
ENTRY(_find_next_zero_bit_be)
- teq r1, #0
- beq 3b
+ cmp r2, r1
+ bhs 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
eor r3, r2, #0x18 @ big endian byte ordering
@@ -149,8 +149,8 @@ ENTRY(_find_first_bit_be)
ENDPROC(_find_first_bit_be)
ENTRY(_find_next_bit_be)
- teq r1, #0
- beq 3b
+ cmp r2, r1
+ bhs 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
eor r3, r2, #0x18 @ big endian byte ordering
diff --git a/arch/arm/mach-hisi/Kconfig b/arch/arm/mach-hisi/Kconfig
index 75cccbd3f05f..7b3440687176 100644
--- a/arch/arm/mach-hisi/Kconfig
+++ b/arch/arm/mach-hisi/Kconfig
@@ -40,7 +40,7 @@ config ARCH_HIP04
select HAVE_ARM_ARCH_TIMER
select MCPM if SMP
select MCPM_QUAD_CLUSTER if SMP
- select GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
help
Support for Hisilicon HiP04 SoC family
diff --git a/arch/arm/mach-mmp/mmp2.h b/arch/arm/mach-mmp/mmp2.h
index 3ebc1bb13f71..7f80b90248fb 100644
--- a/arch/arm/mach-mmp/mmp2.h
+++ b/arch/arm/mach-mmp/mmp2.h
@@ -5,13 +5,13 @@
#include <linux/platform_data/pxa_sdhci.h>
extern void mmp2_timer_init(void);
-extern void __init mmp2_init_icu(void);
extern void __init mmp2_init_irq(void);
extern void mmp2_clear_pmic_int(void);
#include <linux/i2c.h>
#include <linux/platform_data/i2c-pxa.h>
#include <linux/platform_data/dma-mmp_tdma.h>
+#include <linux/irqchip/mmp.h>
#include "devices.h"
diff --git a/arch/arm/mach-mmp/pxa168.h b/arch/arm/mach-mmp/pxa168.h
index 34f907cd165a..c1547e098f09 100644
--- a/arch/arm/mach-mmp/pxa168.h
+++ b/arch/arm/mach-mmp/pxa168.h
@@ -5,7 +5,6 @@
#include <linux/reboot.h>
extern void pxa168_timer_init(void);
-extern void __init icu_init_irq(void);
extern void __init pxa168_init_irq(void);
extern void pxa168_restart(enum reboot_mode, const char *);
extern void pxa168_clear_keypad_wakeup(void);
@@ -18,6 +17,7 @@ extern void pxa168_clear_keypad_wakeup(void);
#include <linux/pxa168_eth.h>
#include <linux/platform_data/mv_usb.h>
#include <linux/soc/mmp/cputype.h>
+#include <linux/irqchip/mmp.h>
#include "devices.h"
diff --git a/arch/arm/mach-mmp/pxa910.h b/arch/arm/mach-mmp/pxa910.h
index 6ace5a8aa15b..7d229214065a 100644
--- a/arch/arm/mach-mmp/pxa910.h
+++ b/arch/arm/mach-mmp/pxa910.h
@@ -3,13 +3,13 @@
#define __ASM_MACH_PXA910_H
extern void pxa910_timer_init(void);
-extern void __init icu_init_irq(void);
extern void __init pxa910_init_irq(void);
#include <linux/i2c.h>
#include <linux/platform_data/i2c-pxa.h>
#include <linux/platform_data/mtd-nand-pxa3xx.h>
#include <video/mmp_disp.h>
+#include <linux/irqchip/mmp.h>
#include "devices.h"
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index a8adbb4d478a..f7f940282c3f 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -105,6 +105,7 @@ config ARCH_OMAP2PLUS
select MACH_OMAP_GENERIC
select MEMORY
select MFD_SYSCON
+ select OMAP_DM_SYSTIMER
select OMAP_DM_TIMER
select OMAP_GPMC
select PINCTRL
@@ -209,6 +210,7 @@ config SOC_OMAP2420
bool "OMAP2420 support"
depends on ARCH_OMAP2
default y
+ select OMAP_DM_SYSTIMER
select OMAP_DM_TIMER
select SOC_HAS_OMAP2_SDRC
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index c546356d0f02..5738496717e2 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -549,7 +549,7 @@ static struct pxa2xx_spi_controller corgi_spi_info = {
};
static struct gpiod_lookup_table corgi_spi_gpio_table = {
- .dev_id = "pxa2xx-spi.1",
+ .dev_id = "spi1",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW),
GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW),
diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c
index 2ae06edf413c..2fd665944103 100644
--- a/arch/arm/mach-pxa/hx4700.c
+++ b/arch/arm/mach-pxa/hx4700.c
@@ -635,7 +635,7 @@ static struct pxa2xx_spi_controller pxa_ssp2_master_info = {
};
static struct gpiod_lookup_table pxa_ssp2_gpio_table = {
- .dev_id = "pxa2xx-spi.2",
+ .dev_id = "spi2",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_HX4700_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW),
{ },
diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c
index 753fe166ab68..624088257cfc 100644
--- a/arch/arm/mach-pxa/icontrol.c
+++ b/arch/arm/mach-pxa/icontrol.c
@@ -140,7 +140,7 @@ struct platform_device pxa_spi_ssp4 = {
};
static struct gpiod_lookup_table pxa_ssp3_gpio_table = {
- .dev_id = "pxa2xx-spi.3",
+ .dev_id = "spi3",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS1, "cs", 0, GPIO_ACTIVE_LOW),
GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS2, "cs", 1, GPIO_ACTIVE_LOW),
@@ -149,7 +149,7 @@ static struct gpiod_lookup_table pxa_ssp3_gpio_table = {
};
static struct gpiod_lookup_table pxa_ssp4_gpio_table = {
- .dev_id = "pxa2xx-spi.4",
+ .dev_id = "spi4",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS3, "cs", 0, GPIO_ACTIVE_LOW),
GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS4, "cs", 1, GPIO_ACTIVE_LOW),
diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c
index f98dc61e87af..98423a96f440 100644
--- a/arch/arm/mach-pxa/littleton.c
+++ b/arch/arm/mach-pxa/littleton.c
@@ -207,7 +207,7 @@ static struct spi_board_info littleton_spi_devices[] __initdata = {
};
static struct gpiod_lookup_table littleton_spi_gpio_table = {
- .dev_id = "pxa2xx-spi.2",
+ .dev_id = "spi2",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", LITTLETON_GPIO_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW),
{ },
diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
index 20456a55c4c5..0827ebca1d38 100644
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -994,7 +994,7 @@ static struct pxa2xx_spi_controller magician_spi_info = {
};
static struct gpiod_lookup_table magician_spi_gpio_table = {
- .dev_id = "pxa2xx-spi.2",
+ .dev_id = "spi2",
.table = {
/* NOTICE must be GPIO, incompatibility with hw PXA SPI framing */
GPIO_LOOKUP_IDX("gpio-pxa", GPIO14_MAGICIAN_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW),
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index dd88953adc9d..9964729cd428 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -578,7 +578,7 @@ static struct pxa2xx_spi_controller spitz_spi_info = {
};
static struct gpiod_lookup_table spitz_spi_gpio_table = {
- .dev_id = "pxa2xx-spi.2",
+ .dev_id = "spi2",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW),
GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW),
diff --git a/arch/arm/mach-pxa/z2.c b/arch/arm/mach-pxa/z2.c
index d03520555497..c4d4162a7e6e 100644
--- a/arch/arm/mach-pxa/z2.c
+++ b/arch/arm/mach-pxa/z2.c
@@ -623,7 +623,7 @@ static struct pxa2xx_spi_controller pxa_ssp2_master_info = {
};
static struct gpiod_lookup_table pxa_ssp1_gpio_table = {
- .dev_id = "pxa2xx-spi.1",
+ .dev_id = "spi1",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", GPIO24_ZIPITZ2_WIFI_CS, "cs", 0, GPIO_ACTIVE_LOW),
{ },
@@ -631,7 +631,7 @@ static struct gpiod_lookup_table pxa_ssp1_gpio_table = {
};
static struct gpiod_lookup_table pxa_ssp2_gpio_table = {
- .dev_id = "pxa2xx-spi.2",
+ .dev_id = "spi2",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_ZIPITZ2_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW),
{ },
diff --git a/arch/arm/mach-rockchip/pm.c b/arch/arm/mach-rockchip/pm.c
index 87389d9456b9..30d781d80fe0 100644
--- a/arch/arm/mach-rockchip/pm.c
+++ b/arch/arm/mach-rockchip/pm.c
@@ -311,7 +311,7 @@ void __init rockchip_suspend_init(void)
&match);
if (!match) {
pr_err("Failed to find PMU node\n");
- return;
+ goto out_put;
}
pm_data = (struct rockchip_pm_data *) match->data;
@@ -320,9 +320,12 @@ void __init rockchip_suspend_init(void)
if (ret) {
pr_err("%s: matches init error %d\n", __func__, ret);
- return;
+ goto out_put;
}
}
suspend_set_ops(pm_data->ops);
+
+out_put:
+ of_node_put(np);
}
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index a3a4589ec73b..fc439c2c16f8 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -631,7 +631,11 @@ config CPU_USE_DOMAINS
bool
help
This option enables or disables the use of domain switching
- via the set_fs() function.
+ using the DACR (domain access control register) to protect memory
+ domains from each other. In Linux we use three domains: kernel, user
+ and IO. The domains are used to protect userspace from kernelspace
+ and to handle IO-space as a special type of memory by assigning
+ manager or client roles to running code (such as a process).
config CPU_V7M_NUM_IRQ
int "Number of external interrupts connected to the NVIC"
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 6f499559d193..f8dd0b3cc8e0 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -935,6 +935,9 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (type == TYPE_LDST)
do_alignment_finish_ldst(addr, instr, regs, offset);
+ if (thumb_mode(regs))
+ regs->ARM_cpsr = it_advance(regs->ARM_cpsr);
+
return 0;
bad_or_fault:
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 576c0e6c92fc..2129070065c3 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -418,7 +418,7 @@ void *arch_memremap_wb(phys_addr_t phys_addr, size_t size)
__builtin_return_address(0));
}
-void __iounmap(volatile void __iomem *io_addr)
+void iounmap(volatile void __iomem *io_addr)
{
void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr);
struct static_vm *svm;
@@ -446,13 +446,6 @@ void __iounmap(volatile void __iomem *io_addr)
vunmap(addr);
}
-
-void (*arch_iounmap)(volatile void __iomem *) = __iounmap;
-
-void iounmap(volatile void __iomem *cookie)
-{
- arch_iounmap(cookie);
-}
EXPORT_SYMBOL(iounmap);
#if defined(CONFIG_PCI) || IS_ENABLED(CONFIG_PCMCIA)
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 5e2be37a198e..cd17e324aa51 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -296,6 +296,13 @@ static struct mem_type mem_types[] __ro_after_init = {
.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
.domain = DOMAIN_KERNEL,
},
+ [MT_MEMORY_RO] = {
+ .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
+ L_PTE_XN | L_PTE_RDONLY,
+ .prot_l1 = PMD_TYPE_TABLE,
+ .prot_sect = PMD_TYPE_SECT,
+ .domain = DOMAIN_KERNEL,
+ },
[MT_ROM] = {
.prot_sect = PMD_TYPE_SECT,
.domain = DOMAIN_KERNEL,
@@ -489,6 +496,7 @@ static void __init build_mem_type_table(void)
/* Also setup NX memory mapping */
mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN;
+ mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_XN;
}
if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
/*
@@ -568,6 +576,7 @@ static void __init build_mem_type_table(void)
mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
+ mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
#endif
/*
@@ -587,6 +596,8 @@ static void __init build_mem_type_table(void)
mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED;
mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S;
mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED;
+ mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_S;
+ mem_types[MT_MEMORY_RO].prot_pte |= L_PTE_SHARED;
mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED;
mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S;
mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED;
@@ -647,6 +658,8 @@ static void __init build_mem_type_table(void)
mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot;
mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd;
mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot;
+ mem_types[MT_MEMORY_RO].prot_sect |= ecc_mask | cp->pmd;
+ mem_types[MT_MEMORY_RO].prot_pte |= kern_pgprot;
mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot;
mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask;
mem_types[MT_ROM].prot_sect |= cp->pmd;
@@ -1360,7 +1373,7 @@ static void __init devicemaps_init(const struct machine_desc *mdesc)
map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK);
map.virtual = FDT_FIXED_BASE;
map.length = FDT_FIXED_SIZE;
- map.type = MT_ROM;
+ map.type = MT_MEMORY_RO;
create_mapping(&map);
}
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 2658f52903da..c42debaded95 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -230,14 +230,7 @@ void *arch_memremap_wb(phys_addr_t phys_addr, size_t size)
return (void *)phys_addr;
}
-void __iounmap(volatile void __iomem *addr)
-{
-}
-EXPORT_SYMBOL(__iounmap);
-
-void (*arch_iounmap)(volatile void __iomem *);
-
-void iounmap(volatile void __iomem *addr)
+void iounmap(volatile void __iomem *io_addr)
{
}
EXPORT_SYMBOL(iounmap);
diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
index fb9f3eb6bf48..8bc7a2d6d6c7 100644
--- a/arch/arm/mm/proc-v7-bugs.c
+++ b/arch/arm/mm/proc-v7-bugs.c
@@ -108,8 +108,7 @@ static unsigned int spectre_v2_install_workaround(unsigned int method)
#else
static unsigned int spectre_v2_install_workaround(unsigned int method)
{
- pr_info("CPU%u: Spectre V2: workarounds disabled by configuration\n",
- smp_processor_id());
+ pr_info_once("Spectre V2: workarounds disabled by configuration\n");
return SPECTRE_VULNERABLE;
}
@@ -209,10 +208,10 @@ static int spectre_bhb_install_workaround(int method)
return SPECTRE_VULNERABLE;
spectre_bhb_method = method;
- }
- pr_info("CPU%u: Spectre BHB: using %s workaround\n",
- smp_processor_id(), spectre_bhb_method_name(method));
+ pr_info("CPU%u: Spectre BHB: enabling %s workaround for all CPUs\n",
+ smp_processor_id(), spectre_bhb_method_name(method));
+ }
return SPECTRE_MITIGATED;
}
diff --git a/arch/arm/probes/decode.h b/arch/arm/probes/decode.h
index 973173598992..facc889d05ee 100644
--- a/arch/arm/probes/decode.h
+++ b/arch/arm/probes/decode.h
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/stddef.h>
#include <asm/probes.h>
+#include <asm/ptrace.h>
#include <asm/kprobes.h>
void __init arm_probes_decode_init(void);
@@ -35,31 +36,6 @@ void __init find_str_pc_offset(void);
#endif
-/*
- * Update ITSTATE after normal execution of an IT block instruction.
- *
- * The 8 IT state bits are split into two parts in CPSR:
- * ITSTATE<1:0> are in CPSR<26:25>
- * ITSTATE<7:2> are in CPSR<15:10>
- */
-static inline unsigned long it_advance(unsigned long cpsr)
- {
- if ((cpsr & 0x06000400) == 0) {
- /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */
- cpsr &= ~PSR_IT_MASK;
- } else {
- /* We need to shift left ITSTATE<4:0> */
- const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */
- unsigned long it = cpsr & mask;
- it <<= 1;
- it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */
- it &= mask;
- cpsr &= ~mask;
- cpsr |= it;
- }
- return cpsr;
-}
-
static inline void __kprobes bx_write_pc(long pcv, struct pt_regs *regs)
{
long cpsr = regs->ARM_cpsr;
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1652a9800ebe..340e61199057 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -101,6 +101,7 @@ config ARM64
select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WANTS_NO_INSTR
+ select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARM_AMBA
select ARM_ARCH_TIMER
@@ -126,6 +127,7 @@ config ARM64
select GENERIC_CPU_VULNERABILITIES
select GENERIC_EARLY_IOREMAP
select GENERIC_IDLE_POLL_SETUP
+ select GENERIC_IOREMAP
select GENERIC_IRQ_IPI
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
@@ -188,6 +190,7 @@ config ARM64
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT if PERF_EVENTS
+ select HAVE_IOREMAP_PROT
select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_KVM
select HAVE_NMI
@@ -226,6 +229,7 @@ config ARM64
select THREAD_INFO_IN_TASK
select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD
select TRACE_IRQFLAGS_SUPPORT
+ select TRACE_IRQFLAGS_NMI_SUPPORT
help
ARM 64-bit (AArch64) Linux support.
@@ -503,6 +507,22 @@ config ARM64_ERRATUM_834220
If unsure, say Y.
+config ARM64_ERRATUM_1742098
+ bool "Cortex-A57/A72: 1742098: ELR recorded incorrectly on interrupt taken between cryptographic instructions in a sequence"
+ depends on COMPAT
+ default y
+ help
+ This option removes the AES hwcap for aarch32 user-space to
+ work around erratum 1742098 on Cortex-A57 and Cortex-A72.
+
+ Affected parts may corrupt the AES state if an interrupt is
+ taken between a pair of AES instructions. These instructions
+ are only present if the cryptography extensions are present.
+ All software should have a fallback implementation for CPUs
+ that don't implement the cryptography extensions.
+
+ If unsure, say Y.
+
config ARM64_ERRATUM_845719
bool "Cortex-A53: 845719: a load might read incorrect data"
depends on COMPAT
@@ -821,6 +841,23 @@ config ARM64_ERRATUM_2224489
If unsure, say Y.
+config ARM64_ERRATUM_2441009
+ bool "Cortex-A510: Completion of affected memory accesses might not be guaranteed by completion of a TLBI"
+ default y
+ select ARM64_WORKAROUND_REPEAT_TLBI
+ help
+ This option adds a workaround for ARM Cortex-A510 erratum #2441009.
+
+ Under very rare circumstances, affected Cortex-A510 CPUs
+ may not handle a race between a break-before-make sequence on one
+ CPU, and another CPU accessing the same page. This could allow a
+ store to a page that has been unmapped.
+
+ Work around this by adding the affected CPUs to the list that needs
+ TLB sequences to be done twice.
+
+ If unsure, say Y.
+
config ARM64_ERRATUM_2064142
bool "Cortex-A510: 2064142: workaround TRBE register writes while disabled"
depends on CORESIGHT_TRBE
diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile
index ebe80faab883..a0e3dedd2883 100644
--- a/arch/arm64/boot/Makefile
+++ b/arch/arm64/boot/Makefile
@@ -16,7 +16,7 @@
OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
-targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo
+targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo Image.zst
$(obj)/Image: vmlinux FORCE
$(call if_changed,objcopy)
@@ -35,3 +35,6 @@ $(obj)/Image.lzma: $(obj)/Image FORCE
$(obj)/Image.lzo: $(obj)/Image FORCE
$(call if_changed,lzo)
+
+$(obj)/Image.zst: $(obj)/Image FORCE
+ $(call if_changed,zstd)
diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi
index 66023d553524..d084c33d5ca8 100644
--- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi
+++ b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi
@@ -9,6 +9,14 @@
/delete-node/ cpu@3;
};
+ timer {
+ compatible = "arm,armv8-timer";
+ interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>;
+ };
+
pmu {
compatible = "arm,cortex-a53-pmu";
interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi
index a4be040a00c0..967d2cd3c3ce 100644
--- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi
+++ b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi
@@ -29,6 +29,8 @@
device_type = "cpu";
compatible = "brcm,brahma-b53";
reg = <0x0>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0x0 0xfff8>;
next-level-cache = <&l2>;
};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index 92465f777603..d5cdd77e5a95 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -224,9 +224,12 @@
little-endian;
};
- efuse@1e80000 {
+ sfp: efuse@1e80000 {
compatible = "fsl,ls1028a-sfp";
reg = <0x0 0x1e80000 0x0 0x10000>;
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(4)>;
+ clock-names = "sfp";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
index 913d845eb51a..1977103a5ef4 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
@@ -376,7 +376,8 @@ camera: &i2c7 {
<&cru ACLK_VIO>,
<&cru ACLK_GIC_PRE>,
<&cru PCLK_DDR>,
- <&cru ACLK_HDCP>;
+ <&cru ACLK_HDCP>,
+ <&cru ACLK_VDU>;
assigned-clock-rates =
<600000000>, <1600000000>,
<1000000000>,
@@ -388,6 +389,7 @@ camera: &i2c7 {
<400000000>,
<200000000>,
<200000000>,
+ <400000000>,
<400000000>;
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
index fbd0346624e6..9d5b0e8c9cca 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
@@ -1462,7 +1462,8 @@
<&cru HCLK_PERILP1>, <&cru PCLK_PERILP1>,
<&cru ACLK_VIO>, <&cru ACLK_HDCP>,
<&cru ACLK_GIC_PRE>,
- <&cru PCLK_DDR>;
+ <&cru PCLK_DDR>,
+ <&cru ACLK_VDU>;
assigned-clock-rates =
<594000000>, <800000000>,
<1000000000>,
@@ -1473,7 +1474,8 @@
<100000000>, <50000000>,
<400000000>, <400000000>,
<200000000>,
- <200000000>;
+ <200000000>,
+ <400000000>;
};
grf: syscon@ff770000 {
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
index 1534e11a9ad1..fa953b736642 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
@@ -687,6 +687,7 @@
};
&usb_host0_xhci {
+ dr_mode = "host";
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts
index 7bdcecc0dfe4..02d5f5a8ca03 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts
@@ -133,7 +133,7 @@
assigned-clocks = <&cru SCLK_GMAC1_RX_TX>, <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>;
assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>, <&gmac1_clkin>;
clock_in_out = "input";
- phy-mode = "rgmii-id";
+ phy-mode = "rgmii";
phy-supply = <&vcc_3v3>;
pinctrl-names = "default";
pinctrl-0 = <&gmac1m1_miim
diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h
index c39f2437e08e..980d1dd8e1a3 100644
--- a/arch/arm64/include/asm/asm-extable.h
+++ b/arch/arm64/include/asm/asm-extable.h
@@ -2,12 +2,27 @@
#ifndef __ASM_ASM_EXTABLE_H
#define __ASM_ASM_EXTABLE_H
+#include <linux/bits.h>
+#include <asm/gpr-num.h>
+
#define EX_TYPE_NONE 0
-#define EX_TYPE_FIXUP 1
-#define EX_TYPE_BPF 2
-#define EX_TYPE_UACCESS_ERR_ZERO 3
+#define EX_TYPE_BPF 1
+#define EX_TYPE_UACCESS_ERR_ZERO 2
+#define EX_TYPE_KACCESS_ERR_ZERO 3
#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4
+/* Data fields for EX_TYPE_UACCESS_ERR_ZERO */
+#define EX_DATA_REG_ERR_SHIFT 0
+#define EX_DATA_REG_ERR GENMASK(4, 0)
+#define EX_DATA_REG_ZERO_SHIFT 5
+#define EX_DATA_REG_ZERO GENMASK(9, 5)
+
+/* Data fields for EX_TYPE_LOAD_UNALIGNED_ZEROPAD */
+#define EX_DATA_REG_DATA_SHIFT 0
+#define EX_DATA_REG_DATA GENMASK(4, 0)
+#define EX_DATA_REG_ADDR_SHIFT 5
+#define EX_DATA_REG_ADDR GENMASK(9, 5)
+
#ifdef __ASSEMBLY__
#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
@@ -19,31 +34,45 @@
.short (data); \
.popsection;
+#define EX_DATA_REG(reg, gpr) \
+ (.L__gpr_num_##gpr << EX_DATA_REG_##reg##_SHIFT)
+
+#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \
+ __ASM_EXTABLE_RAW(insn, fixup, \
+ EX_TYPE_UACCESS_ERR_ZERO, \
+ ( \
+ EX_DATA_REG(ERR, err) | \
+ EX_DATA_REG(ZERO, zero) \
+ ))
+
+#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr)
+
+#define _ASM_EXTABLE_UACCESS(insn, fixup) \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, wzr, wzr)
+
/*
- * Create an exception table entry for `insn`, which will branch to `fixup`
+ * Create an exception table entry for uaccess `insn`, which will branch to `fixup`
* when an unhandled fault is taken.
*/
- .macro _asm_extable, insn, fixup
- __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0)
+ .macro _asm_extable_uaccess, insn, fixup
+ _ASM_EXTABLE_UACCESS(\insn, \fixup)
.endm
/*
* Create an exception table entry for `insn` if `fixup` is provided. Otherwise
* do nothing.
*/
- .macro _cond_extable, insn, fixup
- .ifnc \fixup,
- _asm_extable \insn, \fixup
+ .macro _cond_uaccess_extable, insn, fixup
+ .ifnc \fixup,
+ _asm_extable_uaccess \insn, \fixup
.endif
.endm
#else /* __ASSEMBLY__ */
-#include <linux/bits.h>
#include <linux/stringify.h>
-#include <asm/gpr-num.h>
-
#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
".pushsection __ex_table, \"a\"\n" \
".align 2\n" \
@@ -53,14 +82,6 @@
".short (" data ")\n" \
".popsection\n"
-#define _ASM_EXTABLE(insn, fixup) \
- __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0")
-
-#define EX_DATA_REG_ERR_SHIFT 0
-#define EX_DATA_REG_ERR GENMASK(4, 0)
-#define EX_DATA_REG_ZERO_SHIFT 5
-#define EX_DATA_REG_ZERO GENMASK(9, 5)
-
#define EX_DATA_REG(reg, gpr) \
"((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
@@ -73,13 +94,23 @@
EX_DATA_REG(ZERO, zero) \
")")
+#define _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, zero) \
+ __DEFINE_ASM_GPR_NUMS \
+ __ASM_EXTABLE_RAW(#insn, #fixup, \
+ __stringify(EX_TYPE_KACCESS_ERR_ZERO), \
+ "(" \
+ EX_DATA_REG(ERR, err) " | " \
+ EX_DATA_REG(ZERO, zero) \
+ ")")
+
#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \
_ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr)
-#define EX_DATA_REG_DATA_SHIFT 0
-#define EX_DATA_REG_DATA GENMASK(4, 0)
-#define EX_DATA_REG_ADDR_SHIFT 5
-#define EX_DATA_REG_ADDR GENMASK(9, 5)
+#define _ASM_EXTABLE_UACCESS(insn, fixup) \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, wzr, wzr)
+
+#define _ASM_EXTABLE_KACCESS_ERR(insn, fixup, err) \
+ _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, wzr)
#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \
__DEFINE_ASM_GPR_NUMS \
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index 0557af834e03..75b211c98dea 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -61,7 +61,7 @@ alternative_else_nop_endif
#define USER(l, x...) \
9999: x; \
- _asm_extable 9999b, l
+ _asm_extable_uaccess 9999b, l
/*
* Generate the assembly for LDTR/STTR with exception table entries.
@@ -73,8 +73,8 @@ alternative_else_nop_endif
8889: ldtr \reg2, [\addr, #8];
add \addr, \addr, \post_inc;
- _asm_extable 8888b,\l;
- _asm_extable 8889b,\l;
+ _asm_extable_uaccess 8888b, \l;
+ _asm_extable_uaccess 8889b, \l;
.endm
.macro user_stp l, reg1, reg2, addr, post_inc
@@ -82,14 +82,14 @@ alternative_else_nop_endif
8889: sttr \reg2, [\addr, #8];
add \addr, \addr, \post_inc;
- _asm_extable 8888b,\l;
- _asm_extable 8889b,\l;
+ _asm_extable_uaccess 8888b,\l;
+ _asm_extable_uaccess 8889b,\l;
.endm
.macro user_ldst l, inst, reg, addr, post_inc
8888: \inst \reg, [\addr];
add \addr, \addr, \post_inc;
- _asm_extable 8888b,\l;
+ _asm_extable_uaccess 8888b, \l;
.endm
#endif
diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h
index ead62f7dd269..13ecc79854ee 100644
--- a/arch/arm64/include/asm/asm_pointer_auth.h
+++ b/arch/arm64/include/asm/asm_pointer_auth.h
@@ -59,9 +59,9 @@ alternative_else_nop_endif
.macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3
mrs \tmp1, id_aa64isar1_el1
- ubfx \tmp1, \tmp1, #ID_AA64ISAR1_APA_SHIFT, #8
+ ubfx \tmp1, \tmp1, #ID_AA64ISAR1_EL1_APA_SHIFT, #8
mrs_s \tmp2, SYS_ID_AA64ISAR2_EL1
- ubfx \tmp2, \tmp2, #ID_AA64ISAR2_APA3_SHIFT, #4
+ ubfx \tmp2, \tmp2, #ID_AA64ISAR2_EL1_APA3_SHIFT, #4
orr \tmp1, \tmp1, \tmp2
cbz \tmp1, .Lno_addr_auth\@
mov_q \tmp1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 8c5a61aeaf8e..5846145be523 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -360,6 +360,20 @@ alternative_cb_end
.endm
/*
+ * idmap_get_t0sz - get the T0SZ value needed to cover the ID map
+ *
+ * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
+ * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
+ * this number conveniently equals the number of leading zeroes in
+ * the physical address of _end.
+ */
+ .macro idmap_get_t0sz, reg
+ adrp \reg, _end
+ orr \reg, \reg, #(1 << VA_BITS_MIN) - 1
+ clz \reg, \reg
+ .endm
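(Editorial aside, not part of the patch: the comment above can be checked with
a quick user-space calculation; VA_BITS_MIN == 48 and the physical address of
_end are assumed values chosen for the example.) ::

  #include <stdio.h>

  int main(void)
  {
          unsigned long long end_pa = 0x80000000ULL;          /* assumed PA of _end */
          unsigned long long v = end_pa | ((1ULL << 48) - 1); /* VA_BITS_MIN == 48 */

          /* prints 16: T0SZ == 16 lets the ID map cover 2^48 bytes */
          printf("t0sz = %d\n", __builtin_clzll(v));
          return 0;
  }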
+
+/*
* tcr_compute_pa_size - set TCR.(I)PS to the highest supported
* ID_AA64MMFR0_EL1.PARange value
*
@@ -423,7 +437,7 @@ alternative_endif
b.lo .Ldcache_op\@
dsb \domain
- _cond_extable .Ldcache_op\@, \fixup
+ _cond_uaccess_extable .Ldcache_op\@, \fixup
.endm
/*
@@ -462,7 +476,19 @@ alternative_endif
dsb ish
isb
- _cond_extable .Licache_op\@, \fixup
+ _cond_uaccess_extable .Licache_op\@, \fixup
+ .endm
+
+/*
+ * load_ttbr1 - install @pgtbl as a TTBR1 page table
+ * pgtbl preserved
+ * tmp1/tmp2 clobbered, either may overlap with pgtbl
+ */
+ .macro load_ttbr1, pgtbl, tmp1, tmp2
+ phys_to_ttbr \tmp1, \pgtbl
+ offset_ttbr1 \tmp1, \tmp2
+ msr ttbr1_el1, \tmp1
+ isb
.endm
/*
@@ -478,10 +504,7 @@ alternative_endif
isb
tlbi vmalle1
dsb nsh
- phys_to_ttbr \tmp, \page_table
- offset_ttbr1 \tmp, \tmp2
- msr ttbr1_el1, \tmp
- isb
+ load_ttbr1 \page_table, \tmp, \tmp2
.endm
/*
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 9f3e2c3d2ca0..2cfc4245d2e2 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -50,13 +50,13 @@
#define pmr_sync() do {} while (0)
#endif
-#define mb() dsb(sy)
-#define rmb() dsb(ld)
-#define wmb() dsb(st)
+#define __mb() dsb(sy)
+#define __rmb() dsb(ld)
+#define __wmb() dsb(st)
-#define dma_mb() dmb(osh)
-#define dma_rmb() dmb(oshld)
-#define dma_wmb() dmb(oshst)
+#define __dma_mb() dmb(osh)
+#define __dma_rmb() dmb(oshld)
+#define __dma_wmb() dmb(oshst)
#define io_stop_wc() dgh()
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 7c2181c72116..ca9b487112cc 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -5,34 +5,9 @@
#ifndef __ASM_CACHE_H
#define __ASM_CACHE_H
-#include <asm/cputype.h>
-#include <asm/mte-def.h>
-
-#define CTR_L1IP_SHIFT 14
-#define CTR_L1IP_MASK 3
-#define CTR_DMINLINE_SHIFT 16
-#define CTR_IMINLINE_SHIFT 0
-#define CTR_IMINLINE_MASK 0xf
-#define CTR_ERG_SHIFT 20
-#define CTR_CWG_SHIFT 24
-#define CTR_CWG_MASK 15
-#define CTR_IDC_SHIFT 28
-#define CTR_DIC_SHIFT 29
-
-#define CTR_CACHE_MINLINE_MASK \
- (0xf << CTR_DMINLINE_SHIFT | CTR_IMINLINE_MASK << CTR_IMINLINE_SHIFT)
-
-#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
-
-#define ICACHE_POLICY_VPIPT 0
-#define ICACHE_POLICY_RESERVED 1
-#define ICACHE_POLICY_VIPT 2
-#define ICACHE_POLICY_PIPT 3
-
#define L1_CACHE_SHIFT (6)
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
-
#define CLIDR_LOUU_SHIFT 27
#define CLIDR_LOC_SHIFT 24
#define CLIDR_LOUIS_SHIFT 21
@@ -55,6 +30,10 @@
#include <linux/bitops.h>
#include <linux/kasan-enabled.h>
+#include <asm/cputype.h>
+#include <asm/mte-def.h>
+#include <asm/sysreg.h>
+
#ifdef CONFIG_KASAN_SW_TAGS
#define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT)
#elif defined(CONFIG_KASAN_HW_TAGS)
@@ -66,6 +45,12 @@ static inline unsigned int arch_slab_minalign(void)
#define arch_slab_minalign() arch_slab_minalign()
#endif
+#define CTR_CACHE_MINLINE_MASK \
+ (0xf << CTR_EL0_DMINLINE_SHIFT | \
+ CTR_EL0_IMINLINE_MASK << CTR_EL0_IMINLINE_SHIFT)
+
+#define CTR_L1IP(ctr) SYS_FIELD_GET(CTR_EL0, L1Ip, ctr)
+
#define ICACHEF_ALIASING 0
#define ICACHEF_VPIPT 1
extern unsigned long __icache_flags;
@@ -86,7 +71,7 @@ static __always_inline int icache_is_vpipt(void)
static inline u32 cache_type_cwg(void)
{
- return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK;
+ return (read_cpuid_cachetype() >> CTR_EL0_CWG_SHIFT) & CTR_EL0_CWG_MASK;
}
#define __read_mostly __section(".data..read_mostly")
@@ -120,12 +105,12 @@ static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void)
{
u32 ctr = read_cpuid_cachetype();
- if (!(ctr & BIT(CTR_IDC_SHIFT))) {
+ if (!(ctr & BIT(CTR_EL0_IDC_SHIFT))) {
u64 clidr = read_sysreg(clidr_el1);
if (CLIDR_LOC(clidr) == 0 ||
(CLIDR_LOUIS(clidr) == 0 && CLIDR_LOUU(clidr) == 0))
- ctr |= BIT(CTR_IDC_SHIFT);
+ ctr |= BIT(CTR_EL0_IDC_SHIFT);
}
return ctr;
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 5a228e203ef9..37185e978aeb 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -105,13 +105,6 @@ static inline void flush_icache_range(unsigned long start, unsigned long end)
#define flush_icache_range flush_icache_range
/*
- * Cache maintenance functions used by the DMA API. Not to be used directly.
- */
-extern void __dma_map_area(const void *, size_t, int);
-extern void __dma_unmap_area(const void *, size_t, int);
-extern void __dma_flush_area(const void *, size_t);
-
-/*
* Copy user data from/to a page which is mapped into a different
* processes address space. Really, we want to allow our "user
* space" model to handle this.
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 115cdec1ae87..fd7a92219eea 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -46,6 +46,7 @@ struct cpuinfo_arm64 {
u64 reg_midr;
u64 reg_revidr;
u64 reg_gmid;
+ u64 reg_smidr;
u64 reg_id_aa64dfr0;
u64 reg_id_aa64dfr1;
diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h
index e95c4df83911..a444c8915e88 100644
--- a/arch/arm64/include/asm/cpu_ops.h
+++ b/arch/arm64/include/asm/cpu_ops.h
@@ -31,11 +31,6 @@
* @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the
* cpu being killed.
* @cpu_kill: Ensures a cpu has left the kernel. Called from another cpu.
- * @cpu_init_idle: Reads any data necessary to initialize CPU idle states for
- * a proposed logical id.
- * @cpu_suspend: Suspends a cpu and saves the required context. May fail owing
- * to wrong parameters or error conditions. Called from the
- * CPU being suspended. Must be called with IRQs disabled.
*/
struct cpu_operations {
const char *name;
@@ -49,10 +44,6 @@ struct cpu_operations {
void (*cpu_die)(unsigned int cpu);
int (*cpu_kill)(unsigned int cpu);
#endif
-#ifdef CONFIG_CPU_IDLE
- int (*cpu_init_idle)(unsigned int);
- int (*cpu_suspend)(unsigned long);
-#endif
};
int __init init_cpu_ops(int cpu);
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 14a8f3d93add..fd7d75a275f6 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -11,7 +11,7 @@
#include <asm/hwcap.h>
#include <asm/sysreg.h>
-#define MAX_CPU_FEATURES 64
+#define MAX_CPU_FEATURES 128
#define cpu_feature(x) KERNEL_HWCAP_ ## x
#ifndef __ASSEMBLY__
@@ -673,7 +673,7 @@ static inline bool supports_clearbhb(int scope)
isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);
return cpuid_feature_extract_unsigned_field(isar2,
- ID_AA64ISAR2_CLEARBHB_SHIFT);
+ ID_AA64ISAR2_EL1_BC_SHIFT);
}
const struct cpumask *system_32bit_el0_cpumask(void);
@@ -908,7 +908,10 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
}
extern struct arm64_ftr_override id_aa64mmfr1_override;
+extern struct arm64_ftr_override id_aa64pfr0_override;
extern struct arm64_ftr_override id_aa64pfr1_override;
+extern struct arm64_ftr_override id_aa64zfr0_override;
+extern struct arm64_ftr_override id_aa64smfr0_override;
extern struct arm64_ftr_override id_aa64isar1_override;
extern struct arm64_ftr_override id_aa64isar2_override;
diff --git a/arch/arm64/include/asm/cpuidle.h b/arch/arm64/include/asm/cpuidle.h
index 14a19d1141bd..2047713e097d 100644
--- a/arch/arm64/include/asm/cpuidle.h
+++ b/arch/arm64/include/asm/cpuidle.h
@@ -4,21 +4,6 @@
#include <asm/proc-fns.h>
-#ifdef CONFIG_CPU_IDLE
-extern int arm_cpuidle_init(unsigned int cpu);
-extern int arm_cpuidle_suspend(int index);
-#else
-static inline int arm_cpuidle_init(unsigned int cpu)
-{
- return -EOPNOTSUPP;
-}
-
-static inline int arm_cpuidle_suspend(int index)
-{
- return -EOPNOTSUPP;
-}
-#endif
-
#ifdef CONFIG_ARM64_PSEUDO_NMI
#include <asm/arch_gicv3.h>
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 34ceff08cac4..2630faa5bc08 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -129,64 +129,6 @@
msr cptr_el2, x0 // Disable copro. traps to EL2
.endm
-/* SVE register access */
-.macro __init_el2_nvhe_sve
- mrs x1, id_aa64pfr0_el1
- ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
- cbz x1, .Lskip_sve_\@
-
- bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps
- msr cptr_el2, x0 // Disable copro. traps to EL2
- isb
- mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
- msr_s SYS_ZCR_EL2, x1 // length for EL1.
-.Lskip_sve_\@:
-.endm
-
-/* SME register access and priority mapping */
-.macro __init_el2_nvhe_sme
- mrs x1, id_aa64pfr1_el1
- ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4
- cbz x1, .Lskip_sme_\@
-
- bic x0, x0, #CPTR_EL2_TSM // Also disable SME traps
- msr cptr_el2, x0 // Disable copro. traps to EL2
- isb
-
- mrs x1, sctlr_el2
- orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps
- msr sctlr_el2, x1
- isb
-
- mov x1, #0 // SMCR controls
-
- mrs_s x2, SYS_ID_AA64SMFR0_EL1
- ubfx x2, x2, #ID_AA64SMFR0_FA64_SHIFT, #1 // Full FP in SM?
- cbz x2, .Lskip_sme_fa64_\@
-
- orr x1, x1, SMCR_ELx_FA64_MASK
-.Lskip_sme_fa64_\@:
-
- orr x1, x1, #SMCR_ELx_LEN_MASK // Enable full SME vector
- msr_s SYS_SMCR_EL2, x1 // length for EL1.
-
- mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported?
- ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
- cbz x1, .Lskip_sme_\@
-
- msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal
-
- mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present?
- ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4
- cbz x1, .Lskip_sme_\@
-
- mrs_s x1, SYS_HCRX_EL2
- orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping
- msr_s SYS_HCRX_EL2, x1
-
-.Lskip_sme_\@:
-.endm
-
/* Disable any fine grained traps */
.macro __init_el2_fgt
mrs x1, id_aa64mmfr0_el1
@@ -250,8 +192,6 @@
__init_el2_hstr
__init_el2_nvhe_idregs
__init_el2_nvhe_cptr
- __init_el2_nvhe_sve
- __init_el2_nvhe_sme
__init_el2_fgt
__init_el2_nvhe_prepare_eret
.endm
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index daff882883f9..71ed5fdf718b 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -62,10 +62,12 @@ enum fixed_addresses {
#endif /* CONFIG_ACPI_APEI_GHES */
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+#ifdef CONFIG_RELOCATABLE
+ FIX_ENTRY_TRAMP_TEXT4, /* one extra slot for the data page */
+#endif
FIX_ENTRY_TRAMP_TEXT3,
FIX_ENTRY_TRAMP_TEXT2,
FIX_ENTRY_TRAMP_TEXT1,
- FIX_ENTRY_TRAMP_DATA,
#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT1))
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
__end_of_permanent_fixed_addresses,
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index aa443d8f8cfb..cef4ae7a3d8b 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -85,7 +85,7 @@
#define KERNEL_HWCAP_PACA __khwcap_feature(PACA)
#define KERNEL_HWCAP_PACG __khwcap_feature(PACG)
-#define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 32)
+#define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 64)
#define KERNEL_HWCAP_DCPODP __khwcap2_feature(DCPODP)
#define KERNEL_HWCAP_SVE2 __khwcap2_feature(SVE2)
#define KERNEL_HWCAP_SVEAES __khwcap2_feature(SVEAES)
@@ -118,6 +118,7 @@
#define KERNEL_HWCAP_SME_F32F32 __khwcap2_feature(SME_F32F32)
#define KERNEL_HWCAP_SME_FA64 __khwcap2_feature(SME_FA64)
#define KERNEL_HWCAP_WFXT __khwcap2_feature(WFXT)
+#define KERNEL_HWCAP_EBF16 __khwcap2_feature(EBF16)
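/*
 * Editor's worked example (not part of the patch): with elf_hwcap now a
 * 128-bit bitmap, an AT_HWCAP2 flag such as HWCAP2_EBF16 == (1UL << 32)
 * maps to kernel feature bit const_ilog2(1UL << 32) + 64 == 96, leaving
 * bits 0-63 for AT_HWCAP and bits 64-127 for AT_HWCAP2.
 */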
/*
* This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 3995652daf81..87dd42d74afe 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -163,13 +163,16 @@ extern void __memset_io(volatile void __iomem *, int, size_t);
/*
* I/O memory mapping functions.
*/
-extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot);
-extern void iounmap(volatile void __iomem *addr);
-extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
-#define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
-#define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))
-#define ioremap_np(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRnE))
+bool ioremap_allowed(phys_addr_t phys_addr, size_t size, unsigned long prot);
+#define ioremap_allowed ioremap_allowed
+
+#define _PAGE_IOREMAP PROT_DEVICE_nGnRE
+
+#define ioremap_wc(addr, size) \
+ ioremap_prot((addr), (size), PROT_NORMAL_NC)
+#define ioremap_np(addr, size) \
+ ioremap_prot((addr), (size), PROT_DEVICE_nGnRnE)
/*
* io{read,write}{16,32,64}be() macros
@@ -184,6 +187,15 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
#include <asm-generic/io.h>
+#define ioremap_cache ioremap_cache
+static inline void __iomem *ioremap_cache(phys_addr_t addr, size_t size)
+{
+ if (pfn_is_map_memory(__phys_to_pfn(addr)))
+ return (void __iomem *)__phys_to_virt(addr);
+
+ return ioremap_prot(addr, size, PROT_NORMAL);
+}
+
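/*
 * Editor's sketch (assumption: a typical caller, not taken from the patch):
 * for addresses already covered by the linear map, the new inline returns
 * the existing mapping instead of creating another one, e.g.
 *
 *   void __iomem *base = ioremap_cache(res->start, resource_size(res));
 *
 * only falls back to ioremap_prot(..., PROT_NORMAL) when
 * pfn_is_map_memory() says the range is not kernel-mapped RAM.
 */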
/*
* More restrictive address range checking than the default implementation
* (PHYS_OFFSET and PHYS_MASK taken into account).
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 96dc0f7da258..02e59fa8f293 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -8,6 +8,7 @@
#ifndef __ASM_KERNEL_PGTABLE_H
#define __ASM_KERNEL_PGTABLE_H
+#include <asm/boot.h>
#include <asm/pgtable-hwdef.h>
#include <asm/sparsemem.h>
@@ -35,10 +36,8 @@
*/
#if ARM64_KERNEL_USES_PMD_MAPS
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
-#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1)
#else
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
-#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT))
#endif
@@ -87,7 +86,14 @@
+ EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \
+ EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */
#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end))
-#define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
+
+/* the initial ID map may need two extra pages if it needs to be extended */
+#if VA_BITS < 48
+#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE)
+#else
+#define INIT_IDMAP_DIR_SIZE (INIT_IDMAP_DIR_PAGES * PAGE_SIZE)
+#endif
+#define INIT_IDMAP_DIR_PAGES EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE)
/* Initial memory map size */
#if ARM64_KERNEL_USES_PMD_MAPS
@@ -107,9 +113,11 @@
#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
#if ARM64_KERNEL_USES_PMD_MAPS
-#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
+#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
+#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY)
#else
-#define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
+#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
+#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY)
#endif
/*
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 0af70d9abede..227d256cd4b9 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -174,7 +174,11 @@
#include <linux/types.h>
#include <asm/bug.h>
+#if VA_BITS > 48
extern u64 vabits_actual;
+#else
+#define vabits_actual ((u64)VA_BITS)
+#endif
extern s64 memstart_addr;
/* PHYS_OFFSET - the physical address of the start of memory. */
@@ -351,6 +355,11 @@ static inline void *phys_to_virt(phys_addr_t x)
})
void dump_mem_limit(void);
+
+static inline bool defer_reserve_crashkernel(void)
+{
+ return IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32);
+}
#endif /* !ASSEMBLY */
/*
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 6770667b34a3..c7ccd82db1d2 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -60,8 +60,7 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
* TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
* physical memory, in which case it will be smaller.
*/
-extern u64 idmap_t0sz;
-extern u64 idmap_ptrs_per_pgd;
+extern int idmap_t0sz;
/*
* Ensure TCR.T0SZ is set to the provided value.
@@ -106,13 +105,18 @@ static inline void cpu_uninstall_idmap(void)
cpu_switch_mm(mm->pgd, mm);
}
-static inline void cpu_install_idmap(void)
+static inline void __cpu_install_idmap(pgd_t *idmap)
{
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
cpu_set_idmap_tcr_t0sz();
- cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
+ cpu_switch_mm(lm_alias(idmap), &init_mm);
+}
+
+static inline void cpu_install_idmap(void)
+{
+ __cpu_install_idmap(idmap_pg_dir);
}
/*
@@ -143,7 +147,7 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
* Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
* avoiding the possibility of conflicting TLB entries being allocated.
*/
-static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp)
+static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
{
typedef void (ttbr_replace_func)(phys_addr_t);
extern ttbr_replace_func idmap_cpu_replace_ttbr1;
@@ -166,7 +170,7 @@ static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp)
replace_phys = (void *)__pa_symbol(function_nocfi(idmap_cpu_replace_ttbr1));
- cpu_install_idmap();
+ __cpu_install_idmap(idmap);
replace_phys(ttbr1);
cpu_uninstall_idmap();
}
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index dd3d12bce07b..5ab8d163198f 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -281,10 +281,9 @@
*/
#ifdef CONFIG_ARM64_PA_BITS_52
/*
- * This should be GENMASK_ULL(47, 2).
* TTBR_ELx[1] is RES0 in this configuration.
*/
-#define TTBR_BADDR_MASK_52 (((UL(1) << 46) - 1) << 2)
+#define TTBR_BADDR_MASK_52 GENMASK_ULL(47, 2)
#endif
#ifdef CONFIG_ARM64_VA_BITS_52
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 0b6632f18364..b5df82aa99e6 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -45,6 +45,12 @@
__flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+static inline bool arch_thp_swp_supported(void)
+{
+ return !system_supports_mte();
+}
+#define arch_thp_swp_supported arch_thp_swp_supported
+
/*
* Outside of a few very special situations (e.g. hibernation), we always
* use broadcast TLB invalidation instructions, therefore a spurious page
@@ -427,6 +433,16 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
}
+/*
+ * Select all bits except the pfn
+ */
+static inline pgprot_t pte_pgprot(pte_t pte)
+{
+ unsigned long pfn = pte_pfn(pte);
+
+ return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
+}
+
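/*
 * Editor's illustration (not part of the patch): pfn_pte(pfn, __pgprot(0))
 * rebuilds a PTE holding only the address bits, so the XOR cancels them and
 * leaves just the attribute bits:
 *
 *   pte_val(pte)                        = <pfn bits> | <attr bits>
 *   pte_val(pfn_pte(pfn, __pgprot(0)))  = <pfn bits>
 *   XOR of the two                      =              <attr bits>
 */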
#ifdef CONFIG_NUMA_BALANCING
/*
* See the comment in include/linux/pgtable.h
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 9e58749db21d..86eb0bfe3b38 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -272,8 +272,9 @@ void tls_preserve_current_state(void);
static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
{
+ s32 previous_syscall = regs->syscallno;
memset(regs, 0, sizeof(*regs));
- forget_syscall(regs);
+ regs->syscallno = previous_syscall;
regs->pc = pc;
if (system_uses_irq_prio_masking())
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 42ff95dba6da..7c71358d44c4 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -192,8 +192,6 @@
#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0)
#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1)
-#define SYS_ID_AA64ZFR0_EL1 sys_reg(3, 0, 0, 4, 4)
-#define SYS_ID_AA64SMFR0_EL1 sys_reg(3, 0, 0, 4, 5)
#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0)
#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1)
@@ -201,9 +199,6 @@
#define SYS_ID_AA64AFR0_EL1 sys_reg(3, 0, 0, 5, 4)
#define SYS_ID_AA64AFR1_EL1 sys_reg(3, 0, 0, 5, 5)
-#define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1)
-#define SYS_ID_AA64ISAR2_EL1 sys_reg(3, 0, 0, 6, 2)
-
#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0)
#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1)
#define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2)
@@ -410,12 +405,6 @@
#define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0)
#define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0)
-#define SYS_LORSA_EL1 sys_reg(3, 0, 10, 4, 0)
-#define SYS_LOREA_EL1 sys_reg(3, 0, 10, 4, 1)
-#define SYS_LORN_EL1 sys_reg(3, 0, 10, 4, 2)
-#define SYS_LORC_EL1 sys_reg(3, 0, 10, 4, 3)
-#define SYS_LORID_EL1 sys_reg(3, 0, 10, 4, 7)
-
#define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0)
#define SYS_DISR_EL1 sys_reg(3, 0, 12, 1, 1)
@@ -454,16 +443,12 @@
#define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0)
#define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0)
-#define SYS_GMID_EL1 sys_reg(3, 1, 0, 0, 4)
#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7)
#define SMIDR_EL1_IMPLEMENTER_SHIFT 24
#define SMIDR_EL1_SMPS_SHIFT 15
#define SMIDR_EL1_AFFINITY_SHIFT 0
-#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1)
-#define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7)
-
#define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0)
#define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1)
@@ -704,66 +689,6 @@
/* Position the attr at the correct index */
#define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8))
-/* id_aa64isar1 */
-#define ID_AA64ISAR1_I8MM_SHIFT 52
-#define ID_AA64ISAR1_DGH_SHIFT 48
-#define ID_AA64ISAR1_BF16_SHIFT 44
-#define ID_AA64ISAR1_SPECRES_SHIFT 40
-#define ID_AA64ISAR1_SB_SHIFT 36
-#define ID_AA64ISAR1_FRINTTS_SHIFT 32
-#define ID_AA64ISAR1_GPI_SHIFT 28
-#define ID_AA64ISAR1_GPA_SHIFT 24
-#define ID_AA64ISAR1_LRCPC_SHIFT 20
-#define ID_AA64ISAR1_FCMA_SHIFT 16
-#define ID_AA64ISAR1_JSCVT_SHIFT 12
-#define ID_AA64ISAR1_API_SHIFT 8
-#define ID_AA64ISAR1_APA_SHIFT 4
-#define ID_AA64ISAR1_DPB_SHIFT 0
-
-#define ID_AA64ISAR1_APA_NI 0x0
-#define ID_AA64ISAR1_APA_ARCHITECTED 0x1
-#define ID_AA64ISAR1_APA_ARCH_EPAC 0x2
-#define ID_AA64ISAR1_APA_ARCH_EPAC2 0x3
-#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC 0x4
-#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC_CMB 0x5
-#define ID_AA64ISAR1_API_NI 0x0
-#define ID_AA64ISAR1_API_IMP_DEF 0x1
-#define ID_AA64ISAR1_API_IMP_DEF_EPAC 0x2
-#define ID_AA64ISAR1_API_IMP_DEF_EPAC2 0x3
-#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC 0x4
-#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC_CMB 0x5
-#define ID_AA64ISAR1_GPA_NI 0x0
-#define ID_AA64ISAR1_GPA_ARCHITECTED 0x1
-#define ID_AA64ISAR1_GPI_NI 0x0
-#define ID_AA64ISAR1_GPI_IMP_DEF 0x1
-
-/* id_aa64isar2 */
-#define ID_AA64ISAR2_CLEARBHB_SHIFT 28
-#define ID_AA64ISAR2_APA3_SHIFT 12
-#define ID_AA64ISAR2_GPA3_SHIFT 8
-#define ID_AA64ISAR2_RPRES_SHIFT 4
-#define ID_AA64ISAR2_WFXT_SHIFT 0
-
-#define ID_AA64ISAR2_RPRES_8BIT 0x0
-#define ID_AA64ISAR2_RPRES_12BIT 0x1
-/*
- * Value 0x1 has been removed from the architecture, and is
- * reserved, but has not yet been removed from the ARM ARM
- * as of ARM DDI 0487G.b.
- */
-#define ID_AA64ISAR2_WFXT_NI 0x0
-#define ID_AA64ISAR2_WFXT_SUPPORTED 0x2
-
-#define ID_AA64ISAR2_APA3_NI 0x0
-#define ID_AA64ISAR2_APA3_ARCHITECTED 0x1
-#define ID_AA64ISAR2_APA3_ARCH_EPAC 0x2
-#define ID_AA64ISAR2_APA3_ARCH_EPAC2 0x3
-#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC 0x4
-#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC_CMB 0x5
-
-#define ID_AA64ISAR2_GPA3_NI 0x0
-#define ID_AA64ISAR2_GPA3_ARCHITECTED 0x1
-
/* id_aa64pfr0 */
#define ID_AA64PFR0_CSV3_SHIFT 60
#define ID_AA64PFR0_CSV2_SHIFT 56
@@ -811,45 +736,6 @@
#define ID_AA64PFR1_MTE 0x2
#define ID_AA64PFR1_MTE_ASYMM 0x3
-/* id_aa64zfr0 */
-#define ID_AA64ZFR0_F64MM_SHIFT 56
-#define ID_AA64ZFR0_F32MM_SHIFT 52
-#define ID_AA64ZFR0_I8MM_SHIFT 44
-#define ID_AA64ZFR0_SM4_SHIFT 40
-#define ID_AA64ZFR0_SHA3_SHIFT 32
-#define ID_AA64ZFR0_BF16_SHIFT 20
-#define ID_AA64ZFR0_BITPERM_SHIFT 16
-#define ID_AA64ZFR0_AES_SHIFT 4
-#define ID_AA64ZFR0_SVEVER_SHIFT 0
-
-#define ID_AA64ZFR0_F64MM 0x1
-#define ID_AA64ZFR0_F32MM 0x1
-#define ID_AA64ZFR0_I8MM 0x1
-#define ID_AA64ZFR0_BF16 0x1
-#define ID_AA64ZFR0_SM4 0x1
-#define ID_AA64ZFR0_SHA3 0x1
-#define ID_AA64ZFR0_BITPERM 0x1
-#define ID_AA64ZFR0_AES 0x1
-#define ID_AA64ZFR0_AES_PMULL 0x2
-#define ID_AA64ZFR0_SVEVER_SVE2 0x1
-
-/* id_aa64smfr0 */
-#define ID_AA64SMFR0_FA64_SHIFT 63
-#define ID_AA64SMFR0_I16I64_SHIFT 52
-#define ID_AA64SMFR0_F64F64_SHIFT 48
-#define ID_AA64SMFR0_I8I32_SHIFT 36
-#define ID_AA64SMFR0_F16F32_SHIFT 35
-#define ID_AA64SMFR0_B16F32_SHIFT 34
-#define ID_AA64SMFR0_F32F32_SHIFT 32
-
-#define ID_AA64SMFR0_FA64 0x1
-#define ID_AA64SMFR0_I16I64 0xf
-#define ID_AA64SMFR0_F64F64 0x1
-#define ID_AA64SMFR0_I8I32 0xf
-#define ID_AA64SMFR0_F16F32 0x1
-#define ID_AA64SMFR0_B16F32 0x1
-#define ID_AA64SMFR0_F32F32 0x1
-
/* id_aa64mmfr0 */
#define ID_AA64MMFR0_ECV_SHIFT 60
#define ID_AA64MMFR0_FGT_SHIFT 56
@@ -902,6 +788,7 @@
/* id_aa64mmfr1 */
#define ID_AA64MMFR1_ECBHB_SHIFT 60
+#define ID_AA64MMFR1_TIDCP1_SHIFT 52
#define ID_AA64MMFR1_HCX_SHIFT 40
#define ID_AA64MMFR1_AFP_SHIFT 44
#define ID_AA64MMFR1_ETS_SHIFT 36
@@ -918,6 +805,9 @@
#define ID_AA64MMFR1_VMIDBITS_8 0
#define ID_AA64MMFR1_VMIDBITS_16 2
+#define ID_AA64MMFR1_TIDCP1_NI 0
+#define ID_AA64MMFR1_TIDCP1_IMP 1
+
/* id_aa64mmfr2 */
#define ID_AA64MMFR2_E0PD_SHIFT 60
#define ID_AA64MMFR2_EVT_SHIFT 56
@@ -1084,9 +974,6 @@
#define MVFR2_FPMISC_SHIFT 4
#define MVFR2_SIMDMISC_SHIFT 0
-#define DCZID_DZP_SHIFT 4
-#define DCZID_BS_SHIFT 0
-
#define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */
#define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */
@@ -1121,8 +1008,8 @@
#define SYS_RGSR_EL1_SEED_MASK 0xffffUL
/* GMID_EL1 field definitions */
-#define SYS_GMID_EL1_BS_SHIFT 0
-#define SYS_GMID_EL1_BS_SIZE 4
+#define GMID_EL1_BS_SHIFT 0
+#define GMID_EL1_BS_SIZE 4
/* TFSR{,E0}_EL1 bit definitions */
#define SYS_TFSR_EL1_TF0_SHIFT 0
@@ -1324,6 +1211,9 @@
#endif
+#define SYS_FIELD_GET(reg, field, val) \
+ FIELD_GET(reg##_##field##_MASK, val)
+
#define SYS_FIELD_PREP(reg, field, val) \
FIELD_PREP(reg##_##field##_MASK, val)
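/*
 * Editor's illustration (not part of the patch): SYS_FIELD_GET(CTR_EL0, L1Ip, ctr)
 * expands to FIELD_GET(CTR_EL0_L1Ip_MASK, ctr), i.e. it extracts the L1Ip
 * field from a CTR_EL0 value using the generated *_MASK constant, mirroring
 * the existing SYS_FIELD_PREP() helper used for composing such fields.
 */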
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 63f9c828f1a7..2fc9f0861769 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -232,34 +232,34 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
* The "__xxx_error" versions set the third argument to -EFAULT if an error
* occurs, and leave it unchanged on success.
*/
-#define __get_mem_asm(load, reg, x, addr, err) \
+#define __get_mem_asm(load, reg, x, addr, err, type) \
asm volatile( \
"1: " load " " reg "1, [%2]\n" \
"2:\n" \
- _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \
+ _ASM_EXTABLE_##type##ACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \
: "+r" (err), "=&r" (x) \
: "r" (addr))
-#define __raw_get_mem(ldr, x, ptr, err) \
-do { \
- unsigned long __gu_val; \
- switch (sizeof(*(ptr))) { \
- case 1: \
- __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err)); \
- break; \
- case 2: \
- __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err)); \
- break; \
- case 4: \
- __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err)); \
- break; \
- case 8: \
- __get_mem_asm(ldr, "%x", __gu_val, (ptr), (err)); \
- break; \
- default: \
- BUILD_BUG(); \
- } \
- (x) = (__force __typeof__(*(ptr)))__gu_val; \
+#define __raw_get_mem(ldr, x, ptr, err, type) \
+do { \
+ unsigned long __gu_val; \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err), type); \
+ break; \
+ case 2: \
+ __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err), type); \
+ break; \
+ case 4: \
+ __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err), type); \
+ break; \
+ case 8: \
+ __get_mem_asm(ldr, "%x", __gu_val, (ptr), (err), type); \
+ break; \
+ default: \
+ BUILD_BUG(); \
+ } \
+ (x) = (__force __typeof__(*(ptr)))__gu_val; \
} while (0)
/*
@@ -274,7 +274,7 @@ do { \
__chk_user_ptr(ptr); \
\
uaccess_ttbr0_enable(); \
- __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err); \
+ __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err, U); \
uaccess_ttbr0_disable(); \
\
(x) = __rgu_val; \
@@ -314,40 +314,40 @@ do { \
\
__uaccess_enable_tco_async(); \
__raw_get_mem("ldr", *((type *)(__gkn_dst)), \
- (__force type *)(__gkn_src), __gkn_err); \
+ (__force type *)(__gkn_src), __gkn_err, K); \
__uaccess_disable_tco_async(); \
\
if (unlikely(__gkn_err)) \
goto err_label; \
} while (0)
-#define __put_mem_asm(store, reg, x, addr, err) \
+#define __put_mem_asm(store, reg, x, addr, err, type) \
asm volatile( \
"1: " store " " reg "1, [%2]\n" \
"2:\n" \
- _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0) \
+ _ASM_EXTABLE_##type##ACCESS_ERR(1b, 2b, %w0) \
: "+r" (err) \
: "r" (x), "r" (addr))
-#define __raw_put_mem(str, x, ptr, err) \
-do { \
- __typeof__(*(ptr)) __pu_val = (x); \
- switch (sizeof(*(ptr))) { \
- case 1: \
- __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err)); \
- break; \
- case 2: \
- __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err)); \
- break; \
- case 4: \
- __put_mem_asm(str, "%w", __pu_val, (ptr), (err)); \
- break; \
- case 8: \
- __put_mem_asm(str, "%x", __pu_val, (ptr), (err)); \
- break; \
- default: \
- BUILD_BUG(); \
- } \
+#define __raw_put_mem(str, x, ptr, err, type) \
+do { \
+ __typeof__(*(ptr)) __pu_val = (x); \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err), type); \
+ break; \
+ case 2: \
+ __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err), type); \
+ break; \
+ case 4: \
+ __put_mem_asm(str, "%w", __pu_val, (ptr), (err), type); \
+ break; \
+ case 8: \
+ __put_mem_asm(str, "%x", __pu_val, (ptr), (err), type); \
+ break; \
+ default: \
+ BUILD_BUG(); \
+ } \
} while (0)
/*
@@ -362,7 +362,7 @@ do { \
__chk_user_ptr(__rpu_ptr); \
\
uaccess_ttbr0_enable(); \
- __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err); \
+ __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err, U); \
uaccess_ttbr0_disable(); \
} while (0)
@@ -400,7 +400,7 @@ do { \
\
__uaccess_enable_tco_async(); \
__raw_put_mem("str", *((type *)(__pkn_src)), \
- (__force type *)(__pkn_dst), __pkn_err); \
+ (__force type *)(__pkn_dst), __pkn_err, K); \
__uaccess_disable_tco_async(); \
\
if (unlikely(__pkn_err)) \
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 0e80db4327b6..4eb601e7de50 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -36,9 +36,9 @@
#define HVC_RESET_VECTORS 2
/*
- * HVC_VHE_RESTART - Upgrade the CPU from EL1 to EL2, if possible
+ * HVC_FINALISE_EL2 - Upgrade the CPU from EL1 to EL2, if possible
*/
-#define HVC_VHE_RESTART 3
+#define HVC_FINALISE_EL2 3
/* Max number of HYP stub hypercalls */
#define HVC_STUB_HCALL_NR 4
@@ -49,6 +49,13 @@
#define BOOT_CPU_MODE_EL1 (0xe11)
#define BOOT_CPU_MODE_EL2 (0xe12)
+/*
+ * Flags returned together with the boot mode, but not preserved in
+ * __boot_cpu_mode. Used by the idreg override code to work out the
+ * boot state.
+ */
+#define BOOT_CPU_FLAG_E2H BIT_ULL(32)
+
#ifndef __ASSEMBLY__
#include <asm/ptrace.h>
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 4bb2cc8ac446..1ad2568a2569 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -19,6 +19,9 @@
/*
* HWCAP flags - for AT_HWCAP
+ *
+ * Bits 62 and 63 are reserved for use by libc.
+ * Bits 32-61 are unallocated for potential use by libc.
*/
#define HWCAP_FP (1 << 0)
#define HWCAP_ASIMD (1 << 1)
@@ -88,5 +91,6 @@
#define HWCAP2_SME_F32F32 (1 << 29)
#define HWCAP2_SME_FA64 (1 << 30)
#define HWCAP2_WFXT (1UL << 31)
+#define HWCAP2_EBF16 (1UL << 32)
#endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index fa7981d0d917..1add7b01efa7 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -14,6 +14,11 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong
CFLAGS_syscall.o += -fno-stack-protector
+# When KASAN is enabled, a stack trace is recorded for every alloc/free, which
+# can significantly impact performance. Avoid instrumenting the stack trace
+# collection code to minimize this impact.
+KASAN_SANITIZE_stacktrace.o := n
+
# It's not safe to invoke KCOV when portions of the kernel environment aren't
# available or are out-of-sync with HW state. Since `noinstr` doesn't always
# inhibit KCOV instrumentation, disable it for the entire compilation unit.
@@ -59,7 +64,7 @@ obj-$(CONFIG_ACPI) += acpi.o
obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o
obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
obj-$(CONFIG_PARAVIRT) += paravirt.o
-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o pi/
obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
obj-$(CONFIG_ELF_CORE) += elfcore.o
obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index e4dea8db6924..a5a256e3f9fe 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -351,7 +351,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
prot = __acpi_get_writethrough_mem_attribute();
}
}
- return __ioremap(phys, size, prot);
+ return ioremap_prot(phys, size, pgprot_val(prot));
}
/*
diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
index fdfecf0991ce..e51535a5f939 100644
--- a/arch/arm64/kernel/acpi_numa.c
+++ b/arch/arm64/kernel/acpi_numa.c
@@ -109,7 +109,7 @@ void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa)
pxm = pa->proximity_domain;
node = acpi_map_pxm_to_node(pxm);
- if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
+ if (node == NUMA_NO_NODE) {
pr_err("SRAT: Too many proximity domains %d\n", pxm);
bad_srat();
return;
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 7bbf5104b7b7..9bcaa5eacf16 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -121,7 +121,7 @@ static void clean_dcache_range_nopatch(u64 start, u64 end)
ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
d_size = 4 << cpuid_feature_extract_unsigned_field(ctr_el0,
- CTR_DMINLINE_SHIFT);
+ CTR_EL0_DminLine_SHIFT);
cur = start & ~(d_size - 1);
do {
/*
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 6875a16b09d2..fb0e7c7b2e20 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -59,6 +59,7 @@ struct insn_emulation {
static LIST_HEAD(insn_emulation);
static int nr_insn_emulated __initdata;
static DEFINE_RAW_SPINLOCK(insn_emulation_lock);
+static DEFINE_MUTEX(insn_emulation_mutex);
static void register_emulation_hooks(struct insn_emulation_ops *ops)
{
@@ -207,10 +208,10 @@ static int emulation_proc_handler(struct ctl_table *table, int write,
loff_t *ppos)
{
int ret = 0;
- struct insn_emulation *insn = (struct insn_emulation *) table->data;
+ struct insn_emulation *insn = container_of(table->data, struct insn_emulation, current_mode);
enum insn_emulation_mode prev_mode = insn->current_mode;
- table->data = &insn->current_mode;
+ mutex_lock(&insn_emulation_mutex);
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write || prev_mode == insn->current_mode)
@@ -223,7 +224,7 @@ static int emulation_proc_handler(struct ctl_table *table, int write,
update_insn_emulation_mode(insn, INSN_UNDEF);
}
ret:
- table->data = insn;
+ mutex_unlock(&insn_emulation_mutex);
return ret;
}
@@ -247,7 +248,7 @@ static void __init register_insn_emulation_sysctl(void)
sysctl->maxlen = sizeof(int);
sysctl->procname = insn->ops->name;
- sysctl->data = insn;
+ sysctl->data = &insn->current_mode;
sysctl->extra1 = &insn->min;
sysctl->extra2 = &insn->max;
sysctl->proc_handler = emulation_proc_handler;
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index c05cc3b6162e..7e6289e709fc 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -187,7 +187,7 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry,
int scope)
{
u32 midr = read_cpuid_id();
- bool has_dic = read_cpuid_cachetype() & BIT(CTR_DIC_SHIFT);
+ bool has_dic = read_cpuid_cachetype() & BIT(CTR_EL0_DIC_SHIFT);
const struct midr_range range = MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1);
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
@@ -212,6 +212,12 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe),
},
#endif
+#ifdef CONFIG_ARM64_ERRATUM_2441009
+ {
+ /* Cortex-A510 r0p0 -> r1p1. Fixed in r1p2 */
+ ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1),
+ },
+#endif
{},
};
#endif
@@ -395,6 +401,14 @@ static struct midr_range trbe_write_out_of_range_cpus[] = {
};
#endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */
+#ifdef CONFIG_ARM64_ERRATUM_1742098
+static struct midr_range broken_aarch32_aes[] = {
+ MIDR_RANGE(MIDR_CORTEX_A57, 0, 1, 0xf, 0xf),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+ {},
+};
+#endif /* CONFIG_ARM64_ERRATUM_1742098 */
+
const struct arm64_cpu_capabilities arm64_errata[] = {
#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
{
@@ -480,7 +494,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
#endif
#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
{
- .desc = "Qualcomm erratum 1009, or ARM erratum 1286807",
+ .desc = "Qualcomm erratum 1009, or ARM erratum 1286807, 2441009",
.capability = ARM64_WORKAROUND_REPEAT_TLBI,
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
.matches = cpucap_multi_entry_cap_matches,
@@ -658,6 +672,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 1)
},
#endif
+#ifdef CONFIG_ARM64_ERRATUM_1742098
+ {
+ .desc = "ARM erratum 1742098",
+ .capability = ARM64_WORKAROUND_1742098,
+ CAP_MIDR_RANGE_LIST(broken_aarch32_aes),
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ },
+#endif
{
}
};
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 8d88433de81d..ad64cab0a2ba 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -79,6 +79,7 @@
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
#include <asm/fpsimd.h>
+#include <asm/hwcap.h>
#include <asm/insn.h>
#include <asm/kvm_host.h>
#include <asm/mmu_context.h>
@@ -91,7 +92,7 @@
#include <asm/virt.h>
/* Kernel representation of AT_HWCAP and AT_HWCAP2 */
-static unsigned long elf_hwcap __read_mostly;
+static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly;
#ifdef CONFIG_COMPAT
#define COMPAT_ELF_HWCAP_DEFAULT \
@@ -209,35 +210,35 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_I8MM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DGH_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_BF16_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SPECRES_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SB_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FRINTTS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_SPECRES_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_SB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_FRINTTS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_GPI_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_GPI_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_GPA_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_GPA_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_LRCPC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_FCMA_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_JSCVT_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
- FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_API_SHIFT, 4, 0),
+ FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_EL1_API_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
- FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_APA_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0),
+ FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_EL1_APA_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_CLEARBHB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
- FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_APA3_SHIFT, 4, 0),
+ FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_GPA3_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_WFXT_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_GPA3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRES_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_WFxT_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -276,41 +277,41 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_F64MM_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_F32MM_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_I8MM_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SHA3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BF16_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_AES_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
- FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_FA64_SHIFT, 1, 0),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
- FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I16I64_SHIFT, 4, 0),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
- FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F64F64_SHIFT, 1, 0),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
- FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I8I32_SHIFT, 4, 0),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
- FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F16F32_SHIFT, 1, 0),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
- FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_B16F32_SHIFT, 1, 0),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
- FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F32F32_SHIFT, 1, 0),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, 0),
ARM64_FTR_END,
};
@@ -361,6 +362,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TIDCP1_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_AFP_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_ETS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TWED_SHIFT, 4, 0),
@@ -396,18 +398,18 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
static const struct arm64_ftr_bits ftr_ctr[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DIC_SHIFT, 1, 1),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IDC_SHIFT, 1, 1),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_CWG_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_ERG_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DMINLINE_SHIFT, 4, 1),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DIC_SHIFT, 1, 1),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_IDC_SHIFT, 1, 1),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_EL0_CWG_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_EL0_ERG_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DminLine_SHIFT, 4, 1),
/*
* Linux can handle differing I-cache policies. Userspace JITs will
* make use of *minLine.
* If we have differing I-cache policies, report it as the weakest - VIPT.
*/
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, CTR_L1IP_SHIFT, 2, ICACHE_POLICY_VIPT), /* L1Ip */
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IMINLINE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, CTR_EL0_L1Ip_SHIFT, 2, CTR_EL0_L1Ip_VIPT), /* L1Ip */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_IminLine_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -453,13 +455,13 @@ static const struct arm64_ftr_bits ftr_mvfr2[] = {
};
static const struct arm64_ftr_bits ftr_dczid[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, DCZID_DZP_SHIFT, 1, 1),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, DCZID_BS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, DCZID_EL0_DZP_SHIFT, 1, 1),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, DCZID_EL0_BS_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_gmid[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, SYS_GMID_EL1_BS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, GMID_EL1_BS_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -561,7 +563,7 @@ static const struct arm64_ftr_bits ftr_id_pfr2[] = {
static const struct arm64_ftr_bits ftr_id_dfr0[] = {
/* [31:28] TraceFilt */
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_PERFMON_SHIFT, 4, 0xf),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_DFR0_PERFMON_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MPROFDBG_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPTRC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPTRC_SHIFT, 4, 0),
@@ -631,7 +633,10 @@ static const struct arm64_ftr_bits ftr_raz[] = {
__ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override)
struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
+struct arm64_ftr_override __ro_after_init id_aa64pfr0_override;
struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
+struct arm64_ftr_override __ro_after_init id_aa64zfr0_override;
+struct arm64_ftr_override __ro_after_init id_aa64smfr0_override;
struct arm64_ftr_override __ro_after_init id_aa64isar1_override;
struct arm64_ftr_override __ro_after_init id_aa64isar2_override;
@@ -668,11 +673,14 @@ static const struct __ftr_reg_entry {
ARM64_FTR_REG(SYS_ID_MMFR5_EL1, ftr_id_mmfr5),
/* Op1 = 0, CRn = 0, CRm = 4 */
- ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0,
+ &id_aa64pfr0_override),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1,
&id_aa64pfr1_override),
- ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0),
- ARM64_FTR_REG(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0,
+ &id_aa64zfr0_override),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0,
+ &id_aa64smfr0_override),
/* Op1 = 0, CRn = 0, CRm = 5 */
ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
@@ -993,15 +1001,24 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
init_32bit_cpu_features(&info->aarch32);
- if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
+ if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+ id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
+ info->reg_zcr = read_zcr_features();
init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
vec_init_vq_map(ARM64_VEC_SVE);
}
- if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) {
+ if (IS_ENABLED(CONFIG_ARM64_SME) &&
+ id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
+ info->reg_smcr = read_smcr_features();
+ /*
+ * We mask out SMPS since even if the hardware
+ * supports priorities the kernel does not at present
+ * and we block access to them.
+ */
+ info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
init_cpu_ftr_reg(SYS_SMCR_EL1, info->reg_smcr);
- if (IS_ENABLED(CONFIG_ARM64_SME))
- vec_init_vq_map(ARM64_VEC_SME);
+ vec_init_vq_map(ARM64_VEC_SME);
}
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
@@ -1233,23 +1250,31 @@ void update_cpu_features(int cpu,
taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu,
info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0);
- if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
+ if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+ id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
+ info->reg_zcr = read_zcr_features();
taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu,
info->reg_zcr, boot->reg_zcr);
- /* Probe vector lengths, unless we already gave up on SVE */
- if (id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) &&
- !system_capabilities_finalized())
+ /* Probe vector lengths */
+ if (!system_capabilities_finalized())
vec_update_vq_map(ARM64_VEC_SVE);
}
- if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) {
+ if (IS_ENABLED(CONFIG_ARM64_SME) &&
+ id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
+ info->reg_smcr = read_smcr_features();
+ /*
+ * We mask out SMPS since even if the hardware
+ * supports priorities the kernel does not at present
+ * and we block access to them.
+ */
+ info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
taint |= check_update_ftr_reg(SYS_SMCR_EL1, cpu,
info->reg_smcr, boot->reg_smcr);
- /* Probe vector lengths, unless we already gave up on SME */
- if (id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1)) &&
- !system_capabilities_finalized())
+ /* Probe vector lengths */
+ if (!system_capabilities_finalized())
vec_update_vq_map(ARM64_VEC_SME);
}
@@ -1480,7 +1505,7 @@ static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
else
ctr = read_cpuid_effective_cachetype();
- return ctr & BIT(CTR_IDC_SHIFT);
+ return ctr & BIT(CTR_EL0_IDC_SHIFT);
}
static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unused)
@@ -1491,7 +1516,7 @@ static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unu
* to the CTR_EL0 on this CPU and emulate it with the real/safe
* value.
*/
- if (!(read_cpuid_cachetype() & BIT(CTR_IDC_SHIFT)))
+ if (!(read_cpuid_cachetype() & BIT(CTR_EL0_IDC_SHIFT)))
sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
}
@@ -1505,7 +1530,7 @@ static bool has_cache_dic(const struct arm64_cpu_capabilities *entry,
else
ctr = read_cpuid_cachetype();
- return ctr & BIT(CTR_DIC_SHIFT);
+ return ctr & BIT(CTR_EL0_DIC_SHIFT);
}
static bool __maybe_unused
@@ -1645,14 +1670,34 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
}
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT))
+
+extern
+void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
+ phys_addr_t size, pgprot_t prot,
+ phys_addr_t (*pgtable_alloc)(int), int flags);
+
+static phys_addr_t kpti_ng_temp_alloc;
+
+static phys_addr_t kpti_ng_pgd_alloc(int shift)
+{
+ kpti_ng_temp_alloc -= PAGE_SIZE;
+ return kpti_ng_temp_alloc;
+}
+
static void __nocfi
kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
{
- typedef void (kpti_remap_fn)(int, int, phys_addr_t);
+ typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long);
extern kpti_remap_fn idmap_kpti_install_ng_mappings;
kpti_remap_fn *remap_fn;
int cpu = smp_processor_id();
+ int levels = CONFIG_PGTABLE_LEVELS;
+ int order = order_base_2(levels);
+ u64 kpti_ng_temp_pgd_pa = 0;
+ pgd_t *kpti_ng_temp_pgd;
+ u64 alloc = 0;
if (__this_cpu_read(this_cpu_vector) == vectors) {
const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI);
@@ -1670,12 +1715,40 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
remap_fn = (void *)__pa_symbol(function_nocfi(idmap_kpti_install_ng_mappings));
+ if (!cpu) {
+ alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order);
+ kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE);
+ kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd);
+
+ //
+ // Create a minimal page table hierarchy that permits us to map
+ // the swapper page tables temporarily as we traverse them.
+ //
+ // The physical pages are laid out as follows:
+ //
+ // +--------+-/-------+-/------ +-\\--------+
+ // : PTE[] : | PMD[] : | PUD[] : || PGD[] :
+ // +--------+-\-------+-\------ +-//--------+
+ // ^
+ // The first page is mapped into this hierarchy at a PMD_SHIFT
+ // aligned virtual address, so that we can manipulate the PTE
+ // level entries while the mapping is active. The first entry
+ // covers the PTE[] page itself; the remaining entries are free
+ // to be used as an ad-hoc fixmap.
+ //
+ create_kpti_ng_temp_pgd(kpti_ng_temp_pgd, __pa(alloc),
+ KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL,
+ kpti_ng_pgd_alloc, 0);
+ }
+
cpu_install_idmap();
- remap_fn(cpu, num_online_cpus(), __pa_symbol(swapper_pg_dir));
+ remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA);
cpu_uninstall_idmap();
- if (!cpu)
+ if (!cpu) {
+ free_pages(alloc, order);
arm64_use_ng_mappings = true;
+ }
}
#else
static void
@@ -1971,6 +2044,14 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
}
#endif /* CONFIG_ARM64_MTE */
+static void elf_hwcap_fixup(void)
+{
+#ifdef CONFIG_ARM64_ERRATUM_1742098
+ if (cpus_have_const_cap(ARM64_WORKAROUND_1742098))
+ compat_elf_hwcap2 &= ~COMPAT_HWCAP2_AES;
+#endif /* ARM64_ERRATUM_1742098 */
+}
+
#ifdef CONFIG_KVM
static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused)
{
@@ -1978,6 +2059,11 @@ static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, in
}
#endif /* CONFIG_KVM */
+static void cpu_trap_el0_impdef(const struct arm64_cpu_capabilities *__unused)
+{
+ sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_TIDCP);
+}
+
/* Internal helper functions to match cpu capability type */
static bool
cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
@@ -2132,7 +2218,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64ISAR1_EL1,
- .field_pos = ID_AA64ISAR1_DPB_SHIFT,
+ .field_pos = ID_AA64ISAR1_EL1_DPB_SHIFT,
.field_width = 4,
.min_field_value = 1,
},
@@ -2143,7 +2229,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64ISAR1_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64ISAR1_DPB_SHIFT,
+ .field_pos = ID_AA64ISAR1_EL1_DPB_SHIFT,
.field_width = 4,
.min_field_value = 2,
},
@@ -2303,7 +2389,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64ISAR1_EL1,
- .field_pos = ID_AA64ISAR1_SB_SHIFT,
+ .field_pos = ID_AA64ISAR1_EL1_SB_SHIFT,
.field_width = 4,
.sign = FTR_UNSIGNED,
.min_field_value = 1,
@@ -2315,9 +2401,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
.sys_reg = SYS_ID_AA64ISAR1_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64ISAR1_APA_SHIFT,
+ .field_pos = ID_AA64ISAR1_EL1_APA_SHIFT,
.field_width = 4,
- .min_field_value = ID_AA64ISAR1_APA_ARCHITECTED,
+ .min_field_value = ID_AA64ISAR1_EL1_APA_PAuth,
.matches = has_address_auth_cpucap,
},
{
@@ -2326,9 +2412,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
.sys_reg = SYS_ID_AA64ISAR2_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64ISAR2_APA3_SHIFT,
+ .field_pos = ID_AA64ISAR2_EL1_APA3_SHIFT,
.field_width = 4,
- .min_field_value = ID_AA64ISAR2_APA3_ARCHITECTED,
+ .min_field_value = ID_AA64ISAR2_EL1_APA3_PAuth,
.matches = has_address_auth_cpucap,
},
{
@@ -2337,9 +2423,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
.sys_reg = SYS_ID_AA64ISAR1_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64ISAR1_API_SHIFT,
+ .field_pos = ID_AA64ISAR1_EL1_API_SHIFT,
.field_width = 4,
- .min_field_value = ID_AA64ISAR1_API_IMP_DEF,
+ .min_field_value = ID_AA64ISAR1_EL1_API_PAuth,
.matches = has_address_auth_cpucap,
},
{
@@ -2353,9 +2439,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.sys_reg = SYS_ID_AA64ISAR1_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64ISAR1_GPA_SHIFT,
+ .field_pos = ID_AA64ISAR1_EL1_GPA_SHIFT,
.field_width = 4,
- .min_field_value = ID_AA64ISAR1_GPA_ARCHITECTED,
+ .min_field_value = ID_AA64ISAR1_EL1_GPA_IMP,
.matches = has_cpuid_feature,
},
{
@@ -2364,9 +2450,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.sys_reg = SYS_ID_AA64ISAR2_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64ISAR2_GPA3_SHIFT,
+ .field_pos = ID_AA64ISAR2_EL1_GPA3_SHIFT,
.field_width = 4,
- .min_field_value = ID_AA64ISAR2_GPA3_ARCHITECTED,
+ .min_field_value = ID_AA64ISAR2_EL1_GPA3_IMP,
.matches = has_cpuid_feature,
},
{
@@ -2375,9 +2461,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.sys_reg = SYS_ID_AA64ISAR1_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64ISAR1_GPI_SHIFT,
+ .field_pos = ID_AA64ISAR1_EL1_GPI_SHIFT,
.field_width = 4,
- .min_field_value = ID_AA64ISAR1_GPI_IMP_DEF,
+ .min_field_value = ID_AA64ISAR1_EL1_GPI_IMP,
.matches = has_cpuid_feature,
},
{
@@ -2478,7 +2564,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.sys_reg = SYS_ID_AA64ISAR1_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64ISAR1_LRCPC_SHIFT,
+ .field_pos = ID_AA64ISAR1_EL1_LRCPC_SHIFT,
.field_width = 4,
.matches = has_cpuid_feature,
.min_field_value = 1,
@@ -2503,9 +2589,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_SME_FA64,
.sys_reg = SYS_ID_AA64SMFR0_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64SMFR0_FA64_SHIFT,
+ .field_pos = ID_AA64SMFR0_EL1_FA64_SHIFT,
.field_width = 1,
- .min_field_value = ID_AA64SMFR0_FA64,
+ .min_field_value = ID_AA64SMFR0_EL1_FA64_IMP,
.matches = has_cpuid_feature,
.cpu_enable = fa64_kernel_enable,
},
@@ -2516,10 +2602,22 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.sys_reg = SYS_ID_AA64ISAR2_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64ISAR2_WFXT_SHIFT,
+ .field_pos = ID_AA64ISAR2_EL1_WFxT_SHIFT,
.field_width = 4,
.matches = has_cpuid_feature,
- .min_field_value = ID_AA64ISAR2_WFXT_SUPPORTED,
+ .min_field_value = ID_AA64ISAR2_EL1_WFxT_IMP,
+ },
+ {
+ .desc = "Trap EL0 IMPLEMENTATION DEFINED functionality",
+ .capability = ARM64_HAS_TIDCP1,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .sys_reg = SYS_ID_AA64MMFR1_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64MMFR1_TIDCP1_SHIFT,
+ .field_width = 4,
+ .min_field_value = ID_AA64MMFR1_TIDCP1_IMP,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_trap_el0_impdef,
},
{},
};
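For context, each table entry above is matched by extracting .field_width bits at .field_pos from the named ID register and comparing the (signed or unsigned) field against .min_field_value. A rough C model of that comparison, not the kernel's actual has_cpuid_feature() helper (function name illustrative):

static bool entry_matches(const struct arm64_cpu_capabilities *cap, u64 reg)
{
	s64 fld;

	if (cap->sign == FTR_SIGNED)		/* sign-extend the field */
		fld = (s64)(reg << (64 - cap->field_width - cap->field_pos)) >>
		      (64 - cap->field_width);
	else
		fld = (reg >> cap->field_pos) &
		      GENMASK_ULL(cap->field_width - 1, 0);

	return fld >= (s64)cap->min_field_value;
}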
@@ -2560,33 +2658,33 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
#ifdef CONFIG_ARM64_PTR_AUTH
static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = {
{
- HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_APA_SHIFT,
+ HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_APA_SHIFT,
4, FTR_UNSIGNED,
- ID_AA64ISAR1_APA_ARCHITECTED)
+ ID_AA64ISAR1_EL1_APA_PAuth)
},
{
- HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_SHIFT,
- 4, FTR_UNSIGNED, ID_AA64ISAR2_APA3_ARCHITECTED)
+ HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_APA3_SHIFT,
+ 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_APA3_PAuth)
},
{
- HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_SHIFT,
- 4, FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF)
+ HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_API_SHIFT,
+ 4, FTR_UNSIGNED, ID_AA64ISAR1_EL1_API_PAuth)
},
{},
};
static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = {
{
- HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPA_SHIFT,
- 4, FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED)
+ HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_GPA_SHIFT,
+ 4, FTR_UNSIGNED, ID_AA64ISAR1_EL1_GPA_IMP)
},
{
- HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_GPA3_SHIFT,
- 4, FTR_UNSIGNED, ID_AA64ISAR2_GPA3_ARCHITECTED)
+ HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_GPA3_SHIFT,
+ 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_GPA3_IMP)
},
{
- HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPI_SHIFT,
- 4, FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF)
+ HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_GPI_SHIFT,
+ 4, FTR_UNSIGNED, ID_AA64ISAR1_EL1_GPI_IMP)
},
{},
};
@@ -2614,30 +2712,31 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_FCMA_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_FRINTTS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_SB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_EBF16),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM),
HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
#ifdef CONFIG_ARM64_SVE
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SVEver_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_BitPerm_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_BF16_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SHA3_IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SM4_IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_I8MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F32MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F64MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
#endif
HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS),
#ifdef CONFIG_ARM64_BTI
@@ -2653,17 +2752,17 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
#endif /* CONFIG_ARM64_MTE */
HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
- HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
- HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_WFXT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_WFXT_SUPPORTED, CAP_HWCAP, KERNEL_HWCAP_WFXT),
+ HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
+ HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_WFxT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_WFxT_IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT),
#ifdef CONFIG_ARM64_SME
HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SME, CAP_HWCAP, KERNEL_HWCAP_SME),
- HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_FA64, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
- HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I16I64, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
- HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F64F64, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
- HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I8I32, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
- HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F16F32, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
- HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_B16F32, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
- HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F32F32, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_FA64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F64F64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I8I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_B16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
+ HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F32F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
#endif /* CONFIG_ARM64_SME */
{},
};
@@ -3098,14 +3197,12 @@ static bool __maybe_unused __system_matches_cap(unsigned int n)
void cpu_set_feature(unsigned int num)
{
- WARN_ON(num >= MAX_CPU_FEATURES);
- elf_hwcap |= BIT(num);
+ set_bit(num, elf_hwcap);
}
bool cpu_have_feature(unsigned int num)
{
- WARN_ON(num >= MAX_CPU_FEATURES);
- return elf_hwcap & BIT(num);
+ return test_bit(num, elf_hwcap);
}
EXPORT_SYMBOL_GPL(cpu_have_feature);
@@ -3116,12 +3213,12 @@ unsigned long cpu_get_elf_hwcap(void)
* note that for userspace compatibility we guarantee that bits 62
* and 63 will always be returned as 0.
*/
- return lower_32_bits(elf_hwcap);
+ return elf_hwcap[0];
}
unsigned long cpu_get_elf_hwcap2(void)
{
- return upper_32_bits(elf_hwcap);
+ return elf_hwcap[1];
}
static void __init setup_system_capabilities(void)
@@ -3143,8 +3240,10 @@ void __init setup_cpu_features(void)
setup_system_capabilities();
setup_elf_hwcaps(arm64_elf_hwcaps);
- if (system_supports_32bit_el0())
+ if (system_supports_32bit_el0()) {
setup_elf_hwcaps(compat_elf_hwcaps);
+ elf_hwcap_fixup();
+ }
if (system_uses_ttbr0_pan())
pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
@@ -3197,6 +3296,7 @@ static int enable_mismatched_32bit_el0(unsigned int cpu)
cpu_active_mask);
get_cpu_device(lucky_winner)->offline_disabled = true;
setup_elf_hwcaps(compat_elf_hwcaps);
+ elf_hwcap_fixup();
pr_info("Asymmetric 32-bit EL0 support detected on CPU %u; CPU hot-unplug disabled on CPU %u\n",
cpu, lucky_winner);
return 0;
@@ -3218,7 +3318,7 @@ subsys_initcall_sync(init_32bit_el0_mask);
static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap)
{
- cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+ cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
}
/*
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index 3006f4324808..4150e308e99c 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -13,35 +13,6 @@
#include <linux/of_device.h>
#include <linux/psci.h>
-#include <asm/cpuidle.h>
-#include <asm/cpu_ops.h>
-
-int arm_cpuidle_init(unsigned int cpu)
-{
- const struct cpu_operations *ops = get_cpu_ops(cpu);
- int ret = -EOPNOTSUPP;
-
- if (ops && ops->cpu_suspend && ops->cpu_init_idle)
- ret = ops->cpu_init_idle(cpu);
-
- return ret;
-}
-
-/**
- * arm_cpuidle_suspend() - function to enter a low-power idle state
- * @index: argument to pass to CPU suspend operations
- *
- * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU
- * operations back-end error code otherwise.
- */
-int arm_cpuidle_suspend(int index)
-{
- int cpu = smp_processor_id();
- const struct cpu_operations *ops = get_cpu_ops(cpu);
-
- return ops->cpu_suspend(index);
-}
-
#ifdef CONFIG_ACPI
#include <acpi/processor.h>
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 8eff0a34ffd4..d7702f39b4d3 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -33,12 +33,19 @@
DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data);
static struct cpuinfo_arm64 boot_cpu_data;
-static const char *icache_policy_str[] = {
- [ICACHE_POLICY_VPIPT] = "VPIPT",
- [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN",
- [ICACHE_POLICY_VIPT] = "VIPT",
- [ICACHE_POLICY_PIPT] = "PIPT",
-};
+static inline const char *icache_policy_str(int l1ip)
+{
+ switch (l1ip) {
+ case CTR_EL0_L1Ip_VPIPT:
+ return "VPIPT";
+ case CTR_EL0_L1Ip_VIPT:
+ return "VIPT";
+ case CTR_EL0_L1Ip_PIPT:
+ return "PIPT";
+ default:
+ return "RESERVED/UNKNOWN";
+ }
+}
unsigned long __icache_flags;
@@ -107,6 +114,7 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_SME_F32F32] = "smef32f32",
[KERNEL_HWCAP_SME_FA64] = "smefa64",
[KERNEL_HWCAP_WFXT] = "wfxt",
+ [KERNEL_HWCAP_EBF16] = "ebf16",
};
#ifdef CONFIG_COMPAT
@@ -267,6 +275,7 @@ static struct kobj_type cpuregs_kobj_type = {
CPUREGS_ATTR_RO(midr_el1, midr);
CPUREGS_ATTR_RO(revidr_el1, revidr);
+CPUREGS_ATTR_RO(smidr_el1, smidr);
static struct attribute *cpuregs_id_attrs[] = {
&cpuregs_attr_midr_el1.attr,
@@ -279,6 +288,16 @@ static const struct attribute_group cpuregs_attr_group = {
.name = "identification"
};
+static struct attribute *sme_cpuregs_id_attrs[] = {
+ &cpuregs_attr_smidr_el1.attr,
+ NULL
+};
+
+static const struct attribute_group sme_cpuregs_attr_group = {
+ .attrs = sme_cpuregs_id_attrs,
+ .name = "identification"
+};
+
static int cpuid_cpu_online(unsigned int cpu)
{
int rc;
@@ -296,6 +315,8 @@ static int cpuid_cpu_online(unsigned int cpu)
rc = sysfs_create_group(&info->kobj, &cpuregs_attr_group);
if (rc)
kobject_del(&info->kobj);
+ if (system_supports_sme())
+ rc = sysfs_merge_group(&info->kobj, &sme_cpuregs_attr_group);
out:
return rc;
}
@@ -342,19 +363,19 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
u32 l1ip = CTR_L1IP(info->reg_ctr);
switch (l1ip) {
- case ICACHE_POLICY_PIPT:
+ case CTR_EL0_L1Ip_PIPT:
break;
- case ICACHE_POLICY_VPIPT:
+ case CTR_EL0_L1Ip_VPIPT:
set_bit(ICACHEF_VPIPT, &__icache_flags);
break;
- case ICACHE_POLICY_RESERVED:
- case ICACHE_POLICY_VIPT:
+ case CTR_EL0_L1Ip_VIPT:
+ default:
/* Assume aliasing */
set_bit(ICACHEF_ALIASING, &__icache_flags);
break;
}
- pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
+ pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str(l1ip), cpu);
}
static void __cpuinfo_store_cpu_32bit(struct cpuinfo_32bit *info)
@@ -418,14 +439,6 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
__cpuinfo_store_cpu_32bit(&info->aarch32);
- if (IS_ENABLED(CONFIG_ARM64_SVE) &&
- id_aa64pfr0_sve(info->reg_id_aa64pfr0))
- info->reg_zcr = read_zcr_features();
-
- if (IS_ENABLED(CONFIG_ARM64_SME) &&
- id_aa64pfr1_sme(info->reg_id_aa64pfr1))
- info->reg_smcr = read_smcr_features();
-
cpuinfo_detect_icache_policy(info);
}
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 5b82b9292400..254fe31c03a0 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -636,18 +636,28 @@ alternative_else_nop_endif
*/
.endm
- .macro tramp_data_page dst
- adr_l \dst, .entry.tramp.text
- sub \dst, \dst, PAGE_SIZE
- .endm
-
- .macro tramp_data_read_var dst, var
-#ifdef CONFIG_RANDOMIZE_BASE
- tramp_data_page \dst
- add \dst, \dst, #:lo12:__entry_tramp_data_\var
- ldr \dst, [\dst]
+ .macro tramp_data_read_var dst, var
+#ifdef CONFIG_RELOCATABLE
+ ldr \dst, .L__tramp_data_\var
+ .ifndef .L__tramp_data_\var
+ .pushsection ".entry.tramp.rodata", "a", %progbits
+ .align 3
+.L__tramp_data_\var:
+ .quad \var
+ .popsection
+ .endif
#else
- ldr \dst, =\var
+ /*
+ * As !RELOCATABLE implies !RANDOMIZE_BASE the address is always a
+ * compile time constant (and hence not secret and not worth hiding).
+ *
+ * As statically allocated kernel code and data always live in the top
+ * 47 bits of the address space we can sign-extend bit 47 and avoid an
+ * instruction to load the upper 16 bits (which must be 0xFFFF).
+ */
+ movz \dst, :abs_g2_s:\var
+ movk \dst, :abs_g1_nc:\var
+ movk \dst, :abs_g0_nc:\var
#endif
.endm
@@ -695,7 +705,7 @@ alternative_else_nop_endif
msr vbar_el1, x30
isb
.else
- ldr x30, =vectors
+ adr_l x30, vectors
.endif // \kpti == 1
.if \bhb == BHB_MITIGATION_FW
@@ -764,24 +774,7 @@ SYM_CODE_END(tramp_exit_native)
SYM_CODE_START(tramp_exit_compat)
tramp_exit 32
SYM_CODE_END(tramp_exit_compat)
-
- .ltorg
.popsection // .entry.tramp.text
-#ifdef CONFIG_RANDOMIZE_BASE
- .pushsection ".rodata", "a"
- .align PAGE_SHIFT
-SYM_DATA_START(__entry_tramp_data_start)
-__entry_tramp_data_vectors:
- .quad vectors
-#ifdef CONFIG_ARM_SDE_INTERFACE
-__entry_tramp_data___sdei_asm_handler:
- .quad __sdei_asm_handler
-#endif /* CONFIG_ARM_SDE_INTERFACE */
-__entry_tramp_data_this_cpu_vector:
- .quad this_cpu_vector
-SYM_DATA_END(__entry_tramp_data_start)
- .popsection // .rodata
-#endif /* CONFIG_RANDOMIZE_BASE */
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
/*
@@ -932,7 +925,6 @@ NOKPROBE(call_on_irq_stack)
* This clobbers x4, __sdei_handler() will restore this from firmware's
* copy.
*/
-.ltorg
.pushsection ".entry.tramp.text", "ax"
SYM_CODE_START(__sdei_asm_entry_trampoline)
mrs x4, ttbr1_el1
@@ -967,7 +959,6 @@ SYM_CODE_START(__sdei_asm_exit_trampoline)
1: sdei_handler_exit exit_mode=x2
SYM_CODE_END(__sdei_asm_exit_trampoline)
NOKPROBE(__sdei_asm_exit_trampoline)
- .ltorg
.popsection // .entry.tramp.text
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index aecf3071efdd..dd63ffc3a2fa 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -445,7 +445,6 @@ static void fpsimd_save(void)
if (system_supports_sme()) {
u64 *svcr = last->svcr;
- *svcr = read_sysreg_s(SYS_SVCR);
*svcr = read_sysreg_s(SYS_SVCR);
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 6a98f1a38c29..cefe6a73ee54 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -37,8 +37,6 @@
#include "efi-header.S"
-#define __PHYS_OFFSET KERNEL_START
-
#if (PAGE_OFFSET & 0x1fffff) != 0
#error PAGE_OFFSET must be at least 2MB aligned
#endif
@@ -51,9 +49,6 @@
* MMU = off, D-cache = off, I-cache = on or off,
* x0 = physical address to the FDT blob.
*
- * This code is mostly position independent so you call this at
- * __pa(PAGE_OFFSET).
- *
* Note that the callee-saved registers are used for storing variables
* that are useful before the MMU is enabled. The allocations are described
* in the entry routines.
@@ -82,25 +77,34 @@
* primary lowlevel boot path:
*
* Register Scope Purpose
+ * x20 primary_entry() .. __primary_switch() CPU boot mode
* x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0
+ * x22 create_idmap() .. start_kernel() ID map VA of the DT blob
* x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset
- * x28 __create_page_tables() callee preserved temp register
- * x19/x20 __primary_switch() callee preserved temp registers
- * x24 __primary_switch() .. relocate_kernel() current RELR displacement
+ * x24 __primary_switch() linear map KASLR seed
+ * x25 primary_entry() .. start_kernel() supported VA size
+ * x28 create_idmap() callee preserved temp register
*/
SYM_CODE_START(primary_entry)
bl preserve_boot_args
bl init_kernel_el // w0=cpu_boot_mode
- adrp x23, __PHYS_OFFSET
- and x23, x23, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0
- bl set_cpu_boot_mode_flag
- bl __create_page_tables
+ mov x20, x0
+ bl create_idmap
+
/*
* The following calls CPU setup code, see arch/arm64/mm/proc.S for
* details.
* On return, the CPU will be ready for the MMU to be turned on and
* the TCR will have been set.
*/
+#if VA_BITS > 48
+ mrs_s x0, SYS_ID_AA64MMFR2_EL1
+ tst x0, #0xf << ID_AA64MMFR2_LVA_SHIFT
+ mov x0, #VA_BITS
+ mov x25, #VA_BITS_MIN
+ csel x25, x25, x0, eq
+ mov x0, x25
+#endif
bl __cpu_setup // initialise processor
b __primary_switch
SYM_CODE_END(primary_entry)
@@ -122,28 +126,16 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
b dcache_inval_poc // tail call
SYM_CODE_END(preserve_boot_args)
-/*
- * Macro to create a table entry to the next page.
- *
- * tbl: page table address
- * virt: virtual address
- * shift: #imm page table shift
- * ptrs: #imm pointers per table page
- *
- * Preserves: virt
- * Corrupts: ptrs, tmp1, tmp2
- * Returns: tbl -> next level table page address
- */
- .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
- add \tmp1, \tbl, #PAGE_SIZE
- phys_to_pte \tmp2, \tmp1
- orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type
- lsr \tmp1, \virt, #\shift
- sub \ptrs, \ptrs, #1
- and \tmp1, \tmp1, \ptrs // table index
- str \tmp2, [\tbl, \tmp1, lsl #3]
- add \tbl, \tbl, #PAGE_SIZE // next level table page
- .endm
+SYM_FUNC_START_LOCAL(clear_page_tables)
+ /*
+ * Clear the init page tables.
+ */
+ adrp x0, init_pg_dir
+ adrp x1, init_pg_end
+ sub x2, x1, x0
+ mov x1, xzr
+ b __pi_memset // tail call
+SYM_FUNC_END(clear_page_tables)
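In C terms the new clear_page_tables is just a memset over the init page tables, tail-calling the position-independent __pi_memset. An illustrative equivalent (linker symbols declared as byte arrays for the sketch):

extern char init_pg_dir[], init_pg_end[];	/* linker-provided bounds */

static void clear_page_tables_c(void)
{
	memset(init_pg_dir, 0, init_pg_end - init_pg_dir);
}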
/*
* Macro to populate page table entries, these entries can be pointers to the next level
@@ -179,31 +171,20 @@ SYM_CODE_END(preserve_boot_args)
* vstart: virtual address of start of range
* vend: virtual address of end of range - we map [vstart, vend]
* shift: shift used to transform virtual address into index
- * ptrs: number of entries in page table
+ * order: #imm 2log(number of entries in page table)
* istart: index in table corresponding to vstart
* iend: index in table corresponding to vend
* count: On entry: how many extra entries were required in previous level, scales
* our end index.
* On exit: returns how many extra entries required for next page table level
*
- * Preserves: vstart, vend, shift, ptrs
+ * Preserves: vstart, vend
* Returns: istart, iend, count
*/
- .macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count
- lsr \iend, \vend, \shift
- mov \istart, \ptrs
- sub \istart, \istart, #1
- and \iend, \iend, \istart // iend = (vend >> shift) & (ptrs - 1)
- mov \istart, \ptrs
- mul \istart, \istart, \count
- add \iend, \iend, \istart // iend += count * ptrs
- // our entries span multiple tables
-
- lsr \istart, \vstart, \shift
- mov \count, \ptrs
- sub \count, \count, #1
- and \istart, \istart, \count
-
+ .macro compute_indices, vstart, vend, shift, order, istart, iend, count
+ ubfx \istart, \vstart, \shift, \order
+ ubfx \iend, \vend, \shift, \order
+ add \iend, \iend, \count, lsl \order
sub \count, \iend, \istart
.endm
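A C model of the reworked macro may help: 'order' is log2 of the number of entries in the table, so a single ubfx extracts the table index, and 'count' carries entries spilled over from the previous level (function name illustrative):

static void compute_indices_c(u64 vstart, u64 vend, unsigned int shift,
			      unsigned int order, u64 *istart, u64 *iend,
			      u64 *count)
{
	*istart = (vstart >> shift) & (BIT(order) - 1);			/* ubfx */
	*iend   = ((vend >> shift) & (BIT(order) - 1)) + (*count << order);
	*count  = *iend - *istart;
}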
@@ -218,119 +199,116 @@ SYM_CODE_END(preserve_boot_args)
* vend: virtual address of end of range - we map [vstart, vend - 1]
* flags: flags to use to map last level entries
* phys: physical address corresponding to vstart - physical memory is contiguous
- * pgds: the number of pgd entries
+ * order: #imm 2log(number of entries in PGD table)
+ *
+ * If extra_shift is set, an extra level will be populated if the end address does
+ * not fit in 'extra_shift' bits. This assumes vend is in the TTBR0 range.
*
* Temporaries: istart, iend, tmp, count, sv - these need to be different registers
* Preserves: vstart, flags
* Corrupts: tbl, rtbl, vend, istart, iend, tmp, count, sv
*/
- .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv
+ .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, order, istart, iend, tmp, count, sv, extra_shift
sub \vend, \vend, #1
add \rtbl, \tbl, #PAGE_SIZE
- mov \sv, \rtbl
mov \count, #0
- compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count
+
+ .ifnb \extra_shift
+ tst \vend, #~((1 << (\extra_shift)) - 1)
+ b.eq .L_\@
+ compute_indices \vstart, \vend, #\extra_shift, #(PAGE_SHIFT - 3), \istart, \iend, \count
+ mov \sv, \rtbl
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
mov \tbl, \sv
+ .endif
+.L_\@:
+ compute_indices \vstart, \vend, #PGDIR_SHIFT, #\order, \istart, \iend, \count
mov \sv, \rtbl
+ populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
+ mov \tbl, \sv
#if SWAPPER_PGTABLE_LEVELS > 3
- compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count
+ compute_indices \vstart, \vend, #PUD_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
+ mov \sv, \rtbl
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
mov \tbl, \sv
- mov \sv, \rtbl
#endif
#if SWAPPER_PGTABLE_LEVELS > 2
- compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count
+ compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
+ mov \sv, \rtbl
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
mov \tbl, \sv
#endif
- compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count
- bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1
- populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
+ compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
+ bic \rtbl, \phys, #SWAPPER_BLOCK_SIZE - 1
+ populate_entries \tbl, \rtbl, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
.endm
/*
- * Setup the initial page tables. We only setup the barest amount which is
- * required to get the kernel running. The following sections are required:
- * - identity mapping to enable the MMU (low address, TTBR0)
- * - first few MB of the kernel linear mapping to jump to once the MMU has
- * been enabled
+ * Remap a subregion created with the map_memory macro with modified attributes
+ * or output address. The entire remapped region must have been covered in the
+ * invocation of map_memory.
+ *
+ * x0: last level table address (returned in first argument to map_memory)
+ * x1: start VA of the existing mapping
+ * x2: start VA of the region to update
+ * x3: end VA of the region to update (exclusive)
+ * x4: start PA associated with the region to update
+ * x5: attributes to set on the updated region
+ * x6: order of the last level mappings
*/
-SYM_FUNC_START_LOCAL(__create_page_tables)
- mov x28, lr
+SYM_FUNC_START_LOCAL(remap_region)
+ sub x3, x3, #1 // make end inclusive
- /*
- * Invalidate the init page tables to avoid potential dirty cache lines
- * being evicted. Other page tables are allocated in rodata as part of
- * the kernel image, and thus are clean to the PoC per the boot
- * protocol.
- */
- adrp x0, init_pg_dir
- adrp x1, init_pg_end
- bl dcache_inval_poc
+ // Get the index offset for the start of the last level table
+ lsr x1, x1, x6
+ bfi x1, xzr, #0, #PAGE_SHIFT - 3
- /*
- * Clear the init page tables.
- */
- adrp x0, init_pg_dir
- adrp x1, init_pg_end
- sub x1, x1, x0
-1: stp xzr, xzr, [x0], #16
- stp xzr, xzr, [x0], #16
- stp xzr, xzr, [x0], #16
- stp xzr, xzr, [x0], #16
- subs x1, x1, #64
- b.ne 1b
+ // Derive the start and end indexes into the last level table
+ // associated with the provided region
+ lsr x2, x2, x6
+ lsr x3, x3, x6
+ sub x2, x2, x1
+ sub x3, x3, x1
- mov x7, SWAPPER_MM_MMUFLAGS
+ mov x1, #1
+ lsl x6, x1, x6 // block size at this level
- /*
- * Create the identity mapping.
- */
- adrp x0, idmap_pg_dir
- adrp x3, __idmap_text_start // __pa(__idmap_text_start)
-
-#ifdef CONFIG_ARM64_VA_BITS_52
- mrs_s x6, SYS_ID_AA64MMFR2_EL1
- and x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
- mov x5, #52
- cbnz x6, 1f
-#endif
- mov x5, #VA_BITS_MIN
-1:
- adr_l x6, vabits_actual
- str x5, [x6]
- dmb sy
- dc ivac, x6 // Invalidate potentially stale cache line
+ populate_entries x0, x4, x2, x3, x5, x6, x7
+ ret
+SYM_FUNC_END(remap_region)
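A rough C rendering of remap_region, following the x0-x6 register interface documented above (illustrative only; the phys_to_pte() conversion inside populate_entries is elided):

static void remap_region_c(u64 *table, u64 map_start, u64 start, u64 end,
			   u64 pa, u64 attrs, unsigned int shift)
{
	/* index offset of the first entry of this last-level table */
	u64 base  = (map_start >> shift) & ~(u64)(PTRS_PER_PTE - 1);
	u64 first = (start >> shift) - base;
	u64 last  = ((end - 1) >> shift) - base;	/* end is exclusive */
	u64 step  = 1UL << shift;			/* block size */

	for (u64 i = first; i <= last; i++, pa += step)
		table[i] = pa | attrs;			/* phys_to_pte() elided */
}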
+SYM_FUNC_START_LOCAL(create_idmap)
+ mov x28, lr
/*
- * VA_BITS may be too small to allow for an ID mapping to be created
- * that covers system RAM if that is located sufficiently high in the
- * physical address space. So for the ID map, use an extended virtual
- * range in that case, and configure an additional translation level
- * if needed.
+ * The ID map carries a 1:1 mapping of the physical address range
+ * covered by the loaded image, which could be anywhere in DRAM. This
+ * means that the required size of the VA (== PA) space is decided at
+ * boot time, and could be more than the configured size of the VA
+ * space for ordinary kernel and user space mappings.
+ *
+ * There are three cases to consider here:
+ * - 39 <= VA_BITS < 48, and the ID map needs up to 48 VA bits to cover
+ * the placement of the image. In this case, we configure one extra
+ * level of translation on the fly for the ID map only. (This case
+ * also covers 42-bit VA/52-bit PA on 64k pages).
*
- * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
- * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
- * this number conveniently equals the number of leading zeroes in
- * the physical address of __idmap_text_end.
+ * - VA_BITS == 48, and the ID map needs more than 48 VA bits. This can
+ * only happen when using 64k pages, in which case we need to extend
+ * the root level table rather than add a level. Note that we can
+ * treat this case as 'always extended' as long as we take care not
+ * to program an unsupported T0SZ value into the TCR register.
+ *
+ * - Combinations that would require two additional levels of
+ * translation are not supported, e.g., VA_BITS==36 on 16k pages, or
+ * VA_BITS==39/4k pages with 5-level paging, where the input address
+ * requires more than 47 or 48 bits, respectively.
*/
- adrp x5, __idmap_text_end
- clz x5, x5
- cmp x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough?
- b.ge 1f // .. then skip VA range extension
-
- adr_l x6, idmap_t0sz
- str x5, [x6]
- dmb sy
- dc ivac, x6 // Invalidate potentially stale cache line
-
#if (VA_BITS < 48)
+#define IDMAP_PGD_ORDER (VA_BITS - PGDIR_SHIFT)
#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
-#define EXTRA_PTRS (1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
/*
* If VA_BITS < 48, we have to configure an additional table level.
@@ -342,36 +320,40 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
#if VA_BITS != EXTRA_SHIFT
#error "Mismatch between VA_BITS and page size/number of translation levels"
#endif
-
- mov x4, EXTRA_PTRS
- create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
#else
+#define IDMAP_PGD_ORDER (PHYS_MASK_SHIFT - PGDIR_SHIFT)
+#define EXTRA_SHIFT
/*
* If VA_BITS == 48, we don't have to configure an additional
* translation level, but the top-level table has more entries.
*/
- mov x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
- str_l x4, idmap_ptrs_per_pgd, x5
#endif
-1:
- ldr_l x4, idmap_ptrs_per_pgd
- adr_l x6, __idmap_text_end // __pa(__idmap_text_end)
-
- map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14
-
- /*
- * Map the kernel image (starting with PHYS_OFFSET).
- */
- adrp x0, init_pg_dir
- mov_q x5, KIMAGE_VADDR // compile time __va(_text)
- add x5, x5, x23 // add KASLR displacement
- mov x4, PTRS_PER_PGD
- adrp x6, _end // runtime __pa(_end)
- adrp x3, _text // runtime __pa(_text)
- sub x6, x6, x3 // _end - _text
- add x6, x6, x5 // runtime __va(_end)
-
- map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14
+ adrp x0, init_idmap_pg_dir
+ adrp x3, _text
+ adrp x6, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
+ mov x7, SWAPPER_RX_MMUFLAGS
+
+ map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14, EXTRA_SHIFT
+
+ /* Remap the kernel page tables r/w in the ID map */
+ adrp x1, _text
+ adrp x2, init_pg_dir
+ adrp x3, init_pg_end
+ bic x4, x2, #SWAPPER_BLOCK_SIZE - 1
+ mov x5, SWAPPER_RW_MMUFLAGS
+ mov x6, #SWAPPER_BLOCK_SHIFT
+ bl remap_region
+
+ /* Remap the FDT after the kernel image */
+ adrp x1, _text
+ adrp x22, _end + SWAPPER_BLOCK_SIZE
+ bic x2, x22, #SWAPPER_BLOCK_SIZE - 1
+ bfi x22, x21, #0, #SWAPPER_BLOCK_SHIFT // remapped FDT address
+ add x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
+ bic x4, x21, #SWAPPER_BLOCK_SIZE - 1
+ mov x5, SWAPPER_RW_MMUFLAGS
+ mov x6, #SWAPPER_BLOCK_SHIFT
+ bl remap_region
/*
* Since the page tables have been populated with non-cacheable
@@ -380,16 +362,27 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
*/
dmb sy
- adrp x0, idmap_pg_dir
- adrp x1, idmap_pg_end
+ adrp x0, init_idmap_pg_dir
+ adrp x1, init_idmap_pg_end
bl dcache_inval_poc
+ ret x28
+SYM_FUNC_END(create_idmap)
+SYM_FUNC_START_LOCAL(create_kernel_mapping)
adrp x0, init_pg_dir
- adrp x1, init_pg_end
- bl dcache_inval_poc
+ mov_q x5, KIMAGE_VADDR // compile time __va(_text)
+ add x5, x5, x23 // add KASLR displacement
+ adrp x6, _end // runtime __pa(_end)
+ adrp x3, _text // runtime __pa(_text)
+ sub x6, x6, x3 // _end - _text
+ add x6, x6, x5 // runtime __va(_end)
+ mov x7, SWAPPER_RW_MMUFLAGS
- ret x28
-SYM_FUNC_END(__create_page_tables)
+ map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
+
+ dsb ishst // sync with page table walker
+ ret
+SYM_FUNC_END(create_kernel_mapping)
/*
* Initialize CPU registers with task-specific and cpu-specific context.
@@ -420,7 +413,7 @@ SYM_FUNC_END(__create_page_tables)
/*
* The following fragment of code is executed with the MMU enabled.
*
- * x0 = __PHYS_OFFSET
+ * x0 = __pa(KERNEL_START)
*/
SYM_FUNC_START_LOCAL(__primary_switched)
adr_l x4, init_task
@@ -439,6 +432,9 @@ SYM_FUNC_START_LOCAL(__primary_switched)
sub x4, x4, x0 // the kernel virtual and
str_l x4, kimage_voffset, x5 // physical mappings
+ mov x0, x20
+ bl set_cpu_boot_mode_flag
+
// Clear BSS
adr_l x0, __bss_start
mov x1, xzr
@@ -447,35 +443,30 @@ SYM_FUNC_START_LOCAL(__primary_switched)
bl __pi_memset
dsb ishst // Make zero page visible to PTW
+#if VA_BITS > 48
+ adr_l x8, vabits_actual // Set this early so KASAN early init
+ str x25, [x8] // ... observes the correct value
+ dc civac, x8 // Make visible to booting secondaries
+#endif
+
+#ifdef CONFIG_RANDOMIZE_BASE
+ adrp x5, memstart_offset_seed // Save KASLR linear map seed
+ strh w24, [x5, :lo12:memstart_offset_seed]
+#endif
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
bl kasan_early_init
#endif
mov x0, x21 // pass FDT address in x0
bl early_fdt_map // Try mapping the FDT early
+ mov x0, x20 // pass the full boot status
bl init_feature_override // Parse cpu feature overrides
-#ifdef CONFIG_RANDOMIZE_BASE
- tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized?
- b.ne 0f
- bl kaslr_early_init // parse FDT for KASLR options
- cbz x0, 0f // KASLR disabled? just proceed
- orr x23, x23, x0 // record KASLR offset
- ldp x29, x30, [sp], #16 // we must enable KASLR, return
- ret // to __primary_switch()
-0:
-#endif
- bl switch_to_vhe // Prefer VHE if possible
+ mov x0, x20
+ bl finalise_el2 // Prefer VHE if possible
ldp x29, x30, [sp], #16
bl start_kernel
ASM_BUG()
SYM_FUNC_END(__primary_switched)
- .pushsection ".rodata", "a"
-SYM_DATA_START(kimage_vaddr)
- .quad _text
-SYM_DATA_END(kimage_vaddr)
-EXPORT_SYMBOL(kimage_vaddr)
- .popsection
-
/*
* end early head section, begin head code that is also used for
* hotplug and needs to have the same protections as the text region
@@ -490,8 +481,9 @@ EXPORT_SYMBOL(kimage_vaddr)
* Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if
* SCTLR_ELx.EOS is clear), we place an ISB prior to ERET.
*
- * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
- * booted in EL1 or EL2 respectively.
+ * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x0 if
+ * booted in EL1 or EL2 respectively, with the top 32 bits containing
+ * potential context flags. These flags are *not* stored in __boot_cpu_mode.
*/
SYM_FUNC_START(init_kernel_el)
mrs x0, CurrentEL
@@ -520,6 +512,8 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
msr vbar_el2, x0
isb
+ mov_q x1, INIT_SCTLR_EL1_MMU_OFF
+
/*
* Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
* making it impossible to start in nVHE mode. Is that
@@ -529,34 +523,19 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
and x0, x0, #HCR_E2H
cbz x0, 1f
- /* Switching to VHE requires a sane SCTLR_EL1 as a start */
- mov_q x0, INIT_SCTLR_EL1_MMU_OFF
- msr_s SYS_SCTLR_EL12, x0
-
- /*
- * Force an eret into a helper "function", and let it return
- * to our original caller... This makes sure that we have
- * initialised the basic PSTATE state.
- */
- mov x0, #INIT_PSTATE_EL2
- msr spsr_el1, x0
- adr x0, __cpu_stick_to_vhe
- msr elr_el1, x0
- eret
+ /* Set a sane SCTLR_EL1, the VHE way */
+ msr_s SYS_SCTLR_EL12, x1
+ mov x2, #BOOT_CPU_FLAG_E2H
+ b 2f
1:
- mov_q x0, INIT_SCTLR_EL1_MMU_OFF
- msr sctlr_el1, x0
-
+ msr sctlr_el1, x1
+ mov x2, xzr
+2:
msr elr_el2, lr
mov w0, #BOOT_CPU_MODE_EL2
+ orr x0, x0, x2
eret
-
-__cpu_stick_to_vhe:
- mov x0, #HVC_VHE_RESTART
- hvc #0
- mov x0, #BOOT_CPU_MODE_EL2
- ret
SYM_FUNC_END(init_kernel_el)
/*
@@ -569,52 +548,21 @@ SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag)
b.ne 1f
add x1, x1, #4
1: str w0, [x1] // Save CPU boot mode
- dmb sy
- dc ivac, x1 // Invalidate potentially stale cache line
ret
SYM_FUNC_END(set_cpu_boot_mode_flag)
-/*
- * These values are written with the MMU off, but read with the MMU on.
- * Writers will invalidate the corresponding address, discarding up to a
- * 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures
- * sufficient alignment that the CWG doesn't overlap another section.
- */
- .pushsection ".mmuoff.data.write", "aw"
-/*
- * We need to find out the CPU boot mode long after boot, so we need to
- * store it in a writable variable.
- *
- * This is not in .bss, because we set it sufficiently early that the boot-time
- * zeroing of .bss would clobber it.
- */
-SYM_DATA_START(__boot_cpu_mode)
- .long BOOT_CPU_MODE_EL2
- .long BOOT_CPU_MODE_EL1
-SYM_DATA_END(__boot_cpu_mode)
-/*
- * The booting CPU updates the failed status @__early_cpu_boot_status,
- * with MMU turned off.
- */
-SYM_DATA_START(__early_cpu_boot_status)
- .quad 0
-SYM_DATA_END(__early_cpu_boot_status)
-
- .popsection
-
/*
 * This provides a "holding pen" for platforms to hold all secondary
 * cores until we're ready for them to initialise.

*/
SYM_FUNC_START(secondary_holding_pen)
bl init_kernel_el // w0=cpu_boot_mode
- bl set_cpu_boot_mode_flag
- mrs x0, mpidr_el1
+ mrs x2, mpidr_el1
mov_q x1, MPIDR_HWID_BITMASK
- and x0, x0, x1
+ and x2, x2, x1
adr_l x3, secondary_holding_pen_release
pen: ldr x4, [x3]
- cmp x4, x0
+ cmp x4, x2
b.eq secondary_startup
wfe
b pen
@@ -626,7 +574,6 @@ SYM_FUNC_END(secondary_holding_pen)
*/
SYM_FUNC_START(secondary_entry)
bl init_kernel_el // w0=cpu_boot_mode
- bl set_cpu_boot_mode_flag
b secondary_startup
SYM_FUNC_END(secondary_entry)
@@ -634,16 +581,24 @@ SYM_FUNC_START_LOCAL(secondary_startup)
/*
* Common entry point for secondary CPUs.
*/
- bl switch_to_vhe
+ mov x20, x0 // preserve boot mode
+ bl finalise_el2
bl __cpu_secondary_check52bitva
+#if VA_BITS > 48
+ ldr_l x0, vabits_actual
+#endif
bl __cpu_setup // initialise processor
adrp x1, swapper_pg_dir
+ adrp x2, idmap_pg_dir
bl __enable_mmu
ldr x8, =__secondary_switched
br x8
SYM_FUNC_END(secondary_startup)
SYM_FUNC_START_LOCAL(__secondary_switched)
+ mov x0, x20
+ bl set_cpu_boot_mode_flag
+ str_l xzr, __early_cpu_boot_status, x3
adr_l x5, vectors
msr vbar_el1, x5
isb
@@ -691,6 +646,7 @@ SYM_FUNC_END(__secondary_too_slow)
*
* x0 = SCTLR_EL1 value for turning on the MMU.
* x1 = TTBR1_EL1 value
+ * x2 = ID map root table address
*
* Returns to the caller via x30/lr. This requires the caller to be covered
* by the .idmap.text section.
@@ -699,20 +655,15 @@ SYM_FUNC_END(__secondary_too_slow)
* If it isn't, park the CPU
*/
SYM_FUNC_START(__enable_mmu)
- mrs x2, ID_AA64MMFR0_EL1
- ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4
- cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN
+ mrs x3, ID_AA64MMFR0_EL1
+ ubfx x3, x3, #ID_AA64MMFR0_TGRAN_SHIFT, 4
+ cmp x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN
b.lt __no_granule_support
- cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX
+ cmp x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX
b.gt __no_granule_support
- update_early_cpu_boot_status 0, x2, x3
- adrp x2, idmap_pg_dir
- phys_to_ttbr x1, x1
phys_to_ttbr x2, x2
msr ttbr0_el1, x2 // load TTBR0
- offset_ttbr1 x1, x3
- msr ttbr1_el1, x1 // load TTBR1
- isb
+ load_ttbr1 x1, x1, x3
set_sctlr_el1 x0
@@ -720,7 +671,7 @@ SYM_FUNC_START(__enable_mmu)
SYM_FUNC_END(__enable_mmu)
SYM_FUNC_START(__cpu_secondary_check52bitva)
-#ifdef CONFIG_ARM64_VA_BITS_52
+#if VA_BITS > 48
ldr_l x0, vabits_actual
cmp x0, #52
b.ne 2f
@@ -755,13 +706,10 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
* Iterate over each entry in the relocation table, and apply the
* relocations in place.
*/
- ldr w9, =__rela_offset // offset to reloc table
- ldr w10, =__rela_size // size of reloc table
-
+ adr_l x9, __rela_start
+ adr_l x10, __rela_end
mov_q x11, KIMAGE_VADDR // default virtual offset
add x11, x11, x23 // actual virtual offset
- add x9, x9, x11 // __va(.rela)
- add x10, x9, x10 // __va(.rela) + sizeof(.rela)
0: cmp x9, x10
b.hs 1f
@@ -804,21 +752,9 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
* entry in x9, the address being relocated by the current address or
* bitmap entry in x13 and the address being relocated by the current
* bit in x14.
- *
- * Because addends are stored in place in the binary, RELR relocations
- * cannot be applied idempotently. We use x24 to keep track of the
- * currently applied displacement so that we can correctly relocate if
- * __relocate_kernel is called twice with non-zero displacements (i.e.
- * if there is both a physical misalignment and a KASLR displacement).
*/
- ldr w9, =__relr_offset // offset to reloc table
- ldr w10, =__relr_size // size of reloc table
- add x9, x9, x11 // __va(.relr)
- add x10, x9, x10 // __va(.relr) + sizeof(.relr)
-
- sub x15, x23, x24 // delta from previous offset
- cbz x15, 7f // nothing to do if unchanged
- mov x24, x23 // save new offset
+ adr_l x9, __relr_start
+ adr_l x10, __relr_end
2: cmp x9, x10
b.hs 7f
@@ -826,7 +762,7 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
tbnz x11, #0, 3f // branch to handle bitmaps
add x13, x11, x23
ldr x12, [x13] // relocate address entry
- add x12, x12, x15
+ add x12, x12, x23
str x12, [x13], #8 // adjust to start of bitmap
b 2b
@@ -835,7 +771,7 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
cbz x11, 6f
tbz x11, #0, 5f // skip bit if not set
ldr x12, [x14] // relocate bit
- add x12, x12, x15
+ add x12, x12, x23
str x12, [x14]
5: add x14, x14, #8 // move to next bit's address
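The loop above walks the standard compressed RELR format: an even entry holds the address of the next word to relocate, an odd entry is a bitmap whose bits 1..63 mark which of the following 63 words also receive the offset. A hedged C model of that walk (function name illustrative):

static void apply_relr(const u64 *start, const u64 *end, u64 offset)
{
	u64 *where = NULL;

	for (const u64 *p = start; p < end; p++) {
		u64 entry = *p;

		if (!(entry & 1)) {			/* address entry */
			where = (u64 *)(entry + offset);
			*where++ += offset;		/* bitmaps cover the words after this one */
		} else {				/* bitmap entry */
			for (int i = 0; (entry >>= 1) != 0; i++)
				if (entry & 1)
					where[i] += offset;
			where += 63;			/* each bitmap spans 63 words */
		}
	}
}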
@@ -856,43 +792,32 @@ SYM_FUNC_END(__relocate_kernel)
#endif
SYM_FUNC_START_LOCAL(__primary_switch)
+ adrp x1, reserved_pg_dir
+ adrp x2, init_idmap_pg_dir
+ bl __enable_mmu
+#ifdef CONFIG_RELOCATABLE
+ adrp x23, KERNEL_START
+ and x23, x23, MIN_KIMG_ALIGN - 1
#ifdef CONFIG_RANDOMIZE_BASE
- mov x19, x0 // preserve new SCTLR_EL1 value
- mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value
+ mov x0, x22
+ adrp x1, init_pg_end
+ mov sp, x1
+ mov x29, xzr
+ bl __pi_kaslr_early_init
+ and x24, x0, #SZ_2M - 1 // capture memstart offset seed
+ bic x0, x0, #SZ_2M - 1
+ orr x23, x23, x0 // record kernel offset
+#endif
#endif
+ bl clear_page_tables
+ bl create_kernel_mapping
adrp x1, init_pg_dir
- bl __enable_mmu
+ load_ttbr1 x1, x1, x2
#ifdef CONFIG_RELOCATABLE
-#ifdef CONFIG_RELR
- mov x24, #0 // no RELR displacement yet
-#endif
bl __relocate_kernel
-#ifdef CONFIG_RANDOMIZE_BASE
- ldr x8, =__primary_switched
- adrp x0, __PHYS_OFFSET
- blr x8
-
- /*
- * If we return here, we have a KASLR displacement in x23 which we need
- * to take into account by discarding the current kernel mapping and
- * creating a new one.
- */
- pre_disable_mmu_workaround
- msr sctlr_el1, x20 // disable the MMU
- isb
- bl __create_page_tables // recreate kernel mapping
-
- tlbi vmalle1 // Remove any stale TLB entries
- dsb nsh
- isb
-
- set_sctlr_el1 x19 // re-enable the MMU
-
- bl __relocate_kernel
-#endif
#endif
ldr x8, =__primary_switched
- adrp x0, __PHYS_OFFSET
+ adrp x0, KERNEL_START // __pa(KERNEL_START)
br x8
SYM_FUNC_END(__primary_switch)
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 2e248342476e..af5df48ba915 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -300,11 +300,6 @@ static void swsusp_mte_restore_tags(void)
unsigned long pfn = xa_state.xa_index;
struct page *page = pfn_to_online_page(pfn);
- /*
- * It is not required to invoke page_kasan_tag_reset(page)
- * at this point since the tags stored in page->flags are
- * already restored.
- */
mte_restore_page_tags(page_address(page), tags);
mte_free_tag_storage(tags);
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 43d212618834..12c7fad02ae5 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -16,6 +16,30 @@
#include <asm/ptrace.h>
#include <asm/virt.h>
+// Warning, hardcoded register allocation
+// This clobbers x1 and x2, and expects x1 to contain
+// the id register value as read from the HW
+.macro __check_override idreg, fld, width, pass, fail
+ ubfx x1, x1, #\fld, #\width
+ cbz x1, \fail
+
+ adr_l x1, \idreg\()_override
+ ldr x2, [x1, FTR_OVR_VAL_OFFSET]
+ ldr x1, [x1, FTR_OVR_MASK_OFFSET]
+ ubfx x2, x2, #\fld, #\width
+ ubfx x1, x1, #\fld, #\width
+ cmp x1, xzr
+ and x2, x2, x1
+ csinv x2, x2, xzr, ne
+ cbnz x2, \pass
+ b \fail
+.endm
+
+.macro check_override idreg, fld, pass, fail
+ mrs x1, \idreg\()_el1
+ __check_override \idreg \fld 4 \pass \fail
+.endm
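In C terms, the macro pair above implements roughly the following predicate: the field must be non-zero in the live ID register, and if the command line recorded an override for that field, the overridden value must be non-zero too (sketch; names illustrative):

static bool field_enabled(u64 idreg, u64 ovr_val, u64 ovr_mask,
			  unsigned int shift, unsigned int width)
{
	u64 lim = BIT(width) - 1;

	if (!((idreg >> shift) & lim))		/* HW says the feature is absent */
		return false;

	ovr_val  = (ovr_val  >> shift) & lim;
	ovr_mask = (ovr_mask >> shift) & lim;

	/* no override recorded for this field: pass; otherwise honour it */
	return ovr_mask ? (ovr_val & ovr_mask) != 0 : true;
}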
+
.text
.pushsection .hyp.text, "ax"
@@ -51,8 +75,8 @@ SYM_CODE_START_LOCAL(elx_sync)
msr vbar_el2, x1
b 9f
-1: cmp x0, #HVC_VHE_RESTART
- b.eq mutate_to_vhe
+1: cmp x0, #HVC_FINALISE_EL2
+ b.eq __finalise_el2
2: cmp x0, #HVC_SOFT_RESTART
b.ne 3f
@@ -73,27 +97,67 @@ SYM_CODE_START_LOCAL(elx_sync)
eret
SYM_CODE_END(elx_sync)
-// nVHE? No way! Give me the real thing!
-SYM_CODE_START_LOCAL(mutate_to_vhe)
+SYM_CODE_START_LOCAL(__finalise_el2)
+ check_override id_aa64pfr0 ID_AA64PFR0_SVE_SHIFT .Linit_sve .Lskip_sve
+
+.Linit_sve: /* SVE register access */
+ mrs x0, cptr_el2 // Disable SVE traps
+ bic x0, x0, #CPTR_EL2_TZ
+ msr cptr_el2, x0
+ isb
+ mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
+ msr_s SYS_ZCR_EL2, x1 // length for EL1.
+
+.Lskip_sve:
+ check_override id_aa64pfr1 ID_AA64PFR1_SME_SHIFT .Linit_sme .Lskip_sme
+
+.Linit_sme: /* SME register access and priority mapping */
+ mrs x0, cptr_el2 // Disable SME traps
+ bic x0, x0, #CPTR_EL2_TSM
+ msr cptr_el2, x0
+ isb
+
+ mrs x1, sctlr_el2
+ orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps
+ msr sctlr_el2, x1
+ isb
+
+ mov x0, #0 // SMCR controls
+
+ // Full FP in SM?
+ mrs_s x1, SYS_ID_AA64SMFR0_EL1
+ __check_override id_aa64smfr0 ID_AA64SMFR0_EL1_FA64_SHIFT 1 .Linit_sme_fa64 .Lskip_sme_fa64
+
+.Linit_sme_fa64:
+ orr x0, x0, SMCR_ELx_FA64_MASK
+.Lskip_sme_fa64:
+
+ orr x0, x0, #SMCR_ELx_LEN_MASK // Enable full SME vector
+ msr_s SYS_SMCR_EL2, x0 // length for EL1.
+
+ mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported?
+ ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
+ cbz x1, .Lskip_sme
+
+ msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal
+
+ mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present?
+ ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4
+ cbz x1, .Lskip_sme
+
+ mrs_s x1, SYS_HCRX_EL2
+ orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping
+ msr_s SYS_HCRX_EL2, x1
+
+.Lskip_sme:
+
+ // nVHE? No way! Give me the real thing!
// Sanity check: MMU *must* be off
mrs x1, sctlr_el2
tbnz x1, #0, 1f
// Needs to be VHE capable, obviously
- mrs x1, id_aa64mmfr1_el1
- ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4
- cbz x1, 1f
-
- // Check whether VHE is disabled from the command line
- adr_l x1, id_aa64mmfr1_override
- ldr x2, [x1, FTR_OVR_VAL_OFFSET]
- ldr x1, [x1, FTR_OVR_MASK_OFFSET]
- ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
- ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4
- cmp x1, xzr
- and x2, x2, x1
- csinv x2, x2, xzr, ne
- cbnz x2, 2f
+ check_override id_aa64mmfr1 ID_AA64MMFR1_VHE_SHIFT 2f 1f
1: mov_q x0, HVC_STUB_ERR
eret
@@ -140,10 +204,10 @@ SYM_CODE_START_LOCAL(mutate_to_vhe)
msr spsr_el1, x0
b enter_vhe
-SYM_CODE_END(mutate_to_vhe)
+SYM_CODE_END(__finalise_el2)
// At the point where we reach enter_vhe(), we run with
- // the MMU off (which is enforced by mutate_to_vhe()).
+ // the MMU off (which is enforced by __finalise_el2()).
// We thus need to be in the idmap, or everything will
// explode when enabling the MMU.
@@ -222,12 +286,12 @@ SYM_FUNC_START(__hyp_reset_vectors)
SYM_FUNC_END(__hyp_reset_vectors)
/*
- * Entry point to switch to VHE if deemed capable
+ * Entry point to finalise EL2 and switch to VHE if deemed capable
+ *
+ * w0: boot mode, as returned by init_kernel_el()
*/
-SYM_FUNC_START(switch_to_vhe)
+SYM_FUNC_START(finalise_el2)
// Need to have booted at EL2
- adr_l x1, __boot_cpu_mode
- ldr w0, [x1]
cmp w0, #BOOT_CPU_MODE_EL2
b.ne 1f
@@ -236,9 +300,8 @@ SYM_FUNC_START(switch_to_vhe)
cmp x0, #CurrentEL_EL1
b.ne 1f
- // Turn the world upside down
- mov x0, #HVC_VHE_RESTART
+ mov x0, #HVC_FINALISE_EL2
hvc #0
1:
ret
-SYM_FUNC_END(switch_to_vhe)
+SYM_FUNC_END(finalise_el2)
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 8a2ceb591686..1b0542c69738 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -19,16 +19,21 @@
#define FTR_ALIAS_NAME_LEN 30
#define FTR_ALIAS_OPTION_LEN 116
+static u64 __boot_status __initdata;
+
struct ftr_set_desc {
char name[FTR_DESC_NAME_LEN];
struct arm64_ftr_override *override;
struct {
char name[FTR_DESC_FIELD_LEN];
u8 shift;
+ u8 width;
bool (*filter)(u64 val);
} fields[];
};
+#define FIELD(n, s, f) { .name = n, .shift = s, .width = 4, .filter = f }
+
static bool __init mmfr1_vh_filter(u64 val)
{
/*
@@ -37,24 +42,65 @@ static bool __init mmfr1_vh_filter(u64 val)
* the user was trying to force nVHE on us, proceed with
* attitude adjustment.
*/
- return !(is_kernel_in_hyp_mode() && val == 0);
+ return !(__boot_status == (BOOT_CPU_FLAG_E2H | BOOT_CPU_MODE_EL2) &&
+ val == 0);
}
static const struct ftr_set_desc mmfr1 __initconst = {
.name = "id_aa64mmfr1",
.override = &id_aa64mmfr1_override,
.fields = {
- { "vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter },
+ FIELD("vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter),
+ {}
+ },
+};
+
+static bool __init pfr0_sve_filter(u64 val)
+{
+ /*
+ * Disabling SVE also means disabling all the features that
+ * are associated with it. The easiest way to do it is just to
+ * override id_aa64zfr0_el1 to be 0.
+ */
+ if (!val) {
+ id_aa64zfr0_override.val = 0;
+ id_aa64zfr0_override.mask = GENMASK(63, 0);
+ }
+
+ return true;
+}
+
+static const struct ftr_set_desc pfr0 __initconst = {
+ .name = "id_aa64pfr0",
+ .override = &id_aa64pfr0_override,
+ .fields = {
+ FIELD("sve", ID_AA64PFR0_SVE_SHIFT, pfr0_sve_filter),
{}
},
};
+static bool __init pfr1_sme_filter(u64 val)
+{
+ /*
+ * Similarly to SVE, disabling SME also means disabling all
+ * the features that are associated with it. Just set
+ * id_aa64smfr0_el1 to 0 and don't look back.
+ */
+ if (!val) {
+ id_aa64smfr0_override.val = 0;
+ id_aa64smfr0_override.mask = GENMASK(63, 0);
+ }
+
+ return true;
+}
+
static const struct ftr_set_desc pfr1 __initconst = {
.name = "id_aa64pfr1",
.override = &id_aa64pfr1_override,
.fields = {
- { "bt", ID_AA64PFR1_BT_SHIFT },
- { "mte", ID_AA64PFR1_MTE_SHIFT},
+ FIELD("bt", ID_AA64PFR1_BT_SHIFT, NULL ),
+ FIELD("mte", ID_AA64PFR1_MTE_SHIFT, NULL),
+ FIELD("sme", ID_AA64PFR1_SME_SHIFT, pfr1_sme_filter),
{}
},
};
@@ -63,10 +109,10 @@ static const struct ftr_set_desc isar1 __initconst = {
.name = "id_aa64isar1",
.override = &id_aa64isar1_override,
.fields = {
- { "gpi", ID_AA64ISAR1_GPI_SHIFT },
- { "gpa", ID_AA64ISAR1_GPA_SHIFT },
- { "api", ID_AA64ISAR1_API_SHIFT },
- { "apa", ID_AA64ISAR1_APA_SHIFT },
+ FIELD("gpi", ID_AA64ISAR1_EL1_GPI_SHIFT, NULL),
+ FIELD("gpa", ID_AA64ISAR1_EL1_GPA_SHIFT, NULL),
+ FIELD("api", ID_AA64ISAR1_EL1_API_SHIFT, NULL),
+ FIELD("apa", ID_AA64ISAR1_EL1_APA_SHIFT, NULL),
{}
},
};
@@ -75,8 +121,18 @@ static const struct ftr_set_desc isar2 __initconst = {
.name = "id_aa64isar2",
.override = &id_aa64isar2_override,
.fields = {
- { "gpa3", ID_AA64ISAR2_GPA3_SHIFT },
- { "apa3", ID_AA64ISAR2_APA3_SHIFT },
+ FIELD("gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT, NULL),
+ FIELD("apa3", ID_AA64ISAR2_EL1_APA3_SHIFT, NULL),
+ {}
+ },
+};
+
+static const struct ftr_set_desc smfr0 __initconst = {
+ .name = "id_aa64smfr0",
+ .override = &id_aa64smfr0_override,
+ .fields = {
+ /* FA64 is a one bit field... :-/ */
+ { "fa64", ID_AA64SMFR0_EL1_FA64_SHIFT, 1, },
{}
},
};
@@ -89,16 +145,18 @@ static const struct ftr_set_desc kaslr __initconst = {
.override = &kaslr_feature_override,
#endif
.fields = {
- { "disabled", 0 },
+ FIELD("disabled", 0, NULL),
{}
},
};
static const struct ftr_set_desc * const regs[] __initconst = {
&mmfr1,
+ &pfr0,
&pfr1,
&isar1,
&isar2,
+ &smfr0,
&kaslr,
};
@@ -108,6 +166,8 @@ static const struct {
} aliases[] __initconst = {
{ "kvm-arm.mode=nvhe", "id_aa64mmfr1.vh=0" },
{ "kvm-arm.mode=protected", "id_aa64mmfr1.vh=0" },
+ { "arm64.nosve", "id_aa64pfr0.sve=0 id_aa64pfr1.sme=0" },
+ { "arm64.nosme", "id_aa64pfr1.sme=0" },
{ "arm64.nobti", "id_aa64pfr1.bt=0" },
{ "arm64.nopauth",
"id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
@@ -144,7 +204,8 @@ static void __init match_options(const char *cmdline)
for (f = 0; strlen(regs[i]->fields[f].name); f++) {
u64 shift = regs[i]->fields[f].shift;
- u64 mask = 0xfUL << shift;
+ u64 width = regs[i]->fields[f].width ?: 4;
+ u64 mask = GENMASK_ULL(shift + width - 1, shift);
u64 v;
if (find_field(cmdline, regs[i], f, &v))
@@ -152,7 +213,7 @@ static void __init match_options(const char *cmdline)
/*
* If an override gets filtered out, advertise
- * it by setting the value to 0xf, but
+ * it by setting the value to the all-ones while
* clearing the mask... Yes, this is fragile.
*/
if (regs[i]->fields[f].filter &&
@@ -234,9 +295,9 @@ static __init void parse_cmdline(void)
}
/* Keep checkers quiet */
-void init_feature_override(void);
+void init_feature_override(u64 boot_status);
-asmlinkage void __init init_feature_override(void)
+asmlinkage void __init init_feature_override(u64 boot_status)
{
int i;
@@ -247,6 +308,8 @@ asmlinkage void __init init_feature_override(void)
}
}
+ __boot_status = boot_status;
+
parse_cmdline();
for (i = 0; i < ARRAY_SIZE(regs); i++) {
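
A quick illustration of the width-aware mask computed in match_options() above: for the default field width of 4 the result matches the old fixed 0xf << shift, while a 1-bit field such as the new "fa64" entry gets a single-bit mask. The sketch below is a standalone stand-in, not kernel code; the GENMASK_ULL() definition is a simplified equivalent of the kernel macro and the shift values are only examples.

    #include <stdint.h>
    #include <stdio.h>

    /* simplified stand-in for the kernel's GENMASK_ULL() */
    #define GENMASK_ULL(h, l)  ((~0ULL << (l)) & (~0ULL >> (63 - (h))))

    int main(void)
    {
        unsigned int shift, width;

        /* default 4-bit ID register field, e.g. a field at bits [35:32] */
        shift = 32; width = 4;
        printf("%#llx\n", GENMASK_ULL(shift + width - 1, shift)); /* 0xf00000000 */

        /* explicit 1-bit field, e.g. "fa64" at bit 63 */
        shift = 63; width = 1;
        printf("%#llx\n", GENMASK_ULL(shift + width - 1, shift)); /* 0x8000000000000000 */
        return 0;
    }
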
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 241c86b67d01..afa69e04e75e 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -10,11 +10,8 @@
#error This file should only be included in vmlinux.lds.S
#endif
-#ifdef CONFIG_EFI
-
-__efistub_kernel_size = _edata - _text;
-__efistub_primary_entry_offset = primary_entry - _text;
-
+PROVIDE(__efistub_kernel_size = _edata - _text);
+PROVIDE(__efistub_primary_entry_offset = primary_entry - _text);
/*
* The EFI stub has its own symbol namespace prefixed by __efistub_, to
@@ -25,31 +22,37 @@ __efistub_primary_entry_offset = primary_entry - _text;
* linked at. The routines below are all implemented in assembler in a
* position independent manner
*/
-__efistub_memcmp = __pi_memcmp;
-__efistub_memchr = __pi_memchr;
-__efistub_memcpy = __pi_memcpy;
-__efistub_memmove = __pi_memmove;
-__efistub_memset = __pi_memset;
-__efistub_strlen = __pi_strlen;
-__efistub_strnlen = __pi_strnlen;
-__efistub_strcmp = __pi_strcmp;
-__efistub_strncmp = __pi_strncmp;
-__efistub_strrchr = __pi_strrchr;
-__efistub_dcache_clean_poc = __pi_dcache_clean_poc;
-
-#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
-__efistub___memcpy = __pi_memcpy;
-__efistub___memmove = __pi_memmove;
-__efistub___memset = __pi_memset;
-#endif
+PROVIDE(__efistub_memcmp = __pi_memcmp);
+PROVIDE(__efistub_memchr = __pi_memchr);
+PROVIDE(__efistub_memcpy = __pi_memcpy);
+PROVIDE(__efistub_memmove = __pi_memmove);
+PROVIDE(__efistub_memset = __pi_memset);
+PROVIDE(__efistub_strlen = __pi_strlen);
+PROVIDE(__efistub_strnlen = __pi_strnlen);
+PROVIDE(__efistub_strcmp = __pi_strcmp);
+PROVIDE(__efistub_strncmp = __pi_strncmp);
+PROVIDE(__efistub_strrchr = __pi_strrchr);
+PROVIDE(__efistub_dcache_clean_poc = __pi_dcache_clean_poc);
+
+PROVIDE(__efistub__text = _text);
+PROVIDE(__efistub__end = _end);
+PROVIDE(__efistub__edata = _edata);
+PROVIDE(__efistub_screen_info = screen_info);
+PROVIDE(__efistub__ctype = _ctype);
-__efistub__text = _text;
-__efistub__end = _end;
-__efistub__edata = _edata;
-__efistub_screen_info = screen_info;
-__efistub__ctype = _ctype;
+/*
+ * The __ prefixed memcpy/memset/memmove symbols are provided by KASAN, which
+ * instruments the conventional ones. Therefore, any references from the EFI
+ * stub or other position independent, low level C code should be redirected to
+ * the non-instrumented versions as well.
+ */
+PROVIDE(__efistub___memcpy = __pi_memcpy);
+PROVIDE(__efistub___memmove = __pi_memmove);
+PROVIDE(__efistub___memset = __pi_memset);
-#endif
+PROVIDE(__pi___memcpy = __pi_memcpy);
+PROVIDE(__pi___memmove = __pi_memmove);
+PROVIDE(__pi___memset = __pi_memset);
#ifdef CONFIG_KVM
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
index fc98037e1220..faf88ec9c48e 100644
--- a/arch/arm64/kernel/jump_label.c
+++ b/arch/arm64/kernel/jump_label.c
@@ -26,14 +26,3 @@ void arch_jump_label_transform(struct jump_entry *entry,
aarch64_insn_patch_text_nosync(addr, insn);
}
-
-void arch_jump_label_transform_static(struct jump_entry *entry,
- enum jump_label_type type)
-{
- /*
- * We use the architected A64 NOP in arch_static_branch, so there's no
- * need to patch an identical A64 NOP over the top of it here. The core
- * will call arch_jump_label_transform from a module notifier if the
- * NOP needs to be replaced by a branch.
- */
-}
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 418b2bba1521..325455d16dbc 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -13,7 +13,6 @@
#include <linux/pgtable.h>
#include <linux/random.h>
-#include <asm/cacheflush.h>
#include <asm/fixmap.h>
#include <asm/kernel-pgtable.h>
#include <asm/memory.h>
@@ -21,128 +20,45 @@
#include <asm/sections.h>
#include <asm/setup.h>
-enum kaslr_status {
- KASLR_ENABLED,
- KASLR_DISABLED_CMDLINE,
- KASLR_DISABLED_NO_SEED,
- KASLR_DISABLED_FDT_REMAP,
-};
-
-static enum kaslr_status __initdata kaslr_status;
u64 __ro_after_init module_alloc_base;
u16 __initdata memstart_offset_seed;
-static __init u64 get_kaslr_seed(void *fdt)
-{
- int node, len;
- fdt64_t *prop;
- u64 ret;
-
- node = fdt_path_offset(fdt, "/chosen");
- if (node < 0)
- return 0;
-
- prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
- if (!prop || len != sizeof(u64))
- return 0;
-
- ret = fdt64_to_cpu(*prop);
- *prop = 0;
- return ret;
-}
-
struct arm64_ftr_override kaslr_feature_override __initdata;
-/*
- * This routine will be executed with the kernel mapped at its default virtual
- * address, and if it returns successfully, the kernel will be remapped, and
- * start_kernel() will be executed from a randomized virtual offset. The
- * relocation will result in all absolute references (e.g., static variables
- * containing function pointers) to be reinitialized, and zero-initialized
- * .bss variables will be reset to 0.
- */
-u64 __init kaslr_early_init(void)
+static int __init kaslr_init(void)
{
- void *fdt;
- u64 seed, offset, mask, module_range;
- unsigned long raw;
+ u64 module_range;
+ u32 seed;
/*
* Set a reasonable default for module_alloc_base in case
* we end up running with module randomization disabled.
*/
module_alloc_base = (u64)_etext - MODULES_VSIZE;
- dcache_clean_inval_poc((unsigned long)&module_alloc_base,
- (unsigned long)&module_alloc_base +
- sizeof(module_alloc_base));
-
- /*
- * Try to map the FDT early. If this fails, we simply bail,
- * and proceed with KASLR disabled. We will make another
- * attempt at mapping the FDT in setup_machine()
- */
- fdt = get_early_fdt_ptr();
- if (!fdt) {
- kaslr_status = KASLR_DISABLED_FDT_REMAP;
- return 0;
- }
- /*
- * Retrieve (and wipe) the seed from the FDT
- */
- seed = get_kaslr_seed(fdt);
-
- /*
- * Check if 'nokaslr' appears on the command line, and
- * return 0 if that is the case.
- */
if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) {
- kaslr_status = KASLR_DISABLED_CMDLINE;
+ pr_info("KASLR disabled on command line\n");
return 0;
}
- /*
- * Mix in any entropy obtainable architecturally if enabled
- * and supported.
- */
-
- if (arch_get_random_seed_long_early(&raw))
- seed ^= raw;
-
- if (!seed) {
- kaslr_status = KASLR_DISABLED_NO_SEED;
+ if (!kaslr_offset()) {
+ pr_warn("KASLR disabled due to lack of seed\n");
return 0;
}
+ pr_info("KASLR enabled\n");
+
/*
- * OK, so we are proceeding with KASLR enabled. Calculate a suitable
- * kernel image offset from the seed. Let's place the kernel in the
- * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
- * the lower and upper quarters to avoid colliding with other
- * allocations.
- * Even if we could randomize at page granularity for 16k and 64k pages,
- * let's always round to 2 MB so we don't interfere with the ability to
- * map using contiguous PTEs
+ * KASAN without KASAN_VMALLOC does not expect the module region to
+ * intersect the vmalloc region, since shadow memory is allocated for
+ * each module at load time, whereas the vmalloc region will already be
+ * shadowed by KASAN zero pages.
*/
- mask = ((1UL << (VA_BITS_MIN - 2)) - 1) & ~(SZ_2M - 1);
- offset = BIT(VA_BITS_MIN - 3) + (seed & mask);
+ BUILD_BUG_ON((IS_ENABLED(CONFIG_KASAN_GENERIC) ||
+ IS_ENABLED(CONFIG_KASAN_SW_TAGS)) &&
+ !IS_ENABLED(CONFIG_KASAN_VMALLOC));
- /* use the top 16 bits to randomize the linear region */
- memstart_offset_seed = seed >> 48;
-
- if (!IS_ENABLED(CONFIG_KASAN_VMALLOC) &&
- (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
- IS_ENABLED(CONFIG_KASAN_SW_TAGS)))
- /*
- * KASAN without KASAN_VMALLOC does not expect the module region
- * to intersect the vmalloc region, since shadow memory is
- * allocated for each module at load time, whereas the vmalloc
- * region is shadowed by KASAN zero pages. So keep modules
- * out of the vmalloc region if KASAN is enabled without
- * KASAN_VMALLOC, and put the kernel well within 4 GB of the
- * module region.
- */
- return offset % SZ_2G;
+ seed = get_random_u32();
if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
/*
@@ -154,8 +70,7 @@ u64 __init kaslr_early_init(void)
* resolved normally.)
*/
module_range = SZ_2G - (u64)(_end - _stext);
- module_alloc_base = max((u64)_end + offset - SZ_2G,
- (u64)MODULES_VADDR);
+ module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR);
} else {
/*
* Randomize the module region by setting module_alloc_base to
@@ -167,40 +82,12 @@ u64 __init kaslr_early_init(void)
* when ARM64_MODULE_PLTS is enabled.
*/
module_range = MODULES_VSIZE - (u64)(_etext - _stext);
- module_alloc_base = (u64)_etext + offset - MODULES_VSIZE;
}
/* use the lower 21 bits to randomize the base of the module region */
module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
module_alloc_base &= PAGE_MASK;
- dcache_clean_inval_poc((unsigned long)&module_alloc_base,
- (unsigned long)&module_alloc_base +
- sizeof(module_alloc_base));
- dcache_clean_inval_poc((unsigned long)&memstart_offset_seed,
- (unsigned long)&memstart_offset_seed +
- sizeof(memstart_offset_seed));
-
- return offset;
-}
-
-static int __init kaslr_init(void)
-{
- switch (kaslr_status) {
- case KASLR_ENABLED:
- pr_info("KASLR enabled\n");
- break;
- case KASLR_DISABLED_CMDLINE:
- pr_info("KASLR disabled on command line\n");
- break;
- case KASLR_DISABLED_NO_SEED:
- pr_warn("KASLR disabled due to lack of seed\n");
- break;
- case KASLR_DISABLED_FDT_REMAP:
- pr_warn("KASLR disabled due to FDT remapping failure\n");
- break;
- }
-
return 0;
}
-core_initcall(kaslr_init)
+subsys_initcall(kaslr_init)
diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S
index 42bd8c0c60e0..692e9d2e31e5 100644
--- a/arch/arm64/kernel/kuser32.S
+++ b/arch/arm64/kernel/kuser32.S
@@ -15,6 +15,7 @@
#include <asm/unistd.h>
+ .section .rodata
.align 5
.globl __kuser_helper_start
__kuser_helper_start:
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index f6b00743c399..b2b730233274 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -48,15 +48,6 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte,
if (!pte_is_tagged)
return;
- page_kasan_tag_reset(page);
- /*
- * We need smp_wmb() in between setting the flags and clearing the
- * tags because if another thread reads page->flags and builds a
- * tagged address out of it, there is an actual dependency to the
- * memory access, but on the current thread we do not guarantee that
- * the new page->flags are visible before the tags were updated.
- */
- smp_wmb();
mte_clear_page_tags(page_address(page));
}
diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile
new file mode 100644
index 000000000000..839291430cb3
--- /dev/null
+++ b/arch/arm64/kernel/pi/Makefile
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2022 Google LLC
+
+KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
+ -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \
+ $(call cc-option,-mbranch-protection=none) \
+ -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \
+ -include $(srctree)/include/linux/hidden.h \
+ -D__DISABLE_EXPORTS -ffreestanding -D__NO_FORTIFY \
+ $(call cc-option,-fno-addrsig)
+
+# remove SCS flags from all objects in this directory
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
+# disable LTO
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS))
+
+GCOV_PROFILE := n
+KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
+UBSAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+
+$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \
+ --remove-section=.note.gnu.property \
+ --prefix-alloc-sections=.init
+$(obj)/%.pi.o: $(obj)/%.o FORCE
+ $(call if_changed,objcopy)
+
+$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
+ $(call if_changed_rule,cc_o_c)
+
+obj-y := kaslr_early.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
+extra-y := $(patsubst %.pi.o,%.o,$(obj-y))
diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c
new file mode 100644
index 000000000000..6c3855e69395
--- /dev/null
+++ b/arch/arm64/kernel/pi/kaslr_early.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2022 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+// NOTE: code in this file runs *very* early, and is not permitted to use
+// global variables or anything that relies on absolute addressing.
+
+#include <linux/libfdt.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/string.h>
+
+#include <asm/archrandom.h>
+#include <asm/memory.h>
+
+/* taken from lib/string.c */
+static char *__strstr(const char *s1, const char *s2)
+{
+ size_t l1, l2;
+
+ l2 = strlen(s2);
+ if (!l2)
+ return (char *)s1;
+ l1 = strlen(s1);
+ while (l1 >= l2) {
+ l1--;
+ if (!memcmp(s1, s2, l2))
+ return (char *)s1;
+ s1++;
+ }
+ return NULL;
+}
+static bool cmdline_contains_nokaslr(const u8 *cmdline)
+{
+ const u8 *str;
+
+ str = __strstr(cmdline, "nokaslr");
+ return str == cmdline || (str > cmdline && *(str - 1) == ' ');
+}
+
+static bool is_kaslr_disabled_cmdline(void *fdt)
+{
+ if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
+ int node;
+ const u8 *prop;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ goto out;
+
+ prop = fdt_getprop(fdt, node, "bootargs", NULL);
+ if (!prop)
+ goto out;
+
+ if (cmdline_contains_nokaslr(prop))
+ return true;
+
+ if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
+ goto out;
+
+ return false;
+ }
+out:
+ return cmdline_contains_nokaslr(CONFIG_CMDLINE);
+}
+
+static u64 get_kaslr_seed(void *fdt)
+{
+ int node, len;
+ fdt64_t *prop;
+ u64 ret;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ return 0;
+
+ prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+ if (!prop || len != sizeof(u64))
+ return 0;
+
+ ret = fdt64_to_cpu(*prop);
+ *prop = 0;
+ return ret;
+}
+
+asmlinkage u64 kaslr_early_init(void *fdt)
+{
+ u64 seed;
+
+ if (is_kaslr_disabled_cmdline(fdt))
+ return 0;
+
+ seed = get_kaslr_seed(fdt);
+ if (!seed) {
+#ifdef CONFIG_ARCH_RANDOM
+ if (!__early_cpu_has_rndr() ||
+ !__arm64_rndr((unsigned long *)&seed))
+#endif
+ return 0;
+ }
+
+ /*
+ * OK, so we are proceeding with KASLR enabled. Calculate a suitable
+ * kernel image offset from the seed. Let's place the kernel in the
+ * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
+ * the lower and upper quarters to avoid colliding with other
+ * allocations.
+ */
+ return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0));
+}
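
To make the offset arithmetic at the end of kaslr_early_init() concrete: with VA_BITS_MIN = 48, GENMASK(VA_BITS_MIN - 3, 0) keeps the low 46 bits of the seed and BIT(VA_BITS_MIN - 3) adds 2^45, so the returned offset always falls in [2^45, 2^45 + 2^46), i.e. the middle half of a 2^47-byte window, clear of its lowest and highest quarters. A minimal userspace sketch of the same computation (the seed value is made up):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t seed   = 0x0123456789abcdefULL;        /* made-up seed */
        uint64_t mask   = (1ULL << 46) - 1;             /* GENMASK(45, 0) */
        uint64_t offset = (1ULL << 45) + (seed & mask); /* BIT(45) + (seed & mask) */

        /* always within [2^45, 2^45 + 2^46) */
        printf("offset = %#llx\n", (unsigned long long)offset);
        return 0;
    }
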
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index b0980fbb6bc7..3e6d0352d7d3 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -280,6 +280,9 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
vl = task_get_sme_vl(current);
} else {
+ if (!system_supports_sve())
+ return -EINVAL;
+
vl = task_get_sve_vl(current);
}
@@ -342,9 +345,14 @@ fpsimd_only:
#else /* ! CONFIG_ARM64_SVE */
-/* Turn any non-optimised out attempts to use these into a link error: */
+static int restore_sve_fpsimd_context(struct user_ctxs *user)
+{
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+}
+
+/* Turn any non-optimised out attempts to use this into a link error: */
extern int preserve_sve_context(void __user *ctx);
-extern int restore_sve_fpsimd_context(struct user_ctxs *user);
#endif /* ! CONFIG_ARM64_SVE */
@@ -649,14 +657,10 @@ static int restore_sigframe(struct pt_regs *regs,
if (!user.fpsimd)
return -EINVAL;
- if (user.sve) {
- if (!system_supports_sve())
- return -EINVAL;
-
+ if (user.sve)
err = restore_sve_fpsimd_context(&user);
- } else {
+ else
err = restore_fpsimd_context(user.fpsimd);
- }
}
if (err == 0 && system_supports_sme() && user.za)
diff --git a/arch/arm64/kernel/sigreturn32.S b/arch/arm64/kernel/sigreturn32.S
index 475d30d471ac..ccbd4aab4ba4 100644
--- a/arch/arm64/kernel/sigreturn32.S
+++ b/arch/arm64/kernel/sigreturn32.S
@@ -15,6 +15,7 @@
#include <asm/unistd.h>
+ .section .rodata
.globl __aarch32_sigret_code_start
__aarch32_sigret_code_start:
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 4ea9392f86e0..617f78ad43a1 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -100,10 +100,11 @@ SYM_FUNC_END(__cpu_suspend_enter)
.pushsection ".idmap.text", "awx"
SYM_CODE_START(cpu_resume)
bl init_kernel_el
- bl switch_to_vhe
+ bl finalise_el2
bl __cpu_setup
/* enable the MMU early - so we can access sleep_save_stash by va */
adrp x1, swapper_pg_dir
+ adrp x2, idmap_pg_dir
bl __enable_mmu
ldr x8, =_cpu_resume
br x8
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 0467cb79f080..fcaa151b81f1 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -38,6 +38,8 @@
* @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
* associated with the most recently encountered replacement lr
* value.
+ *
+ * @task: The task being unwound.
*/
struct unwind_state {
unsigned long fp;
@@ -48,13 +50,13 @@ struct unwind_state {
#ifdef CONFIG_KRETPROBES
struct llist_node *kr_cur;
#endif
+ struct task_struct *task;
};
-static notrace void unwind_init(struct unwind_state *state, unsigned long fp,
- unsigned long pc)
+static void unwind_init_common(struct unwind_state *state,
+ struct task_struct *task)
{
- state->fp = fp;
- state->pc = pc;
+ state->task = task;
#ifdef CONFIG_KRETPROBES
state->kr_cur = NULL;
#endif
@@ -72,7 +74,57 @@ static notrace void unwind_init(struct unwind_state *state, unsigned long fp,
state->prev_fp = 0;
state->prev_type = STACK_TYPE_UNKNOWN;
}
-NOKPROBE_SYMBOL(unwind_init);
+
+/*
+ * Start an unwind from a pt_regs.
+ *
+ * The unwind will begin at the PC within the regs.
+ *
+ * The regs must be on a stack currently owned by the calling task.
+ */
+static inline void unwind_init_from_regs(struct unwind_state *state,
+ struct pt_regs *regs)
+{
+ unwind_init_common(state, current);
+
+ state->fp = regs->regs[29];
+ state->pc = regs->pc;
+}
+
+/*
+ * Start an unwind from a caller.
+ *
+ * The unwind will begin at the caller of whichever function this is inlined
+ * into.
+ *
+ * The function which invokes this must be noinline.
+ */
+static __always_inline void unwind_init_from_caller(struct unwind_state *state)
+{
+ unwind_init_common(state, current);
+
+ state->fp = (unsigned long)__builtin_frame_address(1);
+ state->pc = (unsigned long)__builtin_return_address(0);
+}
+
+/*
+ * Start an unwind from a blocked task.
+ *
+ * The unwind will begin at the blocked task's saved PC (i.e. the caller of
+ * cpu_switch_to()).
+ *
+ * The caller should ensure the task is blocked in cpu_switch_to() for the
+ * duration of the unwind, or the unwind will be bogus. It is never valid to
+ * call this for the current task.
+ */
+static inline void unwind_init_from_task(struct unwind_state *state,
+ struct task_struct *task)
+{
+ unwind_init_common(state, task);
+
+ state->fp = thread_saved_fp(task);
+ state->pc = thread_saved_pc(task);
+}
/*
* Unwind from one frame record (A) to the next frame record (B).
@@ -81,9 +133,9 @@ NOKPROBE_SYMBOL(unwind_init);
* records (e.g. a cycle), determined based on the location and fp value of A
* and the location (but not the fp value) of B.
*/
-static int notrace unwind_next(struct task_struct *tsk,
- struct unwind_state *state)
+static int notrace unwind_next(struct unwind_state *state)
{
+ struct task_struct *tsk = state->task;
unsigned long fp = state->fp;
struct stack_info info;
@@ -117,15 +169,15 @@ static int notrace unwind_next(struct task_struct *tsk,
if (fp <= state->prev_fp)
return -EINVAL;
} else {
- set_bit(state->prev_type, state->stacks_done);
+ __set_bit(state->prev_type, state->stacks_done);
}
/*
* Record this frame record's values and location. The prev_fp and
* prev_type are only meaningful to the next unwind_next() invocation.
*/
- state->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
- state->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
+ state->fp = READ_ONCE(*(unsigned long *)(fp));
+ state->pc = READ_ONCE(*(unsigned long *)(fp + 8));
state->prev_fp = fp;
state->prev_type = info.type;
@@ -157,8 +209,7 @@ static int notrace unwind_next(struct task_struct *tsk,
}
NOKPROBE_SYMBOL(unwind_next);
-static void notrace unwind(struct task_struct *tsk,
- struct unwind_state *state,
+static void notrace unwind(struct unwind_state *state,
stack_trace_consume_fn consume_entry, void *cookie)
{
while (1) {
@@ -166,7 +217,7 @@ static void notrace unwind(struct task_struct *tsk,
if (!consume_entry(cookie, state->pc))
break;
- ret = unwind_next(tsk, state);
+ ret = unwind_next(state);
if (ret < 0)
break;
}
@@ -212,15 +263,15 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
{
struct unwind_state state;
- if (regs)
- unwind_init(&state, regs->regs[29], regs->pc);
- else if (task == current)
- unwind_init(&state,
- (unsigned long)__builtin_frame_address(1),
- (unsigned long)__builtin_return_address(0));
- else
- unwind_init(&state, thread_saved_fp(task),
- thread_saved_pc(task));
-
- unwind(task, &state, consume_entry, cookie);
+ if (regs) {
+ if (task != current)
+ return;
+ unwind_init_from_regs(&state, regs);
+ } else if (task == current) {
+ unwind_init_from_caller(&state);
+ } else {
+ unwind_init_from_task(&state, task);
+ }
+
+ unwind(&state, consume_entry, cookie);
}
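
For reference, a minimal consumer of the reworked unwinder. arch_stack_walk() and stack_trace_consume_fn are the interfaces touched above; print_entry() and dump_current_stack() are made-up names for illustration only, assuming CONFIG_ARCH_STACKWALK is selected.

    #include <linux/printk.h>
    #include <linux/sched.h>
    #include <linux/stacktrace.h>

    static bool print_entry(void *cookie, unsigned long pc)
    {
        pr_info("  %pS\n", (void *)pc);
        return true;               /* keep unwinding */
    }

    static void dump_current_stack(void)
    {
        /* regs == NULL, task == current: the trace starts at the caller of arch_stack_walk() */
        arch_stack_walk(print_entry, NULL, current, NULL);
    }
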
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 2b0887e58a7c..9135fe0f3df5 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -52,7 +52,7 @@ void notrace __cpu_suspend_exit(void)
/* Restore CnP bit in TTBR1_EL1 */
if (system_supports_cnp())
- cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+ cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
/*
* PSTATE was not saved over suspend/resume, re-enable any detected
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 9ac7a81b79be..b7fed33981f7 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -579,11 +579,11 @@ static void ctr_read_handler(unsigned long esr, struct pt_regs *regs)
if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) {
/* Hide DIC so that we can trap the unnecessary maintenance...*/
- val &= ~BIT(CTR_DIC_SHIFT);
+ val &= ~BIT(CTR_EL0_DIC_SHIFT);
/* ... and fake IminLine to reduce the number of traps. */
- val &= ~CTR_IMINLINE_MASK;
- val |= (PAGE_SHIFT - 2) & CTR_IMINLINE_MASK;
+ val &= ~CTR_EL0_IminLine_MASK;
+ val |= (PAGE_SHIFT - 2) & CTR_EL0_IminLine_MASK;
}
pt_regs_write_reg(regs, rt, val);
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index f6e25d7c346a..bafbf78fab77 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -24,7 +24,13 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti
# routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so
# preparation in build-time C")).
ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \
- -Bsymbolic --build-id=sha1 -n $(btildflags-y) -T
+ -Bsymbolic --build-id=sha1 -n $(btildflags-y)
+
+ifdef CONFIG_LD_ORPHAN_WARN
+ ldflags-y += --orphan-handling=warn
+endif
+
+ldflags-y += -T
ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
index a5e61e09ea92..e69fb4aaaf3e 100644
--- a/arch/arm64/kernel/vdso/vdso.lds.S
+++ b/arch/arm64/kernel/vdso/vdso.lds.S
@@ -11,6 +11,7 @@
#include <linux/const.h>
#include <asm/page.h>
#include <asm/vdso.h>
+#include <asm-generic/vmlinux.lds.h>
OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64")
OUTPUT_ARCH(aarch64)
@@ -49,11 +50,24 @@ SECTIONS
.dynamic : { *(.dynamic) } :text :dynamic
- .rodata : { *(.rodata*) } :text
+ .rela.dyn : ALIGN(8) { *(.rela .rela*) }
+
+ .rodata : {
+ *(.rodata*)
+ *(.got)
+ *(.got.plt)
+ *(.plt)
+ *(.plt.*)
+ *(.iplt)
+ *(.igot .igot.plt)
+ } :text
_end = .;
PROVIDE(end = .);
+ DWARF_DEBUG
+ ELF_DETAILS
+
/DISCARD/ : {
*(.data .data.* .gnu.linkonce.d.* .sdata*)
*(.bss .sbss .dynbss .dynsbss)
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 05ba1aae1b6f..36c8f66cad25 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -104,6 +104,7 @@ VDSO_AFLAGS += -D__ASSEMBLY__
VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1
VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096
VDSO_LDFLAGS += -shared --hash-style=sysv --build-id=sha1
+VDSO_LDFLAGS += --orphan-handling=warn
# Borrow vdsomunge.c from the arm vDSO
diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S
index 3348ce5ea306..8d95d7d35057 100644
--- a/arch/arm64/kernel/vdso32/vdso.lds.S
+++ b/arch/arm64/kernel/vdso32/vdso.lds.S
@@ -11,6 +11,7 @@
#include <linux/const.h>
#include <asm/page.h>
#include <asm/vdso.h>
+#include <asm-generic/vmlinux.lds.h>
OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm")
OUTPUT_ARCH(arm)
@@ -35,12 +36,30 @@ SECTIONS
.dynamic : { *(.dynamic) } :text :dynamic
- .rodata : { *(.rodata*) } :text
+ .rodata : {
+ *(.rodata*)
+ *(.got)
+ *(.got.plt)
+ *(.plt)
+ *(.rel.iplt)
+ *(.iplt)
+ *(.igot.plt)
+ } :text
- .text : { *(.text*) } :text =0xe7f001f2
+ .text : {
+ *(.text*)
+ *(.glue_7)
+ *(.glue_7t)
+ *(.vfp11_veneer)
+ *(.v4_bx)
+ } :text =0xe7f001f2
- .got : { *(.got) }
- .rel.plt : { *(.rel.plt) }
+ .rel.dyn : { *(.rel*) }
+
+ .ARM.exidx : { *(.ARM.exidx*) }
+ DWARF_DEBUG
+ ELF_DETAILS
+ .ARM.attributes 0 : { *(.ARM.attributes) }
/DISCARD/ : {
*(.note.GNU-stack)
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 2d4a8f995175..45131e354e27 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -115,7 +115,8 @@ jiffies = jiffies_64;
__entry_tramp_text_start = .; \
*(.entry.tramp.text) \
. = ALIGN(PAGE_SIZE); \
- __entry_tramp_text_end = .;
+ __entry_tramp_text_end = .; \
+ *(.entry.tramp.rodata)
#else
#define TRAMP_TEXT
#endif
@@ -198,8 +199,7 @@ SECTIONS
}
idmap_pg_dir = .;
- . += IDMAP_DIR_SIZE;
- idmap_pg_end = .;
+ . += PAGE_SIZE;
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
tramp_pg_dir = .;
@@ -235,6 +235,10 @@ SECTIONS
__inittext_end = .;
__initdata_begin = .;
+ init_idmap_pg_dir = .;
+ . += INIT_IDMAP_DIR_SIZE;
+ init_idmap_pg_end = .;
+
.init.data : {
INIT_DATA
INIT_SETUP(16)
@@ -253,21 +257,17 @@ SECTIONS
HYPERVISOR_RELOC_SECTION
.rela.dyn : ALIGN(8) {
+ __rela_start = .;
*(.rela .rela*)
+ __rela_end = .;
}
- __rela_offset = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR);
- __rela_size = SIZEOF(.rela.dyn);
-
-#ifdef CONFIG_RELR
.relr.dyn : ALIGN(8) {
+ __relr_start = .;
*(.relr.dyn)
+ __relr_end = .;
}
- __relr_offset = ABSOLUTE(ADDR(.relr.dyn) - KIMAGE_VADDR);
- __relr_size = SIZEOF(.relr.dyn);
-#endif
-
. = ALIGN(SEGMENT_ALIGN);
__initdata_end = .;
__init_end = .;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
index fd55014b3497..fa6e466ed57f 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
@@ -176,25 +176,25 @@
)
#define PVM_ID_AA64ISAR1_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FRINTTS) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SB) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SPECRES) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_BF16) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DGH) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_I8MM) \
)
#define PVM_ID_AA64ISAR2_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) \
+ ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) \
)
u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 35a4331ba5f3..6b94c3e6ff26 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -173,10 +173,10 @@ static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu)
u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW;
if (!vcpu_has_ptrauth(vcpu))
- allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_API) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI));
+ allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI));
return id_aa64isar1_el1_sys_val & allow_mask;
}
@@ -186,8 +186,8 @@ static u64 get_pvm_id_aa64isar2(const struct kvm_vcpu *vcpu)
u64 allow_mask = PVM_ID_AA64ISAR2_ALLOW;
if (!vcpu_has_ptrauth(vcpu))
- allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) |
- ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3));
+ allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3));
return id_aa64isar2_el1_sys_val & allow_mask;
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index c06c0477fab5..c4fb3874b5e2 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1136,17 +1136,17 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
- val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_API) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI));
+ val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI));
break;
case SYS_ID_AA64ISAR2_EL1:
if (!vcpu_has_ptrauth(vcpu))
- val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) |
- ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3));
+ val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3));
if (!cpus_have_final_cap(ARM64_HAS_WFXT))
- val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_WFXT);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT);
break;
case SYS_ID_AA64DFR0_EL1:
/* Limit debug to ARMv8.0 */
diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
index eeb9e45bcce8..1b7c93ae7e63 100644
--- a/arch/arm64/lib/mte.S
+++ b/arch/arm64/lib/mte.S
@@ -18,7 +18,7 @@
*/
.macro multitag_transfer_size, reg, tmp
mrs_s \reg, SYS_GMID_EL1
- ubfx \reg, \reg, #SYS_GMID_EL1_BS_SHIFT, #SYS_GMID_EL1_BS_SIZE
+ ubfx \reg, \reg, #GMID_EL1_BS_SHIFT, #GMID_EL1_BS_SIZE
mov \tmp, #4
lsl \reg, \tmp, \reg
.endm
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 21c907987080..081058d4e436 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -194,44 +194,3 @@ SYM_FUNC_START(__pi_dcache_clean_pop)
ret
SYM_FUNC_END(__pi_dcache_clean_pop)
SYM_FUNC_ALIAS(dcache_clean_pop, __pi_dcache_clean_pop)
-
-/*
- * __dma_flush_area(start, size)
- *
- * clean & invalidate D / U line
- *
- * - start - virtual start address of region
- * - size - size in question
- */
-SYM_FUNC_START(__pi___dma_flush_area)
- add x1, x0, x1
- dcache_by_line_op civac, sy, x0, x1, x2, x3
- ret
-SYM_FUNC_END(__pi___dma_flush_area)
-SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area)
-
-/*
- * __dma_map_area(start, size, dir)
- * - start - kernel virtual start address
- * - size - size of region
- * - dir - DMA direction
- */
-SYM_FUNC_START(__pi___dma_map_area)
- add x1, x0, x1
- b __pi_dcache_clean_poc
-SYM_FUNC_END(__pi___dma_map_area)
-SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area)
-
-/*
- * __dma_unmap_area(start, size, dir)
- * - start - kernel virtual start address
- * - size - size of region
- * - dir - DMA direction
- */
-SYM_FUNC_START(__pi___dma_unmap_area)
- add x1, x0, x1
- cmp w2, #DMA_TO_DEVICE
- b.ne __pi_dcache_inval_poc
- ret
-SYM_FUNC_END(__pi___dma_unmap_area)
-SYM_FUNC_ALIAS(__dma_unmap_area, __pi___dma_unmap_area)
diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
index 0dea80bf6de4..24913271e898 100644
--- a/arch/arm64/mm/copypage.c
+++ b/arch/arm64/mm/copypage.c
@@ -23,15 +23,6 @@ void copy_highpage(struct page *to, struct page *from)
if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) {
set_bit(PG_mte_tagged, &to->flags);
- page_kasan_tag_reset(to);
- /*
- * We need smp_wmb() in between setting the flags and clearing the
- * tags because if another thread reads page->flags and builds a
- * tagged address out of it, there is an actual dependency to the
- * memory access, but on the current thread we do not guarantee that
- * the new page->flags are visible before the tags were updated.
- */
- smp_wmb();
mte_copy_page_tags(kto, kfrom);
}
}
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 6099c81b9322..599cf81f5685 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -14,20 +14,29 @@
#include <asm/xen/xen-ops.h>
void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
- enum dma_data_direction dir)
+ enum dma_data_direction dir)
{
- __dma_map_area(phys_to_virt(paddr), size, dir);
+ unsigned long start = (unsigned long)phys_to_virt(paddr);
+
+ dcache_clean_poc(start, start + size);
}
void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
- enum dma_data_direction dir)
+ enum dma_data_direction dir)
{
- __dma_unmap_area(phys_to_virt(paddr), size, dir);
+ unsigned long start = (unsigned long)phys_to_virt(paddr);
+
+ if (dir == DMA_TO_DEVICE)
+ return;
+
+ dcache_inval_poc(start, start + size);
}
void arch_dma_prep_coherent(struct page *page, size_t size)
{
- __dma_flush_area(page_address(page), size);
+ unsigned long start = (unsigned long)page_address(page);
+
+ dcache_clean_inval_poc(start, start + size);
}
#ifdef CONFIG_IOMMU_DMA
diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
index 489455309695..228d681a8715 100644
--- a/arch/arm64/mm/extable.c
+++ b/arch/arm64/mm/extable.c
@@ -16,13 +16,6 @@ get_ex_fixup(const struct exception_table_entry *ex)
return ((unsigned long)&ex->fixup + ex->fixup);
}
-static bool ex_handler_fixup(const struct exception_table_entry *ex,
- struct pt_regs *regs)
-{
- regs->pc = get_ex_fixup(ex);
- return true;
-}
-
static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
struct pt_regs *regs)
{
@@ -72,11 +65,10 @@ bool fixup_exception(struct pt_regs *regs)
return false;
switch (ex->type) {
- case EX_TYPE_FIXUP:
- return ex_handler_fixup(ex, regs);
case EX_TYPE_BPF:
return ex_handler_bpf(ex, regs);
case EX_TYPE_UACCESS_ERR_ZERO:
+ case EX_TYPE_KACCESS_ERR_ZERO:
return ex_handler_uaccess_err_zero(ex, regs);
case EX_TYPE_LOAD_UNALIGNED_ZEROPAD:
return ex_handler_load_unaligned_zeropad(ex, regs);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index c5e11768e5c1..cdf3ffa0c223 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -927,6 +927,5 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
void tag_clear_highpage(struct page *page)
{
mte_zero_clear_page_tags(page_address(page));
- page_kasan_tag_reset(page);
set_bit(PG_mte_tagged, &page->flags);
}
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 3618ef3f6d81..5307ffdefb8d 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -100,16 +100,6 @@ int pud_huge(pud_t pud)
#endif
}
-/*
- * Select all bits except the pfn
- */
-static inline pgprot_t pte_pgprot(pte_t pte)
-{
- unsigned long pfn = pte_pfn(pte);
-
- return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
-}
-
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, size_t *pgsize)
{
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 339ee84e5a61..b6ef26fc8ebe 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -389,7 +389,7 @@ void __init arm64_memblock_init(void)
early_init_fdt_scan_reserved_mem();
- if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32))
+ if (!defer_reserve_crashkernel())
reserve_crashkernel();
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
@@ -438,7 +438,7 @@ void __init bootmem_init(void)
* request_standard_resources() depends on crashkernel's memory being
* reserved, so do it here.
*/
- if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32))
+ if (defer_reserve_crashkernel())
reserve_crashkernel();
memblock_dump_all();
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index b21f91cd830d..c5af103d4ad4 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -1,96 +1,22 @@
// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Based on arch/arm/mm/ioremap.c
- *
- * (C) Copyright 1995 1996 Linus Torvalds
- * Hacked for ARM by Phil Blundell <philb@gnu.org>
- * Hacked to allow all architectures to build, and various cleanups
- * by Russell King
- * Copyright (C) 2012 ARM Ltd.
- */
-#include <linux/export.h>
#include <linux/mm.h>
-#include <linux/vmalloc.h>
#include <linux/io.h>
-#include <asm/fixmap.h>
-#include <asm/tlbflush.h>
-
-static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size,
- pgprot_t prot, void *caller)
+bool ioremap_allowed(phys_addr_t phys_addr, size_t size, unsigned long prot)
{
- unsigned long last_addr;
- unsigned long offset = phys_addr & ~PAGE_MASK;
- int err;
- unsigned long addr;
- struct vm_struct *area;
+ unsigned long last_addr = phys_addr + size - 1;
- /*
- * Page align the mapping address and size, taking account of any
- * offset.
- */
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(size + offset);
+ /* Don't allow outside PHYS_MASK */
+ if (last_addr & ~PHYS_MASK)
+ return false;
- /*
- * Don't allow wraparound, zero size or outside PHYS_MASK.
- */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr || (last_addr & ~PHYS_MASK))
- return NULL;
-
- /*
- * Don't allow RAM to be mapped.
- */
+ /* Don't allow RAM to be mapped. */
if (WARN_ON(pfn_is_map_memory(__phys_to_pfn(phys_addr))))
- return NULL;
-
- area = get_vm_area_caller(size, VM_IOREMAP, caller);
- if (!area)
- return NULL;
- addr = (unsigned long)area->addr;
- area->phys_addr = phys_addr;
-
- err = ioremap_page_range(addr, addr + size, phys_addr, prot);
- if (err) {
- vunmap((void *)addr);
- return NULL;
- }
-
- return (void __iomem *)(offset + addr);
-}
-
-void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot)
-{
- return __ioremap_caller(phys_addr, size, prot,
- __builtin_return_address(0));
-}
-EXPORT_SYMBOL(__ioremap);
-
-void iounmap(volatile void __iomem *io_addr)
-{
- unsigned long addr = (unsigned long)io_addr & PAGE_MASK;
-
- /*
- * We could get an address outside vmalloc range in case
- * of ioremap_cache() reusing a RAM mapping.
- */
- if (is_vmalloc_addr((void *)addr))
- vunmap((void *)addr);
-}
-EXPORT_SYMBOL(iounmap);
-
-void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size)
-{
- /* For normal memory we already have a cacheable mapping. */
- if (pfn_is_map_memory(__phys_to_pfn(phys_addr)))
- return (void __iomem *)__phys_to_virt(phys_addr);
+ return false;
- return __ioremap_caller(phys_addr, size, __pgprot(PROT_NORMAL),
- __builtin_return_address(0));
+ return true;
}
-EXPORT_SYMBOL(ioremap_cache);
/*
* Must be called after early_fixmap_init
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index c12cd700598f..e969e68de005 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -236,7 +236,7 @@ static void __init kasan_init_shadow(void)
*/
memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
dsb(ishst);
- cpu_replace_ttbr1(lm_alias(tmp_pg_dir));
+ cpu_replace_ttbr1(lm_alias(tmp_pg_dir), idmap_pg_dir);
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
@@ -280,7 +280,7 @@ static void __init kasan_init_shadow(void)
PAGE_KERNEL_RO));
memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE);
- cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+ cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
}
static void __init kasan_init_depth(void)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 626ec32873c6..db7c4e6ae57b 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -43,15 +43,27 @@
#define NO_CONT_MAPPINGS BIT(1)
#define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */
-u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN);
-u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
+int idmap_t0sz __ro_after_init;
-u64 __section(".mmuoff.data.write") vabits_actual;
+#if VA_BITS > 48
+u64 vabits_actual __ro_after_init = VA_BITS_MIN;
EXPORT_SYMBOL(vabits_actual);
+#endif
+
+u64 kimage_vaddr __ro_after_init = (u64)&_text;
+EXPORT_SYMBOL(kimage_vaddr);
u64 kimage_voffset __ro_after_init;
EXPORT_SYMBOL(kimage_voffset);
+u32 __boot_cpu_mode[] = { BOOT_CPU_MODE_EL2, BOOT_CPU_MODE_EL1 };
+
+/*
+ * The booting CPU updates the failed status @__early_cpu_boot_status,
+ * with MMU turned off.
+ */
+long __section(".mmuoff.data.write") __early_cpu_boot_status;
+
/*
* Empty_zero_page is a special page that is used for zero-initialized data
* and COW.
@@ -388,6 +400,13 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
} while (pgdp++, addr = next, addr != end);
}
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+extern __alias(__create_pgd_mapping)
+void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
+ phys_addr_t size, pgprot_t prot,
+ phys_addr_t (*pgtable_alloc)(int), int flags);
+#endif
+
static phys_addr_t __pgd_pgtable_alloc(int shift)
{
void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL);
@@ -529,8 +548,7 @@ static void __init map_mem(pgd_t *pgdp)
#ifdef CONFIG_KEXEC_CORE
if (crash_mem_map) {
- if (IS_ENABLED(CONFIG_ZONE_DMA) ||
- IS_ENABLED(CONFIG_ZONE_DMA32))
+ if (defer_reserve_crashkernel())
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
else if (crashk_res.end)
memblock_mark_nomap(crashk_res.start,
@@ -571,8 +589,7 @@ static void __init map_mem(pgd_t *pgdp)
* through /sys/kernel/kexec_crash_size interface.
*/
#ifdef CONFIG_KEXEC_CORE
- if (crash_mem_map &&
- !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) {
+ if (crash_mem_map && !defer_reserve_crashkernel()) {
if (crashk_res.end) {
__map_memblock(pgdp, crashk_res.start,
crashk_res.end + 1,
@@ -665,13 +682,9 @@ static int __init map_entry_trampoline(void)
__set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i,
pa_start + i * PAGE_SIZE, prot);
- if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
- extern char __entry_tramp_data_start[];
-
- __set_fixmap(FIX_ENTRY_TRAMP_DATA,
- __pa_symbol(__entry_tramp_data_start),
- PAGE_KERNEL_RO);
- }
+ if (IS_ENABLED(CONFIG_RELOCATABLE))
+ __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i,
+ pa_start + i * PAGE_SIZE, PAGE_KERNEL_RO);
return 0;
}
@@ -762,22 +775,57 @@ static void __init map_kernel(pgd_t *pgdp)
kasan_copy_shadow(pgdp);
}
+static void __init create_idmap(void)
+{
+ u64 start = __pa_symbol(__idmap_text_start);
+ u64 size = __pa_symbol(__idmap_text_end) - start;
+ pgd_t *pgd = idmap_pg_dir;
+ u64 pgd_phys;
+
+ /* check if we need an additional level of translation */
+ if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) {
+ pgd_phys = early_pgtable_alloc(PAGE_SHIFT);
+ set_pgd(&idmap_pg_dir[start >> VA_BITS],
+ __pgd(pgd_phys | P4D_TYPE_TABLE));
+ pgd = __va(pgd_phys);
+ }
+ __create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX,
+ early_pgtable_alloc, 0);
+
+ if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) {
+ extern u32 __idmap_kpti_flag;
+ u64 pa = __pa_symbol(&__idmap_kpti_flag);
+
+ /*
+ * The KPTI G-to-nG conversion code needs a read-write mapping
+ * of its synchronization flag in the ID map.
+ */
+ __create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL,
+ early_pgtable_alloc, 0);
+ }
+}
+
void __init paging_init(void)
{
pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir));
+ extern pgd_t init_idmap_pg_dir[];
+
+ idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0));
map_kernel(pgdp);
map_mem(pgdp);
pgd_clear_fixmap();
- cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+ cpu_replace_ttbr1(lm_alias(swapper_pg_dir), init_idmap_pg_dir);
init_mm.pgd = swapper_pg_dir;
memblock_phys_free(__pa_symbol(init_pg_dir),
__pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir));
memblock_allow_resize();
+
+ create_idmap();
}
/*
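
The new idmap_t0sz computation in paging_init() sizes the ID map to cover the kernel image's physical address: 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0)) gives T0SZ = 16 (a 48-bit ID map) whenever _end lies below 2^48, and a smaller T0SZ (larger map) only when the image sits higher. A rough standalone sketch of that arithmetic, with an assumed physical address for _end and VA_BITS_MIN assumed to be 48:

    #include <stdint.h>
    #include <stdio.h>

    /* index of the most significant set bit, like the kernel's __fls() */
    static unsigned int fls_demo(uint64_t x)
    {
        unsigned int i = 0;
        while (x >>= 1)
            i++;
        return i;
    }

    int main(void)
    {
        uint64_t pa_end  = 0x81000000ULL;       /* assumed PA of _end */
        uint64_t va_mask = (1ULL << 48) - 1;    /* GENMASK(VA_BITS_MIN - 1, 0) */
        unsigned int t0sz = 63 - fls_demo(pa_end | va_mask);

        printf("idmap_t0sz = %u\n", t0sz);      /* 16 -> 2^48-byte ID map */
        return 0;
    }
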
diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
index a9e50e930484..4334dec93bd4 100644
--- a/arch/arm64/mm/mteswap.c
+++ b/arch/arm64/mm/mteswap.c
@@ -53,15 +53,6 @@ bool mte_restore_tags(swp_entry_t entry, struct page *page)
if (!tags)
return false;
- page_kasan_tag_reset(page);
- /*
- * We need smp_wmb() in between setting the flags and clearing the
- * tags because if another thread reads page->flags and builds a
- * tagged address out of it, there is an actual dependency to the
- * memory access, but on the current thread we do not guarantee that
- * the new page->flags are visible before the tags were updated.
- */
- smp_wmb();
mte_restore_page_tags(page_address(page), tags);
return true;
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 50bbed947bec..7837a69524c5 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -14,6 +14,7 @@
#include <asm/asm-offsets.h>
#include <asm/asm_pointer_auth.h>
#include <asm/hwcap.h>
+#include <asm/kernel-pgtable.h>
#include <asm/pgtable-hwdef.h>
#include <asm/cpufeature.h>
#include <asm/alternative.h>
@@ -200,34 +201,64 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1)
.popsection
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+
+#define KPTI_NG_PTE_FLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
+
.pushsection ".idmap.text", "awx"
- .macro __idmap_kpti_get_pgtable_ent, type
- dc cvac, cur_\()\type\()p // Ensure any existing dirty
- dmb sy // lines are written back before
- ldr \type, [cur_\()\type\()p] // loading the entry
- tbz \type, #0, skip_\()\type // Skip invalid and
- tbnz \type, #11, skip_\()\type // non-global entries
+ .macro kpti_mk_tbl_ng, type, num_entries
+ add end_\type\()p, cur_\type\()p, #\num_entries * 8
+.Ldo_\type:
+ ldr \type, [cur_\type\()p] // Load the entry
+ tbz \type, #0, .Lnext_\type // Skip invalid and
+ tbnz \type, #11, .Lnext_\type // non-global entries
+ orr \type, \type, #PTE_NG // Same bit for blocks and pages
+ str \type, [cur_\type\()p] // Update the entry
+ .ifnc \type, pte
+ tbnz \type, #1, .Lderef_\type
+ .endif
+.Lnext_\type:
+ add cur_\type\()p, cur_\type\()p, #8
+ cmp cur_\type\()p, end_\type\()p
+ b.ne .Ldo_\type
.endm
- .macro __idmap_kpti_put_pgtable_ent_ng, type
- orr \type, \type, #PTE_NG // Same bit for blocks and pages
- str \type, [cur_\()\type\()p] // Update the entry and ensure
- dmb sy // that it is visible to all
- dc civac, cur_\()\type\()p // CPUs.
+ /*
+ * Dereference the current table entry and map it into the temporary
+ * fixmap slot associated with the current level.
+ */
+ .macro kpti_map_pgtbl, type, level
+ str xzr, [temp_pte, #8 * (\level + 1)] // break before make
+ dsb nshst
+ add pte, temp_pte, #PAGE_SIZE * (\level + 1)
+ lsr pte, pte, #12
+ tlbi vaae1, pte
+ dsb nsh
+ isb
+
+ phys_to_pte pte, cur_\type\()p
+ add cur_\type\()p, temp_pte, #PAGE_SIZE * (\level + 1)
+ orr pte, pte, pte_flags
+ str pte, [temp_pte, #8 * (\level + 1)]
+ dsb nshst
.endm
/*
- * void __kpti_install_ng_mappings(int cpu, int num_cpus, phys_addr_t swapper)
+ * void __kpti_install_ng_mappings(int cpu, int num_secondaries, phys_addr_t temp_pgd,
+ * unsigned long temp_pte_va)
*
* Called exactly once from stop_machine context by each CPU found during boot.
*/
-__idmap_kpti_flag:
- .long 1
+ .pushsection ".data", "aw", %progbits
+SYM_DATA(__idmap_kpti_flag, .long 1)
+ .popsection
+
SYM_FUNC_START(idmap_kpti_install_ng_mappings)
cpu .req w0
+ temp_pte .req x0
num_cpus .req w1
- swapper_pa .req x2
+ pte_flags .req x1
+ temp_pgd_phys .req x2
swapper_ttb .req x3
flag_ptr .req x4
cur_pgdp .req x5
@@ -235,17 +266,16 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings)
pgd .req x7
cur_pudp .req x8
end_pudp .req x9
- pud .req x10
cur_pmdp .req x11
end_pmdp .req x12
- pmd .req x13
cur_ptep .req x14
end_ptep .req x15
pte .req x16
+ valid .req x17
+ mov x5, x3 // preserve temp_pte arg
mrs swapper_ttb, ttbr1_el1
- restore_ttbr1 swapper_ttb
- adr flag_ptr, __idmap_kpti_flag
+ adr_l flag_ptr, __idmap_kpti_flag
cbnz cpu, __idmap_kpti_secondary
@@ -256,98 +286,71 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings)
eor w17, w17, num_cpus
cbnz w17, 1b
- /* We need to walk swapper, so turn off the MMU. */
- pre_disable_mmu_workaround
- mrs x17, sctlr_el1
- bic x17, x17, #SCTLR_ELx_M
- msr sctlr_el1, x17
+ /* Switch to the temporary page tables on this CPU only */
+ __idmap_cpu_set_reserved_ttbr1 x8, x9
+ offset_ttbr1 temp_pgd_phys, x8
+ msr ttbr1_el1, temp_pgd_phys
isb
+ mov temp_pte, x5
+ mov pte_flags, #KPTI_NG_PTE_FLAGS
+
/* Everybody is enjoying the idmap, so we can rewrite swapper. */
/* PGD */
- mov cur_pgdp, swapper_pa
- add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8)
-do_pgd: __idmap_kpti_get_pgtable_ent pgd
- tbnz pgd, #1, walk_puds
-next_pgd:
- __idmap_kpti_put_pgtable_ent_ng pgd
-skip_pgd:
- add cur_pgdp, cur_pgdp, #8
- cmp cur_pgdp, end_pgdp
- b.ne do_pgd
-
- /* Publish the updated tables and nuke all the TLBs */
- dsb sy
- tlbi vmalle1is
- dsb ish
- isb
+ adrp cur_pgdp, swapper_pg_dir
+ kpti_map_pgtbl pgd, 0
+ kpti_mk_tbl_ng pgd, PTRS_PER_PGD
- /* We're done: fire up the MMU again */
- mrs x17, sctlr_el1
- orr x17, x17, #SCTLR_ELx_M
- set_sctlr_el1 x17
+ /* Ensure all the updated entries are visible to secondary CPUs */
+ dsb ishst
+
+ /* We're done: fire up swapper_pg_dir again */
+ __idmap_cpu_set_reserved_ttbr1 x8, x9
+ msr ttbr1_el1, swapper_ttb
+ isb
/* Set the flag to zero to indicate that we're all done */
str wzr, [flag_ptr]
ret
+.Lderef_pgd:
/* PUD */
-walk_puds:
- .if CONFIG_PGTABLE_LEVELS > 3
+ .if CONFIG_PGTABLE_LEVELS > 3
+ pud .req x10
pte_to_phys cur_pudp, pgd
- add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8)
-do_pud: __idmap_kpti_get_pgtable_ent pud
- tbnz pud, #1, walk_pmds
-next_pud:
- __idmap_kpti_put_pgtable_ent_ng pud
-skip_pud:
- add cur_pudp, cur_pudp, 8
- cmp cur_pudp, end_pudp
- b.ne do_pud
- b next_pgd
- .else /* CONFIG_PGTABLE_LEVELS <= 3 */
- mov pud, pgd
- b walk_pmds
-next_pud:
- b next_pgd
+ kpti_map_pgtbl pud, 1
+ kpti_mk_tbl_ng pud, PTRS_PER_PUD
+ b .Lnext_pgd
+ .else /* CONFIG_PGTABLE_LEVELS <= 3 */
+ pud .req pgd
+ .set .Lnext_pud, .Lnext_pgd
.endif
+.Lderef_pud:
/* PMD */
-walk_pmds:
- .if CONFIG_PGTABLE_LEVELS > 2
+ .if CONFIG_PGTABLE_LEVELS > 2
+ pmd .req x13
pte_to_phys cur_pmdp, pud
- add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8)
-do_pmd: __idmap_kpti_get_pgtable_ent pmd
- tbnz pmd, #1, walk_ptes
-next_pmd:
- __idmap_kpti_put_pgtable_ent_ng pmd
-skip_pmd:
- add cur_pmdp, cur_pmdp, #8
- cmp cur_pmdp, end_pmdp
- b.ne do_pmd
- b next_pud
- .else /* CONFIG_PGTABLE_LEVELS <= 2 */
- mov pmd, pud
- b walk_ptes
-next_pmd:
- b next_pud
+ kpti_map_pgtbl pmd, 2
+ kpti_mk_tbl_ng pmd, PTRS_PER_PMD
+ b .Lnext_pud
+ .else /* CONFIG_PGTABLE_LEVELS <= 2 */
+ pmd .req pgd
+ .set .Lnext_pmd, .Lnext_pgd
.endif
+.Lderef_pmd:
/* PTE */
-walk_ptes:
pte_to_phys cur_ptep, pmd
- add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8)
-do_pte: __idmap_kpti_get_pgtable_ent pte
- __idmap_kpti_put_pgtable_ent_ng pte
-skip_pte:
- add cur_ptep, cur_ptep, #8
- cmp cur_ptep, end_ptep
- b.ne do_pte
- b next_pmd
+ kpti_map_pgtbl pte, 3
+ kpti_mk_tbl_ng pte, PTRS_PER_PTE
+ b .Lnext_pmd
.unreq cpu
+ .unreq temp_pte
.unreq num_cpus
- .unreq swapper_pa
+ .unreq pte_flags
+ .unreq temp_pgd_phys
.unreq cur_pgdp
.unreq end_pgdp
.unreq pgd
@@ -360,6 +363,7 @@ skip_pte:
.unreq cur_ptep
.unreq end_ptep
.unreq pte
+ .unreq valid
/* Secondary CPUs end up here */
__idmap_kpti_secondary:
@@ -379,7 +383,6 @@ __idmap_kpti_secondary:
cbnz w16, 1b
/* All done, act like nothing happened */
- offset_ttbr1 swapper_ttb, x16
msr ttbr1_el1, swapper_ttb
isb
ret
@@ -395,6 +398,8 @@ SYM_FUNC_END(idmap_kpti_install_ng_mappings)
*
* Initialise the processor for turning the MMU on.
*
+ * Input:
+ * x0 - actual number of VA bits (ignored unless VA_BITS > 48)
* Output:
* Return in x0 the value of the SCTLR_EL1 register.
*/
@@ -464,12 +469,11 @@ SYM_FUNC_START(__cpu_setup)
tcr_clear_errata_bits tcr, x9, x5
#ifdef CONFIG_ARM64_VA_BITS_52
- ldr_l x9, vabits_actual
- sub x9, xzr, x9
+ sub x9, xzr, x0
add x9, x9, #64
tcr_set_t1sz tcr, x9
#else
- ldr_l x9, idmap_t0sz
+ idmap_get_t0sz x9
#endif
tcr_set_t0sz tcr, x9
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 507b20373953..779653771507 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -36,6 +36,7 @@ HAS_RNG
HAS_SB
HAS_STAGE2_FWB
HAS_SYSREG_GIC_CPUIF
+HAS_TIDCP1
HAS_TLB_RANGE
HAS_VIRT_HOST_EXTN
HAS_WFXT
@@ -61,6 +62,7 @@ WORKAROUND_1418040
WORKAROUND_1463225
WORKAROUND_1508412
WORKAROUND_1542419
+WORKAROUND_1742098
WORKAROUND_1902691
WORKAROUND_2038923
WORKAROUND_2064142
diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk
index 5c55509eb43f..db461921d256 100755
--- a/arch/arm64/tools/gen-sysreg.awk
+++ b/arch/arm64/tools/gen-sysreg.awk
@@ -88,7 +88,7 @@ END {
# skip blank lines and comment lines
/^$/ { next }
-/^#/ { next }
+/^[\t ]*#/ { next }
/^SysregFields/ {
change_block("SysregFields", "None", "SysregFields")
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index ff5e552f7420..9ae483ec1e56 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -46,6 +46,89 @@
# feature that introduces them (eg, FEAT_LS64_ACCDATA introduces enumeration
# item ACCDATA) though it may be more tasteful to do something else.
+Sysreg ID_AA64ZFR0_EL1 3 0 0 4 4
+Res0 63:60
+Enum 59:56 F64MM
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 55:52 F32MM
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Res0 51:48
+Enum 47:44 I8MM
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 43:40 SM4
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Res0 39:36
+Enum 35:32 SHA3
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Res0 31:24
+Enum 23:20 BF16
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 EBF16
+EndEnum
+Enum 19:16 BitPerm
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Res0 15:8
+Enum 7:4 AES
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 PMULL128
+EndEnum
+Enum 3:0 SVEver
+ 0b0000 IMP
+ 0b0001 SVE2
+EndEnum
+EndSysreg
+
+Sysreg ID_AA64SMFR0_EL1 3 0 0 4 5
+Enum 63 FA64
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+Res0 62:60
+Field 59:56 SMEver
+Enum 55:52 I16I64
+ 0b0000 NI
+ 0b1111 IMP
+EndEnum
+Res0 51:49
+Enum 48 F64F64
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+Res0 47:40
+Enum 39:36 I8I32
+ 0b0000 NI
+ 0b1111 IMP
+EndEnum
+Enum 35 F16F32
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+Enum 34 B16F32
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+Res0 33
+Enum 32 F32F32
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+Res0 31:0
+EndSysreg
+
Sysreg ID_AA64ISAR0_EL1 3 0 0 6 0
Enum 63:60 RNDR
0b0000 NI
@@ -114,6 +197,122 @@ EndEnum
Res0 3:0
EndSysreg
+Sysreg ID_AA64ISAR1_EL1 3 0 0 6 1
+Enum 63:60 LS64
+ 0b0000 NI
+ 0b0001 LS64
+ 0b0010 LS64_V
+ 0b0011 LS64_ACCDATA
+EndEnum
+Enum 59:56 XS
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 55:52 I8MM
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 51:48 DGH
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 47:44 BF16
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 EBF16
+EndEnum
+Enum 43:40 SPECRES
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 39:36 SB
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 35:32 FRINTTS
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 31:28 GPI
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 27:24 GPA
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 23:20 LRCPC
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 LRCPC2
+EndEnum
+Enum 19:16 FCMA
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 15:12 JSCVT
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 11:8 API
+ 0b0000 NI
+ 0b0001 PAuth
+ 0b0010 EPAC
+ 0b0011 PAuth2
+ 0b0100 FPAC
+ 0b0101 FPACCOMBINE
+EndEnum
+Enum 7:4 APA
+ 0b0000 NI
+ 0b0001 PAuth
+ 0b0010 EPAC
+ 0b0011 PAuth2
+ 0b0100 FPAC
+ 0b0101 FPACCOMBINE
+EndEnum
+Enum 3:0 DPB
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 DPB2
+EndEnum
+EndSysreg
+
+Sysreg ID_AA64ISAR2_EL1 3 0 0 6 2
+Res0 63:28
+Enum 27:24 PAC_frac
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 23:20 BC
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 19:16 MOPS
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 15:12 APA3
+ 0b0000 NI
+ 0b0001 PAuth
+ 0b0010 EPAC
+ 0b0011 PAuth2
+ 0b0100 FPAC
+ 0b0101 FPACCOMBINE
+EndEnum
+Enum 11:8 GPA3
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 7:4 RPRES
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 3:0 WFxT
+ 0b0000 NI
+ 0b0010 IMP
+EndEnum
+EndSysreg
+
Sysreg SCTLR_EL1 3 0 1 0 0
Field 63 TIDCP
Field 62 SPINMASK
@@ -257,6 +456,11 @@ Field 5:3 Ctype2
Field 2:0 Ctype1
EndSysreg
+Sysreg GMID_EL1 3 1 0 0 4
+Res0 63:4
+Field 3:0 BS
+EndSysreg
+
Sysreg SMIDR_EL1 3 1 0 0 6
Res0 63:32
Field 31:24 IMPLEMENTER
@@ -273,6 +477,33 @@ Field 3:1 Level
Field 0 InD
EndSysreg
+Sysreg CTR_EL0 3 3 0 0 1
+Res0 63:38
+Field 37:32 TminLine
+Res1 31
+Res0 30
+Field 29 DIC
+Field 28 IDC
+Field 27:24 CWG
+Field 23:20 ERG
+Field 19:16 DminLine
+Enum 15:14 L1Ip
+ 0b00 VPIPT
	# This is named AIVIVT in the ARM but documented as reserved
+ 0b01 RESERVED
+ 0b10 VIPT
+ 0b11 PIPT
+EndEnum
+Res0 13:4
+Field 3:0 IminLine
+EndSysreg
+
+Sysreg DCZID_EL0 3 3 0 0 7
+Res0 63:5
+Field 4 DZP
+Field 3:0 BS
+EndSysreg
+
Sysreg SVCR 3 3 4 2 2
Res0 63:2
Field 1 ZA
@@ -367,3 +598,36 @@ EndSysreg
Sysreg TTBR1_EL1 3 0 2 0 1
Fields TTBRx_EL1
EndSysreg
+
+Sysreg LORSA_EL1 3 0 10 4 0
+Res0 63:52
+Field 51:16 SA
+Res0 15:1
+Field 0 Valid
+EndSysreg
+
+Sysreg LOREA_EL1 3 0 10 4 1
+Res0 63:52
+Field 51:48 EA_51_48
+Field 47:16 EA_47_16
+Res0 15:0
+EndSysreg
+
+Sysreg LORN_EL1 3 0 10 4 2
+Res0 63:8
+Field 7:0 Num
+EndSysreg
+
+Sysreg LORC_EL1 3 0 10 4 3
+Res0 63:10
+Field 9:2 DS
+Res0 1
+Field 0 EN
+EndSysreg
+
+Sysreg LORID_EL1 3 0 10 4 7
+Res0 63:24
+Field 23:16 LD
+Res0 15:8
+Field 7:0 LR
+EndSysreg
diff --git a/arch/csky/include/asm/tlb.h b/arch/csky/include/asm/tlb.h
index 3498e65f59f8..702861c68874 100644
--- a/arch/csky/include/asm/tlb.h
+++ b/arch/csky/include/asm/tlb.h
@@ -4,21 +4,6 @@
#define __ASM_CSKY_TLB_H
#include <asm/cacheflush.h>
-
-#define tlb_start_vma(tlb, vma) \
- do { \
- if (!(tlb)->fullmm) \
- flush_cache_range(vma, (vma)->vm_start, (vma)->vm_end); \
- } while (0)
-
-#define tlb_end_vma(tlb, vma) \
- do { \
- if (!(tlb)->fullmm) \
- flush_tlb_range(vma, (vma)->vm_start, (vma)->vm_end); \
- } while (0)
-
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#endif /* __ASM_CSKY_TLB_H */
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 35adcf89035a..99300850abc1 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -834,7 +834,7 @@ iosapic_unregister_intr (unsigned int gsi)
if (iosapic_intr_info[irq].count == 0) {
#ifdef CONFIG_SMP
/* Clear affinity */
- cpumask_setall(irq_get_affinity_mask(irq));
+ irq_data_update_affinity(irq_get_irq_data(irq), cpu_all_mask);
#endif
/* Clear the interrupt information */
iosapic_intr_info[irq].dest = 0;
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index ecef17c7c35b..275b9ea58c64 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -57,8 +57,8 @@ static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
{
if (irq < NR_IRQS) {
- cpumask_copy(irq_get_affinity_mask(irq),
- cpumask_of(cpu_logical_id(hwid)));
+ irq_data_update_affinity(irq_get_irq_data(irq),
+ cpumask_of(cpu_logical_id(hwid)));
irq_redir[irq] = (char) (redir & 0xff);
}
}
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index df5c28f252e3..025e5133c860 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -37,7 +37,7 @@ static int ia64_set_msi_irq_affinity(struct irq_data *idata,
msg.data = data;
pci_write_msi_msg(irq, &msg);
- cpumask_copy(irq_data_get_affinity_mask(idata), cpumask_of(cpu));
+ irq_data_update_affinity(idata, cpumask_of(cpu));
return 0;
}
@@ -132,7 +132,7 @@ static int dmar_msi_set_affinity(struct irq_data *data,
msg.address_lo |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
dmar_msi_write(irq, &msg);
- cpumask_copy(irq_data_get_affinity_mask(data), mask);
+ irq_data_update_affinity(data, mask);
return 0;
}
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 53a912befb62..83fe390f8449 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -2,6 +2,7 @@
config LOONGARCH
bool
default y
+ select ACPI_GENERIC_GSI if ACPI
select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
select ARCH_BINFMT_ELF_STATE
select ARCH_ENABLE_MEMORY_HOTPLUG
@@ -69,7 +70,6 @@ config LOONGARCH
select GENERIC_TIME_VSYSCALL
select GPIOLIB
select HAVE_ARCH_AUDITSYSCALL
- select HAVE_ARCH_COMPILER_H
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
@@ -108,6 +108,7 @@ config LOONGARCH
select TRACE_IRQFLAGS_SUPPORT
select USE_PERCPU_NUMA_NODE_ID
select ZONE_DMA32
+ select MMU_GATHER_MERGE_VMAS if MMU
config 32BIT
bool
diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h
index 62044cd5b7bc..c5108213876c 100644
--- a/arch/loongarch/include/asm/acpi.h
+++ b/arch/loongarch/include/asm/acpi.h
@@ -31,6 +31,148 @@ static inline bool acpi_has_cpu_in_madt(void)
extern struct list_head acpi_wakeup_device_list;
+/*
+ * Temporary definitions until the core ACPICA code gets updated (see
+ * 1656837932-18257-1-git-send-email-lvjianmin@loongson.cn and its
+ * follow-ups for the "rationale").
+ *
+ * Once the "legal reasons" are cleared and the code is merged,
+ * this can be dropped entirely.
+ */
+#if (ACPI_CA_VERSION == 0x20220331 && !defined(LOONGARCH_ACPICA_EXT))
+
+#define LOONGARCH_ACPICA_EXT 1
+
+#define ACPI_MADT_TYPE_CORE_PIC 17
+#define ACPI_MADT_TYPE_LIO_PIC 18
+#define ACPI_MADT_TYPE_HT_PIC 19
+#define ACPI_MADT_TYPE_EIO_PIC 20
+#define ACPI_MADT_TYPE_MSI_PIC 21
+#define ACPI_MADT_TYPE_BIO_PIC 22
+#define ACPI_MADT_TYPE_LPC_PIC 23
+
+/* Values for Version field above */
+
+enum acpi_madt_core_pic_version {
+ ACPI_MADT_CORE_PIC_VERSION_NONE = 0,
+ ACPI_MADT_CORE_PIC_VERSION_V1 = 1,
+ ACPI_MADT_CORE_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */
+};
+
+enum acpi_madt_lio_pic_version {
+ ACPI_MADT_LIO_PIC_VERSION_NONE = 0,
+ ACPI_MADT_LIO_PIC_VERSION_V1 = 1,
+ ACPI_MADT_LIO_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */
+};
+
+enum acpi_madt_eio_pic_version {
+ ACPI_MADT_EIO_PIC_VERSION_NONE = 0,
+ ACPI_MADT_EIO_PIC_VERSION_V1 = 1,
+ ACPI_MADT_EIO_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */
+};
+
+enum acpi_madt_ht_pic_version {
+ ACPI_MADT_HT_PIC_VERSION_NONE = 0,
+ ACPI_MADT_HT_PIC_VERSION_V1 = 1,
+ ACPI_MADT_HT_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */
+};
+
+enum acpi_madt_bio_pic_version {
+ ACPI_MADT_BIO_PIC_VERSION_NONE = 0,
+ ACPI_MADT_BIO_PIC_VERSION_V1 = 1,
+ ACPI_MADT_BIO_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */
+};
+
+enum acpi_madt_msi_pic_version {
+ ACPI_MADT_MSI_PIC_VERSION_NONE = 0,
+ ACPI_MADT_MSI_PIC_VERSION_V1 = 1,
+ ACPI_MADT_MSI_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */
+};
+
+enum acpi_madt_lpc_pic_version {
+ ACPI_MADT_LPC_PIC_VERSION_NONE = 0,
+ ACPI_MADT_LPC_PIC_VERSION_V1 = 1,
+ ACPI_MADT_LPC_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */
+};
+
+#pragma pack(1)
+
+/* Core Interrupt Controller */
+
+struct acpi_madt_core_pic {
+ struct acpi_subtable_header header;
+ u8 version;
+ u32 processor_id;
+ u32 core_id;
+ u32 flags;
+};
+
+/* Legacy I/O Interrupt Controller */
+
+struct acpi_madt_lio_pic {
+ struct acpi_subtable_header header;
+ u8 version;
+ u64 address;
+ u16 size;
+ u8 cascade[2];
+ u32 cascade_map[2];
+};
+
+/* Extend I/O Interrupt Controller */
+
+struct acpi_madt_eio_pic {
+ struct acpi_subtable_header header;
+ u8 version;
+ u8 cascade;
+ u8 node;
+ u64 node_map;
+};
+
+/* HT Interrupt Controller */
+
+struct acpi_madt_ht_pic {
+ struct acpi_subtable_header header;
+ u8 version;
+ u64 address;
+ u16 size;
+ u8 cascade[8];
+};
+
+/* Bridge I/O Interrupt Controller */
+
+struct acpi_madt_bio_pic {
+ struct acpi_subtable_header header;
+ u8 version;
+ u64 address;
+ u16 size;
+ u16 id;
+ u16 gsi_base;
+};
+
+/* MSI Interrupt Controller */
+
+struct acpi_madt_msi_pic {
+ struct acpi_subtable_header header;
+ u8 version;
+ u64 msg_address;
+ u32 start;
+ u32 count;
+};
+
+/* LPC Interrupt Controller */
+
+struct acpi_madt_lpc_pic {
+ struct acpi_subtable_header header;
+ u8 version;
+ u64 address;
+ u16 size;
+ u8 cascade;
+};
+
+#pragma pack()
+
+#endif
+
#endif /* !CONFIG_ACPI */
#define ACPI_TABLE_UPGRADE_MAX_PHYS ARCH_LOW_ADDRESS_LIMIT
diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h
index a1a04083bd67..be037a40580d 100644
--- a/arch/loongarch/include/asm/asmmacro.h
+++ b/arch/loongarch/include/asm/asmmacro.h
@@ -274,16 +274,4 @@
nor \dst, \src, zero
.endm
-.macro bgt r0 r1 label
- blt \r1, \r0, \label
-.endm
-
-.macro bltz r0 label
- blt \r0, zero, \label
-.endm
-
-.macro bgez r0 label
- bge \r0, zero, \label
-.endm
-
#endif /* _ASM_ASMMACRO_H */
diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h
index 979367ad4e2c..6b9aca9ab6e9 100644
--- a/arch/loongarch/include/asm/atomic.h
+++ b/arch/loongarch/include/asm/atomic.h
@@ -10,7 +10,6 @@
#include <linux/types.h>
#include <asm/barrier.h>
#include <asm/cmpxchg.h>
-#include <asm/compiler.h>
#if __SIZEOF_LONG__ == 4
#define __LL "ll.w "
@@ -157,27 +156,25 @@ static inline int arch_atomic_sub_if_positive(int i, atomic_t *v)
__asm__ __volatile__(
"1: ll.w %1, %2 # atomic_sub_if_positive\n"
" addi.w %0, %1, %3 \n"
- " or %1, %0, $zero \n"
- " blt %0, $zero, 2f \n"
+ " move %1, %0 \n"
+ " bltz %0, 2f \n"
" sc.w %1, %2 \n"
- " beq $zero, %1, 1b \n"
+ " beqz %1, 1b \n"
"2: \n"
__WEAK_LLSC_MB
- : "=&r" (result), "=&r" (temp),
- "+" GCC_OFF_SMALL_ASM() (v->counter)
+ : "=&r" (result), "=&r" (temp), "+ZC" (v->counter)
: "I" (-i));
} else {
__asm__ __volatile__(
"1: ll.w %1, %2 # atomic_sub_if_positive\n"
" sub.w %0, %1, %3 \n"
- " or %1, %0, $zero \n"
- " blt %0, $zero, 2f \n"
+ " move %1, %0 \n"
+ " bltz %0, 2f \n"
" sc.w %1, %2 \n"
- " beq $zero, %1, 1b \n"
+ " beqz %1, 1b \n"
"2: \n"
__WEAK_LLSC_MB
- : "=&r" (result), "=&r" (temp),
- "+" GCC_OFF_SMALL_ASM() (v->counter)
+ : "=&r" (result), "=&r" (temp), "+ZC" (v->counter)
: "r" (i));
}
@@ -320,27 +317,25 @@ static inline long arch_atomic64_sub_if_positive(long i, atomic64_t *v)
__asm__ __volatile__(
"1: ll.d %1, %2 # atomic64_sub_if_positive \n"
" addi.d %0, %1, %3 \n"
- " or %1, %0, $zero \n"
- " blt %0, $zero, 2f \n"
+ " move %1, %0 \n"
+ " bltz %0, 2f \n"
" sc.d %1, %2 \n"
- " beq %1, $zero, 1b \n"
+ " beqz %1, 1b \n"
"2: \n"
__WEAK_LLSC_MB
- : "=&r" (result), "=&r" (temp),
- "+" GCC_OFF_SMALL_ASM() (v->counter)
+ : "=&r" (result), "=&r" (temp), "+ZC" (v->counter)
: "I" (-i));
} else {
__asm__ __volatile__(
"1: ll.d %1, %2 # atomic64_sub_if_positive \n"
" sub.d %0, %1, %3 \n"
- " or %1, %0, $zero \n"
- " blt %0, $zero, 2f \n"
+ " move %1, %0 \n"
+ " bltz %0, 2f \n"
" sc.d %1, %2 \n"
- " beq %1, $zero, 1b \n"
+ " beqz %1, 1b \n"
"2: \n"
__WEAK_LLSC_MB
- : "=&r" (result), "=&r" (temp),
- "+" GCC_OFF_SMALL_ASM() (v->counter)
+ : "=&r" (result), "=&r" (temp), "+ZC" (v->counter)
: "r" (i));
}
diff --git a/arch/loongarch/include/asm/barrier.h b/arch/loongarch/include/asm/barrier.h
index b6517eeeb141..cda977675854 100644
--- a/arch/loongarch/include/asm/barrier.h
+++ b/arch/loongarch/include/asm/barrier.h
@@ -48,9 +48,9 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
__asm__ __volatile__(
"sltu %0, %1, %2\n\t"
#if (__SIZEOF_LONG__ == 4)
- "sub.w %0, $r0, %0\n\t"
+ "sub.w %0, $zero, %0\n\t"
#elif (__SIZEOF_LONG__ == 8)
- "sub.d %0, $r0, %0\n\t"
+ "sub.d %0, $zero, %0\n\t"
#endif
: "=r" (mask)
: "r" (index), "r" (size)
diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h
index 75b3a4478652..0a9b0fac1eee 100644
--- a/arch/loongarch/include/asm/cmpxchg.h
+++ b/arch/loongarch/include/asm/cmpxchg.h
@@ -55,9 +55,9 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
__asm__ __volatile__( \
"1: " ld " %0, %2 # __cmpxchg_asm \n" \
" bne %0, %z3, 2f \n" \
- " or $t0, %z4, $zero \n" \
+ " move $t0, %z4 \n" \
" " st " $t0, %1 \n" \
- " beq $zero, $t0, 1b \n" \
+ " beqz $t0, 1b \n" \
"2: \n" \
__WEAK_LLSC_MB \
: "=&r" (__ret), "=ZB"(*m) \
diff --git a/arch/loongarch/include/asm/compiler.h b/arch/loongarch/include/asm/compiler.h
deleted file mode 100644
index 657cebe70ace..000000000000
--- a/arch/loongarch/include/asm/compiler.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
- */
-#ifndef _ASM_COMPILER_H
-#define _ASM_COMPILER_H
-
-#define GCC_OFF_SMALL_ASM() "ZC"
-
-#define LOONGARCH_ISA_LEVEL "loongarch"
-#define LOONGARCH_ISA_ARCH_LEVEL "arch=loongarch"
-#define LOONGARCH_ISA_LEVEL_RAW loongarch
-#define LOONGARCH_ISA_ARCH_LEVEL_RAW LOONGARCH_ISA_LEVEL_RAW
-
-#endif /* _ASM_COMPILER_H */
diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h
index f3960b18a90e..5f3ff4781fda 100644
--- a/arch/loongarch/include/asm/elf.h
+++ b/arch/loongarch/include/asm/elf.h
@@ -288,8 +288,6 @@ struct arch_elf_state {
.interp_fp_abi = LOONGARCH_ABI_FP_ANY, \
}
-#define elf_read_implies_exec(ex, exec_stk) (exec_stk == EXSTACK_DEFAULT)
-
extern int arch_elf_pt_proc(void *ehdr, void *phdr, struct file *elf,
bool is_interp, struct arch_elf_state *state);
diff --git a/arch/loongarch/include/asm/futex.h b/arch/loongarch/include/asm/futex.h
index 9de8231694ec..feb6658c84ff 100644
--- a/arch/loongarch/include/asm/futex.h
+++ b/arch/loongarch/include/asm/futex.h
@@ -8,7 +8,6 @@
#include <linux/futex.h>
#include <linux/uaccess.h>
#include <asm/barrier.h>
-#include <asm/compiler.h>
#include <asm/errno.h>
#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
@@ -17,7 +16,7 @@
"1: ll.w %1, %4 # __futex_atomic_op\n" \
" " insn " \n" \
"2: sc.w $t0, %2 \n" \
- " beq $t0, $zero, 1b \n" \
+ " beqz $t0, 1b \n" \
"3: \n" \
" .section .fixup,\"ax\" \n" \
"4: li.w %0, %6 \n" \
@@ -82,9 +81,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv
"# futex_atomic_cmpxchg_inatomic \n"
"1: ll.w %1, %3 \n"
" bne %1, %z4, 3f \n"
- " or $t0, %z5, $zero \n"
+ " move $t0, %z5 \n"
"2: sc.w $t0, %2 \n"
- " beq $zero, $t0, 1b \n"
+ " beqz $t0, 1b \n"
"3: \n"
__WEAK_LLSC_MB
" .section .fixup,\"ax\" \n"
@@ -95,8 +94,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv
" "__UA_ADDR "\t1b, 4b \n"
" "__UA_ADDR "\t2b, 4b \n"
" .previous \n"
- : "+r" (ret), "=&r" (val), "=" GCC_OFF_SMALL_ASM() (*uaddr)
- : GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
+ : "+r" (ret), "=&r" (val), "=ZC" (*uaddr)
+ : "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval),
"i" (-EFAULT)
: "memory", "t0");
diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
index ace3ea6da72e..149b2123e7f4 100644
--- a/arch/loongarch/include/asm/irq.h
+++ b/arch/loongarch/include/asm/irq.h
@@ -35,9 +35,6 @@ static inline bool on_irq_stack(int cpu, unsigned long sp)
return (low <= sp && sp <= high);
}
-int get_ipi_irq(void);
-int get_pmc_irq(void);
-int get_timer_irq(void);
void spurious_interrupt(void);
#define NR_IRQS_LEGACY 16
@@ -48,6 +45,14 @@ void arch_trigger_cpumask_backtrace(const struct cpumask *mask, bool exclude_sel
#define MAX_IO_PICS 2
#define NR_IRQS (64 + (256 * MAX_IO_PICS))
+struct acpi_vector_group {
+ int node;
+ int pci_segment;
+ struct irq_domain *parent;
+};
+extern struct acpi_vector_group pch_group[MAX_IO_PICS];
+extern struct acpi_vector_group msi_group[MAX_IO_PICS];
+
#define CORES_PER_EIO_NODE 4
#define LOONGSON_CPU_UART0_VEC 10 /* CPU UART0 */
@@ -79,15 +84,6 @@ void arch_trigger_cpumask_backtrace(const struct cpumask *mask, bool exclude_sel
extern int find_pch_pic(u32 gsi);
extern int eiointc_get_node(int id);
-static inline void eiointc_enable(void)
-{
- uint64_t misc;
-
- misc = iocsr_read64(LOONGARCH_IOCSR_MISC_FUNC);
- misc |= IOCSR_MISC_FUNC_EXT_IOI_EN;
- iocsr_write64(misc, LOONGARCH_IOCSR_MISC_FUNC);
-}
-
struct acpi_madt_lio_pic;
struct acpi_madt_eio_pic;
struct acpi_madt_ht_pic;
@@ -95,21 +91,29 @@ struct acpi_madt_bio_pic;
struct acpi_madt_msi_pic;
struct acpi_madt_lpc_pic;
-struct irq_domain *loongarch_cpu_irq_init(void);
-
-struct irq_domain *liointc_acpi_init(struct irq_domain *parent,
+int liointc_acpi_init(struct irq_domain *parent,
struct acpi_madt_lio_pic *acpi_liointc);
-struct irq_domain *eiointc_acpi_init(struct irq_domain *parent,
+int eiointc_acpi_init(struct irq_domain *parent,
struct acpi_madt_eio_pic *acpi_eiointc);
struct irq_domain *htvec_acpi_init(struct irq_domain *parent,
struct acpi_madt_ht_pic *acpi_htvec);
-struct irq_domain *pch_lpc_acpi_init(struct irq_domain *parent,
+int pch_lpc_acpi_init(struct irq_domain *parent,
struct acpi_madt_lpc_pic *acpi_pchlpc);
-struct irq_domain *pch_msi_acpi_init(struct irq_domain *parent,
+#if IS_ENABLED(CONFIG_LOONGSON_PCH_MSI)
+int pch_msi_acpi_init(struct irq_domain *parent,
struct acpi_madt_msi_pic *acpi_pchmsi);
-struct irq_domain *pch_pic_acpi_init(struct irq_domain *parent,
+#else
+static inline int pch_msi_acpi_init(struct irq_domain *parent,
+ struct acpi_madt_msi_pic *acpi_pchmsi)
+{
+ return 0;
+}
+#endif
+int pch_pic_acpi_init(struct irq_domain *parent,
struct acpi_madt_bio_pic *acpi_pchpic);
+int find_pch_pic(u32 gsi);
+struct fwnode_handle *get_pch_msi_handle(int pci_segment);
extern struct acpi_madt_lio_pic *acpi_liointc;
extern struct acpi_madt_eio_pic *acpi_eiointc[MAX_IO_PICS];
@@ -119,11 +123,10 @@ extern struct acpi_madt_lpc_pic *acpi_pchlpc;
extern struct acpi_madt_msi_pic *acpi_pchmsi[MAX_IO_PICS];
extern struct acpi_madt_bio_pic *acpi_pchpic[MAX_IO_PICS];
-extern struct irq_domain *cpu_domain;
-extern struct irq_domain *liointc_domain;
-extern struct irq_domain *pch_lpc_domain;
-extern struct irq_domain *pch_msi_domain[MAX_IO_PICS];
-extern struct irq_domain *pch_pic_domain[MAX_IO_PICS];
+extern struct fwnode_handle *cpuintc_handle;
+extern struct fwnode_handle *liointc_handle;
+extern struct fwnode_handle *pch_lpc_handle;
+extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS];
extern irqreturn_t loongson3_ipi_interrupt(int irq, void *dev);
diff --git a/arch/loongarch/include/asm/irqflags.h b/arch/loongarch/include/asm/irqflags.h
index 52121cd791fe..319a8c616f1f 100644
--- a/arch/loongarch/include/asm/irqflags.h
+++ b/arch/loongarch/include/asm/irqflags.h
@@ -9,7 +9,6 @@
#include <linux/compiler.h>
#include <linux/stringify.h>
-#include <asm/compiler.h>
#include <asm/loongarch.h>
static inline void arch_local_irq_enable(void)
diff --git a/arch/loongarch/include/asm/local.h b/arch/loongarch/include/asm/local.h
index 2052a2267337..65fbbae9fc4d 100644
--- a/arch/loongarch/include/asm/local.h
+++ b/arch/loongarch/include/asm/local.h
@@ -9,7 +9,6 @@
#include <linux/bitops.h>
#include <linux/atomic.h>
#include <asm/cmpxchg.h>
-#include <asm/compiler.h>
typedef struct {
atomic_long_t a;
diff --git a/arch/loongarch/include/asm/loongson.h b/arch/loongarch/include/asm/loongson.h
index 6a8038725ba7..6e8f6972ceb6 100644
--- a/arch/loongarch/include/asm/loongson.h
+++ b/arch/loongarch/include/asm/loongson.h
@@ -39,18 +39,6 @@ extern const struct plat_smp_ops loongson3_smp_ops;
#define MAX_PACKAGES 16
-/* Chip Config register of each physical cpu package */
-extern u64 loongson_chipcfg[MAX_PACKAGES];
-#define LOONGSON_CHIPCFG(id) (*(volatile u32 *)(loongson_chipcfg[id]))
-
-/* Chip Temperature register of each physical cpu package */
-extern u64 loongson_chiptemp[MAX_PACKAGES];
-#define LOONGSON_CHIPTEMP(id) (*(volatile u32 *)(loongson_chiptemp[id]))
-
-/* Freq Control register of each physical cpu package */
-extern u64 loongson_freqctrl[MAX_PACKAGES];
-#define LOONGSON_FREQCTRL(id) (*(volatile u32 *)(loongson_freqctrl[id]))
-
#define xconf_readl(addr) readl(addr)
#define xconf_readq(addr) readq(addr)
@@ -58,7 +46,7 @@ static inline void xconf_writel(u32 val, volatile void __iomem *addr)
{
asm volatile (
" st.w %[v], %[hw], 0 \n"
- " ld.b $r0, %[hw], 0 \n"
+ " ld.b $zero, %[hw], 0 \n"
:
: [hw] "r" (addr), [v] "r" (val)
);
@@ -68,7 +56,7 @@ static inline void xconf_writeq(u64 val64, volatile void __iomem *addr)
{
asm volatile (
" st.d %[v], %[hw], 0 \n"
- " ld.b $r0, %[hw], 0 \n"
+ " ld.b $zero, %[hw], 0 \n"
:
: [hw] "r" (addr), [v] "r" (val64)
);
diff --git a/arch/loongarch/include/asm/stacktrace.h b/arch/loongarch/include/asm/stacktrace.h
index 26483e396ad1..6b5c2a7aa706 100644
--- a/arch/loongarch/include/asm/stacktrace.h
+++ b/arch/loongarch/include/asm/stacktrace.h
@@ -23,13 +23,13 @@
static __always_inline void prepare_frametrace(struct pt_regs *regs)
{
__asm__ __volatile__(
- /* Save $r1 */
+ /* Save $ra */
STORE_ONE_REG(1)
- /* Use $r1 to save PC */
- "pcaddi $r1, 0\n\t"
- STR_LONG_S " $r1, %0\n\t"
- /* Restore $r1 */
- STR_LONG_L " $r1, %1, "STR_LONGSIZE"\n\t"
+ /* Use $ra to save PC */
+ "pcaddi $ra, 0\n\t"
+ STR_LONG_S " $ra, %0\n\t"
+ /* Restore $ra */
+ STR_LONG_L " $ra, %1, "STR_LONGSIZE"\n\t"
STORE_ONE_REG(2)
STORE_ONE_REG(3)
STORE_ONE_REG(4)
diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h
index 99beb11c2fa8..b7dd9f19a5a9 100644
--- a/arch/loongarch/include/asm/thread_info.h
+++ b/arch/loongarch/include/asm/thread_info.h
@@ -44,14 +44,14 @@ struct thread_info {
}
/* How to get the thread information struct from C. */
-register struct thread_info *__current_thread_info __asm__("$r2");
+register struct thread_info *__current_thread_info __asm__("$tp");
static inline struct thread_info *current_thread_info(void)
{
return __current_thread_info;
}
-register unsigned long current_stack_pointer __asm__("$r3");
+register unsigned long current_stack_pointer __asm__("$sp");
#endif /* !__ASSEMBLY__ */
diff --git a/arch/loongarch/include/asm/tlb.h b/arch/loongarch/include/asm/tlb.h
index 4f629ae9d5a9..dd24f5898f65 100644
--- a/arch/loongarch/include/asm/tlb.h
+++ b/arch/loongarch/include/asm/tlb.h
@@ -137,16 +137,6 @@ static inline void invtlb_all(u32 op, u32 info, u64 addr)
);
}
-/*
- * LoongArch doesn't need any special per-pte or per-vma handling, except
- * we need to flush cache for area to be unmapped.
- */
-#define tlb_start_vma(tlb, vma) \
- do { \
- if (!(tlb)->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
- } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
static void tlb_flush(struct mmu_gather *tlb);
diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h
index 217c6a3727b1..2b44edc604a2 100644
--- a/arch/loongarch/include/asm/uaccess.h
+++ b/arch/loongarch/include/asm/uaccess.h
@@ -162,7 +162,7 @@ do { \
"2: \n" \
" .section .fixup,\"ax\" \n" \
"3: li.w %0, %3 \n" \
- " or %1, $r0, $r0 \n" \
+ " move %1, $zero \n" \
" b 2b \n" \
" .previous \n" \
" .section __ex_table,\"a\" \n" \
diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c
index bb729ee8a237..03aa14581d0a 100644
--- a/arch/loongarch/kernel/acpi.c
+++ b/arch/loongarch/kernel/acpi.c
@@ -25,7 +25,6 @@ EXPORT_SYMBOL(acpi_pci_disabled);
int acpi_strict = 1; /* We have no workarounds on LoongArch */
int num_processors;
int disabled_cpus;
-enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
u64 acpi_saved_sp;
@@ -33,70 +32,6 @@ u64 acpi_saved_sp;
#define PREFIX "ACPI: "
-int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
-{
- if (irqp != NULL)
- *irqp = acpi_register_gsi(NULL, gsi, -1, -1);
- return (*irqp >= 0) ? 0 : -EINVAL;
-}
-EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);
-
-int acpi_isa_irq_to_gsi(unsigned int isa_irq, u32 *gsi)
-{
- if (gsi)
- *gsi = isa_irq;
- return 0;
-}
-
-/*
- * success: return IRQ number (>=0)
- * failure: return < 0
- */
-int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
-{
- struct irq_fwspec fwspec;
-
- switch (gsi) {
- case GSI_MIN_CPU_IRQ ... GSI_MAX_CPU_IRQ:
- fwspec.fwnode = liointc_domain->fwnode;
- fwspec.param[0] = gsi - GSI_MIN_CPU_IRQ;
- fwspec.param_count = 1;
-
- return irq_create_fwspec_mapping(&fwspec);
-
- case GSI_MIN_LPC_IRQ ... GSI_MAX_LPC_IRQ:
- if (!pch_lpc_domain)
- return -EINVAL;
-
- fwspec.fwnode = pch_lpc_domain->fwnode;
- fwspec.param[0] = gsi - GSI_MIN_LPC_IRQ;
- fwspec.param[1] = acpi_dev_get_irq_type(trigger, polarity);
- fwspec.param_count = 2;
-
- return irq_create_fwspec_mapping(&fwspec);
-
- case GSI_MIN_PCH_IRQ ... GSI_MAX_PCH_IRQ:
- if (!pch_pic_domain[0])
- return -EINVAL;
-
- fwspec.fwnode = pch_pic_domain[0]->fwnode;
- fwspec.param[0] = gsi - GSI_MIN_PCH_IRQ;
- fwspec.param[1] = IRQ_TYPE_LEVEL_HIGH;
- fwspec.param_count = 2;
-
- return irq_create_fwspec_mapping(&fwspec);
- }
-
- return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(acpi_register_gsi);
-
-void acpi_unregister_gsi(u32 gsi)
-{
-
-}
-EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
-
void __init __iomem * __acpi_map_table(unsigned long phys, unsigned long size)
{
diff --git a/arch/loongarch/kernel/cacheinfo.c b/arch/loongarch/kernel/cacheinfo.c
index b38f5489d094..4662b06269f4 100644
--- a/arch/loongarch/kernel/cacheinfo.c
+++ b/arch/loongarch/kernel/cacheinfo.c
@@ -4,8 +4,9 @@
*
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
-#include <asm/cpu-info.h>
#include <linux/cacheinfo.h>
+#include <asm/bootinfo.h>
+#include <asm/cpu-info.h>
/* Populates leaf and increments to next leaf */
#define populate_cache(cache, leaf, c_level, c_type) \
@@ -17,6 +18,8 @@ do { \
leaf->ways_of_associativity = c->cache.ways; \
leaf->size = c->cache.linesz * c->cache.sets * \
c->cache.ways; \
+ if (leaf->level > 2) \
+ leaf->size *= nodes_per_package; \
leaf++; \
} while (0)
@@ -95,11 +98,15 @@ static void cache_cpumap_setup(unsigned int cpu)
int populate_cache_leaves(unsigned int cpu)
{
- int level = 1;
+ int level = 1, nodes_per_package = 1;
struct cpuinfo_loongarch *c = &current_cpu_data;
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+ if (loongson_sysconf.nr_nodes > 1)
+ nodes_per_package = loongson_sysconf.cores_per_package
+ / loongson_sysconf.cores_per_node;
+
if (c->icache.waysize) {
populate_cache(dcache, this_leaf, level, CACHE_TYPE_DATA);
populate_cache(icache, this_leaf, level++, CACHE_TYPE_INST);
diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
index d5b3dbcf5425..d53b631c9022 100644
--- a/arch/loongarch/kernel/entry.S
+++ b/arch/loongarch/kernel/entry.S
@@ -27,7 +27,7 @@ SYM_FUNC_START(handle_syscall)
addi.d sp, sp, -PT_SIZE
cfi_st t2, PT_R3
- cfi_rel_offset sp, PT_R3
+ cfi_rel_offset sp, PT_R3
st.d zero, sp, PT_R0
csrrd t2, LOONGARCH_CSR_PRMD
st.d t2, sp, PT_PRMD
@@ -50,7 +50,7 @@ SYM_FUNC_START(handle_syscall)
cfi_st a7, PT_R11
csrrd ra, LOONGARCH_CSR_ERA
st.d ra, sp, PT_ERA
- cfi_rel_offset ra, PT_ERA
+ cfi_rel_offset ra, PT_ERA
cfi_st tp, PT_R2
cfi_st u0, PT_R21
diff --git a/arch/loongarch/kernel/env.c b/arch/loongarch/kernel/env.c
index 467946ecf451..82b478a5c665 100644
--- a/arch/loongarch/kernel/env.c
+++ b/arch/loongarch/kernel/env.c
@@ -17,21 +17,6 @@ u64 efi_system_table;
struct loongson_system_configuration loongson_sysconf;
EXPORT_SYMBOL(loongson_sysconf);
-u64 loongson_chipcfg[MAX_PACKAGES];
-u64 loongson_chiptemp[MAX_PACKAGES];
-u64 loongson_freqctrl[MAX_PACKAGES];
-unsigned long long smp_group[MAX_PACKAGES];
-
-static void __init register_addrs_set(u64 *registers, const u64 addr, int num)
-{
- u64 i;
-
- for (i = 0; i < num; i++) {
- *registers = (i << 44) | addr;
- registers++;
- }
-}
-
void __init init_environ(void)
{
int efi_boot = fw_arg0;
@@ -50,11 +35,6 @@ void __init init_environ(void)
efi_memmap_init_early(&data);
memblock_reserve(data.phys_map & PAGE_MASK,
PAGE_ALIGN(data.size + (data.phys_map & ~PAGE_MASK)));
-
- register_addrs_set(smp_group, TO_UNCACHE(0x1fe01000), 16);
- register_addrs_set(loongson_chipcfg, TO_UNCACHE(0x1fe00180), 16);
- register_addrs_set(loongson_chiptemp, TO_UNCACHE(0x1fe0019c), 16);
- register_addrs_set(loongson_freqctrl, TO_UNCACHE(0x1fe001d0), 16);
}
static int __init init_cpu_fullname(void)
diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S
index a631a7137667..576b3370a296 100644
--- a/arch/loongarch/kernel/fpu.S
+++ b/arch/loongarch/kernel/fpu.S
@@ -27,78 +27,78 @@
.endm
.macro sc_save_fp base
- EX fst.d $f0, \base, (0 * FPU_REG_WIDTH)
- EX fst.d $f1, \base, (1 * FPU_REG_WIDTH)
- EX fst.d $f2, \base, (2 * FPU_REG_WIDTH)
- EX fst.d $f3, \base, (3 * FPU_REG_WIDTH)
- EX fst.d $f4, \base, (4 * FPU_REG_WIDTH)
- EX fst.d $f5, \base, (5 * FPU_REG_WIDTH)
- EX fst.d $f6, \base, (6 * FPU_REG_WIDTH)
- EX fst.d $f7, \base, (7 * FPU_REG_WIDTH)
- EX fst.d $f8, \base, (8 * FPU_REG_WIDTH)
- EX fst.d $f9, \base, (9 * FPU_REG_WIDTH)
- EX fst.d $f10, \base, (10 * FPU_REG_WIDTH)
- EX fst.d $f11, \base, (11 * FPU_REG_WIDTH)
- EX fst.d $f12, \base, (12 * FPU_REG_WIDTH)
- EX fst.d $f13, \base, (13 * FPU_REG_WIDTH)
- EX fst.d $f14, \base, (14 * FPU_REG_WIDTH)
- EX fst.d $f15, \base, (15 * FPU_REG_WIDTH)
- EX fst.d $f16, \base, (16 * FPU_REG_WIDTH)
- EX fst.d $f17, \base, (17 * FPU_REG_WIDTH)
- EX fst.d $f18, \base, (18 * FPU_REG_WIDTH)
- EX fst.d $f19, \base, (19 * FPU_REG_WIDTH)
- EX fst.d $f20, \base, (20 * FPU_REG_WIDTH)
- EX fst.d $f21, \base, (21 * FPU_REG_WIDTH)
- EX fst.d $f22, \base, (22 * FPU_REG_WIDTH)
- EX fst.d $f23, \base, (23 * FPU_REG_WIDTH)
- EX fst.d $f24, \base, (24 * FPU_REG_WIDTH)
- EX fst.d $f25, \base, (25 * FPU_REG_WIDTH)
- EX fst.d $f26, \base, (26 * FPU_REG_WIDTH)
- EX fst.d $f27, \base, (27 * FPU_REG_WIDTH)
- EX fst.d $f28, \base, (28 * FPU_REG_WIDTH)
- EX fst.d $f29, \base, (29 * FPU_REG_WIDTH)
- EX fst.d $f30, \base, (30 * FPU_REG_WIDTH)
- EX fst.d $f31, \base, (31 * FPU_REG_WIDTH)
+ EX fst.d $f0, \base, (0 * FPU_REG_WIDTH)
+ EX fst.d $f1, \base, (1 * FPU_REG_WIDTH)
+ EX fst.d $f2, \base, (2 * FPU_REG_WIDTH)
+ EX fst.d $f3, \base, (3 * FPU_REG_WIDTH)
+ EX fst.d $f4, \base, (4 * FPU_REG_WIDTH)
+ EX fst.d $f5, \base, (5 * FPU_REG_WIDTH)
+ EX fst.d $f6, \base, (6 * FPU_REG_WIDTH)
+ EX fst.d $f7, \base, (7 * FPU_REG_WIDTH)
+ EX fst.d $f8, \base, (8 * FPU_REG_WIDTH)
+ EX fst.d $f9, \base, (9 * FPU_REG_WIDTH)
+ EX fst.d $f10, \base, (10 * FPU_REG_WIDTH)
+ EX fst.d $f11, \base, (11 * FPU_REG_WIDTH)
+ EX fst.d $f12, \base, (12 * FPU_REG_WIDTH)
+ EX fst.d $f13, \base, (13 * FPU_REG_WIDTH)
+ EX fst.d $f14, \base, (14 * FPU_REG_WIDTH)
+ EX fst.d $f15, \base, (15 * FPU_REG_WIDTH)
+ EX fst.d $f16, \base, (16 * FPU_REG_WIDTH)
+ EX fst.d $f17, \base, (17 * FPU_REG_WIDTH)
+ EX fst.d $f18, \base, (18 * FPU_REG_WIDTH)
+ EX fst.d $f19, \base, (19 * FPU_REG_WIDTH)
+ EX fst.d $f20, \base, (20 * FPU_REG_WIDTH)
+ EX fst.d $f21, \base, (21 * FPU_REG_WIDTH)
+ EX fst.d $f22, \base, (22 * FPU_REG_WIDTH)
+ EX fst.d $f23, \base, (23 * FPU_REG_WIDTH)
+ EX fst.d $f24, \base, (24 * FPU_REG_WIDTH)
+ EX fst.d $f25, \base, (25 * FPU_REG_WIDTH)
+ EX fst.d $f26, \base, (26 * FPU_REG_WIDTH)
+ EX fst.d $f27, \base, (27 * FPU_REG_WIDTH)
+ EX fst.d $f28, \base, (28 * FPU_REG_WIDTH)
+ EX fst.d $f29, \base, (29 * FPU_REG_WIDTH)
+ EX fst.d $f30, \base, (30 * FPU_REG_WIDTH)
+ EX fst.d $f31, \base, (31 * FPU_REG_WIDTH)
.endm
.macro sc_restore_fp base
- EX fld.d $f0, \base, (0 * FPU_REG_WIDTH)
- EX fld.d $f1, \base, (1 * FPU_REG_WIDTH)
- EX fld.d $f2, \base, (2 * FPU_REG_WIDTH)
- EX fld.d $f3, \base, (3 * FPU_REG_WIDTH)
- EX fld.d $f4, \base, (4 * FPU_REG_WIDTH)
- EX fld.d $f5, \base, (5 * FPU_REG_WIDTH)
- EX fld.d $f6, \base, (6 * FPU_REG_WIDTH)
- EX fld.d $f7, \base, (7 * FPU_REG_WIDTH)
- EX fld.d $f8, \base, (8 * FPU_REG_WIDTH)
- EX fld.d $f9, \base, (9 * FPU_REG_WIDTH)
- EX fld.d $f10, \base, (10 * FPU_REG_WIDTH)
- EX fld.d $f11, \base, (11 * FPU_REG_WIDTH)
- EX fld.d $f12, \base, (12 * FPU_REG_WIDTH)
- EX fld.d $f13, \base, (13 * FPU_REG_WIDTH)
- EX fld.d $f14, \base, (14 * FPU_REG_WIDTH)
- EX fld.d $f15, \base, (15 * FPU_REG_WIDTH)
- EX fld.d $f16, \base, (16 * FPU_REG_WIDTH)
- EX fld.d $f17, \base, (17 * FPU_REG_WIDTH)
- EX fld.d $f18, \base, (18 * FPU_REG_WIDTH)
- EX fld.d $f19, \base, (19 * FPU_REG_WIDTH)
- EX fld.d $f20, \base, (20 * FPU_REG_WIDTH)
- EX fld.d $f21, \base, (21 * FPU_REG_WIDTH)
- EX fld.d $f22, \base, (22 * FPU_REG_WIDTH)
- EX fld.d $f23, \base, (23 * FPU_REG_WIDTH)
- EX fld.d $f24, \base, (24 * FPU_REG_WIDTH)
- EX fld.d $f25, \base, (25 * FPU_REG_WIDTH)
- EX fld.d $f26, \base, (26 * FPU_REG_WIDTH)
- EX fld.d $f27, \base, (27 * FPU_REG_WIDTH)
- EX fld.d $f28, \base, (28 * FPU_REG_WIDTH)
- EX fld.d $f29, \base, (29 * FPU_REG_WIDTH)
- EX fld.d $f30, \base, (30 * FPU_REG_WIDTH)
- EX fld.d $f31, \base, (31 * FPU_REG_WIDTH)
+ EX fld.d $f0, \base, (0 * FPU_REG_WIDTH)
+ EX fld.d $f1, \base, (1 * FPU_REG_WIDTH)
+ EX fld.d $f2, \base, (2 * FPU_REG_WIDTH)
+ EX fld.d $f3, \base, (3 * FPU_REG_WIDTH)
+ EX fld.d $f4, \base, (4 * FPU_REG_WIDTH)
+ EX fld.d $f5, \base, (5 * FPU_REG_WIDTH)
+ EX fld.d $f6, \base, (6 * FPU_REG_WIDTH)
+ EX fld.d $f7, \base, (7 * FPU_REG_WIDTH)
+ EX fld.d $f8, \base, (8 * FPU_REG_WIDTH)
+ EX fld.d $f9, \base, (9 * FPU_REG_WIDTH)
+ EX fld.d $f10, \base, (10 * FPU_REG_WIDTH)
+ EX fld.d $f11, \base, (11 * FPU_REG_WIDTH)
+ EX fld.d $f12, \base, (12 * FPU_REG_WIDTH)
+ EX fld.d $f13, \base, (13 * FPU_REG_WIDTH)
+ EX fld.d $f14, \base, (14 * FPU_REG_WIDTH)
+ EX fld.d $f15, \base, (15 * FPU_REG_WIDTH)
+ EX fld.d $f16, \base, (16 * FPU_REG_WIDTH)
+ EX fld.d $f17, \base, (17 * FPU_REG_WIDTH)
+ EX fld.d $f18, \base, (18 * FPU_REG_WIDTH)
+ EX fld.d $f19, \base, (19 * FPU_REG_WIDTH)
+ EX fld.d $f20, \base, (20 * FPU_REG_WIDTH)
+ EX fld.d $f21, \base, (21 * FPU_REG_WIDTH)
+ EX fld.d $f22, \base, (22 * FPU_REG_WIDTH)
+ EX fld.d $f23, \base, (23 * FPU_REG_WIDTH)
+ EX fld.d $f24, \base, (24 * FPU_REG_WIDTH)
+ EX fld.d $f25, \base, (25 * FPU_REG_WIDTH)
+ EX fld.d $f26, \base, (26 * FPU_REG_WIDTH)
+ EX fld.d $f27, \base, (27 * FPU_REG_WIDTH)
+ EX fld.d $f28, \base, (28 * FPU_REG_WIDTH)
+ EX fld.d $f29, \base, (29 * FPU_REG_WIDTH)
+ EX fld.d $f30, \base, (30 * FPU_REG_WIDTH)
+ EX fld.d $f31, \base, (31 * FPU_REG_WIDTH)
.endm
.macro sc_save_fcc base, tmp0, tmp1
movcf2gr \tmp0, $fcc0
- move \tmp1, \tmp0
+ move \tmp1, \tmp0
movcf2gr \tmp0, $fcc1
bstrins.d \tmp1, \tmp0, 15, 8
movcf2gr \tmp0, $fcc2
@@ -113,11 +113,11 @@
bstrins.d \tmp1, \tmp0, 55, 48
movcf2gr \tmp0, $fcc7
bstrins.d \tmp1, \tmp0, 63, 56
- EX st.d \tmp1, \base, 0
+ EX st.d \tmp1, \base, 0
.endm
.macro sc_restore_fcc base, tmp0, tmp1
- EX ld.d \tmp0, \base, 0
+ EX ld.d \tmp0, \base, 0
bstrpick.d \tmp1, \tmp0, 7, 0
movgr2cf $fcc0, \tmp1
bstrpick.d \tmp1, \tmp0, 15, 8
@@ -138,11 +138,11 @@
.macro sc_save_fcsr base, tmp0
movfcsr2gr \tmp0, fcsr0
- EX st.w \tmp0, \base, 0
+ EX st.w \tmp0, \base, 0
.endm
.macro sc_restore_fcsr base, tmp0
- EX ld.w \tmp0, \base, 0
+ EX ld.w \tmp0, \base, 0
movgr2fcsr fcsr0, \tmp0
.endm
@@ -151,9 +151,9 @@
*/
SYM_FUNC_START(_save_fp)
fpu_save_csr a0 t1
- fpu_save_double a0 t1 # clobbers t1
+ fpu_save_double a0 t1 # clobbers t1
fpu_save_cc a0 t1 t2 # clobbers t1, t2
- jirl zero, ra, 0
+ jr ra
SYM_FUNC_END(_save_fp)
EXPORT_SYMBOL(_save_fp)
@@ -161,10 +161,10 @@ EXPORT_SYMBOL(_save_fp)
* Restore a thread's fp context.
*/
SYM_FUNC_START(_restore_fp)
- fpu_restore_double a0 t1 # clobbers t1
- fpu_restore_csr a0 t1
- fpu_restore_cc a0 t1 t2 # clobbers t1, t2
- jirl zero, ra, 0
+ fpu_restore_double a0 t1 # clobbers t1
+ fpu_restore_csr a0 t1
+ fpu_restore_cc a0 t1 t2 # clobbers t1, t2
+ jr ra
SYM_FUNC_END(_restore_fp)
/*
@@ -216,7 +216,7 @@ SYM_FUNC_START(_init_fpu)
movgr2fr.d $f30, t1
movgr2fr.d $f31, t1
- jirl zero, ra, 0
+ jr ra
SYM_FUNC_END(_init_fpu)
/*
@@ -225,11 +225,11 @@ SYM_FUNC_END(_init_fpu)
* a2: fcsr
*/
SYM_FUNC_START(_save_fp_context)
- sc_save_fcc a1 t1 t2
- sc_save_fcsr a2 t1
- sc_save_fp a0
- li.w a0, 0 # success
- jirl zero, ra, 0
+ sc_save_fcc a1 t1 t2
+ sc_save_fcsr a2 t1
+ sc_save_fp a0
+ li.w a0, 0 # success
+ jr ra
SYM_FUNC_END(_save_fp_context)
/*
@@ -238,14 +238,14 @@ SYM_FUNC_END(_save_fp_context)
* a2: fcsr
*/
SYM_FUNC_START(_restore_fp_context)
- sc_restore_fp a0
- sc_restore_fcc a1 t1 t2
- sc_restore_fcsr a2 t1
- li.w a0, 0 # success
- jirl zero, ra, 0
+ sc_restore_fp a0
+ sc_restore_fcc a1 t1 t2
+ sc_restore_fcsr a2 t1
+ li.w a0, 0 # success
+ jr ra
SYM_FUNC_END(_restore_fp_context)
SYM_FUNC_START(fault)
li.w a0, -EFAULT # failure
- jirl zero, ra, 0
+ jr ra
SYM_FUNC_END(fault)
diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S
index 93496852b3cc..75e5be807a0d 100644
--- a/arch/loongarch/kernel/genex.S
+++ b/arch/loongarch/kernel/genex.S
@@ -28,23 +28,23 @@ SYM_FUNC_START(__arch_cpu_idle)
nop
idle 0
/* end of rollback region */
-1: jirl zero, ra, 0
+1: jr ra
SYM_FUNC_END(__arch_cpu_idle)
SYM_FUNC_START(handle_vint)
BACKUP_T0T1
SAVE_ALL
la.abs t1, __arch_cpu_idle
- LONG_L t0, sp, PT_ERA
+ LONG_L t0, sp, PT_ERA
/* 32 byte rollback region */
ori t0, t0, 0x1f
xori t0, t0, 0x1f
bne t0, t1, 1f
- LONG_S t0, sp, PT_ERA
+ LONG_S t0, sp, PT_ERA
1: move a0, sp
move a1, sp
la.abs t0, do_vint
- jirl ra, t0, 0
+ jirl ra, t0, 0
RESTORE_ALL_AND_RET
SYM_FUNC_END(handle_vint)
@@ -72,7 +72,7 @@ SYM_FUNC_END(except_vec_cex)
build_prep_\prep
move a0, sp
la.abs t0, do_\handler
- jirl ra, t0, 0
+ jirl ra, t0, 0
RESTORE_ALL_AND_RET
SYM_FUNC_END(handle_\exception)
.endm
@@ -91,5 +91,5 @@ SYM_FUNC_END(except_vec_cex)
SYM_FUNC_START(handle_sys)
la.abs t0, handle_syscall
- jirl zero, t0, 0
+ jr t0
SYM_FUNC_END(handle_sys)
diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index d01e62dd414f..7062cdf0e33e 100644
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -32,7 +32,7 @@ SYM_CODE_START(kernel_entry) # kernel entry point
/* We might not get launched at the address the kernel is linked to,
so we jump there. */
la.abs t0, 0f
- jirl zero, t0, 0
+ jr t0
0:
la t0, __bss_start # clear .bss
st.d zero, t0, 0
@@ -50,7 +50,7 @@ SYM_CODE_START(kernel_entry) # kernel entry point
/* KSave3 used for percpu base, initialized as 0 */
csrwr zero, PERCPU_BASE_KS
/* GPR21 used for percpu base (runtime), initialized as 0 */
- or u0, zero, zero
+ move u0, zero
la tp, init_thread_union
/* Set the SP after an empty pt_regs. */
@@ -85,8 +85,8 @@ SYM_CODE_START(smpboot_entry)
ld.d sp, t0, CPU_BOOT_STACK
ld.d tp, t0, CPU_BOOT_TINFO
- la.abs t0, 0f
- jirl zero, t0, 0
+ la.abs t0, 0f
+ jr t0
0:
bl start_secondary
SYM_CODE_END(smpboot_entry)
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index b34b8d792aa4..1ba19c76563e 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -25,12 +25,8 @@ DEFINE_PER_CPU(unsigned long, irq_stack);
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
-struct irq_domain *cpu_domain;
-struct irq_domain *liointc_domain;
-struct irq_domain *pch_lpc_domain;
-struct irq_domain *pch_msi_domain[MAX_IO_PICS];
-struct irq_domain *pch_pic_domain[MAX_IO_PICS];
-
+struct acpi_vector_group pch_group[MAX_IO_PICS];
+struct acpi_vector_group msi_group[MAX_IO_PICS];
/*
* 'what should we do if we get a hw irq event on an illegal vector'.
* each architecture has to answer this themselves.
@@ -56,6 +52,51 @@ int arch_show_interrupts(struct seq_file *p, int prec)
return 0;
}
+static int __init early_pci_mcfg_parse(struct acpi_table_header *header)
+{
+ struct acpi_table_mcfg *mcfg;
+ struct acpi_mcfg_allocation *mptr;
+ int i, n;
+
+ if (header->length < sizeof(struct acpi_table_mcfg))
+ return -EINVAL;
+
+ n = (header->length - sizeof(struct acpi_table_mcfg)) /
+ sizeof(struct acpi_mcfg_allocation);
+ mcfg = (struct acpi_table_mcfg *)header;
+ mptr = (struct acpi_mcfg_allocation *) &mcfg[1];
+
+ for (i = 0; i < n; i++, mptr++) {
+ msi_group[i].pci_segment = mptr->pci_segment;
+ pch_group[i].node = msi_group[i].node = (mptr->address >> 44) & 0xf;
+ }
+
+ return 0;
+}
+
+static void __init init_vec_parent_group(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_IO_PICS; i++) {
+ msi_group[i].pci_segment = -1;
+ msi_group[i].node = -1;
+ pch_group[i].node = -1;
+ }
+
+ acpi_table_parse(ACPI_SIG_MCFG, early_pci_mcfg_parse);
+}
+
+static int __init get_ipi_irq(void)
+{
+ struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
+
+ if (d)
+ return irq_create_mapping(d, EXCCODE_IPI - EXCCODE_INT_START);
+
+ return -EINVAL;
+}
+
void __init init_IRQ(void)
{
int i;
@@ -69,9 +110,12 @@ void __init init_IRQ(void)
clear_csr_ecfg(ECFG0_IM);
clear_csr_estat(ESTATF_IP);
+ init_vec_parent_group();
irqchip_init();
#ifdef CONFIG_SMP
- ipi_irq = EXCCODE_IPI - EXCCODE_INT_START;
+ ipi_irq = get_ipi_irq();
+ if (ipi_irq < 0)
+ panic("IPI IRQ mapping failed\n");
irq_set_percpu_devid(ipi_irq);
r = request_percpu_irq(ipi_irq, loongson3_ipi_interrupt, "IPI", &ipi_dummy_dev);
if (r < 0)
diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c
index e6ab87948e1d..dc2b82ea894c 100644
--- a/arch/loongarch/kernel/ptrace.c
+++ b/arch/loongarch/kernel/ptrace.c
@@ -193,7 +193,7 @@ static int fpr_set(struct task_struct *target,
const void *kbuf, const void __user *ubuf)
{
const int fcc_start = NUM_FPU_REGS * sizeof(elf_fpreg_t);
- const int fcc_end = fcc_start + sizeof(u64);
+ const int fcsr_start = fcc_start + sizeof(u64);
int err;
BUG_ON(count % sizeof(elf_fpreg_t));
@@ -209,10 +209,12 @@ static int fpr_set(struct task_struct *target,
if (err)
return err;
- if (count > 0)
- err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu.fcc,
- fcc_start, fcc_end);
+ err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpu.fcc, fcc_start,
+ fcc_start + sizeof(u64));
+ err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpu.fcsr, fcsr_start,
+ fcsr_start + sizeof(u32));
return err;
}
diff --git a/arch/loongarch/kernel/reset.c b/arch/loongarch/kernel/reset.c
index 2b86469e4718..800c965a17ea 100644
--- a/arch/loongarch/kernel/reset.c
+++ b/arch/loongarch/kernel/reset.c
@@ -13,7 +13,6 @@
#include <linux/console.h>
#include <acpi/reboot.h>
-#include <asm/compiler.h>
#include <asm/idle.h>
#include <asm/loongarch.h>
#include <asm/reboot.h>
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index c74860b53375..8f5c2f9a1a83 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -126,7 +126,7 @@ static void __init parse_bios_table(const struct dmi_header *dm)
char *dmi_data = (char *)dm;
bios_extern = *(dmi_data + SMBIOS_BIOSEXTERN_OFFSET);
- b_info.bios_size = *(dmi_data + SMBIOS_BIOSSIZE_OFFSET);
+ b_info.bios_size = (*(dmi_data + SMBIOS_BIOSSIZE_OFFSET) + 1) << 6;
if (bios_extern & LOONGSON_EFI_ENABLE)
set_bit(EFI_BOOT, &efi.flags);
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 73cec62504fb..09743103d9b3 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -278,116 +278,29 @@ void loongson3_cpu_die(unsigned int cpu)
mb();
}
-/*
- * The target CPU should go to XKPRANGE (uncached area) and flush
- * ICache/DCache/VCache before the control CPU can safely disable its clock.
- */
-static void loongson3_play_dead(int *state_addr)
+void play_dead(void)
{
- register int val;
- register void *addr;
+ register uint64_t addr;
register void (*init_fn)(void);
- __asm__ __volatile__(
- " li.d %[addr], 0x8000000000000000\n"
- "1: cacop 0x8, %[addr], 0 \n" /* flush ICache */
- " cacop 0x8, %[addr], 1 \n"
- " cacop 0x8, %[addr], 2 \n"
- " cacop 0x8, %[addr], 3 \n"
- " cacop 0x9, %[addr], 0 \n" /* flush DCache */
- " cacop 0x9, %[addr], 1 \n"
- " cacop 0x9, %[addr], 2 \n"
- " cacop 0x9, %[addr], 3 \n"
- " addi.w %[sets], %[sets], -1 \n"
- " addi.d %[addr], %[addr], 0x40 \n"
- " bnez %[sets], 1b \n"
- " li.d %[addr], 0x8000000000000000\n"
- "2: cacop 0xa, %[addr], 0 \n" /* flush VCache */
- " cacop 0xa, %[addr], 1 \n"
- " cacop 0xa, %[addr], 2 \n"
- " cacop 0xa, %[addr], 3 \n"
- " cacop 0xa, %[addr], 4 \n"
- " cacop 0xa, %[addr], 5 \n"
- " cacop 0xa, %[addr], 6 \n"
- " cacop 0xa, %[addr], 7 \n"
- " cacop 0xa, %[addr], 8 \n"
- " cacop 0xa, %[addr], 9 \n"
- " cacop 0xa, %[addr], 10 \n"
- " cacop 0xa, %[addr], 11 \n"
- " cacop 0xa, %[addr], 12 \n"
- " cacop 0xa, %[addr], 13 \n"
- " cacop 0xa, %[addr], 14 \n"
- " cacop 0xa, %[addr], 15 \n"
- " addi.w %[vsets], %[vsets], -1 \n"
- " addi.d %[addr], %[addr], 0x40 \n"
- " bnez %[vsets], 2b \n"
- " li.w %[val], 0x7 \n" /* *state_addr = CPU_DEAD; */
- " st.w %[val], %[state_addr], 0 \n"
- " dbar 0 \n"
- " cacop 0x11, %[state_addr], 0 \n" /* flush entry of *state_addr */
- : [addr] "=&r" (addr), [val] "=&r" (val)
- : [state_addr] "r" (state_addr),
- [sets] "r" (cpu_data[smp_processor_id()].dcache.sets),
- [vsets] "r" (cpu_data[smp_processor_id()].vcache.sets));
-
+ idle_task_exit();
local_irq_enable();
- change_csr_ecfg(ECFG0_IM, ECFGF_IPI);
+ set_csr_ecfg(ECFGF_IPI);
+ __this_cpu_write(cpu_state, CPU_DEAD);
+
+ __smp_mb();
+ do {
+ __asm__ __volatile__("idle 0\n\t");
+ addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0);
+ } while (addr == 0);
- __asm__ __volatile__(
- " idle 0 \n"
- " li.w $t0, 0x1020 \n"
- " iocsrrd.d %[init_fn], $t0 \n" /* Get init PC */
- : [init_fn] "=&r" (addr)
- : /* No Input */
- : "a0");
- init_fn = __va(addr);
+ init_fn = (void *)TO_CACHE(addr);
+ iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR);
init_fn();
unreachable();
}
-void play_dead(void)
-{
- int *state_addr;
- unsigned int cpu = smp_processor_id();
- void (*play_dead_uncached)(int *s);
-
- idle_task_exit();
- play_dead_uncached = (void *)TO_UNCACHE(__pa((unsigned long)loongson3_play_dead));
- state_addr = &per_cpu(cpu_state, cpu);
- mb();
- play_dead_uncached(state_addr);
-}
-
-static int loongson3_enable_clock(unsigned int cpu)
-{
- uint64_t core_id = cpu_data[cpu].core;
- uint64_t package_id = cpu_data[cpu].package;
-
- LOONGSON_FREQCTRL(package_id) |= 1 << (core_id * 4 + 3);
-
- return 0;
-}
-
-static int loongson3_disable_clock(unsigned int cpu)
-{
- uint64_t core_id = cpu_data[cpu].core;
- uint64_t package_id = cpu_data[cpu].package;
-
- LOONGSON_FREQCTRL(package_id) &= ~(1 << (core_id * 4 + 3));
-
- return 0;
-}
-
-static int register_loongson3_notifier(void)
-{
- return cpuhp_setup_state_nocalls(CPUHP_LOONGARCH_SOC_PREPARE,
- "loongarch/loongson:prepare",
- loongson3_enable_clock,
- loongson3_disable_clock);
-}
-early_initcall(register_loongson3_notifier);
-
#endif
/*
diff --git a/arch/loongarch/kernel/switch.S b/arch/loongarch/kernel/switch.S
index 53e2fa8e580e..37e84ac8ffc2 100644
--- a/arch/loongarch/kernel/switch.S
+++ b/arch/loongarch/kernel/switch.S
@@ -24,8 +24,8 @@ SYM_FUNC_START(__switch_to)
move tp, a2
cpu_restore_nonscratch a1
- li.w t0, _THREAD_SIZE - 32
- PTR_ADD t0, t0, tp
+ li.w t0, _THREAD_SIZE - 32
+ PTR_ADD t0, t0, tp
set_saved_sp t0, t1, t2
ldptr.d t1, a1, THREAD_CSRPRMD
diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
index fe6823875895..79dc5eddf504 100644
--- a/arch/loongarch/kernel/time.c
+++ b/arch/loongarch/kernel/time.c
@@ -123,6 +123,16 @@ void sync_counter(void)
csr_write64(-init_timeval, LOONGARCH_CSR_CNTC);
}
+static int get_timer_irq(void)
+{
+ struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
+
+ if (d)
+ return irq_create_mapping(d, EXCCODE_TIMER - EXCCODE_INT_START);
+
+ return -EINVAL;
+}
+
int constant_clockevent_init(void)
{
unsigned int irq;
@@ -132,7 +142,9 @@ int constant_clockevent_init(void)
struct clock_event_device *cd;
static int timer_irq_installed = 0;
- irq = EXCCODE_TIMER - EXCCODE_INT_START;
+ irq = get_timer_irq();
+ if (irq < 0)
+ pr_err("Failed to map irq %d (timer)\n", irq);
cd = &per_cpu(constant_clockevent_device, cpu);
diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S
index 25d9be5fbb19..16ba2b8dd68a 100644
--- a/arch/loongarch/lib/clear_user.S
+++ b/arch/loongarch/lib/clear_user.S
@@ -32,7 +32,7 @@ SYM_FUNC_START(__clear_user)
1: st.b zero, a0, 0
addi.d a0, a0, 1
addi.d a1, a1, -1
- bgt a1, zero, 1b
+ bgtz a1, 1b
2: move a0, a1
jr ra
diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S
index 9ae507f851b5..97d20327a69e 100644
--- a/arch/loongarch/lib/copy_user.S
+++ b/arch/loongarch/lib/copy_user.S
@@ -35,7 +35,7 @@ SYM_FUNC_START(__copy_user)
addi.d a0, a0, 1
addi.d a1, a1, 1
addi.d a2, a2, -1
- bgt a2, zero, 1b
+ bgtz a2, 1b
3: move a0, a2
jr ra
diff --git a/arch/loongarch/lib/delay.c b/arch/loongarch/lib/delay.c
index 5d856694fcfe..831d4761f385 100644
--- a/arch/loongarch/lib/delay.c
+++ b/arch/loongarch/lib/delay.c
@@ -7,7 +7,6 @@
#include <linux/smp.h>
#include <linux/timex.h>
-#include <asm/compiler.h>
#include <asm/processor.h>
void __delay(unsigned long cycles)
diff --git a/arch/loongarch/mm/page.S b/arch/loongarch/mm/page.S
index ddc78ab33c7b..4c874a7af0ad 100644
--- a/arch/loongarch/mm/page.S
+++ b/arch/loongarch/mm/page.S
@@ -10,75 +10,75 @@
.align 5
SYM_FUNC_START(clear_page)
- lu12i.w t0, 1 << (PAGE_SHIFT - 12)
- add.d t0, t0, a0
+ lu12i.w t0, 1 << (PAGE_SHIFT - 12)
+ add.d t0, t0, a0
1:
- st.d zero, a0, 0
- st.d zero, a0, 8
- st.d zero, a0, 16
- st.d zero, a0, 24
- st.d zero, a0, 32
- st.d zero, a0, 40
- st.d zero, a0, 48
- st.d zero, a0, 56
- addi.d a0, a0, 128
- st.d zero, a0, -64
- st.d zero, a0, -56
- st.d zero, a0, -48
- st.d zero, a0, -40
- st.d zero, a0, -32
- st.d zero, a0, -24
- st.d zero, a0, -16
- st.d zero, a0, -8
- bne t0, a0, 1b
+ st.d zero, a0, 0
+ st.d zero, a0, 8
+ st.d zero, a0, 16
+ st.d zero, a0, 24
+ st.d zero, a0, 32
+ st.d zero, a0, 40
+ st.d zero, a0, 48
+ st.d zero, a0, 56
+ addi.d a0, a0, 128
+ st.d zero, a0, -64
+ st.d zero, a0, -56
+ st.d zero, a0, -48
+ st.d zero, a0, -40
+ st.d zero, a0, -32
+ st.d zero, a0, -24
+ st.d zero, a0, -16
+ st.d zero, a0, -8
+ bne t0, a0, 1b
- jirl $r0, ra, 0
+ jr ra
SYM_FUNC_END(clear_page)
EXPORT_SYMBOL(clear_page)
.align 5
SYM_FUNC_START(copy_page)
- lu12i.w t8, 1 << (PAGE_SHIFT - 12)
- add.d t8, t8, a0
+ lu12i.w t8, 1 << (PAGE_SHIFT - 12)
+ add.d t8, t8, a0
1:
- ld.d t0, a1, 0
- ld.d t1, a1, 8
- ld.d t2, a1, 16
- ld.d t3, a1, 24
- ld.d t4, a1, 32
- ld.d t5, a1, 40
- ld.d t6, a1, 48
- ld.d t7, a1, 56
+ ld.d t0, a1, 0
+ ld.d t1, a1, 8
+ ld.d t2, a1, 16
+ ld.d t3, a1, 24
+ ld.d t4, a1, 32
+ ld.d t5, a1, 40
+ ld.d t6, a1, 48
+ ld.d t7, a1, 56
- st.d t0, a0, 0
- st.d t1, a0, 8
- ld.d t0, a1, 64
- ld.d t1, a1, 72
- st.d t2, a0, 16
- st.d t3, a0, 24
- ld.d t2, a1, 80
- ld.d t3, a1, 88
- st.d t4, a0, 32
- st.d t5, a0, 40
- ld.d t4, a1, 96
- ld.d t5, a1, 104
- st.d t6, a0, 48
- st.d t7, a0, 56
- ld.d t6, a1, 112
- ld.d t7, a1, 120
- addi.d a0, a0, 128
- addi.d a1, a1, 128
+ st.d t0, a0, 0
+ st.d t1, a0, 8
+ ld.d t0, a1, 64
+ ld.d t1, a1, 72
+ st.d t2, a0, 16
+ st.d t3, a0, 24
+ ld.d t2, a1, 80
+ ld.d t3, a1, 88
+ st.d t4, a0, 32
+ st.d t5, a0, 40
+ ld.d t4, a1, 96
+ ld.d t5, a1, 104
+ st.d t6, a0, 48
+ st.d t7, a0, 56
+ ld.d t6, a1, 112
+ ld.d t7, a1, 120
+ addi.d a0, a0, 128
+ addi.d a1, a1, 128
- st.d t0, a0, -64
- st.d t1, a0, -56
- st.d t2, a0, -48
- st.d t3, a0, -40
- st.d t4, a0, -32
- st.d t5, a0, -24
- st.d t6, a0, -16
- st.d t7, a0, -8
+ st.d t0, a0, -64
+ st.d t1, a0, -56
+ st.d t2, a0, -48
+ st.d t3, a0, -40
+ st.d t4, a0, -32
+ st.d t5, a0, -24
+ st.d t6, a0, -16
+ st.d t7, a0, -8
- bne t8, a0, 1b
- jirl $r0, ra, 0
+ bne t8, a0, 1b
+ jr ra
SYM_FUNC_END(copy_page)
EXPORT_SYMBOL(copy_page)
diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S
index 7eee40271577..de19fa2d7f0d 100644
--- a/arch/loongarch/mm/tlbex.S
+++ b/arch/loongarch/mm/tlbex.S
@@ -18,7 +18,7 @@
REG_S a2, sp, PT_BVADDR
li.w a1, \write
la.abs t0, do_page_fault
- jirl ra, t0, 0
+ jirl ra, t0, 0
RESTORE_ALL_AND_RET
SYM_FUNC_END(tlb_do_page_fault_\write)
.endm
@@ -34,7 +34,7 @@ SYM_FUNC_START(handle_tlb_protect)
csrrd a2, LOONGARCH_CSR_BADV
REG_S a2, sp, PT_BVADDR
la.abs t0, do_page_fault
- jirl ra, t0, 0
+ jirl ra, t0, 0
RESTORE_ALL_AND_RET
SYM_FUNC_END(handle_tlb_protect)
@@ -47,7 +47,7 @@ SYM_FUNC_START(handle_tlb_load)
* The vmalloc handling is not in the hotpath.
*/
csrrd t0, LOONGARCH_CSR_BADV
- blt t0, $r0, vmalloc_load
+ bltz t0, vmalloc_load
csrrd t1, LOONGARCH_CSR_PGDL
vmalloc_done_load:
@@ -80,7 +80,7 @@ vmalloc_done_load:
* see if we need to jump to huge tlb processing.
*/
andi t0, ra, _PAGE_HUGE
- bne t0, $r0, tlb_huge_update_load
+ bnez t0, tlb_huge_update_load
csrrd t0, LOONGARCH_CSR_BADV
srli.d t0, t0, (PAGE_SHIFT + PTE_ORDER)
@@ -100,12 +100,12 @@ smp_pgtable_change_load:
srli.d ra, t0, _PAGE_PRESENT_SHIFT
andi ra, ra, 1
- beq ra, $r0, nopage_tlb_load
+ beqz ra, nopage_tlb_load
ori t0, t0, _PAGE_VALID
#ifdef CONFIG_SMP
sc.d t0, t1, 0
- beq t0, $r0, smp_pgtable_change_load
+ beqz t0, smp_pgtable_change_load
#else
st.d t0, t1, 0
#endif
@@ -139,23 +139,23 @@ tlb_huge_update_load:
#endif
srli.d ra, t0, _PAGE_PRESENT_SHIFT
andi ra, ra, 1
- beq ra, $r0, nopage_tlb_load
+ beqz ra, nopage_tlb_load
tlbsrch
ori t0, t0, _PAGE_VALID
#ifdef CONFIG_SMP
sc.d t0, t1, 0
- beq t0, $r0, tlb_huge_update_load
+ beqz t0, tlb_huge_update_load
ld.d t0, t1, 0
#else
st.d t0, t1, 0
#endif
- addu16i.d t1, $r0, -(CSR_TLBIDX_EHINV >> 16)
- addi.d ra, t1, 0
- csrxchg ra, t1, LOONGARCH_CSR_TLBIDX
+ addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16)
+ addi.d ra, t1, 0
+ csrxchg ra, t1, LOONGARCH_CSR_TLBIDX
tlbwr
- csrxchg $r0, t1, LOONGARCH_CSR_TLBIDX
+ csrxchg zero, t1, LOONGARCH_CSR_TLBIDX
/*
* A huge PTE describes an area the size of the
@@ -178,27 +178,27 @@ tlb_huge_update_load:
addi.d t0, ra, 0
/* Convert to entrylo1 */
- addi.d t1, $r0, 1
+ addi.d t1, zero, 1
slli.d t1, t1, (HPAGE_SHIFT - 1)
add.d t0, t0, t1
csrwr t0, LOONGARCH_CSR_TLBELO1
/* Set huge page tlb entry size */
- addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16)
- addu16i.d t1, $r0, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
+ addu16i.d t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
tlbfill
- addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16)
- addu16i.d t1, $r0, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
+ addu16i.d t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
nopage_tlb_load:
dbar 0
csrrd ra, EXCEPTION_KS2
la.abs t0, tlb_do_page_fault_0
- jirl $r0, t0, 0
+ jr t0
SYM_FUNC_END(handle_tlb_load)
SYM_FUNC_START(handle_tlb_store)
@@ -210,7 +210,7 @@ SYM_FUNC_START(handle_tlb_store)
* The vmalloc handling is not in the hotpath.
*/
csrrd t0, LOONGARCH_CSR_BADV
- blt t0, $r0, vmalloc_store
+ bltz t0, vmalloc_store
csrrd t1, LOONGARCH_CSR_PGDL
vmalloc_done_store:
@@ -244,7 +244,7 @@ vmalloc_done_store:
* see if we need to jump to huge tlb processing.
*/
andi t0, ra, _PAGE_HUGE
- bne t0, $r0, tlb_huge_update_store
+ bnez t0, tlb_huge_update_store
csrrd t0, LOONGARCH_CSR_BADV
srli.d t0, t0, (PAGE_SHIFT + PTE_ORDER)
@@ -265,12 +265,12 @@ smp_pgtable_change_store:
srli.d ra, t0, _PAGE_PRESENT_SHIFT
andi ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
xori ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
- bne ra, $r0, nopage_tlb_store
+ bnez ra, nopage_tlb_store
ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
#ifdef CONFIG_SMP
sc.d t0, t1, 0
- beq t0, $r0, smp_pgtable_change_store
+ beqz t0, smp_pgtable_change_store
#else
st.d t0, t1, 0
#endif
@@ -306,24 +306,24 @@ tlb_huge_update_store:
srli.d ra, t0, _PAGE_PRESENT_SHIFT
andi ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
xori ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
- bne ra, $r0, nopage_tlb_store
+ bnez ra, nopage_tlb_store
tlbsrch
ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
#ifdef CONFIG_SMP
sc.d t0, t1, 0
- beq t0, $r0, tlb_huge_update_store
+ beqz t0, tlb_huge_update_store
ld.d t0, t1, 0
#else
st.d t0, t1, 0
#endif
- addu16i.d t1, $r0, -(CSR_TLBIDX_EHINV >> 16)
- addi.d ra, t1, 0
- csrxchg ra, t1, LOONGARCH_CSR_TLBIDX
+ addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16)
+ addi.d ra, t1, 0
+ csrxchg ra, t1, LOONGARCH_CSR_TLBIDX
tlbwr
- csrxchg $r0, t1, LOONGARCH_CSR_TLBIDX
+ csrxchg zero, t1, LOONGARCH_CSR_TLBIDX
/*
* A huge PTE describes an area the size of the
* configured huge page size. This is twice the
@@ -345,28 +345,28 @@ tlb_huge_update_store:
addi.d t0, ra, 0
/* Convert to entrylo1 */
- addi.d t1, $r0, 1
+ addi.d t1, zero, 1
slli.d t1, t1, (HPAGE_SHIFT - 1)
add.d t0, t0, t1
csrwr t0, LOONGARCH_CSR_TLBELO1
/* Set huge page tlb entry size */
- addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16)
- addu16i.d t1, $r0, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
+ addu16i.d t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
tlbfill
/* Reset default page size */
- addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16)
- addu16i.d t1, $r0, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
+ addu16i.d t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
nopage_tlb_store:
dbar 0
csrrd ra, EXCEPTION_KS2
la.abs t0, tlb_do_page_fault_1
- jirl $r0, t0, 0
+ jr t0
SYM_FUNC_END(handle_tlb_store)
SYM_FUNC_START(handle_tlb_modify)
@@ -378,7 +378,7 @@ SYM_FUNC_START(handle_tlb_modify)
* The vmalloc handling is not in the hotpath.
*/
csrrd t0, LOONGARCH_CSR_BADV
- blt t0, $r0, vmalloc_modify
+ bltz t0, vmalloc_modify
csrrd t1, LOONGARCH_CSR_PGDL
vmalloc_done_modify:
@@ -411,7 +411,7 @@ vmalloc_done_modify:
* see if we need to jump to huge tlb processing.
*/
andi t0, ra, _PAGE_HUGE
- bne t0, $r0, tlb_huge_update_modify
+ bnez t0, tlb_huge_update_modify
csrrd t0, LOONGARCH_CSR_BADV
srli.d t0, t0, (PAGE_SHIFT + PTE_ORDER)
@@ -431,12 +431,12 @@ smp_pgtable_change_modify:
srli.d ra, t0, _PAGE_WRITE_SHIFT
andi ra, ra, 1
- beq ra, $r0, nopage_tlb_modify
+ beqz ra, nopage_tlb_modify
ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
#ifdef CONFIG_SMP
sc.d t0, t1, 0
- beq t0, $r0, smp_pgtable_change_modify
+ beqz t0, smp_pgtable_change_modify
#else
st.d t0, t1, 0
#endif
@@ -454,7 +454,7 @@ leave_modify:
ertn
#ifdef CONFIG_64BIT
vmalloc_modify:
- la.abs t1, swapper_pg_dir
+ la.abs t1, swapper_pg_dir
b vmalloc_done_modify
#endif
@@ -471,14 +471,14 @@ tlb_huge_update_modify:
srli.d ra, t0, _PAGE_WRITE_SHIFT
andi ra, ra, 1
- beq ra, $r0, nopage_tlb_modify
+ beqz ra, nopage_tlb_modify
tlbsrch
ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
#ifdef CONFIG_SMP
sc.d t0, t1, 0
- beq t0, $r0, tlb_huge_update_modify
+ beqz t0, tlb_huge_update_modify
ld.d t0, t1, 0
#else
st.d t0, t1, 0
@@ -504,28 +504,28 @@ tlb_huge_update_modify:
addi.d t0, ra, 0
/* Convert to entrylo1 */
- addi.d t1, $r0, 1
+ addi.d t1, zero, 1
slli.d t1, t1, (HPAGE_SHIFT - 1)
add.d t0, t0, t1
csrwr t0, LOONGARCH_CSR_TLBELO1
/* Set huge page tlb entry size */
- addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16)
- addu16i.d t1, $r0, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
- csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
+ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
+ addu16i.d t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+ csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
tlbwr
/* Reset default page size */
- addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16)
- addu16i.d t1, $r0, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
- csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
+ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
+ addu16i.d t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+ csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
nopage_tlb_modify:
dbar 0
csrrd ra, EXCEPTION_KS2
la.abs t0, tlb_do_page_fault_1
- jirl $r0, t0, 0
+ jr t0
SYM_FUNC_END(handle_tlb_modify)
SYM_FUNC_START(handle_tlb_refill)
diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
index f3aa44156969..e0e9e31339c1 100644
--- a/arch/m68k/Kconfig.cpu
+++ b/arch/m68k/Kconfig.cpu
@@ -155,7 +155,7 @@ config M520x
select COLDFIRE_PIT_TIMER
select HAVE_CACHE_SPLIT
help
- Freescale Coldfire 5207/5208 processor support.
+ Freescale Coldfire 5207/5208 processor support.
config M523x
bool "MCF523x"
@@ -322,7 +322,6 @@ config COLDFIRE_SLTIMERS
endif # COLDFIRE
-
comment "Processor Specific Options"
config M68KFPU_EMU
@@ -522,7 +521,7 @@ config CACHE_BOTH
Split the ColdFire CPU cache, and use half as an instruction cache
and half as a data cache.
endchoice
-endif
+endif # HAVE_CACHE_SPLIT
if HAVE_CACHE_CB
choice
@@ -539,4 +538,4 @@ config CACHE_COPYBACK
help
The ColdFire CPU cache is set into Copy-back mode.
endchoice
-endif
+endif # HAVE_CACHE_CB
diff --git a/arch/m68k/Kconfig.debug b/arch/m68k/Kconfig.debug
index 11b306bdd788..465e28be0ce4 100644
--- a/arch/m68k/Kconfig.debug
+++ b/arch/m68k/Kconfig.debug
@@ -1,11 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
config BOOTPARAM
- bool 'Compiled-in Kernel Boot Parameter'
+ bool "Compiled-in Kernel Boot Parameter"
config BOOTPARAM_STRING
- string 'Kernel Boot Parameter'
- default 'console=ttyS0,19200'
+ string "Kernel Boot Parameter"
+ default "console=ttyS0,19200"
depends on BOOTPARAM
config EARLY_PRINTK
diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine
index a1042568b9ad..53c45ccda564 100644
--- a/arch/m68k/Kconfig.machine
+++ b/arch/m68k/Kconfig.machine
@@ -161,10 +161,11 @@ config VIRT
select RTC_CLASS
select RTC_DRV_GOLDFISH
select TTY
+ select VIRTIO_MENU
select VIRTIO_MMIO
help
This options enable a pure virtual machine based on m68k,
- VIRTIO MMIO devices and GOLDFISH interfaces (TTY, RTC, PIC)
+ VIRTIO MMIO devices and GOLDFISH interfaces (TTY, RTC, PIC).
config PILOT
bool
@@ -492,4 +493,4 @@ config ROMKERNEL
endchoice
-endif
+endif # !MMU || COLDFIRE
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index c181030218bf..a6a886a89be2 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -10,8 +10,6 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_USERFAULTFD=y
-CONFIG_SLAB=y
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68020=y
@@ -43,8 +41,9 @@ CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=m
+CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
-CONFIG_ZPOOL=m
+CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -310,6 +309,7 @@ CONFIG_PARPORT_MFC3=m
CONFIG_PARPORT_1284=y
CONFIG_AMIGA_FLOPPY=y
CONFIG_AMIGA_Z2RAM=y
+CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
@@ -580,7 +580,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
+CONFIG_CRYPTO_SM3_GENERIC=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -595,7 +595,7 @@ CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
@@ -648,11 +648,7 @@ CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
-CONFIG_BITFIELD_KUNIT=m
-CONFIG_RESOURCE_KUNIT_TEST=m
CONFIG_LINEAR_RANGES_TEST=m
-CONFIG_CMDLINE_KUNIT_TEST=m
-CONFIG_BITS_TEST=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 40755648fb6c..bffd24c2755e 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -10,8 +10,6 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_USERFAULTFD=y
-CONFIG_SLAB=y
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68020=y
@@ -39,8 +37,9 @@ CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=m
+CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
-CONFIG_ZPOOL=m
+CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -300,6 +299,7 @@ CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
CONFIG_CONNECTOR=m
+CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
@@ -537,7 +537,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
+CONFIG_CRYPTO_SM3_GENERIC=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -552,7 +552,7 @@ CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
@@ -604,11 +604,7 @@ CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
-CONFIG_BITFIELD_KUNIT=m
-CONFIG_RESOURCE_KUNIT_TEST=m
CONFIG_LINEAR_RANGES_TEST=m
-CONFIG_CMDLINE_KUNIT_TEST=m
-CONFIG_BITS_TEST=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index be0d9155fc5b..0013425b1e08 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -10,8 +10,6 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_USERFAULTFD=y
-CONFIG_SLAB=y
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68020=y
@@ -46,8 +44,9 @@ CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=m
+CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
-CONFIG_ZPOOL=m
+CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -311,6 +310,7 @@ CONFIG_PARPORT=m
CONFIG_PARPORT_ATARI=m
CONFIG_PARPORT_1284=y
CONFIG_ATARI_FLOPPY=y
+CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
@@ -557,7 +557,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
+CONFIG_CRYPTO_SM3_GENERIC=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -572,7 +572,7 @@ CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
@@ -625,11 +625,7 @@ CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
-CONFIG_BITFIELD_KUNIT=m
-CONFIG_RESOURCE_KUNIT_TEST=m
CONFIG_LINEAR_RANGES_TEST=m
-CONFIG_CMDLINE_KUNIT_TEST=m
-CONFIG_BITS_TEST=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 9af0e2d0d153..42d969697f7f 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -10,8 +10,6 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_USERFAULTFD=y
-CONFIG_SLAB=y
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68040=y
@@ -36,8 +34,9 @@ CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=m
+CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
-CONFIG_ZPOOL=m
+CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -297,6 +296,7 @@ CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
CONFIG_CONNECTOR=m
+CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
@@ -529,7 +529,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
+CONFIG_CRYPTO_SM3_GENERIC=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -544,7 +544,7 @@ CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
@@ -596,11 +596,7 @@ CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
-CONFIG_BITFIELD_KUNIT=m
-CONFIG_RESOURCE_KUNIT_TEST=m
CONFIG_LINEAR_RANGES_TEST=m
-CONFIG_CMDLINE_KUNIT_TEST=m
-CONFIG_BITS_TEST=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 49341d66feb6..97d6d9acb395 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -10,8 +10,6 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_USERFAULTFD=y
-CONFIG_SLAB=y
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68020=y
@@ -38,8 +36,9 @@ CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=m
+CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
-CONFIG_ZPOOL=m
+CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -299,6 +298,7 @@ CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
CONFIG_CONNECTOR=m
+CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
@@ -539,7 +539,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
+CONFIG_CRYPTO_SM3_GENERIC=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -554,7 +554,7 @@ CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
@@ -606,11 +606,7 @@ CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
-CONFIG_BITFIELD_KUNIT=m
-CONFIG_RESOURCE_KUNIT_TEST=m
CONFIG_LINEAR_RANGES_TEST=m
-CONFIG_CMDLINE_KUNIT_TEST=m
-CONFIG_BITS_TEST=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 92b33d5ffab1..8cbfc1c659a3 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -10,8 +10,6 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_USERFAULTFD=y
-CONFIG_SLAB=y
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68020=y
@@ -37,8 +35,9 @@ CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=m
+CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
-CONFIG_ZPOOL=m
+CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -302,6 +301,7 @@ CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
CONFIG_CONNECTOR=m
CONFIG_BLK_DEV_SWIM=m
+CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
@@ -559,7 +559,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
+CONFIG_CRYPTO_SM3_GENERIC=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -574,7 +574,7 @@ CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
@@ -627,11 +627,7 @@ CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
-CONFIG_BITFIELD_KUNIT=m
-CONFIG_RESOURCE_KUNIT_TEST=m
CONFIG_LINEAR_RANGES_TEST=m
-CONFIG_CMDLINE_KUNIT_TEST=m
-CONFIG_BITS_TEST=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 6aaa947bc849..9f45fe60757f 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -10,8 +10,6 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_USERFAULTFD=y
-CONFIG_SLAB=y
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68020=y
@@ -57,8 +55,9 @@ CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=m
+CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
-CONFIG_ZPOOL=m
+CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -331,6 +330,7 @@ CONFIG_AMIGA_FLOPPY=y
CONFIG_ATARI_FLOPPY=y
CONFIG_BLK_DEV_SWIM=m
CONFIG_AMIGA_Z2RAM=y
+CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
@@ -645,7 +645,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
+CONFIG_CRYPTO_SM3_GENERIC=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -660,7 +660,7 @@ CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
@@ -713,11 +713,7 @@ CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
-CONFIG_BITFIELD_KUNIT=m
-CONFIG_RESOURCE_KUNIT_TEST=m
CONFIG_LINEAR_RANGES_TEST=m
-CONFIG_CMDLINE_KUNIT_TEST=m
-CONFIG_BITS_TEST=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index b62d65e59938..4736cfacf6a2 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -10,8 +10,6 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_USERFAULTFD=y
-CONFIG_SLAB=y
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68030=y
@@ -35,8 +33,9 @@ CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=m
+CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
-CONFIG_ZPOOL=m
+CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -296,6 +295,7 @@ CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
CONFIG_CONNECTOR=m
+CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
@@ -528,7 +528,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
+CONFIG_CRYPTO_SM3_GENERIC=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -543,7 +543,7 @@ CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
@@ -595,11 +595,7 @@ CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
-CONFIG_BITFIELD_KUNIT=m
-CONFIG_RESOURCE_KUNIT_TEST=m
CONFIG_LINEAR_RANGES_TEST=m
-CONFIG_CMDLINE_KUNIT_TEST=m
-CONFIG_BITS_TEST=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 8ecf261487d4..638cd38aa7d2 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -10,8 +10,6 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_USERFAULTFD=y
-CONFIG_SLAB=y
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68040=y
@@ -36,8 +34,9 @@ CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=m
+CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
-CONFIG_ZPOOL=m
+CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -297,6 +296,7 @@ CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
CONFIG_CONNECTOR=m
+CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
@@ -529,7 +529,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
+CONFIG_CRYPTO_SM3_GENERIC=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -544,7 +544,7 @@ CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
@@ -596,11 +596,7 @@ CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
-CONFIG_BITFIELD_KUNIT=m
-CONFIG_RESOURCE_KUNIT_TEST=m
CONFIG_LINEAR_RANGES_TEST=m
-CONFIG_CMDLINE_KUNIT_TEST=m
-CONFIG_BITS_TEST=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 7540d908897b..ec8b6bb70ebd 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -10,8 +10,6 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_USERFAULTFD=y
-CONFIG_SLAB=y
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68040=y
@@ -37,8 +35,9 @@ CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=m
+CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
-CONFIG_ZPOOL=m
+CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -301,6 +300,7 @@ CONFIG_CONNECTOR=m
CONFIG_PARPORT=m
CONFIG_PARPORT_PC=m
CONFIG_PARPORT_1284=y
+CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
@@ -546,7 +546,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
+CONFIG_CRYPTO_SM3_GENERIC=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -561,7 +561,7 @@ CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
@@ -614,11 +614,7 @@ CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
-CONFIG_BITFIELD_KUNIT=m
-CONFIG_RESOURCE_KUNIT_TEST=m
CONFIG_LINEAR_RANGES_TEST=m
-CONFIG_CMDLINE_KUNIT_TEST=m
-CONFIG_BITS_TEST=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 832b45944617..7d8dc578d59c 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -10,8 +10,6 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_USERFAULTFD=y
-CONFIG_SLAB=y
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_SUN3=y
@@ -33,8 +31,9 @@ CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=m
+CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
-CONFIG_ZPOOL=m
+CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -294,6 +293,7 @@ CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
CONFIG_CONNECTOR=m
+CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
@@ -528,7 +528,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
+CONFIG_CRYPTO_SM3_GENERIC=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -543,7 +543,7 @@ CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
@@ -594,11 +594,7 @@ CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
-CONFIG_BITFIELD_KUNIT=m
-CONFIG_RESOURCE_KUNIT_TEST=m
CONFIG_LINEAR_RANGES_TEST=m
-CONFIG_CMDLINE_KUNIT_TEST=m
-CONFIG_BITS_TEST=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 9171b687e565..96290aee5302 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -10,8 +10,6 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_USERFAULTFD=y
-CONFIG_SLAB=y
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_SUN3X=y
@@ -33,8 +31,9 @@ CONFIG_MQ_IOSCHED_KYBER=m
CONFIG_IOSCHED_BFQ=m
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=m
+CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
-CONFIG_ZPOOL=m
+CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -294,6 +293,7 @@ CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
CONFIG_CONNECTOR=m
+CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
@@ -527,7 +527,7 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
+CONFIG_CRYPTO_SM3_GENERIC=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -542,7 +542,7 @@ CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
@@ -594,11 +594,7 @@ CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_FIND_BIT_BENCHMARK=m
CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_SYSCTL=m
-CONFIG_BITFIELD_KUNIT=m
-CONFIG_RESOURCE_KUNIT_TEST=m
CONFIG_LINEAR_RANGES_TEST=m
-CONFIG_CMDLINE_KUNIT_TEST=m
-CONFIG_BITS_TEST=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
index 51283db53667..87c2cd66a9ce 100644
--- a/arch/m68k/include/asm/bitops.h
+++ b/arch/m68k/include/asm/bitops.h
@@ -510,7 +510,7 @@ static inline int fls(unsigned int x)
return 32 - cnt;
}
-static inline int __fls(int x)
+static inline unsigned long __fls(unsigned long x)
{
return fls(x) - 1;
}
diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h
index ffeda9aa526a..d86b4009880b 100644
--- a/arch/m68k/include/asm/processor.h
+++ b/arch/m68k/include/asm/processor.h
@@ -151,6 +151,7 @@ static inline void release_thread(struct task_struct *dead_task)
}
unsigned long __get_wchan(struct task_struct *p);
+void show_registers(struct pt_regs *regs);
#define KSTK_EIP(tsk) \
({ \
diff --git a/arch/m68k/include/uapi/asm/bootinfo-virt.h b/arch/m68k/include/uapi/asm/bootinfo-virt.h
index e4db7e2213ab..b091ee9b06e0 100644
--- a/arch/m68k/include/uapi/asm/bootinfo-virt.h
+++ b/arch/m68k/include/uapi/asm/bootinfo-virt.h
@@ -13,6 +13,14 @@
#define BI_VIRT_VIRTIO_BASE 0x8004
#define BI_VIRT_CTRL_BASE 0x8005
+/*
+ * A random seed used to initialize the RNG. Record format:
+ *
+ * - length [ 2 bytes, 16-bit big endian ]
+ * - seed data [ `length` bytes, padded to preserve 2-byte alignment ]
+ */
+#define BI_VIRT_RNG_SEED 0x8006
+
#define VIRT_BOOTI_VERSION MK_BI_VERSION(2, 0)
#endif /* _UAPI_ASM_M68K_BOOTINFO_MAC_H */
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index 59fc63feb0dc..5c8cba0efc63 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -36,6 +36,7 @@
#include <linux/uaccess.h>
#include <asm/traps.h>
#include <asm/machdep.h>
+#include <asm/processor.h>
#include <asm/siginfo.h>
#include <asm/tlbflush.h>
diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c
index de156a027f5b..010b3b5ae8e8 100644
--- a/arch/m68k/mac/iop.c
+++ b/arch/m68k/mac/iop.c
@@ -25,7 +25,7 @@
* check this.)
* 990605 (jmt) - Rearranged things a bit wrt IOP detection; iop_present is
* gone, IOP base addresses are now in an array and the
- * globally-visible functions take an IOP number instead of an
+ * globally-visible functions take an IOP number instead of
* an actual base address.
* 990610 (jmt) - Finished the message passing framework and it seems to work.
* Sending _definitely_ works; my adb-bus.c mods can send
@@ -66,7 +66,7 @@
* a shared memory area in the IOP RAM. Each IOP has seven "channels"; each
* channel is connected to a specific software driver on the IOP. For example
* on the SCC IOP there is one channel for each serial port. Each channel has
- * an incoming and and outgoing message queue with a depth of one.
+ * an incoming and an outgoing message queue with a depth of one.
*
* A message is 32 bytes plus a state byte for the channel (MSG_IDLE, MSG_NEW,
* MSG_RCVD, MSG_COMPLETE). To send a message you copy the message into the
diff --git a/arch/m68k/mac/macints.c b/arch/m68k/mac/macints.c
index e3575388cd05..5cbaf6e9497c 100644
--- a/arch/m68k/mac/macints.c
+++ b/arch/m68k/mac/macints.c
@@ -126,10 +126,7 @@
#include <asm/mac_baboon.h>
#include <asm/hwtest.h>
#include <asm/irq_regs.h>
-
-extern void show_registers(struct pt_regs *);
-
-irqreturn_t mac_nmi_handler(int, void *);
+#include <asm/processor.h>
static unsigned int mac_irq_startup(struct irq_data *);
static void mac_irq_shutdown(struct irq_data *);
@@ -142,6 +139,21 @@ static struct irq_chip mac_irq_chip = {
.irq_shutdown = mac_irq_shutdown,
};
+static irqreturn_t mac_nmi_handler(int irq, void *dev_id)
+{
+ static volatile int in_nmi;
+
+ if (in_nmi)
+ return IRQ_HANDLED;
+ in_nmi = 1;
+
+ pr_info("Non-Maskable Interrupt\n");
+ show_registers(get_irq_regs());
+
+ in_nmi = 0;
+ return IRQ_HANDLED;
+}
+
void __init mac_init_IRQ(void)
{
m68k_setup_irq_controller(&mac_irq_chip, handle_simple_irq, IRQ_USER,
@@ -254,18 +266,3 @@ static void mac_irq_shutdown(struct irq_data *data)
else
mac_irq_disable(data);
}
-
-static volatile int in_nmi;
-
-irqreturn_t mac_nmi_handler(int irq, void *dev_id)
-{
- if (in_nmi)
- return IRQ_HANDLED;
- in_nmi = 1;
-
- pr_info("Non-Maskable Interrupt\n");
- show_registers(get_irq_regs());
-
- in_nmi = 0;
- return IRQ_HANDLED;
-}
diff --git a/arch/m68k/q40/q40ints.c b/arch/m68k/q40/q40ints.c
index 6886a5d0007b..d15057d34e56 100644
--- a/arch/m68k/q40/q40ints.c
+++ b/arch/m68k/q40/q40ints.c
@@ -32,7 +32,7 @@
* 33 : frame int (50/200 Hz periodic timer)
* 34 : sample int (10/20 KHz periodic timer)
*
-*/
+ */
static void q40_irq_handler(unsigned int, struct pt_regs *fp);
static void q40_irq_enable(struct irq_data *data);
diff --git a/arch/m68k/sun3/mmu_emu.c b/arch/m68k/sun3/mmu_emu.c
index 7ec20817c0c9..7321b3b76283 100644
--- a/arch/m68k/sun3/mmu_emu.c
+++ b/arch/m68k/sun3/mmu_emu.c
@@ -211,7 +211,7 @@ void clear_context(unsigned long context)
if(context) {
if(!ctx_alloc[context])
- panic("clear_context: context not allocated\n");
+ panic("%s: context not allocated\n", __func__);
ctx_alloc[context]->context = SUN3_INVALID_CONTEXT;
ctx_alloc[context] = (struct mm_struct *)0;
@@ -261,7 +261,7 @@ unsigned long get_free_context(struct mm_struct *mm)
}
// check to make sure one was really free...
if(new == CONTEXTS_NUM)
- panic("get_free_context: failed to find free context");
+ panic("%s: failed to find free context", __func__);
}
ctx_alloc[new] = mm;
@@ -369,16 +369,15 @@ int mmu_emu_handle_fault (unsigned long vaddr, int read_flag, int kernel_fault)
}
#ifdef DEBUG_MMU_EMU
- pr_info("mmu_emu_handle_fault: vaddr=%lx type=%s crp=%p\n",
- vaddr, read_flag ? "read" : "write", crp);
+ pr_info("%s: vaddr=%lx type=%s crp=%p\n", __func__, vaddr,
+ read_flag ? "read" : "write", crp);
#endif
segment = (vaddr >> SUN3_PMEG_SIZE_BITS) & 0x7FF;
offset = (vaddr >> SUN3_PTE_SIZE_BITS) & 0xF;
#ifdef DEBUG_MMU_EMU
- pr_info("mmu_emu_handle_fault: segment=%lx offset=%lx\n", segment,
- offset);
+ pr_info("%s: segment=%lx offset=%lx\n", __func__, segment, offset);
#endif
pte = (pte_t *) pgd_val (*(crp + segment));
diff --git a/arch/m68k/virt/config.c b/arch/m68k/virt/config.c
index 632ba200ad42..4ab22946ff68 100644
--- a/arch/m68k/virt/config.c
+++ b/arch/m68k/virt/config.c
@@ -2,6 +2,7 @@
#include <linux/reboot.h>
#include <linux/serial_core.h>
+#include <linux/random.h>
#include <clocksource/timer-goldfish.h>
#include <asm/bootinfo.h>
@@ -92,6 +93,16 @@ int __init virt_parse_bootinfo(const struct bi_record *record)
data += 4;
virt_bi_data.virtio.irq = be32_to_cpup(data);
break;
+ case BI_VIRT_RNG_SEED: {
+ u16 len = be16_to_cpup(data);
+ add_bootloader_randomness(data + 2, len);
+ /*
+ * Zero the data to preserve forward secrecy, and zero the
+ * length to prevent kexec from using it.
+ */
+ memzero_explicit((void *)data, len + 2);
+ break;
+ }
default:
unknown = 1;
break;
diff --git a/arch/m68k/virt/ints.c b/arch/m68k/virt/ints.c
index 95818f901ebe..896aa6eb8bcc 100644
--- a/arch/m68k/virt/ints.c
+++ b/arch/m68k/virt/ints.c
@@ -12,6 +12,7 @@
#include <asm/hwtest.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
+#include <asm/processor.h>
#include <asm/virt.h>
#define GFPIC_REG_IRQ_PENDING 0x04
@@ -19,8 +20,6 @@
#define GFPIC_REG_IRQ_DISABLE 0x0c
#define GFPIC_REG_IRQ_ENABLE 0x10
-extern void show_registers(struct pt_regs *regs);
-
static struct resource picres[6];
static const char *picname[6] = {
"goldfish_pic.0",
diff --git a/arch/m68k/virt/platform.c b/arch/m68k/virt/platform.c
index cb820f19a221..1560c4140ab9 100644
--- a/arch/m68k/virt/platform.c
+++ b/arch/m68k/virt/platform.c
@@ -8,20 +8,15 @@
#define VIRTIO_BUS_NB 128
-static int __init virt_virtio_init(unsigned int id)
+static struct platform_device * __init virt_virtio_init(unsigned int id)
{
const struct resource res[] = {
DEFINE_RES_MEM(virt_bi_data.virtio.mmio + id * 0x200, 0x200),
DEFINE_RES_IRQ(virt_bi_data.virtio.irq + id),
};
- struct platform_device *pdev;
- pdev = platform_device_register_simple("virtio-mmio", id,
+ return platform_device_register_simple("virtio-mmio", id,
res, ARRAY_SIZE(res));
- if (IS_ERR(pdev))
- return PTR_ERR(pdev);
-
- return 0;
}
static int __init virt_platform_init(void)
@@ -35,8 +30,10 @@ static int __init virt_platform_init(void)
DEFINE_RES_MEM(virt_bi_data.rtc.mmio + 0x1000, 0x1000),
DEFINE_RES_IRQ(virt_bi_data.rtc.irq + 1),
};
- struct platform_device *pdev;
+ struct platform_device *pdev1, *pdev2;
+ struct platform_device *pdevs[VIRTIO_BUS_NB];
unsigned int i;
+ int ret = 0;
if (!MACH_IS_VIRT)
return -ENODEV;
@@ -44,29 +41,40 @@ static int __init virt_platform_init(void)
/* We need this to have DMA'able memory provided to goldfish-tty */
min_low_pfn = 0;
- pdev = platform_device_register_simple("goldfish_tty",
- PLATFORM_DEVID_NONE,
- goldfish_tty_res,
- ARRAY_SIZE(goldfish_tty_res));
- if (IS_ERR(pdev))
- return PTR_ERR(pdev);
+ pdev1 = platform_device_register_simple("goldfish_tty",
+ PLATFORM_DEVID_NONE,
+ goldfish_tty_res,
+ ARRAY_SIZE(goldfish_tty_res));
+ if (IS_ERR(pdev1))
+ return PTR_ERR(pdev1);
- pdev = platform_device_register_simple("goldfish_rtc",
- PLATFORM_DEVID_NONE,
- goldfish_rtc_res,
- ARRAY_SIZE(goldfish_rtc_res));
- if (IS_ERR(pdev))
- return PTR_ERR(pdev);
+ pdev2 = platform_device_register_simple("goldfish_rtc",
+ PLATFORM_DEVID_NONE,
+ goldfish_rtc_res,
+ ARRAY_SIZE(goldfish_rtc_res));
+ if (IS_ERR(pdev2)) {
+ ret = PTR_ERR(pdev2);
+ goto err_unregister_tty;
+ }
for (i = 0; i < VIRTIO_BUS_NB; i++) {
- int err;
-
- err = virt_virtio_init(i);
- if (err)
- return err;
+ pdevs[i] = virt_virtio_init(i);
+ if (IS_ERR(pdevs[i])) {
+ ret = PTR_ERR(pdevs[i]);
+ goto err_unregister_rtc_virtio;
+ }
}
return 0;
+
+err_unregister_rtc_virtio:
+ while (i > 0)
+ platform_device_unregister(pdevs[--i]);
+ platform_device_unregister(pdev2);
+err_unregister_tty:
+ platform_device_unregister(pdev1);
+
+ return ret;
}
arch_initcall(virt_platform_init);
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index 6cdcbf4de763..9cb9ed44bcaf 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -263,7 +263,7 @@ static int next_cpu_for_irq(struct irq_data *data)
#ifdef CONFIG_SMP
int cpu;
- struct cpumask *mask = irq_data_get_affinity_mask(data);
+ const struct cpumask *mask = irq_data_get_affinity_mask(data);
int weight = cpumask_weight(mask);
struct octeon_ciu_chip_data *cd = irq_data_get_irq_chip_data(data);
@@ -758,7 +758,7 @@ static void octeon_irq_cpu_offline_ciu(struct irq_data *data)
{
int cpu = smp_processor_id();
cpumask_t new_affinity;
- struct cpumask *mask = irq_data_get_affinity_mask(data);
+ const struct cpumask *mask = irq_data_get_affinity_mask(data);
if (!cpumask_test_cpu(cpu, mask))
return;
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index 3185fd3220ec..c5c6864e64bc 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -8,6 +8,8 @@
#ifndef _ASM_MIPS_JUMP_LABEL_H
#define _ASM_MIPS_JUMP_LABEL_H
+#define arch_jump_label_transform_static arch_jump_label_transform
+
#ifndef __ASSEMBLY__
#include <linux/types.h>
diff --git a/arch/mips/include/asm/mach-loongson64/irq.h b/arch/mips/include/asm/mach-loongson64/irq.h
index 98ea977cf0b8..67c15f320f93 100644
--- a/arch/mips/include/asm/mach-loongson64/irq.h
+++ b/arch/mips/include/asm/mach-loongson64/irq.h
@@ -7,8 +7,9 @@
#define NR_MIPS_CPU_IRQS 8
#define NR_MAX_CHAINED_IRQS 40 /* Chained IRQs means those not directly used by devices */
#define NR_IRQS (NR_IRQS_LEGACY + NR_MIPS_CPU_IRQS + NR_MAX_CHAINED_IRQS + 256)
-
+#define MAX_IO_PICS 1
#define MIPS_CPU_IRQ_BASE NR_IRQS_LEGACY
+#define GSI_MIN_CPU_IRQ 0
#include <asm/mach-generic/irq.h>
diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c
index 662c8db9f45b..71a882c8c6eb 100644
--- a/arch/mips/kernel/jump_label.c
+++ b/arch/mips/kernel/jump_label.c
@@ -88,3 +88,22 @@ void arch_jump_label_transform(struct jump_entry *e,
mutex_unlock(&text_mutex);
}
+
+#ifdef CONFIG_MODULES
+void jump_label_apply_nops(struct module *mod)
+{
+ struct jump_entry *iter_start = mod->jump_entries;
+ struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
+ struct jump_entry *iter;
+
+ /* if the module doesn't have jump label entries, just return */
+ if (iter_start == iter_stop)
+ return;
+
+ for (iter = iter_start; iter < iter_stop; iter++) {
+ /* Only write NOPs for arch_branch_static(). */
+ if (jump_label_init_type(iter) == JUMP_LABEL_NOP)
+ arch_jump_label_transform(iter, JUMP_LABEL_NOP);
+ }
+}
+#endif
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 14f46d17500a..0c936cbf20c5 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -21,6 +21,7 @@
#include <linux/spinlock.h>
#include <linux/jump_label.h>
+extern void jump_label_apply_nops(struct module *mod);
struct mips_hi16 {
struct mips_hi16 *next;
@@ -428,8 +429,8 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *s;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
- /* Make jump label nops. */
- jump_label_apply_nops(me);
+ if (IS_ENABLED(CONFIG_JUMP_LABEL))
+ jump_label_apply_nops(me);
INIT_LIST_HEAD(&me->arch.dbe_list);
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index 0fe2d79fb123..5ebb1771b4ab 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -315,7 +315,7 @@ unsigned long txn_affinity_addr(unsigned int irq, int cpu)
{
#ifdef CONFIG_SMP
struct irq_data *d = irq_get_irq_data(irq);
- cpumask_copy(irq_data_get_affinity_mask(d), cpumask_of(cpu));
+ irq_data_update_affinity(d, cpumask_of(cpu));
#endif
return per_cpu(cpu_data, cpu).txn_addr;
diff --git a/arch/parisc/kernel/jump_label.c b/arch/parisc/kernel/jump_label.c
index d2f3cb12e282..e253b134500d 100644
--- a/arch/parisc/kernel/jump_label.c
+++ b/arch/parisc/kernel/jump_label.c
@@ -42,14 +42,3 @@ void arch_jump_label_transform(struct jump_entry *entry,
patch_text(addr, insn);
}
-
-void arch_jump_label_transform_static(struct jump_entry *entry,
- enum jump_label_type type)
-{
- /*
- * We use the architected NOP in arch_static_branch, so there's no
- * need to patch an identical NOP over the top of it here. The core
- * will call arch_jump_label_transform from a module notifier if the
- * NOP needs to be replaced by a branch.
- */
-}
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 7aa12e88c580..4d8f26c1399b 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -256,6 +256,7 @@ config PPC
select IRQ_FORCED_THREADING
select MMU_GATHER_PAGE_SIZE
select MMU_GATHER_RCU_TABLE_FREE
+ select MMU_GATHER_MERGE_VMAS
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE
select NEED_PER_CPU_EMBED_FIRST_CHUNK if PPC64
@@ -281,6 +282,10 @@ config PPC
# Please keep this list sorted alphabetically.
#
+config PPC_LONG_DOUBLE_128
+ depends on PPC64
+ def_bool $(success,test "$(shell,echo __LONG_DOUBLE_128__ | $(CC) -E -P -)" = 1)
+
config PPC_BARRIER_NOSPEC
bool
default y
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index 09a9ae5f3656..b3de6102a907 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -19,8 +19,6 @@
#include <linux/pagemap.h>
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
#define __tlb_remove_tlb_entry __tlb_remove_tlb_entry
#define tlb_flush tlb_flush
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index f91f0f29a566..c8cf924bf9c0 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -20,6 +20,7 @@ CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_prom_init.o += -fno-stack-protector
CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
CFLAGS_prom_init.o += -ffreestanding
+CFLAGS_prom_init.o += $(call cc-option, -ftrivial-auto-var-init=uninitialized)
ifdef CONFIG_FUNCTION_TRACER
# Do not trace early boot code
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 32ffef9f6e5b..fcbb81feb7ad 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -38,7 +38,7 @@ config RISCV
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
select ARCH_SUPPORTS_HUGETLBFS if MMU
- select ARCH_SUPPORTS_PAGE_TABLE_CHECK
+ select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 34cf8a598617..81029d40a672 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -73,6 +73,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y)
endif
KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax)
+KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax)
# GCC versions that support the "-mstrict-align" option default to allowing
# unaligned accesses. While unaligned accesses are explicitly allowed in the
@@ -110,7 +111,7 @@ PHONY += vdso_install
vdso_install:
$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
$(if $(CONFIG_COMPAT),$(Q)$(MAKE) \
- $(build)=arch/riscv/kernel/compat_vdso $@)
+ $(build)=arch/riscv/kernel/compat_vdso compat_$@)
ifeq ($(KBUILD_EXTMOD),)
ifeq ($(CONFIG_MMU),y)
diff --git a/arch/riscv/boot/dts/canaan/canaan_kd233.dts b/arch/riscv/boot/dts/canaan/canaan_kd233.dts
index 039b92abf046..f72540bd14a3 100644
--- a/arch/riscv/boot/dts/canaan/canaan_kd233.dts
+++ b/arch/riscv/boot/dts/canaan/canaan_kd233.dts
@@ -35,7 +35,7 @@
gpio-keys {
compatible = "gpio-keys";
- key0 {
+ key {
label = "KEY0";
linux,code = <BTN_0>;
gpios = <&gpio0 10 GPIO_ACTIVE_LOW>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
index b9e30df127fe..8abdbe26a1d0 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
@@ -47,7 +47,7 @@
gpio-keys {
compatible = "gpio-keys";
- boot {
+ key-boot {
label = "BOOT";
linux,code = <BTN_0>;
gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
index 8d23401b0bbb..3c6df1ecf76f 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
@@ -52,7 +52,7 @@
gpio-keys {
compatible = "gpio-keys";
- boot {
+ key-boot {
label = "BOOT";
linux,code = <BTN_0>;
gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
index 24fd83b43d9d..03c9843d503e 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
@@ -46,19 +46,19 @@
gpio-keys {
compatible = "gpio-keys";
- up {
+ key-up {
label = "UP";
linux,code = <BTN_1>;
gpios = <&gpio1_0 7 GPIO_ACTIVE_LOW>;
};
- press {
+ key-press {
label = "PRESS";
linux,code = <BTN_0>;
gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
};
- down {
+ key-down {
label = "DOWN";
linux,code = <BTN_2>;
gpios = <&gpio0 1 GPIO_ACTIVE_LOW>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
index 25341f38292a..7164ad063178 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
@@ -23,7 +23,7 @@
gpio-keys {
compatible = "gpio-keys";
- boot {
+ key-boot {
label = "BOOT";
linux,code = <BTN_0>;
gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi
index 3095d08453a1..496d3b7642bd 100644
--- a/arch/riscv/boot/dts/microchip/mpfs.dtsi
+++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi
@@ -50,6 +50,7 @@
riscv,isa = "rv64imafdc";
clocks = <&clkcfg CLK_CPU>;
tlb-split;
+ next-level-cache = <&cctrllr>;
status = "okay";
cpu1_intc: interrupt-controller {
@@ -77,6 +78,7 @@
riscv,isa = "rv64imafdc";
clocks = <&clkcfg CLK_CPU>;
tlb-split;
+ next-level-cache = <&cctrllr>;
status = "okay";
cpu2_intc: interrupt-controller {
@@ -104,6 +106,7 @@
riscv,isa = "rv64imafdc";
clocks = <&clkcfg CLK_CPU>;
tlb-split;
+ next-level-cache = <&cctrllr>;
status = "okay";
cpu3_intc: interrupt-controller {
@@ -131,6 +134,7 @@
riscv,isa = "rv64imafdc";
clocks = <&clkcfg CLK_CPU>;
tlb-split;
+ next-level-cache = <&cctrllr>;
status = "okay";
cpu4_intc: interrupt-controller {
#interrupt-cells = <1>;
diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c
index 672f02b21ce0..1031038423e7 100644
--- a/arch/riscv/errata/sifive/errata.c
+++ b/arch/riscv/errata/sifive/errata.c
@@ -111,6 +111,7 @@ void __init_or_module sifive_errata_patch_func(struct alt_entry *begin,
cpu_apply_errata |= tmp;
}
}
- if (cpu_apply_errata != cpu_req_errata)
+ if (stage != RISCV_ALTERNATIVES_MODULE &&
+ cpu_apply_errata != cpu_req_errata)
warn_miss_errata(cpu_req_errata - cpu_apply_errata);
}
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index 5c2aba5efbd0..dc42375c2357 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -175,7 +175,7 @@ static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot)
static inline unsigned long _pud_pfn(pud_t pud)
{
- return pud_val(pud) >> _PAGE_PFN_SHIFT;
+ return __page_val_to_pfn(pud_val(pud));
}
static inline pmd_t *pud_pgtable(pud_t pud)
@@ -278,13 +278,13 @@ static inline p4d_t pfn_p4d(unsigned long pfn, pgprot_t prot)
static inline unsigned long _p4d_pfn(p4d_t p4d)
{
- return p4d_val(p4d) >> _PAGE_PFN_SHIFT;
+ return __page_val_to_pfn(p4d_val(p4d));
}
static inline pud_t *p4d_pgtable(p4d_t p4d)
{
if (pgtable_l4_enabled)
- return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
+ return (pud_t *)pfn_to_virt(__page_val_to_pfn(p4d_val(p4d)));
return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) });
}
@@ -292,7 +292,7 @@ static inline pud_t *p4d_pgtable(p4d_t p4d)
static inline struct page *p4d_page(p4d_t p4d)
{
- return pfn_to_page(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
+ return pfn_to_page(__page_val_to_pfn(p4d_val(p4d)));
}
#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
@@ -347,7 +347,7 @@ static inline void pgd_clear(pgd_t *pgd)
static inline p4d_t *pgd_pgtable(pgd_t pgd)
{
if (pgtable_l5_enabled)
- return (p4d_t *)pfn_to_virt(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
+ return (p4d_t *)pfn_to_virt(__page_val_to_pfn(pgd_val(pgd)));
return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) });
}
@@ -355,7 +355,7 @@ static inline p4d_t *pgd_pgtable(pgd_t pgd)
static inline struct page *pgd_page(pgd_t pgd)
{
- return pfn_to_page(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
+ return pfn_to_page(__page_val_to_pfn(pgd_val(pgd)));
}
#define pgd_page(pgd) pgd_page(pgd)
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 1d1be9d9419c..5dbd6610729b 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -261,7 +261,7 @@ static inline pgd_t pfn_pgd(unsigned long pfn, pgprot_t prot)
static inline unsigned long _pgd_pfn(pgd_t pgd)
{
- return pgd_val(pgd) >> _PAGE_PFN_SHIFT;
+ return __page_val_to_pfn(pgd_val(pgd));
}
static inline struct page *pmd_page(pmd_t pmd)
@@ -590,14 +590,14 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd)
return __pmd(pmd_val(pmd) & ~(_PAGE_PRESENT|_PAGE_PROT_NONE));
}
-#define __pmd_to_phys(pmd) (pmd_val(pmd) >> _PAGE_PFN_SHIFT << PAGE_SHIFT)
+#define __pmd_to_phys(pmd) (__page_val_to_pfn(pmd_val(pmd)) << PAGE_SHIFT)
static inline unsigned long pmd_pfn(pmd_t pmd)
{
return ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT);
}
-#define __pud_to_phys(pud) (pud_val(pud) >> _PAGE_PFN_SHIFT << PAGE_SHIFT)
+#define __pud_to_phys(pud) (__page_val_to_pfn(pud_val(pud)) << PAGE_SHIFT)
static inline unsigned long pud_pfn(pud_t pud)
{
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index c71d6591d539..33bb60a354cd 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -78,7 +78,7 @@ obj-$(CONFIG_SMP) += cpu_ops_sbi.o
endif
obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o
obj-$(CONFIG_KGDB) += kgdb.o
-obj-$(CONFIG_KEXEC) += kexec_relocate.o crash_save_regs.o machine_kexec.o
+obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o
obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c
index 9cb85095fd45..0cb94992c15b 100644
--- a/arch/riscv/kernel/elf_kexec.c
+++ b/arch/riscv/kernel/elf_kexec.c
@@ -349,7 +349,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
{
const char *strtab, *name, *shstrtab;
const Elf_Shdr *sechdrs;
- Elf_Rela *relas;
+ Elf64_Rela *relas;
int i, r_type;
/* String & section header string table */
diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c
index 20e09056d141..e6694759dbd0 100644
--- a/arch/riscv/kernel/jump_label.c
+++ b/arch/riscv/kernel/jump_label.c
@@ -39,15 +39,3 @@ void arch_jump_label_transform(struct jump_entry *entry,
patch_text_nosync(addr, &insn, sizeof(insn));
mutex_unlock(&text_mutex);
}
-
-void arch_jump_label_transform_static(struct jump_entry *entry,
- enum jump_label_type type)
-{
- /*
- * We use the same instructions in the arch_static_branch and
- * arch_static_branch_jump inline functions, so there's no
- * need to patch them up here.
- * The core will call arch_jump_label_transform when those
- * instructions need to be replaced.
- */
-}
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 1c00695ebee7..9826073fbc67 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -54,7 +54,7 @@ static inline unsigned long gstage_pte_index(gpa_t addr, u32 level)
static inline unsigned long gstage_pte_page_vaddr(pte_t pte)
{
- return (unsigned long)pfn_to_virt(pte_val(pte) >> _PAGE_PFN_SHIFT);
+ return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte)));
}
static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 7f4ad5e4373a..f3455dc013fa 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -781,9 +781,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
+ kvm_vcpu_srcu_read_unlock(vcpu);
rcuwait_wait_event(wait,
(!vcpu->arch.power_off) && (!vcpu->arch.pause),
TASK_INTERRUPTIBLE);
+ kvm_vcpu_srcu_read_lock(vcpu);
if (vcpu->arch.power_off || vcpu->arch.pause) {
/*
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8cd9e56c629b..5a1a8dfda6f8 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -204,6 +204,7 @@ config S390
select IOMMU_SUPPORT if PCI
select MMU_GATHER_NO_GATHER
select MMU_GATHER_RCU_TABLE_FREE
+ select MMU_GATHER_MERGE_VMAS
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE if PCI
select NEED_SG_DMA_LENGTH if PCI
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 495c68a4df1e..4cb5d17e7ead 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -82,7 +82,7 @@ endif
ifdef CONFIG_EXPOLINE
ifdef CONFIG_EXPOLINE_EXTERN
- KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline.o
+ KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline/expoline.o
CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern
CC_FLAGS_EXPOLINE += -mfunction-return=thunk-extern
else
@@ -163,6 +163,12 @@ vdso_prepare: prepare0
$(Q)$(MAKE) $(build)=arch/s390/kernel/vdso64 include/generated/vdso64-offsets.h
$(if $(CONFIG_COMPAT),$(Q)$(MAKE) \
$(build)=arch/s390/kernel/vdso32 include/generated/vdso32-offsets.h)
+
+ifdef CONFIG_EXPOLINE_EXTERN
+modules_prepare: expoline_prepare
+expoline_prepare:
+ $(Q)$(MAKE) $(build)=arch/s390/lib/expoline arch/s390/lib/expoline/expoline.o
+endif
endif
# Don't use tabs in echo arguments
diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
index 2c6e1c6ecbe7..4120c428dc37 100644
--- a/arch/s390/include/asm/archrandom.h
+++ b/arch/s390/include/asm/archrandom.h
@@ -2,7 +2,7 @@
/*
* Kernel interface for the s390 arch_random_* functions
*
- * Copyright IBM Corp. 2017, 2020
+ * Copyright IBM Corp. 2017, 2022
*
* Author: Harald Freudenberger <freude@de.ibm.com>
*
@@ -14,6 +14,7 @@
#ifdef CONFIG_ARCH_RANDOM
#include <linux/static_key.h>
+#include <linux/preempt.h>
#include <linux/atomic.h>
#include <asm/cpacf.h>
@@ -32,7 +33,8 @@ static inline bool __must_check arch_get_random_int(unsigned int *v)
static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
{
- if (static_branch_likely(&s390_arch_random_available)) {
+ if (static_branch_likely(&s390_arch_random_available) &&
+ in_task()) {
cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
atomic64_add(sizeof(*v), &s390_arch_random_counter);
return true;
@@ -42,7 +44,8 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
{
- if (static_branch_likely(&s390_arch_random_available)) {
+ if (static_branch_likely(&s390_arch_random_available) &&
+ in_task()) {
cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
atomic64_add(sizeof(*v), &s390_arch_random_counter);
return true;
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 916cfcb36d8a..895f774bbcc5 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -10,7 +10,6 @@
#include <linux/stringify.h>
#define JUMP_LABEL_NOP_SIZE 6
-#define JUMP_LABEL_NOP_OFFSET 2
#ifdef CONFIG_CC_IS_CLANG
#define JUMP_LABEL_STATIC_KEY_CONSTRAINT "i"
@@ -21,12 +20,12 @@
#endif
/*
- * We use a brcl 0,2 instruction for jump labels at compile time so it
+ * We use a brcl 0,<offset> instruction for jump labels so it
* can be easily distinguished from a hotpatch generated instruction.
*/
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("0: brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n"
+ asm_volatile_goto("0: brcl 0,%l[label]\n"
".pushsection __jump_table,\"aw\"\n"
".balign 8\n"
".long 0b-.,%l[label]-.\n"
diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
index d910d71b5bb5..7e9e99523e95 100644
--- a/arch/s390/include/asm/nospec-insn.h
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -2,8 +2,6 @@
#ifndef _ASM_S390_NOSPEC_ASM_H
#define _ASM_S390_NOSPEC_ASM_H
-#include <asm/alternative-asm.h>
-#include <asm/asm-offsets.h>
#include <asm/dwarf.h>
#ifdef __ASSEMBLY__
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index fe6407f0eb1b..3a5c8fb590e5 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -27,9 +27,6 @@ static inline void tlb_flush(struct mmu_gather *tlb);
static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
struct page *page, int page_size);
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-
#define tlb_flush tlb_flush
#define pte_free_tlb pte_free_tlb
#define pmd_free_tlb pmd_free_tlb
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
index 6bec000c6c1c..e808bb8bc0da 100644
--- a/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@ -44,14 +44,8 @@ static void jump_label_bug(struct jump_entry *entry, struct insn *expected,
panic("Corrupted kernel text");
}
-static struct insn orignop = {
- .opcode = 0xc004,
- .offset = JUMP_LABEL_NOP_OFFSET >> 1,
-};
-
static void jump_label_transform(struct jump_entry *entry,
- enum jump_label_type type,
- int init)
+ enum jump_label_type type)
{
void *code = (void *)jump_entry_code(entry);
struct insn old, new;
@@ -63,27 +57,22 @@ static void jump_label_transform(struct jump_entry *entry,
jump_label_make_branch(entry, &old);
jump_label_make_nop(entry, &new);
}
- if (init) {
- if (memcmp(code, &orignop, sizeof(orignop)))
- jump_label_bug(entry, &orignop, &new);
- } else {
- if (memcmp(code, &old, sizeof(old)))
- jump_label_bug(entry, &old, &new);
- }
+ if (memcmp(code, &old, sizeof(old)))
+ jump_label_bug(entry, &old, &new);
s390_kernel_write(code, &new, sizeof(new));
}
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
- jump_label_transform(entry, type, 0);
+ jump_label_transform(entry, type);
text_poke_sync();
}
bool arch_jump_label_transform_queue(struct jump_entry *entry,
enum jump_label_type type)
{
- jump_label_transform(entry, type, 0);
+ jump_label_transform(entry, type);
return true;
}
@@ -91,10 +80,3 @@ void arch_jump_label_transform_apply(void)
{
text_poke_sync();
}
-
-void __init_or_module arch_jump_label_transform_static(struct jump_entry *entry,
- enum jump_label_type type)
-{
- jump_label_transform(entry, type, 1);
- text_poke_sync();
-}
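For reference, the brcl site this code patches is six bytes: a 16-bit opcode plus a signed 32-bit halfword offset, with 0xc004 (brcl 0, never taken) acting as the nop form and 0xc0f4 (brcl 15) as the taken branch. The struct and helper below are an illustrative userspace sketch of that encoding, not the kernel's own types.

#include <stdint.h>
#include <stdio.h>

/* Illustrative encoding of the 6-byte s390 brcl used for jump labels. */
struct s390_brcl {
	uint16_t opcode;   /* 0xc004 = brcl 0 (never taken), 0xc0f4 = brcl 15 (always) */
	int32_t  offset;   /* signed halfword offset relative to the instruction       */
} __attribute__((packed));

static struct s390_brcl make_site(uint64_t code, uint64_t target, int branch)
{
	struct s390_brcl insn = {
		.opcode = branch ? 0xc0f4 : 0xc004,
		.offset = (int32_t)((int64_t)(target - code) >> 1),
	};
	return insn;
}

int main(void)
{
	/* Hypothetical addresses of a patch site and its out-of-line label. */
	uint64_t code = 0x1000, target = 0x1040;
	struct s390_brcl nop = make_site(code, target, 0);
	struct s390_brcl jmp = make_site(code, target, 1);

	printf("nop:    opcode=%#06x offset=%d halfwords\n", nop.opcode, nop.offset);
	printf("branch: opcode=%#06x offset=%d halfwords\n", jmp.opcode, jmp.offset);
	return 0;
}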
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 26125a9c436d..2d159b32885b 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -548,6 +548,5 @@ int module_finalize(const Elf_Ehdr *hdr,
#endif /* CONFIG_FUNCTION_TRACER */
}
- jump_label_apply_nops(me);
return 0;
}
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 5d415b3db6d1..580d2e3265cb 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -7,7 +7,6 @@ lib-y += delay.o string.o uaccess.o find.o spinlock.o
obj-y += mem.o xor.o
lib-$(CONFIG_KPROBES) += probes.o
lib-$(CONFIG_UPROBES) += probes.o
-obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o
obj-$(CONFIG_S390_KPROBES_SANITY_TEST) += test_kprobes_s390.o
test_kprobes_s390-objs += test_kprobes_asm.o test_kprobes.o
@@ -22,3 +21,5 @@ obj-$(CONFIG_S390_MODULES_SANITY_TEST) += test_modules.o
obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+
+obj-$(CONFIG_EXPOLINE_EXTERN) += expoline/
diff --git a/arch/s390/lib/expoline/Makefile b/arch/s390/lib/expoline/Makefile
new file mode 100644
index 000000000000..854631d9cb03
--- /dev/null
+++ b/arch/s390/lib/expoline/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y += expoline.o
diff --git a/arch/s390/lib/expoline.S b/arch/s390/lib/expoline/expoline.S
index 92ed8409a7a4..92ed8409a7a4 100644
--- a/arch/s390/lib/expoline.S
+++ b/arch/s390/lib/expoline/expoline.S
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index cf9a3ec32406..fba90e670ed4 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -271,8 +271,12 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
#endif /* CONFIG_HAVE_IOREMAP_PROT */
#else /* CONFIG_MMU */
-#define iounmap(addr) do { } while (0)
-#define ioremap(offset, size) ((void __iomem *)(unsigned long)(offset))
+static inline void __iomem *ioremap(phys_addr_t offset, size_t size)
+{
+ return (void __iomem *)(unsigned long)offset;
+}
+
+static inline void iounmap(volatile void __iomem *addr) { }
#endif /* CONFIG_MMU */
#define ioremap_uc ioremap
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index ef0f0827cf57..56269c2c3414 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -230,16 +230,17 @@ void migrate_irqs(void)
struct irq_data *data = irq_get_irq_data(irq);
if (irq_data_get_node(data) == cpu) {
- struct cpumask *mask = irq_data_get_affinity_mask(data);
+ const struct cpumask *mask = irq_data_get_affinity_mask(data);
unsigned int newcpu = cpumask_any_and(mask,
cpu_online_mask);
if (newcpu >= nr_cpu_ids) {
pr_info_ratelimited("IRQ%u no longer affine to CPU%u\n",
irq, cpu);
- cpumask_setall(mask);
+ irq_set_affinity(irq, cpu_all_mask);
+ } else {
+ irq_set_affinity(irq, mask);
}
- irq_set_affinity(irq, mask);
}
}
}
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index ba449c47effd..4f7d1dfbc608 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -67,6 +67,8 @@ config SPARC64
select HAVE_KRETPROBES
select HAVE_KPROBES
select MMU_GATHER_RCU_TABLE_FREE if SMP
+ select MMU_GATHER_MERGE_VMAS
+ select MMU_GATHER_NO_FLUSH_CACHE
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h
index 779a5a0f0608..3037187482db 100644
--- a/arch/sparc/include/asm/tlb_64.h
+++ b/arch/sparc/include/asm/tlb_64.h
@@ -22,8 +22,6 @@ void smp_flush_tlb_mm(struct mm_struct *mm);
void __flush_tlb_pending(unsigned long, unsigned long, unsigned long *);
void flush_tlb_pending(void);
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
#define tlb_flush(tlb) flush_tlb_pending()
/*
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index df39580f398d..66c45a2764bc 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -208,9 +208,6 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
{
- /* make jump label nops */
- jump_label_apply_nops(me);
-
do_patch_sections(hdr, sechdrs);
/* Cheetah's I-cache is fully coherent. */
diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h
index 95af12e82a32..cdbd9653aa14 100644
--- a/arch/um/include/asm/page.h
+++ b/arch/um/include/asm/page.h
@@ -102,8 +102,8 @@ extern unsigned long uml_physmem;
* casting is the right thing, but 32-bit UML can't have 64-bit virtual
* addresses
*/
-#define __pa(virt) to_phys((void *) (unsigned long) (virt))
-#define __va(phys) to_virt((unsigned long) (phys))
+#define __pa(virt) uml_to_phys((void *) (unsigned long) (virt))
+#define __va(phys) uml_to_virt((unsigned long) (phys))
#define phys_to_pfn(p) ((p) >> PAGE_SHIFT)
#define pfn_to_phys(pfn) PFN_PHYS(pfn)
diff --git a/arch/um/include/shared/mem.h b/arch/um/include/shared/mem.h
index 4862c91d4213..98aacd544108 100644
--- a/arch/um/include/shared/mem.h
+++ b/arch/um/include/shared/mem.h
@@ -9,12 +9,12 @@
extern int phys_mapping(unsigned long phys, unsigned long long *offset_out);
extern unsigned long uml_physmem;
-static inline unsigned long to_phys(void *virt)
+static inline unsigned long uml_to_phys(void *virt)
{
return(((unsigned long) virt) - uml_physmem);
}
-static inline void *to_virt(unsigned long phys)
+static inline void *uml_to_virt(unsigned long phys)
{
return((void *) uml_physmem + phys);
}
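The renamed helpers keep the same arithmetic as before: a physical address is simply the virtual address minus the base of the UML physical memory area. A tiny round-trip check, with an arbitrary base chosen for the example:

#include <stdint.h>
#include <stdio.h>

static uintptr_t uml_physmem = 0x60000000;  /* arbitrary base for the example */

static uintptr_t uml_to_phys(void *virt)  { return (uintptr_t)virt - uml_physmem; }
static void *uml_to_virt(uintptr_t phys)  { return (void *)(uml_physmem + phys); }

int main(void)
{
	void *virt = (void *)0x60001234;
	uintptr_t phys = uml_to_phys(virt);

	printf("virt %p -> phys %#lx -> virt %p\n",
	       virt, (unsigned long)phys, uml_to_virt(phys));
	return 0;
}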
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 0760e24f2eba..9838967d0b2f 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -432,6 +432,10 @@ void apply_retpolines(s32 *start, s32 *end)
{
}
+void apply_returns(s32 *start, s32 *end)
+{
+}
+
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
}
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 87d3129e7362..c316c993a949 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -251,7 +251,7 @@ static int userspace_tramp(void *stack)
signal(SIGTERM, SIG_DFL);
signal(SIGWINCH, SIG_IGN);
- fd = phys_mapping(to_phys(__syscall_stub_start), &offset);
+ fd = phys_mapping(uml_to_phys(__syscall_stub_start), &offset);
addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE,
PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset);
if (addr == MAP_FAILED) {
@@ -261,7 +261,7 @@ static int userspace_tramp(void *stack)
}
if (stack != NULL) {
- fd = phys_mapping(to_phys(stack), &offset);
+ fd = phys_mapping(uml_to_phys(stack), &offset);
addr = mmap((void *) STUB_DATA,
UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
MAP_FIXED | MAP_SHARED, fd, offset);
@@ -534,7 +534,7 @@ int copy_context_skas0(unsigned long new_stack, int pid)
struct stub_data *data = (struct stub_data *) current_stack;
struct stub_data *child_data = (struct stub_data *) new_stack;
unsigned long long new_offset;
- int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset);
+ int new_fd = phys_mapping(uml_to_phys((void *)new_stack), &new_offset);
/*
* prepare offset and fd of child's stack as argument for parent's
diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore
index 677111acbaa3..f2e1d6c347fb 100644
--- a/arch/x86/.gitignore
+++ b/arch/x86/.gitignore
@@ -3,6 +3,4 @@ boot/compressed/vmlinux
tools/test_get_len
tools/insn_sanity
tools/insn_decoder_test
-purgatory/kexec-purgatory.c
purgatory/purgatory.ro
-
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index be0b95e51df6..5aa4c2ecf5c7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -245,6 +245,7 @@ config X86
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT
+ select MMU_GATHER_MERGE_VMAS
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if UNWINDER_ORC || STACK_VALIDATION
@@ -277,6 +278,7 @@ config X86
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
select TRACE_IRQFLAGS_SUPPORT
+ select TRACE_IRQFLAGS_NMI_SUPPORT
select USER_STACKTRACE_SUPPORT
select VIRT_TO_BUS
select HAVE_ARCH_KCSAN if X86_64
@@ -391,8 +393,8 @@ config PGTABLE_LEVELS
config CC_HAS_SANE_STACKPROTECTOR
bool
- default $(success,$(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC)) if 64BIT
- default $(success,$(srctree)/scripts/gcc-x86_32-has-stack-protector.sh $(CC))
+ default $(success,$(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC) $(CLANG_FLAGS)) if 64BIT
+ default $(success,$(srctree)/scripts/gcc-x86_32-has-stack-protector.sh $(CC) $(CLANG_FLAGS))
help
We have to make sure stack protector is unconditionally disabled if
the compiler produces broken code or if it does not let us control
@@ -462,29 +464,6 @@ config GOLDFISH
def_bool y
depends on X86_GOLDFISH
-config RETPOLINE
- bool "Avoid speculative indirect branches in kernel"
- select OBJTOOL if HAVE_OBJTOOL
- default y
- help
- Compile kernel with the retpoline compiler options to guard against
- kernel-to-user data leaks by avoiding speculative indirect
- branches. Requires a compiler with -mindirect-branch=thunk-extern
- support for full protection. The kernel may run slower.
-
-config CC_HAS_SLS
- def_bool $(cc-option,-mharden-sls=all)
-
-config SLS
- bool "Mitigate Straight-Line-Speculation"
- depends on CC_HAS_SLS && X86_64
- select OBJTOOL if HAVE_OBJTOOL
- default n
- help
- Compile the kernel with straight-line-speculation options to guard
- against straight line speculation. The kernel image might be slightly
- larger.
-
config X86_CPU_RESCTRL
bool "x86 CPU resource control support"
depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
@@ -2032,7 +2011,7 @@ config KEXEC
config KEXEC_FILE
bool "kexec file based system call"
select KEXEC_CORE
- select BUILD_BIN2C
+ select HAVE_IMA_KEXEC if IMA
depends on X86_64
depends on CRYPTO=y
depends on CRYPTO_SHA256=y
@@ -2453,6 +2432,91 @@ source "kernel/livepatch/Kconfig"
endmenu
+config CC_HAS_SLS
+ def_bool $(cc-option,-mharden-sls=all)
+
+config CC_HAS_RETURN_THUNK
+ def_bool $(cc-option,-mfunction-return=thunk-extern)
+
+menuconfig SPECULATION_MITIGATIONS
+ bool "Mitigations for speculative execution vulnerabilities"
+ default y
+ help
+ Say Y here to enable the options that mitigate speculative
+ execution hardware vulnerabilities.
+
+ If you say N, all mitigations will be disabled. You really
+ should know what you are doing before saying N.
+
+if SPECULATION_MITIGATIONS
+
+config PAGE_TABLE_ISOLATION
+ bool "Remove the kernel mapping in user mode"
+ default y
+ depends on (X86_64 || X86_PAE)
+ help
+ This feature reduces the number of hardware side channels by
+ ensuring that the majority of kernel addresses are not mapped
+ into userspace.
+
+ See Documentation/x86/pti.rst for more details.
+
+config RETPOLINE
+ bool "Avoid speculative indirect branches in kernel"
+ select OBJTOOL if HAVE_OBJTOOL
+ default y
+ help
+ Compile kernel with the retpoline compiler options to guard against
+ kernel-to-user data leaks by avoiding speculative indirect
+ branches. Requires a compiler with -mindirect-branch=thunk-extern
+ support for full protection. The kernel may run slower.
+
+config RETHUNK
+ bool "Enable return-thunks"
+ depends on RETPOLINE && CC_HAS_RETURN_THUNK
+ select OBJTOOL if HAVE_OBJTOOL
+ default y if X86_64
+ help
+ Compile the kernel with the return-thunks compiler option to guard
+ against kernel-to-user data leaks by avoiding return speculation.
+ Requires a compiler with -mfunction-return=thunk-extern
+ support for full protection. The kernel may run slower.
+
+config CPU_UNRET_ENTRY
+ bool "Enable UNRET on kernel entry"
+ depends on CPU_SUP_AMD && RETHUNK && X86_64
+ default y
+ help
+ Compile the kernel with support for the retbleed=unret mitigation.
+
+config CPU_IBPB_ENTRY
+ bool "Enable IBPB on kernel entry"
+ depends on CPU_SUP_AMD && X86_64
+ default y
+ help
+ Compile the kernel with support for the retbleed=ibpb mitigation.
+
+config CPU_IBRS_ENTRY
+ bool "Enable IBRS on kernel entry"
+ depends on CPU_SUP_INTEL && X86_64
+ default y
+ help
+ Compile the kernel with support for the spectre_v2=ibrs mitigation.
+ This mitigates both spectre_v2 and retbleed at great cost to
+ performance.
+
+config SLS
+ bool "Mitigate Straight-Line-Speculation"
+ depends on CC_HAS_SLS && X86_64
+ select OBJTOOL if HAVE_OBJTOOL
+ default n
+ help
+ Compile the kernel with straight-line-speculation options to guard
+ against straight line speculation. The kernel image might be slightly
+ larger.
+
+endif
+
config ARCH_HAS_ADD_PAGES
def_bool y
depends on ARCH_ENABLE_MEMORY_HOTPLUG
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 340399f69954..bdfe08f1a930 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -1,8 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-config TRACE_IRQFLAGS_NMI_SUPPORT
- def_bool y
-
config EARLY_PRINTK_USB
bool
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index a74886aed349..7854685c5f25 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -21,6 +21,13 @@ ifdef CONFIG_CC_IS_CLANG
RETPOLINE_CFLAGS := -mretpoline-external-thunk
RETPOLINE_VDSO_CFLAGS := -mretpoline
endif
+
+ifdef CONFIG_RETHUNK
+RETHUNK_CFLAGS := -mfunction-return=thunk-extern
+RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS)
+endif
+
+export RETHUNK_CFLAGS
export RETPOLINE_CFLAGS
export RETPOLINE_VDSO_CFLAGS
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 7fec5dcf6438..eeadbd7d92cc 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -11,7 +11,7 @@ CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE)
CFLAGS_common.o += -fno-stack-protector
-obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
+obj-y := entry.o entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
obj-y += common.o
obj-y += vdso/
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 29b36e9e4e74..f6907627172b 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -7,6 +7,8 @@
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>
#include <asm/ptrace-abi.h>
+#include <asm/msr.h>
+#include <asm/nospec-branch.h>
/*
@@ -283,6 +285,66 @@ For 32-bit we have the following conventions - kernel is built with
#endif
/*
+ * IBRS kernel mitigation for Spectre_v2.
+ *
+ * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers
+ * the regs it uses (AX, CX, DX). Must be called before the first RET
+ * instruction (NOTE! UNTRAIN_RET includes a RET instruction)
+ *
+ * The optional argument is used to save/restore the current value,
+ * which is used on the paranoid paths.
+ *
+ * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set.
+ */
+.macro IBRS_ENTER save_reg
+#ifdef CONFIG_CPU_IBRS_ENTRY
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
+ movl $MSR_IA32_SPEC_CTRL, %ecx
+
+.ifnb \save_reg
+ rdmsr
+ shl $32, %rdx
+ or %rdx, %rax
+ mov %rax, \save_reg
+ test $SPEC_CTRL_IBRS, %eax
+ jz .Ldo_wrmsr_\@
+ lfence
+ jmp .Lend_\@
+.Ldo_wrmsr_\@:
+.endif
+
+ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
+ movl %edx, %eax
+ shr $32, %rdx
+ wrmsr
+.Lend_\@:
+#endif
+.endm
+
+/*
+ * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX)
+ * regs. Must be called after the last RET.
+ */
+.macro IBRS_EXIT save_reg
+#ifdef CONFIG_CPU_IBRS_ENTRY
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
+ movl $MSR_IA32_SPEC_CTRL, %ecx
+
+.ifnb \save_reg
+ mov \save_reg, %rdx
+.else
+ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
+ andl $(~SPEC_CTRL_IBRS), %edx
+.endif
+
+ movl %edx, %eax
+ shr $32, %rdx
+ wrmsr
+.Lend_\@:
+#endif
+.endm
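Stripped of the MSR access, the two macros above only move one value around. A compact C model of that value handling is sketched below; spec_ctrl_current stands in for the per-CPU x86_spec_ctrl_current, and the real macros do this with rdmsr/wrmsr on MSR_IA32_SPEC_CTRL rather than plain assignments.

#include <stdint.h>
#include <stdio.h>

#define SPEC_CTRL_IBRS (1ull << 0)

static uint64_t spec_ctrl_current;           /* models x86_spec_ctrl_current     */

/* IBRS_ENTER: optionally stash the live value, then write one with IBRS set. */
static uint64_t ibrs_enter(uint64_t live_msr, uint64_t *save_reg)
{
	if (save_reg)
		*save_reg = live_msr;        /* paranoid paths keep the old value */
	return spec_ctrl_current;            /* assumed to carry SPEC_CTRL_IBRS   */
}

/* IBRS_EXIT: restore the stashed value, or drop the IBRS bit from current. */
static uint64_t ibrs_exit(const uint64_t *save_reg)
{
	if (save_reg)
		return *save_reg;
	return spec_ctrl_current & ~SPEC_CTRL_IBRS;
}

int main(void)
{
	uint64_t saved, msr = 0;

	spec_ctrl_current = SPEC_CTRL_IBRS;
	msr = ibrs_enter(msr, &saved);       /* entry: IBRS on, old value stashed */
	msr = ibrs_exit(&saved);             /* exit: old value restored          */
	printf("msr after exit: %#llx (stashed %#llx)\n",
	       (unsigned long long)msr, (unsigned long long)saved);
	return 0;
}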
+
+/*
* Mitigate Spectre v1 for conditional swapgs code paths.
*
* FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
new file mode 100644
index 000000000000..bfb7bcb362bc
--- /dev/null
+++ b/arch/x86/entry/entry.S
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common place for both 32- and 64-bit entry routines.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+#include <asm/msr-index.h>
+
+.pushsection .noinstr.text, "ax"
+
+SYM_FUNC_START(entry_ibpb)
+ movl $MSR_IA32_PRED_CMD, %ecx
+ movl $PRED_CMD_IBPB, %eax
+ xorl %edx, %edx
+ wrmsr
+ RET
+SYM_FUNC_END(entry_ibpb)
+/* For KVM */
+EXPORT_SYMBOL_GPL(entry_ibpb);
+
+.popsection
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 887420844066..e309e7156038 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -698,7 +698,6 @@ SYM_CODE_START(__switch_to_asm)
movl %ebx, PER_CPU_VAR(__stack_chk_guard)
#endif
-#ifdef CONFIG_RETPOLINE
/*
* When switching from a shallower to a deeper call stack
* the RSB may either underflow or use entries populated
@@ -707,7 +706,6 @@ SYM_CODE_START(__switch_to_asm)
* speculative execution to prevent attack.
*/
FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
-#endif
/* Restore flags or the incoming task to restore AC state. */
popfl
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 4300ba49b5ee..9953d966d124 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -85,7 +85,7 @@
*/
SYM_CODE_START(entry_SYSCALL_64)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
ENDBR
swapgs
@@ -112,6 +112,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
movq %rsp, %rdi
/* Sign extend the lower 32bit as syscall numbers are treated as int */
movslq %eax, %rsi
+
+ /* clobbers %rax, make sure it is after saving the syscall nr */
+ IBRS_ENTER
+ UNTRAIN_RET
+
call do_syscall_64 /* returns with IRQs disabled */
/*
@@ -191,6 +196,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
* perf profiles. Nothing jumps here.
*/
syscall_return_via_sysret:
+ IBRS_EXIT
POP_REGS pop_rdi=0
/*
@@ -249,7 +255,6 @@ SYM_FUNC_START(__switch_to_asm)
movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
#endif
-#ifdef CONFIG_RETPOLINE
/*
* When switching from a shallower to a deeper call stack
* the RSB may either underflow or use entries populated
@@ -258,7 +263,6 @@ SYM_FUNC_START(__switch_to_asm)
* speculative execution to prevent attack.
*/
FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
-#endif
/* restore callee-saved registers */
popq %r15
@@ -322,13 +326,13 @@ SYM_CODE_END(ret_from_fork)
#endif
.endm
-/* Save all registers in pt_regs */
-SYM_CODE_START_LOCAL(push_and_clear_regs)
+SYM_CODE_START_LOCAL(xen_error_entry)
UNWIND_HINT_FUNC
PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
+ UNTRAIN_RET
RET
-SYM_CODE_END(push_and_clear_regs)
+SYM_CODE_END(xen_error_entry)
/**
* idtentry_body - Macro to emit code calling the C function
@@ -337,9 +341,6 @@ SYM_CODE_END(push_and_clear_regs)
*/
.macro idtentry_body cfunc has_error_code:req
- call push_and_clear_regs
- UNWIND_HINT_REGS
-
/*
* Call error_entry() and switch to the task stack if from userspace.
*
@@ -349,7 +350,7 @@ SYM_CODE_END(push_and_clear_regs)
* switch the CR3. So it can skip invoking error_entry().
*/
ALTERNATIVE "call error_entry; movq %rax, %rsp", \
- "", X86_FEATURE_XENPV
+ "call xen_error_entry", X86_FEATURE_XENPV
ENCODE_FRAME_POINTER
UNWIND_HINT_REGS
@@ -612,6 +613,7 @@ __irqentry_text_end:
SYM_CODE_START_LOCAL(common_interrupt_return)
SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
+ IBRS_EXIT
#ifdef CONFIG_DEBUG_ENTRY
/* Assert that pt_regs indicates user mode. */
testb $3, CS(%rsp)
@@ -897,6 +899,9 @@ SYM_CODE_END(xen_failsafe_callback)
* 1 -> no SWAPGS on exit
*
* Y GSBASE value at entry, must be restored in paranoid_exit
+ *
+ * R14 - old CR3
+ * R15 - old SPEC_CTRL
*/
SYM_CODE_START_LOCAL(paranoid_entry)
UNWIND_HINT_FUNC
@@ -940,7 +945,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
* is needed here.
*/
SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
- RET
+ jmp .Lparanoid_gsbase_done
.Lparanoid_entry_checkgs:
/* EBX = 1 -> kernel GSBASE active, no restore required */
@@ -959,8 +964,16 @@ SYM_CODE_START_LOCAL(paranoid_entry)
xorl %ebx, %ebx
swapgs
.Lparanoid_kernel_gsbase:
-
FENCE_SWAPGS_KERNEL_ENTRY
+.Lparanoid_gsbase_done:
+
+ /*
+ * Once we have CR3 and %GS set up, save and set SPEC_CTRL. Just like
+ * CR3 above, keep the old value in a callee-saved register.
+ */
+ IBRS_ENTER save_reg=%r15
+ UNTRAIN_RET
+
RET
SYM_CODE_END(paranoid_entry)
@@ -982,9 +995,19 @@ SYM_CODE_END(paranoid_entry)
* 1 -> no SWAPGS on exit
*
* Y User space GSBASE, must be restored unconditionally
+ *
+ * R14 - old CR3
+ * R15 - old SPEC_CTRL
*/
SYM_CODE_START_LOCAL(paranoid_exit)
UNWIND_HINT_REGS
+
+ /*
+ * Must restore IBRS state before both CR3 and %GS since we need access
+ * to the per-CPU x86_spec_ctrl_shadow variable.
+ */
+ IBRS_EXIT save_reg=%r15
+
/*
* The order of operations is important. RESTORE_CR3 requires
* kernel GSBASE.
@@ -1017,6 +1040,10 @@ SYM_CODE_END(paranoid_exit)
*/
SYM_CODE_START_LOCAL(error_entry)
UNWIND_HINT_FUNC
+
+ PUSH_AND_CLEAR_REGS save_ret=1
+ ENCODE_FRAME_POINTER 8
+
testb $3, CS+8(%rsp)
jz .Lerror_kernelspace
@@ -1028,9 +1055,12 @@ SYM_CODE_START_LOCAL(error_entry)
FENCE_SWAPGS_USER_ENTRY
/* We have user CR3. Change to kernel CR3. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ IBRS_ENTER
+ UNTRAIN_RET
leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */
.Lerror_entry_from_usermode_after_swapgs:
+
/* Put us onto the real thread stack. */
call sync_regs
RET
@@ -1065,6 +1095,7 @@ SYM_CODE_START_LOCAL(error_entry)
.Lerror_entry_done_lfence:
FENCE_SWAPGS_KERNEL_ENTRY
leaq 8(%rsp), %rax /* return pt_regs pointer */
+ ANNOTATE_UNRET_END
RET
.Lbstep_iret:
@@ -1080,6 +1111,8 @@ SYM_CODE_START_LOCAL(error_entry)
swapgs
FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ IBRS_ENTER
+ UNTRAIN_RET
/*
* Pretend that the exception came from user mode: set up pt_regs
@@ -1185,6 +1218,9 @@ SYM_CODE_START(asm_exc_nmi)
PUSH_AND_CLEAR_REGS rdx=(%rdx)
ENCODE_FRAME_POINTER
+ IBRS_ENTER
+ UNTRAIN_RET
+
/*
* At this point we no longer need to worry about stack damage
* due to nesting -- we're on the normal thread stack and we're
@@ -1409,6 +1445,9 @@ end_repeat_nmi:
movq $-1, %rsi
call exc_nmi
+ /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */
+ IBRS_EXIT save_reg=%r15
+
/* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index d1052742ad0c..682338e7e2a3 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -4,7 +4,6 @@
*
* Copyright 2000-2002 Andi Kleen, SuSE Labs.
*/
-#include "calling.h"
#include <asm/asm-offsets.h>
#include <asm/current.h>
#include <asm/errno.h>
@@ -14,9 +13,12 @@
#include <asm/irqflags.h>
#include <asm/asm.h>
#include <asm/smap.h>
+#include <asm/nospec-branch.h>
#include <linux/linkage.h>
#include <linux/err.h>
+#include "calling.h"
+
.section .entry.text, "ax"
/*
@@ -47,7 +49,7 @@
* 0(%ebp) arg6
*/
SYM_CODE_START(entry_SYSENTER_compat)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
ENDBR
/* Interrupts are off on entry. */
swapgs
@@ -88,6 +90,9 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
cld
+ IBRS_ENTER
+ UNTRAIN_RET
+
/*
* SYSENTER doesn't filter flags, so we need to clear NT and AC
* ourselves. To save a few cycles, we can check whether
@@ -174,7 +179,7 @@ SYM_CODE_END(entry_SYSENTER_compat)
* 0(%esp) arg6
*/
SYM_CODE_START(entry_SYSCALL_compat)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
ENDBR
/* Interrupts are off on entry. */
swapgs
@@ -203,6 +208,9 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS
UNWIND_HINT_REGS
+ IBRS_ENTER
+ UNTRAIN_RET
+
movq %rsp, %rdi
call do_fast_syscall_32
/* XEN PV guests always use IRET path */
@@ -217,6 +225,8 @@ sysret32_from_system_call:
*/
STACKLEAK_ERASE
+ IBRS_EXIT
+
movq RBX(%rsp), %rbx /* pt_regs->rbx */
movq RBP(%rsp), %rbp /* pt_regs->rbp */
movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
@@ -295,7 +305,7 @@ SYM_CODE_END(entry_SYSCALL_compat)
* ebp arg6
*/
SYM_CODE_START(entry_INT80_compat)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
ENDBR
/*
* Interrupts are off on entry.
@@ -337,6 +347,9 @@ SYM_CODE_START(entry_INT80_compat)
cld
+ IBRS_ENTER
+ UNTRAIN_RET
+
movq %rsp, %rdi
call do_int80_syscall_32
jmp swapgs_restore_regs_and_return_to_usermode
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index c2a8b76ae0bc..76cd790ed0bd 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -92,6 +92,7 @@ endif
endif
$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
+$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO
#
# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
diff --git a/arch/x86/entry/vsyscall/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
index 15e35159ebb6..ef2dd1827243 100644
--- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S
+++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
@@ -19,17 +19,20 @@ __vsyscall_page:
mov $__NR_gettimeofday, %rax
syscall
- RET
+ ret
+ int3
.balign 1024, 0xcc
mov $__NR_time, %rax
syscall
- RET
+ ret
+ int3
.balign 1024, 0xcc
mov $__NR_getcpu, %rax
syscall
- RET
+ ret
+ int3
.balign 4096, 0xcc
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 0d04414b97d2..d568afc705d2 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -21,7 +21,6 @@
#define NUM_COUNTERS_NB 4
#define NUM_COUNTERS_L2 4
#define NUM_COUNTERS_L3 6
-#define MAX_COUNTERS 6
#define RDPMC_BASE_NB 6
#define RDPMC_BASE_LLC 10
@@ -31,6 +30,7 @@
#undef pr_fmt
#define pr_fmt(fmt) "amd_uncore: " fmt
+static int pmu_version;
static int num_counters_llc;
static int num_counters_nb;
static bool l3_mask;
@@ -46,7 +46,7 @@ struct amd_uncore {
u32 msr_base;
cpumask_t *active_mask;
struct pmu *pmu;
- struct perf_event *events[MAX_COUNTERS];
+ struct perf_event **events;
struct hlist_node node;
};
@@ -158,6 +158,16 @@ out:
hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+ /*
+ * The first four DF counters are accessible via RDPMC index 6 to 9
+ * followed by the L3 counters from index 10 to 15. For processors
+ * with more than four DF counters, the DF RDPMC assignments become
+ * discontiguous as the additional counters are accessible starting
+ * from index 16.
+ */
+ if (is_nb_event(event) && hwc->idx >= NUM_COUNTERS_NB)
+ hwc->event_base_rdpmc += NUM_COUNTERS_L3;
+
if (flags & PERF_EF_START)
amd_uncore_start(event, PERF_EF_RELOAD);
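The comment above is a small piece of index arithmetic: DF counters map to RDPMC indices starting at 6, L3 counters start at 10, and DF counters beyond the first four continue after the L3 block at 16. A stand-alone check of that mapping, with the constants copied from this file and an invented helper name:

#include <stdio.h>

#define NUM_COUNTERS_NB  4
#define NUM_COUNTERS_L3  6
#define RDPMC_BASE_NB    6

static int df_rdpmc_index(int idx)
{
	int rdpmc = RDPMC_BASE_NB + idx;

	/* Same adjustment as the hunk above: DF counters beyond the first
	 * four sit after the six L3 RDPMC slots. */
	if (idx >= NUM_COUNTERS_NB)
		rdpmc += NUM_COUNTERS_L3;
	return rdpmc;
}

int main(void)
{
	for (int idx = 0; idx < 6; idx++)
		printf("DF counter %d -> RDPMC %d\n", idx, df_rdpmc_index(idx));
	/* Prints 6..9 for counters 0-3, then 16 and 17 for counters 4 and 5. */
	return 0;
}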
@@ -209,10 +219,14 @@ static int amd_uncore_event_init(struct perf_event *event)
{
struct amd_uncore *uncore;
struct hw_perf_event *hwc = &event->hw;
+ u64 event_mask = AMD64_RAW_EVENT_MASK_NB;
if (event->attr.type != event->pmu->type)
return -ENOENT;
+ if (pmu_version >= 2 && is_nb_event(event))
+ event_mask = AMD64_PERFMON_V2_RAW_EVENT_MASK_NB;
+
/*
* NB and Last level cache counters (MSRs) are shared across all cores
* that share the same NB / Last level cache. On family 16h and below,
@@ -221,7 +235,7 @@ static int amd_uncore_event_init(struct perf_event *event)
* out. So we do not support sampling and per-thread events via
* CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
*/
- hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
+ hwc->config = event->attr.config & event_mask;
hwc->idx = -1;
if (event->cpu < 0)
@@ -247,6 +261,19 @@ static int amd_uncore_event_init(struct perf_event *event)
return 0;
}
+static umode_t
+amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 < 0x19 ?
+ attr->mode : 0;
+}
+
+static umode_t
+amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0;
+}
+
static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -287,8 +314,10 @@ static struct device_attribute format_attr_##_var = \
DEFINE_UNCORE_FORMAT_ATTR(event12, event, "config:0-7,32-35");
DEFINE_UNCORE_FORMAT_ATTR(event14, event, "config:0-7,32-35,59-60"); /* F17h+ DF */
+DEFINE_UNCORE_FORMAT_ATTR(event14v2, event, "config:0-7,32-37"); /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3 */
-DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(umask8, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(umask12, umask, "config:8-15,24-27"); /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(coreid, coreid, "config:42-44"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(slicemask, slicemask, "config:48-51"); /* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask8, threadmask, "config:56-63"); /* F17h L3 */
@@ -297,20 +326,33 @@ DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46"); /* F19h L3
DEFINE_UNCORE_FORMAT_ATTR(enallcores, enallcores, "config:47"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(sliceid, sliceid, "config:48-50"); /* F19h L3 */
+/* Common DF and NB attributes */
static struct attribute *amd_uncore_df_format_attr[] = {
- &format_attr_event12.attr, /* event14 if F17h+ */
- &format_attr_umask.attr,
+ &format_attr_event12.attr, /* event */
+ &format_attr_umask8.attr, /* umask */
NULL,
};
+/* Common L2 and L3 attributes */
static struct attribute *amd_uncore_l3_format_attr[] = {
- &format_attr_event12.attr, /* event8 if F17h+ */
- &format_attr_umask.attr,
- NULL, /* slicemask if F17h, coreid if F19h */
- NULL, /* threadmask8 if F17h, enallslices if F19h */
- NULL, /* enallcores if F19h */
- NULL, /* sliceid if F19h */
- NULL, /* threadmask2 if F19h */
+ &format_attr_event12.attr, /* event */
+ &format_attr_umask8.attr, /* umask */
+ NULL, /* threadmask */
+ NULL,
+};
+
+/* F17h unique L3 attributes */
+static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
+ &format_attr_slicemask.attr, /* slicemask */
+ NULL,
+};
+
+/* F19h unique L3 attributes */
+static struct attribute *amd_f19h_uncore_l3_format_attr[] = {
+ &format_attr_coreid.attr, /* coreid */
+ &format_attr_enallslices.attr, /* enallslices */
+ &format_attr_enallcores.attr, /* enallcores */
+ &format_attr_sliceid.attr, /* sliceid */
NULL,
};
@@ -324,6 +366,18 @@ static struct attribute_group amd_uncore_l3_format_group = {
.attrs = amd_uncore_l3_format_attr,
};
+static struct attribute_group amd_f17h_uncore_l3_format_group = {
+ .name = "format",
+ .attrs = amd_f17h_uncore_l3_format_attr,
+ .is_visible = amd_f17h_uncore_is_visible,
+};
+
+static struct attribute_group amd_f19h_uncore_l3_format_group = {
+ .name = "format",
+ .attrs = amd_f19h_uncore_l3_format_attr,
+ .is_visible = amd_f19h_uncore_is_visible,
+};
+
static const struct attribute_group *amd_uncore_df_attr_groups[] = {
&amd_uncore_attr_group,
&amd_uncore_df_format_group,
@@ -336,6 +390,12 @@ static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
NULL,
};
+static const struct attribute_group *amd_uncore_l3_attr_update[] = {
+ &amd_f17h_uncore_l3_format_group,
+ &amd_f19h_uncore_l3_format_group,
+ NULL,
+};
+
static struct pmu amd_nb_pmu = {
.task_ctx_nr = perf_invalid_context,
.attr_groups = amd_uncore_df_attr_groups,
@@ -353,6 +413,7 @@ static struct pmu amd_nb_pmu = {
static struct pmu amd_llc_pmu = {
.task_ctx_nr = perf_invalid_context,
.attr_groups = amd_uncore_l3_attr_groups,
+ .attr_update = amd_uncore_l3_attr_update,
.name = "amd_l2",
.event_init = amd_uncore_event_init,
.add = amd_uncore_add,
@@ -370,11 +431,19 @@ static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
cpu_to_node(cpu));
}
+static inline struct perf_event **
+amd_uncore_events_alloc(unsigned int num, unsigned int cpu)
+{
+ return kzalloc_node(sizeof(struct perf_event *) * num, GFP_KERNEL,
+ cpu_to_node(cpu));
+}
+
static int amd_uncore_cpu_up_prepare(unsigned int cpu)
{
- struct amd_uncore *uncore_nb = NULL, *uncore_llc;
+ struct amd_uncore *uncore_nb = NULL, *uncore_llc = NULL;
if (amd_uncore_nb) {
+ *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
uncore_nb = amd_uncore_alloc(cpu);
if (!uncore_nb)
goto fail;
@@ -384,11 +453,15 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
uncore_nb->active_mask = &amd_nb_active_mask;
uncore_nb->pmu = &amd_nb_pmu;
+ uncore_nb->events = amd_uncore_events_alloc(num_counters_nb, cpu);
+ if (!uncore_nb->events)
+ goto fail;
uncore_nb->id = -1;
*per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
}
if (amd_uncore_llc) {
+ *per_cpu_ptr(amd_uncore_llc, cpu) = NULL;
uncore_llc = amd_uncore_alloc(cpu);
if (!uncore_llc)
goto fail;
@@ -398,6 +471,9 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL;
uncore_llc->active_mask = &amd_llc_active_mask;
uncore_llc->pmu = &amd_llc_pmu;
+ uncore_llc->events = amd_uncore_events_alloc(num_counters_llc, cpu);
+ if (!uncore_llc->events)
+ goto fail;
uncore_llc->id = -1;
*per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc;
}
@@ -405,9 +481,16 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
return 0;
fail:
- if (amd_uncore_nb)
- *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
- kfree(uncore_nb);
+ if (uncore_nb) {
+ kfree(uncore_nb->events);
+ kfree(uncore_nb);
+ }
+
+ if (uncore_llc) {
+ kfree(uncore_llc->events);
+ kfree(uncore_llc);
+ }
+
return -ENOMEM;
}
@@ -540,8 +623,11 @@ static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
if (cpu == uncore->cpu)
cpumask_clear_cpu(cpu, uncore->active_mask);
- if (!--uncore->refcnt)
+ if (!--uncore->refcnt) {
+ kfree(uncore->events);
kfree(uncore);
+ }
+
*per_cpu_ptr(uncores, cpu) = NULL;
}
@@ -560,6 +646,7 @@ static int __init amd_uncore_init(void)
{
struct attribute **df_attr = amd_uncore_df_format_attr;
struct attribute **l3_attr = amd_uncore_l3_format_attr;
+ union cpuid_0x80000022_ebx ebx;
int ret = -ENODEV;
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
@@ -569,6 +656,9 @@ static int __init amd_uncore_init(void)
if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
return -ENODEV;
+ if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
+ pmu_version = 2;
+
num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L2;
if (boot_cpu_data.x86 >= 0x17) {
@@ -585,8 +675,12 @@ static int __init amd_uncore_init(void)
}
if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
- if (boot_cpu_data.x86 >= 0x17)
+ if (pmu_version >= 2) {
+ *df_attr++ = &format_attr_event14v2.attr;
+ *df_attr++ = &format_attr_umask12.attr;
+ } else if (boot_cpu_data.x86 >= 0x17) {
*df_attr = &format_attr_event14.attr;
+ }
amd_uncore_nb = alloc_percpu(struct amd_uncore *);
if (!amd_uncore_nb) {
@@ -597,6 +691,11 @@ static int __init amd_uncore_init(void)
if (ret)
goto fail_nb;
+ if (pmu_version >= 2) {
+ ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
+ num_counters_nb = ebx.split.num_df_pmc;
+ }
+
pr_info("%d %s %s counters detected\n", num_counters_nb,
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
amd_nb_pmu.name);
@@ -607,16 +706,11 @@ static int __init amd_uncore_init(void)
if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
if (boot_cpu_data.x86 >= 0x19) {
*l3_attr++ = &format_attr_event8.attr;
- *l3_attr++ = &format_attr_umask.attr;
- *l3_attr++ = &format_attr_coreid.attr;
- *l3_attr++ = &format_attr_enallslices.attr;
- *l3_attr++ = &format_attr_enallcores.attr;
- *l3_attr++ = &format_attr_sliceid.attr;
+ *l3_attr++ = &format_attr_umask8.attr;
*l3_attr++ = &format_attr_threadmask2.attr;
} else if (boot_cpu_data.x86 >= 0x17) {
*l3_attr++ = &format_attr_event8.attr;
- *l3_attr++ = &format_attr_umask.attr;
- *l3_attr++ = &format_attr_slicemask.attr;
+ *l3_attr++ = &format_attr_umask8.attr;
*l3_attr++ = &format_attr_threadmask8.attr;
}
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 45024abd929f..bd8b98857609 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4141,6 +4141,8 @@ tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
{
struct event_constraint *c;
+ c = intel_get_event_constraints(cpuc, idx, event);
+
/*
* :ppp means to do reduced skid PEBS,
* which is available on PMC0 and fixed counter 0.
@@ -4153,8 +4155,6 @@ tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
return &counter0_constraint;
}
- c = intel_get_event_constraints(cpuc, idx, event);
-
return c;
}
@@ -6241,7 +6241,8 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
x86_pmu.lbr_pt_coexist = true;
- intel_pmu_pebs_data_source_skl(false);
+ intel_pmu_pebs_data_source_adl();
+ x86_pmu.pebs_latency_data = adl_latency_data_small;
x86_pmu.num_topdown_events = 8;
x86_pmu.update_topdown_event = adl_update_topdown_event;
x86_pmu.set_topdown_event_period = adl_set_topdown_event_period;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 376cc3d66094..ba60427caa6d 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -94,15 +94,40 @@ void __init intel_pmu_pebs_data_source_nhm(void)
pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
}
-void __init intel_pmu_pebs_data_source_skl(bool pmem)
+static void __init __intel_pmu_pebs_data_source_skl(bool pmem, u64 *data_source)
{
u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);
- pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
- pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
- pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
- pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
- pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
+ data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
+ data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
+ data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
+ data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
+ data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
+}
+
+void __init intel_pmu_pebs_data_source_skl(bool pmem)
+{
+ __intel_pmu_pebs_data_source_skl(pmem, pebs_data_source);
+}
+
+static void __init intel_pmu_pebs_data_source_grt(u64 *data_source)
+{
+ data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
+ data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
+ data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
+}
+
+void __init intel_pmu_pebs_data_source_adl(void)
+{
+ u64 *data_source;
+
+ data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
+ memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
+ __intel_pmu_pebs_data_source_skl(false, data_source);
+
+ data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
+ memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
+ intel_pmu_pebs_data_source_grt(data_source);
}
static u64 precise_store_data(u64 status)
@@ -171,7 +196,50 @@ static u64 precise_datala_hsw(struct perf_event *event, u64 status)
return dse.val;
}
-static u64 load_latency_data(u64 status)
+static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
+{
+ /*
+ * TLB access
+ * 0 = did not miss 2nd level TLB
+ * 1 = missed 2nd level TLB
+ */
+ if (tlb)
+ *val |= P(TLB, MISS) | P(TLB, L2);
+ else
+ *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+ /* locked prefix */
+ if (lock)
+ *val |= P(LOCK, LOCKED);
+}
+
+/* Retrieve the latency data for e-core of ADL */
+u64 adl_latency_data_small(struct perf_event *event, u64 status)
+{
+ union intel_x86_pebs_dse dse;
+ u64 val;
+
+ WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big);
+
+ dse.val = status;
+
+ val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse];
+
+ /*
+ * For the atom core on ADL,
+ * bit 4: lock, bit 5: TLB access.
+ */
+ pebs_set_tlb_lock(&val, dse.ld_locked, dse.ld_stlb_miss);
+
+ if (dse.ld_data_blk)
+ val |= P(BLK, DATA);
+ else
+ val |= P(BLK, NA);
+
+ return val;
+}
+
+static u64 load_latency_data(struct perf_event *event, u64 status)
{
union intel_x86_pebs_dse dse;
u64 val;
@@ -181,7 +249,7 @@ static u64 load_latency_data(u64 status)
/*
* use the mapping table for bit 0-3
*/
- val = pebs_data_source[dse.ld_dse];
+ val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse];
/*
* Nehalem models do not support TLB, Lock infos
@@ -190,21 +258,8 @@ static u64 load_latency_data(u64 status)
val |= P(TLB, NA) | P(LOCK, NA);
return val;
}
- /*
- * bit 4: TLB access
- * 0 = did not miss 2nd level TLB
- * 1 = missed 2nd level TLB
- */
- if (dse.ld_stlb_miss)
- val |= P(TLB, MISS) | P(TLB, L2);
- else
- val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
- /*
- * bit 5: locked prefix
- */
- if (dse.ld_locked)
- val |= P(LOCK, LOCKED);
+ pebs_set_tlb_lock(&val, dse.ld_stlb_miss, dse.ld_locked);
/*
* Ice Lake and earlier models do not support block infos.
@@ -233,7 +288,7 @@ static u64 load_latency_data(u64 status)
return val;
}
-static u64 store_latency_data(u64 status)
+static u64 store_latency_data(struct perf_event *event, u64 status)
{
union intel_x86_pebs_dse dse;
u64 val;
@@ -243,23 +298,9 @@ static u64 store_latency_data(u64 status)
/*
* use the mapping table for bit 0-3
*/
- val = pebs_data_source[dse.st_lat_dse];
+ val = hybrid_var(event->pmu, pebs_data_source)[dse.st_lat_dse];
- /*
- * bit 4: TLB access
- * 0 = did not miss 2nd level TLB
- * 1 = missed 2nd level TLB
- */
- if (dse.st_lat_stlb_miss)
- val |= P(TLB, MISS) | P(TLB, L2);
- else
- val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
-
- /*
- * bit 5: locked prefix
- */
- if (dse.st_lat_locked)
- val |= P(LOCK, LOCKED);
+ pebs_set_tlb_lock(&val, dse.st_lat_stlb_miss, dse.st_lat_locked);
val |= P(BLK, NA);
@@ -781,8 +822,8 @@ struct event_constraint intel_glm_pebs_event_constraints[] = {
struct event_constraint intel_grt_pebs_event_constraints[] = {
/* Allow all events as PEBS with no flags */
- INTEL_PLD_CONSTRAINT(0x5d0, 0xf),
- INTEL_PSD_CONSTRAINT(0x6d0, 0xf),
+ INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0xf),
+ INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf),
EVENT_CONSTRAINT_END
};
@@ -1443,9 +1484,11 @@ static u64 get_data_src(struct perf_event *event, u64 aux)
bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
if (fl & PERF_X86_EVENT_PEBS_LDLAT)
- val = load_latency_data(aux);
+ val = load_latency_data(event, aux);
else if (fl & PERF_X86_EVENT_PEBS_STLAT)
- val = store_latency_data(aux);
+ val = store_latency_data(event, aux);
+ else if (fl & PERF_X86_EVENT_PEBS_LAT_HYBRID)
+ val = x86_pmu.pebs_latency_data(event, aux);
else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
val = precise_datala_hsw(event, aux);
else if (fst)
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 13179f31fe10..4f70fb6c2c1e 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -278,9 +278,9 @@ enum {
};
/*
- * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in
- * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when
- * TSX is not supported they have no consistent behavior:
+ * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x
+ * are the TSX flags when TSX is supported, but when TSX is not supported
+ * they have no consistent behavior:
*
* - For wrmsr(), bits 61:62 are considered part of the sign extension.
* - For HW updates (branch captures) bits 61:62 are always OFF and are not
@@ -288,7 +288,7 @@ enum {
*
* Therefore, if:
*
- * 1) LBR has TSX format
+ * 1) LBR format LBR_FORMAT_EIP_FLAGS2
* 2) CPU has no TSX support enabled
*
* ... then any value passed to wrmsr() must be sign extended to 63 bits and any
@@ -300,7 +300,7 @@ static inline bool lbr_from_signext_quirk_needed(void)
bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
boot_cpu_has(X86_FEATURE_RTM);
- return !tsx_support && x86_pmu.lbr_has_tsx;
+ return !tsx_support;
}
static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
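The quirk amounts to ordinary two's-complement sign extension before the value is written back to the MSR. A small demonstration of propagating bit 60 into the upper bits; the exact shift width and the sample values are illustrative rather than the kernel's helper:

#include <stdint.h>
#include <stdio.h>

/* Sign-extend a 61-bit LBR "from" value to 64 bits before writing it back. */
static uint64_t sign_extend_61(uint64_t val)
{
	return (uint64_t)(((int64_t)(val << 3)) >> 3);
}

int main(void)
{
	uint64_t kernel_like = 0x1fffffff81000000ULL;  /* bit 60 set, bits 61:63 clear */
	uint64_t user_like   = 0x0000000000401000ULL;  /* bit 60 clear                 */

	printf("%#018llx -> %#018llx\n", (unsigned long long)kernel_like,
	       (unsigned long long)sign_extend_61(kernel_like));
	printf("%#018llx -> %#018llx\n", (unsigned long long)user_like,
	       (unsigned long long)sign_extend_61(user_like));
	return 0;
}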
@@ -1609,9 +1609,6 @@ void intel_pmu_lbr_init_hsw(void)
x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
-
- if (lbr_from_signext_quirk_needed())
- static_branch_enable(&lbr_from_quirk_key);
}
/* skylake */
@@ -1702,7 +1699,11 @@ void intel_pmu_lbr_init(void)
switch (x86_pmu.intel_cap.lbr_format) {
case LBR_FORMAT_EIP_FLAGS2:
x86_pmu.lbr_has_tsx = 1;
- fallthrough;
+ x86_pmu.lbr_from_flags = 1;
+ if (lbr_from_signext_quirk_needed())
+ static_branch_enable(&lbr_from_quirk_key);
+ break;
+
case LBR_FORMAT_EIP_FLAGS:
x86_pmu.lbr_from_flags = 1;
break;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 21a5482bcf84..ca2f8bfe6ff1 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -84,6 +84,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
#define PERF_X86_EVENT_TOPDOWN 0x04000 /* Count Topdown slots/metrics events */
#define PERF_X86_EVENT_PEBS_STLAT 0x08000 /* st+stlat data address sampling */
#define PERF_X86_EVENT_AMD_BRS 0x10000 /* AMD Branch Sampling */
+#define PERF_X86_EVENT_PEBS_LAT_HYBRID 0x20000 /* ld and st lat for hybrid */
static inline bool is_topdown_count(struct perf_event *event)
{
@@ -136,7 +137,8 @@ struct amd_nb {
PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \
- PERF_SAMPLE_PERIOD | PERF_SAMPLE_CODE_PAGE_SIZE)
+ PERF_SAMPLE_PERIOD | PERF_SAMPLE_CODE_PAGE_SIZE | \
+ PERF_SAMPLE_WEIGHT_TYPE)
#define PEBS_GP_REGS \
((1ULL << PERF_REG_X86_AX) | \
@@ -460,6 +462,10 @@ struct cpu_hw_events {
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
+#define INTEL_HYBRID_LAT_CONSTRAINT(c, n) \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID)
+
/* Event constraint, but match on all event flags too. */
#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)
@@ -638,6 +644,8 @@ enum {
x86_lbr_exclusive_max,
};
+#define PERF_PEBS_DATA_SOURCE_MAX 0x10
+
struct x86_hybrid_pmu {
struct pmu pmu;
const char *name;
@@ -665,6 +673,8 @@ struct x86_hybrid_pmu {
unsigned int late_ack :1,
mid_ack :1,
enabled_ack :1;
+
+ u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX];
};
static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu)
@@ -825,6 +835,7 @@ struct x86_pmu {
void (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data);
struct event_constraint *pebs_constraints;
void (*pebs_aliases)(struct perf_event *event);
+ u64 (*pebs_latency_data)(struct perf_event *event, u64 status);
unsigned long large_pebs_flags;
u64 rtm_abort_event;
@@ -1392,6 +1403,8 @@ void intel_pmu_disable_bts(void);
int intel_pmu_drain_bts_buffer(void);
+u64 adl_latency_data_small(struct perf_event *event, u64 status);
+
extern struct event_constraint intel_core2_pebs_event_constraints[];
extern struct event_constraint intel_atom_pebs_event_constraints[];
@@ -1499,6 +1512,8 @@ void intel_pmu_pebs_data_source_nhm(void);
void intel_pmu_pebs_data_source_skl(bool pmem);
+void intel_pmu_pebs_data_source_adl(void);
+
int intel_pmu_setup_lbr_filter(struct perf_event *event);
void intel_pt_interrupt(void);
diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c
index 7e0f6bedc248..42c70d28ef27 100644
--- a/arch/x86/hyperv/irqdomain.c
+++ b/arch/x86/hyperv/irqdomain.c
@@ -192,7 +192,7 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
struct pci_dev *dev;
struct hv_interrupt_entry out_entry, *stored_entry;
struct irq_cfg *cfg = irqd_cfg(data);
- cpumask_t *affinity;
+ const cpumask_t *affinity;
int cpu;
u64 status;
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 9b10c8c76087..9542c582d546 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -76,6 +76,7 @@ extern int alternatives_patched;
extern void alternative_instructions(void);
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
extern void apply_retpolines(s32 *start, s32 *end);
+extern void apply_returns(s32 *start, s32 *end);
extern void apply_ibt_endbr(s32 *start, s32 *end);
struct module;
diff --git a/arch/x86/include/asm/amd-ibs.h b/arch/x86/include/asm/amd-ibs.h
index aabdbb5ab920..f3eb098d63d4 100644
--- a/arch/x86/include/asm/amd-ibs.h
+++ b/arch/x86/include/asm/amd-ibs.h
@@ -29,7 +29,10 @@ union ibs_fetch_ctl {
rand_en:1, /* 57: random tagging enable */
fetch_l2_miss:1,/* 58: L2 miss for sampled fetch
* (needs IbsFetchComp) */
- reserved:5; /* 59-63: reserved */
+ l3_miss_only:1, /* 59: Collect L3 miss samples only */
+ fetch_oc_miss:1,/* 60: Op cache miss for the sampled fetch */
+ fetch_l3_miss:1,/* 61: L3 cache miss for the sampled fetch */
+ reserved:2; /* 62-63: reserved */
};
};
@@ -38,14 +41,14 @@ union ibs_op_ctl {
__u64 val;
struct {
__u64 opmaxcnt:16, /* 0-15: periodic op max. count */
- reserved0:1, /* 16: reserved */
+ l3_miss_only:1, /* 16: Collect L3 miss samples only */
op_en:1, /* 17: op sampling enable */
op_val:1, /* 18: op sample valid */
cnt_ctl:1, /* 19: periodic op counter control */
opmaxcnt_ext:7, /* 20-26: upper 7 bits of periodic op maximum count */
- reserved1:5, /* 27-31: reserved */
+ reserved0:5, /* 27-31: reserved */
opcurcnt:27, /* 32-58: periodic op counter current count */
- reserved2:5; /* 59-63: reserved */
+ reserved1:5; /* 59-63: reserved */
};
};
@@ -71,11 +74,12 @@ union ibs_op_data {
union ibs_op_data2 {
__u64 val;
struct {
- __u64 data_src:3, /* 0-2: data source */
+ __u64 data_src_lo:3, /* 0-2: data source low */
reserved0:1, /* 3: reserved */
rmt_node:1, /* 4: destination node */
cache_hit_st:1, /* 5: cache hit state */
- reserved1:57; /* 5-63: reserved */
+ data_src_hi:2, /* 6-7: data source high */
+ reserved1:56; /* 8-63: reserved */
};
};
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 03acc823838a..5fe7f6c8a7a4 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -203,8 +203,8 @@
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
+#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
@@ -219,7 +219,7 @@
#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 or above (Zen) */
+#define X86_FEATURE_ZEN (7*32+28) /* "" CPU based on Zen microarchitecture */
#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
@@ -296,6 +296,13 @@
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
+#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */
+#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
+#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
+#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
+#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
@@ -316,6 +323,7 @@
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
+#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
@@ -447,5 +455,6 @@
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
+#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 36369e76cc63..33d2cd04d254 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -50,6 +50,25 @@
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
+#ifdef CONFIG_RETPOLINE
+# define DISABLE_RETPOLINE 0
+#else
+# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
+ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
+#endif
+
+#ifdef CONFIG_RETHUNK
+# define DISABLE_RETHUNK 0
+#else
+# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
+#endif
+
+#ifdef CONFIG_CPU_UNRET_ENTRY
+# define DISABLE_UNRET 0
+#else
+# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
+#endif
+
#ifdef CONFIG_INTEL_IOMMU_SVM
# define DISABLE_ENQCMD 0
#else
@@ -82,7 +101,7 @@
#define DISABLED_MASK8 (DISABLE_TDX_GUEST)
#define DISABLED_MASK9 (DISABLE_SGX)
#define DISABLED_MASK10 0
-#define DISABLED_MASK11 0
+#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
#define DISABLED_MASK12 0
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
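These masks exist so that feature checks can be folded away at compile time when the matching Kconfig option is off (the kernel routes this through cpu_feature_enabled()). A simplified stand-alone model of the pattern; the bit numbers match word 11 above, but the function names and the runtime stub are invented for the example:

#include <stdbool.h>
#include <stdio.h>

/* Word-11 bit numbers as used above (values only, for illustration). */
#define FEAT_RETPOLINE 12
#define FEAT_RETHUNK   14

/* Pretend CONFIG_RETHUNK is off: its bit lands in the disabled mask. */
#define DISABLED_MASK_11 (1u << FEAT_RETHUNK)

static bool runtime_cpu_has(unsigned int bit)
{
	(void)bit;
	return true;    /* stand-in for the CPUID/boot_cpu_has() test */
}

static bool feature_enabled(unsigned int bit)
{
	if (DISABLED_MASK_11 & (1u << bit))
		return false;   /* folded to a constant at compile time */
	return runtime_cpu_has(bit);
}

int main(void)
{
	printf("RETPOLINE: %d\n", feature_enabled(FEAT_RETPOLINE));  /* 1 */
	printf("RETHUNK:   %d\n", feature_enabled(FEAT_RETHUNK));    /* 0 */
	return 0;
}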
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 6b0f31fb53f7..503a577814b2 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -164,4 +164,6 @@ static inline bool fpstate_is_confidential(struct fpu_guest *gfpu)
/* prctl */
extern long fpu_xstate_prctl(int option, unsigned long arg2);
+extern void fpu_idle_fpregs(void);
+
#endif /* _ASM_X86_FPU_API_H */
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 85865f1645bd..73ca20049835 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -19,19 +19,27 @@
#define __ALIGN_STR __stringify(__ALIGN)
#endif
+#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#define RET jmp __x86_return_thunk
+#else /* CONFIG_RETPOLINE */
#ifdef CONFIG_SLS
#define RET ret; int3
#else
#define RET ret
#endif
+#endif /* CONFIG_RETPOLINE */
#else /* __ASSEMBLY__ */
+#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#define ASM_RET "jmp __x86_return_thunk\n\t"
+#else /* CONFIG_RETPOLINE */
#ifdef CONFIG_SLS
#define ASM_RET "ret; int3\n\t"
#else
#define ASM_RET "ret\n\t"
#endif
+#endif /* CONFIG_RETPOLINE */
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index d27e0581b777..cc615be27a54 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -51,6 +51,8 @@
#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
+#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
+#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
@@ -93,6 +95,7 @@
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
+#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */
#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
#define ARCH_CAP_SSB_NO BIT(4) /*
* Not susceptible to Speculative Store Bypass
@@ -140,6 +143,13 @@
* bit available to control VERW
* behavior.
*/
+#define ARCH_CAP_RRSBA BIT(19) /*
+ * Indicates RET may use predictors
+ * other than the RSB. With eIBRS
+ * enabled, predictions in kernel mode
+ * are restricted to targets in
+ * the kernel.
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
@@ -567,6 +577,9 @@
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
+#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
+#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
+
/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 29dd27b5a339..3a8fdf881313 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -13,6 +13,7 @@
#define MWAIT_SUBSTATE_SIZE 4
#define MWAIT_HINT2CSTATE(hint) (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
#define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK)
+#define MWAIT_C1_SUBSTATE_MASK 0xf0
#define CPUID_MWAIT_LEAF 5
#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index da251a5645b0..cba942006ffe 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -11,6 +11,7 @@
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
+#include <asm/percpu.h>
#define RETPOLINE_THUNK_SIZE 32
@@ -76,25 +77,54 @@
.endm
/*
+ * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
+ * vs RETBleed validation.
+ */
+#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE
+
+/*
+ * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should
+ * eventually turn into its own annotation.
+ */
+.macro ANNOTATE_UNRET_END
+#ifdef CONFIG_DEBUG_ENTRY
+ ANNOTATE_RETPOLINE_SAFE
+ nop
+#endif
+.endm
+
+/*
+ * Equivalent to -mindirect-branch-cs-prefix; emit the 5 byte jmp/call
+ * to the retpoline thunk with a CS prefix when the register requires
+ * a REX prefix byte to encode. Also see apply_retpolines().
+ */
+.macro __CS_PREFIX reg:req
+ .irp rs,r8,r9,r10,r11,r12,r13,r14,r15
+ .ifc \reg,\rs
+ .byte 0x2e
+ .endif
+ .endr
+.endm
+
+/*
* JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
* indirect jmp/call which may be susceptible to the Spectre variant 2
* attack.
*/
.macro JMP_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
- __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE
+ __CS_PREFIX \reg
+ jmp __x86_indirect_thunk_\reg
#else
jmp *%\reg
+ int3
#endif
.endm
.macro CALL_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
- __stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_LFENCE
+ __CS_PREFIX \reg
+ call __x86_indirect_thunk_\reg
#else
call *%\reg
#endif
@@ -105,10 +135,34 @@
* monstrosity above, manually.
*/
.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
-#ifdef CONFIG_RETPOLINE
ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
.Lskip_rsb_\@:
+.endm
+
+#ifdef CONFIG_CPU_UNRET_ENTRY
+#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret"
+#else
+#define CALL_ZEN_UNTRAIN_RET ""
+#endif
+
+/*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
+ * return thunk isn't mapped into the userspace tables (then again, AMD
+ * typically has NO_MELTDOWN).
+ *
+ * While zen_untrain_ret() doesn't clobber anything but requires stack,
+ * entry_ibpb() will clobber AX, CX, DX.
+ *
+ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
+ * where we have a stack but before any RET instruction.
+ */
+.macro UNTRAIN_RET
+#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY)
+ ANNOTATE_UNRET_END
+ ALTERNATIVE_2 "", \
+ CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \
+ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
#endif
.endm
@@ -120,17 +174,20 @@
_ASM_PTR " 999b\n\t" \
".popsection\n\t"
-#ifdef CONFIG_RETPOLINE
-
typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
+extern retpoline_thunk_t __x86_indirect_thunk_array[];
+
+extern void __x86_return_thunk(void);
+extern void zen_untrain_ret(void);
+extern void entry_ibpb(void);
+
+#ifdef CONFIG_RETPOLINE
#define GEN(reg) \
extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN
-extern retpoline_thunk_t __x86_indirect_thunk_array[];
-
#ifdef CONFIG_X86_64
/*
@@ -193,6 +250,7 @@ enum spectre_v2_mitigation {
SPECTRE_V2_EIBRS,
SPECTRE_V2_EIBRS_RETPOLINE,
SPECTRE_V2_EIBRS_LFENCE,
+ SPECTRE_V2_IBRS,
};
/* The indirect branch speculation control variants */
@@ -235,6 +293,9 @@ static inline void indirect_branch_prediction_barrier(void)
/* The Intel SPEC CTRL MSR base value cache */
extern u64 x86_spec_ctrl_base;
+DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
+extern void write_spec_ctrl_current(u64 val, bool force);
+extern u64 spec_ctrl_current(void);
/*
* With retpoline, we must use IBRS to restrict branch prediction
@@ -244,18 +305,18 @@ extern u64 x86_spec_ctrl_base;
*/
#define firmware_restrict_branch_speculation_start() \
do { \
- u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \
- \
preempt_disable(); \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
+ spec_ctrl_current() | SPEC_CTRL_IBRS, \
X86_FEATURE_USE_IBRS_FW); \
+ alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \
+ X86_FEATURE_USE_IBPB_FW); \
} while (0)
#define firmware_restrict_branch_speculation_end() \
do { \
- u64 val = x86_spec_ctrl_base; \
- \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
+ spec_ctrl_current(), \
X86_FEATURE_USE_IBRS_FW); \
preempt_enable(); \
} while (0)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 409725e86f42..34348ae41cdb 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -89,6 +89,19 @@
#define AMD64_RAW_EVENT_MASK_NB \
(AMD64_EVENTSEL_EVENT | \
ARCH_PERFMON_EVENTSEL_UMASK)
+
+#define AMD64_PERFMON_V2_EVENTSEL_EVENT_NB \
+ (AMD64_EVENTSEL_EVENT | \
+ GENMASK_ULL(37, 36))
+
+#define AMD64_PERFMON_V2_EVENTSEL_UMASK_NB \
+ (ARCH_PERFMON_EVENTSEL_UMASK | \
+ GENMASK_ULL(27, 24))
+
+#define AMD64_PERFMON_V2_RAW_EVENT_MASK_NB \
+ (AMD64_PERFMON_V2_EVENTSEL_EVENT_NB | \
+ AMD64_PERFMON_V2_EVENTSEL_UMASK_NB)
+
#define AMD64_NUM_COUNTERS 4
#define AMD64_NUM_COUNTERS_CORE 6
#define AMD64_NUM_COUNTERS_NB 4
@@ -194,6 +207,9 @@ union cpuid_0x80000022_ebx {
struct {
/* Number of Core Performance Counters */
unsigned int num_core_pmc:4;
+ unsigned int reserved:6;
+ /* Number of Data Fabric Counters */
+ unsigned int num_df_pmc:6;
} split;
unsigned int full;
};
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 19514524f0f8..4a23e52fe0ee 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -72,7 +72,6 @@ static inline u64 lower_bits(u64 val, unsigned int bits)
struct real_mode_header;
enum stack_type;
-struct ghcb;
/* Early IDT entry points for #VC handler */
extern void vc_no_ghcb(void);
@@ -156,11 +155,7 @@ static __always_inline void sev_es_nmi_complete(void)
__sev_es_nmi_complete();
}
extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
-extern enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
- bool set_ghcb_msr,
- struct es_em_ctxt *ctxt,
- u64 exit_code, u64 exit_info_1,
- u64 exit_info_2);
+
static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
{
int rc;
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 45b18eb94fa1..35f709f619fb 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -295,6 +295,15 @@ static inline int enqcmds(void __iomem *dst, const void *src)
return 0;
}
+static inline void tile_release(void)
+{
+ /*
+ * Instruction opcode for TILERELEASE; supported in binutils
+ * version >= 2.36.
+ */
+ asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0");
+}
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_SPECIAL_INSNS_H */
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
index 2d8dacd02643..343b722ccaf2 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -21,6 +21,16 @@
* relative displacement across sections.
*/
+/*
+ * The trampoline is 8 bytes and of the general form:
+ *
+ * jmp.d32 \func
+ * ud1 %esp, %ecx
+ *
+ * That trailing #UD both provides a speculation stop and serves as a unique
+ * 3 byte signature identifying static call trampolines. Also see tramp_ud[]
+ * and __static_call_fixup().
+ */
#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \
asm(".pushsection .static_call.text, \"ax\" \n" \
".align 4 \n" \
@@ -28,7 +38,7 @@
STATIC_CALL_TRAMP_STR(name) ": \n" \
ANNOTATE_NOENDBR \
insns " \n" \
- ".byte 0x53, 0x43, 0x54 \n" \
+ ".byte 0x0f, 0xb9, 0xcc \n" \
".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
".popsection \n")
@@ -36,8 +46,13 @@
#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
+#ifdef CONFIG_RETHUNK
+#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
+#else
#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
+#endif
#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \
ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0)
@@ -48,4 +63,6 @@
".long " STATIC_CALL_KEY_STR(name) " - . \n" \
".popsection \n")
+extern bool __static_call_fixup(void *tramp, u8 op, void *dest);
+
#endif /* _ASM_STATIC_CALL_H */
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 1bfe979bb9bc..580636cdc257 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -2,9 +2,6 @@
#ifndef _ASM_X86_TLB_H
#define _ASM_X86_TLB_H
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-
#define tlb_flush tlb_flush
static inline void tlb_flush(struct mmu_gather *tlb);
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 4af5579c7ef7..cda3118f3b27 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -16,6 +16,7 @@
void __flush_tlb_all(void);
#define TLB_FLUSH_ALL -1UL
+#define TLB_GENERATION_INVALID 0
void cr4_update_irqsoff(unsigned long set, unsigned long clear);
unsigned long cr4_read_shadow(void);
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
index 8b33674288ea..f66fbe6537dd 100644
--- a/arch/x86/include/asm/unwind_hints.h
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -8,7 +8,11 @@
#ifdef __ASSEMBLY__
.macro UNWIND_HINT_EMPTY
- UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1
+ UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1
+.endm
+
+.macro UNWIND_HINT_ENTRY
+ UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1
.endm
.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0
@@ -52,6 +56,14 @@
UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC
.endm
+.macro UNWIND_HINT_SAVE
+ UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE
+.endm
+
+.macro UNWIND_HINT_RESTORE
+ UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE
+.endm
+
#else
#define UNWIND_HINT_FUNC \
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index e02a8a8ef23c..342290624040 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -11,11 +11,12 @@
#define SETUP_APPLE_PROPERTIES 5
#define SETUP_JAILHOUSE 6
#define SETUP_CC_BLOB 7
+#define SETUP_IMA 8
+#define SETUP_RNG_SEED 9
+#define SETUP_ENUM_MAX SETUP_RNG_SEED
#define SETUP_INDIRECT (1<<31)
-
-/* SETUP_INDIRECT | max(SETUP_*) */
-#define SETUP_TYPE_MAX (SETUP_INDIRECT | SETUP_CC_BLOB)
+#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT)
/* ram_size flags */
#define RAMDISK_IMAGE_START_MASK 0x07FF
@@ -172,6 +173,14 @@ struct jailhouse_setup_data {
} __attribute__((packed)) v2;
} __attribute__((packed));
+/*
+ * IMA buffer setup data information from the previous kernel during kexec
+ */
+struct ima_setup_data {
+ __u64 addr;
+ __u64 size;
+} __attribute__((packed));
+
/* The so-called "zeropage" */
struct boot_params {
struct screen_info screen_info; /* 0x000 */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 4c8b6ae802ac..a20a5ebfacd7 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -34,8 +34,6 @@ KASAN_SANITIZE_sev.o := n
# by several compilation units. To be safe, disable all instrumentation.
KCSAN_SANITIZE := n
-OBJECT_FILES_NON_STANDARD_test_nx.o := y
-
# If instrumentation of this dir is enabled, boot hangs during first second.
# Probably could be more selective here, but note that files related to irqs,
# boot, dumpstack/stacktrace, etc are either non-interesting or can lead to
diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c
index 734b96454896..8d8752b44f11 100644
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -16,6 +16,12 @@ bool cpc_supported_by_cpu(void)
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
case X86_VENDOR_HYGON:
+ if (boot_cpu_data.x86 == 0x19 && ((boot_cpu_data.x86_model <= 0x0f) ||
+ (boot_cpu_data.x86_model >= 0x20 && boot_cpu_data.x86_model <= 0x2f)))
+ return true;
+ else if (boot_cpu_data.x86 == 0x17 &&
+ boot_cpu_data.x86_model >= 0x70 && boot_cpu_data.x86_model <= 0x7f)
+ return true;
return boot_cpu_has(X86_FEATURE_CPPC);
}
return false;
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index e257f6c80372..62f6b8b7c4a5 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -115,6 +115,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
}
extern s32 __retpoline_sites[], __retpoline_sites_end[];
+extern s32 __return_sites[], __return_sites_end[];
extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
@@ -507,9 +508,78 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
}
}
+#ifdef CONFIG_RETHUNK
+/*
+ * Rewrite the compiler generated return thunk tail-calls.
+ *
+ * For example, convert:
+ *
+ * JMP __x86_return_thunk
+ *
+ * into:
+ *
+ * RET
+ */
+static int patch_return(void *addr, struct insn *insn, u8 *bytes)
+{
+ int i = 0;
+
+ if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ return -1;
+
+ bytes[i++] = RET_INSN_OPCODE;
+
+ for (; i < insn->length;)
+ bytes[i++] = INT3_INSN_OPCODE;
+
+ return i;
+}
+
+void __init_or_module noinline apply_returns(s32 *start, s32 *end)
+{
+ s32 *s;
+
+ for (s = start; s < end; s++) {
+ void *dest = NULL, *addr = (void *)s + *s;
+ struct insn insn;
+ int len, ret;
+ u8 bytes[16];
+ u8 op;
+
+ ret = insn_decode_kernel(&insn, addr);
+ if (WARN_ON_ONCE(ret < 0))
+ continue;
+
+ op = insn.opcode.bytes[0];
+ if (op == JMP32_INSN_OPCODE)
+ dest = addr + insn.length + insn.immediate.value;
+
+ if (__static_call_fixup(addr, op, dest) ||
+ WARN_ONCE(dest != &__x86_return_thunk,
+ "missing return thunk: %pS-%pS: %*ph",
+ addr, dest, 5, addr))
+ continue;
+
+ DPRINTK("return thunk at: %pS (%px) len: %d to: %pS",
+ addr, addr, insn.length,
+ addr + insn.length + insn.immediate.value);
+
+ len = patch_return(addr, &insn, bytes);
+ if (len == insn.length) {
+ DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
+ DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
+ text_poke_early(addr, bytes, len);
+ }
+ }
+}
+#else
+void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
+#endif /* CONFIG_RETHUNK */
+
#else /* !CONFIG_RETPOLINE || !CONFIG_OBJTOOL */
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
+void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
#endif /* CONFIG_RETPOLINE && CONFIG_OBJTOOL */
@@ -860,6 +930,7 @@ void __init alternative_instructions(void)
* those can rewrite the retpoline thunks.
*/
apply_retpolines(__retpoline_sites, __retpoline_sites_end);
+ apply_returns(__return_sites, __return_sites_end);
/*
* Then patch alternatives, such that those paravirt calls that are in
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 190e0f763375..4266b64631a4 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -19,17 +19,23 @@
#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0
#define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480
#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630
+#define PCI_DEVICE_ID_AMD_17H_MA0H_ROOT 0x14b5
#define PCI_DEVICE_ID_AMD_19H_M10H_ROOT 0x14a4
+#define PCI_DEVICE_ID_AMD_19H_M60H_ROOT 0x14d8
+#define PCI_DEVICE_ID_AMD_19H_M70H_ROOT 0x14e8
#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464
#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494
#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
+#define PCI_DEVICE_ID_AMD_17H_MA0H_DF_F4 0x1728
#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654
#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F4 0x14b1
#define PCI_DEVICE_ID_AMD_19H_M40H_ROOT 0x14b5
#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F4 0x167d
#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e
+#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F4 0x14e4
+#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4
/* Protect the PCI config register pairs used for SMN. */
static DEFINE_MUTEX(smn_mutex);
@@ -41,8 +47,11 @@ static const struct pci_device_id amd_root_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_ROOT) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_MA0H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M10H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_ROOT) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_ROOT) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_ROOT) },
{}
};
@@ -61,12 +70,15 @@ static const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_MA0H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M10H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F3) },
{}
};
@@ -81,6 +93,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_MA0H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M10H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F4) },
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 437308004ef2..cb50589a7102 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -19,6 +19,7 @@
#include <asm/suspend.h>
#include <asm/tlbflush.h>
#include <asm/tdx.h>
+#include "../kvm/vmx/vmx.h"
#ifdef CONFIG_XEN
#include <xen/interface/xen.h>
@@ -107,4 +108,9 @@ static void __used common(void)
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
+
+ if (IS_ENABLED(CONFIG_KVM_INTEL)) {
+ BLANK();
+ OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl);
+ }
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 0c0b09796ced..35d5288394cb 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -862,6 +862,28 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
clear_rdrand_cpuid_bit(c);
}
+void init_spectral_chicken(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_CPU_UNRET_ENTRY
+ u64 value;
+
+ /*
+ * On Zen2 we offer this chicken (bit) on the altar of Speculation.
+ *
+ * This suppresses speculation from the middle of a basic block, i.e. it
+ * suppresses non-branch predictions.
+ *
+ * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H
+ */
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) {
+ if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) {
+ value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT;
+ wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value);
+ }
+ }
+#endif
+}
+
static void init_amd_zn(struct cpuinfo_x86 *c)
{
set_cpu_cap(c, X86_FEATURE_ZEN);
@@ -870,12 +892,21 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
node_reclaim_distance = 32;
#endif
- /*
- * Fix erratum 1076: CPB feature bit not being set in CPUID.
- * Always set it, except when running under a hypervisor.
- */
- if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB))
- set_cpu_cap(c, X86_FEATURE_CPB);
+ /* Fix up CPUID bits, but only if not virtualised. */
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) {
+
+ /* Erratum 1076: CPB feature bit not being set in CPUID. */
+ if (!cpu_has(c, X86_FEATURE_CPB))
+ set_cpu_cap(c, X86_FEATURE_CPB);
+
+ /*
+ * Zen3 (Fam19 model < 0x10) parts are not susceptible to
+ * Branch Type Confusion, but predate the allocation of the
+ * BTC_NO bit.
+ */
+ if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO))
+ set_cpu_cap(c, X86_FEATURE_BTC_NO);
+ }
}
static void init_amd(struct cpuinfo_x86 *c)
@@ -907,7 +938,8 @@ static void init_amd(struct cpuinfo_x86 *c)
case 0x12: init_amd_ln(c); break;
case 0x15: init_amd_bd(c); break;
case 0x16: init_amd_jg(c); break;
- case 0x17: fallthrough;
+ case 0x17: init_spectral_chicken(c);
+ fallthrough;
case 0x19: init_amd_zn(c); break;
}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 74c62cc47a5f..6761668100b9 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -38,6 +38,8 @@
static void __init spectre_v1_select_mitigation(void);
static void __init spectre_v2_select_mitigation(void);
+static void __init retbleed_select_mitigation(void);
+static void __init spectre_v2_user_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);
static void __init mds_select_mitigation(void);
@@ -48,16 +50,40 @@ static void __init mmio_select_mitigation(void);
static void __init srbds_select_mitigation(void);
static void __init l1d_flush_select_mitigation(void);
-/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
+/* The base value of the SPEC_CTRL MSR without task-specific bits set */
u64 x86_spec_ctrl_base;
EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
+
+/* The current value of the SPEC_CTRL MSR with task-specific bits set */
+DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
+
static DEFINE_MUTEX(spec_ctrl_mutex);
/*
- * The vendor and possibly platform specific bits which can be modified in
- * x86_spec_ctrl_base.
+ * Keep track of the SPEC_CTRL MSR value for the current task, which may differ
+ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update().
*/
-static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
+void write_spec_ctrl_current(u64 val, bool force)
+{
+ if (this_cpu_read(x86_spec_ctrl_current) == val)
+ return;
+
+ this_cpu_write(x86_spec_ctrl_current, val);
+
+ /*
+ * When KERNEL_IBRS is in use, this MSR is written on return-to-user;
+ * unless forced, the update can be delayed until that time.
+ */
+ if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
+ wrmsrl(MSR_IA32_SPEC_CTRL, val);
+}
+
+u64 spec_ctrl_current(void)
+{
+ return this_cpu_read(x86_spec_ctrl_current);
+}
+EXPORT_SYMBOL_GPL(spec_ctrl_current);
/*
* AMD specific MSR info for Speculative Store Bypass control.
@@ -114,13 +140,21 @@ void __init check_bugs(void)
if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
- /* Allow STIBP in MSR_SPEC_CTRL if supported */
- if (boot_cpu_has(X86_FEATURE_STIBP))
- x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
-
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
+ /*
+ * retbleed_select_mitigation() relies on the state set by
+ * spectre_v2_select_mitigation(); specifically it wants to know about
+ * spectre_v2=ibrs.
+ */
+ retbleed_select_mitigation();
+ /*
+ * spectre_v2_user_select_mitigation() relies on the state set by
+ * retbleed_select_mitigation(); specifically the STIBP selection is
+ * forced for UNRET.
+ */
+ spectre_v2_user_select_mitigation();
ssb_select_mitigation();
l1tf_select_mitigation();
md_clear_select_mitigation();
@@ -161,31 +195,17 @@ void __init check_bugs(void)
#endif
}
+/*
+ * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is
+ * done in vmenter.S.
+ */
void
x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
{
- u64 msrval, guestval, hostval = x86_spec_ctrl_base;
+ u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
struct thread_info *ti = current_thread_info();
- /* Is MSR_SPEC_CTRL implemented ? */
if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
- /*
- * Restrict guest_spec_ctrl to supported values. Clear the
- * modifiable bits in the host base value and or the
- * modifiable bits from the guest value.
- */
- guestval = hostval & ~x86_spec_ctrl_mask;
- guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
-
- /* SSBD controlled in MSR_SPEC_CTRL */
- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
- static_cpu_has(X86_FEATURE_AMD_SSBD))
- hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
-
- /* Conditional STIBP enabled? */
- if (static_branch_unlikely(&switch_to_cond_stibp))
- hostval |= stibp_tif_to_spec_ctrl(ti->flags);
-
if (hostval != guestval) {
msrval = setguest ? guestval : hostval;
wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
@@ -752,12 +772,180 @@ static int __init nospectre_v1_cmdline(char *str)
}
early_param("nospectre_v1", nospectre_v1_cmdline);
-#undef pr_fmt
-#define pr_fmt(fmt) "Spectre V2 : " fmt
-
static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
SPECTRE_V2_NONE;
+#undef pr_fmt
+#define pr_fmt(fmt) "RETBleed: " fmt
+
+enum retbleed_mitigation {
+ RETBLEED_MITIGATION_NONE,
+ RETBLEED_MITIGATION_UNRET,
+ RETBLEED_MITIGATION_IBPB,
+ RETBLEED_MITIGATION_IBRS,
+ RETBLEED_MITIGATION_EIBRS,
+};
+
+enum retbleed_mitigation_cmd {
+ RETBLEED_CMD_OFF,
+ RETBLEED_CMD_AUTO,
+ RETBLEED_CMD_UNRET,
+ RETBLEED_CMD_IBPB,
+};
+
+static const char * const retbleed_strings[] = {
+ [RETBLEED_MITIGATION_NONE] = "Vulnerable",
+ [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk",
+ [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB",
+ [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS",
+ [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS",
+};
+
+static enum retbleed_mitigation retbleed_mitigation __ro_after_init =
+ RETBLEED_MITIGATION_NONE;
+static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init =
+ RETBLEED_CMD_AUTO;
+
+static int __ro_after_init retbleed_nosmt = false;
+
+static int __init retbleed_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ while (str) {
+ char *next = strchr(str, ',');
+ if (next) {
+ *next = 0;
+ next++;
+ }
+
+ if (!strcmp(str, "off")) {
+ retbleed_cmd = RETBLEED_CMD_OFF;
+ } else if (!strcmp(str, "auto")) {
+ retbleed_cmd = RETBLEED_CMD_AUTO;
+ } else if (!strcmp(str, "unret")) {
+ retbleed_cmd = RETBLEED_CMD_UNRET;
+ } else if (!strcmp(str, "ibpb")) {
+ retbleed_cmd = RETBLEED_CMD_IBPB;
+ } else if (!strcmp(str, "nosmt")) {
+ retbleed_nosmt = true;
+ } else {
+ pr_err("Ignoring unknown retbleed option (%s).", str);
+ }
+
+ str = next;
+ }
+
+ return 0;
+}
+early_param("retbleed", retbleed_parse_cmdline);
+
+#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n"
+#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n"
+
+static void __init retbleed_select_mitigation(void)
+{
+ bool mitigate_smt = false;
+
+ if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off())
+ return;
+
+ switch (retbleed_cmd) {
+ case RETBLEED_CMD_OFF:
+ return;
+
+ case RETBLEED_CMD_UNRET:
+ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) {
+ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
+ } else {
+ pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n");
+ goto do_cmd_auto;
+ }
+ break;
+
+ case RETBLEED_CMD_IBPB:
+ if (!boot_cpu_has(X86_FEATURE_IBPB)) {
+ pr_err("WARNING: CPU does not support IBPB.\n");
+ goto do_cmd_auto;
+ } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
+ retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+ } else {
+ pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
+ goto do_cmd_auto;
+ }
+ break;
+
+do_cmd_auto:
+ case RETBLEED_CMD_AUTO:
+ default:
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
+ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY))
+ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
+ else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB))
+ retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+ }
+
+ /*
+ * The Intel mitigation (IBRS or eIBRS) was already selected in
+ * spectre_v2_select_mitigation(). 'retbleed_mitigation' will
+ * be set accordingly below.
+ */
+
+ break;
+ }
+
+ switch (retbleed_mitigation) {
+ case RETBLEED_MITIGATION_UNRET:
+ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+ setup_force_cpu_cap(X86_FEATURE_UNRET);
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+ pr_err(RETBLEED_UNTRAIN_MSG);
+
+ mitigate_smt = true;
+ break;
+
+ case RETBLEED_MITIGATION_IBPB:
+ setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ mitigate_smt = true;
+ break;
+
+ default:
+ break;
+ }
+
+ if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) &&
+ (retbleed_nosmt || cpu_mitigations_auto_nosmt()))
+ cpu_smt_disable(false);
+
+ /*
+ * Let IBRS trump all on Intel without affecting the effects of the
+ * retbleed= cmdline option.
+ */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+ switch (spectre_v2_enabled) {
+ case SPECTRE_V2_IBRS:
+ retbleed_mitigation = RETBLEED_MITIGATION_IBRS;
+ break;
+ case SPECTRE_V2_EIBRS:
+ case SPECTRE_V2_EIBRS_RETPOLINE:
+ case SPECTRE_V2_EIBRS_LFENCE:
+ retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
+ break;
+ default:
+ pr_err(RETBLEED_INTEL_MSG);
+ }
+ }
+
+ pr_info("%s\n", retbleed_strings[retbleed_mitigation]);
+}
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Spectre V2 : " fmt
+
static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
SPECTRE_V2_USER_NONE;
static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init =
@@ -787,6 +975,7 @@ static inline const char *spectre_v2_module_string(void) { return ""; }
#define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n"
#define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n"
#define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n"
+#define SPECTRE_V2_IBRS_PERF_MSG "WARNING: IBRS mitigation selected on Enhanced IBRS CPU, this may cause unnecessary performance loss\n"
#ifdef CONFIG_BPF_SYSCALL
void unpriv_ebpf_notify(int new_state)
@@ -828,6 +1017,7 @@ enum spectre_v2_mitigation_cmd {
SPECTRE_V2_CMD_EIBRS,
SPECTRE_V2_CMD_EIBRS_RETPOLINE,
SPECTRE_V2_CMD_EIBRS_LFENCE,
+ SPECTRE_V2_CMD_IBRS,
};
enum spectre_v2_user_cmd {
@@ -868,13 +1058,15 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure)
pr_info("spectre_v2_user=%s forced on command line.\n", reason);
}
+static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd;
+
static enum spectre_v2_user_cmd __init
-spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
+spectre_v2_parse_user_cmdline(void)
{
char arg[20];
int ret, i;
- switch (v2_cmd) {
+ switch (spectre_v2_cmd) {
case SPECTRE_V2_CMD_NONE:
return SPECTRE_V2_USER_CMD_NONE;
case SPECTRE_V2_CMD_FORCE:
@@ -900,15 +1092,16 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
return SPECTRE_V2_USER_CMD_AUTO;
}
-static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
+static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
{
- return (mode == SPECTRE_V2_EIBRS ||
- mode == SPECTRE_V2_EIBRS_RETPOLINE ||
- mode == SPECTRE_V2_EIBRS_LFENCE);
+ return mode == SPECTRE_V2_IBRS ||
+ mode == SPECTRE_V2_EIBRS ||
+ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
+ mode == SPECTRE_V2_EIBRS_LFENCE;
}
static void __init
-spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
+spectre_v2_user_select_mitigation(void)
{
enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
bool smt_possible = IS_ENABLED(CONFIG_SMP);
@@ -921,7 +1114,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
smt_possible = false;
- cmd = spectre_v2_parse_user_cmdline(v2_cmd);
+ cmd = spectre_v2_parse_user_cmdline();
switch (cmd) {
case SPECTRE_V2_USER_CMD_NONE:
goto set_mode;
@@ -969,12 +1162,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
}
/*
- * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not
- * required.
+ * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible,
+ * STIBP is not required.
*/
if (!boot_cpu_has(X86_FEATURE_STIBP) ||
!smt_possible ||
- spectre_v2_in_eibrs_mode(spectre_v2_enabled))
+ spectre_v2_in_ibrs_mode(spectre_v2_enabled))
return;
/*
@@ -986,6 +1179,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
mode = SPECTRE_V2_USER_STRICT_PREFERRED;
+ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) {
+ if (mode != SPECTRE_V2_USER_STRICT &&
+ mode != SPECTRE_V2_USER_STRICT_PREFERRED)
+ pr_info("Selecting STIBP always-on mode to complement retbleed mitigation\n");
+ mode = SPECTRE_V2_USER_STRICT_PREFERRED;
+ }
+
spectre_v2_user_stibp = mode;
set_mode:
@@ -999,6 +1199,7 @@ static const char * const spectre_v2_strings[] = {
[SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS",
[SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE",
[SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines",
+ [SPECTRE_V2_IBRS] = "Mitigation: IBRS",
};
static const struct {
@@ -1016,6 +1217,7 @@ static const struct {
{ "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false },
{ "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false },
{ "auto", SPECTRE_V2_CMD_AUTO, false },
+ { "ibrs", SPECTRE_V2_CMD_IBRS, false },
};
static void __init spec_v2_print_cond(const char *reason, bool secure)
@@ -1078,6 +1280,30 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return SPECTRE_V2_CMD_AUTO;
}
+ if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) {
+ pr_err("%s selected but not compiled in. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
+ pr_err("%s selected but not Intel CPU. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) {
+ pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) {
+ pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
spec_v2_print_cond(mitigation_options[i].option,
mitigation_options[i].secure);
return cmd;
@@ -1093,6 +1319,22 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
return SPECTRE_V2_RETPOLINE;
}
+/* Disable in-kernel use of non-RSB RET predictors */
+static void __init spec_ctrl_disable_kernel_rrsba(void)
+{
+ u64 ia32_cap;
+
+ if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+ return;
+
+ ia32_cap = x86_read_arch_cap_msr();
+
+ if (ia32_cap & ARCH_CAP_RRSBA) {
+ x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
+ write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ }
+}
+
static void __init spectre_v2_select_mitigation(void)
{
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -1117,6 +1359,15 @@ static void __init spectre_v2_select_mitigation(void)
break;
}
+ if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) &&
+ boot_cpu_has_bug(X86_BUG_RETBLEED) &&
+ retbleed_cmd != RETBLEED_CMD_OFF &&
+ boot_cpu_has(X86_FEATURE_IBRS) &&
+ boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+ mode = SPECTRE_V2_IBRS;
+ break;
+ }
+
mode = spectre_v2_select_retpoline();
break;
@@ -1133,6 +1384,10 @@ static void __init spectre_v2_select_mitigation(void)
mode = spectre_v2_select_retpoline();
break;
+ case SPECTRE_V2_CMD_IBRS:
+ mode = SPECTRE_V2_IBRS;
+ break;
+
case SPECTRE_V2_CMD_EIBRS:
mode = SPECTRE_V2_EIBRS;
break;
@@ -1149,10 +1404,9 @@ static void __init spectre_v2_select_mitigation(void)
if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
- if (spectre_v2_in_eibrs_mode(mode)) {
- /* Force it so VMEXIT will restore correctly */
+ if (spectre_v2_in_ibrs_mode(mode)) {
x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ write_spec_ctrl_current(x86_spec_ctrl_base, true);
}
switch (mode) {
@@ -1160,6 +1414,12 @@ static void __init spectre_v2_select_mitigation(void)
case SPECTRE_V2_EIBRS:
break;
+ case SPECTRE_V2_IBRS:
+ setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS);
+ if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED))
+ pr_warn(SPECTRE_V2_IBRS_PERF_MSG);
+ break;
+
case SPECTRE_V2_LFENCE:
case SPECTRE_V2_EIBRS_LFENCE:
setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE);
@@ -1171,43 +1431,117 @@ static void __init spectre_v2_select_mitigation(void)
break;
}
+ /*
+ * Disable alternate RSB predictions in kernel when indirect CALLs and
+ * JMPs get protection against BHI and Intramode-BTI, but RET
+ * prediction from a non-RSB predictor is still a risk.
+ */
+ if (mode == SPECTRE_V2_EIBRS_LFENCE ||
+ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
+ mode == SPECTRE_V2_RETPOLINE)
+ spec_ctrl_disable_kernel_rrsba();
+
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
/*
- * If spectre v2 protection has been enabled, unconditionally fill
- * RSB during a context switch; this protects against two independent
- * issues:
+ * If Spectre v2 protection has been enabled, fill the RSB during a
+ * context switch. In general there are two types of RSB attacks
+ * across context switches, for which the CALLs/RETs may be unbalanced.
+ *
+ * 1) RSB underflow
+ *
+ * Some Intel parts have "bottomless RSB". When the RSB is empty,
+ * speculated return targets may come from the branch predictor,
+ * which could have a user-poisoned BTB or BHB entry.
*
- * - RSB underflow (and switch to BTB) on Skylake+
- * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs
+ * AMD has it even worse: *all* returns are speculated from the BTB,
+ * regardless of the state of the RSB.
+ *
+ * When IBRS or eIBRS is enabled, the "user -> kernel" attack
+ * scenario is mitigated by the IBRS branch prediction isolation
+ * properties, so the RSB buffer filling wouldn't be necessary to
+ * protect against this type of attack.
+ *
+ * The "user -> user" attack scenario is mitigated by RSB filling.
+ *
+ * 2) Poisoned RSB entry
+ *
+ * If the 'next' in-kernel return stack is shorter than 'prev',
+ * 'next' could be tricked into speculating with a user-poisoned RSB
+ * entry.
+ *
+ * The "user -> kernel" attack scenario is mitigated by SMEP and
+ * eIBRS.
+ *
+ * The "user -> user" scenario, also known as SpectreBHB, requires
+ * RSB clearing.
+ *
+ * So to mitigate all cases, unconditionally fill RSB on context
+ * switches.
+ *
+ * FIXME: Is this pointless for retbleed-affected AMD?
*/
setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
/*
- * Retpoline means the kernel is safe because it has no indirect
- * branches. Enhanced IBRS protects firmware too, so, enable restricted
- * speculation around firmware calls only when Enhanced IBRS isn't
- * supported.
+ * Similar to context switches, there are two types of RSB attacks
+ * after vmexit:
+ *
+ * 1) RSB underflow
+ *
+ * 2) Poisoned RSB entry
+ *
+ * When retpoline is enabled, both are mitigated by filling/clearing
+ * the RSB.
+ *
+ * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
+ * prediction isolation protections, RSB still needs to be cleared
+ * because of #2. Note that SMEP provides no protection here, unlike
+ * user-space-poisoned RSB entries.
+ *
+ * eIBRS, on the other hand, has RSB-poisoning protections, so it
+ * doesn't need RSB clearing after vmexit.
+ */
+ if (boot_cpu_has(X86_FEATURE_RETPOLINE) ||
+ boot_cpu_has(X86_FEATURE_KERNEL_IBRS))
+ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
+
+ /*
+ * Retpoline protects the kernel, but doesn't protect firmware. IBRS
+ * and Enhanced IBRS protect firmware too, so enable IBRS around
+ * firmware calls only when IBRS / Enhanced IBRS aren't otherwise
+ * enabled.
*
* Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because
* the user might select retpoline on the kernel command line and if
* the CPU supports Enhanced IBRS, kernel might un-intentionally not
* enable IBRS around firmware calls.
*/
- if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) {
+ if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
+ boot_cpu_has(X86_FEATURE_IBPB) &&
+ (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) {
+
+ if (retbleed_cmd != RETBLEED_CMD_IBPB) {
+ setup_force_cpu_cap(X86_FEATURE_USE_IBPB_FW);
+ pr_info("Enabling Speculation Barrier for firmware calls\n");
+ }
+
+ } else if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
/* Set up IBPB and STIBP depending on the general spectre V2 command */
- spectre_v2_user_select_mitigation(cmd);
+ spectre_v2_cmd = cmd;
}
static void update_stibp_msr(void * __unused)
{
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP);
+ write_spec_ctrl_current(val, true);
}
/* Update x86_spec_ctrl_base in case SMT state changed. */
@@ -1424,16 +1758,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
}
/*
- * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
- * bit in the mask to allow guests to use the mitigation even in the
- * case where the host does not enable it.
- */
- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
- static_cpu_has(X86_FEATURE_AMD_SSBD)) {
- x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
- }
-
- /*
* We have three CPU feature flags that are in play here:
* - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
* - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
@@ -1450,7 +1774,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
x86_amd_ssb_disable();
} else {
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ write_spec_ctrl_current(x86_spec_ctrl_base, true);
}
}
@@ -1701,7 +2025,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
void x86_spec_ctrl_setup_ap(void)
{
if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ write_spec_ctrl_current(x86_spec_ctrl_base, true);
if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
x86_amd_ssb_disable();
@@ -1938,7 +2262,7 @@ static ssize_t mmio_stale_data_show_state(char *buf)
static char *stibp_state(void)
{
- if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
+ if (spectre_v2_in_ibrs_mode(spectre_v2_enabled))
return "";
switch (spectre_v2_user_stibp) {
@@ -1994,6 +2318,24 @@ static ssize_t srbds_show_state(char *buf)
return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
}
+static ssize_t retbleed_show_state(char *buf)
+{
+ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) {
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+ return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n");
+
+ return sprintf(buf, "%s; SMT %s\n",
+ retbleed_strings[retbleed_mitigation],
+ !sched_smt_active() ? "disabled" :
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ?
+ "enabled with STIBP protection" : "vulnerable");
+ }
+
+ return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
+}
+
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
@@ -2039,6 +2381,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_MMIO_STALE_DATA:
return mmio_stale_data_show_state(buf);
+ case X86_BUG_RETBLEED:
+ return retbleed_show_state(buf);
+
default:
break;
}
@@ -2095,4 +2440,9 @@ ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *at
{
return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
}
+
+ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED);
+}
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4730b0a58f24..736262a76a12 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1205,48 +1205,60 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
{}
};
+#define VULNBL(vendor, family, model, blacklist) \
+ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist)
+
#define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \
X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \
INTEL_FAM6_##model, steppings, \
X86_FEATURE_ANY, issues)
+#define VULNBL_AMD(family, blacklist) \
+ VULNBL(AMD, family, X86_MODEL_ANY, blacklist)
+
+#define VULNBL_HYGON(family, blacklist) \
+ VULNBL(HYGON, family, X86_MODEL_ANY, blacklist)
+
#define SRBDS BIT(0)
/* CPU is affected by X86_BUG_MMIO_STALE_DATA */
#define MMIO BIT(1)
/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */
#define MMIO_SBDS BIT(2)
+/* CPU is affected by RETbleed, speculating where you would not expect it */
+#define RETBLEED BIT(3)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO),
- VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO),
+ VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) |
- BIT(7) | BIT(0xB), MMIO),
- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO),
- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS),
- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO),
- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS),
- VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS),
- VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO),
- VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO),
- VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS),
- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO),
- VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
- VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO),
- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO),
- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
+
+ VULNBL_AMD(0x15, RETBLEED),
+ VULNBL_AMD(0x16, RETBLEED),
+ VULNBL_AMD(0x17, RETBLEED),
+ VULNBL_HYGON(0x18, RETBLEED),
{}
};
@@ -1348,6 +1360,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
!arch_cap_mmio_immune(ia32_cap))
setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+ if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
+ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
+ setup_force_cpu_bug(X86_BUG_RETBLEED);
+ }
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 2a8e584fc991..7c9b5893c30a 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -61,6 +61,8 @@ static inline void tsx_init(void) { }
static inline void tsx_ap_init(void) { }
#endif /* CONFIG_CPU_SUP_INTEL */
+extern void init_spectral_chicken(struct cpuinfo_x86 *c);
+
extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void get_cpu_address_sizes(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index 3fcdda4c1e11..21fd425088fe 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -302,6 +302,12 @@ static void init_hygon(struct cpuinfo_x86 *c)
/* get apicid instead of initial apic id from cpuid */
c->apicid = hard_smp_processor_id();
+ /*
+ * XXX someone from Hygon needs to confirm this DTRT
+ *
+ init_spectral_chicken(c);
+ */
+
set_cpu_cap(c, X86_FEATURE_ZEN);
set_cpu_cap(c, X86_FEATURE_CPB);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fd5dead8371c..663f6e6dd288 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -682,9 +682,9 @@ static void init_intel(struct cpuinfo_x86 *c)
unsigned int l1, l2;
rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
- if (!(l1 & (1<<11)))
+ if (!(l1 & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL))
set_cpu_cap(c, X86_FEATURE_BTS);
- if (!(l1 & (1<<12)))
+ if (!(l1 & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL))
set_cpu_cap(c, X86_FEATURE_PEBS);
}
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 5fbd7ffb3233..12cf2e7ca33c 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -33,6 +33,8 @@
#include "internal.h"
+static bool hw_injection_possible = true;
+
/*
* Collect all the MCi_XXX settings
*/
@@ -339,6 +341,8 @@ static int __set_inj(const char *buf)
for (i = 0; i < N_INJ_TYPES; i++) {
if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
+ if (i > SW_INJ && !hw_injection_possible)
+ continue;
inj_type = i;
return 0;
}
@@ -717,11 +721,54 @@ static void __init debugfs_init(void)
&i_mce, dfs_fls[i].fops);
}
+static void check_hw_inj_possible(void)
+{
+ int cpu;
+ u8 bank;
+
+ /*
+ * This behavior exists only on SMCA systems though its not directly
+ * related to SMCA.
+ */
+ if (!cpu_feature_enabled(X86_FEATURE_SMCA))
+ return;
+
+ cpu = get_cpu();
+
+ for (bank = 0; bank < MAX_NR_BANKS; ++bank) {
+ u64 status = MCI_STATUS_VAL, ipid;
+
+ /* Check whether bank is populated */
+ rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), ipid);
+ if (!ipid)
+ continue;
+
+ toggle_hw_mce_inject(cpu, true);
+
+ wrmsrl_safe(mca_msr_reg(bank, MCA_STATUS), status);
+ rdmsrl_safe(mca_msr_reg(bank, MCA_STATUS), &status);
+
+ if (!status) {
+ hw_injection_possible = false;
+ pr_warn("Platform does not allow *hardware* error injection."
+ "Try using APEI EINJ instead.\n");
+ }
+
+ toggle_hw_mce_inject(cpu, false);
+
+ break;
+ }
+
+ put_cpu();
+}
+
static int __init inject_init(void)
{
if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
return -ENOMEM;
+ check_hw_inj_possible();
+
debugfs_init();
register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 4ae0e603f7fa..7e03f5b7f6bd 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -211,7 +211,7 @@ noinstr u64 mce_rdmsrl(u32 msr);
static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
{
- if (mce_flags.smca) {
+ if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
switch (reg) {
case MCA_CTL: return MSR_AMD64_SMCA_MCx_CTL(bank);
case MCA_ADDR: return MSR_AMD64_SMCA_MCx_ADDR(bank);
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index dbaa8326d6f2..fd44b54c90d5 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -27,6 +27,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
+ { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
{ X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
{ X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index c04b933f48d3..02039ec3597d 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -476,8 +476,8 @@ static bool __init vmware_legacy_x2apic_available(void)
{
uint32_t eax, ebx, ecx, edx;
VMWARE_CMD(GETVCPU_INFO, eax, ebx, ecx, edx);
- return (eax & (1 << VMWARE_CMD_VCPU_RESERVED)) == 0 &&
- (eax & (1 << VMWARE_CMD_LEGACY_X2APIC)) != 0;
+ return !(eax & BIT(VMWARE_CMD_VCPU_RESERVED)) &&
+ (eax & BIT(VMWARE_CMD_LEGACY_X2APIC));
}
#ifdef CONFIG_AMD_MEM_ENCRYPT
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index f267205f2d5a..9dac24680ff8 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1017,10 +1017,10 @@ void __init e820__reserve_setup_data(void)
e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
/*
- * SETUP_EFI is supplied by kexec and does not need to be
- * reserved.
+ * SETUP_EFI and SETUP_IMA are supplied by kexec and do not need
+ * to be reserved.
*/
- if (data->type != SETUP_EFI)
+ if (data->type != SETUP_EFI && data->type != SETUP_IMA)
e820__range_update_kexec(pa_data,
sizeof(*data) + data->len,
E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 0531d6a06df5..3b28c5b25e12 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -851,3 +851,17 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
*/
return 0;
}
+
+/*
+ * Initialize register state that may prevent entering low-power idle.
+ * This function will be invoked from the cpuidle driver only when needed.
+ */
+void fpu_idle_fpregs(void)
+{
+ /* Note: AMX_TILE being enabled implies XGETBV1 support */
+ if (cpu_feature_enabled(X86_FEATURE_AMX_TILE) &&
+ (xfeatures_in_use() & XFEATURE_MASK_XTILE)) {
+ tile_release();
+ fpregs_deactivate(&current->thread.fpu);
+ }
+}
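The xfeatures_in_use() check above relies on XGETBV with ECX=1 (XINUSE reporting). A minimal user-space sketch of the same test, not part of the patch; the XTILE mask and the assumption that the CPU supports the ECX=1 form are spelled out in the comments:

#include <stdint.h>
#include <stdio.h>

#define XFEATURE_MASK_XTILE	(3ULL << 17)	/* XTILECFG | XTILEDATA */

/* Assumes OSXSAVE is set and the CPU supports XGETBV with ECX=1;
 * otherwise this faults. */
static inline uint64_t xgetbv(uint32_t index)
{
	uint32_t eax, edx;

	asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index));
	return ((uint64_t)edx << 32) | eax;
}

int main(void)
{
	/* XGETBV(1) returns XINUSE: which xstate components are currently live. */
	uint64_t xinuse = xgetbv(1);

	printf("AMX tile state in use: %s\n",
	       (xinuse & XFEATURE_MASK_XTILE) ? "yes" : "no");
	return 0;
}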
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 5b4efc927d80..24b9fa89aa27 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -301,7 +301,7 @@ union ftrace_op_code_union {
} __attribute__((packed));
};
-#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS)
+#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS))
static unsigned long
create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
@@ -357,7 +357,10 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
goto fail;
ip = trampoline + size;
- memcpy(ip, retq, RET_SIZE);
+ if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE);
+ else
+ memcpy(ip, retq, sizeof(retq));
/* No need to test direct calls on created trampolines */
if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index eb8656bac99b..9b7acc9c7874 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -23,6 +23,7 @@
#include <asm/cpufeatures.h>
#include <asm/percpu.h>
#include <asm/nops.h>
+#include <asm/nospec-branch.h>
#include <asm/bootparam.h>
#include <asm/export.h>
#include <asm/pgtable_32.h>
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 92c4afa2b729..d860d437631b 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -389,6 +389,8 @@ SYM_CODE_START_NOALIGN(vc_boot_ghcb)
UNWIND_HINT_IRET_REGS offset=8
ENDBR
+ ANNOTATE_UNRET_END
+
/* Build pt_regs */
PUSH_AND_CLEAR_REGS
@@ -448,6 +450,7 @@ SYM_CODE_END(early_idt_handler_array)
SYM_CODE_START_LOCAL(early_idt_handler_common)
UNWIND_HINT_IRET_REGS offset=16
+ ANNOTATE_UNRET_END
/*
* The stack is the hardware frame, an error code or zero, and the
* vector number.
@@ -497,6 +500,8 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb)
UNWIND_HINT_IRET_REGS offset=8
ENDBR
+ ANNOTATE_UNRET_END
+
/* Build pt_regs */
PUSH_AND_CLEAR_REGS
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 68f091ba8443..f5b8ef02d172 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -146,16 +146,3 @@ void arch_jump_label_transform_apply(void)
text_poke_finish();
mutex_unlock(&text_mutex);
}
-
-static enum {
- JL_STATE_START,
- JL_STATE_NO_UPDATE,
- JL_STATE_UPDATE,
-} jlstate __initdata_or_module = JL_STATE_START;
-
-__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
- enum jump_label_type type)
-{
- if (jlstate == JL_STATE_UPDATE)
- jump_label_transform(entry, type, 1);
-}
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 170d0fd68b1f..b9bdb40364a6 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -18,6 +18,7 @@
#include <linux/mm.h>
#include <linux/efi.h>
#include <linux/verification.h>
+#include <linux/random.h>
#include <asm/bootparam.h>
#include <asm/setup.h>
@@ -110,6 +111,26 @@ static int setup_e820_entries(struct boot_params *params)
return 0;
}
+enum { RNG_SEED_LENGTH = 32 };
+
+static void
+setup_rng_seed(struct boot_params *params, unsigned long params_load_addr,
+ unsigned int rng_seed_setup_data_offset)
+{
+ struct setup_data *sd = (void *)params + rng_seed_setup_data_offset;
+ unsigned long setup_data_phys;
+
+ if (!rng_is_initialized())
+ return;
+
+ sd->type = SETUP_RNG_SEED;
+ sd->len = RNG_SEED_LENGTH;
+ get_random_bytes(sd->data, RNG_SEED_LENGTH);
+ setup_data_phys = params_load_addr + rng_seed_setup_data_offset;
+ sd->next = params->hdr.setup_data;
+ params->hdr.setup_data = setup_data_phys;
+}
+
#ifdef CONFIG_EFI
static int setup_efi_info_memmap(struct boot_params *params,
unsigned long params_load_addr,
@@ -186,11 +207,38 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr,
}
#endif /* CONFIG_EFI */
+static void
+setup_ima_state(const struct kimage *image, struct boot_params *params,
+ unsigned long params_load_addr,
+ unsigned int ima_setup_data_offset)
+{
+#ifdef CONFIG_IMA_KEXEC
+ struct setup_data *sd = (void *)params + ima_setup_data_offset;
+ unsigned long setup_data_phys;
+ struct ima_setup_data *ima;
+
+ if (!image->ima_buffer_size)
+ return;
+
+ sd->type = SETUP_IMA;
+ sd->len = sizeof(*ima);
+
+ ima = (void *)sd + sizeof(struct setup_data);
+ ima->addr = image->ima_buffer_addr;
+ ima->size = image->ima_buffer_size;
+
+ /* Add setup data */
+ setup_data_phys = params_load_addr + ima_setup_data_offset;
+ sd->next = params->hdr.setup_data;
+ params->hdr.setup_data = setup_data_phys;
+#endif /* CONFIG_IMA_KEXEC */
+}
+
static int
setup_boot_parameters(struct kimage *image, struct boot_params *params,
unsigned long params_load_addr,
unsigned int efi_map_offset, unsigned int efi_map_sz,
- unsigned int efi_setup_data_offset)
+ unsigned int setup_data_offset)
{
unsigned int nr_e820_entries;
unsigned long long mem_k, start, end;
@@ -245,8 +293,22 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params,
#ifdef CONFIG_EFI
/* Setup EFI state */
setup_efi_state(params, params_load_addr, efi_map_offset, efi_map_sz,
- efi_setup_data_offset);
+ setup_data_offset);
+ setup_data_offset += sizeof(struct setup_data) +
+ sizeof(struct efi_setup_data);
#endif
+
+ if (IS_ENABLED(CONFIG_IMA_KEXEC)) {
+ /* Setup IMA log buffer state */
+ setup_ima_state(image, params, params_load_addr,
+ setup_data_offset);
+ setup_data_offset += sizeof(struct setup_data) +
+ sizeof(struct ima_setup_data);
+ }
+
+ /* Setup RNG seed */
+ setup_rng_seed(params, params_load_addr, setup_data_offset);
+
/* Setup EDD info */
memcpy(params->eddbuf, boot_params.eddbuf,
EDDMAXNR * sizeof(struct edd_info));
@@ -401,7 +463,13 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
params_cmdline_sz = ALIGN(params_cmdline_sz, 16);
kbuf.bufsz = params_cmdline_sz + ALIGN(efi_map_sz, 16) +
sizeof(struct setup_data) +
- sizeof(struct efi_setup_data);
+ sizeof(struct efi_setup_data) +
+ sizeof(struct setup_data) +
+ RNG_SEED_LENGTH;
+
+ if (IS_ENABLED(CONFIG_IMA_KEXEC))
+ kbuf.bufsz += sizeof(struct setup_data) +
+ sizeof(struct ima_setup_data);
params = kzalloc(kbuf.bufsz, GFP_KERNEL);
if (!params)
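setup_rng_seed() and setup_ima_state() above both use the same chaining pattern: the new node is pushed onto the head of the setup_data list, which is linked by physical address through params->hdr.setup_data. A minimal sketch of that pattern, with the struct layout simplified from arch/x86/include/uapi/asm/bootparam.h and chain_setup_data() a hypothetical helper used only for illustration:

#include <stdint.h>

/* Simplified from arch/x86/include/uapi/asm/bootparam.h. */
struct setup_data {
	uint64_t next;	/* physical address of the next node; 0 terminates */
	uint32_t type;	/* SETUP_EFI, SETUP_IMA, SETUP_RNG_SEED, ... */
	uint32_t len;	/* length of data[] */
	uint8_t  data[];
};

/* Hypothetical helper: prepend one node to the list rooted at
 * params->hdr.setup_data (passed in here as hdr_setup_data). */
static void chain_setup_data(uint64_t *hdr_setup_data,
			     struct setup_data *sd, uint64_t sd_phys)
{
	sd->next = *hdr_setup_data;	/* old head becomes our successor */
	*hdr_setup_data = sd_phys;	/* new node becomes the list head */
}

int main(void)
{
	uint64_t hdr_setup_data = 0;	/* stands in for params->hdr.setup_data */
	struct setup_data rng = { .len = 32 };	/* e.g. the RNG seed node */

	chain_setup_data(&hdr_setup_data, &rng, 0x100000 /* fake phys addr */);
	return hdr_setup_data == 0x100000 ? 0 : 1;
}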
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index b98ffcf4d250..b1abf663417c 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -253,7 +253,7 @@ int module_finalize(const Elf_Ehdr *hdr,
{
const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
*para = NULL, *orc = NULL, *orc_ip = NULL,
- *retpolines = NULL, *ibt_endbr = NULL;
+ *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
@@ -271,6 +271,8 @@ int module_finalize(const Elf_Ehdr *hdr,
orc_ip = s;
if (!strcmp(".retpoline_sites", secstrings + s->sh_name))
retpolines = s;
+ if (!strcmp(".return_sites", secstrings + s->sh_name))
+ returns = s;
if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name))
ibt_endbr = s;
}
@@ -287,6 +289,10 @@ int module_finalize(const Elf_Ehdr *hdr,
void *rseg = (void *)retpolines->sh_addr;
apply_retpolines(rseg, rseg + retpolines->sh_size);
}
+ if (returns) {
+ void *rseg = (void *)returns->sh_addr;
+ apply_returns(rseg, rseg + returns->sh_size);
+ }
if (alt) {
/* patch .altinstructions */
void *aseg = (void *)alt->sh_addr;
@@ -304,9 +310,6 @@ int module_finalize(const Elf_Ehdr *hdr,
tseg, tseg + text->sh_size);
}
- /* make jump label nops */
- jump_label_apply_nops(me);
-
if (orc && orc_ip)
unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size,
(void *)orc->sh_addr, orc->sh_size);
diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c
index 6b07faaa1579..23154d24b117 100644
--- a/arch/x86/kernel/pmem.c
+++ b/arch/x86/kernel/pmem.c
@@ -27,6 +27,11 @@ static __init int register_e820_pmem(void)
* simply here to trigger the module to load on demand.
*/
pdev = platform_device_alloc("e820_pmem", -1);
- return platform_device_add(pdev);
+
+ rc = platform_device_add(pdev);
+ if (rc)
+ platform_device_put(pdev);
+
+ return rc;
}
device_initcall(register_e820_pmem);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9b2772b7e1f3..58a6ea472db9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
}
if (updmsr)
- wrmsrl(MSR_IA32_SPEC_CTRL, msr);
+ write_spec_ctrl_current(msr, false);
}
static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
@@ -810,24 +810,43 @@ static void amd_e400_idle(void)
}
/*
- * Intel Core2 and older machines prefer MWAIT over HALT for C1.
- * We can't rely on cpuidle installing MWAIT, because it will not load
- * on systems that support only C1 -- so the boot default must be MWAIT.
+ * Prefer MWAIT over HALT if MWAIT is supported, the MWAIT CPUID leaf
+ * exists and, whenever MONITOR/MWAIT extensions are present, there is at
+ * least one C1 substate.
*
- * Some AMD machines are the opposite, they depend on using HALT.
- *
- * So for default C1, which is used during boot until cpuidle loads,
- * use MWAIT-C1 on Intel HW that has it, else use HALT.
+ * Do not prefer MWAIT if the MONITOR instruction has a bug or if
+ * idle=nomwait is passed on the kernel command line.
*/
static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
{
- if (c->x86_vendor != X86_VENDOR_INTEL)
+ u32 eax, ebx, ecx, edx;
+
+ /* User has disallowed the use of MWAIT. Fall back to HALT */
+ if (boot_option_idle_override == IDLE_NOMWAIT)
return 0;
- if (!cpu_has(c, X86_FEATURE_MWAIT) || boot_cpu_has_bug(X86_BUG_MONITOR))
+ /* MWAIT is not supported on this platform. Fall back to HALT */
+ if (!cpu_has(c, X86_FEATURE_MWAIT))
return 0;
- return 1;
+ /* The MONITOR instruction has a bug. Fall back to HALT */
+ if (boot_cpu_has_bug(X86_BUG_MONITOR))
+ return 0;
+
+ cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
+
+ /*
+ * If MWAIT extensions are not available, it is safe to use MWAIT
+ * with EAX=0, ECX=0.
+ */
+ if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED))
+ return 1;
+
+ /*
+ * If MWAIT extensions are available, there should be at least one
+ * MWAIT C1 substate present.
+ */
+ return (edx & MWAIT_C1_SUBSTATE_MASK);
}
/*
@@ -932,9 +951,8 @@ static int __init idle_setup(char *str)
} else if (!strcmp(str, "nomwait")) {
/*
* If the boot option of "idle=nomwait" is added,
- * it means that mwait will be disabled for CPU C2/C3
- * states. In such case it won't touch the variable
- * of boot_option_idle_override.
+ * it means that mwait will be disabled for CPU C1/C2/C3
+ * states.
*/
boot_option_idle_override = IDLE_NOMWAIT;
} else
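The same CPUID leaf 0x5 checks can be reproduced from user space; a small sketch, with the constants assumed to mirror asm/mwait.h (extensions bit in ECX, C1 substate count in EDX[7:4]):

#include <cpuid.h>
#include <stdio.h>

#define CPUID_MWAIT_LEAF		5
#define CPUID5_ECX_EXTENSIONS_SUPPORTED	0x1
#define MWAIT_C1_SUBSTATE_MASK		0xf0

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx))
		return 1;	/* leaf not available */

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
		/* No sub-state enumeration: MWAIT with EAX=0, ECX=0 is still safe. */
		printf("MWAIT extensions not enumerated\n");
		return 0;
	}

	/* EDX[7:4] is the number of C1 sub-states MWAIT supports. */
	printf("MWAIT C1 sub-states: %u\n", (edx & MWAIT_C1_SUBSTATE_MASK) >> 4);
	return 0;
}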
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index fcc8a7699103..c7c4b1917336 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -7,10 +7,12 @@
#include <linux/linkage.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
+#include <asm/nospec-branch.h>
#include <asm/processor-flags.h>
/*
- * Must be relocatable PIC code callable as a C function
+ * Must be relocatable PIC code callable as a C function, in particular
+ * there must be a plain RET and not a jump to the return thunk.
*/
#define PTR(x) (x << 2)
@@ -91,7 +93,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
movl %edi, %eax
addl $(identity_mapped - relocate_kernel), %eax
pushl %eax
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(relocate_kernel)
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
@@ -159,12 +163,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
xorl %edx, %edx
xorl %esi, %esi
xorl %ebp, %ebp
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
1:
popl %edx
movl CP_PA_SWAP_PAGE(%edi), %esp
addl $PAGE_SIZE, %esp
2:
+ ANNOTATE_RETPOLINE_SAFE
call *%edx
/* get the re-entry point of the peer system */
@@ -190,7 +197,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
movl %edi, %eax
addl $(virtual_mapped - relocate_kernel), %eax
pushl %eax
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(identity_mapped)
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
@@ -208,7 +217,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
popl %edi
popl %esi
popl %ebx
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(virtual_mapped)
/* Do the copies */
@@ -271,7 +282,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
popl %edi
popl %ebx
popl %ebp
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(swap_pages)
.globl kexec_control_code_size
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index c1d8626c53b6..4809c0dc4eb0 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -13,7 +13,8 @@
#include <asm/unwind_hints.h>
/*
- * Must be relocatable PIC code callable as a C function
+ * Must be relocatable PIC code callable as a C function, in particular
+ * there must be a plain RET and not jump to return thunk.
*/
#define PTR(x) (x << 3)
@@ -105,7 +106,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
/* jump to identity mapped page */
addq $(identity_mapped - relocate_kernel), %r8
pushq %r8
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(relocate_kernel)
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
@@ -200,7 +203,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
xorl %r14d, %r14d
xorl %r15d, %r15d
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
1:
popq %rdx
@@ -219,7 +224,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
call swap_pages
movq $virtual_mapped, %rax
pushq %rax
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(identity_mapped)
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
@@ -241,7 +248,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
popq %r12
popq %rbp
popq %rbx
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(virtual_mapped)
/* Do the copies */
@@ -298,7 +307,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
lea PAGE_SIZE(%rax), %rsi
jmp 0b
3:
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(swap_pages)
.globl kexec_control_code_size
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index bd6c6fd373ae..216fee7144ee 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -11,6 +11,7 @@
#include <linux/dma-map-ops.h>
#include <linux/dmi.h>
#include <linux/efi.h>
+#include <linux/ima.h>
#include <linux/init_ohci1394_dma.h>
#include <linux/initrd.h>
#include <linux/iscsi_ibft.h>
@@ -23,6 +24,7 @@
#include <linux/usb/xhci-dbgp.h>
#include <linux/static_call.h>
#include <linux/swiotlb.h>
+#include <linux/random.h>
#include <uapi/linux/mount.h>
@@ -140,6 +142,11 @@ __visible unsigned long mmu_cr4_features __ro_after_init;
__visible unsigned long mmu_cr4_features __ro_after_init = X86_CR4_PAE;
#endif
+#ifdef CONFIG_IMA
+static phys_addr_t ima_kexec_buffer_phys;
+static size_t ima_kexec_buffer_size;
+#endif
+
/* Boot loader ID and version as integers, for the benefit of proc_dointvec */
int bootloader_type, bootloader_version;
@@ -330,6 +337,60 @@ static void __init reserve_initrd(void)
}
#endif /* CONFIG_BLK_DEV_INITRD */
+static void __init add_early_ima_buffer(u64 phys_addr)
+{
+#ifdef CONFIG_IMA
+ struct ima_setup_data *data;
+
+ data = early_memremap(phys_addr + sizeof(struct setup_data), sizeof(*data));
+ if (!data) {
+ pr_warn("setup: failed to memremap ima_setup_data entry\n");
+ return;
+ }
+
+ if (data->size) {
+ memblock_reserve(data->addr, data->size);
+ ima_kexec_buffer_phys = data->addr;
+ ima_kexec_buffer_size = data->size;
+ }
+
+ early_memunmap(data, sizeof(*data));
+#else
+ pr_warn("Passed IMA kexec data, but CONFIG_IMA not set. Ignoring.\n");
+#endif
+}
+
+#if defined(CONFIG_HAVE_IMA_KEXEC) && !defined(CONFIG_OF_FLATTREE)
+int __init ima_free_kexec_buffer(void)
+{
+ int rc;
+
+ if (!ima_kexec_buffer_size)
+ return -ENOENT;
+
+ rc = memblock_phys_free(ima_kexec_buffer_phys,
+ ima_kexec_buffer_size);
+ if (rc)
+ return rc;
+
+ ima_kexec_buffer_phys = 0;
+ ima_kexec_buffer_size = 0;
+
+ return 0;
+}
+
+int __init ima_get_kexec_buffer(void **addr, size_t *size)
+{
+ if (!ima_kexec_buffer_size)
+ return -ENOENT;
+
+ *addr = __va(ima_kexec_buffer_phys);
+ *size = ima_kexec_buffer_size;
+
+ return 0;
+}
+#endif
+
static void __init parse_setup_data(void)
{
struct setup_data *data;
@@ -355,6 +416,18 @@ static void __init parse_setup_data(void)
case SETUP_EFI:
parse_efi_setup(pa_data, data_len);
break;
+ case SETUP_IMA:
+ add_early_ima_buffer(pa_data);
+ break;
+ case SETUP_RNG_SEED:
+ data = early_memremap(pa_data, data_len);
+ add_bootloader_randomness(data->data, data->len);
+ /* Zero seed for forward secrecy. */
+ memzero_explicit(data->data, data->len);
+ /* Zero length in case we find ourselves back here by accident. */
+ memzero_explicit(&data->len, sizeof(data->len));
+ early_memunmap(data, data_len);
+ break;
default:
break;
}
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index b478edf43bec..3a5b0c9c4fcc 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -219,9 +219,10 @@ static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt
return ES_VMM_ERROR;
}
-enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr,
- struct es_em_ctxt *ctxt, u64 exit_code,
- u64 exit_info_1, u64 exit_info_2)
+static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt,
+ u64 exit_code, u64 exit_info_1,
+ u64 exit_info_2)
{
/* Fill in protocol and format specifiers */
ghcb->protocol_version = ghcb_version;
@@ -231,14 +232,7 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr,
ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
- /*
- * Hyper-V unenlightened guests use a paravisor for communicating and
- * GHCB pages are being allocated and set up by that paravisor. Linux
- * should not change the GHCB page's physical address.
- */
- if (set_ghcb_msr)
- sev_es_wr_ghcb_msr(__pa(ghcb));
-
+ sev_es_wr_ghcb_msr(__pa(ghcb));
VMGEXIT();
return verify_exception_info(ghcb, ctxt);
@@ -795,7 +789,7 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
*/
sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer);
ghcb_set_sw_scratch(ghcb, sw_scratch);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_IOIO,
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO,
exit_info_1, exit_info_2);
if (ret != ES_OK)
return ret;
@@ -837,8 +831,7 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
ghcb_set_rax(ghcb, rax);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt,
- SVM_EXIT_IOIO, exit_info_1, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0);
if (ret != ES_OK)
return ret;
@@ -894,7 +887,7 @@ static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
/* xgetbv will cause #GP - use reset value for xcr0 */
ghcb_set_xcr0(ghcb, 1);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_CPUID, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
if (ret != ES_OK)
return ret;
@@ -919,7 +912,7 @@ static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
bool rdtscp = (exit_code == SVM_EXIT_RDTSCP);
enum es_result ret;
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, exit_code, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0);
if (ret != ES_OK)
return ret;
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index c05f0124c410..63dc626627a0 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -786,7 +786,7 @@ static int vmgexit_psc(struct snp_psc_desc *desc)
ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
/* This will advance the shared buffer data points to. */
- ret = sev_es_ghcb_hv_call(ghcb, true, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
/*
* Page State Change VMGEXIT can pass error code through
@@ -1212,8 +1212,7 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
ghcb_set_rdx(ghcb, regs->dx);
}
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_MSR,
- exit_info_1, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0);
if ((ret == ES_OK) && (!exit_info_1)) {
regs->ax = ghcb->save.rax;
@@ -1452,7 +1451,7 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));
- return sev_es_ghcb_hv_call(ghcb, true, ctxt, exit_code, exit_info_1, exit_info_2);
+ return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
}
/*
@@ -1628,7 +1627,7 @@ static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
/* Using a value of 0 for ExitInfo1 means RAX holds the value */
ghcb_set_rax(ghcb, val);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
if (ret != ES_OK)
return ret;
@@ -1658,7 +1657,7 @@ static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
struct es_em_ctxt *ctxt)
{
- return sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_WBINVD, 0, 0);
+ return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
}
static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
@@ -1667,7 +1666,7 @@ static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt
ghcb_set_rcx(ghcb, ctxt->regs->cx);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_RDPMC, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
if (ret != ES_OK)
return ret;
@@ -1708,7 +1707,7 @@ static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
if (x86_platform.hyper.sev_es_hcall_prepare)
x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_VMMCALL, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
if (ret != ES_OK)
return ret;
@@ -2197,7 +2196,7 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned
ghcb_set_rbx(ghcb, input->data_npages);
}
- ret = sev_es_ghcb_hv_call(ghcb, true, &ctxt, exit_code, input->req_gpa, input->resp_gpa);
+ ret = sev_es_ghcb_hv_call(ghcb, &ctxt, exit_code, input->req_gpa, input->resp_gpa);
if (ret)
goto e_put;
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index aa72cefdd5be..aaaba85d6d7f 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -12,13 +12,21 @@ enum insn_type {
};
/*
+ * ud1 %esp, %ecx - a 3 byte #UD that is unique to trampolines, chosen such
+ * that there is no false-positive trampoline identification while also being a
+ * speculation stop.
+ */
+static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc };
+
+/*
* cs cs cs xorl %eax, %eax - a single 5 byte instruction that clears %[er]ax
*/
static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 };
static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc };
-static void __ref __static_call_transform(void *insn, enum insn_type type, void *func)
+static void __ref __static_call_transform(void *insn, enum insn_type type,
+ void *func, bool modinit)
{
const void *emulate = NULL;
int size = CALL_INSN_SIZE;
@@ -43,14 +51,17 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void
break;
case RET:
- code = &retinsn;
+ if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk);
+ else
+ code = &retinsn;
break;
}
if (memcmp(insn, code, size) == 0)
return;
- if (unlikely(system_state == SYSTEM_BOOTING))
+ if (system_state == SYSTEM_BOOTING || modinit)
return text_poke_early(insn, code, size);
text_poke_bp(insn, code, size, emulate);
@@ -60,7 +71,7 @@ static void __static_call_validate(void *insn, bool tail, bool tramp)
{
u8 opcode = *(u8 *)insn;
- if (tramp && memcmp(insn+5, "SCT", 3)) {
+ if (tramp && memcmp(insn+5, tramp_ud, 3)) {
pr_err("trampoline signature fail");
BUG();
}
@@ -104,14 +115,42 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
if (tramp) {
__static_call_validate(tramp, true, true);
- __static_call_transform(tramp, __sc_insn(!func, true), func);
+ __static_call_transform(tramp, __sc_insn(!func, true), func, false);
}
if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) {
__static_call_validate(site, tail, false);
- __static_call_transform(site, __sc_insn(!func, tail), func);
+ __static_call_transform(site, __sc_insn(!func, tail), func, false);
}
mutex_unlock(&text_mutex);
}
EXPORT_SYMBOL_GPL(arch_static_call_transform);
+
+#ifdef CONFIG_RETHUNK
+/*
+ * This is called by apply_returns() to fix up static call trampolines,
+ * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as
+ * having a return trampoline.
+ *
+ * The problem is that static_call() is available before determining
+ * X86_FEATURE_RETHUNK and, by implication, running alternatives.
+ *
+ * This means that __static_call_transform() above can have overwritten the
+ * return trampoline and we now need to fix things up to be consistent.
+ */
+bool __static_call_fixup(void *tramp, u8 op, void *dest)
+{
+ if (memcmp(tramp+5, tramp_ud, 3)) {
+ /* Not a trampoline site, not our problem. */
+ return false;
+ }
+
+ mutex_lock(&text_mutex);
+ if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk)
+ __static_call_transform(tramp, RET, NULL, true);
+ mutex_unlock(&text_mutex);
+
+ return true;
+}
+#endif
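The trampoline test in __static_call_validate()/__static_call_fixup() boils down to a 3-byte compare at offset 5; a small sketch of that check, where the leading 5-byte JMP is only an illustrative stand-in for whatever instruction the trampoline starts with:

#include <stdio.h>
#include <string.h>

/* ud1 %esp, %ecx -- the new trampoline signature. */
static const unsigned char tramp_ud[] = { 0x0f, 0xb9, 0xcc };

static int is_static_call_tramp(const unsigned char *insn)
{
	/* Bytes 5..7 of the trampoline must carry the UD1 marker. */
	return memcmp(insn + 5, tramp_ud, sizeof(tramp_ud)) == 0;
}

int main(void)
{
	/* Illustrative only: a 5-byte JMP rel32 followed by the signature. */
	static const unsigned char tramp[8] = {
		0xe9, 0x00, 0x00, 0x00, 0x00, 0x0f, 0xb9, 0xcc
	};

	printf("trampoline? %s\n", is_static_call_tramp(tramp) ? "yes" : "no");
	return 0;
}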
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 9487ce8c13ee..15f29053cec4 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -141,7 +141,7 @@ SECTIONS
#ifdef CONFIG_RETPOLINE
__indirect_thunk_start = .;
- *(.text.__x86.indirect_thunk)
+ *(.text.__x86.*)
__indirect_thunk_end = .;
#endif
} :text =0xcccc
@@ -283,6 +283,13 @@ SECTIONS
*(.retpoline_sites)
__retpoline_sites_end = .;
}
+
+ . = ALIGN(8);
+ .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) {
+ __return_sites = .;
+ *(.return_sites)
+ __return_sites_end = .;
+ }
#endif
#ifdef CONFIG_X86_KERNEL_IBT
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 89b11e7dca8a..f8382abe22ff 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -189,9 +189,6 @@
#define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x)
-#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
-#define FASTOP_SIZE (8 * (1 + HAS_KERNEL_IBT))
-
struct opcode {
u64 flags;
u8 intercept;
@@ -306,9 +303,15 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
* Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
* different operand sizes can be reached by calculation, rather than a jump
* table (which would be bigger than the code).
+ *
+ * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR
+ * and 1 for the straight line speculation INT3, leaves 7 bytes for the
+ * body of the function. Currently none is larger than 4.
*/
static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
+#define FASTOP_SIZE 16
+
#define __FOP_FUNC(name) \
".align " __stringify(FASTOP_SIZE) " \n\t" \
".type " name ", @function \n\t" \
@@ -325,13 +328,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
#define FOP_RET(name) \
__FOP_RET(#name)
-#define FOP_START(op) \
+#define __FOP_START(op, align) \
extern void em_##op(struct fastop *fake); \
asm(".pushsection .text, \"ax\" \n\t" \
".global em_" #op " \n\t" \
- ".align " __stringify(FASTOP_SIZE) " \n\t" \
+ ".align " __stringify(align) " \n\t" \
"em_" #op ":\n\t"
+#define FOP_START(op) __FOP_START(op, FASTOP_SIZE)
+
#define FOP_END \
".popsection")
@@ -435,17 +440,12 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
/*
* Depending on .config the SETcc functions look like:
*
- * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT]
- * SETcc %al [3 bytes]
- * RET [1 byte]
- * INT3 [1 byte; CONFIG_SLS]
- *
- * Which gives possible sizes 4, 5, 8 or 9. When rounded up to the
- * next power-of-two alignment they become 4, 8 or 16 resp.
+ * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT]
+ * SETcc %al [3 bytes]
+ * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK]
+ * INT3 [1 byte; CONFIG_SLS]
*/
-#define SETCC_LENGTH (ENDBR_INSN_SIZE + 4 + IS_ENABLED(CONFIG_SLS))
-#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS) << HAS_KERNEL_IBT)
-static_assert(SETCC_LENGTH <= SETCC_ALIGN);
+#define SETCC_ALIGN 16
#define FOP_SETCC(op) \
".align " __stringify(SETCC_ALIGN) " \n\t" \
@@ -453,9 +453,10 @@ static_assert(SETCC_LENGTH <= SETCC_ALIGN);
#op ": \n\t" \
ASM_ENDBR \
#op " %al \n\t" \
- __FOP_RET(#op)
+ __FOP_RET(#op) \
+ ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t"
-FOP_START(setcc)
+__FOP_START(setcc, SETCC_ALIGN)
FOP_SETCC(seto)
FOP_SETCC(setno)
FOP_SETCC(setc)
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index dfaeb47fcf2a..723f8534986c 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -111,6 +111,15 @@ SYM_FUNC_START(__svm_vcpu_run)
#endif
/*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
+ * untrained as soon as we exit the VM and are back to the
+ * kernel. This should be done before re-enabling interrupts
+ * because interrupt handlers won't sanitize 'ret' if the return is
+ * from the kernel.
+ */
+ UNTRAIN_RET
+
+ /*
* Clear all general purpose registers except RSP and RAX to prevent
* speculative use of the guest's values, even those that are reloaded
* via the stack. In theory, an L1 cache miss when restoring registers
@@ -190,6 +199,15 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif
+ /*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
+ * untrained as soon as we exit the VM and are back to the
+ * kernel. This should be done before re-enabling interrupts
+ * because interrupt handlers won't sanitize RET if the return is
+ * from the kernel.
+ */
+ UNTRAIN_RET
+
pop %_ASM_BX
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 3f430e218375..c0e24826a86f 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -4,8 +4,8 @@
#include <asm/vmx.h>
-#include "lapic.h"
-#include "x86.h"
+#include "../lapic.h"
+#include "../x86.h"
extern bool __read_mostly enable_vpid;
extern bool __read_mostly flexpriority_enabled;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index f5cb18e00e78..ab135f9ef52f 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2278,7 +2278,6 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_ENABLE_VMFUNC |
- SECONDARY_EXEC_TSC_SCALING |
SECONDARY_EXEC_DESC);
if (nested_cpu_has(vmcs12,
@@ -3087,7 +3086,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
}
vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
- vmx->loaded_vmcs->launched);
+ __vmx_vcpu_run_flags(vmx));
if (vmx->msr_autoload.host.nr)
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
new file mode 100644
index 000000000000..edc3f16cc189
--- /dev/null
+++ b/arch/x86/kvm/vmx/run_flags.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KVM_X86_VMX_RUN_FLAGS_H
+#define __KVM_X86_VMX_RUN_FLAGS_H
+
+#define VMX_RUN_VMRESUME (1 << 0)
+#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1)
+
+#endif /* __KVM_X86_VMX_RUN_FLAGS_H */
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 435c187927c4..4182c7ffc909 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -1,10 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/asm.h>
+#include <asm/asm-offsets.h>
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
+#include <asm/percpu.h>
#include <asm/segment.h>
+#include "run_flags.h"
#define WORD_SIZE (BITS_PER_LONG / 8)
@@ -31,72 +34,11 @@
.section .noinstr.text, "ax"
/**
- * vmx_vmenter - VM-Enter the current loaded VMCS
- *
- * %RFLAGS.ZF: !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME
- *
- * Returns:
- * %RFLAGS.CF is set on VM-Fail Invalid
- * %RFLAGS.ZF is set on VM-Fail Valid
- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
- *
- * Note that VMRESUME/VMLAUNCH fall-through and return directly if
- * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump
- * to vmx_vmexit.
- */
-SYM_FUNC_START_LOCAL(vmx_vmenter)
- /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */
- je 2f
-
-1: vmresume
- RET
-
-2: vmlaunch
- RET
-
-3: cmpb $0, kvm_rebooting
- je 4f
- RET
-4: ud2
-
- _ASM_EXTABLE(1b, 3b)
- _ASM_EXTABLE(2b, 3b)
-
-SYM_FUNC_END(vmx_vmenter)
-
-/**
- * vmx_vmexit - Handle a VMX VM-Exit
- *
- * Returns:
- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
- *
- * This is vmx_vmenter's partner in crime. On a VM-Exit, control will jump
- * here after hardware loads the host's state, i.e. this is the destination
- * referred to by VMCS.HOST_RIP.
- */
-SYM_FUNC_START(vmx_vmexit)
-#ifdef CONFIG_RETPOLINE
- ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE
- /* Preserve guest's RAX, it's used to stuff the RSB. */
- push %_ASM_AX
-
- /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
- FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
-
- /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */
- or $1, %_ASM_AX
-
- pop %_ASM_AX
-.Lvmexit_skip_rsb:
-#endif
- RET
-SYM_FUNC_END(vmx_vmexit)
-
-/**
* __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
- * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp)
+ * @vmx: struct vcpu_vmx *
* @regs: unsigned long * (to guest registers)
- * @launched: %true if the VMCS has been launched
+ * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH
+ * VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl
*
* Returns:
* 0 on VM-Exit, 1 on VM-Fail
@@ -115,24 +57,56 @@ SYM_FUNC_START(__vmx_vcpu_run)
#endif
push %_ASM_BX
+ /* Save @vmx for SPEC_CTRL handling */
+ push %_ASM_ARG1
+
+ /* Save @flags for SPEC_CTRL handling */
+ push %_ASM_ARG3
+
/*
* Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
* @regs is needed after VM-Exit to save the guest's register values.
*/
push %_ASM_ARG2
- /* Copy @launched to BL, _ASM_ARG3 is volatile. */
+ /* Copy @flags to BL, _ASM_ARG3 is volatile. */
mov %_ASM_ARG3B, %bl
- /* Adjust RSP to account for the CALL to vmx_vmenter(). */
- lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2
+ lea (%_ASM_SP), %_ASM_ARG2
call vmx_update_host_rsp
+ ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL
+
+ /*
+ * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
+ * host's, write the MSR.
+ *
+ * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
+ * there must not be any returns or indirect branches between this code
+ * and vmentry.
+ */
+ mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI
+ movl VMX_spec_ctrl(%_ASM_DI), %edi
+ movl PER_CPU_VAR(x86_spec_ctrl_current), %esi
+ cmp %edi, %esi
+ je .Lspec_ctrl_done
+ mov $MSR_IA32_SPEC_CTRL, %ecx
+ xor %edx, %edx
+ mov %edi, %eax
+ wrmsr
+
+.Lspec_ctrl_done:
+
+ /*
+ * Since vmentry is serializing on affected CPUs, there's no need for
+ * an LFENCE to stop speculation from skipping the wrmsr.
+ */
+
/* Load @regs to RAX. */
mov (%_ASM_SP), %_ASM_AX
/* Check if vmlaunch or vmresume is needed */
- testb %bl, %bl
+ testb $VMX_RUN_VMRESUME, %bl
/* Load guest registers. Don't clobber flags. */
mov VCPU_RCX(%_ASM_AX), %_ASM_CX
@@ -154,11 +128,37 @@ SYM_FUNC_START(__vmx_vcpu_run)
/* Load guest RAX. This kills the @regs pointer! */
mov VCPU_RAX(%_ASM_AX), %_ASM_AX
- /* Enter guest mode */
- call vmx_vmenter
+ /* Check EFLAGS.ZF from 'testb' above */
+ jz .Lvmlaunch
+
+ /*
+ * After a successful VMRESUME/VMLAUNCH, control flow "magically"
+ * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting.
+ * So this isn't a typical function and objtool needs to be told to
+ * save the unwind state here and restore it below.
+ */
+ UNWIND_HINT_SAVE
+
+/*
+ * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at
+ * the 'vmx_vmexit' label below.
+ */
+.Lvmresume:
+ vmresume
+ jmp .Lvmfail
+
+.Lvmlaunch:
+ vmlaunch
+ jmp .Lvmfail
- /* Jump on VM-Fail. */
- jbe 2f
+ _ASM_EXTABLE(.Lvmresume, .Lfixup)
+ _ASM_EXTABLE(.Lvmlaunch, .Lfixup)
+
+SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
+
+ /* Restore unwind state from before the VMRESUME/VMLAUNCH. */
+ UNWIND_HINT_RESTORE
+ ENDBR
/* Temporarily save guest's RAX. */
push %_ASM_AX
@@ -185,21 +185,23 @@ SYM_FUNC_START(__vmx_vcpu_run)
mov %r15, VCPU_R15(%_ASM_AX)
#endif
- /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */
- xor %eax, %eax
+ /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */
+ xor %ebx, %ebx
+.Lclear_regs:
/*
- * Clear all general purpose registers except RSP and RAX to prevent
+ * Clear all general purpose registers except RSP and RBX to prevent
* speculative use of the guest's values, even those that are reloaded
* via the stack. In theory, an L1 cache miss when restoring registers
* could lead to speculative execution with the guest's values.
* Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
* free. RSP and RAX are exempt as RSP is restored by hardware during
- * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail.
+ * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return
+ * value.
*/
-1: xor %ecx, %ecx
+ xor %eax, %eax
+ xor %ecx, %ecx
xor %edx, %edx
- xor %ebx, %ebx
xor %ebp, %ebp
xor %esi, %esi
xor %edi, %edi
@@ -216,8 +218,30 @@ SYM_FUNC_START(__vmx_vcpu_run)
/* "POP" @regs. */
add $WORD_SIZE, %_ASM_SP
- pop %_ASM_BX
+ /*
+ * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
+ * the first unbalanced RET after vmexit!
+ *
+ * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB
+ * entries and (in some cases) RSB underflow.
+ *
+ * eIBRS has its own protection against poisoned RSB, so it doesn't
+ * need the RSB filling sequence. But it does need to be enabled
+ * before the first unbalanced RET.
+ */
+
+ FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
+
+ pop %_ASM_ARG2 /* @flags */
+ pop %_ASM_ARG1 /* @vmx */
+
+ call vmx_spec_ctrl_restore_host
+
+ /* Put return value in AX */
+ mov %_ASM_BX, %_ASM_AX
+
+ pop %_ASM_BX
#ifdef CONFIG_X86_64
pop %r12
pop %r13
@@ -230,9 +254,15 @@ SYM_FUNC_START(__vmx_vcpu_run)
pop %_ASM_BP
RET
- /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */
-2: mov $1, %eax
- jmp 1b
+.Lfixup:
+ cmpb $0, kvm_rebooting
+ jne .Lvmfail
+ ud2
+.Lvmfail:
+ /* VM-Fail: set return value to 1 */
+ mov $1, %_ASM_BX
+ jmp .Lclear_regs
+
SYM_FUNC_END(__vmx_vcpu_run)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3a919e49129b..be7c19374fdd 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -383,9 +383,9 @@ static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
if (!vmx->disable_fb_clear)
return;
- rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
+ msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL);
msr |= FB_CLEAR_DIS;
- wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
+ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
/* Cache the MSR value to avoid reading it later */
vmx->msr_ia32_mcu_opt_ctrl = msr;
}
@@ -396,7 +396,7 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
return;
vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS;
- wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
+ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
}
static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
@@ -839,6 +839,24 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
MSR_IA32_SPEC_CTRL);
}
+unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
+{
+ unsigned int flags = 0;
+
+ if (vmx->loaded_vmcs->launched)
+ flags |= VMX_RUN_VMRESUME;
+
+ /*
+ * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free
+ * to change it directly without causing a vmexit. In that case read
+ * it after vmexit and store it in vmx->spec_ctrl.
+ */
+ if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
+ flags |= VMX_RUN_SAVE_SPEC_CTRL;
+
+ return flags;
+}
+
static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
unsigned long entry, unsigned long exit)
{
@@ -6813,6 +6831,31 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
}
}
+void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
+ unsigned int flags)
+{
+ u64 hostval = this_cpu_read(x86_spec_ctrl_current);
+
+ if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL))
+ return;
+
+ if (flags & VMX_RUN_SAVE_SPEC_CTRL)
+ vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
+
+ /*
+ * If the guest/host SPEC_CTRL values differ, restore the host value.
+ *
+ * For legacy IBRS, the IBRS bit always needs to be written after
+ * transitioning from a less privileged predictor mode, regardless of
+ * whether the guest/host values differ.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
+ vmx->spec_ctrl != hostval)
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
+
+ barrier_nospec();
+}
+
static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{
switch (to_vmx(vcpu)->exit_reason.basic) {
@@ -6826,7 +6869,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
}
static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
- struct vcpu_vmx *vmx)
+ struct vcpu_vmx *vmx,
+ unsigned long flags)
{
guest_state_enter_irqoff();
@@ -6845,7 +6889,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
native_write_cr2(vcpu->arch.cr2);
vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
- vmx->loaded_vmcs->launched);
+ flags);
vcpu->arch.cr2 = native_read_cr2();
@@ -6944,36 +6988,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
kvm_wait_lapic_expire(vcpu);
- /*
- * If this vCPU has touched SPEC_CTRL, restore the guest's value if
- * it's non-zero. Since vmentry is serialising on affected CPUs, there
- * is no need to worry about the conditional branch over the wrmsr
- * being speculatively taken.
- */
- x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
-
/* The actual VMENTER/EXIT is in the .noinstr.text section. */
- vmx_vcpu_enter_exit(vcpu, vmx);
-
- /*
- * We do not use IBRS in the kernel. If this vCPU has used the
- * SPEC_CTRL MSR it may have left it on; save the value and
- * turn it off. This is much more efficient than blindly adding
- * it to the atomic save/restore list. Especially as the former
- * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
- *
- * For non-nested case:
- * If the L01 MSR bitmap does not intercept the MSR, then we need to
- * save it.
- *
- * For nested case:
- * If the L02 MSR bitmap does not intercept the MSR, then we need to
- * save it.
- */
- if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
- vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
-
- x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
+ vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx));
/* All fields are clean at this point */
if (static_branch_unlikely(&enable_evmcs)) {
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 8d2342ede0c5..1e7f9453894b 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -8,11 +8,12 @@
#include <asm/intel_pt.h>
#include "capabilities.h"
-#include "kvm_cache_regs.h"
+#include "../kvm_cache_regs.h"
#include "posted_intr.h"
#include "vmcs.h"
#include "vmx_ops.h"
-#include "cpuid.h"
+#include "../cpuid.h"
+#include "run_flags.h"
#define MSR_TYPE_R 1
#define MSR_TYPE_W 2
@@ -404,7 +405,10 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
-bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
+void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags);
+unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
+bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
+ unsigned int flags);
int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index 5e7f41225780..5cfc49ddb1b4 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -8,7 +8,7 @@
#include "evmcs.h"
#include "vmcs.h"
-#include "x86.h"
+#include "../x86.h"
asmlinkage void vmread_error(unsigned long field, bool fault);
__attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1910e1e78b15..e5fa335a4ea7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -298,7 +298,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, directed_yield_successful),
STATS_DESC_COUNTER(VCPU, preemption_reported),
STATS_DESC_COUNTER(VCPU, preemption_other),
- STATS_DESC_ICOUNTER(VCPU, guest_mode)
+ STATS_DESC_IBOOLEAN(VCPU, guest_mode)
};
const struct kvm_stats_header kvm_vcpu_stats_header = {
@@ -6029,6 +6029,11 @@ split_irqchip_unlock:
r = 0;
break;
case KVM_CAP_X86_USER_SPACE_MSR:
+ r = -EINVAL;
+ if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL |
+ KVM_MSR_EXIT_REASON_UNKNOWN |
+ KVM_MSR_EXIT_REASON_FILTER))
+ break;
kvm->arch.user_space_msr_mask = cap->args[0];
r = 0;
break;
@@ -6183,6 +6188,9 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
return -EFAULT;
+ if (filter.flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
+ return -EINVAL;
+
for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
empty &= !filter.ranges[i].nmsrs;
@@ -9143,15 +9151,17 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
*/
static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid)
{
- struct kvm_lapic_irq lapic_irq;
-
- lapic_irq.shorthand = APIC_DEST_NOSHORT;
- lapic_irq.dest_mode = APIC_DEST_PHYSICAL;
- lapic_irq.level = 0;
- lapic_irq.dest_id = apicid;
- lapic_irq.msi_redir_hint = false;
+ /*
+ * All other fields are unused for APIC_DM_REMRD, but may be consumed by
+ * common code, e.g. for tracing. Defer initialization to the compiler.
+ */
+ struct kvm_lapic_irq lapic_irq = {
+ .delivery_mode = APIC_DM_REMRD,
+ .dest_mode = APIC_DEST_PHYSICAL,
+ .shorthand = APIC_DEST_NOSHORT,
+ .dest_id = apicid,
+ };
- lapic_irq.delivery_mode = APIC_DM_REMRD;
kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
}
@@ -12631,9 +12641,9 @@ void kvm_arch_end_assignment(struct kvm *kvm)
}
EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
-bool kvm_arch_has_assigned_device(struct kvm *kvm)
+bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm)
{
- return atomic_read(&kvm->arch.assigned_device_count);
+ return arch_atomic_read(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index d83cba364e31..724bbf83eb5b 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -39,7 +39,7 @@ SYM_FUNC_START(__memmove)
/* FSRM implies ERMS => no length checks, do the copy directly */
.Lmemmove_begin_forward:
ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM
- ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS
+ ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS
/*
* movsq instruction have many startup latency
@@ -205,6 +205,11 @@ SYM_FUNC_START(__memmove)
movb %r11b, (%rdi)
13:
RET
+
+.Lmemmove_erms:
+ movq %rdx, %rcx
+ rep movsb
+ RET
SYM_FUNC_END(__memmove)
EXPORT_SYMBOL(__memmove)
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index b2b2366885a2..073289a55f84 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -33,9 +33,9 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
UNWIND_HINT_EMPTY
ANNOTATE_NOENDBR
- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
- __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE
+ ALTERNATIVE_2 __stringify(RETPOLINE \reg), \
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \
+ __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE)
.endm
@@ -67,3 +67,76 @@ SYM_CODE_END(__x86_indirect_thunk_array)
#define GEN(reg) EXPORT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN
+
+/*
+ * This function name is magical and is used by -mfunction-return=thunk-extern
+ * for the compiler to generate JMPs to it.
+ */
+#ifdef CONFIG_RETHUNK
+
+ .section .text.__x86.return_thunk
+
+/*
+ * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
+ * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
+ * alignment within the BTB.
+ * 2) The instruction at zen_untrain_ret must contain, and not
+ * end with, the 0xc3 byte of the RET.
+ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
+ * from re-poisoning the BTB prediction.
+ */
+ .align 64
+ .skip 63, 0xcc
+SYM_FUNC_START_NOALIGN(zen_untrain_ret);
+
+ /*
+ * As executed from zen_untrain_ret, this is:
+ *
+ * TEST $0xcc, %bl
+ * LFENCE
+ * JMP __x86_return_thunk
+ *
+ * Executing the TEST instruction has a side effect of evicting any BTB
+ * prediction (potentially attacker controlled) attached to the RET, as
+ * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
+ */
+ .byte 0xf6
+
+ /*
+ * As executed from __x86_return_thunk, this is a plain RET.
+ *
+ * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
+ *
+ * We subsequently jump backwards and architecturally execute the RET.
+ * This creates a correct BTB prediction (type=ret), but in the
+ * meantime we suffer Straight Line Speculation (because the type was
+ * no branch) which is halted by the INT3.
+ *
+ * With SMT enabled and STIBP active, a sibling thread cannot poison
+ * RET's prediction to a type of its choice, but can evict the
+ * prediction due to competitive sharing. If the prediction is
+ * evicted, __x86_return_thunk will suffer Straight Line Speculation
+ * which will be contained safely by the INT3.
+ */
+SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
+ ret
+ int3
+SYM_CODE_END(__x86_return_thunk)
+
+ /*
+ * Ensure the TEST decoding / BTB invalidation is complete.
+ */
+ lfence
+
+ /*
+ * Jump back and execute the RET in the middle of the TEST instruction.
+ * INT3 is for SLS protection.
+ */
+ jmp __x86_return_thunk
+ int3
+SYM_FUNC_END(zen_untrain_ret)
+__EXPORT_THUNK(zen_untrain_ret)
+
+EXPORT_SYMBOL(__x86_return_thunk)
+
+#endif /* CONFIG_RETHUNK */
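To make the overlapping decode above a bit more concrete, here is the raw byte view, as a sketch (the three bytes are shared between the two entry points described in the comments):

#include <stdio.h>

static const unsigned char untrain_ret_bytes[] = {
	0xf6,	/* zen_untrain_ret:    opcode of TEST $imm8, %bl             */
	0xc3,	/* __x86_return_thunk: ModRM of that TEST -- or a plain RET  */
	0xcc,	/* imm8 (0xcc) of the TEST -- or an INT3 right after the RET */
};

int main(void)
{
	printf("TEST $0x%02x, %%bl == %02x %02x %02x\n",
	       untrain_ret_bytes[2], untrain_ret_bytes[0],
	       untrain_ret_bytes[1], untrain_ret_bytes[2]);
	return 0;
}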
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index dba2197c05c3..331310c29349 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -94,16 +94,18 @@ static bool ex_handler_copy(const struct exception_table_entry *fixup,
static bool ex_handler_msr(const struct exception_table_entry *fixup,
struct pt_regs *regs, bool wrmsr, bool safe, int reg)
{
- if (!safe && wrmsr &&
- pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
- (unsigned int)regs->cx, (unsigned int)regs->dx,
- (unsigned int)regs->ax, regs->ip, (void *)regs->ip))
+ if (__ONCE_LITE_IF(!safe && wrmsr)) {
+ pr_warn("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
+ (unsigned int)regs->cx, (unsigned int)regs->dx,
+ (unsigned int)regs->ax, regs->ip, (void *)regs->ip);
show_stack_regs(regs);
+ }
- if (!safe && !wrmsr &&
- pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
- (unsigned int)regs->cx, regs->ip, (void *)regs->ip))
+ if (__ONCE_LITE_IF(!safe && !wrmsr)) {
+ pr_warn("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
+ (unsigned int)regs->cx, regs->ip, (void *)regs->ip);
show_stack_regs(regs);
+ }
if (!wrmsr) {
/* Pretend that the read succeeded and returned 0. */
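__ONCE_LITE_IF() keeps the old warn-once behaviour per call site while letting the show_stack_regs() call share the same gate; a simplified sketch of that pattern (the real macro presumably lives in include/linux/once_lite.h and also places the flag in a dedicated section):

#include <stdbool.h>
#include <stdio.h>

/* GNU C statement expression, as in the kernel. */
#define ONCE_LITE_IF(cond)				\
({							\
	static bool __already_done;			\
	bool __fire = (cond) && !__already_done;	\
							\
	if (__fire)					\
		__already_done = true;			\
	__fire;						\
})

int main(void)
{
	for (int i = 0; i < 3; i++)
		if (ONCE_LITE_IF(i >= 0))
			printf("warned once, i=%d\n", i);	/* fires only for i=0 */
	return 0;
}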
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index d8cfce221275..82a042c03824 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -77,10 +77,20 @@ static uint8_t __pte2cachemode_tbl[8] = {
[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
};
-/* Check that the write-protect PAT entry is set for write-protect */
+/*
+ * Check that the write-protect PAT entry is set for write-protect.
+ * To do this without making assumptions about how PAT has been set up (Xen
+ * uses a different layout than the kernel), translate the _PAGE_CACHE_MODE_WP
+ * cache mode via the __cachemode2pte_tbl[] into protection bits (those
+ * protection bits will select a cache mode of WP or better), and then
+ * translate the protection bits back into the cache mode using __pte2cm_idx()
+ * and the __pte2cachemode_tbl[] array. This returns the cache mode that is
+ * really used.
+ */
bool x86_has_pat_wp(void)
{
- return __pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] == _PAGE_CACHE_MODE_WP;
+ uint16_t prot = __cachemode2pte_tbl[_PAGE_CACHE_MODE_WP];
+
+ return __pte2cachemode_tbl[__pte2cm_idx(prot)] == _PAGE_CACHE_MODE_WP;
}
enum page_cache_mode pgprot2cachemode(pgprot_t pgprot)
@@ -846,7 +856,7 @@ int devmem_is_allowed(unsigned long pagenr)
/*
* This must follow RAM test, since System RAM is considered a
- * restricted resource under CONFIG_STRICT_IOMEM.
+ * restricted resource under CONFIG_STRICT_DEVMEM.
*/
if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) {
/* Low 1MB bypasses iomem restrictions. */
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 3d1dba05fce4..9de3d900bc92 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -65,7 +65,10 @@ SYM_FUNC_START(sme_encrypt_execute)
movq %rbp, %rsp /* Restore original stack pointer */
pop %rbp
- RET
+ /* Offset to __x86_return_thunk would be wrong here */
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_FUNC_END(sme_encrypt_execute)
SYM_FUNC_START(__enc_copy)
@@ -151,6 +154,9 @@ SYM_FUNC_START(__enc_copy)
pop %r12
pop %r15
- RET
+ /* Offset to __x86_return_thunk would be wrong here */
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
.L__enc_copy_end:
SYM_FUNC_END(__enc_copy)
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index e44e938885b7..7418c367e328 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -110,7 +110,7 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
return vma_pkey(vma);
}
-#define PKRU_AD_KEY(pkey) (PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))
+#define PKRU_AD_MASK(pkey) (PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))
/*
* Make the default PKRU value (at execve() time) as restrictive
@@ -118,11 +118,14 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
* in the process's lifetime will not accidentally get access
* to data which is pkey-protected later on.
*/
-u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
- PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) |
- PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) |
- PKRU_AD_KEY(10) | PKRU_AD_KEY(11) | PKRU_AD_KEY(12) |
- PKRU_AD_KEY(13) | PKRU_AD_KEY(14) | PKRU_AD_KEY(15);
+u32 init_pkru_value = PKRU_AD_MASK( 1) | PKRU_AD_MASK( 2) |
+ PKRU_AD_MASK( 3) | PKRU_AD_MASK( 4) |
+ PKRU_AD_MASK( 5) | PKRU_AD_MASK( 6) |
+ PKRU_AD_MASK( 7) | PKRU_AD_MASK( 8) |
+ PKRU_AD_MASK( 9) | PKRU_AD_MASK(10) |
+ PKRU_AD_MASK(11) | PKRU_AD_MASK(12) |
+ PKRU_AD_MASK(13) | PKRU_AD_MASK(14) |
+ PKRU_AD_MASK(15);
static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
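
The rename from PKRU_AD_KEY() to PKRU_AD_MASK() only changes the macro name; the computed value still sets the access-disable bit for every protection key except key 0, so keys handed out later in a process's lifetime start out denying access. A quick sketch of the arithmetic, assuming the usual PKRU layout of two bits per key with the AD bit in the low position:

    #include <stdio.h>
    #include <stdint.h>

    #define PKRU_BITS_PER_PKEY 2
    #define PKRU_AD_BIT 0x1u
    #define PKRU_AD_MASK(pkey) (PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))

    int main(void)
    {
            uint32_t init_pkru = 0;

            /* deny access for keys 1..15, leave key 0 (the default key) usable */
            for (int key = 1; key <= 15; key++)
                    init_pkru |= PKRU_AD_MASK(key);

            printf("init_pkru_value = 0x%08x\n", init_pkru); /* 0x55555554 */
            return 0;
    }
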
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index d400b6d9d246..c1e31e9a85d7 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -734,10 +734,10 @@ static void flush_tlb_func(void *info)
const struct flush_tlb_info *f = info;
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
- u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
bool local = smp_processor_id() == f->initiating_cpu;
unsigned long nr_invalidate = 0;
+ u64 mm_tlb_gen;
/* This code cannot presently handle being reentered. */
VM_WARN_ON(!irqs_disabled());
@@ -771,6 +771,23 @@ static void flush_tlb_func(void *info)
return;
}
+ if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID &&
+ f->new_tlb_gen <= local_tlb_gen)) {
+ /*
+ * The TLB is already up to date in respect to f->new_tlb_gen.
+ * While the core might be still behind mm_tlb_gen, checking
+ * mm_tlb_gen unnecessarily would have negative caching effects
+ * so avoid it.
+ */
+ return;
+ }
+
+ /*
+ * Defer mm_tlb_gen reading as long as possible to avoid cache
+ * contention.
+ */
+ mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
+
if (unlikely(local_tlb_gen == mm_tlb_gen)) {
/*
* There's nothing to do: we're already up to date. This can
@@ -827,6 +844,12 @@ static void flush_tlb_func(void *info)
/* Partial flush */
unsigned long addr = f->start;
+ /* Partial flush cannot have invalid generations */
+ VM_WARN_ON(f->new_tlb_gen == TLB_GENERATION_INVALID);
+
+ /* Partial flush must have valid mm */
+ VM_WARN_ON(f->mm == NULL);
+
nr_invalidate = (f->end - f->start) >> f->stride_shift;
while (addr < f->end) {
@@ -1029,7 +1052,8 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
struct flush_tlb_info *info;
preempt_disable();
- info = get_flush_tlb_info(NULL, start, end, 0, false, 0);
+ info = get_flush_tlb_info(NULL, start, end, 0, false,
+ TLB_GENERATION_INVALID);
on_each_cpu(do_kernel_range_flush, info, 1);
@@ -1198,7 +1222,8 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
int cpu = get_cpu();
- info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, 0);
+ info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false,
+ TLB_GENERATION_INVALID);
/*
* flush_tlb_multi() is not optimized for the common case in which only
* a local TLB flush is needed. Optimize this use-case by calling
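
The flush_tlb_func() change above is a cache-contention optimization: if the request carries a valid generation and this CPU has already reached it, the function returns before ever reading the shared mm->context.tlb_gen atomic, so its cache line is not pulled in unnecessarily; the shared counter is read only when real work might be needed. A minimal sketch of the same ordering (check the per-CPU copy first, defer the shared atomic read), with invented names:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define GEN_INVALID 0   /* stand-in for TLB_GENERATION_INVALID */

    struct shared_state {
            _Atomic uint64_t gen;      /* shared, contended counter */
    };

    struct cpu_state {
            uint64_t local_gen;        /* this CPU's last-seen generation */
    };

    /* Returns true if a flush is actually required for request 'req_gen'. */
    static bool need_flush(struct shared_state *s, struct cpu_state *c,
                           uint64_t req_gen)
    {
            /* Fast path: request already satisfied locally; do not touch the
             * shared counter at all, to avoid bouncing its cache line. */
            if (req_gen != GEN_INVALID && req_gen <= c->local_gen)
                    return false;

            /* Slow path: only now look at the shared generation. */
            uint64_t shared_gen = atomic_load(&s->gen);
            return shared_gen != c->local_gen;
    }

    int main(void)
    {
            struct shared_state s = { .gen = 5 };
            struct cpu_state c = { .local_gen = 5 };

            return need_flush(&s, &c, 4) ? 1 : 0;   /* 0: already up to date */
    }
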
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index c98b8c0ed3b8..b808c9a80d1b 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -412,16 +412,30 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
{
u8 *prog = *pprog;
-#ifdef CONFIG_RETPOLINE
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
EMIT_LFENCE();
EMIT2(0xFF, 0xE0 + reg);
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
OPTIMIZER_HIDE_VAR(reg);
emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
- } else
-#endif
- EMIT2(0xFF, 0xE0 + reg);
+ } else {
+ EMIT2(0xFF, 0xE0 + reg);
+ }
+
+ *pprog = prog;
+}
+
+static void emit_return(u8 **pprog, u8 *ip)
+{
+ u8 *prog = *pprog;
+
+ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
+ emit_jump(&prog, &__x86_return_thunk, ip);
+ } else {
+ EMIT1(0xC3); /* ret */
+ if (IS_ENABLED(CONFIG_SLS))
+ EMIT1(0xCC); /* int3 */
+ }
*pprog = prog;
}
@@ -1686,7 +1700,7 @@ emit_jmp:
ctx->cleanup_addr = proglen;
pop_callee_regs(&prog, callee_regs_used);
EMIT1(0xC9); /* leave */
- EMIT1(0xC3); /* ret */
+ emit_return(&prog, image + addrs[i - 1] + (prog - temp));
break;
default:
@@ -2189,7 +2203,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
if (flags & BPF_TRAMP_F_SKIP_FRAME)
/* skip our return address and return to parent */
EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
- EMIT1(0xC3); /* ret */
+ emit_return(&prog, prog);
/* Make sure the trampoline generation logic doesn't overflow */
if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
ret = -EFAULT;
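
emit_return() centralizes how the JIT ends a program: when return thunks are enabled it emits a jump to __x86_return_thunk, otherwise a plain ret, optionally followed by int3 for straight-line-speculation hardening. A toy byte emitter showing the same shape (the ret/int3/jmp opcodes are the real ones; everything else is illustrative):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static void emit1(uint8_t **prog, uint8_t byte)
    {
            *(*prog)++ = byte;
    }

    /* Emit either a jump to a shared return thunk or a bare ret (+ int3). */
    static void emit_return(uint8_t **prog, bool use_rethunk, bool sls)
    {
            if (use_rethunk) {
                    emit1(prog, 0xE9);              /* jmp rel32 ... */
                    for (int i = 0; i < 4; i++)
                            emit1(prog, 0x00);      /* placeholder displacement */
            } else {
                    emit1(prog, 0xC3);              /* ret */
                    if (sls)
                            emit1(prog, 0xCC);      /* int3 */
            }
    }

    int main(void)
    {
            uint8_t buf[16], *p = buf;

            emit_return(&p, false, true);
            printf("emitted %td byte(s): %02x %02x\n", p - buf, buf[0], buf[1]);
            return 0;
    }
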
diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S
index 9ffe2bad27d5..4e5257a4811b 100644
--- a/arch/x86/platform/efi/efi_thunk_64.S
+++ b/arch/x86/platform/efi/efi_thunk_64.S
@@ -23,6 +23,7 @@
#include <linux/objtool.h>
#include <asm/page_types.h>
#include <asm/segment.h>
+#include <asm/nospec-branch.h>
.text
.code64
@@ -75,7 +76,9 @@ STACK_FRAME_NON_STANDARD __efi64_thunk
1: movq 0x20(%rsp), %rsp
pop %rbx
pop %rbp
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
.code32
2: pushl $__KERNEL_CS
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index ae53d54d7959..31c634a22818 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -73,12 +73,6 @@ $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(obj)/purgatory.chk: $(obj)/purgatory.ro FORCE
$(call if_changed,ld)
-targets += kexec-purgatory.c
+$(obj)/kexec-purgatory.o: $(obj)/purgatory.ro $(obj)/purgatory.chk
-quiet_cmd_bin2c = BIN2C $@
- cmd_bin2c = $(objtree)/scripts/bin2c kexec_purgatory < $< > $@
-
-$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro $(obj)/purgatory.chk FORCE
- $(call if_changed,bin2c)
-
-obj-$(CONFIG_KEXEC_FILE) += kexec-purgatory.o
+obj-y += kexec-purgatory.o
diff --git a/arch/x86/purgatory/kexec-purgatory.S b/arch/x86/purgatory/kexec-purgatory.S
new file mode 100644
index 000000000000..8530fe93b718
--- /dev/null
+++ b/arch/x86/purgatory/kexec-purgatory.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+ .section .rodata, "a"
+
+ .align 8
+kexec_purgatory:
+ .globl kexec_purgatory
+ .incbin "arch/x86/purgatory/purgatory.ro"
+.Lkexec_purgatory_end:
+
+ .align 8
+kexec_purgatory_size:
+ .globl kexec_purgatory_size
+ .quad .Lkexec_purgatory_end - kexec_purgatory
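
The new kexec-purgatory.S replaces the bin2c-generated C file: the purgatory blob is pulled straight into .rodata with .incbin, and a trailing .quad records its size, so C code can reference both through ordinary extern declarations. A small stand-alone sketch of the same embedding pattern using top-level inline asm (payload.bin and all symbol names are placeholders, and a 64-bit target is assumed for the .quad/unsigned long pairing):

    #include <stdio.h>

    /* Embed a binary file into .rodata and expose start + size symbols. */
    __asm__(
            ".section .rodata\n"
            ".balign 8\n"
            ".global payload_start\n"
            ".global payload_size\n"
            "payload_start:\n"
            ".incbin \"payload.bin\"\n"
            ".Lpayload_end:\n"
            ".balign 8\n"
            "payload_size:\n"
            ".quad .Lpayload_end - payload_start\n"
            ".previous\n");

    extern const unsigned char payload_start[];
    extern const unsigned long payload_size;

    int main(void)
    {
            printf("embedded %lu bytes, first byte 0x%02x\n",
                   payload_size, payload_start[0]);
            return 0;
    }
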
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 81aa46f770c5..cfa99e8f054b 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -918,7 +918,7 @@ void xen_enable_sysenter(void)
if (!boot_cpu_has(sysenter_feature))
return;
- ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
+ ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat);
if(ret != 0)
setup_clear_cpu_cap(sysenter_feature);
}
@@ -927,7 +927,7 @@ void xen_enable_syscall(void)
{
int ret;
- ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
+ ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64);
if (ret != 0) {
printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
/* Pretty fatal; 64-bit userspace has no other
@@ -936,7 +936,7 @@ void xen_enable_syscall(void)
if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
ret = register_callback(CALLBACKTYPE_syscall32,
- xen_syscall32_target);
+ xen_entry_SYSCALL_compat);
if (ret != 0)
setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
}
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index caa9bc2fa100..6b4fdf6b9542 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -121,7 +121,7 @@ SYM_FUNC_END(xen_read_cr2_direct);
.macro xen_pv_trap name
SYM_CODE_START(xen_\name)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
ENDBR
pop %rcx
pop %r11
@@ -234,8 +234,8 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode)
*/
/* Normal 64-bit system call target */
-SYM_CODE_START(xen_syscall_target)
- UNWIND_HINT_EMPTY
+SYM_CODE_START(xen_entry_SYSCALL_64)
+ UNWIND_HINT_ENTRY
ENDBR
popq %rcx
popq %r11
@@ -249,13 +249,13 @@ SYM_CODE_START(xen_syscall_target)
movq $__USER_CS, 1*8(%rsp)
jmp entry_SYSCALL_64_after_hwframe
-SYM_CODE_END(xen_syscall_target)
+SYM_CODE_END(xen_entry_SYSCALL_64)
#ifdef CONFIG_IA32_EMULATION
/* 32-bit compat syscall target */
-SYM_CODE_START(xen_syscall32_target)
- UNWIND_HINT_EMPTY
+SYM_CODE_START(xen_entry_SYSCALL_compat)
+ UNWIND_HINT_ENTRY
ENDBR
popq %rcx
popq %r11
@@ -269,11 +269,11 @@ SYM_CODE_START(xen_syscall32_target)
movq $__USER32_CS, 1*8(%rsp)
jmp entry_SYSCALL_compat_after_hwframe
-SYM_CODE_END(xen_syscall32_target)
+SYM_CODE_END(xen_entry_SYSCALL_compat)
/* 32-bit compat sysenter target */
-SYM_CODE_START(xen_sysenter_target)
- UNWIND_HINT_EMPTY
+SYM_CODE_START(xen_entry_SYSENTER_compat)
+ UNWIND_HINT_ENTRY
ENDBR
/*
* NB: Xen is polite and clears TF from EFLAGS for us. This means
@@ -291,19 +291,19 @@ SYM_CODE_START(xen_sysenter_target)
movq $__USER32_CS, 1*8(%rsp)
jmp entry_SYSENTER_compat_after_hwframe
-SYM_CODE_END(xen_sysenter_target)
+SYM_CODE_END(xen_entry_SYSENTER_compat)
#else /* !CONFIG_IA32_EMULATION */
-SYM_CODE_START(xen_syscall32_target)
-SYM_CODE_START(xen_sysenter_target)
- UNWIND_HINT_EMPTY
+SYM_CODE_START(xen_entry_SYSCALL_compat)
+SYM_CODE_START(xen_entry_SYSENTER_compat)
+ UNWIND_HINT_ENTRY
ENDBR
lea 16(%rsp), %rsp /* strip %rcx, %r11 */
mov $-ENOSYS, %rax
pushq $0
jmp hypercall_iret
-SYM_CODE_END(xen_sysenter_target)
-SYM_CODE_END(xen_syscall32_target)
+SYM_CODE_END(xen_entry_SYSENTER_compat)
+SYM_CODE_END(xen_entry_SYSCALL_compat)
#endif /* CONFIG_IA32_EMULATION */
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 13af6fe453e3..ffaa62167f6e 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -26,6 +26,7 @@ SYM_CODE_START(hypercall_page)
.rept (PAGE_SIZE / 32)
UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
+ ANNOTATE_UNRET_SAFE
ret
/*
* Xen will write the hypercall page, and sort out ENDBR.
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index fd0fec6e92f4..9a8bb972193d 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -10,10 +10,10 @@
/* These are code, but not functions. Defined in entry.S */
extern const char xen_failsafe_callback[];
-void xen_sysenter_target(void);
+void xen_entry_SYSENTER_compat(void);
#ifdef CONFIG_X86_64
-void xen_syscall_target(void);
-void xen_syscall32_target(void);
+void xen_entry_SYSCALL_64(void);
+void xen_entry_SYSCALL_compat(void);
#endif
extern void *xen_initial_gdt;
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index 529fe9245821..42f106004400 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -169,7 +169,7 @@ void migrate_irqs(void)
for_each_active_irq(i) {
struct irq_data *data = irq_get_irq_data(i);
- struct cpumask *mask;
+ const struct cpumask *mask;
unsigned int newcpu;
if (irqd_is_per_cpu(data))
@@ -185,9 +185,10 @@ void migrate_irqs(void)
pr_info_ratelimited("IRQ%u no longer affine to CPU%u\n",
i, cpu);
- cpumask_setall(mask);
+ irq_set_affinity(i, cpu_all_mask);
+ } else {
+ irq_set_affinity(i, mask);
}
- irq_set_affinity(i, mask);
}
}
#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 7771dacc99cb..f5e6527ebc9c 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -345,6 +345,7 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
/* there isn't chance to merge the splitted bio */
split->bi_opf |= REQ_NOMERGE;
+ blkcg_bio_issue_init(split);
bio_chain(split, *bio);
trace_block_split(split, (*bio)->bi_iter.bi_sector);
submit_bio_noacct(*bio);
diff --git a/certs/Kconfig b/certs/Kconfig
index 476755703cf8..bf9b511573d7 100644
--- a/certs/Kconfig
+++ b/certs/Kconfig
@@ -43,6 +43,7 @@ config SYSTEM_TRUSTED_KEYRING
bool "Provide system-wide ring of trusted keys"
depends on KEYS
depends on ASYMMETRIC_KEY_TYPE
+ depends on X509_CERTIFICATE_PARSER
help
Provide a system keyring to which trusted keys can be added. Keys in
the keyring are considered to be trusted. Keys may be added at will
diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
index 43177c20ce4f..eaea733b368a 100644
--- a/drivers/acpi/acpi_video.c
+++ b/drivers/acpi/acpi_video.c
@@ -73,7 +73,7 @@ module_param(device_id_scheme, bool, 0444);
static int only_lcd = -1;
module_param(only_lcd, int, 0444);
-static bool has_backlight;
+static bool may_report_brightness_keys;
static int register_count;
static DEFINE_MUTEX(register_count_mutex);
static DEFINE_MUTEX(video_list_lock);
@@ -1224,7 +1224,7 @@ acpi_video_bus_get_one_device(struct acpi_device *device,
acpi_video_device_find_cap(data);
if (data->cap._BCM && data->cap._BCL)
- has_backlight = true;
+ may_report_brightness_keys = true;
mutex_lock(&video->device_list_lock);
list_add_tail(&data->entry, &video->video_device_list);
@@ -1693,6 +1693,9 @@ static void acpi_video_device_notify(acpi_handle handle, u32 event, void *data)
break;
}
+ if (keycode)
+ may_report_brightness_keys = true;
+
acpi_notifier_call_chain(device, event, 0);
if (keycode && (report_key_events & REPORT_BRIGHTNESS_KEY_EVENTS)) {
@@ -2253,7 +2256,7 @@ void acpi_video_unregister(void)
if (register_count) {
acpi_bus_unregister_driver(&acpi_video_bus);
register_count = 0;
- has_backlight = false;
+ may_report_brightness_keys = false;
}
mutex_unlock(&register_count_mutex);
}
@@ -2275,7 +2278,7 @@ void acpi_video_unregister_backlight(void)
bool acpi_video_handles_brightness_key_presses(void)
{
- return has_backlight &&
+ return may_report_brightness_keys &&
(report_key_events & REPORT_BRIGHTNESS_KEY_EVENTS);
}
EXPORT_SYMBOL(acpi_video_handles_brightness_key_presses);
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index e2db1bdd9dd2..0dbb39ef4179 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1144,6 +1144,9 @@ static int __init acpi_bus_init_irq(void)
case ACPI_IRQ_MODEL_PLATFORM:
message = "platform specific model";
break;
+ case ACPI_IRQ_MODEL_LPIC:
+ message = "LPIC";
+ break;
default:
pr_info("Unknown interrupt routing model\n");
return -ENODEV;
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 6ff1901d7d43..3c6d4ef87be0 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -782,7 +782,8 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
if (!osc_cpc_flexible_adr_space_confirmed) {
pr_debug("Flexible address space capability not supported\n");
- goto out_free;
+ if (!cpc_supported_by_cpu())
+ goto out_free;
}
addr = ioremap(gas_t->address, gas_t->bit_width/8);
@@ -809,7 +810,8 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
}
if (!osc_cpc_flexible_adr_space_confirmed) {
pr_debug("Flexible address space capability not supported\n");
- goto out_free;
+ if (!cpc_supported_by_cpu())
+ goto out_free;
}
} else {
if (gas_t->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE || !cpc_ffh_supported()) {
diff --git a/drivers/acpi/irq.c b/drivers/acpi/irq.c
index c68e694fca26..dabe45eba055 100644
--- a/drivers/acpi/irq.c
+++ b/drivers/acpi/irq.c
@@ -12,7 +12,8 @@
enum acpi_irq_model_id acpi_irq_model;
-static struct fwnode_handle *acpi_gsi_domain_id;
+static struct fwnode_handle *(*acpi_get_gsi_domain_id)(u32 gsi);
+static u32 (*acpi_gsi_to_irq_fallback)(u32 gsi);
/**
* acpi_gsi_to_irq() - Retrieve the linux irq number for a given GSI
@@ -26,14 +27,18 @@ static struct fwnode_handle *acpi_gsi_domain_id;
*/
int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
{
- struct irq_domain *d = irq_find_matching_fwnode(acpi_gsi_domain_id,
- DOMAIN_BUS_ANY);
+ struct irq_domain *d;
+ d = irq_find_matching_fwnode(acpi_get_gsi_domain_id(gsi),
+ DOMAIN_BUS_ANY);
*irq = irq_find_mapping(d, gsi);
/*
- * *irq == 0 means no mapping, that should
- * be reported as a failure
+ * *irq == 0 means no mapping, that should be reported as a
+ * failure, unless there is an arch-specific fallback handler.
*/
+ if (!*irq && acpi_gsi_to_irq_fallback)
+ *irq = acpi_gsi_to_irq_fallback(gsi);
+
return (*irq > 0) ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);
@@ -53,12 +58,12 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger,
{
struct irq_fwspec fwspec;
- if (WARN_ON(!acpi_gsi_domain_id)) {
+ fwspec.fwnode = acpi_get_gsi_domain_id(gsi);
+ if (WARN_ON(!fwspec.fwnode)) {
pr_warn("GSI: No registered irqchip, giving up\n");
return -EINVAL;
}
- fwspec.fwnode = acpi_gsi_domain_id;
fwspec.param[0] = gsi;
fwspec.param[1] = acpi_dev_get_irq_type(trigger, polarity);
fwspec.param_count = 2;
@@ -73,13 +78,14 @@ EXPORT_SYMBOL_GPL(acpi_register_gsi);
*/
void acpi_unregister_gsi(u32 gsi)
{
- struct irq_domain *d = irq_find_matching_fwnode(acpi_gsi_domain_id,
- DOMAIN_BUS_ANY);
+ struct irq_domain *d;
int irq;
if (WARN_ON(acpi_irq_model == ACPI_IRQ_MODEL_GIC && gsi < 16))
return;
+ d = irq_find_matching_fwnode(acpi_get_gsi_domain_id(gsi),
+ DOMAIN_BUS_ANY);
irq = irq_find_mapping(d, gsi);
irq_dispose_mapping(irq);
}
@@ -97,7 +103,8 @@ EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
* The referenced device fwhandle or NULL on failure
*/
static struct fwnode_handle *
-acpi_get_irq_source_fwhandle(const struct acpi_resource_source *source)
+acpi_get_irq_source_fwhandle(const struct acpi_resource_source *source,
+ u32 gsi)
{
struct fwnode_handle *result;
struct acpi_device *device;
@@ -105,7 +112,7 @@ acpi_get_irq_source_fwhandle(const struct acpi_resource_source *source)
acpi_status status;
if (!source->string_length)
- return acpi_gsi_domain_id;
+ return acpi_get_gsi_domain_id(gsi);
status = acpi_get_handle(NULL, source->string_ptr, &handle);
if (WARN_ON(ACPI_FAILURE(status)))
@@ -194,7 +201,7 @@ static acpi_status acpi_irq_parse_one_cb(struct acpi_resource *ares,
ctx->index -= irq->interrupt_count;
return AE_OK;
}
- fwnode = acpi_gsi_domain_id;
+ fwnode = acpi_get_gsi_domain_id(irq->interrupts[ctx->index]);
acpi_irq_parse_one_match(fwnode, irq->interrupts[ctx->index],
irq->triggering, irq->polarity,
irq->shareable, ctx);
@@ -207,7 +214,8 @@ static acpi_status acpi_irq_parse_one_cb(struct acpi_resource *ares,
ctx->index -= eirq->interrupt_count;
return AE_OK;
}
- fwnode = acpi_get_irq_source_fwhandle(&eirq->resource_source);
+ fwnode = acpi_get_irq_source_fwhandle(&eirq->resource_source,
+ eirq->interrupts[ctx->index]);
acpi_irq_parse_one_match(fwnode, eirq->interrupts[ctx->index],
eirq->triggering, eirq->polarity,
eirq->shareable, ctx);
@@ -291,10 +299,20 @@ EXPORT_SYMBOL_GPL(acpi_irq_get);
* GSI interrupts
*/
void __init acpi_set_irq_model(enum acpi_irq_model_id model,
- struct fwnode_handle *fwnode)
+ struct fwnode_handle *(*fn)(u32))
{
acpi_irq_model = model;
- acpi_gsi_domain_id = fwnode;
+ acpi_get_gsi_domain_id = fn;
+}
+
+/**
+ * acpi_set_gsi_to_irq_fallback - Register a GSI transfer
+ * callback to fallback to arch specified implementation.
+ * @fn: arch-specific fallback handler
+ */
+void __init acpi_set_gsi_to_irq_fallback(u32 (*fn)(u32))
+{
+ acpi_gsi_to_irq_fallback = fn;
}
/**
@@ -312,8 +330,14 @@ struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags,
const struct irq_domain_ops *ops,
void *host_data)
{
- struct irq_domain *d = irq_find_matching_fwnode(acpi_gsi_domain_id,
- DOMAIN_BUS_ANY);
+ struct irq_domain *d;
+
+ /* This only works for the GIC model... */
+ if (acpi_irq_model != ACPI_IRQ_MODEL_GIC)
+ return NULL;
+
+ d = irq_find_matching_fwnode(acpi_get_gsi_domain_id(0),
+ DOMAIN_BUS_ANY);
if (!d)
return NULL;
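
After this change the ACPI IRQ layer no longer caches one fwnode for all GSIs; it asks an architecture-supplied callback which domain owns a given GSI, and if the normal lookup yields no mapping an optional fallback callback can still produce an IRQ number. A compact sketch of that double indirection, with invented callback and lookup names:

    #include <stdio.h>

    /* Primary resolver: which "domain" handles this GSI (0 = unknown). */
    static unsigned int (*get_domain_for_gsi)(unsigned int gsi);
    /* Optional arch fallback used when the normal lookup yields no IRQ. */
    static unsigned int (*gsi_to_irq_fallback)(unsigned int gsi);

    static unsigned int lookup_mapping(unsigned int domain, unsigned int gsi)
    {
            /* pretend only domain 1 has 1:1 mappings for GSIs below 32 */
            return (domain == 1 && gsi < 32) ? gsi + 1 : 0;
    }

    static int gsi_to_irq(unsigned int gsi, unsigned int *irq)
    {
            *irq = lookup_mapping(get_domain_for_gsi(gsi), gsi);
            if (!*irq && gsi_to_irq_fallback)
                    *irq = gsi_to_irq_fallback(gsi);
            return *irq > 0 ? 0 : -1;
    }

    static unsigned int pick_domain(unsigned int gsi) { return gsi < 64 ? 1 : 2; }
    static unsigned int legacy_fixup(unsigned int gsi) { return gsi + 100; }

    int main(void)
    {
            unsigned int irq;

            get_domain_for_gsi = pick_domain;
            gsi_to_irq_fallback = legacy_fixup;

            gsi_to_irq(70, &irq);           /* no mapping -> fallback path */
            printf("GSI 70 -> IRQ %u\n", irq);
            return 0;
    }
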
diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 0e3ed5eb367b..0cb20324da16 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -493,13 +493,8 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
goto skip_probe;
ret = amba_read_periphid(dev);
- if (ret) {
- if (ret != -EPROBE_DEFER) {
- amba_device_put(dev);
- goto err_out;
- }
+ if (ret)
goto err_release;
- }
skip_probe:
ret = device_add(&dev->dev);
@@ -546,6 +541,7 @@ static int amba_deferred_retry(void)
continue;
list_del_init(&ddev->node);
+ amba_device_put(ddev->dev);
kfree(ddev);
}
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index a97776ea9d99..4c98849577d4 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -570,6 +570,12 @@ ssize_t __weak cpu_show_mmio_stale_data(struct device *dev,
return sysfs_emit(buf, "Not affected\n");
}
+ssize_t __weak cpu_show_retbleed(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "Not affected\n");
+}
+
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
@@ -580,6 +586,7 @@ static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL);
static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL);
+static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL);
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_meltdown.attr,
@@ -592,6 +599,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_itlb_multihit.attr,
&dev_attr_srbds.attr,
&dev_attr_mmio_stale_data.attr,
+ &dev_attr_retbleed.attr,
NULL
};
diff --git a/drivers/char/random.c b/drivers/char/random.c
index e3dd1dd3dd22..a1af90bacc9f 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1174,7 +1174,7 @@ static void __cold entropy_timer(struct timer_list *timer)
*/
static void __cold try_to_generate_entropy(void)
{
- enum { NUM_TRIAL_SAMPLES = 8192, MAX_SAMPLES_PER_BIT = 32 };
+ enum { NUM_TRIAL_SAMPLES = 8192, MAX_SAMPLES_PER_BIT = HZ / 30 };
struct entropy_timer_state stack;
unsigned int i, num_different = 0;
unsigned long last = random_get_entropy();
diff --git a/drivers/clk/clk-lan966x.c b/drivers/clk/clk-lan966x.c
index d1535ac13e89..81cb90955d68 100644
--- a/drivers/clk/clk-lan966x.c
+++ b/drivers/clk/clk-lan966x.c
@@ -213,7 +213,7 @@ static int lan966x_gate_clk_register(struct device *dev,
hw_data->hws[i] =
devm_clk_hw_register_gate(dev, clk_gate_desc[idx].name,
- "lan966x", 0, base,
+ "lan966x", 0, gate_base,
clk_gate_desc[idx].bit_idx,
0, &clk_gate_lock);
diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c b/drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c
index 29a8c710ae06..b7962e5149a5 100644
--- a/drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c
+++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c
@@ -138,6 +138,7 @@ static struct ccu_common *sun50i_h6_r_ccu_clks[] = {
&r_apb2_rsb_clk.common,
&r_apb1_ir_clk.common,
&r_apb1_w1_clk.common,
+ &r_apb1_rtc_clk.common,
&ir_clk.common,
&w1_clk.common,
};
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 3c0ee102fe73..4f2bb7315b67 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -22,7 +22,7 @@ config CLKEVT_I8253
config I8253_LOCK
bool
-config OMAP_DM_TIMER
+config OMAP_DM_SYSTIMER
bool
select TIMER_OF
@@ -56,6 +56,13 @@ config DIGICOLOR_TIMER
help
Enables the support for the digicolor timer driver.
+config OMAP_DM_TIMER
+ bool "OMAP dual-mode timer driver" if ARCH_K3 || COMPILE_TEST
+ default y if ARCH_K3
+ select TIMER_OF
+ help
+ Enables the support for the TI dual-mode timer driver.
+
config DW_APB_TIMER
bool "DW APB timer driver" if COMPILE_TEST
help
@@ -150,6 +157,14 @@ config TEGRA_TIMER
help
Enables support for the Tegra driver.
+config TEGRA186_TIMER
+ bool "NVIDIA Tegra186 timer driver"
+ depends on ARCH_TEGRA || COMPILE_TEST
+ depends on WATCHDOG && WATCHDOG_CORE
+ help
+ Enables support for the timers and watchdogs found on NVIDIA
+ Tegra186 and later SoCs.
+
config VT8500_TIMER
bool "VT8500 timer driver" if COMPILE_TEST
depends on HAS_IOMEM
@@ -367,7 +382,7 @@ config ARM_GT_INITIAL_PRESCALER_VAL
depends on ARM_GLOBAL_TIMER
help
When the ARM global timer initializes, its current rate is declared
- to the kernel and maintained forever. Should it's parent clock
+ to the kernel and maintained forever. Should its parent clock
change, the driver tries to fix the timer's internal prescaler.
On some machs (i.e. Zynq) the initial prescaler value thus poses
bounds about how much the parent clock is allowed to decrease or
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 6ca640019e10..64ab547de97b 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_CLKSRC_MMIO) += mmio.o
obj-$(CONFIG_DAVINCI_TIMER) += timer-davinci.o
obj-$(CONFIG_DIGICOLOR_TIMER) += timer-digicolor.o
obj-$(CONFIG_OMAP_DM_TIMER) += timer-ti-dm.o
-obj-$(CONFIG_OMAP_DM_TIMER) += timer-ti-dm-systimer.o
+obj-$(CONFIG_OMAP_DM_SYSTIMER) += timer-ti-dm-systimer.o
obj-$(CONFIG_DW_APB_TIMER) += dw_apb_timer.o
obj-$(CONFIG_DW_APB_TIMER_OF) += dw_apb_timer_of.o
obj-$(CONFIG_FTTMR010_TIMER) += timer-fttmr010.o
@@ -36,6 +36,7 @@ obj-$(CONFIG_SUN4I_TIMER) += timer-sun4i.o
obj-$(CONFIG_SUN5I_HSTIMER) += timer-sun5i.o
obj-$(CONFIG_MESON6_TIMER) += timer-meson6.o
obj-$(CONFIG_TEGRA_TIMER) += timer-tegra.o
+obj-$(CONFIG_TEGRA186_TIMER) += timer-tegra186.o
obj-$(CONFIG_VT8500_TIMER) += timer-vt8500.o
obj-$(CONFIG_NSPIRE_TIMER) += timer-zevio.o
obj-$(CONFIG_BCM_KONA_TIMER) += bcm_kona_timer.o
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index dd0956ad969c..64dcb082d4cf 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -981,6 +981,14 @@ static const struct of_device_id sh_cmt_of_table[] __maybe_unused = {
.compatible = "renesas,rcar-gen3-cmt1",
.data = &sh_cmt_info[SH_CMT1_RCAR_GEN2]
},
+ {
+ .compatible = "renesas,rcar-gen4-cmt0",
+ .data = &sh_cmt_info[SH_CMT0_RCAR_GEN2]
+ },
+ {
+ .compatible = "renesas,rcar-gen4-cmt1",
+ .data = &sh_cmt_info[SH_CMT1_RCAR_GEN2]
+ },
{ }
};
MODULE_DEVICE_TABLE(of, sh_cmt_of_table);
diff --git a/drivers/clocksource/timer-mediatek.c b/drivers/clocksource/timer-mediatek.c
index 7bcb4a3f26fb..d5b29fd03ca2 100644
--- a/drivers/clocksource/timer-mediatek.c
+++ b/drivers/clocksource/timer-mediatek.c
@@ -22,6 +22,19 @@
#define TIMER_SYNC_TICKS (3)
+/* cpux mcusys wrapper */
+#define CPUX_CON_REG 0x0
+#define CPUX_IDX_REG 0x4
+
+/* cpux */
+#define CPUX_IDX_GLOBAL_CTRL 0x0
+ #define CPUX_ENABLE BIT(0)
+ #define CPUX_CLK_DIV_MASK GENMASK(10, 8)
+ #define CPUX_CLK_DIV1 BIT(8)
+ #define CPUX_CLK_DIV2 BIT(9)
+ #define CPUX_CLK_DIV4 BIT(10)
+#define CPUX_IDX_GLOBAL_IRQ 0x30
+
/* gpt */
#define GPT_IRQ_EN_REG 0x00
#define GPT_IRQ_ENABLE(val) BIT((val) - 1)
@@ -72,6 +85,52 @@
static void __iomem *gpt_sched_reg __read_mostly;
+static u32 mtk_cpux_readl(u32 reg_idx, struct timer_of *to)
+{
+ writel(reg_idx, timer_of_base(to) + CPUX_IDX_REG);
+ return readl(timer_of_base(to) + CPUX_CON_REG);
+}
+
+static void mtk_cpux_writel(u32 val, u32 reg_idx, struct timer_of *to)
+{
+ writel(reg_idx, timer_of_base(to) + CPUX_IDX_REG);
+ writel(val, timer_of_base(to) + CPUX_CON_REG);
+}
+
+static void mtk_cpux_set_irq(struct timer_of *to, bool enable)
+{
+ const unsigned long *irq_mask = cpumask_bits(cpu_possible_mask);
+ u32 val;
+
+ val = mtk_cpux_readl(CPUX_IDX_GLOBAL_IRQ, to);
+
+ if (enable)
+ val |= *irq_mask;
+ else
+ val &= ~(*irq_mask);
+
+ mtk_cpux_writel(val, CPUX_IDX_GLOBAL_IRQ, to);
+}
+
+static int mtk_cpux_clkevt_shutdown(struct clock_event_device *clkevt)
+{
+ /* Clear any irq */
+ mtk_cpux_set_irq(to_timer_of(clkevt), false);
+
+ /*
+ * Disabling CPUXGPT timer will crash the platform, especially
+ * if Trusted Firmware is using it (usually, for sleep states),
+ * so we only mask the IRQ and call it a day.
+ */
+ return 0;
+}
+
+static int mtk_cpux_clkevt_resume(struct clock_event_device *clkevt)
+{
+ mtk_cpux_set_irq(to_timer_of(clkevt), true);
+ return 0;
+}
+
static void mtk_syst_ack_irq(struct timer_of *to)
{
/* Clear and disable interrupt */
@@ -281,6 +340,60 @@ static struct timer_of to = {
},
};
+static int __init mtk_cpux_init(struct device_node *node)
+{
+ static struct timer_of to_cpux;
+ u32 freq, val;
+ int ret;
+
+ /*
+ * There are per-cpu interrupts for the CPUX General Purpose Timer
+ * but since this timer feeds the AArch64 System Timer we can rely
+ * on the CPU timer PPIs as well, so we don't declare TIMER_OF_IRQ.
+ */
+ to_cpux.flags = TIMER_OF_BASE | TIMER_OF_CLOCK;
+ to_cpux.clkevt.name = "mtk-cpuxgpt";
+ to_cpux.clkevt.rating = 10;
+ to_cpux.clkevt.cpumask = cpu_possible_mask;
+ to_cpux.clkevt.set_state_shutdown = mtk_cpux_clkevt_shutdown;
+ to_cpux.clkevt.tick_resume = mtk_cpux_clkevt_resume;
+
+ /* If this fails, bad things are about to happen... */
+ ret = timer_of_init(node, &to_cpux);
+ if (ret) {
+ WARN(1, "Cannot start CPUX timers.\n");
+ return ret;
+ }
+
+ /*
+ * Check if we're given a clock with the right frequency for this
+ * timer, otherwise warn but keep going with the setup anyway, as
+ * that makes it possible to still boot the kernel, even though
+ * it may not work correctly (random lockups, etc).
+ * The reason behind this is that having an early UART may not be
+ * possible for everyone and this gives a chance to retrieve kmsg
+ * for eventual debugging even on consumer devices.
+ */
+ freq = timer_of_rate(&to_cpux);
+ if (freq > 13000000)
+ WARN(1, "Requested unsupported timer frequency %u\n", freq);
+
+ /* Clock input is 26MHz, set DIV2 to achieve 13MHz clock */
+ val = mtk_cpux_readl(CPUX_IDX_GLOBAL_CTRL, &to_cpux);
+ val &= ~CPUX_CLK_DIV_MASK;
+ val |= CPUX_CLK_DIV2;
+ mtk_cpux_writel(val, CPUX_IDX_GLOBAL_CTRL, &to_cpux);
+
+ /* Enable all CPUXGPT timers */
+ val = mtk_cpux_readl(CPUX_IDX_GLOBAL_CTRL, &to_cpux);
+ mtk_cpux_writel(val | CPUX_ENABLE, CPUX_IDX_GLOBAL_CTRL, &to_cpux);
+
+ clockevents_config_and_register(&to_cpux.clkevt, timer_of_rate(&to_cpux),
+ TIMER_SYNC_TICKS, 0xffffffff);
+
+ return 0;
+}
+
static int __init mtk_syst_init(struct device_node *node)
{
int ret;
@@ -339,3 +452,4 @@ static int __init mtk_gpt_init(struct device_node *node)
}
TIMER_OF_DECLARE(mtk_mt6577, "mediatek,mt6577-timer", mtk_gpt_init);
TIMER_OF_DECLARE(mtk_mt6765, "mediatek,mt6765-timer", mtk_syst_init);
+TIMER_OF_DECLARE(mtk_mt6795, "mediatek,mt6795-systimer", mtk_cpux_init);
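
The new MT6795 CPUX support accesses its timer registers indirectly: the wrapper exposes only an index register and a data ("CON") register, so every access first writes the target register index and then reads or writes the data register. A user-space model of that indexed access scheme (the register array stands in for the MMIO window):

    #include <stdint.h>
    #include <stdio.h>

    /* Model of a device reachable only through an index/data register pair. */
    struct cpux_dev {
            uint32_t regs[64];      /* internal registers, not directly mapped */
            uint32_t idx;           /* last value written to the index register */
    };

    static uint32_t cpux_readl(struct cpux_dev *dev, uint32_t reg_idx)
    {
            dev->idx = reg_idx;             /* write CPUX_IDX_REG */
            return dev->regs[dev->idx];     /* read  CPUX_CON_REG */
    }

    static void cpux_writel(struct cpux_dev *dev, uint32_t val, uint32_t reg_idx)
    {
            dev->idx = reg_idx;             /* write CPUX_IDX_REG */
            dev->regs[dev->idx] = val;      /* write CPUX_CON_REG */
    }

    int main(void)
    {
            struct cpux_dev dev = { .idx = 0 };

            /* read-modify-write of "global control" at index 0: set enable bit */
            cpux_writel(&dev, cpux_readl(&dev, 0) | 0x1, 0);
            printf("reg[0] = 0x%x\n", dev.regs[0]);
            return 0;
    }
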
diff --git a/drivers/clocksource/timer-microchip-pit64b.c b/drivers/clocksource/timer-microchip-pit64b.c
index abce83d2f00b..d5f1436f33d9 100644
--- a/drivers/clocksource/timer-microchip-pit64b.c
+++ b/drivers/clocksource/timer-microchip-pit64b.c
@@ -61,7 +61,7 @@ struct mchp_pit64b_timer {
};
/**
- * mchp_pit64b_clkevt - PIT64B clockevent data structure
+ * struct mchp_pit64b_clkevt - PIT64B clockevent data structure
* @timer: PIT64B timer
* @clkevt: clockevent
*/
@@ -75,7 +75,7 @@ struct mchp_pit64b_clkevt {
struct mchp_pit64b_clkevt, clkevt))
/**
- * mchp_pit64b_clksrc - PIT64B clocksource data structure
+ * struct mchp_pit64b_clksrc - PIT64B clocksource data structure
* @timer: PIT64B timer
* @clksrc: clocksource
*/
@@ -173,7 +173,8 @@ static int mchp_pit64b_clkevt_shutdown(struct clock_event_device *cedev)
{
struct mchp_pit64b_timer *timer = clkevt_to_mchp_pit64b_timer(cedev);
- writel_relaxed(MCHP_PIT64B_CR_SWRST, timer->base + MCHP_PIT64B_CR);
+ if (!clockevent_state_detached(cedev))
+ mchp_pit64b_suspend(timer);
return 0;
}
@@ -182,35 +183,37 @@ static int mchp_pit64b_clkevt_set_periodic(struct clock_event_device *cedev)
{
struct mchp_pit64b_timer *timer = clkevt_to_mchp_pit64b_timer(cedev);
+ if (clockevent_state_shutdown(cedev))
+ mchp_pit64b_resume(timer);
+
mchp_pit64b_reset(timer, mchp_pit64b_ce_cycles, MCHP_PIT64B_MR_CONT,
MCHP_PIT64B_IER_PERIOD);
return 0;
}
-static int mchp_pit64b_clkevt_set_next_event(unsigned long evt,
- struct clock_event_device *cedev)
+static int mchp_pit64b_clkevt_set_oneshot(struct clock_event_device *cedev)
{
struct mchp_pit64b_timer *timer = clkevt_to_mchp_pit64b_timer(cedev);
- mchp_pit64b_reset(timer, evt, MCHP_PIT64B_MR_ONE_SHOT,
+ if (clockevent_state_shutdown(cedev))
+ mchp_pit64b_resume(timer);
+
+ mchp_pit64b_reset(timer, mchp_pit64b_ce_cycles, MCHP_PIT64B_MR_ONE_SHOT,
MCHP_PIT64B_IER_PERIOD);
return 0;
}
-static void mchp_pit64b_clkevt_suspend(struct clock_event_device *cedev)
+static int mchp_pit64b_clkevt_set_next_event(unsigned long evt,
+ struct clock_event_device *cedev)
{
struct mchp_pit64b_timer *timer = clkevt_to_mchp_pit64b_timer(cedev);
- mchp_pit64b_suspend(timer);
-}
-
-static void mchp_pit64b_clkevt_resume(struct clock_event_device *cedev)
-{
- struct mchp_pit64b_timer *timer = clkevt_to_mchp_pit64b_timer(cedev);
+ mchp_pit64b_reset(timer, evt, MCHP_PIT64B_MR_ONE_SHOT,
+ MCHP_PIT64B_IER_PERIOD);
- mchp_pit64b_resume(timer);
+ return 0;
}
static irqreturn_t mchp_pit64b_interrupt(int irq, void *dev_id)
@@ -242,8 +245,10 @@ static void __init mchp_pit64b_pres_compute(u32 *pres, u32 clk_rate,
}
/**
- * mchp_pit64b_init_mode - prepare PIT64B mode register value to be used at
- * runtime; this includes prescaler and SGCLK bit
+ * mchp_pit64b_init_mode() - prepare PIT64B mode register value to be used at
+ * runtime; this includes prescaler and SGCLK bit
+ * @timer: pointer to pit64b timer to init
+ * @max_rate: maximum rate that timer's clock could use
*
* PIT64B timer may be fed by gclk or pclk. When gclk is used its rate has to
* be at least 3 times lower that pclk's rate. pclk rate is fixed, gclk rate
@@ -341,6 +346,7 @@ static int __init mchp_pit64b_init_clksrc(struct mchp_pit64b_timer *timer,
if (!cs)
return -ENOMEM;
+ mchp_pit64b_resume(timer);
mchp_pit64b_reset(timer, ULLONG_MAX, MCHP_PIT64B_MR_CONT, 0);
mchp_pit64b_cs_base = timer->base;
@@ -362,8 +368,7 @@ static int __init mchp_pit64b_init_clksrc(struct mchp_pit64b_timer *timer,
pr_debug("clksrc: Failed to register PIT64B clocksource!\n");
/* Stop timer. */
- writel_relaxed(MCHP_PIT64B_CR_SWRST,
- timer->base + MCHP_PIT64B_CR);
+ mchp_pit64b_suspend(timer);
kfree(cs);
return ret;
@@ -395,9 +400,8 @@ static int __init mchp_pit64b_init_clkevt(struct mchp_pit64b_timer *timer,
ce->clkevt.rating = 150;
ce->clkevt.set_state_shutdown = mchp_pit64b_clkevt_shutdown;
ce->clkevt.set_state_periodic = mchp_pit64b_clkevt_set_periodic;
+ ce->clkevt.set_state_oneshot = mchp_pit64b_clkevt_set_oneshot;
ce->clkevt.set_next_event = mchp_pit64b_clkevt_set_next_event;
- ce->clkevt.suspend = mchp_pit64b_clkevt_suspend;
- ce->clkevt.resume = mchp_pit64b_clkevt_resume;
ce->clkevt.cpumask = cpumask_of(0);
ce->clkevt.irq = irq;
@@ -448,19 +452,10 @@ static int __init mchp_pit64b_dt_init_timer(struct device_node *node,
if (ret)
goto irq_unmap;
- ret = clk_prepare_enable(timer.pclk);
- if (ret)
- goto irq_unmap;
-
- if (timer.mode & MCHP_PIT64B_MR_SGCLK) {
- ret = clk_prepare_enable(timer.gclk);
- if (ret)
- goto pclk_unprepare;
-
+ if (timer.mode & MCHP_PIT64B_MR_SGCLK)
clk_rate = clk_get_rate(timer.gclk);
- } else {
+ else
clk_rate = clk_get_rate(timer.pclk);
- }
clk_rate = clk_rate / (MCHP_PIT64B_MODE_TO_PRES(timer.mode) + 1);
if (clkevt)
@@ -469,15 +464,10 @@ static int __init mchp_pit64b_dt_init_timer(struct device_node *node,
ret = mchp_pit64b_init_clksrc(&timer, clk_rate);
if (ret)
- goto gclk_unprepare;
+ goto irq_unmap;
return 0;
-gclk_unprepare:
- if (timer.mode & MCHP_PIT64B_MR_SGCLK)
- clk_disable_unprepare(timer.gclk);
-pclk_unprepare:
- clk_disable_unprepare(timer.pclk);
irq_unmap:
irq_dispose_mapping(irq);
io_unmap:
diff --git a/drivers/clocksource/timer-sun4i.c b/drivers/clocksource/timer-sun4i.c
index bb6ea6c19829..94dc6e42e983 100644
--- a/drivers/clocksource/timer-sun4i.c
+++ b/drivers/clocksource/timer-sun4i.c
@@ -128,7 +128,7 @@ static void sun4i_timer_clear_interrupt(void __iomem *base)
static irqreturn_t sun4i_timer_interrupt(int irq, void *dev_id)
{
- struct clock_event_device *evt = (struct clock_event_device *)dev_id;
+ struct clock_event_device *evt = dev_id;
struct timer_of *to = to_timer_of(evt);
sun4i_timer_clear_interrupt(timer_of_base(to));
diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c
index 85900f7fc69f..7d5fa9069906 100644
--- a/drivers/clocksource/timer-sun5i.c
+++ b/drivers/clocksource/timer-sun5i.c
@@ -142,7 +142,7 @@ static int sun5i_clkevt_next_event(unsigned long evt,
static irqreturn_t sun5i_timer_interrupt(int irq, void *dev_id)
{
- struct sun5i_timer_clkevt *ce = (struct sun5i_timer_clkevt *)dev_id;
+ struct sun5i_timer_clkevt *ce = dev_id;
writel(0x1, ce->timer.base + TIMER_IRQ_ST_REG);
ce->clkevt.event_handler(&ce->clkevt);
diff --git a/drivers/clocksource/timer-tegra186.c b/drivers/clocksource/timer-tegra186.c
new file mode 100644
index 000000000000..ea742889ee06
--- /dev/null
+++ b/drivers/clocksource/timer-tegra186.c
@@ -0,0 +1,514 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2019-2020 NVIDIA Corporation. All rights reserved.
+ */
+
+#include <linux/clocksource.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/watchdog.h>
+
+/* shared registers */
+#define TKETSC0 0x000
+#define TKETSC1 0x004
+#define TKEUSEC 0x008
+#define TKEOSC 0x00c
+
+#define TKEIE(x) (0x100 + ((x) * 4))
+#define TKEIE_WDT_MASK(x, y) ((y) << (16 + 4 * (x)))
+
+/* timer registers */
+#define TMRCR 0x000
+#define TMRCR_ENABLE BIT(31)
+#define TMRCR_PERIODIC BIT(30)
+#define TMRCR_PTV(x) ((x) & 0x0fffffff)
+
+#define TMRSR 0x004
+#define TMRSR_INTR_CLR BIT(30)
+
+#define TMRCSSR 0x008
+#define TMRCSSR_SRC_USEC (0 << 0)
+
+/* watchdog registers */
+#define WDTCR 0x000
+#define WDTCR_SYSTEM_POR_RESET_ENABLE BIT(16)
+#define WDTCR_SYSTEM_DEBUG_RESET_ENABLE BIT(15)
+#define WDTCR_REMOTE_INT_ENABLE BIT(14)
+#define WDTCR_LOCAL_FIQ_ENABLE BIT(13)
+#define WDTCR_LOCAL_INT_ENABLE BIT(12)
+#define WDTCR_PERIOD_MASK (0xff << 4)
+#define WDTCR_PERIOD(x) (((x) & 0xff) << 4)
+#define WDTCR_TIMER_SOURCE_MASK 0xf
+#define WDTCR_TIMER_SOURCE(x) ((x) & 0xf)
+
+#define WDTCMDR 0x008
+#define WDTCMDR_DISABLE_COUNTER BIT(1)
+#define WDTCMDR_START_COUNTER BIT(0)
+
+#define WDTUR 0x00c
+#define WDTUR_UNLOCK_PATTERN 0x0000c45a
+
+struct tegra186_timer_soc {
+ unsigned int num_timers;
+ unsigned int num_wdts;
+};
+
+struct tegra186_tmr {
+ struct tegra186_timer *parent;
+ void __iomem *regs;
+ unsigned int index;
+ unsigned int hwirq;
+};
+
+struct tegra186_wdt {
+ struct watchdog_device base;
+
+ void __iomem *regs;
+ unsigned int index;
+ bool locked;
+
+ struct tegra186_tmr *tmr;
+};
+
+static inline struct tegra186_wdt *to_tegra186_wdt(struct watchdog_device *wdd)
+{
+ return container_of(wdd, struct tegra186_wdt, base);
+}
+
+struct tegra186_timer {
+ const struct tegra186_timer_soc *soc;
+ struct device *dev;
+ void __iomem *regs;
+
+ struct tegra186_wdt *wdt;
+ struct clocksource usec;
+ struct clocksource tsc;
+ struct clocksource osc;
+};
+
+static void tmr_writel(struct tegra186_tmr *tmr, u32 value, unsigned int offset)
+{
+ writel_relaxed(value, tmr->regs + offset);
+}
+
+static void wdt_writel(struct tegra186_wdt *wdt, u32 value, unsigned int offset)
+{
+ writel_relaxed(value, wdt->regs + offset);
+}
+
+static u32 wdt_readl(struct tegra186_wdt *wdt, unsigned int offset)
+{
+ return readl_relaxed(wdt->regs + offset);
+}
+
+static struct tegra186_tmr *tegra186_tmr_create(struct tegra186_timer *tegra,
+ unsigned int index)
+{
+ unsigned int offset = 0x10000 + index * 0x10000;
+ struct tegra186_tmr *tmr;
+
+ tmr = devm_kzalloc(tegra->dev, sizeof(*tmr), GFP_KERNEL);
+ if (!tmr)
+ return ERR_PTR(-ENOMEM);
+
+ tmr->parent = tegra;
+ tmr->regs = tegra->regs + offset;
+ tmr->index = index;
+ tmr->hwirq = 0;
+
+ return tmr;
+}
+
+static const struct watchdog_info tegra186_wdt_info = {
+ .options = WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING,
+ .identity = "NVIDIA Tegra186 WDT",
+};
+
+static void tegra186_wdt_disable(struct tegra186_wdt *wdt)
+{
+ /* unlock and disable the watchdog */
+ wdt_writel(wdt, WDTUR_UNLOCK_PATTERN, WDTUR);
+ wdt_writel(wdt, WDTCMDR_DISABLE_COUNTER, WDTCMDR);
+
+ /* disable timer */
+ tmr_writel(wdt->tmr, 0, TMRCR);
+}
+
+static void tegra186_wdt_enable(struct tegra186_wdt *wdt)
+{
+ struct tegra186_timer *tegra = wdt->tmr->parent;
+ u32 value;
+
+ /* unmask hardware IRQ, this may have been lost across powergate */
+ value = TKEIE_WDT_MASK(wdt->index, 1);
+ writel(value, tegra->regs + TKEIE(wdt->tmr->hwirq));
+
+ /* clear interrupt */
+ tmr_writel(wdt->tmr, TMRSR_INTR_CLR, TMRSR);
+
+ /* select microsecond source */
+ tmr_writel(wdt->tmr, TMRCSSR_SRC_USEC, TMRCSSR);
+
+ /* configure timer (system reset happens on the fifth expiration) */
+ value = TMRCR_PTV(wdt->base.timeout * USEC_PER_SEC / 5) |
+ TMRCR_PERIODIC | TMRCR_ENABLE;
+ tmr_writel(wdt->tmr, value, TMRCR);
+
+ if (!wdt->locked) {
+ value = wdt_readl(wdt, WDTCR);
+
+ /* select the proper timer source */
+ value &= ~WDTCR_TIMER_SOURCE_MASK;
+ value |= WDTCR_TIMER_SOURCE(wdt->tmr->index);
+
+ /* single timer period since that's already configured */
+ value &= ~WDTCR_PERIOD_MASK;
+ value |= WDTCR_PERIOD(1);
+
+ /* enable local interrupt for WDT petting */
+ value |= WDTCR_LOCAL_INT_ENABLE;
+
+ /* enable local FIQ and remote interrupt for debug dump */
+ if (0)
+ value |= WDTCR_REMOTE_INT_ENABLE |
+ WDTCR_LOCAL_FIQ_ENABLE;
+
+ /* enable system debug reset (doesn't properly reboot) */
+ if (0)
+ value |= WDTCR_SYSTEM_DEBUG_RESET_ENABLE;
+
+ /* enable system POR reset */
+ value |= WDTCR_SYSTEM_POR_RESET_ENABLE;
+
+ wdt_writel(wdt, value, WDTCR);
+ }
+
+ wdt_writel(wdt, WDTCMDR_START_COUNTER, WDTCMDR);
+}
+
+static int tegra186_wdt_start(struct watchdog_device *wdd)
+{
+ struct tegra186_wdt *wdt = to_tegra186_wdt(wdd);
+
+ tegra186_wdt_enable(wdt);
+
+ return 0;
+}
+
+static int tegra186_wdt_stop(struct watchdog_device *wdd)
+{
+ struct tegra186_wdt *wdt = to_tegra186_wdt(wdd);
+
+ tegra186_wdt_disable(wdt);
+
+ return 0;
+}
+
+static int tegra186_wdt_ping(struct watchdog_device *wdd)
+{
+ struct tegra186_wdt *wdt = to_tegra186_wdt(wdd);
+
+ tegra186_wdt_disable(wdt);
+ tegra186_wdt_enable(wdt);
+
+ return 0;
+}
+
+static int tegra186_wdt_set_timeout(struct watchdog_device *wdd,
+ unsigned int timeout)
+{
+ struct tegra186_wdt *wdt = to_tegra186_wdt(wdd);
+
+ if (watchdog_active(&wdt->base))
+ tegra186_wdt_disable(wdt);
+
+ wdt->base.timeout = timeout;
+
+ if (watchdog_active(&wdt->base))
+ tegra186_wdt_enable(wdt);
+
+ return 0;
+}
+
+static const struct watchdog_ops tegra186_wdt_ops = {
+ .owner = THIS_MODULE,
+ .start = tegra186_wdt_start,
+ .stop = tegra186_wdt_stop,
+ .ping = tegra186_wdt_ping,
+ .set_timeout = tegra186_wdt_set_timeout,
+};
+
+static struct tegra186_wdt *tegra186_wdt_create(struct tegra186_timer *tegra,
+ unsigned int index)
+{
+ unsigned int offset = 0x10000, source;
+ struct tegra186_wdt *wdt;
+ u32 value;
+ int err;
+
+ offset += tegra->soc->num_timers * 0x10000 + index * 0x10000;
+
+ wdt = devm_kzalloc(tegra->dev, sizeof(*wdt), GFP_KERNEL);
+ if (!wdt)
+ return ERR_PTR(-ENOMEM);
+
+ wdt->regs = tegra->regs + offset;
+ wdt->index = index;
+
+ /* read the watchdog configuration since it might be locked down */
+ value = wdt_readl(wdt, WDTCR);
+
+ if (value & WDTCR_LOCAL_INT_ENABLE)
+ wdt->locked = true;
+
+ source = value & WDTCR_TIMER_SOURCE_MASK;
+
+ wdt->tmr = tegra186_tmr_create(tegra, source);
+ if (IS_ERR(wdt->tmr))
+ return ERR_CAST(wdt->tmr);
+
+ wdt->base.info = &tegra186_wdt_info;
+ wdt->base.ops = &tegra186_wdt_ops;
+ wdt->base.min_timeout = 1;
+ wdt->base.max_timeout = 255;
+ wdt->base.parent = tegra->dev;
+
+ err = watchdog_init_timeout(&wdt->base, 5, tegra->dev);
+ if (err < 0) {
+ dev_err(tegra->dev, "failed to initialize timeout: %d\n", err);
+ return ERR_PTR(err);
+ }
+
+ err = devm_watchdog_register_device(tegra->dev, &wdt->base);
+ if (err < 0) {
+ dev_err(tegra->dev, "failed to register WDT: %d\n", err);
+ return ERR_PTR(err);
+ }
+
+ return wdt;
+}
+
+static u64 tegra186_timer_tsc_read(struct clocksource *cs)
+{
+ struct tegra186_timer *tegra = container_of(cs, struct tegra186_timer,
+ tsc);
+ u32 hi, lo, ss;
+
+ hi = readl_relaxed(tegra->regs + TKETSC1);
+
+ /*
+ * The 56-bit value of the TSC is spread across two registers that are
+ * not synchronized. In order to read them atomically, ensure that the
+ * high 24 bits match before and after reading the low 32 bits.
+ */
+ do {
+ /* snapshot the high 24 bits */
+ ss = hi;
+
+ lo = readl_relaxed(tegra->regs + TKETSC0);
+ hi = readl_relaxed(tegra->regs + TKETSC1);
+ } while (hi != ss);
+
+ return (u64)hi << 32 | lo;
+}
+
+static int tegra186_timer_tsc_init(struct tegra186_timer *tegra)
+{
+ tegra->tsc.name = "tsc";
+ tegra->tsc.rating = 300;
+ tegra->tsc.read = tegra186_timer_tsc_read;
+ tegra->tsc.mask = CLOCKSOURCE_MASK(56);
+ tegra->tsc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
+
+ return clocksource_register_hz(&tegra->tsc, 31250000);
+}
+
+static u64 tegra186_timer_osc_read(struct clocksource *cs)
+{
+ struct tegra186_timer *tegra = container_of(cs, struct tegra186_timer,
+ osc);
+
+ return readl_relaxed(tegra->regs + TKEOSC);
+}
+
+static int tegra186_timer_osc_init(struct tegra186_timer *tegra)
+{
+ tegra->osc.name = "osc";
+ tegra->osc.rating = 300;
+ tegra->osc.read = tegra186_timer_osc_read;
+ tegra->osc.mask = CLOCKSOURCE_MASK(32);
+ tegra->osc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
+
+ return clocksource_register_hz(&tegra->osc, 38400000);
+}
+
+static u64 tegra186_timer_usec_read(struct clocksource *cs)
+{
+ struct tegra186_timer *tegra = container_of(cs, struct tegra186_timer,
+ usec);
+
+ return readl_relaxed(tegra->regs + TKEUSEC);
+}
+
+static int tegra186_timer_usec_init(struct tegra186_timer *tegra)
+{
+ tegra->usec.name = "usec";
+ tegra->usec.rating = 300;
+ tegra->usec.read = tegra186_timer_usec_read;
+ tegra->usec.mask = CLOCKSOURCE_MASK(32);
+ tegra->usec.flags = CLOCK_SOURCE_IS_CONTINUOUS;
+
+ return clocksource_register_hz(&tegra->usec, USEC_PER_SEC);
+}
+
+static irqreturn_t tegra186_timer_irq(int irq, void *data)
+{
+ struct tegra186_timer *tegra = data;
+
+ if (watchdog_active(&tegra->wdt->base)) {
+ tegra186_wdt_disable(tegra->wdt);
+ tegra186_wdt_enable(tegra->wdt);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int tegra186_timer_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct tegra186_timer *tegra;
+ unsigned int irq;
+ int err;
+
+ tegra = devm_kzalloc(dev, sizeof(*tegra), GFP_KERNEL);
+ if (!tegra)
+ return -ENOMEM;
+
+ tegra->soc = of_device_get_match_data(dev);
+ dev_set_drvdata(dev, tegra);
+ tegra->dev = dev;
+
+ tegra->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(tegra->regs))
+ return PTR_ERR(tegra->regs);
+
+ err = platform_get_irq(pdev, 0);
+ if (err < 0)
+ return err;
+
+ irq = err;
+
+ /* create a watchdog using a preconfigured timer */
+ tegra->wdt = tegra186_wdt_create(tegra, 0);
+ if (IS_ERR(tegra->wdt)) {
+ err = PTR_ERR(tegra->wdt);
+ dev_err(dev, "failed to create WDT: %d\n", err);
+ return err;
+ }
+
+ err = tegra186_timer_tsc_init(tegra);
+ if (err < 0) {
+ dev_err(dev, "failed to register TSC counter: %d\n", err);
+ return err;
+ }
+
+ err = tegra186_timer_osc_init(tegra);
+ if (err < 0) {
+ dev_err(dev, "failed to register OSC counter: %d\n", err);
+ goto unregister_tsc;
+ }
+
+ err = tegra186_timer_usec_init(tegra);
+ if (err < 0) {
+ dev_err(dev, "failed to register USEC counter: %d\n", err);
+ goto unregister_osc;
+ }
+
+ err = devm_request_irq(dev, irq, tegra186_timer_irq, 0,
+ "tegra186-timer", tegra);
+ if (err < 0) {
+ dev_err(dev, "failed to request IRQ#%u: %d\n", irq, err);
+ goto unregister_usec;
+ }
+
+ return 0;
+
+unregister_usec:
+ clocksource_unregister(&tegra->usec);
+unregister_osc:
+ clocksource_unregister(&tegra->osc);
+unregister_tsc:
+ clocksource_unregister(&tegra->tsc);
+ return err;
+}
+
+static int tegra186_timer_remove(struct platform_device *pdev)
+{
+ struct tegra186_timer *tegra = platform_get_drvdata(pdev);
+
+ clocksource_unregister(&tegra->usec);
+ clocksource_unregister(&tegra->osc);
+ clocksource_unregister(&tegra->tsc);
+
+ return 0;
+}
+
+static int __maybe_unused tegra186_timer_suspend(struct device *dev)
+{
+ struct tegra186_timer *tegra = dev_get_drvdata(dev);
+
+ if (watchdog_active(&tegra->wdt->base))
+ tegra186_wdt_disable(tegra->wdt);
+
+ return 0;
+}
+
+static int __maybe_unused tegra186_timer_resume(struct device *dev)
+{
+ struct tegra186_timer *tegra = dev_get_drvdata(dev);
+
+ if (watchdog_active(&tegra->wdt->base))
+ tegra186_wdt_enable(tegra->wdt);
+
+ return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(tegra186_timer_pm_ops, tegra186_timer_suspend,
+ tegra186_timer_resume);
+
+static const struct tegra186_timer_soc tegra186_timer = {
+ .num_timers = 10,
+ .num_wdts = 3,
+};
+
+static const struct tegra186_timer_soc tegra234_timer = {
+ .num_timers = 16,
+ .num_wdts = 3,
+};
+
+static const struct of_device_id tegra186_timer_of_match[] = {
+ { .compatible = "nvidia,tegra186-timer", .data = &tegra186_timer },
+ { .compatible = "nvidia,tegra234-timer", .data = &tegra234_timer },
+ { }
+};
+MODULE_DEVICE_TABLE(of, tegra186_timer_of_match);
+
+static struct platform_driver tegra186_wdt_driver = {
+ .driver = {
+ .name = "tegra186-timer",
+ .pm = &tegra186_timer_pm_ops,
+ .of_match_table = tegra186_timer_of_match,
+ },
+ .probe = tegra186_timer_probe,
+ .remove = tegra186_timer_remove,
+};
+module_platform_driver(tegra186_wdt_driver);
+
+MODULE_AUTHOR("Thierry Reding <treding@nvidia.com>");
+MODULE_DESCRIPTION("NVIDIA Tegra186 timers driver");
+MODULE_LICENSE("GPL v2");
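
tegra186_timer_tsc_read() above handles a 56-bit counter whose halves live in two registers that cannot be read atomically: it samples the high word, reads low then high again, and retries until the two high reads agree. The same retry idiom in a self-contained form (a fake counter that advances on every register access stands in for the hardware):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t hw_counter = 0x00ffffffff12ULL;  /* 56-bit counter value */

    /* Each register access sees the counter advance a little, like real HW. */
    static uint32_t read_lo(void) { hw_counter += 3; return (uint32_t)hw_counter; }
    static uint32_t read_hi(void) { hw_counter += 3; return (uint32_t)(hw_counter >> 32); }

    static uint64_t read_counter(void)
    {
            uint32_t hi, lo, ss;

            hi = read_hi();
            do {
                    ss = hi;               /* snapshot the high word */
                    lo = read_lo();
                    hi = read_hi();
            } while (hi != ss);            /* retry if the high word moved */

            return (uint64_t)hi << 32 | lo;
    }

    int main(void)
    {
            printf("counter = 0x%llx\n", (unsigned long long)read_counter());
            return 0;
    }
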
diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c
index c194e8f74e1d..469f7c91564b 100644
--- a/drivers/clocksource/timer-ti-dm.c
+++ b/drivers/clocksource/timer-ti-dm.c
@@ -44,6 +44,121 @@ enum {
REQUEST_BY_NODE,
};
+static inline u32 __omap_dm_timer_read(struct omap_dm_timer *timer, u32 reg,
+ int posted)
+{
+ if (posted)
+ while (readl_relaxed(timer->pend) & (reg >> WPSHIFT))
+ cpu_relax();
+
+ return readl_relaxed(timer->func_base + (reg & 0xff));
+}
+
+static inline void __omap_dm_timer_write(struct omap_dm_timer *timer,
+ u32 reg, u32 val, int posted)
+{
+ if (posted)
+ while (readl_relaxed(timer->pend) & (reg >> WPSHIFT))
+ cpu_relax();
+
+ writel_relaxed(val, timer->func_base + (reg & 0xff));
+}
+
+static inline void __omap_dm_timer_init_regs(struct omap_dm_timer *timer)
+{
+ u32 tidr;
+
+ /* Assume v1 ip if bits [31:16] are zero */
+ tidr = readl_relaxed(timer->io_base);
+ if (!(tidr >> 16)) {
+ timer->revision = 1;
+ timer->irq_stat = timer->io_base + OMAP_TIMER_V1_STAT_OFFSET;
+ timer->irq_ena = timer->io_base + OMAP_TIMER_V1_INT_EN_OFFSET;
+ timer->irq_dis = timer->io_base + OMAP_TIMER_V1_INT_EN_OFFSET;
+ timer->pend = timer->io_base + _OMAP_TIMER_WRITE_PEND_OFFSET;
+ timer->func_base = timer->io_base;
+ } else {
+ timer->revision = 2;
+ timer->irq_stat = timer->io_base + OMAP_TIMER_V2_IRQSTATUS;
+ timer->irq_ena = timer->io_base + OMAP_TIMER_V2_IRQENABLE_SET;
+ timer->irq_dis = timer->io_base + OMAP_TIMER_V2_IRQENABLE_CLR;
+ timer->pend = timer->io_base +
+ _OMAP_TIMER_WRITE_PEND_OFFSET +
+ OMAP_TIMER_V2_FUNC_OFFSET;
+ timer->func_base = timer->io_base + OMAP_TIMER_V2_FUNC_OFFSET;
+ }
+}
+
+/*
+ * __omap_dm_timer_enable_posted - enables write posted mode
+ * @timer: pointer to timer instance handle
+ *
+ * Enables the write posted mode for the timer. When posted mode is enabled
+ * writes to certain timer registers are immediately acknowledged by the
+ * internal bus and hence prevents stalling the CPU waiting for the write to
+ * complete. Enabling this feature can improve performance for writing to the
+ * timer registers.
+ */
+static inline void __omap_dm_timer_enable_posted(struct omap_dm_timer *timer)
+{
+ if (timer->posted)
+ return;
+
+ if (timer->errata & OMAP_TIMER_ERRATA_I103_I767) {
+ timer->posted = OMAP_TIMER_NONPOSTED;
+ __omap_dm_timer_write(timer, OMAP_TIMER_IF_CTRL_REG, 0, 0);
+ return;
+ }
+
+ __omap_dm_timer_write(timer, OMAP_TIMER_IF_CTRL_REG,
+ OMAP_TIMER_CTRL_POSTED, 0);
+ timer->context.tsicr = OMAP_TIMER_CTRL_POSTED;
+ timer->posted = OMAP_TIMER_POSTED;
+}
+
+static inline void __omap_dm_timer_stop(struct omap_dm_timer *timer,
+ int posted, unsigned long rate)
+{
+ u32 l;
+
+ l = __omap_dm_timer_read(timer, OMAP_TIMER_CTRL_REG, posted);
+ if (l & OMAP_TIMER_CTRL_ST) {
+ l &= ~0x1;
+ __omap_dm_timer_write(timer, OMAP_TIMER_CTRL_REG, l, posted);
+#ifdef CONFIG_ARCH_OMAP2PLUS
+ /* Readback to make sure write has completed */
+ __omap_dm_timer_read(timer, OMAP_TIMER_CTRL_REG, posted);
+ /*
+ * Wait for functional clock period x 3.5 to make sure that
+ * timer is stopped
+ */
+ udelay(3500000 / rate + 1);
+#endif
+ }
+
+ /* Ack possibly pending interrupt */
+ writel_relaxed(OMAP_TIMER_INT_OVERFLOW, timer->irq_stat);
+}
+
+static inline void __omap_dm_timer_int_enable(struct omap_dm_timer *timer,
+ unsigned int value)
+{
+ writel_relaxed(value, timer->irq_ena);
+ __omap_dm_timer_write(timer, OMAP_TIMER_WAKEUP_EN_REG, value, 0);
+}
+
+static inline unsigned int
+__omap_dm_timer_read_counter(struct omap_dm_timer *timer, int posted)
+{
+ return __omap_dm_timer_read(timer, OMAP_TIMER_COUNTER_REG, posted);
+}
+
+static inline void __omap_dm_timer_write_status(struct omap_dm_timer *timer,
+ unsigned int value)
+{
+ writel_relaxed(value, timer->irq_stat);
+}
+
/**
* omap_dm_timer_read_reg - read timer registers in posted and non-posted mode
* @timer: timer pointer over which read operation to perform
@@ -921,6 +1036,10 @@ static const struct dmtimer_platform_data omap3plus_pdata = {
.timer_ops = &dmtimer_ops,
};
+static const struct dmtimer_platform_data am6_pdata = {
+ .timer_ops = &dmtimer_ops,
+};
+
static const struct of_device_id omap_timer_match[] = {
{
.compatible = "ti,omap2420-timer",
@@ -949,6 +1068,10 @@ static const struct of_device_id omap_timer_match[] = {
.compatible = "ti,dm816-timer",
.data = &omap3plus_pdata,
},
+ {
+ .compatible = "ti,am654-timer",
+ .data = &am6_pdata,
+ },
{},
};
MODULE_DEVICE_TABLE(of, omap_timer_match);
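
The helpers moved into timer-ti-dm.c implement TI's "posted" register access: in posted mode a write is acknowledged before it reaches the timer, so before touching a register the code spins on the write-pending register until the bit for that register clears. A stripped-down model of that wait-then-access pattern (the pending-bit layout here is invented):

    #include <stdint.h>
    #include <stdio.h>

    struct dm_timer {
            uint32_t regs[8];
            uint32_t pend;          /* one pending bit per register index */
            int posted;
    };

    /* Simulate the bus eventually retiring posted writes. */
    static void bus_tick(struct dm_timer *t) { t->pend = 0; }

    static void timer_write(struct dm_timer *t, unsigned int reg, uint32_t val)
    {
            if (t->posted)
                    while (t->pend & (1u << reg))   /* wait for prior write */
                            bus_tick(t);            /* cpu_relax() in the kernel */

            t->regs[reg] = val;
            if (t->posted)
                    t->pend |= 1u << reg;           /* write is now in flight */
    }

    static uint32_t timer_read(struct dm_timer *t, unsigned int reg)
    {
            if (t->posted)
                    while (t->pend & (1u << reg))
                            bus_tick(t);
            return t->regs[reg];
    }

    int main(void)
    {
            struct dm_timer t = { .posted = 1 };

            timer_write(&t, 2, 0xabcd);
            printf("reg2 = 0x%x\n", timer_read(&t, 2));
            return 0;
    }
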
diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c
index 37a1eb20f5ba..76f6b3884e6b 100644
--- a/drivers/cpufreq/mediatek-cpufreq.c
+++ b/drivers/cpufreq/mediatek-cpufreq.c
@@ -439,9 +439,13 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu)
/* Both presence and absence of sram regulator are valid cases. */
info->sram_reg = regulator_get_optional(cpu_dev, "sram");
- if (IS_ERR(info->sram_reg))
+ if (IS_ERR(info->sram_reg)) {
+ ret = PTR_ERR(info->sram_reg);
+ if (ret == -EPROBE_DEFER)
+ goto out_free_resources;
+
info->sram_reg = NULL;
- else {
+ } else {
ret = regulator_enable(info->sram_reg);
if (ret) {
dev_warn(cpu_dev, "cpu%d: failed to enable vsram\n", cpu);
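
The cpufreq fix tightens the optional-regulator handling: previously any lookup error was silently treated as "no sram regulator", which swallowed -EPROBE_DEFER; now probe deferral is propagated while other errors still fall back to the regulator being absent. A small sketch of that pattern for an optional resource (the error values are used only for illustration):

    #include <stdio.h>

    #define EPROBE_DEFER 517
    #define ENODEV        19

    /* Pretend lookup of an optional resource: <0 on error, >0 is a handle. */
    static long get_optional_resource(int scenario)
    {
            switch (scenario) {
            case 0:  return 42;               /* present */
            case 1:  return -ENODEV;          /* genuinely absent: fine */
            default: return -EPROBE_DEFER;    /* provider not ready yet */
            }
    }

    static int probe(int scenario)
    {
            long res = get_optional_resource(scenario);

            if (res < 0) {
                    if (res == -EPROBE_DEFER)
                            return -EPROBE_DEFER;   /* retry probe later */
                    res = 0;                        /* treat as "not there" */
            }

            printf("scenario %d: handle=%ld\n", scenario, res);
            return 0;
    }

    int main(void)
    {
            for (int s = 0; s < 3; s++)
                    if (probe(s))
                            printf("scenario %d: deferred\n", s);
            return 0;
    }
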
diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm
index be7f512109f7..747aa537389b 100644
--- a/drivers/cpuidle/Kconfig.arm
+++ b/drivers/cpuidle/Kconfig.arm
@@ -3,7 +3,8 @@
# ARM CPU Idle drivers
#
config ARM_CPUIDLE
- bool "Generic ARM/ARM64 CPU idle Driver"
+ bool "Generic ARM CPU idle Driver"
+ depends on ARM
select DT_IDLE_STATES
select CPU_IDLE_MULTIPLE_DRIVERS
help
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 0cce6e4ec946..205acb2c744d 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -343,7 +343,7 @@ void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
if (old->context != context)
continue;
- dma_resv_list_set(list, i, replacement, usage);
+ dma_resv_list_set(list, i, dma_fence_get(replacement), usage);
dma_fence_put(old);
}
}
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 59b0bedc9c24..c8fa7dcfdbd0 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -103,9 +103,14 @@ static void dimm_setup_label(struct dimm_info *dimm, u16 handle)
dmi_memdev_name(handle, &bank, &device);
- /* both strings must be non-zero */
- if (bank && *bank && device && *device)
- snprintf(dimm->label, sizeof(dimm->label), "%s %s", bank, device);
+ /*
+ * Set to a NULL string when both bank and device are zero. In this case,
+ * the label assigned by default will be preserved.
+ */
+ snprintf(dimm->label, sizeof(dimm->label), "%s%s%s",
+ (bank && *bank) ? bank : "",
+ (bank && *bank && device && *device) ? " " : "",
+ (device && *device) ? device : "");
}
static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry)
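
The ghes_edac label fix replaces the "both strings must be non-empty" check with a single snprintf() whose three "%s" arguments degrade gracefully: bank only, device only, both joined by a space, or an empty string. A quick demonstration of that formatting trick:

    #include <stdio.h>

    static void build_label(char *out, size_t len, const char *bank,
                            const char *device)
    {
            snprintf(out, len, "%s%s%s",
                     (bank && *bank) ? bank : "",
                     (bank && *bank && device && *device) ? " " : "",
                     (device && *device) ? device : "");
    }

    int main(void)
    {
            char label[64];

            build_label(label, sizeof(label), "BANK 0", "DIMM_A1");
            printf("'%s'\n", label);                /* 'BANK 0 DIMM_A1' */

            build_label(label, sizeof(label), NULL, "DIMM_A1");
            printf("'%s'\n", label);                /* 'DIMM_A1' */

            build_label(label, sizeof(label), NULL, NULL);
            printf("'%s'\n", label);                /* '' */
            return 0;
    }
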
diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
index 1cee64b80a7e..f7d37c282819 100644
--- a/drivers/edac/synopsys_edac.c
+++ b/drivers/edac/synopsys_edac.c
@@ -514,6 +514,28 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)
memset(p, 0, sizeof(*p));
}
+static void enable_intr(struct synps_edac_priv *priv)
+{
+ /* Enable UE/CE Interrupts */
+ if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)
+ writel(DDR_UE_MASK | DDR_CE_MASK,
+ priv->baseaddr + ECC_CLR_OFST);
+ else
+ writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
+ priv->baseaddr + DDR_QOS_IRQ_EN_OFST);
+
+}
+
+static void disable_intr(struct synps_edac_priv *priv)
+{
+ /* Disable UE/CE Interrupts */
+ if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)
+ writel(0x0, priv->baseaddr + ECC_CLR_OFST);
+ else
+ writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
+ priv->baseaddr + DDR_QOS_IRQ_DB_OFST);
+}
+
/**
* intr_handler - Interrupt Handler for ECC interrupts.
* @irq: IRQ number.
@@ -555,6 +577,9 @@ static irqreturn_t intr_handler(int irq, void *dev_id)
/* v3.0 of the controller does not have this register */
if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR))
writel(regval, priv->baseaddr + DDR_QOS_IRQ_STAT_OFST);
+ else
+ enable_intr(priv);
+
return IRQ_HANDLED;
}
@@ -837,25 +862,6 @@ static void mc_init(struct mem_ctl_info *mci, struct platform_device *pdev)
init_csrows(mci);
}
-static void enable_intr(struct synps_edac_priv *priv)
-{
- /* Enable UE/CE Interrupts */
- if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)
- writel(DDR_UE_MASK | DDR_CE_MASK,
- priv->baseaddr + ECC_CLR_OFST);
- else
- writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
- priv->baseaddr + DDR_QOS_IRQ_EN_OFST);
-
-}
-
-static void disable_intr(struct synps_edac_priv *priv)
-{
- /* Disable UE/CE Interrupts */
- writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
- priv->baseaddr + DDR_QOS_IRQ_DB_OFST);
-}
-
static int setup_irq(struct mem_ctl_info *mci,
struct platform_device *pdev)
{
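The synopsys_edac change moves enable_intr()/disable_intr() ahead of the interrupt handler so that controllers whose interrupt enable self-clears can be re-armed from the ISR. A hedged sketch of that re-arm pattern, with hypothetical names (example_isr, example_priv, QUIRK_INTR_SELF_CLEAR are illustrative only):

	/* Sketch: when the enable bit self-clears after an event, the
	 * handler has to re-arm the interrupt before returning.
	 */
	static irqreturn_t example_isr(int irq, void *dev_id)
	{
		struct example_priv *priv = dev_id;

		example_handle_errors(priv);

		if (priv->quirks & QUIRK_INTR_SELF_CLEAR)
			example_enable_intr(priv);	/* re-arm for the next event */

		return IRQ_HANDLED;
	}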
diff --git a/drivers/firmware/efi/reboot.c b/drivers/firmware/efi/reboot.c
index 73089a24f04b..ceae84c19d22 100644
--- a/drivers/firmware/efi/reboot.c
+++ b/drivers/firmware/efi/reboot.c
@@ -6,7 +6,7 @@
#include <linux/efi.h>
#include <linux/reboot.h>
-static void (*orig_pm_power_off)(void);
+static struct sys_off_handler *efi_sys_off_handler;
int efi_reboot_quirk_mode = -1;
@@ -51,15 +51,11 @@ bool __weak efi_poweroff_required(void)
return false;
}
-static void efi_power_off(void)
+static int efi_power_off(struct sys_off_data *data)
{
efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL);
- /*
- * The above call should not return, if it does fall back to
- * the original power off method (typically ACPI poweroff).
- */
- if (orig_pm_power_off)
- orig_pm_power_off();
+
+ return NOTIFY_DONE;
}
static int __init efi_shutdown_init(void)
@@ -68,8 +64,13 @@ static int __init efi_shutdown_init(void)
return -ENODEV;
if (efi_poweroff_required()) {
- orig_pm_power_off = pm_power_off;
- pm_power_off = efi_power_off;
+ /* SYS_OFF_PRIO_FIRMWARE + 1 so that it runs before acpi_power_off */
+ efi_sys_off_handler =
+ register_sys_off_handler(SYS_OFF_MODE_POWER_OFF,
+ SYS_OFF_PRIO_FIRMWARE + 1,
+ efi_power_off, NULL);
+ if (IS_ERR(efi_sys_off_handler))
+ return PTR_ERR(efi_sys_off_handler);
}
return 0;
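The efi/reboot.c hunk replaces the old pm_power_off chaining with register_sys_off_handler(), which lets several handlers coexist and be ordered by priority. A minimal sketch of the same registration pattern (example_power_off is illustrative; the API calls are the ones used in the diff):

	/* Sketch: register a prioritised power-off handler instead of
	 * overwriting the global pm_power_off pointer.
	 */
	static int example_power_off(struct sys_off_data *data)
	{
		/* platform-specific shutdown goes here */
		return NOTIFY_DONE;	/* fall through to lower-priority handlers */
	}

	static int __init example_power_off_init(void)
	{
		struct sys_off_handler *h;

		h = register_sys_off_handler(SYS_OFF_MODE_POWER_OFF,
					     SYS_OFF_PRIO_FIRMWARE + 1,
					     example_power_off, NULL);
		return PTR_ERR_OR_ZERO(h);
	}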
diff --git a/drivers/gpio/gpio-msc313.c b/drivers/gpio/gpio-msc313.c
index b2c90bdd39d0..52d7b8d99170 100644
--- a/drivers/gpio/gpio-msc313.c
+++ b/drivers/gpio/gpio-msc313.c
@@ -550,15 +550,12 @@ static struct irq_chip msc313_gpio_irqchip = {
* so we need to provide the fwspec. Essentially gpiochip_populate_parent_fwspec_twocell
* that puts GIC_SPI into the first cell.
*/
-static void *msc313_gpio_populate_parent_fwspec(struct gpio_chip *gc,
- unsigned int parent_hwirq,
- unsigned int parent_type)
+static int msc313_gpio_populate_parent_fwspec(struct gpio_chip *gc,
+ union gpio_irq_fwspec *gfwspec,
+ unsigned int parent_hwirq,
+ unsigned int parent_type)
{
- struct irq_fwspec *fwspec;
-
- fwspec = kmalloc(sizeof(*fwspec), GFP_KERNEL);
- if (!fwspec)
- return NULL;
+ struct irq_fwspec *fwspec = &gfwspec->fwspec;
fwspec->fwnode = gc->irq.parent_domain->fwnode;
fwspec->param_count = 3;
@@ -566,7 +563,7 @@ static void *msc313_gpio_populate_parent_fwspec(struct gpio_chip *gc,
fwspec->param[1] = parent_hwirq;
fwspec->param[2] = parent_type;
- return fwspec;
+ return 0;
}
static int msc313e_gpio_child_to_parent_hwirq(struct gpio_chip *chip,
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 08bc52c3cdcb..ecd7d169470b 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -351,6 +351,9 @@ static const struct regmap_config pca953x_i2c_regmap = {
.reg_bits = 8,
.val_bits = 8,
+ .use_single_read = true,
+ .use_single_write = true,
+
.readable_reg = pca953x_readable_register,
.writeable_reg = pca953x_writeable_register,
.volatile_reg = pca953x_volatile_register,
@@ -906,15 +909,18 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert)
{
DECLARE_BITMAP(val, MAX_LINE);
+ u8 regaddr;
int ret;
- ret = regcache_sync_region(chip->regmap, chip->regs->output,
- chip->regs->output + NBANK(chip));
+ regaddr = pca953x_recalc_addr(chip, chip->regs->output, 0);
+ ret = regcache_sync_region(chip->regmap, regaddr,
+ regaddr + NBANK(chip) - 1);
if (ret)
goto out;
- ret = regcache_sync_region(chip->regmap, chip->regs->direction,
- chip->regs->direction + NBANK(chip));
+ regaddr = pca953x_recalc_addr(chip, chip->regs->direction, 0);
+ ret = regcache_sync_region(chip->regmap, regaddr,
+ regaddr + NBANK(chip) - 1);
if (ret)
goto out;
@@ -1127,14 +1133,14 @@ static int pca953x_regcache_sync(struct device *dev)
* sync these registers first and only then sync the rest.
*/
regaddr = pca953x_recalc_addr(chip, chip->regs->direction, 0);
- ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip));
+ ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip) - 1);
if (ret) {
dev_err(dev, "Failed to sync GPIO dir registers: %d\n", ret);
return ret;
}
regaddr = pca953x_recalc_addr(chip, chip->regs->output, 0);
- ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip));
+ ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip) - 1);
if (ret) {
dev_err(dev, "Failed to sync GPIO out registers: %d\n", ret);
return ret;
@@ -1144,7 +1150,7 @@ static int pca953x_regcache_sync(struct device *dev)
if (chip->driver_data & PCA_PCAL) {
regaddr = pca953x_recalc_addr(chip, PCAL953X_IN_LATCH, 0);
ret = regcache_sync_region(chip->regmap, regaddr,
- regaddr + NBANK(chip));
+ regaddr + NBANK(chip) - 1);
if (ret) {
dev_err(dev, "Failed to sync INT latch registers: %d\n",
ret);
@@ -1153,7 +1159,7 @@ static int pca953x_regcache_sync(struct device *dev)
regaddr = pca953x_recalc_addr(chip, PCAL953X_INT_MASK, 0);
ret = regcache_sync_region(chip->regmap, regaddr,
- regaddr + NBANK(chip));
+ regaddr + NBANK(chip) - 1);
if (ret) {
dev_err(dev, "Failed to sync INT mask registers: %d\n",
ret);
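All of the pca953x hunks make the same one-register adjustment: regcache_sync_region() takes an inclusive [min, max] register range, so syncing NBANK(chip) registers starting at regaddr must end at regaddr + NBANK(chip) - 1. A one-function sketch (example_sync_bank is illustrative):

	/* Sketch: sync n consecutive registers starting at base; the last
	 * register is base + n - 1 because the range is inclusive.
	 */
	static int example_sync_bank(struct regmap *map, unsigned int base,
				     unsigned int n)
	{
		return regcache_sync_region(map, base, base + n - 1);
	}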
diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c
index 98109839102f..1020c2feb249 100644
--- a/drivers/gpio/gpio-sim.c
+++ b/drivers/gpio/gpio-sim.c
@@ -991,28 +991,22 @@ static struct configfs_attribute *gpio_sim_device_config_attrs[] = {
};
struct gpio_sim_chip_name_ctx {
- struct gpio_sim_device *dev;
+ struct fwnode_handle *swnode;
char *page;
};
static int gpio_sim_emit_chip_name(struct device *dev, void *data)
{
struct gpio_sim_chip_name_ctx *ctx = data;
- struct fwnode_handle *swnode;
- struct gpio_sim_bank *bank;
/* This would be the sysfs device exported in /sys/class/gpio. */
if (dev->class)
return 0;
- swnode = dev_fwnode(dev);
+ if (device_match_fwnode(dev, ctx->swnode))
+ return sprintf(ctx->page, "%s\n", dev_name(dev));
- list_for_each_entry(bank, &ctx->dev->bank_list, siblings) {
- if (bank->swnode == swnode)
- return sprintf(ctx->page, "%s\n", dev_name(dev));
- }
-
- return -ENODATA;
+ return 0;
}
static ssize_t gpio_sim_bank_config_chip_name_show(struct config_item *item,
@@ -1020,7 +1014,7 @@ static ssize_t gpio_sim_bank_config_chip_name_show(struct config_item *item,
{
struct gpio_sim_bank *bank = to_gpio_sim_bank(item);
struct gpio_sim_device *dev = gpio_sim_bank_get_device(bank);
- struct gpio_sim_chip_name_ctx ctx = { dev, page };
+ struct gpio_sim_chip_name_ctx ctx = { bank->swnode, page };
int ret;
mutex_lock(&dev->lock);
diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
index ff2d2a1f9c73..e4fb4cb38a0f 100644
--- a/drivers/gpio/gpio-tegra.c
+++ b/drivers/gpio/gpio-tegra.c
@@ -443,15 +443,12 @@ static int tegra_gpio_child_to_parent_hwirq(struct gpio_chip *chip,
return 0;
}
-static void *tegra_gpio_populate_parent_fwspec(struct gpio_chip *chip,
- unsigned int parent_hwirq,
- unsigned int parent_type)
+static int tegra_gpio_populate_parent_fwspec(struct gpio_chip *chip,
+ union gpio_irq_fwspec *gfwspec,
+ unsigned int parent_hwirq,
+ unsigned int parent_type)
{
- struct irq_fwspec *fwspec;
-
- fwspec = kmalloc(sizeof(*fwspec), GFP_KERNEL);
- if (!fwspec)
- return NULL;
+ struct irq_fwspec *fwspec = &gfwspec->fwspec;
fwspec->fwnode = chip->irq.parent_domain->fwnode;
fwspec->param_count = 3;
@@ -459,7 +456,7 @@ static void *tegra_gpio_populate_parent_fwspec(struct gpio_chip *chip,
fwspec->param[1] = parent_hwirq;
fwspec->param[2] = parent_type;
- return fwspec;
+ return 0;
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c
index de28a68daea0..54d9fa7da9c1 100644
--- a/drivers/gpio/gpio-tegra186.c
+++ b/drivers/gpio/gpio-tegra186.c
@@ -621,16 +621,13 @@ static int tegra186_gpio_irq_domain_translate(struct irq_domain *domain,
return 0;
}
-static void *tegra186_gpio_populate_parent_fwspec(struct gpio_chip *chip,
- unsigned int parent_hwirq,
- unsigned int parent_type)
+static int tegra186_gpio_populate_parent_fwspec(struct gpio_chip *chip,
+ union gpio_irq_fwspec *gfwspec,
+ unsigned int parent_hwirq,
+ unsigned int parent_type)
{
struct tegra_gpio *gpio = gpiochip_get_data(chip);
- struct irq_fwspec *fwspec;
-
- fwspec = kmalloc(sizeof(*fwspec), GFP_KERNEL);
- if (!fwspec)
- return NULL;
+ struct irq_fwspec *fwspec = &gfwspec->fwspec;
fwspec->fwnode = chip->irq.parent_domain->fwnode;
fwspec->param_count = 3;
@@ -638,7 +635,7 @@ static void *tegra186_gpio_populate_parent_fwspec(struct gpio_chip *chip,
fwspec->param[1] = parent_hwirq;
fwspec->param[2] = parent_type;
- return fwspec;
+ return 0;
}
static int tegra186_gpio_child_to_parent_hwirq(struct gpio_chip *chip,
diff --git a/drivers/gpio/gpio-thunderx.c b/drivers/gpio/gpio-thunderx.c
index 9f66deab46ea..cc62c6e64103 100644
--- a/drivers/gpio/gpio-thunderx.c
+++ b/drivers/gpio/gpio-thunderx.c
@@ -15,8 +15,6 @@
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/spinlock.h>
-#include <asm-generic/msi.h>
-
#define GPIO_RX_DAT 0x0
#define GPIO_TX_SET 0x8
@@ -408,18 +406,15 @@ static int thunderx_gpio_child_to_parent_hwirq(struct gpio_chip *gc,
return 0;
}
-static void *thunderx_gpio_populate_parent_alloc_info(struct gpio_chip *chip,
- unsigned int parent_hwirq,
- unsigned int parent_type)
+static int thunderx_gpio_populate_parent_alloc_info(struct gpio_chip *chip,
+ union gpio_irq_fwspec *gfwspec,
+ unsigned int parent_hwirq,
+ unsigned int parent_type)
{
- msi_alloc_info_t *info;
-
- info = kmalloc(sizeof(*info), GFP_KERNEL);
- if (!info)
- return NULL;
+ msi_alloc_info_t *info = &gfwspec->msiinfo;
info->hwirq = parent_hwirq;
- return info;
+ return 0;
}
static int thunderx_gpio_probe(struct pci_dev *pdev,
diff --git a/drivers/gpio/gpio-visconti.c b/drivers/gpio/gpio-visconti.c
index e6534ea1eaa7..5e108ba9956a 100644
--- a/drivers/gpio/gpio-visconti.c
+++ b/drivers/gpio/gpio-visconti.c
@@ -103,15 +103,12 @@ static int visconti_gpio_child_to_parent_hwirq(struct gpio_chip *gc,
return -EINVAL;
}
-static void *visconti_gpio_populate_parent_fwspec(struct gpio_chip *chip,
- unsigned int parent_hwirq,
- unsigned int parent_type)
+static int visconti_gpio_populate_parent_fwspec(struct gpio_chip *chip,
+ union gpio_irq_fwspec *gfwspec,
+ unsigned int parent_hwirq,
+ unsigned int parent_type)
{
- struct irq_fwspec *fwspec;
-
- fwspec = kmalloc(sizeof(*fwspec), GFP_KERNEL);
- if (!fwspec)
- return NULL;
+ struct irq_fwspec *fwspec = &gfwspec->fwspec;
fwspec->fwnode = chip->irq.parent_domain->fwnode;
fwspec->param_count = 3;
@@ -119,7 +116,7 @@ static void *visconti_gpio_populate_parent_fwspec(struct gpio_chip *chip,
fwspec->param[1] = parent_hwirq;
fwspec->param[2] = parent_type;
- return fwspec;
+ return 0;
}
static int visconti_gpio_probe(struct platform_device *pdev)
diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c
index b6d3a57e27ed..7f8e2fed2988 100644
--- a/drivers/gpio/gpio-xilinx.c
+++ b/drivers/gpio/gpio-xilinx.c
@@ -99,7 +99,7 @@ static inline void xgpio_set_value32(unsigned long *map, int bit, u32 v)
const unsigned long offset = (bit % BITS_PER_LONG) & BIT(5);
map[index] &= ~(0xFFFFFFFFul << offset);
- map[index] |= v << offset;
+ map[index] |= (unsigned long)v << offset;
}
static inline int xgpio_regoffset(struct xgpio_instance *chip, int ch)
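The gpio-xilinx fix widens v before the shift: on a 64-bit kernel the offset can be 32, and shifting a 32-bit value by 32 bits is undefined, so the value is cast to unsigned long first. A tiny sketch of the same idea (example_or_in_word is illustrative and, like the driver code, assumes the offset stays below the width of unsigned long):

	/* Sketch: promote the 32-bit operand to long width before shifting
	 * so a shift count of 32 remains well defined on 64-bit kernels.
	 */
	static inline void example_or_in_word(unsigned long *word, u32 v,
					      unsigned int offset)
	{
		*word |= (unsigned long)v << offset;
	}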
diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index 0c9a63becfef..b26e64338376 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -421,6 +421,10 @@ out_free_lh:
* @work: the worker that implements software debouncing
* @sw_debounced: flag indicating if the software debouncer is active
* @level: the current debounced physical level of the line
+ * @hdesc: the Hardware Timestamp Engine (HTE) descriptor
+ * @raw_level: the line level at the time of event
+ * @total_discard_seq: the running counter of the discarded events
+ * @last_seqno: the last sequence number before debounce period expires
*/
struct line {
struct gpio_desc *desc;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 9535f48e18d1..68d9f95d7799 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1107,7 +1107,7 @@ static int gpiochip_hierarchy_irq_domain_alloc(struct irq_domain *d,
irq_hw_number_t hwirq;
unsigned int type = IRQ_TYPE_NONE;
struct irq_fwspec *fwspec = data;
- void *parent_arg;
+ union gpio_irq_fwspec gpio_parent_fwspec = {};
unsigned int parent_hwirq;
unsigned int parent_type;
struct gpio_irq_chip *girq = &gc->irq;
@@ -1147,14 +1147,15 @@ static int gpiochip_hierarchy_irq_domain_alloc(struct irq_domain *d,
irq_set_probe(irq);
/* This parent only handles asserted level IRQs */
- parent_arg = girq->populate_parent_alloc_arg(gc, parent_hwirq, parent_type);
- if (!parent_arg)
- return -ENOMEM;
+ ret = girq->populate_parent_alloc_arg(gc, &gpio_parent_fwspec,
+ parent_hwirq, parent_type);
+ if (ret)
+ return ret;
chip_dbg(gc, "alloc_irqs_parent for %d parent hwirq %d\n",
irq, parent_hwirq);
irq_set_lockdep_class(irq, gc->irq.lock_key, gc->irq.request_key);
- ret = irq_domain_alloc_irqs_parent(d, irq, 1, parent_arg);
+ ret = irq_domain_alloc_irqs_parent(d, irq, 1, &gpio_parent_fwspec);
/*
* If the parent irqdomain is msi, the interrupts have already
* been allocated, so the EEXIST is good.
@@ -1166,7 +1167,6 @@ static int gpiochip_hierarchy_irq_domain_alloc(struct irq_domain *d,
"failed to allocate parent hwirq %d for hwirq %lu\n",
parent_hwirq, hwirq);
- kfree(parent_arg);
return ret;
}
@@ -1181,15 +1181,18 @@ static void gpiochip_hierarchy_setup_domain_ops(struct irq_domain_ops *ops)
ops->activate = gpiochip_irq_domain_activate;
ops->deactivate = gpiochip_irq_domain_deactivate;
ops->alloc = gpiochip_hierarchy_irq_domain_alloc;
- ops->free = irq_domain_free_irqs_common;
/*
- * We only allow overriding the translate() function for
+ * We only allow overriding the translate() and free() functions for
* hierarchical chips, and this should only be done if the user
- * really need something other than 1:1 translation.
+ * really needs something other than 1:1 translation for the translate()
+ * callback, or free() if the user wants to release resources that were
+ * allocated during callbacks, for example in populate_parent_alloc_arg.
*/
if (!ops->translate)
ops->translate = gpiochip_hierarchy_irq_domain_translate;
+ if (!ops->free)
+ ops->free = irq_domain_free_irqs_common;
}
static int gpiochip_hierarchy_add_domain(struct gpio_chip *gc)
@@ -1230,34 +1233,28 @@ static bool gpiochip_hierarchy_is_hierarchical(struct gpio_chip *gc)
return !!gc->irq.parent_domain;
}
-void *gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *gc,
- unsigned int parent_hwirq,
- unsigned int parent_type)
+int gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *gc,
+ union gpio_irq_fwspec *gfwspec,
+ unsigned int parent_hwirq,
+ unsigned int parent_type)
{
- struct irq_fwspec *fwspec;
-
- fwspec = kmalloc(sizeof(*fwspec), GFP_KERNEL);
- if (!fwspec)
- return NULL;
+ struct irq_fwspec *fwspec = &gfwspec->fwspec;
fwspec->fwnode = gc->irq.parent_domain->fwnode;
fwspec->param_count = 2;
fwspec->param[0] = parent_hwirq;
fwspec->param[1] = parent_type;
- return fwspec;
+ return 0;
}
EXPORT_SYMBOL_GPL(gpiochip_populate_parent_fwspec_twocell);
-void *gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc,
- unsigned int parent_hwirq,
- unsigned int parent_type)
+int gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc,
+ union gpio_irq_fwspec *gfwspec,
+ unsigned int parent_hwirq,
+ unsigned int parent_type)
{
- struct irq_fwspec *fwspec;
-
- fwspec = kmalloc(sizeof(*fwspec), GFP_KERNEL);
- if (!fwspec)
- return NULL;
+ struct irq_fwspec *fwspec = &gfwspec->fwspec;
fwspec->fwnode = gc->irq.parent_domain->fwnode;
fwspec->param_count = 4;
@@ -1266,7 +1263,7 @@ void *gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc,
fwspec->param[2] = 0;
fwspec->param[3] = parent_type;
- return fwspec;
+ return 0;
}
EXPORT_SYMBOL_GPL(gpiochip_populate_parent_fwspec_fourcell);
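The gpiolib and GPIO driver hunks above all convert the populate_parent_alloc_arg() callbacks to the same shape: the core passes a union gpio_irq_fwspec on its own stack, the driver fills it in and returns 0, and the kmalloc()/kfree() pair disappears. A sketch of a two-cell callback in the new form (example_populate_parent_fwspec is illustrative; the fields mirror the twocell helper in the diff):

	static int example_populate_parent_fwspec(struct gpio_chip *gc,
						  union gpio_irq_fwspec *gfwspec,
						  unsigned int parent_hwirq,
						  unsigned int parent_type)
	{
		/* Fill the caller-provided union instead of allocating one */
		struct irq_fwspec *fwspec = &gfwspec->fwspec;

		fwspec->fwnode = gc->irq.parent_domain->fwnode;
		fwspec->param_count = 2;
		fwspec->param[0] = parent_hwirq;
		fwspec->param[1] = parent_type;

		return 0;
	}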
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 6c2256e8474b..d438d5ff8b40 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -272,7 +272,6 @@ config DRM_AMDGPU
select HWMON
select BACKLIGHT_CLASS_DEVICE
select INTERVAL_TREE
- select DRM_BUDDY
help
Choose this option if you have a recent AMD Radeon graphics card.
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 96cbc87f7b6b..413d8c6d592f 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -6,7 +6,7 @@ config DRM_AMD_DC
bool "AMD DC - Enable new display engine"
default y
select SND_HDA_COMPONENT if SND_HDA_CORE
- select DRM_AMD_DC_DCN if (X86 || PPC64)
+ select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128)
help
Choose this option if you want to use the new display engine
support for AMDGPU. This adds required support for Vega and
diff --git a/drivers/gpu/drm/drm_gem_ttm_helper.c b/drivers/gpu/drm/drm_gem_ttm_helper.c
index d5962a34c01d..e5fc875990c4 100644
--- a/drivers/gpu/drm/drm_gem_ttm_helper.c
+++ b/drivers/gpu/drm/drm_gem_ttm_helper.c
@@ -64,8 +64,13 @@ int drm_gem_ttm_vmap(struct drm_gem_object *gem,
struct iosys_map *map)
{
struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem);
+ int ret;
+
+ dma_resv_lock(gem->resv, NULL);
+ ret = ttm_bo_vmap(bo, map);
+ dma_resv_unlock(gem->resv);
- return ttm_bo_vmap(bo, map);
+ return ret;
}
EXPORT_SYMBOL(drm_gem_ttm_vmap);
@@ -82,7 +87,9 @@ void drm_gem_ttm_vunmap(struct drm_gem_object *gem,
{
struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem);
+ dma_resv_lock(gem->resv, NULL);
ttm_bo_vunmap(bo, map);
+ dma_resv_unlock(gem->resv);
}
EXPORT_SYMBOL(drm_gem_ttm_vunmap);
diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
index 7a9eeed239f3..fc1728d46ac2 100644
--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
+++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
@@ -286,6 +286,21 @@ static const struct dmi_system_id orientation_data[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9"),
},
.driver_data = (void *)&lcd1200x1920_rightside_up,
+ }, { /* Lenovo Yoga Tablet 2 830F / 830L */
+ .matches = {
+ /*
+ * Note this also matches the Lenovo Yoga Tablet 2 1050F/L
+ * since that uses the same mainboard. The resolution match
+ * will limit this to only matching on the 830F/L. Neither has
+ * any external video outputs so those are not a concern.
+ */
+ DMI_MATCH(DMI_SYS_VENDOR, "Intel Corp."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "VALLEYVIEW C0 PLATFORM"),
+ DMI_MATCH(DMI_BOARD_NAME, "BYT-T FFD8"),
+ /* Partial match on beginning of BIOS version */
+ DMI_MATCH(DMI_BIOS_VERSION, "BLADE_21"),
+ },
+ .driver_data = (void *)&lcd1200x1920_rightside_up,
}, { /* OneGX1 Pro */
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SYSTEM_MANUFACTURER"),
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index d2d75d9c0c8d..04eacae1aca5 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -275,10 +275,17 @@ struct intel_context {
u8 child_index;
/** @guc: GuC specific members for parallel submission */
struct {
- /** @wqi_head: head pointer in work queue */
+ /** @wqi_head: cached head pointer in work queue */
u16 wqi_head;
- /** @wqi_tail: tail pointer in work queue */
+ /** @wqi_tail: cached tail pointer in work queue */
u16 wqi_tail;
+ /** @wq_head: pointer to the actual head in work queue */
+ u32 *wq_head;
+ /** @wq_tail: pointer to the actual tail in work queue */
+ u32 *wq_tail;
+ /** @wq_status: pointer to the status in work queue */
+ u32 *wq_status;
+
/**
* @parent_page: page in context state (ce->state) used
* by parent for work queue, process descriptor
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index 4ef9990ed7f8..29ef8afc8c2e 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -122,6 +122,9 @@ enum intel_guc_action {
INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
INTEL_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003,
INTEL_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004,
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY = 0x1005,
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006,
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007,
INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008,
INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index d0d99f178f2d..a7acffbf15d1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -170,6 +170,11 @@ struct intel_guc {
/** @ads_engine_usage_size: size of engine usage in the ADS */
u32 ads_engine_usage_size;
+ /** @lrc_desc_pool_v69: object allocated to hold the GuC LRC descriptor pool */
+ struct i915_vma *lrc_desc_pool_v69;
+ /** @lrc_desc_pool_vaddr_v69: contents of the GuC LRC descriptor pool */
+ void *lrc_desc_pool_vaddr_v69;
+
/**
* @context_lookup: used to resolve intel_context from guc_id, if a
* context is present in this structure it is registered with the GuC
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index b3c9a9327f76..323b055e5db9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -204,6 +204,20 @@ struct guc_wq_item {
u32 fence_id;
} __packed;
+struct guc_process_desc_v69 {
+ u32 stage_id;
+ u64 db_base_addr;
+ u32 head;
+ u32 tail;
+ u32 error_offset;
+ u64 wq_base_addr;
+ u32 wq_size_bytes;
+ u32 wq_status;
+ u32 engine_presence;
+ u32 priority;
+ u32 reserved[36];
+} __packed;
+
struct guc_sched_wq_desc {
u32 head;
u32 tail;
@@ -228,6 +242,37 @@ struct guc_ctxt_registration_info {
};
#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0)
+/* Preempt to idle on quantum expiry */
+#define CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69 BIT(0)
+
+/*
+ * GuC Context registration descriptor.
+ * FIXME: This is only required to exist during context registration.
+ * The current 1:1 mapping between guc_lrc_desc and LRCs for the lifetime of
+ * the LRC is not required.
+ */
+struct guc_lrc_desc_v69 {
+ u32 hw_context_desc;
+ u32 slpm_perf_mode_hint; /* SLPC v1 only */
+ u32 slpm_freq_hint;
+ u32 engine_submit_mask; /* In logical space */
+ u8 engine_class;
+ u8 reserved0[3];
+ u32 priority;
+ u32 process_desc;
+ u32 wq_addr;
+ u32 wq_size;
+ u32 context_flags; /* CONTEXT_REGISTRATION_* */
+ /* Time for one workload to execute (in microseconds). */
+ u32 execution_quantum;
+ /* Time to wait for a preemption request to complete before issuing a
+ * reset (in microseconds).
+ */
+ u32 preemption_timeout;
+ u32 policy_flags; /* CONTEXT_POLICY_* */
+ u32 reserved1[19];
+} __packed;
+
/* 32-bit KLV structure as used by policy updates and others */
struct guc_klv_generic_dw_t {
u32 kl;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 40f726c61e95..76916aed897a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -414,12 +414,15 @@ struct sync_semaphore {
};
struct parent_scratch {
- struct guc_sched_wq_desc wq_desc;
+ union guc_descs {
+ struct guc_sched_wq_desc wq_desc;
+ struct guc_process_desc_v69 pdesc;
+ } descs;
struct sync_semaphore go;
struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
- u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
+ u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
u32 wq[WQ_SIZE / sizeof(u32)];
@@ -456,17 +459,23 @@ __get_parent_scratch(struct intel_context *ce)
LRC_STATE_OFFSET) / sizeof(u32)));
}
+static struct guc_process_desc_v69 *
+__get_process_desc_v69(struct intel_context *ce)
+{
+ struct parent_scratch *ps = __get_parent_scratch(ce);
+
+ return &ps->descs.pdesc;
+}
+
static struct guc_sched_wq_desc *
-__get_wq_desc(struct intel_context *ce)
+__get_wq_desc_v70(struct intel_context *ce)
{
struct parent_scratch *ps = __get_parent_scratch(ce);
- return &ps->wq_desc;
+ return &ps->descs.wq_desc;
}
-static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc,
- struct intel_context *ce,
- u32 wqi_size)
+static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
{
/*
* Check for space in work queue. Caching a value of head pointer in
@@ -476,7 +485,7 @@ static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc,
#define AVAILABLE_SPACE \
CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
if (wqi_size > AVAILABLE_SPACE) {
- ce->parallel.guc.wqi_head = READ_ONCE(wq_desc->head);
+ ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);
if (wqi_size > AVAILABLE_SPACE)
return NULL;
@@ -495,11 +504,55 @@ static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
return ce;
}
+static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
+{
+ struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;
+
+ if (!base)
+ return NULL;
+
+ GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);
+
+ return &base[index];
+}
+
+static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
+{
+ u32 size;
+ int ret;
+
+ size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
+ GUC_MAX_CONTEXT_ID);
+ ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
+ (void **)&guc->lrc_desc_pool_vaddr_v69);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
+{
+ if (!guc->lrc_desc_pool_vaddr_v69)
+ return;
+
+ guc->lrc_desc_pool_vaddr_v69 = NULL;
+ i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
+}
+
static inline bool guc_submission_initialized(struct intel_guc *guc)
{
return guc->submission_initialized;
}
+static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
+{
+ struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);
+
+ if (desc)
+ memset(desc, 0, sizeof(*desc));
+}
+
static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
{
return __get_context(guc, id);
@@ -526,6 +579,8 @@ static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
if (unlikely(!guc_submission_initialized(guc)))
return;
+ _reset_lrc_desc_v69(guc, id);
+
/*
* xarray API doesn't have xa_erase_irqsave wrapper, so calling
* the lower level functions directly.
@@ -611,7 +666,7 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
true, timeout);
}
-static int guc_context_policy_init(struct intel_context *ce, bool loop);
+static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
static int try_context_registration(struct intel_context *ce, bool loop);
static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
@@ -639,7 +694,7 @@ static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
GEM_BUG_ON(context_guc_id_invalid(ce));
if (context_policy_required(ce)) {
- err = guc_context_policy_init(ce, false);
+ err = guc_context_policy_init_v70(ce, false);
if (err)
return err;
}
@@ -737,9 +792,7 @@ static u32 wq_space_until_wrap(struct intel_context *ce)
return (WQ_SIZE - ce->parallel.guc.wqi_tail);
}
-static void write_wqi(struct guc_sched_wq_desc *wq_desc,
- struct intel_context *ce,
- u32 wqi_size)
+static void write_wqi(struct intel_context *ce, u32 wqi_size)
{
BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));
@@ -750,13 +803,12 @@ static void write_wqi(struct guc_sched_wq_desc *wq_desc,
ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
(WQ_SIZE - 1);
- WRITE_ONCE(wq_desc->tail, ce->parallel.guc.wqi_tail);
+ WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
}
static int guc_wq_noop_append(struct intel_context *ce)
{
- struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
- u32 *wqi = get_wq_pointer(wq_desc, ce, wq_space_until_wrap(ce));
+ u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
if (!wqi)
@@ -775,7 +827,6 @@ static int __guc_wq_item_append(struct i915_request *rq)
{
struct intel_context *ce = request_to_scheduling_context(rq);
struct intel_context *child;
- struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
unsigned int wqi_size = (ce->parallel.number_children + 4) *
sizeof(u32);
u32 *wqi;
@@ -795,7 +846,7 @@ static int __guc_wq_item_append(struct i915_request *rq)
return ret;
}
- wqi = get_wq_pointer(wq_desc, ce, wqi_size);
+ wqi = get_wq_pointer(ce, wqi_size);
if (!wqi)
return -EBUSY;
@@ -810,7 +861,7 @@ static int __guc_wq_item_append(struct i915_request *rq)
for_each_child(ce, child)
*wqi++ = child->ring->tail / sizeof(u64);
- write_wqi(wq_desc, ce, wqi_size);
+ write_wqi(ce, wqi_size);
return 0;
}
@@ -1812,20 +1863,34 @@ static void reset_fail_worker_func(struct work_struct *w);
int intel_guc_submission_init(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
+ int ret;
if (guc->submission_initialized)
return 0;
+ if (guc->fw.major_ver_found < 70) {
+ ret = guc_lrc_desc_pool_create_v69(guc);
+ if (ret)
+ return ret;
+ }
+
guc->submission_state.guc_ids_bitmap =
bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
- if (!guc->submission_state.guc_ids_bitmap)
- return -ENOMEM;
+ if (!guc->submission_state.guc_ids_bitmap) {
+ ret = -ENOMEM;
+ goto destroy_pool;
+ }
guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
guc->timestamp.shift = gpm_timestamp_shift(gt);
guc->submission_initialized = true;
return 0;
+
+destroy_pool:
+ guc_lrc_desc_pool_destroy_v69(guc);
+
+ return ret;
}
void intel_guc_submission_fini(struct intel_guc *guc)
@@ -1834,6 +1899,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
return;
guc_flush_destroyed_contexts(guc);
+ guc_lrc_desc_pool_destroy_v69(guc);
i915_sched_engine_put(guc->sched_engine);
bitmap_free(guc->submission_state.guc_ids_bitmap);
guc->submission_initialized = false;
@@ -2091,10 +2157,34 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
spin_unlock_irqrestore(&guc->submission_state.lock, flags);
}
-static int __guc_action_register_multi_lrc(struct intel_guc *guc,
- struct intel_context *ce,
- struct guc_ctxt_registration_info *info,
- bool loop)
+static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc,
+ struct intel_context *ce,
+ u32 guc_id,
+ u32 offset,
+ bool loop)
+{
+ struct intel_context *child;
+ u32 action[4 + MAX_ENGINE_INSTANCE];
+ int len = 0;
+
+ GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
+
+ action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
+ action[len++] = guc_id;
+ action[len++] = ce->parallel.number_children + 1;
+ action[len++] = offset;
+ for_each_child(ce, child) {
+ offset += sizeof(struct guc_lrc_desc_v69);
+ action[len++] = offset;
+ }
+
+ return guc_submission_send_busy_loop(guc, action, len, 0, loop);
+}
+
+static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc,
+ struct intel_context *ce,
+ struct guc_ctxt_registration_info *info,
+ bool loop)
{
struct intel_context *child;
u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
@@ -2134,9 +2224,24 @@ static int __guc_action_register_multi_lrc(struct intel_guc *guc,
return guc_submission_send_busy_loop(guc, action, len, 0, loop);
}
-static int __guc_action_register_context(struct intel_guc *guc,
- struct guc_ctxt_registration_info *info,
- bool loop)
+static int __guc_action_register_context_v69(struct intel_guc *guc,
+ u32 guc_id,
+ u32 offset,
+ bool loop)
+{
+ u32 action[] = {
+ INTEL_GUC_ACTION_REGISTER_CONTEXT,
+ guc_id,
+ offset,
+ };
+
+ return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
+ 0, loop);
+}
+
+static int __guc_action_register_context_v70(struct intel_guc *guc,
+ struct guc_ctxt_registration_info *info,
+ bool loop)
{
u32 action[] = {
INTEL_GUC_ACTION_REGISTER_CONTEXT,
@@ -2157,24 +2262,52 @@ static int __guc_action_register_context(struct intel_guc *guc,
0, loop);
}
-static void prepare_context_registration_info(struct intel_context *ce,
- struct guc_ctxt_registration_info *info);
+static void prepare_context_registration_info_v69(struct intel_context *ce);
+static void prepare_context_registration_info_v70(struct intel_context *ce,
+ struct guc_ctxt_registration_info *info);
-static int register_context(struct intel_context *ce, bool loop)
+static int
+register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop)
+{
+ u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) +
+ ce->guc_id.id * sizeof(struct guc_lrc_desc_v69);
+
+ prepare_context_registration_info_v69(ce);
+
+ if (intel_context_is_parent(ce))
+ return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id,
+ offset, loop);
+ else
+ return __guc_action_register_context_v69(guc, ce->guc_id.id,
+ offset, loop);
+}
+
+static int
+register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop)
{
struct guc_ctxt_registration_info info;
+
+ prepare_context_registration_info_v70(ce, &info);
+
+ if (intel_context_is_parent(ce))
+ return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop);
+ else
+ return __guc_action_register_context_v70(guc, &info, loop);
+}
+
+static int register_context(struct intel_context *ce, bool loop)
+{
struct intel_guc *guc = ce_to_guc(ce);
int ret;
GEM_BUG_ON(intel_context_is_child(ce));
trace_intel_context_register(ce);
- prepare_context_registration_info(ce, &info);
-
- if (intel_context_is_parent(ce))
- ret = __guc_action_register_multi_lrc(guc, ce, &info, loop);
+ if (guc->fw.major_ver_found >= 70)
+ ret = register_context_v70(guc, ce, loop);
else
- ret = __guc_action_register_context(guc, &info, loop);
+ ret = register_context_v69(guc, ce, loop);
+
if (likely(!ret)) {
unsigned long flags;
@@ -2182,7 +2315,8 @@ static int register_context(struct intel_context *ce, bool loop)
set_context_registered(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
- guc_context_policy_init(ce, loop);
+ if (guc->fw.major_ver_found >= 70)
+ guc_context_policy_init_v70(ce, loop);
}
return ret;
@@ -2279,7 +2413,7 @@ static int __guc_context_set_context_policies(struct intel_guc *guc,
0, loop);
}
-static int guc_context_policy_init(struct intel_context *ce, bool loop)
+static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
{
struct intel_engine_cs *engine = ce->engine;
struct intel_guc *guc = &engine->gt->uc.guc;
@@ -2338,6 +2472,19 @@ static int guc_context_policy_init(struct intel_context *ce, bool loop)
return ret;
}
+static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
+ struct guc_lrc_desc_v69 *desc)
+{
+ desc->policy_flags = 0;
+
+ if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
+ desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
+
+ /* NB: For both of these, zero means disabled. */
+ desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
+ desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
+}
+
static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
{
/*
@@ -2358,8 +2505,75 @@ static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
}
}
-static void prepare_context_registration_info(struct intel_context *ce,
- struct guc_ctxt_registration_info *info)
+static void prepare_context_registration_info_v69(struct intel_context *ce)
+{
+ struct intel_engine_cs *engine = ce->engine;
+ struct intel_guc *guc = &engine->gt->uc.guc;
+ u32 ctx_id = ce->guc_id.id;
+ struct guc_lrc_desc_v69 *desc;
+ struct intel_context *child;
+
+ GEM_BUG_ON(!engine->mask);
+
+ /*
+ * Ensure LRC + CT vmas are in the same region, as the write barrier is done
+ * based on CT vma region.
+ */
+ GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
+ i915_gem_object_is_lmem(ce->ring->vma->obj));
+
+ desc = __get_lrc_desc_v69(guc, ctx_id);
+ desc->engine_class = engine_class_to_guc_class(engine->class);
+ desc->engine_submit_mask = engine->logical_mask;
+ desc->hw_context_desc = ce->lrc.lrca;
+ desc->priority = ce->guc_state.prio;
+ desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
+ guc_context_policy_init_v69(engine, desc);
+
+ /*
+ * If context is a parent, we need to register a process descriptor
+ * describing a work queue and register all child contexts.
+ */
+ if (intel_context_is_parent(ce)) {
+ struct guc_process_desc_v69 *pdesc;
+
+ ce->parallel.guc.wqi_tail = 0;
+ ce->parallel.guc.wqi_head = 0;
+
+ desc->process_desc = i915_ggtt_offset(ce->state) +
+ __get_parent_scratch_offset(ce);
+ desc->wq_addr = i915_ggtt_offset(ce->state) +
+ __get_wq_offset(ce);
+ desc->wq_size = WQ_SIZE;
+
+ pdesc = __get_process_desc_v69(ce);
+ memset(pdesc, 0, sizeof(*(pdesc)));
+ pdesc->stage_id = ce->guc_id.id;
+ pdesc->wq_base_addr = desc->wq_addr;
+ pdesc->wq_size_bytes = desc->wq_size;
+ pdesc->wq_status = WQ_STATUS_ACTIVE;
+
+ ce->parallel.guc.wq_head = &pdesc->head;
+ ce->parallel.guc.wq_tail = &pdesc->tail;
+ ce->parallel.guc.wq_status = &pdesc->wq_status;
+
+ for_each_child(ce, child) {
+ desc = __get_lrc_desc_v69(guc, child->guc_id.id);
+
+ desc->engine_class =
+ engine_class_to_guc_class(engine->class);
+ desc->hw_context_desc = child->lrc.lrca;
+ desc->priority = ce->guc_state.prio;
+ desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
+ guc_context_policy_init_v69(engine, desc);
+ }
+
+ clear_children_join_go_memory(ce);
+ }
+}
+
+static void prepare_context_registration_info_v70(struct intel_context *ce,
+ struct guc_ctxt_registration_info *info)
{
struct intel_engine_cs *engine = ce->engine;
struct intel_guc *guc = &engine->gt->uc.guc;
@@ -2409,10 +2623,14 @@ static void prepare_context_registration_info(struct intel_context *ce,
info->wq_base_hi = upper_32_bits(wq_base_offset);
info->wq_size = WQ_SIZE;
- wq_desc = __get_wq_desc(ce);
+ wq_desc = __get_wq_desc_v70(ce);
memset(wq_desc, 0, sizeof(*wq_desc));
wq_desc->wq_status = WQ_STATUS_ACTIVE;
+ ce->parallel.guc.wq_head = &wq_desc->head;
+ ce->parallel.guc.wq_tail = &wq_desc->tail;
+ ce->parallel.guc.wq_status = &wq_desc->wq_status;
+
clear_children_join_go_memory(ce);
}
}
@@ -2727,11 +2945,21 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
u16 guc_id,
u32 preemption_timeout)
{
- struct context_policy policy;
+ if (guc->fw.major_ver_found >= 70) {
+ struct context_policy policy;
- __guc_context_policy_start_klv(&policy, guc_id);
- __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
- __guc_context_set_context_policies(guc, &policy, true);
+ __guc_context_policy_start_klv(&policy, guc_id);
+ __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
+ __guc_context_set_context_policies(guc, &policy, true);
+ } else {
+ u32 action[] = {
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT,
+ guc_id,
+ preemption_timeout
+ };
+
+ intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
+ }
}
static void
@@ -2982,11 +3210,21 @@ static int guc_context_alloc(struct intel_context *ce)
static void __guc_context_set_prio(struct intel_guc *guc,
struct intel_context *ce)
{
- struct context_policy policy;
+ if (guc->fw.major_ver_found >= 70) {
+ struct context_policy policy;
- __guc_context_policy_start_klv(&policy, ce->guc_id.id);
- __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
- __guc_context_set_context_policies(guc, &policy, true);
+ __guc_context_policy_start_klv(&policy, ce->guc_id.id);
+ __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
+ __guc_context_set_context_policies(guc, &policy, true);
+ } else {
+ u32 action[] = {
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY,
+ ce->guc_id.id,
+ ce->guc_state.prio,
+ };
+
+ guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
+ }
}
static void guc_context_set_prio(struct intel_guc *guc,
@@ -4496,17 +4734,19 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
guc_log_context_priority(p, ce);
if (intel_context_is_parent(ce)) {
- struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
struct intel_context *child;
drm_printf(p, "\t\tNumber children: %u\n",
ce->parallel.number_children);
- drm_printf(p, "\t\tWQI Head: %u\n",
- READ_ONCE(wq_desc->head));
- drm_printf(p, "\t\tWQI Tail: %u\n",
- READ_ONCE(wq_desc->tail));
- drm_printf(p, "\t\tWQI Status: %u\n\n",
- READ_ONCE(wq_desc->wq_status));
+
+ if (ce->parallel.guc.wq_status) {
+ drm_printf(p, "\t\tWQI Head: %u\n",
+ READ_ONCE(*ce->parallel.guc.wq_head));
+ drm_printf(p, "\t\tWQI Tail: %u\n",
+ READ_ONCE(*ce->parallel.guc.wq_tail));
+ drm_printf(p, "\t\tWQI Status: %u\n\n",
+ READ_ONCE(*ce->parallel.guc.wq_status));
+ }
if (ce->engine->emit_bb_start ==
emit_bb_start_parent_no_preempt_mid_batch) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 27363091e1af..56a0d80f88ba 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -70,6 +70,10 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
fw_def(BROXTON, 0, guc_def(bxt, 70, 1, 1)) \
fw_def(SKYLAKE, 0, guc_def(skl, 70, 1, 1))
+#define INTEL_GUC_FIRMWARE_DEFS_FALLBACK(fw_def, guc_def) \
+ fw_def(ALDERLAKE_P, 0, guc_def(adlp, 69, 0, 3)) \
+ fw_def(ALDERLAKE_S, 0, guc_def(tgl, 69, 0, 3))
+
#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \
fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \
fw_def(ALDERLAKE_S, 0, huc_def(tgl, 7, 9, 3)) \
@@ -105,6 +109,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
MODULE_FIRMWARE(uc_);
INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH)
+INTEL_GUC_FIRMWARE_DEFS_FALLBACK(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH)
INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH)
/* The below structs and macros are used to iterate across the list of blobs */
@@ -149,6 +154,9 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
static const struct uc_fw_platform_requirement blobs_guc[] = {
INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB)
};
+ static const struct uc_fw_platform_requirement blobs_guc_fallback[] = {
+ INTEL_GUC_FIRMWARE_DEFS_FALLBACK(MAKE_FW_LIST, GUC_FW_BLOB)
+ };
static const struct uc_fw_platform_requirement blobs_huc[] = {
INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB)
};
@@ -179,12 +187,29 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) {
const struct uc_fw_blob *blob = &fw_blobs[i].blob;
uc_fw->path = blob->path;
+ uc_fw->wanted_path = blob->path;
uc_fw->major_ver_wanted = blob->major;
uc_fw->minor_ver_wanted = blob->minor;
break;
}
}
+ if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) {
+ const struct uc_fw_platform_requirement *blobs = blobs_guc_fallback;
+ u32 count = ARRAY_SIZE(blobs_guc_fallback);
+
+ for (i = 0; i < count && p <= blobs[i].p; i++) {
+ if (p == blobs[i].p && rev >= blobs[i].rev) {
+ const struct uc_fw_blob *blob = &blobs[i].blob;
+
+ uc_fw->fallback.path = blob->path;
+ uc_fw->fallback.major_ver = blob->major;
+ uc_fw->fallback.minor_ver = blob->minor;
+ break;
+ }
+ }
+ }
+
/* make sure the list is ordered as expected */
if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) {
for (i = 1; i < fw_count; i++) {
@@ -412,7 +437,24 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
__force_fw_fetch_failures(uc_fw, -EINVAL);
__force_fw_fetch_failures(uc_fw, -ESTALE);
- err = request_firmware(&fw, uc_fw->path, dev);
+ err = firmware_request_nowarn(&fw, uc_fw->path, dev);
+ if (err && !intel_uc_fw_is_overridden(uc_fw) && uc_fw->fallback.path) {
+ err = firmware_request_nowarn(&fw, uc_fw->fallback.path, dev);
+ if (!err) {
+ drm_notice(&i915->drm,
+ "%s firmware %s is recommended, but only %s was found\n",
+ intel_uc_fw_type_repr(uc_fw->type),
+ uc_fw->wanted_path,
+ uc_fw->fallback.path);
+ drm_info(&i915->drm,
+ "Consider updating your linux-firmware pkg or downloading from %s\n",
+ INTEL_UC_FIRMWARE_URL);
+
+ uc_fw->path = uc_fw->fallback.path;
+ uc_fw->major_ver_wanted = uc_fw->fallback.major_ver;
+ uc_fw->minor_ver_wanted = uc_fw->fallback.minor_ver;
+ }
+ }
if (err)
goto fail;
@@ -460,8 +502,8 @@ fail:
INTEL_UC_FIRMWARE_MISSING :
INTEL_UC_FIRMWARE_ERROR);
- drm_notice(&i915->drm, "%s firmware %s: fetch failed with error %d\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
+ i915_probe_error(i915, "%s firmware %s: fetch failed with error %d\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
drm_info(&i915->drm, "%s firmware(s) can be downloaded from %s\n",
intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL);
@@ -822,7 +864,13 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len)
void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p)
{
drm_printf(p, "%s firmware: %s\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->path);
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->wanted_path);
+ if (uc_fw->fallback.path) {
+ drm_printf(p, "%s firmware fallback: %s\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->fallback.path);
+ drm_printf(p, "fallback selected: %s\n",
+ str_yes_no(uc_fw->path == uc_fw->fallback.path));
+ }
drm_printf(p, "\tstatus: %s\n",
intel_uc_fw_status_repr(uc_fw->status));
drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n",
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 4f169035f504..7aa2644400b9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -74,6 +74,7 @@ struct intel_uc_fw {
const enum intel_uc_fw_status status;
enum intel_uc_fw_status __status; /* no accidental overwrites */
};
+ const char *wanted_path;
const char *path;
bool user_overridden;
size_t size;
@@ -98,6 +99,12 @@ struct intel_uc_fw {
u16 major_ver_found;
u16 minor_ver_found;
+ struct {
+ const char *path;
+ u16 major_ver;
+ u16 minor_ver;
+ } fallback;
+
u32 rsa_size;
u32 ucode_size;
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 0ba2a3455d99..de13f102d4fd 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -3117,9 +3117,9 @@ void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu)
continue;
vaddr = shmem_pin_map(engine->default_state);
- if (IS_ERR(vaddr)) {
- gvt_err("failed to map %s->default state, err:%zd\n",
- engine->name, PTR_ERR(vaddr));
+ if (!vaddr) {
+ gvt_err("failed to map %s->default state\n",
+ engine->name);
return;
}
diff --git a/drivers/gpu/drm/imx/dcss/dcss-dev.c b/drivers/gpu/drm/imx/dcss/dcss-dev.c
index c849533ca83e..3f5750cc2673 100644
--- a/drivers/gpu/drm/imx/dcss/dcss-dev.c
+++ b/drivers/gpu/drm/imx/dcss/dcss-dev.c
@@ -207,6 +207,7 @@ struct dcss_dev *dcss_dev_create(struct device *dev, bool hdmi_output)
ret = dcss_submodules_init(dcss);
if (ret) {
+ of_node_put(dcss->of_port);
dev_err(dev, "submodules initialization failed\n");
goto clks_err;
}
@@ -237,6 +238,8 @@ void dcss_dev_destroy(struct dcss_dev *dcss)
dcss_clocks_disable(dcss);
}
+ of_node_put(dcss->of_port);
+
pm_runtime_disable(dcss->dev);
dcss_submodules_stop(dcss);
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 7ba66ad68a8a..16356611b5b9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -680,7 +680,11 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
goto out_free_dma;
for (i = 0; i < npages; i += max) {
- args.end = start + (max << PAGE_SHIFT);
+ if (args.start + (max << PAGE_SHIFT) > end)
+ args.end = end;
+ else
+ args.end = args.start + (max << PAGE_SHIFT);
+
ret = migrate_vma_setup(&args);
if (ret)
goto out_free_pfns;
diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c
index 3626469c4cc2..cdb154c8b866 100644
--- a/drivers/gpu/drm/panel/panel-edp.c
+++ b/drivers/gpu/drm/panel/panel-edp.c
@@ -738,7 +738,7 @@ static int generic_edp_panel_probe(struct device *dev, struct panel_edp *panel)
of_property_read_u32(dev->of_node, "hpd-reliable-delay-ms", &reliable_ms);
desc->delay.hpd_reliable = reliable_ms;
of_property_read_u32(dev->of_node, "hpd-absent-delay-ms", &absent_ms);
- desc->delay.hpd_reliable = absent_ms;
+ desc->delay.hpd_absent = absent_ms;
/* Power the panel on so we can read the EDID */
ret = pm_runtime_get_sync(dev);
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index c58075bc096e..2d870cf73b07 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -433,8 +433,8 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, void *data,
if (args->retained) {
if (args->madv == PANFROST_MADV_DONTNEED)
- list_add_tail(&bo->base.madv_list,
- &pfdev->shrinker_list);
+ list_move_tail(&bo->base.madv_list,
+ &pfdev->shrinker_list);
else if (args->madv == PANFROST_MADV_WILLNEED)
list_del_init(&bo->base.madv_list);
}
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index d3f82b26a631..b285a8001b1d 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -518,7 +518,7 @@ err_map:
err_pages:
drm_gem_shmem_put_pages(&bo->base);
err_bo:
- drm_gem_object_put(&bo->base.base);
+ panfrost_gem_mapping_put(bomapping);
return ret;
}
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index 67d38f53d3e5..13ed33e74457 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -23,6 +23,14 @@
#include <drm/drm_probe_helper.h>
#include <drm/drm_vblank.h>
+#if defined(CONFIG_ARM_DMA_USE_IOMMU)
+#include <asm/dma-iommu.h>
+#else
+#define arm_iommu_detach_device(...) ({ })
+#define arm_iommu_release_mapping(...) ({ })
+#define to_dma_iommu_mapping(dev) NULL
+#endif
+
#include "rockchip_drm_drv.h"
#include "rockchip_drm_fb.h"
#include "rockchip_drm_gem.h"
@@ -49,6 +57,15 @@ int rockchip_drm_dma_attach_device(struct drm_device *drm_dev,
if (!private->domain)
return 0;
+ if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) {
+ struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
+
+ if (mapping) {
+ arm_iommu_detach_device(dev);
+ arm_iommu_release_mapping(mapping);
+ }
+ }
+
ret = iommu_attach_device(private->domain, dev);
if (ret) {
DRM_DEV_ERROR(dev, "Failed to attach iommu device\n");
diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c
index 77f80b0d3a5e..5a3e3b78cd9e 100644
--- a/drivers/gpu/drm/solomon/ssd130x.c
+++ b/drivers/gpu/drm/solomon/ssd130x.c
@@ -352,7 +352,7 @@ static int ssd130x_init(struct ssd130x_device *ssd130x)
/* Set precharge period in number of ticks from the internal clock */
precharge = (SSD130X_SET_PRECHARGE_PERIOD1_SET(ssd130x->prechargep1) |
- SSD130X_SET_PRECHARGE_PERIOD1_SET(ssd130x->prechargep2));
+ SSD130X_SET_PRECHARGE_PERIOD2_SET(ssd130x->prechargep2));
ret = ssd130x_write_cmd(ssd130x, 2, SSD130X_SET_PRECHARGE_PERIOD, precharge);
if (ret < 0)
return ret;
diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c
index 768242a78e2b..5422363690e7 100644
--- a/drivers/gpu/drm/tiny/simpledrm.c
+++ b/drivers/gpu/drm/tiny/simpledrm.c
@@ -627,7 +627,7 @@ static const struct drm_connector_funcs simpledrm_connector_funcs = {
.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
};
-static int
+static enum drm_mode_status
simpledrm_simple_display_pipe_mode_valid(struct drm_simple_display_pipe *pipe,
const struct drm_display_mode *mode)
{
diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index 4e239bd75b1d..5a9d47a229e4 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -428,6 +428,10 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
data->ccd_offset = 0x154;
k10temp_get_ccd_support(pdev, data, 8);
break;
+ case 0xa0 ... 0xaf:
+ data->ccd_offset = 0x300;
+ k10temp_get_ccd_support(pdev, data, 8);
+ break;
}
} else if (boot_cpu_data.x86 == 0x19) {
data->temp_adjust_mask = ZEN_CUR_TEMP_RANGE_SEL_MASK;
@@ -445,6 +449,11 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
data->ccd_offset = 0x300;
k10temp_get_ccd_support(pdev, data, 8);
break;
+ case 0x60 ... 0x6f:
+ case 0x70 ... 0x7f:
+ data->ccd_offset = 0x308;
+ k10temp_get_ccd_support(pdev, data, 8);
+ break;
case 0x10 ... 0x1f:
case 0xa0 ... 0xaf:
data->ccd_offset = 0x300;
@@ -489,10 +498,13 @@ static const struct pci_device_id k10temp_id_table[] = {
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F3) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
+ { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_MA0H_DF_F3) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M10H_DF_F3) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F3) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) },
+ { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F3) },
+ { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F3) },
{ PCI_VDEVICE(HYGON, PCI_DEVICE_ID_AMD_17H_DF_F3) },
{}
};
diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c
index 3d6f8ee355bf..630cfa4ddd46 100644
--- a/drivers/i2c/busses/i2c-cadence.c
+++ b/drivers/i2c/busses/i2c-cadence.c
@@ -388,9 +388,9 @@ static irqreturn_t cdns_i2c_slave_isr(void *ptr)
*/
static irqreturn_t cdns_i2c_master_isr(void *ptr)
{
- unsigned int isr_status, avail_bytes, updatetx;
+ unsigned int isr_status, avail_bytes;
unsigned int bytes_to_send;
- bool hold_quirk;
+ bool updatetx;
struct cdns_i2c *id = ptr;
/* Signal completion only after everything is updated */
int done_flag = 0;
@@ -410,11 +410,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr)
* Check if transfer size register needs to be updated again for a
* large data receive operation.
*/
- updatetx = 0;
- if (id->recv_count > id->curr_recv_count)
- updatetx = 1;
-
- hold_quirk = (id->quirks & CDNS_I2C_BROKEN_HOLD_BIT) && updatetx;
+ updatetx = id->recv_count > id->curr_recv_count;
/* When receiving, handle data interrupt and completion interrupt */
if (id->p_recv_buf &&
@@ -445,7 +441,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr)
break;
}
- if (cdns_is_holdquirk(id, hold_quirk))
+ if (cdns_is_holdquirk(id, updatetx))
break;
}
@@ -456,7 +452,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr)
* maintain transfer size non-zero while performing a large
* receive operation.
*/
- if (cdns_is_holdquirk(id, hold_quirk)) {
+ if (cdns_is_holdquirk(id, updatetx)) {
/* wait while fifo is full */
while (cdns_i2c_readreg(CDNS_I2C_XFER_SIZE_OFFSET) !=
(id->curr_recv_count - CDNS_I2C_FIFO_DEPTH))
@@ -478,22 +474,6 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr)
CDNS_I2C_XFER_SIZE_OFFSET);
id->curr_recv_count = id->recv_count;
}
- } else if (id->recv_count && !hold_quirk &&
- !id->curr_recv_count) {
-
- /* Set the slave address in address register*/
- cdns_i2c_writereg(id->p_msg->addr & CDNS_I2C_ADDR_MASK,
- CDNS_I2C_ADDR_OFFSET);
-
- if (id->recv_count > CDNS_I2C_TRANSFER_SIZE) {
- cdns_i2c_writereg(CDNS_I2C_TRANSFER_SIZE,
- CDNS_I2C_XFER_SIZE_OFFSET);
- id->curr_recv_count = CDNS_I2C_TRANSFER_SIZE;
- } else {
- cdns_i2c_writereg(id->recv_count,
- CDNS_I2C_XFER_SIZE_OFFSET);
- id->curr_recv_count = id->recv_count;
- }
}
/* Clear hold (if not repeated start) and signal completion */
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index e9e2db68b9fb..78fb1a4274a6 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -66,7 +66,7 @@
/* IMX I2C registers:
* the I2C register offset is different between SoCs,
- * to provid support for all these chips, split the
+ * to provide support for all these chips, split the
* register offset into a fixed base address and a
* variable shift value, then the full register offset
* will be calculated by
diff --git a/drivers/i2c/busses/i2c-mlxcpld.c b/drivers/i2c/busses/i2c-mlxcpld.c
index 56aa424fd71d..815cc561386b 100644
--- a/drivers/i2c/busses/i2c-mlxcpld.c
+++ b/drivers/i2c/busses/i2c-mlxcpld.c
@@ -49,7 +49,7 @@
#define MLXCPLD_LPCI2C_NACK_IND 2
#define MLXCPLD_I2C_FREQ_1000KHZ_SET 0x04
-#define MLXCPLD_I2C_FREQ_400KHZ_SET 0x0c
+#define MLXCPLD_I2C_FREQ_400KHZ_SET 0x0e
#define MLXCPLD_I2C_FREQ_100KHZ_SET 0x42
enum mlxcpld_i2c_frequency {
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 424ef470223d..445b19d20b9a 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -47,13 +47,16 @@
#include <linux/tick.h>
#include <trace/events/power.h>
#include <linux/sched.h>
+#include <linux/sched/smt.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
+#include <asm/nospec-branch.h>
#include <asm/mwait.h>
#include <asm/msr.h>
+#include <asm/fpu/api.h>
#define INTEL_IDLE_VERSION "0.5.1"
@@ -106,6 +109,17 @@ static unsigned int mwait_substates __initdata;
#define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15)
/*
+ * Disable IBRS across idle (when KERNEL_IBRS); this is mutually exclusive
+ * with IRQ_ENABLE above.
+ */
+#define CPUIDLE_FLAG_IBRS BIT(16)
+
+/*
+ * Initialize large xstate before entering the C6 state.
+ */
+#define CPUIDLE_FLAG_INIT_XSTATE BIT(17)
+
+/*
* MWAIT takes an 8-bit "hint" in EAX "suggesting"
* the C-state (top nibble) and sub-state (bottom nibble)
* 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
@@ -154,11 +168,42 @@ static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
raw_local_irq_enable();
ret = __intel_idle(dev, drv, index);
- raw_local_irq_disable();
+
+ /*
+ * The lockdep hardirqs state may be changed to 'on' with timer
+ * tick interrupt followed by __do_softirq(). Use local_irq_disable()
+ * to keep the hardirqs state correct.
+ */
+ local_irq_disable();
+
+ return ret;
+}
+
+static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ bool smt_active = sched_smt_active();
+ u64 spec_ctrl = spec_ctrl_current();
+ int ret;
+
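+	/* Leaving IBRS set while idle would slow down the SMT sibling */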
+ if (smt_active)
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+
+ ret = __intel_idle(dev, drv, index);
+
+ if (smt_active)
+ wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
return ret;
}
+static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ fpu_idle_fpregs();
+ return __intel_idle(dev, drv, index);
+}
+
/**
* intel_idle_s2idle - Ask the processor to enter the given idle state.
* @dev: cpuidle device of the target CPU.
@@ -174,8 +219,12 @@ static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
- unsigned long eax = flg2MWAIT(drv->states[index].flags);
unsigned long ecx = 1; /* break on interrupt flag */
+ struct cpuidle_state *state = &drv->states[index];
+ unsigned long eax = flg2MWAIT(state->flags);
+
+ if (state->flags & CPUIDLE_FLAG_INIT_XSTATE)
+ fpu_idle_fpregs();
mwait_idle_with_hints(eax, ecx);
@@ -680,7 +729,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
{
.name = "C6",
.desc = "MWAIT 0x20",
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 85,
.target_residency = 200,
.enter = &intel_idle,
@@ -688,7 +737,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
{
.name = "C7s",
.desc = "MWAIT 0x33",
- .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 124,
.target_residency = 800,
.enter = &intel_idle,
@@ -696,7 +745,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
{
.name = "C8",
.desc = "MWAIT 0x40",
- .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 200,
.target_residency = 800,
.enter = &intel_idle,
@@ -704,7 +753,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
{
.name = "C9",
.desc = "MWAIT 0x50",
- .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 480,
.target_residency = 5000,
.enter = &intel_idle,
@@ -712,7 +761,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
{
.name = "C10",
.desc = "MWAIT 0x60",
- .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 890,
.target_residency = 5000,
.enter = &intel_idle,
@@ -741,7 +790,7 @@ static struct cpuidle_state skx_cstates[] __initdata = {
{
.name = "C6",
.desc = "MWAIT 0x20",
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 133,
.target_residency = 600,
.enter = &intel_idle,
@@ -910,7 +959,8 @@ static struct cpuidle_state spr_cstates[] __initdata = {
{
.name = "C6",
.desc = "MWAIT 0x20",
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
+ CPUIDLE_FLAG_INIT_XSTATE,
.exit_latency = 290,
.target_residency = 800,
.enter = &intel_idle,
@@ -1819,6 +1869,15 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE)
drv->states[drv->state_count].enter = intel_idle_irq;
+ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
+ cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
+ WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE);
+ drv->states[drv->state_count].enter = intel_idle_ibrs;
+ }
+
+ if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_INIT_XSTATE)
+ drv->states[drv->state_count].enter = intel_idle_xstate;
+
if ((disabled_states_mask & BIT(drv->state_count)) ||
((icpu->use_acpi || force_use_acpi) &&
intel_idle_off_by_default(mwait_hint) &&
diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
index 638bf4a1ed94..646fa8677490 100644
--- a/drivers/infiniband/hw/irdma/cm.c
+++ b/drivers/infiniband/hw/irdma/cm.c
@@ -4231,10 +4231,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr,
struct irdma_cm_node *cm_node;
struct list_head teardown_list;
struct ib_qp_attr attr;
- struct irdma_sc_vsi *vsi = &iwdev->vsi;
- struct irdma_sc_qp *sc_qp;
- struct irdma_qp *qp;
- int i;
INIT_LIST_HEAD(&teardown_list);
@@ -4251,52 +4247,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr,
irdma_cm_disconn(cm_node->iwqp);
irdma_rem_ref_cm_node(cm_node);
}
- if (!iwdev->roce_mode)
- return;
-
- INIT_LIST_HEAD(&teardown_list);
- for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
- mutex_lock(&vsi->qos[i].qos_mutex);
- list_for_each_safe (list_node, list_core_temp,
- &vsi->qos[i].qplist) {
- u32 qp_ip[4];
-
- sc_qp = container_of(list_node, struct irdma_sc_qp,
- list);
- if (sc_qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_RC)
- continue;
-
- qp = sc_qp->qp_uk.back_qp;
- if (!disconnect_all) {
- if (nfo->ipv4)
- qp_ip[0] = qp->udp_info.local_ipaddr[3];
- else
- memcpy(qp_ip,
- &qp->udp_info.local_ipaddr[0],
- sizeof(qp_ip));
- }
-
- if (disconnect_all ||
- (nfo->vlan_id == (qp->udp_info.vlan_tag & VLAN_VID_MASK) &&
- !memcmp(qp_ip, ipaddr, nfo->ipv4 ? 4 : 16))) {
- spin_lock(&iwdev->rf->qptable_lock);
- if (iwdev->rf->qp_table[sc_qp->qp_uk.qp_id]) {
- irdma_qp_add_ref(&qp->ibqp);
- list_add(&qp->teardown_entry,
- &teardown_list);
- }
- spin_unlock(&iwdev->rf->qptable_lock);
- }
- }
- mutex_unlock(&vsi->qos[i].qos_mutex);
- }
-
- list_for_each_safe (list_node, list_core_temp, &teardown_list) {
- qp = container_of(list_node, struct irdma_qp, teardown_entry);
- attr.qp_state = IB_QPS_ERR;
- irdma_modify_qp_roce(&qp->ibqp, &attr, IB_QP_STATE, NULL);
- irdma_qp_rem_ref(&qp->ibqp);
- }
}
/**
diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.c b/drivers/infiniband/hw/irdma/i40iw_hw.c
index e46fc110004d..50299f58b6b3 100644
--- a/drivers/infiniband/hw/irdma/i40iw_hw.c
+++ b/drivers/infiniband/hw/irdma/i40iw_hw.c
@@ -201,6 +201,7 @@ void i40iw_init_hw(struct irdma_sc_dev *dev)
dev->hw_attrs.uk_attrs.max_hw_read_sges = I40IW_MAX_SGE_RD;
dev->hw_attrs.max_hw_device_pages = I40IW_MAX_PUSH_PAGE_COUNT;
dev->hw_attrs.uk_attrs.max_hw_inline = I40IW_MAX_INLINE_DATA_SIZE;
+ dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M;
dev->hw_attrs.max_hw_ird = I40IW_MAX_IRD_SIZE;
dev->hw_attrs.max_hw_ord = I40IW_MAX_ORD_SIZE;
dev->hw_attrs.max_hw_wqes = I40IW_MAX_WQ_ENTRIES;
diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.c b/drivers/infiniband/hw/irdma/icrdma_hw.c
index cf53b17510cd..5986fd906308 100644
--- a/drivers/infiniband/hw/irdma/icrdma_hw.c
+++ b/drivers/infiniband/hw/irdma/icrdma_hw.c
@@ -139,6 +139,7 @@ void icrdma_init_hw(struct irdma_sc_dev *dev)
dev->cqp_db = dev->hw_regs[IRDMA_CQPDB];
dev->cq_ack_db = dev->hw_regs[IRDMA_CQACK];
dev->irq_ops = &icrdma_irq_ops;
+ dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M | SZ_1G;
dev->hw_attrs.max_hw_ird = ICRDMA_MAX_IRD_SIZE;
dev->hw_attrs.max_hw_ord = ICRDMA_MAX_ORD_SIZE;
dev->hw_attrs.max_stat_inst = ICRDMA_MAX_STATS_COUNT;
diff --git a/drivers/infiniband/hw/irdma/irdma.h b/drivers/infiniband/hw/irdma/irdma.h
index 46c12334c735..4789e85d717b 100644
--- a/drivers/infiniband/hw/irdma/irdma.h
+++ b/drivers/infiniband/hw/irdma/irdma.h
@@ -127,6 +127,7 @@ struct irdma_hw_attrs {
u64 max_hw_outbound_msg_size;
u64 max_hw_inbound_msg_size;
u64 max_mr_size;
+ u64 page_size_cap;
u32 min_hw_qp_id;
u32 min_hw_aeq_size;
u32 max_hw_aeq_size;
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index c4412ece5a6d..96135a228f26 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -32,7 +32,7 @@ static int irdma_query_device(struct ib_device *ibdev,
props->vendor_part_id = pcidev->device;
props->hw_ver = rf->pcidev->revision;
- props->page_size_cap = SZ_4K | SZ_2M | SZ_1G;
+ props->page_size_cap = hw_attrs->page_size_cap;
props->max_mr_size = hw_attrs->max_mr_size;
props->max_qp = rf->max_qp - rf->used_qps;
props->max_qp_wr = hw_attrs->max_qp_wr;
@@ -2781,7 +2781,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) {
iwmr->page_size = ib_umem_find_best_pgsz(region,
- SZ_4K | SZ_2M | SZ_1G,
+ iwdev->rf->sc_dev.hw_attrs.page_size_cap,
virt);
if (unlikely(!iwmr->page_size)) {
kfree(iwmr);
diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c
index 3ad9870db108..aa45a9fee6a0 100644
--- a/drivers/input/touchscreen/goodix.c
+++ b/drivers/input/touchscreen/goodix.c
@@ -900,6 +900,11 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts)
} else {
dev_warn(dev, "Unexpected ACPI resources: gpio_count %d, gpio_int_idx %d\n",
ts->gpio_count, ts->gpio_int_idx);
+ /*
+ * On some devices _PS0 does a reset for us and
+ * sometimes this is necessary for things to work.
+ */
+ acpi_device_fix_up_power(ACPI_COMPANION(dev));
return -EINVAL;
}
diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c
index 43c521f50c85..3dda6eaabdab 100644
--- a/drivers/input/touchscreen/usbtouchscreen.c
+++ b/drivers/input/touchscreen/usbtouchscreen.c
@@ -1654,6 +1654,9 @@ static int usbtouch_probe(struct usb_interface *intf,
if (id->driver_info == DEVTYPE_IGNORE)
return -ENODEV;
+ if (id->driver_info >= ARRAY_SIZE(usbtouch_dev_info))
+ return -ENODEV;
+
endpoint = usbtouch_get_input_endpoint(intf->cur_altsetting);
if (!endpoint)
return -ENXIO;
diff --git a/drivers/input/touchscreen/wm97xx-core.c b/drivers/input/touchscreen/wm97xx-core.c
index 2757c7768ffe..f51ab5614532 100644
--- a/drivers/input/touchscreen/wm97xx-core.c
+++ b/drivers/input/touchscreen/wm97xx-core.c
@@ -758,7 +758,9 @@ batt_err:
static int wm97xx_mfd_remove(struct platform_device *pdev)
{
- return wm97xx_remove(&pdev->dev);
+ wm97xx_remove(&pdev->dev);
+
+ return 0;
}
static int __maybe_unused wm97xx_suspend(struct device *dev)
diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
index e285a220c913..51bd66a45a11 100644
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv-iommu.c
@@ -194,7 +194,7 @@ hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg)
u32 vector;
struct irq_cfg *cfg;
int ioapic_id;
- struct cpumask *affinity;
+ const struct cpumask *affinity;
int cpu;
struct hv_interrupt_entry entry;
struct hyperv_root_ir_data *data = irq_data->chip_data;
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index bbb11cb8b0f7..2935912b195f 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -8,7 +8,7 @@ config IRQCHIP
config ARM_GIC
bool
select IRQ_DOMAIN_HIERARCHY
- select GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
config ARM_GIC_PM
bool
@@ -34,7 +34,7 @@ config ARM_GIC_V3
bool
select IRQ_DOMAIN_HIERARCHY
select PARTITION_PERCPU
- select GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
config ARM_GIC_V3_ITS
bool
@@ -76,7 +76,7 @@ config ARMADA_370_XP_IRQ
bool
select GENERIC_IRQ_CHIP
select PCI_MSI if PCI
- select GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
config ALPINE_MSI
bool
@@ -112,7 +112,7 @@ config BCM6345_L1_IRQ
bool
select GENERIC_IRQ_CHIP
select IRQ_DOMAIN
- select GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
config BCM7038_L1_IRQ
tristate "Broadcom STB 7038-style L1/L2 interrupt controller driver"
@@ -120,7 +120,7 @@ config BCM7038_L1_IRQ
default ARCH_BRCMSTB || BMIPS_GENERIC
select GENERIC_IRQ_CHIP
select IRQ_DOMAIN
- select GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
config BCM7120_L2_IRQ
tristate "Broadcom STB 7120-style L2 interrupt controller driver"
@@ -177,9 +177,9 @@ config MADERA_IRQ
config IRQ_MIPS_CPU
bool
select GENERIC_IRQ_CHIP
- select GENERIC_IRQ_IPI if SYS_SUPPORTS_MULTITHREADING
+ select GENERIC_IRQ_IPI if SMP && SYS_SUPPORTS_MULTITHREADING
select IRQ_DOMAIN
- select GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
config CLPS711X_IRQCHIP
bool
@@ -242,6 +242,14 @@ config RENESAS_RZA1_IRQC
Enable support for the Renesas RZ/A1 Interrupt Controller, to use up
to 8 external interrupts with configurable sense select.
+config RENESAS_RZG2L_IRQC
+	bool "Renesas RZ/G2L (and similar SoCs) IRQC support" if COMPILE_TEST
+ select GENERIC_IRQ_CHIP
+ select IRQ_DOMAIN_HIERARCHY
+ help
+	  Enable support for the Renesas RZ/G2L (and similar SoCs) Interrupt
+	  Controller for external devices.
+
config SL28CPLD_INTC
bool "Kontron sl28cpld IRQ controller"
depends on MFD_SL28CPLD=y || COMPILE_TEST
@@ -294,7 +302,7 @@ config VERSATILE_FPGA_IRQ_NR
config XTENSA_MX
bool
select IRQ_DOMAIN
- select GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
config XILINX_INTC
bool "Xilinx Interrupt Controller IP"
@@ -322,7 +330,8 @@ config KEYSTONE_IRQ
config MIPS_GIC
bool
- select GENERIC_IRQ_IPI
+ select GENERIC_IRQ_IPI if SMP
+ select IRQ_DOMAIN_HIERARCHY
select MIPS_CM
config INGENIC_IRQ
@@ -530,6 +539,7 @@ config SIFIVE_PLIC
bool "SiFive Platform-Level Interrupt Controller"
depends on RISCV
select IRQ_DOMAIN_HIERARCHY
+ select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
help
This enables support for the PLIC chip found in SiFive (and
potentially other) RISC-V systems. The PLIC controls devices
@@ -546,6 +556,16 @@ config EXYNOS_IRQ_COMBINER
Say yes here to add support for the IRQ combiner devices embedded
in Samsung Exynos chips.
+config IRQ_LOONGARCH_CPU
+ bool
+ select GENERIC_IRQ_CHIP
+ select IRQ_DOMAIN
+ select GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ help
+ Support for the LoongArch CPU Interrupt Controller. For details of
+ irq chip hierarchy on LoongArch platforms please read the document
+ Documentation/loongarch/irq-chip-model.rst.
+
config LOONGSON_LIOINTC
bool "Loongson Local I/O Interrupt Controller"
depends on MACH_LOONGSON64
@@ -555,6 +575,16 @@ config LOONGSON_LIOINTC
help
Support for the Loongson Local I/O Interrupt Controller.
+config LOONGSON_EIOINTC
+ bool "Loongson Extend I/O Interrupt Controller"
+ depends on LOONGARCH
+ depends on MACH_LOONGSON64
+ default MACH_LOONGSON64
+ select IRQ_DOMAIN_HIERARCHY
+ select GENERIC_IRQ_CHIP
+ help
+ Support for the Loongson3 Extend I/O Interrupt Vector Controller.
+
config LOONGSON_HTPIC
bool "Loongson3 HyperTransport PIC Controller"
depends on MACH_LOONGSON64 && MIPS
@@ -574,7 +604,7 @@ config LOONGSON_HTVEC
config LOONGSON_PCH_PIC
bool "Loongson PCH PIC Controller"
- depends on MACH_LOONGSON64 || COMPILE_TEST
+ depends on MACH_LOONGSON64
default MACH_LOONGSON64
select IRQ_DOMAIN_HIERARCHY
select IRQ_FASTEOI_HIERARCHY_HANDLERS
@@ -583,7 +613,7 @@ config LOONGSON_PCH_PIC
config LOONGSON_PCH_MSI
bool "Loongson PCH MSI Controller"
- depends on MACH_LOONGSON64 || COMPILE_TEST
+ depends on MACH_LOONGSON64
depends on PCI
default MACH_LOONGSON64
select IRQ_DOMAIN_HIERARCHY
@@ -591,6 +621,14 @@ config LOONGSON_PCH_MSI
help
Support for the Loongson PCH MSI Controller.
+config LOONGSON_PCH_LPC
+ bool "Loongson PCH LPC Controller"
+ depends on MACH_LOONGSON64
+ default (MACH_LOONGSON64 && LOONGARCH)
+ select IRQ_DOMAIN_HIERARCHY
+ help
+ Support for the Loongson PCH LPC Controller.
+
config MST_IRQ
bool "MStar Interrupt Controller"
depends on ARCH_MEDIATEK || ARCH_MSTARV7 || COMPILE_TEST
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 5b67450a9538..86450eb11398 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -51,6 +51,7 @@ obj-$(CONFIG_RDA_INTC) += irq-rda-intc.o
obj-$(CONFIG_RENESAS_INTC_IRQPIN) += irq-renesas-intc-irqpin.o
obj-$(CONFIG_RENESAS_IRQC) += irq-renesas-irqc.o
obj-$(CONFIG_RENESAS_RZA1_IRQC) += irq-renesas-rza1.o
+obj-$(CONFIG_RENESAS_RZG2L_IRQC) += irq-renesas-rzg2l.o
obj-$(CONFIG_VERSATILE_FPGA_IRQ) += irq-versatile-fpga.o
obj-$(CONFIG_ARCH_NSPIRE) += irq-zevio.o
obj-$(CONFIG_ARCH_VT8500) += irq-vt8500.o
@@ -103,11 +104,14 @@ obj-$(CONFIG_LS1X_IRQ) += irq-ls1x.o
obj-$(CONFIG_TI_SCI_INTR_IRQCHIP) += irq-ti-sci-intr.o
obj-$(CONFIG_TI_SCI_INTA_IRQCHIP) += irq-ti-sci-inta.o
obj-$(CONFIG_TI_PRUSS_INTC) += irq-pruss-intc.o
+obj-$(CONFIG_IRQ_LOONGARCH_CPU) += irq-loongarch-cpu.o
obj-$(CONFIG_LOONGSON_LIOINTC) += irq-loongson-liointc.o
+obj-$(CONFIG_LOONGSON_EIOINTC) += irq-loongson-eiointc.o
obj-$(CONFIG_LOONGSON_HTPIC) += irq-loongson-htpic.o
obj-$(CONFIG_LOONGSON_HTVEC) += irq-loongson-htvec.o
obj-$(CONFIG_LOONGSON_PCH_PIC) += irq-loongson-pch-pic.o
obj-$(CONFIG_LOONGSON_PCH_MSI) += irq-loongson-pch-msi.o
+obj-$(CONFIG_LOONGSON_PCH_LPC) += irq-loongson-pch-lpc.o
obj-$(CONFIG_MST_IRQ) += irq-mst-intc.o
obj-$(CONFIG_SL28CPLD_INTC) += irq-sl28cpld.o
obj-$(CONFIG_MACH_REALTEK_RTL) += irq-realtek-rtl.o
diff --git a/drivers/irqchip/irq-bcm6345-l1.c b/drivers/irqchip/irq-bcm6345-l1.c
index 142a7431745f..6899e37810a8 100644
--- a/drivers/irqchip/irq-bcm6345-l1.c
+++ b/drivers/irqchip/irq-bcm6345-l1.c
@@ -216,11 +216,11 @@ static int bcm6345_l1_set_affinity(struct irq_data *d,
enabled = intc->cpus[old_cpu]->enable_cache[word] & mask;
if (enabled)
__bcm6345_l1_mask(d);
- cpumask_copy(irq_data_get_affinity_mask(d), dest);
+ irq_data_update_affinity(d, dest);
if (enabled)
__bcm6345_l1_unmask(d);
} else {
- cpumask_copy(irq_data_get_affinity_mask(d), dest);
+ irq_data_update_affinity(d, dest);
}
raw_spin_unlock_irqrestore(&intc->lock, flags);
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 2d25bca63d2a..262658fd5f9e 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1783,7 +1783,7 @@ static void gic_enable_nmi_support(void)
* the security state of the GIC (controlled by the GICD_CTRL.DS bit)
* and if Group 0 interrupts can be delivered to Linux in the non-secure
* world as FIQs (controlled by the SCR_EL3.FIQ bit). These affect the
- * the ICC_PMR_EL1 register and the priority that software assigns to
+ * ICC_PMR_EL1 register and the priority that software assigns to
* interrupts:
*
* GICD_CTRL.DS | SCR_EL3.FIQ | ICC_PMR_EL1 | Group 1 priority
@@ -2381,11 +2381,17 @@ static void __init gic_acpi_setup_kvm_info(void)
vgic_set_kvm_info(&gic_v3_kvm_info);
}
+static struct fwnode_handle *gsi_domain_handle;
+
+static struct fwnode_handle *gic_v3_get_gsi_domain_id(u32 gsi)
+{
+ return gsi_domain_handle;
+}
+
static int __init
gic_acpi_init(union acpi_subtable_headers *header, const unsigned long end)
{
struct acpi_madt_generic_distributor *dist;
- struct fwnode_handle *domain_handle;
size_t size;
int i, err;
@@ -2417,18 +2423,18 @@ gic_acpi_init(union acpi_subtable_headers *header, const unsigned long end)
if (err)
goto out_redist_unmap;
- domain_handle = irq_domain_alloc_fwnode(&dist->base_address);
- if (!domain_handle) {
+ gsi_domain_handle = irq_domain_alloc_fwnode(&dist->base_address);
+ if (!gsi_domain_handle) {
err = -ENOMEM;
goto out_redist_unmap;
}
err = gic_init_bases(acpi_data.dist_base, acpi_data.redist_regs,
- acpi_data.nr_redist_regions, 0, domain_handle);
+ acpi_data.nr_redist_regions, 0, gsi_domain_handle);
if (err)
goto out_fwhandle_free;
- acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle);
+ acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, gic_v3_get_gsi_domain_id);
if (static_branch_likely(&supports_deactivate_key))
gic_acpi_setup_kvm_info();
@@ -2436,7 +2442,7 @@ gic_acpi_init(union acpi_subtable_headers *header, const unsigned long end)
return 0;
out_fwhandle_free:
- irq_domain_free_fwnode(domain_handle);
+ irq_domain_free_fwnode(gsi_domain_handle);
out_redist_unmap:
for (i = 0; i < acpi_data.nr_redist_regions; i++)
if (acpi_data.redist_regs[i].redist_base)
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 820404cb56bc..4c7bae0ec8f9 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -1682,11 +1682,17 @@ static void __init gic_acpi_setup_kvm_info(void)
vgic_set_kvm_info(&gic_v2_kvm_info);
}
+static struct fwnode_handle *gsi_domain_handle;
+
+static struct fwnode_handle *gic_v2_get_gsi_domain_id(u32 gsi)
+{
+ return gsi_domain_handle;
+}
+
static int __init gic_v2_acpi_init(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_madt_generic_distributor *dist;
- struct fwnode_handle *domain_handle;
struct gic_chip_data *gic = &gic_data[0];
int count, ret;
@@ -1724,22 +1730,22 @@ static int __init gic_v2_acpi_init(union acpi_subtable_headers *header,
/*
* Initialize GIC instance zero (no multi-GIC support).
*/
- domain_handle = irq_domain_alloc_fwnode(&dist->base_address);
- if (!domain_handle) {
+ gsi_domain_handle = irq_domain_alloc_fwnode(&dist->base_address);
+ if (!gsi_domain_handle) {
pr_err("Unable to allocate domain handle\n");
gic_teardown(gic);
return -ENOMEM;
}
- ret = __gic_init_bases(gic, domain_handle);
+ ret = __gic_init_bases(gic, gsi_domain_handle);
if (ret) {
pr_err("Failed to initialise GIC\n");
- irq_domain_free_fwnode(domain_handle);
+ irq_domain_free_fwnode(gsi_domain_handle);
gic_teardown(gic);
return ret;
}
- acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle);
+ acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, gic_v2_get_gsi_domain_id);
if (IS_ENABLED(CONFIG_ARM_GIC_V2M))
gicv2m_init(NULL, gic_data[0].domain);
diff --git a/drivers/irqchip/irq-loongarch-cpu.c b/drivers/irqchip/irq-loongarch-cpu.c
new file mode 100644
index 000000000000..327f3ab62c03
--- /dev/null
+++ b/drivers/irqchip/irq-loongarch-cpu.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+
+#include <asm/loongarch.h>
+#include <asm/setup.h>
+
+static struct irq_domain *irq_domain;
+struct fwnode_handle *cpuintc_handle;
+
+static u32 lpic_gsi_to_irq(u32 gsi)
+{
+	/* Only the PCH irqdomain needs this GSI-to-IRQ fallback on LoongArch. */
+ if (gsi >= GSI_MIN_PCH_IRQ && gsi <= GSI_MAX_PCH_IRQ)
+ return acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH);
+
+ return 0;
+}
+
+static struct fwnode_handle *lpic_get_gsi_domain_id(u32 gsi)
+{
+ int id;
+ struct fwnode_handle *domain_handle = NULL;
+
+ switch (gsi) {
+ case GSI_MIN_CPU_IRQ ... GSI_MAX_CPU_IRQ:
+ if (liointc_handle)
+ domain_handle = liointc_handle;
+ break;
+
+ case GSI_MIN_LPC_IRQ ... GSI_MAX_LPC_IRQ:
+ if (pch_lpc_handle)
+ domain_handle = pch_lpc_handle;
+ break;
+
+ case GSI_MIN_PCH_IRQ ... GSI_MAX_PCH_IRQ:
+ id = find_pch_pic(gsi);
+ if (id >= 0 && pch_pic_handle[id])
+ domain_handle = pch_pic_handle[id];
+ break;
+ }
+
+ return domain_handle;
+}
+
+static void mask_loongarch_irq(struct irq_data *d)
+{
+ clear_csr_ecfg(ECFGF(d->hwirq));
+}
+
+static void unmask_loongarch_irq(struct irq_data *d)
+{
+ set_csr_ecfg(ECFGF(d->hwirq));
+}
+
+static struct irq_chip cpu_irq_controller = {
+ .name = "CPUINTC",
+ .irq_mask = mask_loongarch_irq,
+ .irq_unmask = unmask_loongarch_irq,
+};
+
+static void handle_cpu_irq(struct pt_regs *regs)
+{
+ int hwirq;
+ unsigned int estat = read_csr_estat() & CSR_ESTAT_IS;
+
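+	/* ffs() returns a 1-based bit position, hence the hwirq - 1 below */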
+ while ((hwirq = ffs(estat))) {
+ estat &= ~BIT(hwirq - 1);
+ generic_handle_domain_irq(irq_domain, hwirq - 1);
+ }
+}
+
+static int loongarch_cpu_intc_map(struct irq_domain *d, unsigned int irq,
+ irq_hw_number_t hwirq)
+{
+ irq_set_noprobe(irq);
+ irq_set_chip_and_handler(irq, &cpu_irq_controller, handle_percpu_irq);
+
+ return 0;
+}
+
+static const struct irq_domain_ops loongarch_cpu_intc_irq_domain_ops = {
+ .map = loongarch_cpu_intc_map,
+ .xlate = irq_domain_xlate_onecell,
+};
+
+static int __init
+liointc_parse_madt(union acpi_subtable_headers *header,
+ const unsigned long end)
+{
+ struct acpi_madt_lio_pic *liointc_entry = (struct acpi_madt_lio_pic *)header;
+
+ return liointc_acpi_init(irq_domain, liointc_entry);
+}
+
+static int __init
+eiointc_parse_madt(union acpi_subtable_headers *header,
+ const unsigned long end)
+{
+ struct acpi_madt_eio_pic *eiointc_entry = (struct acpi_madt_eio_pic *)header;
+
+ return eiointc_acpi_init(irq_domain, eiointc_entry);
+}
+
+static int __init acpi_cascade_irqdomain_init(void)
+{
+ acpi_table_parse_madt(ACPI_MADT_TYPE_LIO_PIC,
+ liointc_parse_madt, 0);
+ acpi_table_parse_madt(ACPI_MADT_TYPE_EIO_PIC,
+ eiointc_parse_madt, 0);
+ return 0;
+}
+
+static int __init cpuintc_acpi_init(union acpi_subtable_headers *header,
+ const unsigned long end)
+{
+ if (irq_domain)
+ return 0;
+
+ /* Mask interrupts. */
+ clear_csr_ecfg(ECFG0_IM);
+ clear_csr_estat(ESTATF_IP);
+
+ cpuintc_handle = irq_domain_alloc_fwnode(NULL);
+ irq_domain = irq_domain_create_linear(cpuintc_handle, EXCCODE_INT_NUM,
+ &loongarch_cpu_intc_irq_domain_ops, NULL);
+
+ if (!irq_domain)
+ panic("Failed to add irqdomain for LoongArch CPU");
+
+ set_handle_irq(&handle_cpu_irq);
+ acpi_set_irq_model(ACPI_IRQ_MODEL_LPIC, lpic_get_gsi_domain_id);
+ acpi_set_gsi_to_irq_fallback(lpic_gsi_to_irq);
+ acpi_cascade_irqdomain_init();
+
+ return 0;
+}
+
+IRQCHIP_ACPI_DECLARE(cpuintc_v1, ACPI_MADT_TYPE_CORE_PIC,
+ NULL, ACPI_MADT_CORE_PIC_VERSION_V1, cpuintc_acpi_init);
diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c
new file mode 100644
index 000000000000..80d8ca6f2d46
--- /dev/null
+++ b/drivers/irqchip/irq-loongson-eiointc.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Loongson Extend I/O Interrupt Controller support
+ *
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#define pr_fmt(fmt) "eiointc: " fmt
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+
+#define EIOINTC_REG_NODEMAP 0x14a0
+#define EIOINTC_REG_IPMAP 0x14c0
+#define EIOINTC_REG_ENABLE 0x1600
+#define EIOINTC_REG_BOUNCE 0x1680
+#define EIOINTC_REG_ISR 0x1800
+#define EIOINTC_REG_ROUTE 0x1c00
+
+#define VEC_REG_COUNT 4
+#define VEC_COUNT_PER_REG 64
+#define VEC_COUNT (VEC_REG_COUNT * VEC_COUNT_PER_REG)
+#define VEC_REG_IDX(irq_id) ((irq_id) / VEC_COUNT_PER_REG)
+#define VEC_REG_BIT(irq_id) ((irq_id) % VEC_COUNT_PER_REG)
+#define EIOINTC_ALL_ENABLE 0xffffffff
+
+#define MAX_EIO_NODES (NR_CPUS / CORES_PER_EIO_NODE)
+
+static int nr_pics;
+
+struct eiointc_priv {
+ u32 node;
+ nodemask_t node_map;
+ cpumask_t cpuspan_map;
+ struct fwnode_handle *domain_handle;
+ struct irq_domain *eiointc_domain;
+};
+
+static struct eiointc_priv *eiointc_priv[MAX_IO_PICS];
+
+static void eiointc_enable(void)
+{
+ uint64_t misc;
+
+ misc = iocsr_read64(LOONGARCH_IOCSR_MISC_FUNC);
+ misc |= IOCSR_MISC_FUNC_EXT_IOI_EN;
+ iocsr_write64(misc, LOONGARCH_IOCSR_MISC_FUNC);
+}
+
+static int cpu_to_eio_node(int cpu)
+{
+ return cpu_logical_map(cpu) / CORES_PER_EIO_NODE;
+}
+
+static void eiointc_set_irq_route(int pos, unsigned int cpu, unsigned int mnode, nodemask_t *node_map)
+{
+ int i, node, cpu_node, route_node;
+ unsigned char coremap;
+ uint32_t pos_off, data, data_byte, data_mask;
+
+ pos_off = pos & ~3;
+ data_byte = pos & 3;
+ data_mask = ~BIT_MASK(data_byte) & 0xf;
+
+ /* Calculate node and coremap of target irq */
+ cpu_node = cpu_logical_map(cpu) / CORES_PER_EIO_NODE;
+ coremap = BIT(cpu_logical_map(cpu) % CORES_PER_EIO_NODE);
+
+ for_each_online_cpu(i) {
+ node = cpu_to_eio_node(i);
+ if (!node_isset(node, *node_map))
+ continue;
+
+ /* EIO node 0 is in charge of inter-node interrupt dispatch */
+ route_node = (node == mnode) ? cpu_node : node;
+ data = ((coremap | (route_node << 4)) << (data_byte * 8));
+ csr_any_send(EIOINTC_REG_ROUTE + pos_off, data, data_mask, node * CORES_PER_EIO_NODE);
+ }
+}
+
+static DEFINE_RAW_SPINLOCK(affinity_lock);
+
+static int eiointc_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity, bool force)
+{
+ unsigned int cpu;
+ unsigned long flags;
+ uint32_t vector, regaddr;
+ struct cpumask intersect_affinity;
+ struct eiointc_priv *priv = d->domain->host_data;
+
+ raw_spin_lock_irqsave(&affinity_lock, flags);
+
+ cpumask_and(&intersect_affinity, affinity, cpu_online_mask);
+ cpumask_and(&intersect_affinity, &intersect_affinity, &priv->cpuspan_map);
+
+ if (cpumask_empty(&intersect_affinity)) {
+ raw_spin_unlock_irqrestore(&affinity_lock, flags);
+ return -EINVAL;
+ }
+ cpu = cpumask_first(&intersect_affinity);
+
+ vector = d->hwirq;
+ regaddr = EIOINTC_REG_ENABLE + ((vector >> 5) << 2);
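+	/* Each 32-bit ENABLE register covers 32 vectors */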
+
+ /* Mask target vector */
+ csr_any_send(regaddr, EIOINTC_ALL_ENABLE & (~BIT(vector & 0x1F)), 0x0, 0);
+ /* Set route for target vector */
+ eiointc_set_irq_route(vector, cpu, priv->node, &priv->node_map);
+ /* Unmask target vector */
+ csr_any_send(regaddr, EIOINTC_ALL_ENABLE, 0x0, 0);
+
+ irq_data_update_effective_affinity(d, cpumask_of(cpu));
+
+ raw_spin_unlock_irqrestore(&affinity_lock, flags);
+
+ return IRQ_SET_MASK_OK;
+}
+
+static int eiointc_index(int node)
+{
+ int i;
+
+ for (i = 0; i < nr_pics; i++) {
+ if (node_isset(node, eiointc_priv[i]->node_map))
+ return i;
+ }
+
+ return -1;
+}
+
+static int eiointc_router_init(unsigned int cpu)
+{
+ int i, bit;
+ uint32_t data;
+ uint32_t node = cpu_to_eio_node(cpu);
+ uint32_t index = eiointc_index(node);
+
+ if (index < 0) {
+ pr_err("Error: invalid nodemap!\n");
+ return -1;
+ }
+
+ if ((cpu_logical_map(cpu) % CORES_PER_EIO_NODE) == 0) {
+ eiointc_enable();
+
+ for (i = 0; i < VEC_COUNT / 32; i++) {
+ data = (((1 << (i * 2 + 1)) << 16) | (1 << (i * 2)));
+ iocsr_write32(data, EIOINTC_REG_NODEMAP + i * 4);
+ }
+
+ for (i = 0; i < VEC_COUNT / 32 / 4; i++) {
+ bit = BIT(1 + index); /* Route to IP[1 + index] */
+ data = bit | (bit << 8) | (bit << 16) | (bit << 24);
+ iocsr_write32(data, EIOINTC_REG_IPMAP + i * 4);
+ }
+
+ for (i = 0; i < VEC_COUNT / 4; i++) {
+ /* Route to Node-0 Core-0 */
+ if (index == 0)
+ bit = BIT(cpu_logical_map(0));
+ else
+ bit = (eiointc_priv[index]->node << 4) | 1;
+
+ data = bit | (bit << 8) | (bit << 16) | (bit << 24);
+ iocsr_write32(data, EIOINTC_REG_ROUTE + i * 4);
+ }
+
+ for (i = 0; i < VEC_COUNT / 32; i++) {
+ data = 0xffffffff;
+ iocsr_write32(data, EIOINTC_REG_ENABLE + i * 4);
+ iocsr_write32(data, EIOINTC_REG_BOUNCE + i * 4);
+ }
+ }
+
+ return 0;
+}
+
+static void eiointc_irq_dispatch(struct irq_desc *desc)
+{
+ int i;
+ u64 pending;
+ bool handled = false;
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct eiointc_priv *priv = irq_desc_get_handler_data(desc);
+
+ chained_irq_enter(chip, desc);
+
+ for (i = 0; i < VEC_REG_COUNT; i++) {
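+		/* Ack the pending vectors by writing them back to the ISR */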
+ pending = iocsr_read64(EIOINTC_REG_ISR + (i << 3));
+ iocsr_write64(pending, EIOINTC_REG_ISR + (i << 3));
+ while (pending) {
+ int bit = __ffs(pending);
+ int irq = bit + VEC_COUNT_PER_REG * i;
+
+ generic_handle_domain_irq(priv->eiointc_domain, irq);
+ pending &= ~BIT(bit);
+ handled = true;
+ }
+ }
+
+ if (!handled)
+ spurious_interrupt();
+
+ chained_irq_exit(chip, desc);
+}
+
+static void eiointc_ack_irq(struct irq_data *d)
+{
+}
+
+static void eiointc_mask_irq(struct irq_data *d)
+{
+}
+
+static void eiointc_unmask_irq(struct irq_data *d)
+{
+}
+
+static struct irq_chip eiointc_irq_chip = {
+ .name = "EIOINTC",
+ .irq_ack = eiointc_ack_irq,
+ .irq_mask = eiointc_mask_irq,
+ .irq_unmask = eiointc_unmask_irq,
+ .irq_set_affinity = eiointc_set_irq_affinity,
+};
+
+static int eiointc_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ int ret;
+ unsigned int i, type;
+ unsigned long hwirq = 0;
+ struct eiointc *priv = domain->host_data;
+
+ ret = irq_domain_translate_onecell(domain, arg, &hwirq, &type);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < nr_irqs; i++) {
+ irq_domain_set_info(domain, virq + i, hwirq + i, &eiointc_irq_chip,
+ priv, handle_edge_irq, NULL, NULL);
+ }
+
+ return 0;
+}
+
+static void eiointc_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ int i;
+
+ for (i = 0; i < nr_irqs; i++) {
+ struct irq_data *d = irq_domain_get_irq_data(domain, virq + i);
+
+ irq_set_handler(virq + i, NULL);
+ irq_domain_reset_irq_data(d);
+ }
+}
+
+static const struct irq_domain_ops eiointc_domain_ops = {
+ .translate = irq_domain_translate_onecell,
+ .alloc = eiointc_domain_alloc,
+ .free = eiointc_domain_free,
+};
+
+static void acpi_set_vec_parent(int node, struct irq_domain *parent, struct acpi_vector_group *vec_group)
+{
+ int i;
+
+ if (cpu_has_flatmode)
+ node = cpu_to_node(node * CORES_PER_EIO_NODE);
+
+ for (i = 0; i < MAX_IO_PICS; i++) {
+ if (node == vec_group[i].node) {
+ vec_group[i].parent = parent;
+ return;
+ }
+ }
+}
+
+struct irq_domain *acpi_get_vec_parent(int node, struct acpi_vector_group *vec_group)
+{
+ int i;
+
+ for (i = 0; i < MAX_IO_PICS; i++) {
+ if (node == vec_group[i].node)
+ return vec_group[i].parent;
+ }
+ return NULL;
+}
+
+static int __init
+pch_pic_parse_madt(union acpi_subtable_headers *header,
+ const unsigned long end)
+{
+ struct acpi_madt_bio_pic *pchpic_entry = (struct acpi_madt_bio_pic *)header;
+ unsigned int node = (pchpic_entry->address >> 44) & 0xf;
+ struct irq_domain *parent = acpi_get_vec_parent(node, pch_group);
+
+ if (parent)
+ return pch_pic_acpi_init(parent, pchpic_entry);
+
+ return -EINVAL;
+}
+
+static int __init
+pch_msi_parse_madt(union acpi_subtable_headers *header,
+ const unsigned long end)
+{
+ struct acpi_madt_msi_pic *pchmsi_entry = (struct acpi_madt_msi_pic *)header;
+ struct irq_domain *parent = acpi_get_vec_parent(eiointc_priv[nr_pics - 1]->node, msi_group);
+
+ if (parent)
+ return pch_msi_acpi_init(parent, pchmsi_entry);
+
+ return -EINVAL;
+}
+
+static int __init acpi_cascade_irqdomain_init(void)
+{
+ acpi_table_parse_madt(ACPI_MADT_TYPE_BIO_PIC,
+ pch_pic_parse_madt, 0);
+ acpi_table_parse_madt(ACPI_MADT_TYPE_MSI_PIC,
+ pch_msi_parse_madt, 1);
+ return 0;
+}
+
+int __init eiointc_acpi_init(struct irq_domain *parent,
+ struct acpi_madt_eio_pic *acpi_eiointc)
+{
+ int i, parent_irq;
+ unsigned long node_map;
+ struct eiointc_priv *priv;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_eiointc);
+ if (!priv->domain_handle) {
+ pr_err("Unable to allocate domain handle\n");
+ goto out_free_priv;
+ }
+
+ priv->node = acpi_eiointc->node;
+ node_map = acpi_eiointc->node_map ? : -1ULL;
+
+ for_each_possible_cpu(i) {
+ if (node_map & (1ULL << cpu_to_eio_node(i))) {
+ node_set(cpu_to_eio_node(i), priv->node_map);
+ cpumask_or(&priv->cpuspan_map, &priv->cpuspan_map, cpumask_of(i));
+ }
+ }
+
+ /* Setup IRQ domain */
+ priv->eiointc_domain = irq_domain_create_linear(priv->domain_handle, VEC_COUNT,
+ &eiointc_domain_ops, priv);
+ if (!priv->eiointc_domain) {
+ pr_err("loongson-eiointc: cannot add IRQ domain\n");
+ goto out_free_handle;
+ }
+
+ eiointc_priv[nr_pics++] = priv;
+
+ eiointc_router_init(0);
+
+ parent_irq = irq_create_mapping(parent, acpi_eiointc->cascade);
+ irq_set_chained_handler_and_data(parent_irq, eiointc_irq_dispatch, priv);
+
+ cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_LOONGARCH_STARTING,
+ "irqchip/loongarch/intc:starting",
+ eiointc_router_init, NULL);
+
+ acpi_set_vec_parent(acpi_eiointc->node, priv->eiointc_domain, pch_group);
+ acpi_set_vec_parent(acpi_eiointc->node, priv->eiointc_domain, msi_group);
+ acpi_cascade_irqdomain_init();
+
+ return 0;
+
+out_free_handle:
+ irq_domain_free_fwnode(priv->domain_handle);
+ priv->domain_handle = NULL;
+out_free_priv:
+ kfree(priv);
+
+ return -ENOMEM;
+}
diff --git a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c
index 8d05d8bcf56f..c4f3c886ad61 100644
--- a/drivers/irqchip/irq-loongson-liointc.c
+++ b/drivers/irqchip/irq-loongson-liointc.c
@@ -23,7 +23,7 @@
#endif
#define LIOINTC_CHIP_IRQ 32
-#define LIOINTC_NUM_PARENT 4
+#define LIOINTC_NUM_PARENT 4
#define LIOINTC_NUM_CORES 4
#define LIOINTC_INTC_CHIP_START 0x20
@@ -58,6 +58,8 @@ struct liointc_priv {
bool has_lpc_irq_errata;
};
+struct fwnode_handle *liointc_handle;
+
static void liointc_chained_handle_irq(struct irq_desc *desc)
{
struct liointc_handler_data *handler = irq_desc_get_handler_data(desc);
@@ -153,97 +155,79 @@ static void liointc_resume(struct irq_chip_generic *gc)
irq_gc_unlock_irqrestore(gc, flags);
}
-static const char * const parent_names[] = {"int0", "int1", "int2", "int3"};
-static const char * const core_reg_names[] = {"isr0", "isr1", "isr2", "isr3"};
+static int parent_irq[LIOINTC_NUM_PARENT];
+static u32 parent_int_map[LIOINTC_NUM_PARENT];
+static const char *const parent_names[] = {"int0", "int1", "int2", "int3"};
+static const char *const core_reg_names[] = {"isr0", "isr1", "isr2", "isr3"};
-static void __iomem *liointc_get_reg_byname(struct device_node *node,
- const char *name)
+static int liointc_domain_xlate(struct irq_domain *d, struct device_node *ctrlr,
+ const u32 *intspec, unsigned int intsize,
+ unsigned long *out_hwirq, unsigned int *out_type)
{
- int index = of_property_match_string(node, "reg-names", name);
-
- if (index < 0)
- return NULL;
-
- return of_iomap(node, index);
+ if (WARN_ON(intsize < 1))
+ return -EINVAL;
+ *out_hwirq = intspec[0] - GSI_MIN_CPU_IRQ;
+ *out_type = IRQ_TYPE_NONE;
+ return 0;
}
-static int __init liointc_of_init(struct device_node *node,
- struct device_node *parent)
+static const struct irq_domain_ops acpi_irq_gc_ops = {
+ .map = irq_map_generic_chip,
+ .unmap = irq_unmap_generic_chip,
+ .xlate = liointc_domain_xlate,
+};
+
+static int liointc_init(phys_addr_t addr, unsigned long size, int revision,
+ struct fwnode_handle *domain_handle, struct device_node *node)
{
+ int i, err;
+ void __iomem *base;
+ struct irq_chip_type *ct;
struct irq_chip_generic *gc;
struct irq_domain *domain;
- struct irq_chip_type *ct;
struct liointc_priv *priv;
- void __iomem *base;
- u32 of_parent_int_map[LIOINTC_NUM_PARENT];
- int parent_irq[LIOINTC_NUM_PARENT];
- bool have_parent = FALSE;
- int sz, i, err = 0;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
- if (of_device_is_compatible(node, "loongson,liointc-2.0")) {
- base = liointc_get_reg_byname(node, "main");
- if (!base) {
- err = -ENODEV;
- goto out_free_priv;
- }
+ base = ioremap(addr, size);
+ if (!base)
+ goto out_free_priv;
- for (i = 0; i < LIOINTC_NUM_CORES; i++)
- priv->core_isr[i] = liointc_get_reg_byname(node, core_reg_names[i]);
- if (!priv->core_isr[0]) {
- err = -ENODEV;
- goto out_iounmap_base;
- }
- } else {
- base = of_iomap(node, 0);
- if (!base) {
- err = -ENODEV;
- goto out_free_priv;
- }
+ for (i = 0; i < LIOINTC_NUM_CORES; i++)
+ priv->core_isr[i] = base + LIOINTC_REG_INTC_STATUS;
- for (i = 0; i < LIOINTC_NUM_CORES; i++)
- priv->core_isr[i] = base + LIOINTC_REG_INTC_STATUS;
- }
+ for (i = 0; i < LIOINTC_NUM_PARENT; i++)
+ priv->handler[i].parent_int_map = parent_int_map[i];
- for (i = 0; i < LIOINTC_NUM_PARENT; i++) {
- parent_irq[i] = of_irq_get_byname(node, parent_names[i]);
- if (parent_irq[i] > 0)
- have_parent = TRUE;
- }
- if (!have_parent) {
- err = -ENODEV;
- goto out_iounmap_isr;
- }
+ if (revision > 1) {
+ for (i = 0; i < LIOINTC_NUM_CORES; i++) {
+ int index = of_property_match_string(node,
+ "reg-names", core_reg_names[i]);
- sz = of_property_read_variable_u32_array(node,
- "loongson,parent_int_map",
- &of_parent_int_map[0],
- LIOINTC_NUM_PARENT,
- LIOINTC_NUM_PARENT);
- if (sz < 4) {
- pr_err("loongson-liointc: No parent_int_map\n");
- err = -ENODEV;
- goto out_iounmap_isr;
- }
+ if (index < 0)
+ return -EINVAL;
- for (i = 0; i < LIOINTC_NUM_PARENT; i++)
- priv->handler[i].parent_int_map = of_parent_int_map[i];
+ priv->core_isr[i] = of_iomap(node, index);
+ }
+ }
/* Setup IRQ domain */
- domain = irq_domain_add_linear(node, 32,
+ if (!acpi_disabled)
+ domain = irq_domain_create_linear(domain_handle, LIOINTC_CHIP_IRQ,
+ &acpi_irq_gc_ops, priv);
+ else
+ domain = irq_domain_create_linear(domain_handle, LIOINTC_CHIP_IRQ,
&irq_generic_chip_ops, priv);
if (!domain) {
pr_err("loongson-liointc: cannot add IRQ domain\n");
- err = -EINVAL;
- goto out_iounmap_isr;
+ goto out_iounmap;
}
- err = irq_alloc_domain_generic_chips(domain, 32, 1,
- node->full_name, handle_level_irq,
- IRQ_NOPROBE, 0, 0);
+ err = irq_alloc_domain_generic_chips(domain, LIOINTC_CHIP_IRQ, 1,
+ (node ? node->full_name : "LIOINTC"),
+ handle_level_irq, 0, IRQ_NOPROBE, 0);
if (err) {
pr_err("loongson-liointc: unable to register IRQ domain\n");
goto out_free_domain;
@@ -299,24 +283,93 @@ static int __init liointc_of_init(struct device_node *node,
liointc_chained_handle_irq, &priv->handler[i]);
}
+ liointc_handle = domain_handle;
return 0;
out_free_domain:
irq_domain_remove(domain);
-out_iounmap_isr:
- for (i = 0; i < LIOINTC_NUM_CORES; i++) {
- if (!priv->core_isr[i])
- continue;
- iounmap(priv->core_isr[i]);
- }
-out_iounmap_base:
+out_iounmap:
iounmap(base);
out_free_priv:
kfree(priv);
- return err;
+ return -EINVAL;
+}
+
+#ifdef CONFIG_OF
+
+static int __init liointc_of_init(struct device_node *node,
+ struct device_node *parent)
+{
+ bool have_parent = FALSE;
+ int sz, i, index, revision, err = 0;
+ struct resource res;
+
+ if (!of_device_is_compatible(node, "loongson,liointc-2.0")) {
+ index = 0;
+ revision = 1;
+ } else {
+ index = of_property_match_string(node, "reg-names", "main");
+ revision = 2;
+ }
+
+ if (of_address_to_resource(node, index, &res))
+ return -EINVAL;
+
+ for (i = 0; i < LIOINTC_NUM_PARENT; i++) {
+ parent_irq[i] = of_irq_get_byname(node, parent_names[i]);
+ if (parent_irq[i] > 0)
+ have_parent = TRUE;
+ }
+ if (!have_parent)
+ return -ENODEV;
+
+ sz = of_property_read_variable_u32_array(node,
+ "loongson,parent_int_map",
+ &parent_int_map[0],
+ LIOINTC_NUM_PARENT,
+ LIOINTC_NUM_PARENT);
+ if (sz < 4) {
+ pr_err("loongson-liointc: No parent_int_map\n");
+ return -ENODEV;
+ }
+
+ err = liointc_init(res.start, resource_size(&res),
+ revision, of_node_to_fwnode(node), node);
+ if (err < 0)
+ return err;
+
+ return 0;
}
IRQCHIP_DECLARE(loongson_liointc_1_0, "loongson,liointc-1.0", liointc_of_init);
IRQCHIP_DECLARE(loongson_liointc_1_0a, "loongson,liointc-1.0a", liointc_of_init);
IRQCHIP_DECLARE(loongson_liointc_2_0, "loongson,liointc-2.0", liointc_of_init);
+
+#endif
+
+#ifdef CONFIG_ACPI
+int __init liointc_acpi_init(struct irq_domain *parent, struct acpi_madt_lio_pic *acpi_liointc)
+{
+ int ret;
+ struct fwnode_handle *domain_handle;
+
+ parent_int_map[0] = acpi_liointc->cascade_map[0];
+ parent_int_map[1] = acpi_liointc->cascade_map[1];
+
+ parent_irq[0] = irq_create_mapping(parent, acpi_liointc->cascade[0]);
+ parent_irq[1] = irq_create_mapping(parent, acpi_liointc->cascade[1]);
+
+ domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_liointc);
+ if (!domain_handle) {
+ pr_err("Unable to allocate domain handle\n");
+ return -ENOMEM;
+ }
+ ret = liointc_init(acpi_liointc->address, acpi_liointc->size,
+ 1, domain_handle, NULL);
+ if (ret)
+ irq_domain_free_fwnode(domain_handle);
+
+ return ret;
+}
+#endif
diff --git a/drivers/irqchip/irq-loongson-pch-lpc.c b/drivers/irqchip/irq-loongson-pch-lpc.c
new file mode 100644
index 000000000000..bf2324910a75
--- /dev/null
+++ b/drivers/irqchip/irq-loongson-pch-lpc.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Loongson LPC Interrupt Controller support
+ *
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#define pr_fmt(fmt) "lpc: " fmt
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+
+/* Registers */
+#define LPC_INT_CTL 0x00
+#define LPC_INT_ENA 0x04
+#define LPC_INT_STS 0x08
+#define LPC_INT_CLR 0x0c
+#define LPC_INT_POL 0x10
+#define LPC_COUNT 16
+
+/* LPC_INT_CTL */
+#define LPC_INT_CTL_EN BIT(31)
+
+struct pch_lpc {
+ void __iomem *base;
+ struct irq_domain *lpc_domain;
+ raw_spinlock_t lpc_lock;
+ u32 saved_reg_ctl;
+ u32 saved_reg_ena;
+ u32 saved_reg_pol;
+};
+
+struct fwnode_handle *pch_lpc_handle;
+
+static void lpc_irq_ack(struct irq_data *d)
+{
+ unsigned long flags;
+ struct pch_lpc *priv = d->domain->host_data;
+
+ raw_spin_lock_irqsave(&priv->lpc_lock, flags);
+ writel(0x1 << d->hwirq, priv->base + LPC_INT_CLR);
+ raw_spin_unlock_irqrestore(&priv->lpc_lock, flags);
+}
+
+static void lpc_irq_mask(struct irq_data *d)
+{
+ unsigned long flags;
+ struct pch_lpc *priv = d->domain->host_data;
+
+ raw_spin_lock_irqsave(&priv->lpc_lock, flags);
+ writel(readl(priv->base + LPC_INT_ENA) & (~(0x1 << (d->hwirq))),
+ priv->base + LPC_INT_ENA);
+ raw_spin_unlock_irqrestore(&priv->lpc_lock, flags);
+}
+
+static void lpc_irq_unmask(struct irq_data *d)
+{
+ unsigned long flags;
+ struct pch_lpc *priv = d->domain->host_data;
+
+ raw_spin_lock_irqsave(&priv->lpc_lock, flags);
+ writel(readl(priv->base + LPC_INT_ENA) | (0x1 << (d->hwirq)),
+ priv->base + LPC_INT_ENA);
+ raw_spin_unlock_irqrestore(&priv->lpc_lock, flags);
+}
+
+static int lpc_irq_set_type(struct irq_data *d, unsigned int type)
+{
+ u32 val;
+ u32 mask = 0x1 << (d->hwirq);
+ struct pch_lpc *priv = d->domain->host_data;
+
+ if (!(type & IRQ_TYPE_LEVEL_MASK))
+ return 0;
+
+ val = readl(priv->base + LPC_INT_POL);
+
+ if (type == IRQ_TYPE_LEVEL_HIGH)
+ val |= mask;
+ else
+ val &= ~mask;
+
+ writel(val, priv->base + LPC_INT_POL);
+
+ return 0;
+}
+
+static const struct irq_chip pch_lpc_irq_chip = {
+ .name = "PCH LPC",
+ .irq_mask = lpc_irq_mask,
+ .irq_unmask = lpc_irq_unmask,
+ .irq_ack = lpc_irq_ack,
+ .irq_set_type = lpc_irq_set_type,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
+};
+
+static void lpc_irq_dispatch(struct irq_desc *desc)
+{
+ u32 pending, bit;
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct pch_lpc *priv = irq_desc_get_handler_data(desc);
+
+ chained_irq_enter(chip, desc);
+
+ pending = readl(priv->base + LPC_INT_ENA);
+ pending &= readl(priv->base + LPC_INT_STS);
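+	/* Service only sources that are both enabled and asserted */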
+ if (!pending)
+ spurious_interrupt();
+
+ while (pending) {
+ bit = __ffs(pending);
+
+ generic_handle_domain_irq(priv->lpc_domain, bit);
+ pending &= ~BIT(bit);
+ }
+ chained_irq_exit(chip, desc);
+}
+
+static int pch_lpc_map(struct irq_domain *d, unsigned int irq,
+ irq_hw_number_t hw)
+{
+ irq_set_chip_and_handler(irq, &pch_lpc_irq_chip, handle_level_irq);
+ return 0;
+}
+
+static const struct irq_domain_ops pch_lpc_domain_ops = {
+ .map = pch_lpc_map,
+ .translate = irq_domain_translate_twocell,
+};
+
+static void pch_lpc_reset(struct pch_lpc *priv)
+{
+ /* Enable the LPC interrupt, bit31: en bit30: edge */
+ writel(LPC_INT_CTL_EN, priv->base + LPC_INT_CTL);
+ writel(0, priv->base + LPC_INT_ENA);
+	/* Clear all 18 interrupt bits */
+ writel(GENMASK(17, 0), priv->base + LPC_INT_CLR);
+}
+
+static int pch_lpc_disabled(struct pch_lpc *priv)
+{
+ return (readl(priv->base + LPC_INT_ENA) == 0xffffffff) &&
+ (readl(priv->base + LPC_INT_STS) == 0xffffffff);
+}
+
+int __init pch_lpc_acpi_init(struct irq_domain *parent,
+ struct acpi_madt_lpc_pic *acpi_pchlpc)
+{
+ int parent_irq;
+ struct pch_lpc *priv;
+ struct irq_fwspec fwspec;
+ struct fwnode_handle *irq_handle;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ raw_spin_lock_init(&priv->lpc_lock);
+
+ priv->base = ioremap(acpi_pchlpc->address, acpi_pchlpc->size);
+ if (!priv->base)
+ goto free_priv;
+
+ if (pch_lpc_disabled(priv)) {
+ pr_err("Failed to get LPC status\n");
+ goto iounmap_base;
+ }
+
+ irq_handle = irq_domain_alloc_named_fwnode("lpcintc");
+ if (!irq_handle) {
+ pr_err("Unable to allocate domain handle\n");
+ goto iounmap_base;
+ }
+
+ priv->lpc_domain = irq_domain_create_linear(irq_handle, LPC_COUNT,
+ &pch_lpc_domain_ops, priv);
+ if (!priv->lpc_domain) {
+ pr_err("Failed to create IRQ domain\n");
+ goto free_irq_handle;
+ }
+ pch_lpc_reset(priv);
+
+ fwspec.fwnode = parent->fwnode;
+ fwspec.param[0] = acpi_pchlpc->cascade + GSI_MIN_PCH_IRQ;
+ fwspec.param[1] = IRQ_TYPE_LEVEL_HIGH;
+ fwspec.param_count = 2;
+ parent_irq = irq_create_fwspec_mapping(&fwspec);
+ irq_set_chained_handler_and_data(parent_irq, lpc_irq_dispatch, priv);
+
+ pch_lpc_handle = irq_handle;
+ return 0;
+
+free_irq_handle:
+ irq_domain_free_fwnode(irq_handle);
+iounmap_base:
+ iounmap(priv->base);
+free_priv:
+ kfree(priv);
+
+ return -ENOMEM;
+}
diff --git a/drivers/irqchip/irq-loongson-pch-msi.c b/drivers/irqchip/irq-loongson-pch-msi.c
index e3801c4a77ed..d0e8551bebfa 100644
--- a/drivers/irqchip/irq-loongson-pch-msi.c
+++ b/drivers/irqchip/irq-loongson-pch-msi.c
@@ -15,6 +15,8 @@
#include <linux/pci.h>
#include <linux/slab.h>
+static int nr_pics;
+
struct pch_msi_data {
struct mutex msi_map_lock;
phys_addr_t doorbell;
@@ -23,6 +25,8 @@ struct pch_msi_data {
unsigned long *msi_map;
};
+static struct fwnode_handle *pch_msi_handle[MAX_IO_PICS];
+
static void pch_msi_mask_msi_irq(struct irq_data *d)
{
pci_msi_mask_irq(d);
@@ -154,12 +158,12 @@ static const struct irq_domain_ops pch_msi_middle_domain_ops = {
};
static int pch_msi_init_domains(struct pch_msi_data *priv,
- struct device_node *node,
- struct irq_domain *parent)
+ struct irq_domain *parent,
+ struct fwnode_handle *domain_handle)
{
struct irq_domain *middle_domain, *msi_domain;
- middle_domain = irq_domain_create_linear(of_node_to_fwnode(node),
+ middle_domain = irq_domain_create_linear(domain_handle,
priv->num_irqs,
&pch_msi_middle_domain_ops,
priv);
@@ -171,7 +175,7 @@ static int pch_msi_init_domains(struct pch_msi_data *priv,
middle_domain->parent = parent;
irq_domain_update_bus_token(middle_domain, DOMAIN_BUS_NEXUS);
- msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(node),
+ msi_domain = pci_msi_create_irq_domain(domain_handle,
&pch_msi_domain_info,
middle_domain);
if (!msi_domain) {
@@ -183,19 +187,11 @@ static int pch_msi_init_domains(struct pch_msi_data *priv,
return 0;
}
-static int pch_msi_init(struct device_node *node,
- struct device_node *parent)
+static int pch_msi_init(phys_addr_t msg_address, int irq_base, int irq_count,
+ struct irq_domain *parent_domain, struct fwnode_handle *domain_handle)
{
- struct pch_msi_data *priv;
- struct irq_domain *parent_domain;
- struct resource res;
int ret;
-
- parent_domain = irq_find_host(parent);
- if (!parent_domain) {
- pr_err("Failed to find the parent domain\n");
- return -ENXIO;
- }
+ struct pch_msi_data *priv;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
@@ -203,48 +199,95 @@ static int pch_msi_init(struct device_node *node,
mutex_init(&priv->msi_map_lock);
- ret = of_address_to_resource(node, 0, &res);
- if (ret) {
- pr_err("Failed to allocate resource\n");
- goto err_priv;
- }
-
- priv->doorbell = res.start;
-
- if (of_property_read_u32(node, "loongson,msi-base-vec",
- &priv->irq_first)) {
- pr_err("Unable to parse MSI vec base\n");
- ret = -EINVAL;
- goto err_priv;
- }
-
- if (of_property_read_u32(node, "loongson,msi-num-vecs",
- &priv->num_irqs)) {
- pr_err("Unable to parse MSI vec number\n");
- ret = -EINVAL;
- goto err_priv;
- }
+ priv->doorbell = msg_address;
+ priv->irq_first = irq_base;
+ priv->num_irqs = irq_count;
priv->msi_map = bitmap_zalloc(priv->num_irqs, GFP_KERNEL);
- if (!priv->msi_map) {
- ret = -ENOMEM;
+ if (!priv->msi_map)
goto err_priv;
- }
pr_debug("Registering %d MSIs, starting at %d\n",
priv->num_irqs, priv->irq_first);
- ret = pch_msi_init_domains(priv, node, parent_domain);
+ ret = pch_msi_init_domains(priv, parent_domain, domain_handle);
if (ret)
goto err_map;
+ pch_msi_handle[nr_pics++] = domain_handle;
return 0;
err_map:
bitmap_free(priv->msi_map);
err_priv:
kfree(priv);
- return ret;
+
+ return -EINVAL;
+}
+
+#ifdef CONFIG_OF
+static int pch_msi_of_init(struct device_node *node, struct device_node *parent)
+{
+ int err;
+ int irq_base, irq_count;
+ struct resource res;
+ struct irq_domain *parent_domain;
+
+ parent_domain = irq_find_host(parent);
+ if (!parent_domain) {
+ pr_err("Failed to find the parent domain\n");
+ return -ENXIO;
+ }
+
+ if (of_address_to_resource(node, 0, &res)) {
+ pr_err("Failed to allocate resource\n");
+ return -EINVAL;
+ }
+
+ if (of_property_read_u32(node, "loongson,msi-base-vec", &irq_base)) {
+ pr_err("Unable to parse MSI vec base\n");
+ return -EINVAL;
+ }
+
+ if (of_property_read_u32(node, "loongson,msi-num-vecs", &irq_count)) {
+ pr_err("Unable to parse MSI vec number\n");
+ return -EINVAL;
+ }
+
+ err = pch_msi_init(res.start, irq_base, irq_count, parent_domain, of_node_to_fwnode(node));
+ if (err < 0)
+ return err;
+
+ return 0;
}
-IRQCHIP_DECLARE(pch_msi, "loongson,pch-msi-1.0", pch_msi_init);
+IRQCHIP_DECLARE(pch_msi, "loongson,pch-msi-1.0", pch_msi_of_init);
+#endif
+
+#ifdef CONFIG_ACPI
+struct fwnode_handle *get_pch_msi_handle(int pci_segment)
+{
+ int i;
+
+ for (i = 0; i < MAX_IO_PICS; i++) {
+ if (msi_group[i].pci_segment == pci_segment)
+ return pch_msi_handle[i];
+ }
+ return NULL;
+}
+
+int __init pch_msi_acpi_init(struct irq_domain *parent,
+ struct acpi_madt_msi_pic *acpi_pchmsi)
+{
+ int ret;
+ struct fwnode_handle *domain_handle;
+
+ domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_pchmsi);
+ ret = pch_msi_init(acpi_pchmsi->msg_address, acpi_pchmsi->start,
+ acpi_pchmsi->count, parent, domain_handle);
+ if (ret < 0)
+ irq_domain_free_fwnode(domain_handle);
+
+ return ret;
+}
+#endif
diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c
index a4eb8a2181c7..b6f1392964b1 100644
--- a/drivers/irqchip/irq-loongson-pch-pic.c
+++ b/drivers/irqchip/irq-loongson-pch-pic.c
@@ -33,13 +33,40 @@
#define PIC_REG_IDX(irq_id) ((irq_id) / PIC_COUNT_PER_REG)
#define PIC_REG_BIT(irq_id) ((irq_id) % PIC_COUNT_PER_REG)
+static int nr_pics;
+
struct pch_pic {
void __iomem *base;
struct irq_domain *pic_domain;
u32 ht_vec_base;
raw_spinlock_t pic_lock;
+ u32 vec_count;
+ u32 gsi_base;
};
+static struct pch_pic *pch_pic_priv[MAX_IO_PICS];
+
+struct fwnode_handle *pch_pic_handle[MAX_IO_PICS];
+
+int find_pch_pic(u32 gsi)
+{
+ int i;
+
+ /* Find the PCH_PIC that manages this GSI. */
+ for (i = 0; i < MAX_IO_PICS; i++) {
+ struct pch_pic *priv = pch_pic_priv[i];
+
+ if (!priv)
+ return -1;
+
+ if (gsi >= priv->gsi_base && gsi < (priv->gsi_base + priv->vec_count))
+ return i;
+ }
+
+ pr_err("ERROR: Unable to locate PCH_PIC for GSI %d\n", gsi);
+ return -1;
+}
+
static void pch_pic_bitset(struct pch_pic *priv, int offset, int bit)
{
u32 reg;
@@ -139,6 +166,28 @@ static struct irq_chip pch_pic_irq_chip = {
.irq_set_type = pch_pic_set_type,
};
+static int pch_pic_domain_translate(struct irq_domain *d,
+ struct irq_fwspec *fwspec,
+ unsigned long *hwirq,
+ unsigned int *type)
+{
+ struct pch_pic *priv = d->host_data;
+ struct device_node *of_node = to_of_node(fwspec->fwnode);
+
+ if (fwspec->param_count < 1)
+ return -EINVAL;
+
+ if (of_node) {
+ *hwirq = fwspec->param[0] + priv->ht_vec_base;
+ *type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK;
+ } else {
+ *hwirq = fwspec->param[0] - priv->gsi_base;
+ *type = IRQ_TYPE_NONE;
+ }
+
+ return 0;
+}
+
static int pch_pic_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *arg)
{
@@ -149,13 +198,13 @@ static int pch_pic_alloc(struct irq_domain *domain, unsigned int virq,
struct irq_fwspec parent_fwspec;
struct pch_pic *priv = domain->host_data;
- err = irq_domain_translate_twocell(domain, fwspec, &hwirq, &type);
+ err = pch_pic_domain_translate(domain, fwspec, &hwirq, &type);
if (err)
return err;
parent_fwspec.fwnode = domain->parent->fwnode;
parent_fwspec.param_count = 1;
- parent_fwspec.param[0] = hwirq + priv->ht_vec_base;
+ parent_fwspec.param[0] = hwirq;
err = irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec);
if (err)
@@ -170,7 +219,7 @@ static int pch_pic_alloc(struct irq_domain *domain, unsigned int virq,
}
static const struct irq_domain_ops pch_pic_domain_ops = {
- .translate = irq_domain_translate_twocell,
+ .translate = pch_pic_domain_translate,
.alloc = pch_pic_alloc,
.free = irq_domain_free_irqs_parent,
};
@@ -180,7 +229,7 @@ static void pch_pic_reset(struct pch_pic *priv)
int i;
for (i = 0; i < PIC_COUNT; i++) {
- /* Write vectored ID */
+ /* Write vector ID */
writeb(priv->ht_vec_base + i, priv->base + PCH_INT_HTVEC(i));
/* Hardcode route to HT0 Lo */
writeb(1, priv->base + PCH_INT_ROUTE(i));
@@ -198,50 +247,37 @@ static void pch_pic_reset(struct pch_pic *priv)
}
}
-static int pch_pic_of_init(struct device_node *node,
- struct device_node *parent)
+static int pch_pic_init(phys_addr_t addr, unsigned long size, int vec_base,
+ struct irq_domain *parent_domain, struct fwnode_handle *domain_handle,
+ u32 gsi_base)
{
struct pch_pic *priv;
- struct irq_domain *parent_domain;
- int err;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
raw_spin_lock_init(&priv->pic_lock);
- priv->base = of_iomap(node, 0);
- if (!priv->base) {
- err = -ENOMEM;
+ priv->base = ioremap(addr, size);
+ if (!priv->base)
goto free_priv;
- }
- parent_domain = irq_find_host(parent);
- if (!parent_domain) {
- pr_err("Failed to find the parent domain\n");
- err = -ENXIO;
- goto iounmap_base;
- }
-
- if (of_property_read_u32(node, "loongson,pic-base-vec",
- &priv->ht_vec_base)) {
- pr_err("Failed to determine pic-base-vec\n");
- err = -EINVAL;
- goto iounmap_base;
- }
+ priv->ht_vec_base = vec_base;
+ priv->vec_count = ((readq(priv->base) >> 48) & 0xff) + 1;
+ priv->gsi_base = gsi_base;
priv->pic_domain = irq_domain_create_hierarchy(parent_domain, 0,
- PIC_COUNT,
- of_node_to_fwnode(node),
- &pch_pic_domain_ops,
- priv);
+ priv->vec_count, domain_handle,
+ &pch_pic_domain_ops, priv);
+
if (!priv->pic_domain) {
pr_err("Failed to create IRQ domain\n");
- err = -ENOMEM;
goto iounmap_base;
}
pch_pic_reset(priv);
+ pch_pic_handle[nr_pics] = domain_handle;
+ pch_pic_priv[nr_pics++] = priv;
return 0;
@@ -250,7 +286,86 @@ iounmap_base:
free_priv:
kfree(priv);
- return err;
+ return -EINVAL;
+}
+
+#ifdef CONFIG_OF
+
+static int pch_pic_of_init(struct device_node *node,
+ struct device_node *parent)
+{
+ int err, vec_base;
+ struct resource res;
+ struct irq_domain *parent_domain;
+
+ if (of_address_to_resource(node, 0, &res))
+ return -EINVAL;
+
+ parent_domain = irq_find_host(parent);
+ if (!parent_domain) {
+ pr_err("Failed to find the parent domain\n");
+ return -ENXIO;
+ }
+
+ if (of_property_read_u32(node, "loongson,pic-base-vec", &vec_base)) {
+ pr_err("Failed to determine pic-base-vec\n");
+ return -EINVAL;
+ }
+
+ err = pch_pic_init(res.start, resource_size(&res), vec_base,
+ parent_domain, of_node_to_fwnode(node), 0);
+ if (err < 0)
+ return err;
+
+ return 0;
}
IRQCHIP_DECLARE(pch_pic, "loongson,pch-pic-1.0", pch_pic_of_init);
+
+#endif
+
+#ifdef CONFIG_ACPI
+static int __init
+pch_lpc_parse_madt(union acpi_subtable_headers *header,
+ const unsigned long end)
+{
+ struct acpi_madt_lpc_pic *pchlpc_entry = (struct acpi_madt_lpc_pic *)header;
+
+ return pch_lpc_acpi_init(pch_pic_priv[0]->pic_domain, pchlpc_entry);
+}
+
+static int __init acpi_cascade_irqdomain_init(void)
+{
+ acpi_table_parse_madt(ACPI_MADT_TYPE_LPC_PIC,
+ pch_lpc_parse_madt, 0);
+ return 0;
+}
+
+int __init pch_pic_acpi_init(struct irq_domain *parent,
+ struct acpi_madt_bio_pic *acpi_pchpic)
+{
+ int ret, vec_base;
+ struct fwnode_handle *domain_handle;
+
+ vec_base = acpi_pchpic->gsi_base - GSI_MIN_PCH_IRQ;
+
+ domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_pchpic);
+ if (!domain_handle) {
+ pr_err("Unable to allocate domain handle\n");
+ return -ENOMEM;
+ }
+
+ ret = pch_pic_init(acpi_pchpic->address, acpi_pchpic->size,
+ vec_base, parent, domain_handle, acpi_pchpic->gsi_base);
+
+ if (ret < 0) {
+ irq_domain_free_fwnode(domain_handle);
+ return ret;
+ }
+
+ if (acpi_pchpic->id == 0)
+ acpi_cascade_irqdomain_init();
+
+ return ret;
+}
+#endif
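
find_pch_pic() above resolves a GSI by scanning each registered controller's [gsi_base, gsi_base + vec_count) window. A stand-alone sketch of that range lookup, using made-up table contents:

#include <stdio.h>

#define MAX_IO_PICS 2

struct pic { unsigned int gsi_base, vec_count; };

static const struct pic pics[MAX_IO_PICS] = {
	{ .gsi_base = 64,  .vec_count = 64 },
	{ .gsi_base = 128, .vec_count = 64 },
};

static int find_pic(unsigned int gsi)
{
	/* Return the index of the controller whose window contains gsi. */
	for (int i = 0; i < MAX_IO_PICS; i++)
		if (gsi >= pics[i].gsi_base &&
		    gsi < pics[i].gsi_base + pics[i].vec_count)
			return i;
	return -1;
}

int main(void)
{
	printf("GSI 70  -> PIC %d\n", find_pic(70));	/* 0 */
	printf("GSI 130 -> PIC %d\n", find_pic(130));	/* 1 */
	printf("GSI 300 -> PIC %d\n", find_pic(300));	/* -1 */
	return 0;
}
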
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index ff89b36267dd..1ba0f1555c80 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -52,13 +52,15 @@ static DEFINE_PER_CPU_READ_MOSTLY(unsigned long[GIC_MAX_LONGS], pcpu_masks);
static DEFINE_SPINLOCK(gic_lock);
static struct irq_domain *gic_irq_domain;
-static struct irq_domain *gic_ipi_domain;
static int gic_shared_intrs;
static unsigned int gic_cpu_pin;
static unsigned int timer_cpu_pin;
static struct irq_chip gic_level_irq_controller, gic_edge_irq_controller;
+
+#ifdef CONFIG_GENERIC_IRQ_IPI
static DECLARE_BITMAP(ipi_resrv, GIC_MAX_INTRS);
static DECLARE_BITMAP(ipi_available, GIC_MAX_INTRS);
+#endif /* CONFIG_GENERIC_IRQ_IPI */
static struct gic_all_vpes_chip_data {
u32 map;
@@ -472,9 +474,11 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq,
u32 map;
if (hwirq >= GIC_SHARED_HWIRQ_BASE) {
+#ifdef CONFIG_GENERIC_IRQ_IPI
/* verify that shared irqs don't conflict with an IPI irq */
if (test_bit(GIC_HWIRQ_TO_SHARED(hwirq), ipi_resrv))
return -EBUSY;
+#endif /* CONFIG_GENERIC_IRQ_IPI */
err = irq_domain_set_hwirq_and_chip(d, virq, hwirq,
&gic_level_irq_controller,
@@ -567,6 +571,8 @@ static const struct irq_domain_ops gic_irq_domain_ops = {
.map = gic_irq_domain_map,
};
+#ifdef CONFIG_GENERIC_IRQ_IPI
+
static int gic_ipi_domain_xlate(struct irq_domain *d, struct device_node *ctrlr,
const u32 *intspec, unsigned int intsize,
irq_hw_number_t *out_hwirq,
@@ -670,6 +676,48 @@ static const struct irq_domain_ops gic_ipi_domain_ops = {
.match = gic_ipi_domain_match,
};
+static int gic_register_ipi_domain(struct device_node *node)
+{
+ struct irq_domain *gic_ipi_domain;
+ unsigned int v[2], num_ipis;
+
+ gic_ipi_domain = irq_domain_add_hierarchy(gic_irq_domain,
+ IRQ_DOMAIN_FLAG_IPI_PER_CPU,
+ GIC_NUM_LOCAL_INTRS + gic_shared_intrs,
+ node, &gic_ipi_domain_ops, NULL);
+ if (!gic_ipi_domain) {
+ pr_err("Failed to add IPI domain");
+ return -ENXIO;
+ }
+
+ irq_domain_update_bus_token(gic_ipi_domain, DOMAIN_BUS_IPI);
+
+ if (node &&
+ !of_property_read_u32_array(node, "mti,reserved-ipi-vectors", v, 2)) {
+ bitmap_set(ipi_resrv, v[0], v[1]);
+ } else {
+ /*
+ * Reserve 2 interrupts per possible CPU/VP for use as IPIs,
+ * meeting the requirements of arch/mips SMP.
+ */
+ num_ipis = 2 * num_possible_cpus();
+ bitmap_set(ipi_resrv, gic_shared_intrs - num_ipis, num_ipis);
+ }
+
+ bitmap_copy(ipi_available, ipi_resrv, GIC_MAX_INTRS);
+
+ return 0;
+}
+
+#else /* !CONFIG_GENERIC_IRQ_IPI */
+
+static inline int gic_register_ipi_domain(struct device_node *node)
+{
+ return 0;
+}
+
+#endif /* !CONFIG_GENERIC_IRQ_IPI */
+
static int gic_cpu_startup(unsigned int cpu)
{
/* Enable or disable EIC */
@@ -688,11 +736,12 @@ static int gic_cpu_startup(unsigned int cpu)
static int __init gic_of_init(struct device_node *node,
struct device_node *parent)
{
- unsigned int cpu_vec, i, gicconfig, v[2], num_ipis;
+ unsigned int cpu_vec, i, gicconfig;
unsigned long reserved;
phys_addr_t gic_base;
struct resource res;
size_t gic_len;
+ int ret;
/* Find the first available CPU vector. */
i = 0;
@@ -734,6 +783,10 @@ static int __init gic_of_init(struct device_node *node,
}
mips_gic_base = ioremap(gic_base, gic_len);
+ if (!mips_gic_base) {
+ pr_err("Failed to ioremap gic_base\n");
+ return -ENOMEM;
+ }
gicconfig = read_gic_config();
gic_shared_intrs = FIELD_GET(GIC_CONFIG_NUMINTERRUPTS, gicconfig);
@@ -780,30 +833,9 @@ static int __init gic_of_init(struct device_node *node,
return -ENXIO;
}
- gic_ipi_domain = irq_domain_add_hierarchy(gic_irq_domain,
- IRQ_DOMAIN_FLAG_IPI_PER_CPU,
- GIC_NUM_LOCAL_INTRS + gic_shared_intrs,
- node, &gic_ipi_domain_ops, NULL);
- if (!gic_ipi_domain) {
- pr_err("Failed to add IPI domain");
- return -ENXIO;
- }
-
- irq_domain_update_bus_token(gic_ipi_domain, DOMAIN_BUS_IPI);
-
- if (node &&
- !of_property_read_u32_array(node, "mti,reserved-ipi-vectors", v, 2)) {
- bitmap_set(ipi_resrv, v[0], v[1]);
- } else {
- /*
- * Reserve 2 interrupts per possible CPU/VP for use as IPIs,
- * meeting the requirements of arch/mips SMP.
- */
- num_ipis = 2 * num_possible_cpus();
- bitmap_set(ipi_resrv, gic_shared_intrs - num_ipis, num_ipis);
- }
-
- bitmap_copy(ipi_available, ipi_resrv, GIC_MAX_INTRS);
+ ret = gic_register_ipi_domain(node);
+ if (ret)
+ return ret;
board_bind_eic_interrupt = &gic_bind_eic_interrupt;
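
When the "mti,reserved-ipi-vectors" property is absent, gic_register_ipi_domain() above reserves two vectors per possible CPU at the top of the shared interrupt range. A small user-space sketch of that reservation, with made-up sizes and a plain array standing in for bitmap_set():

#include <stdio.h>

#define MAX_INTRS 64

static unsigned char resrv[MAX_INTRS];

static void bitmap_set_range(unsigned char *map, int start, int len)
{
	for (int i = start; i < start + len; i++)
		map[i] = 1;
}

int main(void)
{
	int shared_intrs = 56, possible_cpus = 4;
	int num_ipis = 2 * possible_cpus;

	/* Reserve the top num_ipis vectors of the shared range for IPIs. */
	bitmap_set_range(resrv, shared_intrs - num_ipis, num_ipis);

	for (int i = 0; i < shared_intrs; i++)
		if (resrv[i])
			printf("vector %d reserved for IPI\n", i);
	return 0;
}
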
diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c
new file mode 100644
index 000000000000..25fd8ee66565
--- /dev/null
+++ b/drivers/irqchip/irq-renesas-rzg2l.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Renesas RZ/G2L IRQC Driver
+ *
+ * Copyright (C) 2022 Renesas Electronics Corporation.
+ *
+ * Author: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <linux/spinlock.h>
+
+#define IRQC_IRQ_START 1
+#define IRQC_IRQ_COUNT 8
+#define IRQC_TINT_START (IRQC_IRQ_START + IRQC_IRQ_COUNT)
+#define IRQC_TINT_COUNT 32
+#define IRQC_NUM_IRQ (IRQC_TINT_START + IRQC_TINT_COUNT)
+
+#define ISCR 0x10
+#define IITSR 0x14
+#define TSCR 0x20
+#define TITSR0 0x24
+#define TITSR1 0x28
+#define TITSR0_MAX_INT 16
+#define TITSEL_WIDTH 0x2
+#define TSSR(n) (0x30 + ((n) * 4))
+#define TIEN BIT(7)
+#define TSSEL_SHIFT(n) (8 * (n))
+#define TSSEL_MASK GENMASK(7, 0)
+#define IRQ_MASK 0x3
+
+#define TSSR_OFFSET(n) ((n) % 4)
+#define TSSR_INDEX(n) ((n) / 4)
+
+#define TITSR_TITSEL_EDGE_RISING 0
+#define TITSR_TITSEL_EDGE_FALLING 1
+#define TITSR_TITSEL_LEVEL_HIGH 2
+#define TITSR_TITSEL_LEVEL_LOW 3
+
+#define IITSR_IITSEL(n, sense) ((sense) << ((n) * 2))
+#define IITSR_IITSEL_LEVEL_LOW 0
+#define IITSR_IITSEL_EDGE_FALLING 1
+#define IITSR_IITSEL_EDGE_RISING 2
+#define IITSR_IITSEL_EDGE_BOTH 3
+#define IITSR_IITSEL_MASK(n) IITSR_IITSEL((n), 3)
+
+#define TINT_EXTRACT_HWIRQ(x) FIELD_GET(GENMASK(15, 0), (x))
+#define TINT_EXTRACT_GPIOINT(x) FIELD_GET(GENMASK(31, 16), (x))
+
+struct rzg2l_irqc_priv {
+ void __iomem *base;
+ struct irq_fwspec fwspec[IRQC_NUM_IRQ];
+ raw_spinlock_t lock;
+};
+
+static struct rzg2l_irqc_priv *irq_data_to_priv(struct irq_data *data)
+{
+ return data->domain->host_data;
+}
+
+static void rzg2l_irq_eoi(struct irq_data *d)
+{
+ unsigned int hw_irq = irqd_to_hwirq(d) - IRQC_IRQ_START;
+ struct rzg2l_irqc_priv *priv = irq_data_to_priv(d);
+ u32 bit = BIT(hw_irq);
+ u32 reg;
+
+ reg = readl_relaxed(priv->base + ISCR);
+ if (reg & bit)
+ writel_relaxed(reg & ~bit, priv->base + ISCR);
+}
+
+static void rzg2l_tint_eoi(struct irq_data *d)
+{
+ unsigned int hw_irq = irqd_to_hwirq(d) - IRQC_TINT_START;
+ struct rzg2l_irqc_priv *priv = irq_data_to_priv(d);
+ u32 bit = BIT(hw_irq);
+ u32 reg;
+
+ reg = readl_relaxed(priv->base + TSCR);
+ if (reg & bit)
+ writel_relaxed(reg & ~bit, priv->base + TSCR);
+}
+
+static void rzg2l_irqc_eoi(struct irq_data *d)
+{
+ struct rzg2l_irqc_priv *priv = irq_data_to_priv(d);
+ unsigned int hw_irq = irqd_to_hwirq(d);
+
+ raw_spin_lock(&priv->lock);
+ if (hw_irq >= IRQC_IRQ_START && hw_irq <= IRQC_IRQ_COUNT)
+ rzg2l_irq_eoi(d);
+ else if (hw_irq >= IRQC_TINT_START && hw_irq < IRQC_NUM_IRQ)
+ rzg2l_tint_eoi(d);
+ raw_spin_unlock(&priv->lock);
+ irq_chip_eoi_parent(d);
+}
+
+static void rzg2l_irqc_irq_disable(struct irq_data *d)
+{
+ unsigned int hw_irq = irqd_to_hwirq(d);
+
+ if (hw_irq >= IRQC_TINT_START && hw_irq < IRQC_NUM_IRQ) {
+ struct rzg2l_irqc_priv *priv = irq_data_to_priv(d);
+ u32 offset = hw_irq - IRQC_TINT_START;
+ u32 tssr_offset = TSSR_OFFSET(offset);
+ u8 tssr_index = TSSR_INDEX(offset);
+ u32 reg;
+
+ raw_spin_lock(&priv->lock);
+ reg = readl_relaxed(priv->base + TSSR(tssr_index));
+ reg &= ~(TSSEL_MASK << tssr_offset);
+ writel_relaxed(reg, priv->base + TSSR(tssr_index));
+ raw_spin_unlock(&priv->lock);
+ }
+ irq_chip_disable_parent(d);
+}
+
+static void rzg2l_irqc_irq_enable(struct irq_data *d)
+{
+ unsigned int hw_irq = irqd_to_hwirq(d);
+
+ if (hw_irq >= IRQC_TINT_START && hw_irq < IRQC_NUM_IRQ) {
+ struct rzg2l_irqc_priv *priv = irq_data_to_priv(d);
+ unsigned long tint = (uintptr_t)d->chip_data;
+ u32 offset = hw_irq - IRQC_TINT_START;
+ u32 tssr_offset = TSSR_OFFSET(offset);
+ u8 tssr_index = TSSR_INDEX(offset);
+ u32 reg;
+
+ raw_spin_lock(&priv->lock);
+ reg = readl_relaxed(priv->base + TSSR(tssr_index));
+ reg |= (TIEN | tint) << TSSEL_SHIFT(tssr_offset);
+ writel_relaxed(reg, priv->base + TSSR(tssr_index));
+ raw_spin_unlock(&priv->lock);
+ }
+ irq_chip_enable_parent(d);
+}
+
+static int rzg2l_irq_set_type(struct irq_data *d, unsigned int type)
+{
+ unsigned int hw_irq = irqd_to_hwirq(d) - IRQC_IRQ_START;
+ struct rzg2l_irqc_priv *priv = irq_data_to_priv(d);
+ u16 sense, tmp;
+
+ switch (type & IRQ_TYPE_SENSE_MASK) {
+ case IRQ_TYPE_LEVEL_LOW:
+ sense = IITSR_IITSEL_LEVEL_LOW;
+ break;
+
+ case IRQ_TYPE_EDGE_FALLING:
+ sense = IITSR_IITSEL_EDGE_FALLING;
+ break;
+
+ case IRQ_TYPE_EDGE_RISING:
+ sense = IITSR_IITSEL_EDGE_RISING;
+ break;
+
+ case IRQ_TYPE_EDGE_BOTH:
+ sense = IITSR_IITSEL_EDGE_BOTH;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ raw_spin_lock(&priv->lock);
+ tmp = readl_relaxed(priv->base + IITSR);
+ tmp &= ~IITSR_IITSEL_MASK(hw_irq);
+ tmp |= IITSR_IITSEL(hw_irq, sense);
+ writel_relaxed(tmp, priv->base + IITSR);
+ raw_spin_unlock(&priv->lock);
+
+ return 0;
+}
+
+static int rzg2l_tint_set_edge(struct irq_data *d, unsigned int type)
+{
+ struct rzg2l_irqc_priv *priv = irq_data_to_priv(d);
+ unsigned int hwirq = irqd_to_hwirq(d);
+ u32 titseln = hwirq - IRQC_TINT_START;
+ u32 offset;
+ u8 sense;
+ u32 reg;
+
+ switch (type & IRQ_TYPE_SENSE_MASK) {
+ case IRQ_TYPE_EDGE_RISING:
+ sense = TITSR_TITSEL_EDGE_RISING;
+ break;
+
+ case IRQ_TYPE_EDGE_FALLING:
+ sense = TITSR_TITSEL_EDGE_FALLING;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ offset = TITSR0;
+ if (titseln >= TITSR0_MAX_INT) {
+ titseln -= TITSR0_MAX_INT;
+ offset = TITSR1;
+ }
+
+ raw_spin_lock(&priv->lock);
+ reg = readl_relaxed(priv->base + offset);
+ reg &= ~(IRQ_MASK << (titseln * TITSEL_WIDTH));
+ reg |= sense << (titseln * TITSEL_WIDTH);
+ writel_relaxed(reg, priv->base + offset);
+ raw_spin_unlock(&priv->lock);
+
+ return 0;
+}
+
+static int rzg2l_irqc_set_type(struct irq_data *d, unsigned int type)
+{
+ unsigned int hw_irq = irqd_to_hwirq(d);
+ int ret = -EINVAL;
+
+ if (hw_irq >= IRQC_IRQ_START && hw_irq <= IRQC_IRQ_COUNT)
+ ret = rzg2l_irq_set_type(d, type);
+ else if (hw_irq >= IRQC_TINT_START && hw_irq < IRQC_NUM_IRQ)
+ ret = rzg2l_tint_set_edge(d, type);
+ if (ret)
+ return ret;
+
+ return irq_chip_set_type_parent(d, IRQ_TYPE_LEVEL_HIGH);
+}
+
+static const struct irq_chip irqc_chip = {
+ .name = "rzg2l-irqc",
+ .irq_eoi = rzg2l_irqc_eoi,
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_disable = rzg2l_irqc_irq_disable,
+ .irq_enable = rzg2l_irqc_irq_enable,
+ .irq_get_irqchip_state = irq_chip_get_parent_state,
+ .irq_set_irqchip_state = irq_chip_set_parent_state,
+ .irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_set_type = rzg2l_irqc_set_type,
+ .flags = IRQCHIP_MASK_ON_SUSPEND |
+ IRQCHIP_SET_TYPE_MASKED |
+ IRQCHIP_SKIP_SET_WAKE,
+};
+
+static int rzg2l_irqc_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ struct rzg2l_irqc_priv *priv = domain->host_data;
+ unsigned long tint = 0;
+ irq_hw_number_t hwirq;
+ unsigned int type;
+ int ret;
+
+ ret = irq_domain_translate_twocell(domain, arg, &hwirq, &type);
+ if (ret)
+ return ret;
+
+	/*
+	 * For TINT interrupts, i.e. where the pinctrl driver is a child of
+	 * the irqc domain, the hwirq and the TINT selector are both encoded
+	 * in fwspec->param[0]: the hwirq (range 9-40) occupies bits 0-15 and
+	 * the TINT selector bits 16-31. The TINT value supplied by the
+	 * pinctrl driver must be programmed into the IRQC registers to
+	 * enable a given GPIO pin as an interrupt.
+	 */
+ if (hwirq > IRQC_IRQ_COUNT) {
+ tint = TINT_EXTRACT_GPIOINT(hwirq);
+ hwirq = TINT_EXTRACT_HWIRQ(hwirq);
+
+ if (hwirq < IRQC_TINT_START)
+ return -EINVAL;
+ }
+
+ if (hwirq > (IRQC_NUM_IRQ - 1))
+ return -EINVAL;
+
+ ret = irq_domain_set_hwirq_and_chip(domain, virq, hwirq, &irqc_chip,
+ (void *)(uintptr_t)tint);
+ if (ret)
+ return ret;
+
+ return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &priv->fwspec[hwirq]);
+}
+
+static const struct irq_domain_ops rzg2l_irqc_domain_ops = {
+ .alloc = rzg2l_irqc_alloc,
+ .free = irq_domain_free_irqs_common,
+ .translate = irq_domain_translate_twocell,
+};
+
+static int rzg2l_irqc_parse_interrupts(struct rzg2l_irqc_priv *priv,
+ struct device_node *np)
+{
+ struct of_phandle_args map;
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < IRQC_NUM_IRQ; i++) {
+ ret = of_irq_parse_one(np, i, &map);
+ if (ret)
+ return ret;
+ of_phandle_args_to_fwspec(np, map.args, map.args_count,
+ &priv->fwspec[i]);
+ }
+
+ return 0;
+}
+
+static int rzg2l_irqc_init(struct device_node *node, struct device_node *parent)
+{
+ struct irq_domain *irq_domain, *parent_domain;
+ struct platform_device *pdev;
+ struct reset_control *resetn;
+ struct rzg2l_irqc_priv *priv;
+ int ret;
+
+ pdev = of_find_device_by_node(node);
+ if (!pdev)
+ return -ENODEV;
+
+ parent_domain = irq_find_host(parent);
+ if (!parent_domain) {
+ dev_err(&pdev->dev, "cannot find parent domain\n");
+ return -ENODEV;
+ }
+
+ priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->base = devm_of_iomap(&pdev->dev, pdev->dev.of_node, 0, NULL);
+ if (IS_ERR(priv->base))
+ return PTR_ERR(priv->base);
+
+ ret = rzg2l_irqc_parse_interrupts(priv, node);
+ if (ret) {
+ dev_err(&pdev->dev, "cannot parse interrupts: %d\n", ret);
+ return ret;
+ }
+
+ resetn = devm_reset_control_get_exclusive(&pdev->dev, NULL);
+ if (IS_ERR(resetn))
+ return PTR_ERR(resetn);
+
+ ret = reset_control_deassert(resetn);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to deassert resetn pin, %d\n", ret);
+ return ret;
+ }
+
+ pm_runtime_enable(&pdev->dev);
+ ret = pm_runtime_resume_and_get(&pdev->dev);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "pm_runtime_resume_and_get failed: %d\n", ret);
+ goto pm_disable;
+ }
+
+ raw_spin_lock_init(&priv->lock);
+
+ irq_domain = irq_domain_add_hierarchy(parent_domain, 0, IRQC_NUM_IRQ,
+ node, &rzg2l_irqc_domain_ops,
+ priv);
+ if (!irq_domain) {
+ dev_err(&pdev->dev, "failed to add irq domain\n");
+ ret = -ENOMEM;
+ goto pm_put;
+ }
+
+ return 0;
+
+pm_put:
+ pm_runtime_put(&pdev->dev);
+pm_disable:
+ pm_runtime_disable(&pdev->dev);
+ reset_control_assert(resetn);
+ return ret;
+}
+
+IRQCHIP_PLATFORM_DRIVER_BEGIN(rzg2l_irqc)
+IRQCHIP_MATCH("renesas,rzg2l-irqc", rzg2l_irqc_init)
+IRQCHIP_PLATFORM_DRIVER_END(rzg2l_irqc)
+MODULE_AUTHOR("Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>");
+MODULE_DESCRIPTION("Renesas RZ/G2L IRQC Driver");
+MODULE_LICENSE("GPL");
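
rzg2l_irqc_alloc() above unpacks a single 32-bit cell in which the hwirq occupies bits 0-15 and the GPIO interrupt selector bits 16-31. A minimal sketch of that packing and unpacking; TINT_PACK() is a hypothetical helper, only the bit layout follows the driver's TINT_EXTRACT_* macros:

#include <stdio.h>
#include <stdint.h>

#define TINT_PACK(gpioint, hwirq)	(((uint32_t)(gpioint) << 16) | ((hwirq) & 0xffff))
#define TINT_EXTRACT_HWIRQ(x)		((x) & 0xffff)
#define TINT_EXTRACT_GPIOINT(x)		((x) >> 16)

int main(void)
{
	uint32_t cell = TINT_PACK(5, 12);	/* GPIOINT 5 routed to hwirq 12 */

	printf("cell=0x%x hwirq=%u gpioint=%u\n",
	       (unsigned int)cell,
	       (unsigned int)TINT_EXTRACT_HWIRQ(cell),
	       (unsigned int)TINT_EXTRACT_GPIOINT(cell));
	return 0;
}
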
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index bb87e4c3b88e..ba4938188570 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -60,10 +60,13 @@
#define PLIC_DISABLE_THRESHOLD 0x7
#define PLIC_ENABLE_THRESHOLD 0
+#define PLIC_QUIRK_EDGE_INTERRUPT 0
+
struct plic_priv {
struct cpumask lmask;
struct irq_domain *irqdomain;
void __iomem *regs;
+ unsigned long plic_quirks;
};
struct plic_handler {
@@ -81,6 +84,8 @@ static int plic_parent_irq __ro_after_init;
static bool plic_cpuhp_setup_done __ro_after_init;
static DEFINE_PER_CPU(struct plic_handler, plic_handlers);
+static int plic_irq_set_type(struct irq_data *d, unsigned int type);
+
static void __plic_toggle(void __iomem *enable_base, int hwirq, int enable)
{
u32 __iomem *reg = enable_base + (hwirq / 32) * sizeof(u32);
@@ -103,37 +108,43 @@ static inline void plic_irq_toggle(const struct cpumask *mask,
struct irq_data *d, int enable)
{
int cpu;
- struct plic_priv *priv = irq_data_get_irq_chip_data(d);
- writel(enable, priv->regs + PRIORITY_BASE + d->hwirq * PRIORITY_PER_ID);
for_each_cpu(cpu, mask) {
struct plic_handler *handler = per_cpu_ptr(&plic_handlers, cpu);
- if (handler->present &&
- cpumask_test_cpu(cpu, &handler->priv->lmask))
- plic_toggle(handler, d->hwirq, enable);
+ plic_toggle(handler, d->hwirq, enable);
}
}
+static void plic_irq_enable(struct irq_data *d)
+{
+ plic_irq_toggle(irq_data_get_effective_affinity_mask(d), d, 1);
+}
+
+static void plic_irq_disable(struct irq_data *d)
+{
+ plic_irq_toggle(irq_data_get_effective_affinity_mask(d), d, 0);
+}
+
static void plic_irq_unmask(struct irq_data *d)
{
- struct cpumask amask;
- unsigned int cpu;
struct plic_priv *priv = irq_data_get_irq_chip_data(d);
- cpumask_and(&amask, &priv->lmask, cpu_online_mask);
- cpu = cpumask_any_and(irq_data_get_affinity_mask(d),
- &amask);
- if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
- return;
- plic_irq_toggle(cpumask_of(cpu), d, 1);
+ writel(1, priv->regs + PRIORITY_BASE + d->hwirq * PRIORITY_PER_ID);
}
static void plic_irq_mask(struct irq_data *d)
{
struct plic_priv *priv = irq_data_get_irq_chip_data(d);
- plic_irq_toggle(&priv->lmask, d, 0);
+ writel(0, priv->regs + PRIORITY_BASE + d->hwirq * PRIORITY_PER_ID);
+}
+
+static void plic_irq_eoi(struct irq_data *d)
+{
+ struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
+
+ writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
}
#ifdef CONFIG_SMP
@@ -154,38 +165,68 @@ static int plic_set_affinity(struct irq_data *d,
if (cpu >= nr_cpu_ids)
return -EINVAL;
- plic_irq_toggle(&priv->lmask, d, 0);
- plic_irq_toggle(cpumask_of(cpu), d, !irqd_irq_masked(d));
+ plic_irq_disable(d);
irq_data_update_effective_affinity(d, cpumask_of(cpu));
+ if (!irqd_irq_disabled(d))
+ plic_irq_enable(d);
+
return IRQ_SET_MASK_OK_DONE;
}
#endif
-static void plic_irq_eoi(struct irq_data *d)
-{
- struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
-
- if (irqd_irq_masked(d)) {
- plic_irq_unmask(d);
- writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
- plic_irq_mask(d);
- } else {
- writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
- }
-}
+static struct irq_chip plic_edge_chip = {
+ .name = "SiFive PLIC",
+ .irq_enable = plic_irq_enable,
+ .irq_disable = plic_irq_disable,
+ .irq_ack = plic_irq_eoi,
+ .irq_mask = plic_irq_mask,
+ .irq_unmask = plic_irq_unmask,
+#ifdef CONFIG_SMP
+ .irq_set_affinity = plic_set_affinity,
+#endif
+ .irq_set_type = plic_irq_set_type,
+ .flags = IRQCHIP_AFFINITY_PRE_STARTUP,
+};
static struct irq_chip plic_chip = {
.name = "SiFive PLIC",
+ .irq_enable = plic_irq_enable,
+ .irq_disable = plic_irq_disable,
.irq_mask = plic_irq_mask,
.irq_unmask = plic_irq_unmask,
.irq_eoi = plic_irq_eoi,
#ifdef CONFIG_SMP
.irq_set_affinity = plic_set_affinity,
#endif
+ .irq_set_type = plic_irq_set_type,
+ .flags = IRQCHIP_AFFINITY_PRE_STARTUP,
};
+static int plic_irq_set_type(struct irq_data *d, unsigned int type)
+{
+ struct plic_priv *priv = irq_data_get_irq_chip_data(d);
+
+ if (!test_bit(PLIC_QUIRK_EDGE_INTERRUPT, &priv->plic_quirks))
+ return IRQ_SET_MASK_OK_NOCOPY;
+
+ switch (type) {
+ case IRQ_TYPE_EDGE_RISING:
+ irq_set_chip_handler_name_locked(d, &plic_edge_chip,
+ handle_edge_irq, NULL);
+ break;
+ case IRQ_TYPE_LEVEL_HIGH:
+ irq_set_chip_handler_name_locked(d, &plic_chip,
+ handle_fasteoi_irq, NULL);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return IRQ_SET_MASK_OK;
+}
+
static int plic_irqdomain_map(struct irq_domain *d, unsigned int irq,
irq_hw_number_t hwirq)
{
@@ -198,6 +239,19 @@ static int plic_irqdomain_map(struct irq_domain *d, unsigned int irq,
return 0;
}
+static int plic_irq_domain_translate(struct irq_domain *d,
+ struct irq_fwspec *fwspec,
+ unsigned long *hwirq,
+ unsigned int *type)
+{
+ struct plic_priv *priv = d->host_data;
+
+ if (test_bit(PLIC_QUIRK_EDGE_INTERRUPT, &priv->plic_quirks))
+ return irq_domain_translate_twocell(d, fwspec, hwirq, type);
+
+ return irq_domain_translate_onecell(d, fwspec, hwirq, type);
+}
+
static int plic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *arg)
{
@@ -206,7 +260,7 @@ static int plic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int type;
struct irq_fwspec *fwspec = arg;
- ret = irq_domain_translate_onecell(domain, fwspec, &hwirq, &type);
+ ret = plic_irq_domain_translate(domain, fwspec, &hwirq, &type);
if (ret)
return ret;
@@ -220,7 +274,7 @@ static int plic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
}
static const struct irq_domain_ops plic_irqdomain_ops = {
- .translate = irq_domain_translate_onecell,
+ .translate = plic_irq_domain_translate,
.alloc = plic_irq_domain_alloc,
.free = irq_domain_free_irqs_top,
};
@@ -281,8 +335,9 @@ static int plic_starting_cpu(unsigned int cpu)
return 0;
}
-static int __init plic_init(struct device_node *node,
- struct device_node *parent)
+static int __init __plic_init(struct device_node *node,
+ struct device_node *parent,
+ unsigned long plic_quirks)
{
int error = 0, nr_contexts, nr_handlers = 0, i;
u32 nr_irqs;
@@ -293,6 +348,8 @@ static int __init plic_init(struct device_node *node,
if (!priv)
return -ENOMEM;
+ priv->plic_quirks = plic_quirks;
+
priv->regs = of_iomap(node, 0);
if (WARN_ON(!priv->regs)) {
error = -EIO;
@@ -382,8 +439,11 @@ static int __init plic_init(struct device_node *node,
i * CONTEXT_ENABLE_SIZE;
handler->priv = priv;
done:
- for (hwirq = 1; hwirq <= nr_irqs; hwirq++)
+ for (hwirq = 1; hwirq <= nr_irqs; hwirq++) {
plic_toggle(handler, hwirq, 0);
+ writel(1, priv->regs + PRIORITY_BASE +
+ hwirq * PRIORITY_PER_ID);
+ }
nr_handlers++;
}
@@ -410,6 +470,20 @@ out_free_priv:
return error;
}
+static int __init plic_init(struct device_node *node,
+ struct device_node *parent)
+{
+ return __plic_init(node, parent, 0);
+}
+
IRQCHIP_DECLARE(sifive_plic, "sifive,plic-1.0.0", plic_init);
IRQCHIP_DECLARE(riscv_plic0, "riscv,plic0", plic_init); /* for legacy systems */
-IRQCHIP_DECLARE(thead_c900_plic, "thead,c900-plic", plic_init); /* for firmware driver */
+
+static int __init plic_edge_init(struct device_node *node,
+ struct device_node *parent)
+{
+ return __plic_init(node, parent, BIT(PLIC_QUIRK_EDGE_INTERRUPT));
+}
+
+IRQCHIP_DECLARE(andestech_nceplic100, "andestech,nceplic100", plic_edge_init);
+IRQCHIP_DECLARE(thead_c900_plic, "thead,c900-plic", plic_edge_init);
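
The PLIC changes above thread a quirk bitmask through __plic_init() so that compatible-specific entry points can opt into edge-interrupt handling, and plic_irq_set_type() then picks the per-IRQ chip and flow handler. A minimal sketch of that quirk-flag pattern, with hypothetical names:

#include <stdio.h>

#define QUIRK_EDGE_INTERRUPT	(1UL << 0)

struct plic { unsigned long quirks; };

static void plic_common_init(struct plic *p, unsigned long quirks)
{
	p->quirks = quirks;
}

static void plic_set_type(const struct plic *p, int edge)
{
	if (!(p->quirks & QUIRK_EDGE_INTERRUPT)) {
		puts("level-only controller: ignore trigger type");
		return;
	}
	puts(edge ? "use edge chip/handler" : "use level chip/handler");
}

int main(void)
{
	struct plic generic, c900;

	plic_common_init(&generic, 0);			/* e.g. sifive,plic-1.0.0 */
	plic_common_init(&c900, QUIRK_EDGE_INTERRUPT);	/* e.g. thead,c900-plic */

	plic_set_type(&generic, 1);
	plic_set_type(&c900, 1);
	return 0;
}
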
diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c
index 9d18f47040eb..a73763d475f0 100644
--- a/drivers/irqchip/irq-stm32-exti.c
+++ b/drivers/irqchip/irq-stm32-exti.c
@@ -34,21 +34,15 @@ struct stm32_exti_bank {
u32 swier_ofst;
u32 rpr_ofst;
u32 fpr_ofst;
+ u32 trg_ofst;
};
#define UNDEF_REG ~0
-struct stm32_desc_irq {
- u32 exti;
- u32 irq_parent;
- struct irq_chip *chip;
-};
-
struct stm32_exti_drv_data {
const struct stm32_exti_bank **exti_banks;
- const struct stm32_desc_irq *desc_irqs;
+ const u8 *desc_irqs;
u32 bank_nr;
- u32 irq_nr;
};
struct stm32_exti_chip_data {
@@ -78,6 +72,7 @@ static const struct stm32_exti_bank stm32f4xx_exti_b1 = {
.swier_ofst = 0x10,
.rpr_ofst = 0x14,
.fpr_ofst = UNDEF_REG,
+ .trg_ofst = UNDEF_REG,
};
static const struct stm32_exti_bank *stm32f4xx_exti_banks[] = {
@@ -97,6 +92,7 @@ static const struct stm32_exti_bank stm32h7xx_exti_b1 = {
.swier_ofst = 0x08,
.rpr_ofst = 0x88,
.fpr_ofst = UNDEF_REG,
+ .trg_ofst = UNDEF_REG,
};
static const struct stm32_exti_bank stm32h7xx_exti_b2 = {
@@ -107,6 +103,7 @@ static const struct stm32_exti_bank stm32h7xx_exti_b2 = {
.swier_ofst = 0x28,
.rpr_ofst = 0x98,
.fpr_ofst = UNDEF_REG,
+ .trg_ofst = UNDEF_REG,
};
static const struct stm32_exti_bank stm32h7xx_exti_b3 = {
@@ -117,6 +114,7 @@ static const struct stm32_exti_bank stm32h7xx_exti_b3 = {
.swier_ofst = 0x48,
.rpr_ofst = 0xA8,
.fpr_ofst = UNDEF_REG,
+ .trg_ofst = UNDEF_REG,
};
static const struct stm32_exti_bank *stm32h7xx_exti_banks[] = {
@@ -132,32 +130,35 @@ static const struct stm32_exti_drv_data stm32h7xx_drv_data = {
static const struct stm32_exti_bank stm32mp1_exti_b1 = {
.imr_ofst = 0x80,
- .emr_ofst = 0x84,
+ .emr_ofst = UNDEF_REG,
.rtsr_ofst = 0x00,
.ftsr_ofst = 0x04,
.swier_ofst = 0x08,
.rpr_ofst = 0x0C,
.fpr_ofst = 0x10,
+ .trg_ofst = 0x3EC,
};
static const struct stm32_exti_bank stm32mp1_exti_b2 = {
.imr_ofst = 0x90,
- .emr_ofst = 0x94,
+ .emr_ofst = UNDEF_REG,
.rtsr_ofst = 0x20,
.ftsr_ofst = 0x24,
.swier_ofst = 0x28,
.rpr_ofst = 0x2C,
.fpr_ofst = 0x30,
+ .trg_ofst = 0x3E8,
};
static const struct stm32_exti_bank stm32mp1_exti_b3 = {
.imr_ofst = 0xA0,
- .emr_ofst = 0xA4,
+ .emr_ofst = UNDEF_REG,
.rtsr_ofst = 0x40,
.ftsr_ofst = 0x44,
.swier_ofst = 0x48,
.rpr_ofst = 0x4C,
.fpr_ofst = 0x50,
+ .trg_ofst = 0x3E4,
};
static const struct stm32_exti_bank *stm32mp1_exti_banks[] = {
@@ -169,126 +170,114 @@ static const struct stm32_exti_bank *stm32mp1_exti_banks[] = {
static struct irq_chip stm32_exti_h_chip;
static struct irq_chip stm32_exti_h_chip_direct;
-static const struct stm32_desc_irq stm32mp1_desc_irq[] = {
- { .exti = 0, .irq_parent = 6, .chip = &stm32_exti_h_chip },
- { .exti = 1, .irq_parent = 7, .chip = &stm32_exti_h_chip },
- { .exti = 2, .irq_parent = 8, .chip = &stm32_exti_h_chip },
- { .exti = 3, .irq_parent = 9, .chip = &stm32_exti_h_chip },
- { .exti = 4, .irq_parent = 10, .chip = &stm32_exti_h_chip },
- { .exti = 5, .irq_parent = 23, .chip = &stm32_exti_h_chip },
- { .exti = 6, .irq_parent = 64, .chip = &stm32_exti_h_chip },
- { .exti = 7, .irq_parent = 65, .chip = &stm32_exti_h_chip },
- { .exti = 8, .irq_parent = 66, .chip = &stm32_exti_h_chip },
- { .exti = 9, .irq_parent = 67, .chip = &stm32_exti_h_chip },
- { .exti = 10, .irq_parent = 40, .chip = &stm32_exti_h_chip },
- { .exti = 11, .irq_parent = 42, .chip = &stm32_exti_h_chip },
- { .exti = 12, .irq_parent = 76, .chip = &stm32_exti_h_chip },
- { .exti = 13, .irq_parent = 77, .chip = &stm32_exti_h_chip },
- { .exti = 14, .irq_parent = 121, .chip = &stm32_exti_h_chip },
- { .exti = 15, .irq_parent = 127, .chip = &stm32_exti_h_chip },
- { .exti = 16, .irq_parent = 1, .chip = &stm32_exti_h_chip },
- { .exti = 19, .irq_parent = 3, .chip = &stm32_exti_h_chip_direct },
- { .exti = 21, .irq_parent = 31, .chip = &stm32_exti_h_chip_direct },
- { .exti = 22, .irq_parent = 33, .chip = &stm32_exti_h_chip_direct },
- { .exti = 23, .irq_parent = 72, .chip = &stm32_exti_h_chip_direct },
- { .exti = 24, .irq_parent = 95, .chip = &stm32_exti_h_chip_direct },
- { .exti = 25, .irq_parent = 107, .chip = &stm32_exti_h_chip_direct },
- { .exti = 26, .irq_parent = 37, .chip = &stm32_exti_h_chip_direct },
- { .exti = 27, .irq_parent = 38, .chip = &stm32_exti_h_chip_direct },
- { .exti = 28, .irq_parent = 39, .chip = &stm32_exti_h_chip_direct },
- { .exti = 29, .irq_parent = 71, .chip = &stm32_exti_h_chip_direct },
- { .exti = 30, .irq_parent = 52, .chip = &stm32_exti_h_chip_direct },
- { .exti = 31, .irq_parent = 53, .chip = &stm32_exti_h_chip_direct },
- { .exti = 32, .irq_parent = 82, .chip = &stm32_exti_h_chip_direct },
- { .exti = 33, .irq_parent = 83, .chip = &stm32_exti_h_chip_direct },
- { .exti = 47, .irq_parent = 93, .chip = &stm32_exti_h_chip_direct },
- { .exti = 48, .irq_parent = 138, .chip = &stm32_exti_h_chip_direct },
- { .exti = 50, .irq_parent = 139, .chip = &stm32_exti_h_chip_direct },
- { .exti = 52, .irq_parent = 140, .chip = &stm32_exti_h_chip_direct },
- { .exti = 53, .irq_parent = 141, .chip = &stm32_exti_h_chip_direct },
- { .exti = 54, .irq_parent = 135, .chip = &stm32_exti_h_chip_direct },
- { .exti = 61, .irq_parent = 100, .chip = &stm32_exti_h_chip_direct },
- { .exti = 65, .irq_parent = 144, .chip = &stm32_exti_h_chip },
- { .exti = 68, .irq_parent = 143, .chip = &stm32_exti_h_chip },
- { .exti = 70, .irq_parent = 62, .chip = &stm32_exti_h_chip_direct },
- { .exti = 73, .irq_parent = 129, .chip = &stm32_exti_h_chip },
+#define EXTI_INVALID_IRQ U8_MAX
+#define STM32MP1_DESC_IRQ_SIZE (ARRAY_SIZE(stm32mp1_exti_banks) * IRQS_PER_BANK)
+
+static const u8 stm32mp1_desc_irq[] = {
+ /* default value */
+ [0 ... (STM32MP1_DESC_IRQ_SIZE - 1)] = EXTI_INVALID_IRQ,
+
+ [0] = 6,
+ [1] = 7,
+ [2] = 8,
+ [3] = 9,
+ [4] = 10,
+ [5] = 23,
+ [6] = 64,
+ [7] = 65,
+ [8] = 66,
+ [9] = 67,
+ [10] = 40,
+ [11] = 42,
+ [12] = 76,
+ [13] = 77,
+ [14] = 121,
+ [15] = 127,
+ [16] = 1,
+ [19] = 3,
+ [21] = 31,
+ [22] = 33,
+ [23] = 72,
+ [24] = 95,
+ [25] = 107,
+ [26] = 37,
+ [27] = 38,
+ [28] = 39,
+ [29] = 71,
+ [30] = 52,
+ [31] = 53,
+ [32] = 82,
+ [33] = 83,
+ [47] = 93,
+ [48] = 138,
+ [50] = 139,
+ [52] = 140,
+ [53] = 141,
+ [54] = 135,
+ [61] = 100,
+ [65] = 144,
+ [68] = 143,
+ [70] = 62,
+ [73] = 129,
};
-static const struct stm32_desc_irq stm32mp13_desc_irq[] = {
- { .exti = 0, .irq_parent = 6, .chip = &stm32_exti_h_chip },
- { .exti = 1, .irq_parent = 7, .chip = &stm32_exti_h_chip },
- { .exti = 2, .irq_parent = 8, .chip = &stm32_exti_h_chip },
- { .exti = 3, .irq_parent = 9, .chip = &stm32_exti_h_chip },
- { .exti = 4, .irq_parent = 10, .chip = &stm32_exti_h_chip },
- { .exti = 5, .irq_parent = 24, .chip = &stm32_exti_h_chip },
- { .exti = 6, .irq_parent = 65, .chip = &stm32_exti_h_chip },
- { .exti = 7, .irq_parent = 66, .chip = &stm32_exti_h_chip },
- { .exti = 8, .irq_parent = 67, .chip = &stm32_exti_h_chip },
- { .exti = 9, .irq_parent = 68, .chip = &stm32_exti_h_chip },
- { .exti = 10, .irq_parent = 41, .chip = &stm32_exti_h_chip },
- { .exti = 11, .irq_parent = 43, .chip = &stm32_exti_h_chip },
- { .exti = 12, .irq_parent = 77, .chip = &stm32_exti_h_chip },
- { .exti = 13, .irq_parent = 78, .chip = &stm32_exti_h_chip },
- { .exti = 14, .irq_parent = 106, .chip = &stm32_exti_h_chip },
- { .exti = 15, .irq_parent = 109, .chip = &stm32_exti_h_chip },
- { .exti = 16, .irq_parent = 1, .chip = &stm32_exti_h_chip },
- { .exti = 19, .irq_parent = 3, .chip = &stm32_exti_h_chip_direct },
- { .exti = 21, .irq_parent = 32, .chip = &stm32_exti_h_chip_direct },
- { .exti = 22, .irq_parent = 34, .chip = &stm32_exti_h_chip_direct },
- { .exti = 23, .irq_parent = 73, .chip = &stm32_exti_h_chip_direct },
- { .exti = 24, .irq_parent = 93, .chip = &stm32_exti_h_chip_direct },
- { .exti = 25, .irq_parent = 114, .chip = &stm32_exti_h_chip_direct },
- { .exti = 26, .irq_parent = 38, .chip = &stm32_exti_h_chip_direct },
- { .exti = 27, .irq_parent = 39, .chip = &stm32_exti_h_chip_direct },
- { .exti = 28, .irq_parent = 40, .chip = &stm32_exti_h_chip_direct },
- { .exti = 29, .irq_parent = 72, .chip = &stm32_exti_h_chip_direct },
- { .exti = 30, .irq_parent = 53, .chip = &stm32_exti_h_chip_direct },
- { .exti = 31, .irq_parent = 54, .chip = &stm32_exti_h_chip_direct },
- { .exti = 32, .irq_parent = 83, .chip = &stm32_exti_h_chip_direct },
- { .exti = 33, .irq_parent = 84, .chip = &stm32_exti_h_chip_direct },
- { .exti = 44, .irq_parent = 96, .chip = &stm32_exti_h_chip_direct },
- { .exti = 47, .irq_parent = 92, .chip = &stm32_exti_h_chip_direct },
- { .exti = 48, .irq_parent = 116, .chip = &stm32_exti_h_chip_direct },
- { .exti = 50, .irq_parent = 117, .chip = &stm32_exti_h_chip_direct },
- { .exti = 52, .irq_parent = 118, .chip = &stm32_exti_h_chip_direct },
- { .exti = 53, .irq_parent = 119, .chip = &stm32_exti_h_chip_direct },
- { .exti = 68, .irq_parent = 63, .chip = &stm32_exti_h_chip_direct },
- { .exti = 70, .irq_parent = 98, .chip = &stm32_exti_h_chip_direct },
+static const u8 stm32mp13_desc_irq[] = {
+ /* default value */
+ [0 ... (STM32MP1_DESC_IRQ_SIZE - 1)] = EXTI_INVALID_IRQ,
+
+ [0] = 6,
+ [1] = 7,
+ [2] = 8,
+ [3] = 9,
+ [4] = 10,
+ [5] = 24,
+ [6] = 65,
+ [7] = 66,
+ [8] = 67,
+ [9] = 68,
+ [10] = 41,
+ [11] = 43,
+ [12] = 77,
+ [13] = 78,
+ [14] = 106,
+ [15] = 109,
+ [16] = 1,
+ [19] = 3,
+ [21] = 32,
+ [22] = 34,
+ [23] = 73,
+ [24] = 93,
+ [25] = 114,
+ [26] = 38,
+ [27] = 39,
+ [28] = 40,
+ [29] = 72,
+ [30] = 53,
+ [31] = 54,
+ [32] = 83,
+ [33] = 84,
+ [44] = 96,
+ [47] = 92,
+ [48] = 116,
+ [50] = 117,
+ [52] = 118,
+ [53] = 119,
+ [68] = 63,
+ [70] = 98,
};
static const struct stm32_exti_drv_data stm32mp1_drv_data = {
.exti_banks = stm32mp1_exti_banks,
.bank_nr = ARRAY_SIZE(stm32mp1_exti_banks),
.desc_irqs = stm32mp1_desc_irq,
- .irq_nr = ARRAY_SIZE(stm32mp1_desc_irq),
};
static const struct stm32_exti_drv_data stm32mp13_drv_data = {
.exti_banks = stm32mp1_exti_banks,
.bank_nr = ARRAY_SIZE(stm32mp1_exti_banks),
.desc_irqs = stm32mp13_desc_irq,
- .irq_nr = ARRAY_SIZE(stm32mp13_desc_irq),
};
-static const struct
-stm32_desc_irq *stm32_exti_get_desc(const struct stm32_exti_drv_data *drv_data,
- irq_hw_number_t hwirq)
-{
- const struct stm32_desc_irq *desc = NULL;
- int i;
-
- if (!drv_data->desc_irqs)
- return NULL;
-
- for (i = 0; i < drv_data->irq_nr; i++) {
- desc = &drv_data->desc_irqs[i];
- if (desc->exti == hwirq)
- break;
- }
-
- return desc;
-}
-
static unsigned long stm32_exti_pending(struct irq_chip_generic *gc)
{
struct stm32_exti_chip_data *chip_data = gc->private;
@@ -614,7 +603,7 @@ static int stm32_exti_h_set_affinity(struct irq_data *d,
if (d->parent_data->chip)
return irq_chip_set_affinity_parent(d, dest, force);
- return -EINVAL;
+ return IRQ_SET_MASK_OK_DONE;
}
static int __maybe_unused stm32_exti_h_suspend(void)
@@ -691,8 +680,8 @@ static struct irq_chip stm32_exti_h_chip_direct = {
.name = "stm32-exti-h-direct",
.irq_eoi = irq_chip_eoi_parent,
.irq_ack = irq_chip_ack_parent,
- .irq_mask = irq_chip_mask_parent,
- .irq_unmask = irq_chip_unmask_parent,
+ .irq_mask = stm32_exti_h_mask,
+ .irq_unmask = stm32_exti_h_unmask,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_set_type = irq_chip_set_type_parent,
.irq_set_wake = stm32_exti_h_set_wake,
@@ -706,28 +695,36 @@ static int stm32_exti_h_domain_alloc(struct irq_domain *dm,
{
struct stm32_exti_host_data *host_data = dm->host_data;
struct stm32_exti_chip_data *chip_data;
- const struct stm32_desc_irq *desc;
+ u8 desc_irq;
struct irq_fwspec *fwspec = data;
struct irq_fwspec p_fwspec;
irq_hw_number_t hwirq;
int bank;
+ u32 event_trg;
+ struct irq_chip *chip;
hwirq = fwspec->param[0];
+ if (hwirq >= host_data->drv_data->bank_nr * IRQS_PER_BANK)
+ return -EINVAL;
+
bank = hwirq / IRQS_PER_BANK;
chip_data = &host_data->chips_data[bank];
+ event_trg = readl_relaxed(host_data->base + chip_data->reg_bank->trg_ofst);
+ chip = (event_trg & BIT(hwirq % IRQS_PER_BANK)) ?
+ &stm32_exti_h_chip : &stm32_exti_h_chip_direct;
+
+ irq_domain_set_hwirq_and_chip(dm, virq, hwirq, chip, chip_data);
- desc = stm32_exti_get_desc(host_data->drv_data, hwirq);
- if (!desc)
+ if (!host_data->drv_data || !host_data->drv_data->desc_irqs)
return -EINVAL;
- irq_domain_set_hwirq_and_chip(dm, virq, hwirq, desc->chip,
- chip_data);
- if (desc->irq_parent) {
+ desc_irq = host_data->drv_data->desc_irqs[hwirq];
+ if (desc_irq != EXTI_INVALID_IRQ) {
p_fwspec.fwnode = dm->parent->fwnode;
p_fwspec.param_count = 3;
p_fwspec.param[0] = GIC_SPI;
- p_fwspec.param[1] = desc->irq_parent;
+ p_fwspec.param[1] = desc_irq;
p_fwspec.param[2] = IRQ_TYPE_LEVEL_HIGH;
return irq_domain_alloc_irqs_parent(dm, virq, 1, &p_fwspec);
@@ -792,7 +789,8 @@ stm32_exti_chip_data *stm32_exti_chip_init(struct stm32_exti_host_data *h_data,
* clear registers to avoid residue
*/
writel_relaxed(0, base + stm32_bank->imr_ofst);
- writel_relaxed(0, base + stm32_bank->emr_ofst);
+ if (stm32_bank->emr_ofst != UNDEF_REG)
+ writel_relaxed(0, base + stm32_bank->emr_ofst);
pr_info("%pOF: bank%d\n", node, bank_idx);
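
The stm32-exti tables above replace the per-entry struct list and linear search with a dense u8 array indexed directly by EXTI line, using the GCC range-initializer extension to mark unmapped lines. A stand-alone sketch of that lookup with a shortened, made-up table:

#include <stdio.h>

#define EXTI_INVALID_IRQ 0xff
#define NR_EXTI 16

static const unsigned char desc_irq[NR_EXTI] = {
	/* Default every line to "no parent IRQ" (GCC range initializer). */
	[0 ... NR_EXTI - 1] = EXTI_INVALID_IRQ,
	[0] = 6,
	[1] = 7,
	[5] = 23,
};

int main(void)
{
	for (int exti = 0; exti < NR_EXTI; exti++) {
		if (desc_irq[exti] == EXTI_INVALID_IRQ)
			continue;
		printf("EXTI %d -> parent SPI %u\n", exti, desc_irq[exti]);
	}
	return 0;
}
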
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 20e53b167f81..c8539d0e12dd 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7304,7 +7304,9 @@ static struct r5conf *setup_conf(struct mddev *mddev)
goto abort;
conf->mddev = mddev;
- if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
+ ret = -ENOMEM;
+ conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!conf->stripe_hashtbl)
goto abort;
/* We init hash_locks[0] separately to that it can be used
diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile
index 2e0aa74ac185..95ef971b5e1c 100644
--- a/drivers/misc/lkdtm/Makefile
+++ b/drivers/misc/lkdtm/Makefile
@@ -13,10 +13,13 @@ lkdtm-$(CONFIG_LKDTM) += cfi.o
lkdtm-$(CONFIG_LKDTM) += fortify.o
lkdtm-$(CONFIG_PPC_64S_HASH_MMU) += powerpc.o
-KASAN_SANITIZE_rodata.o := n
KASAN_SANITIZE_stackleak.o := n
-KCOV_INSTRUMENT_rodata.o := n
-CFLAGS_REMOVE_rodata.o += $(CC_FLAGS_LTO)
+
+KASAN_SANITIZE_rodata.o := n
+KCSAN_SANITIZE_rodata.o := n
+KCOV_INSTRUMENT_rodata.o := n
+OBJECT_FILES_NON_STANDARD_rodata.o := y
+CFLAGS_REMOVE_rodata.o += $(CC_FLAGS_LTO) $(RETHUNK_CFLAGS)
OBJCOPYFLAGS :=
OBJCOPYFLAGS_rodata_objcopy.o := \
diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c
index 86e867ffbb10..033be559a730 100644
--- a/drivers/mmc/host/sdhci-omap.c
+++ b/drivers/mmc/host/sdhci-omap.c
@@ -1298,8 +1298,9 @@ static int sdhci_omap_probe(struct platform_device *pdev)
/*
* omap_device_pm_domain has callbacks to enable the main
* functional clock, interface clock and also configure the
- * SYSCONFIG register of omap devices. The callback will be invoked
- * as part of pm_runtime_get_sync.
+	 * SYSCONFIG register to clear any bootloader-set voltage
+	 * capabilities before calling sdhci_setup_host(). The
+ * callback will be invoked as part of pm_runtime_get_sync.
*/
pm_runtime_use_autosuspend(dev);
pm_runtime_set_autosuspend_delay(dev, 50);
@@ -1441,7 +1442,8 @@ static int __maybe_unused sdhci_omap_runtime_suspend(struct device *dev)
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host);
- sdhci_runtime_suspend_host(host);
+ if (omap_host->con != -EINVAL)
+ sdhci_runtime_suspend_host(host);
sdhci_omap_context_save(omap_host);
@@ -1458,10 +1460,10 @@ static int __maybe_unused sdhci_omap_runtime_resume(struct device *dev)
pinctrl_pm_select_default_state(dev);
- if (omap_host->con != -EINVAL)
+ if (omap_host->con != -EINVAL) {
sdhci_omap_context_restore(omap_host);
-
- sdhci_runtime_resume_host(host, 0);
+ sdhci_runtime_resume_host(host, 0);
+ }
return 0;
}
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index 889e40329956..93da23682d86 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -850,9 +850,10 @@ static int gpmi_nfc_compute_timings(struct gpmi_nand_data *this,
unsigned int tRP_ps;
bool use_half_period;
int sample_delay_ps, sample_delay_factor;
- u16 busy_timeout_cycles;
+ unsigned int busy_timeout_cycles;
u8 wrn_dly_sel;
unsigned long clk_rate, min_rate;
+ u64 busy_timeout_ps;
if (sdr->tRC_min >= 30000) {
/* ONFI non-EDO modes [0-3] */
@@ -885,7 +886,8 @@ static int gpmi_nfc_compute_timings(struct gpmi_nand_data *this,
addr_setup_cycles = TO_CYCLES(sdr->tALS_min, period_ps);
data_setup_cycles = TO_CYCLES(sdr->tDS_min, period_ps);
data_hold_cycles = TO_CYCLES(sdr->tDH_min, period_ps);
- busy_timeout_cycles = TO_CYCLES(sdr->tWB_max + sdr->tR_max, period_ps);
+ busy_timeout_ps = max(sdr->tBERS_max, sdr->tPROG_max);
+ busy_timeout_cycles = TO_CYCLES(busy_timeout_ps, period_ps);
hw->timing0 = BF_GPMI_TIMING0_ADDRESS_SETUP(addr_setup_cycles) |
BF_GPMI_TIMING0_DATA_HOLD(data_hold_cycles) |
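
The gpmi-nand change above derives the busy timeout from the slower of tBERS_max and tPROG_max and widens the cycle count, because erase times expressed in picoseconds divided by the clock period do not fit in a u16. A small arithmetic sketch with representative numbers; TO_CYCLES() here is assumed to be a plain round-up division:

#include <stdio.h>
#include <stdint.h>

#define TO_CYCLES(duration, period)	(((duration) + (period) - 1) / (period))

int main(void)
{
	uint64_t tBERS_max = 10ULL * 1000 * 1000 * 1000;	/* 10 ms in ps */
	uint64_t period_ps = 10000;				/* 100 MHz clock */

	uint64_t cycles = TO_CYCLES(tBERS_max, period_ps);
	uint16_t truncated = (uint16_t)cycles;

	/* Prints 1000000 vs 16960: the u16 silently wraps. */
	printf("needed cycles: %llu, as u16: %u\n",
	       (unsigned long long)cycles, truncated);
	return 0;
}
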
diff --git a/drivers/net/amt.c b/drivers/net/amt.c
index be2719a3ba70..e019526e1df6 100644
--- a/drivers/net/amt.c
+++ b/drivers/net/amt.c
@@ -563,7 +563,7 @@ static struct sk_buff *amt_build_igmp_gq(struct amt_dev *amt)
ihv3->nsrcs = 0;
ihv3->resv = 0;
ihv3->suppress = false;
- ihv3->qrv = amt->net->ipv4.sysctl_igmp_qrv;
+ ihv3->qrv = READ_ONCE(amt->net->ipv4.sysctl_igmp_qrv);
ihv3->csum = 0;
csum = &ihv3->csum;
csum_start = (void *)ihv3;
@@ -577,14 +577,14 @@ static struct sk_buff *amt_build_igmp_gq(struct amt_dev *amt)
return skb;
}
-static void __amt_update_gw_status(struct amt_dev *amt, enum amt_status status,
- bool validate)
+static void amt_update_gw_status(struct amt_dev *amt, enum amt_status status,
+ bool validate)
{
if (validate && amt->status >= status)
return;
netdev_dbg(amt->dev, "Update GW status %s -> %s",
status_str[amt->status], status_str[status]);
- amt->status = status;
+ WRITE_ONCE(amt->status, status);
}
static void __amt_update_relay_status(struct amt_tunnel_list *tunnel,
@@ -600,14 +600,6 @@ static void __amt_update_relay_status(struct amt_tunnel_list *tunnel,
tunnel->status = status;
}
-static void amt_update_gw_status(struct amt_dev *amt, enum amt_status status,
- bool validate)
-{
- spin_lock_bh(&amt->lock);
- __amt_update_gw_status(amt, status, validate);
- spin_unlock_bh(&amt->lock);
-}
-
static void amt_update_relay_status(struct amt_tunnel_list *tunnel,
enum amt_status status, bool validate)
{
@@ -700,9 +692,7 @@ static void amt_send_discovery(struct amt_dev *amt)
if (unlikely(net_xmit_eval(err)))
amt->dev->stats.tx_errors++;
- spin_lock_bh(&amt->lock);
- __amt_update_gw_status(amt, AMT_STATUS_SENT_DISCOVERY, true);
- spin_unlock_bh(&amt->lock);
+ amt_update_gw_status(amt, AMT_STATUS_SENT_DISCOVERY, true);
out:
rcu_read_unlock();
}
@@ -900,6 +890,28 @@ static void amt_send_mld_gq(struct amt_dev *amt, struct amt_tunnel_list *tunnel)
}
#endif
+static bool amt_queue_event(struct amt_dev *amt, enum amt_event event,
+ struct sk_buff *skb)
+{
+ int index;
+
+ spin_lock_bh(&amt->lock);
+ if (amt->nr_events >= AMT_MAX_EVENTS) {
+ spin_unlock_bh(&amt->lock);
+ return 1;
+ }
+
+ index = (amt->event_idx + amt->nr_events) % AMT_MAX_EVENTS;
+ amt->events[index].event = event;
+ amt->events[index].skb = skb;
+ amt->nr_events++;
+ amt->event_idx %= AMT_MAX_EVENTS;
+ queue_work(amt_wq, &amt->event_wq);
+ spin_unlock_bh(&amt->lock);
+
+ return 0;
+}
+
static void amt_secret_work(struct work_struct *work)
{
struct amt_dev *amt = container_of(to_delayed_work(work),
@@ -913,58 +925,72 @@ static void amt_secret_work(struct work_struct *work)
msecs_to_jiffies(AMT_SECRET_TIMEOUT));
}
-static void amt_discovery_work(struct work_struct *work)
+static void amt_event_send_discovery(struct amt_dev *amt)
{
- struct amt_dev *amt = container_of(to_delayed_work(work),
- struct amt_dev,
- discovery_wq);
-
- spin_lock_bh(&amt->lock);
if (amt->status > AMT_STATUS_SENT_DISCOVERY)
goto out;
get_random_bytes(&amt->nonce, sizeof(__be32));
- spin_unlock_bh(&amt->lock);
amt_send_discovery(amt);
- spin_lock_bh(&amt->lock);
out:
mod_delayed_work(amt_wq, &amt->discovery_wq,
msecs_to_jiffies(AMT_DISCOVERY_TIMEOUT));
- spin_unlock_bh(&amt->lock);
}
-static void amt_req_work(struct work_struct *work)
+static void amt_discovery_work(struct work_struct *work)
{
struct amt_dev *amt = container_of(to_delayed_work(work),
struct amt_dev,
- req_wq);
+ discovery_wq);
+
+ if (amt_queue_event(amt, AMT_EVENT_SEND_DISCOVERY, NULL))
+ mod_delayed_work(amt_wq, &amt->discovery_wq,
+ msecs_to_jiffies(AMT_DISCOVERY_TIMEOUT));
+}
+
+static void amt_event_send_request(struct amt_dev *amt)
+{
u32 exp;
- spin_lock_bh(&amt->lock);
if (amt->status < AMT_STATUS_RECEIVED_ADVERTISEMENT)
goto out;
if (amt->req_cnt > AMT_MAX_REQ_COUNT) {
netdev_dbg(amt->dev, "Gateway is not ready");
amt->qi = AMT_INIT_REQ_TIMEOUT;
- amt->ready4 = false;
- amt->ready6 = false;
+ WRITE_ONCE(amt->ready4, false);
+ WRITE_ONCE(amt->ready6, false);
amt->remote_ip = 0;
- __amt_update_gw_status(amt, AMT_STATUS_INIT, false);
+ amt_update_gw_status(amt, AMT_STATUS_INIT, false);
amt->req_cnt = 0;
+ amt->nonce = 0;
goto out;
}
- spin_unlock_bh(&amt->lock);
+
+ if (!amt->req_cnt) {
+ WRITE_ONCE(amt->ready4, false);
+ WRITE_ONCE(amt->ready6, false);
+ get_random_bytes(&amt->nonce, sizeof(__be32));
+ }
amt_send_request(amt, false);
amt_send_request(amt, true);
- spin_lock_bh(&amt->lock);
- __amt_update_gw_status(amt, AMT_STATUS_SENT_REQUEST, true);
+ amt_update_gw_status(amt, AMT_STATUS_SENT_REQUEST, true);
amt->req_cnt++;
out:
exp = min_t(u32, (1 * (1 << amt->req_cnt)), AMT_MAX_REQ_TIMEOUT);
mod_delayed_work(amt_wq, &amt->req_wq, msecs_to_jiffies(exp * 1000));
- spin_unlock_bh(&amt->lock);
+}
+
+static void amt_req_work(struct work_struct *work)
+{
+ struct amt_dev *amt = container_of(to_delayed_work(work),
+ struct amt_dev,
+ req_wq);
+
+ if (amt_queue_event(amt, AMT_EVENT_SEND_REQUEST, NULL))
+ mod_delayed_work(amt_wq, &amt->req_wq,
+ msecs_to_jiffies(100));
}
static bool amt_send_membership_update(struct amt_dev *amt,
@@ -1220,7 +1246,8 @@ static netdev_tx_t amt_dev_xmit(struct sk_buff *skb, struct net_device *dev)
/* Gateway only passes IGMP/MLD packets */
if (!report)
goto free;
- if ((!v6 && !amt->ready4) || (v6 && !amt->ready6))
+ if ((!v6 && !READ_ONCE(amt->ready4)) ||
+ (v6 && !READ_ONCE(amt->ready6)))
goto free;
if (amt_send_membership_update(amt, skb, v6))
goto free;
@@ -2236,6 +2263,10 @@ static bool amt_advertisement_handler(struct amt_dev *amt, struct sk_buff *skb)
ipv4_is_zeronet(amta->ip4))
return true;
+ if (amt->status != AMT_STATUS_SENT_DISCOVERY ||
+ amt->nonce != amta->nonce)
+ return true;
+
amt->remote_ip = amta->ip4;
netdev_dbg(amt->dev, "advertised remote ip = %pI4\n", &amt->remote_ip);
mod_delayed_work(amt_wq, &amt->req_wq, 0);
@@ -2251,6 +2282,9 @@ static bool amt_multicast_data_handler(struct amt_dev *amt, struct sk_buff *skb)
struct ethhdr *eth;
struct iphdr *iph;
+ if (READ_ONCE(amt->status) != AMT_STATUS_SENT_UPDATE)
+ return true;
+
hdr_size = sizeof(*amtmd) + sizeof(struct udphdr);
if (!pskb_may_pull(skb, hdr_size))
return true;
@@ -2325,6 +2359,9 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
if (amtmq->reserved || amtmq->version)
return true;
+ if (amtmq->nonce != amt->nonce)
+ return true;
+
hdr_size -= sizeof(*eth);
if (iptunnel_pull_header(skb, hdr_size, htons(ETH_P_TEB), false))
return true;
@@ -2339,6 +2376,9 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
iph = ip_hdr(skb);
if (iph->version == 4) {
+ if (READ_ONCE(amt->ready4))
+ return true;
+
if (!pskb_may_pull(skb, sizeof(*iph) + AMT_IPHDR_OPTS +
sizeof(*ihv3)))
return true;
@@ -2349,12 +2389,10 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
ihv3 = skb_pull(skb, sizeof(*iph) + AMT_IPHDR_OPTS);
skb_reset_transport_header(skb);
skb_push(skb, sizeof(*iph) + AMT_IPHDR_OPTS);
- spin_lock_bh(&amt->lock);
- amt->ready4 = true;
+ WRITE_ONCE(amt->ready4, true);
amt->mac = amtmq->response_mac;
amt->req_cnt = 0;
amt->qi = ihv3->qqic;
- spin_unlock_bh(&amt->lock);
skb->protocol = htons(ETH_P_IP);
eth->h_proto = htons(ETH_P_IP);
ip_eth_mc_map(iph->daddr, eth->h_dest);
@@ -2363,6 +2401,9 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
struct mld2_query *mld2q;
struct ipv6hdr *ip6h;
+ if (READ_ONCE(amt->ready6))
+ return true;
+
if (!pskb_may_pull(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS +
sizeof(*mld2q)))
return true;
@@ -2374,12 +2415,10 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
mld2q = skb_pull(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS);
skb_reset_transport_header(skb);
skb_push(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS);
- spin_lock_bh(&amt->lock);
- amt->ready6 = true;
+ WRITE_ONCE(amt->ready6, true);
amt->mac = amtmq->response_mac;
amt->req_cnt = 0;
amt->qi = mld2q->mld2q_qqic;
- spin_unlock_bh(&amt->lock);
skb->protocol = htons(ETH_P_IPV6);
eth->h_proto = htons(ETH_P_IPV6);
ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
@@ -2392,12 +2431,14 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
skb->pkt_type = PACKET_MULTICAST;
skb->ip_summed = CHECKSUM_NONE;
len = skb->len;
+ local_bh_disable();
if (__netif_rx(skb) == NET_RX_SUCCESS) {
amt_update_gw_status(amt, AMT_STATUS_RECEIVED_QUERY, true);
dev_sw_netstats_rx_add(amt->dev, len);
} else {
amt->dev->stats.rx_dropped++;
}
+ local_bh_enable();
return false;
}
@@ -2638,7 +2679,9 @@ static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb)
if (tunnel->ip4 == iph->saddr)
goto send;
+ spin_lock_bh(&amt->lock);
if (amt->nr_tunnels >= amt->max_tunnels) {
+ spin_unlock_bh(&amt->lock);
icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
return true;
}
@@ -2646,8 +2689,10 @@ static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb)
tunnel = kzalloc(sizeof(*tunnel) +
(sizeof(struct hlist_head) * amt->hash_buckets),
GFP_ATOMIC);
- if (!tunnel)
+ if (!tunnel) {
+ spin_unlock_bh(&amt->lock);
return true;
+ }
tunnel->source_port = udph->source;
tunnel->ip4 = iph->saddr;
@@ -2660,10 +2705,9 @@ static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb)
INIT_DELAYED_WORK(&tunnel->gc_wq, amt_tunnel_expire);
- spin_lock_bh(&amt->lock);
list_add_tail_rcu(&tunnel->list, &amt->tunnel_list);
tunnel->key = amt->key;
- amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_REQUEST, true);
+ __amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_REQUEST, true);
amt->nr_tunnels++;
mod_delayed_work(amt_wq, &tunnel->gc_wq,
msecs_to_jiffies(amt_gmi(amt)));
@@ -2688,6 +2732,38 @@ send:
return false;
}
+static void amt_gw_rcv(struct amt_dev *amt, struct sk_buff *skb)
+{
+ int type = amt_parse_type(skb);
+ int err = 1;
+
+ if (type == -1)
+ goto drop;
+
+ if (amt->mode == AMT_MODE_GATEWAY) {
+ switch (type) {
+ case AMT_MSG_ADVERTISEMENT:
+ err = amt_advertisement_handler(amt, skb);
+ break;
+ case AMT_MSG_MEMBERSHIP_QUERY:
+ err = amt_membership_query_handler(amt, skb);
+ if (!err)
+ return;
+ break;
+ default:
+ netdev_dbg(amt->dev, "Invalid type of Gateway\n");
+ break;
+ }
+ }
+drop:
+ if (err) {
+ amt->dev->stats.rx_dropped++;
+ kfree_skb(skb);
+ } else {
+ consume_skb(skb);
+ }
+}
+
static int amt_rcv(struct sock *sk, struct sk_buff *skb)
{
struct amt_dev *amt;
@@ -2719,8 +2795,12 @@ static int amt_rcv(struct sock *sk, struct sk_buff *skb)
err = true;
goto drop;
}
- err = amt_advertisement_handler(amt, skb);
- break;
+ if (amt_queue_event(amt, AMT_EVENT_RECEIVE, skb)) {
+ netdev_dbg(amt->dev, "AMT Event queue full\n");
+ err = true;
+ goto drop;
+ }
+ goto out;
case AMT_MSG_MULTICAST_DATA:
if (iph->saddr != amt->remote_ip) {
netdev_dbg(amt->dev, "Invalid Relay IP\n");
@@ -2738,11 +2818,12 @@ static int amt_rcv(struct sock *sk, struct sk_buff *skb)
err = true;
goto drop;
}
- err = amt_membership_query_handler(amt, skb);
- if (err)
+ if (amt_queue_event(amt, AMT_EVENT_RECEIVE, skb)) {
+ netdev_dbg(amt->dev, "AMT Event queue full\n");
+ err = true;
goto drop;
- else
- goto out;
+ }
+ goto out;
default:
err = true;
netdev_dbg(amt->dev, "Invalid type of Gateway\n");
@@ -2780,6 +2861,46 @@ out:
return 0;
}
+static void amt_event_work(struct work_struct *work)
+{
+ struct amt_dev *amt = container_of(work, struct amt_dev, event_wq);
+ struct sk_buff *skb;
+ u8 event;
+ int i;
+
+ for (i = 0; i < AMT_MAX_EVENTS; i++) {
+ spin_lock_bh(&amt->lock);
+ if (amt->nr_events == 0) {
+ spin_unlock_bh(&amt->lock);
+ return;
+ }
+ event = amt->events[amt->event_idx].event;
+ skb = amt->events[amt->event_idx].skb;
+ amt->events[amt->event_idx].event = AMT_EVENT_NONE;
+ amt->events[amt->event_idx].skb = NULL;
+ amt->nr_events--;
+ amt->event_idx++;
+ amt->event_idx %= AMT_MAX_EVENTS;
+ spin_unlock_bh(&amt->lock);
+
+ switch (event) {
+ case AMT_EVENT_RECEIVE:
+ amt_gw_rcv(amt, skb);
+ break;
+ case AMT_EVENT_SEND_DISCOVERY:
+ amt_event_send_discovery(amt);
+ break;
+ case AMT_EVENT_SEND_REQUEST:
+ amt_event_send_request(amt);
+ break;
+ default:
+ if (skb)
+ kfree_skb(skb);
+ break;
+ }
+ }
+}
+
static int amt_err_lookup(struct sock *sk, struct sk_buff *skb)
{
struct amt_dev *amt;
@@ -2804,7 +2925,7 @@ static int amt_err_lookup(struct sock *sk, struct sk_buff *skb)
break;
case AMT_MSG_REQUEST:
case AMT_MSG_MEMBERSHIP_UPDATE:
- if (amt->status >= AMT_STATUS_RECEIVED_ADVERTISEMENT)
+ if (READ_ONCE(amt->status) >= AMT_STATUS_RECEIVED_ADVERTISEMENT)
mod_delayed_work(amt_wq, &amt->req_wq, 0);
break;
default:
@@ -2867,6 +2988,8 @@ static int amt_dev_open(struct net_device *dev)
amt->ready4 = false;
amt->ready6 = false;
+ amt->event_idx = 0;
+ amt->nr_events = 0;
err = amt_socket_create(amt);
if (err)
@@ -2874,6 +2997,7 @@ static int amt_dev_open(struct net_device *dev)
amt->req_cnt = 0;
amt->remote_ip = 0;
+ amt->nonce = 0;
get_random_bytes(&amt->key, sizeof(siphash_key_t));
amt->status = AMT_STATUS_INIT;
@@ -2892,6 +3016,8 @@ static int amt_dev_stop(struct net_device *dev)
struct amt_dev *amt = netdev_priv(dev);
struct amt_tunnel_list *tunnel, *tmp;
struct socket *sock;
+ struct sk_buff *skb;
+ int i;
cancel_delayed_work_sync(&amt->req_wq);
cancel_delayed_work_sync(&amt->discovery_wq);
@@ -2904,6 +3030,15 @@ static int amt_dev_stop(struct net_device *dev)
if (sock)
udp_tunnel_sock_release(sock);
+ cancel_work_sync(&amt->event_wq);
+ for (i = 0; i < AMT_MAX_EVENTS; i++) {
+ skb = amt->events[i].skb;
+ if (skb)
+ kfree_skb(skb);
+ amt->events[i].event = AMT_EVENT_NONE;
+ amt->events[i].skb = NULL;
+ }
+
amt->ready4 = false;
amt->ready6 = false;
amt->req_cnt = 0;
@@ -3095,7 +3230,7 @@ static int amt_newlink(struct net *net, struct net_device *dev,
goto err;
}
if (amt->mode == AMT_MODE_RELAY) {
- amt->qrv = amt->net->ipv4.sysctl_igmp_qrv;
+ amt->qrv = READ_ONCE(amt->net->ipv4.sysctl_igmp_qrv);
amt->qri = 10;
dev->needed_headroom = amt->stream_dev->needed_headroom +
AMT_RELAY_HLEN;
@@ -3146,8 +3281,8 @@ static int amt_newlink(struct net *net, struct net_device *dev,
INIT_DELAYED_WORK(&amt->discovery_wq, amt_discovery_work);
INIT_DELAYED_WORK(&amt->req_wq, amt_req_work);
INIT_DELAYED_WORK(&amt->secret_wq, amt_secret_work);
+ INIT_WORK(&amt->event_wq, amt_event_work);
INIT_LIST_HEAD(&amt->tunnel_list);
-
return 0;
err:
dev_put(amt->stream_dev);
@@ -3280,7 +3415,7 @@ static int __init amt_init(void)
if (err < 0)
goto unregister_notifier;
- amt_wq = alloc_workqueue("amt", WQ_UNBOUND, 1);
+ amt_wq = alloc_workqueue("amt", WQ_UNBOUND, 0);
if (!amt_wq) {
err = -ENOMEM;
goto rtnl_unregister;
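
The amt changes above move all gateway-side message handling out of the UDP receive path: amt_rcv() only queues an event under amt->lock, and amt_event_work() drains a fixed-size ring one entry at a time in process context. A minimal sketch of that bounded event ring follows; it is not the driver's code, and the evq_* names, the ring size of 16, and schedule_work() in place of the driver's dedicated workqueue are illustrative assumptions.

#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/skbuff.h>

struct evq_entry {
	u8 event;
	struct sk_buff *skb;
};

struct evq {
	spinlock_t lock;		/* spin_lock_init() at setup time */
	struct work_struct work;	/* INIT_WORK(&q->work, evq_work) at setup time */
	struct evq_entry entries[16];
	int head;			/* next entry to consume */
	int nr;				/* number of queued entries */
};

/* Producer: called from the receive path (softirq context). */
static bool evq_queue(struct evq *q, u8 event, struct sk_buff *skb)
{
	int idx;

	spin_lock_bh(&q->lock);
	if (q->nr >= ARRAY_SIZE(q->entries)) {
		spin_unlock_bh(&q->lock);
		return false;		/* queue full, caller drops the skb */
	}
	idx = (q->head + q->nr) % ARRAY_SIZE(q->entries);
	q->entries[idx].event = event;
	q->entries[idx].skb = skb;
	q->nr++;
	spin_unlock_bh(&q->lock);

	schedule_work(&q->work);
	return true;
}

/* Consumer: runs in process context, dequeues one entry per iteration. */
static void evq_work(struct work_struct *work)
{
	struct evq *q = container_of(work, struct evq, work);
	struct evq_entry e;

	for (;;) {
		spin_lock_bh(&q->lock);
		if (!q->nr) {
			spin_unlock_bh(&q->lock);
			return;
		}
		e = q->entries[q->head];
		q->head = (q->head + 1) % ARRAY_SIZE(q->entries);
		q->nr--;
		spin_unlock_bh(&q->lock);

		/* handle e.event / e.skb outside the lock */
	}
}
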
diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c
index ba42cef10a53..cb0321ea853c 100644
--- a/drivers/net/can/rcar/rcar_canfd.c
+++ b/drivers/net/can/rcar/rcar_canfd.c
@@ -1843,6 +1843,7 @@ static int rcar_canfd_probe(struct platform_device *pdev)
of_child = of_get_child_by_name(pdev->dev.of_node, name);
if (of_child && of_device_is_available(of_child))
channels_mask |= BIT(i);
+ of_node_put(of_child);
}
if (chip_id != RENESAS_RZG2L) {
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 9b47b07162fe..bc6518504fd4 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -1690,8 +1690,8 @@ static int mcp251xfd_register_chip_detect(struct mcp251xfd_priv *priv)
u32 osc;
int err;
- /* The OSC_LPMEN is only supported on MCP2518FD, so use it to
- * autodetect the model.
+ /* The OSC_LPMEN is only supported on MCP2518FD and MCP251863,
+ * so use it to autodetect the model.
*/
err = regmap_update_bits(priv->map_reg, MCP251XFD_REG_OSC,
MCP251XFD_REG_OSC_LPMEN,
@@ -1703,10 +1703,18 @@ static int mcp251xfd_register_chip_detect(struct mcp251xfd_priv *priv)
if (err)
return err;
- if (osc & MCP251XFD_REG_OSC_LPMEN)
- devtype_data = &mcp251xfd_devtype_data_mcp2518fd;
- else
+ if (osc & MCP251XFD_REG_OSC_LPMEN) {
+ /* We cannot distinguish between MCP2518FD and
+ * MCP251863. If firmware specifies MCP251863, keep
+ * it, otherwise set to MCP2518FD.
+ */
+ if (mcp251xfd_is_251863(priv))
+ devtype_data = &mcp251xfd_devtype_data_mcp251863;
+ else
+ devtype_data = &mcp251xfd_devtype_data_mcp2518fd;
+ } else {
devtype_data = &mcp251xfd_devtype_data_mcp2517fd;
+ }
if (!mcp251xfd_is_251XFD(priv) &&
priv->devtype_data.model != devtype_data->model) {
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 9ca8c8d7740f..92a500e1ccd2 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -1038,18 +1038,21 @@ int ksz_switch_register(struct ksz_device *dev,
ports = of_get_child_by_name(dev->dev->of_node, "ethernet-ports");
if (!ports)
ports = of_get_child_by_name(dev->dev->of_node, "ports");
- if (ports)
+ if (ports) {
for_each_available_child_of_node(ports, port) {
if (of_property_read_u32(port, "reg",
&port_num))
continue;
if (!(dev->port_mask & BIT(port_num))) {
of_node_put(port);
+ of_node_put(ports);
return -EINVAL;
}
of_get_phy_mode(port,
&dev->ports[port_num].interface);
}
+ of_node_put(ports);
+ }
dev->synclko_125 = of_property_read_bool(dev->dev->of_node,
"microchip,synclko-125");
dev->synclko_disable = of_property_read_bool(dev->dev->of_node,
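
This hunk and the rcar_canfd one above fix the same class of leak: of_get_child_by_name() returns a node with its refcount raised, and for_each_available_child_of_node() holds a reference on the current child, so every exit path (including the early -EINVAL return) must drop them with of_node_put(). A compact reminder of the rule, as a sketch rather than any driver's actual code; port_is_valid() is a hypothetical check standing in for the real per-port validation.

#include <linux/of.h>

bool port_is_valid(struct device_node *np);	/* hypothetical check */

static int scan_ports(struct device_node *parent)
{
	struct device_node *ports, *port;
	int err = 0;

	ports = of_get_child_by_name(parent, "ports");	/* takes a reference */
	if (!ports)
		return 0;

	for_each_available_child_of_node(ports, port) {
		if (!port_is_valid(port)) {
			of_node_put(port);	/* drop the loop's reference */
			err = -EINVAL;
			break;
		}
		/* ... use 'port' ... */
	}

	of_node_put(ports);	/* balance of_get_child_by_name() */
	return err;
}
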
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 72b6fc1932b5..698c7d1fb45c 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -3382,12 +3382,28 @@ static const struct of_device_id sja1105_dt_ids[] = {
};
MODULE_DEVICE_TABLE(of, sja1105_dt_ids);
+static const struct spi_device_id sja1105_spi_ids[] = {
+ { "sja1105e" },
+ { "sja1105t" },
+ { "sja1105p" },
+ { "sja1105q" },
+ { "sja1105r" },
+ { "sja1105s" },
+ { "sja1110a" },
+ { "sja1110b" },
+ { "sja1110c" },
+ { "sja1110d" },
+ { },
+};
+MODULE_DEVICE_TABLE(spi, sja1105_spi_ids);
+
static struct spi_driver sja1105_driver = {
.driver = {
.name = "sja1105",
.owner = THIS_MODULE,
.of_match_table = of_match_ptr(sja1105_dt_ids),
},
+ .id_table = sja1105_spi_ids,
.probe = sja1105_probe,
.remove = sja1105_remove,
.shutdown = sja1105_shutdown,
diff --git a/drivers/net/dsa/vitesse-vsc73xx-spi.c b/drivers/net/dsa/vitesse-vsc73xx-spi.c
index 3110895358d8..97a92e6da60d 100644
--- a/drivers/net/dsa/vitesse-vsc73xx-spi.c
+++ b/drivers/net/dsa/vitesse-vsc73xx-spi.c
@@ -205,10 +205,20 @@ static const struct of_device_id vsc73xx_of_match[] = {
};
MODULE_DEVICE_TABLE(of, vsc73xx_of_match);
+static const struct spi_device_id vsc73xx_spi_ids[] = {
+ { "vsc7385" },
+ { "vsc7388" },
+ { "vsc7395" },
+ { "vsc7398" },
+ { },
+};
+MODULE_DEVICE_TABLE(spi, vsc73xx_spi_ids);
+
static struct spi_driver vsc73xx_spi_driver = {
.probe = vsc73xx_spi_probe,
.remove = vsc73xx_spi_remove,
.shutdown = vsc73xx_spi_shutdown,
+ .id_table = vsc73xx_spi_ids,
.driver = {
.name = "vsc73xx-spi",
.of_match_table = vsc73xx_of_match,
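
Both SPI switch drivers above (sja1105 and vsc73xx) gain a plain spi_device_id table next to their OF match table. Without it, the SPI core cannot autoload the module from the modalias it reports for the device, and it warns when a driver has only an of_match_table. A minimal sketch of the pairing; the foo-switch driver, its compatible string, and the empty probe/remove are placeholders, not code from either driver.

#include <linux/module.h>
#include <linux/mod_devicetable.h>
#include <linux/spi/spi.h>

static const struct of_device_id foo_of_match[] = {
	{ .compatible = "vendor,foo-switch" },
	{ }
};
MODULE_DEVICE_TABLE(of, foo_of_match);

/* Entries must match the part of "compatible" after the vendor prefix,
 * because that is what the SPI core reports as the device's modalias.
 */
static const struct spi_device_id foo_spi_ids[] = {
	{ "foo-switch" },
	{ }
};
MODULE_DEVICE_TABLE(spi, foo_spi_ids);

static int foo_probe(struct spi_device *spi)
{
	return 0;
}

static void foo_remove(struct spi_device *spi)
{
}

static struct spi_driver foo_driver = {
	.probe = foo_probe,
	.remove = foo_remove,
	.id_table = foo_spi_ids,	/* enables autoload by spi_device_id */
	.driver = {
		.name = "foo-switch",
		.of_match_table = foo_of_match,
	},
};
module_spi_driver(foo_driver);

MODULE_LICENSE("GPL");
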
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index 831833911a52..8647125d60ae 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -379,7 +379,7 @@ static void aq_pci_shutdown(struct pci_dev *pdev)
}
}
-static int aq_suspend_common(struct device *dev, bool deep)
+static int aq_suspend_common(struct device *dev)
{
struct aq_nic_s *nic = pci_get_drvdata(to_pci_dev(dev));
@@ -392,17 +392,15 @@ static int aq_suspend_common(struct device *dev, bool deep)
if (netif_running(nic->ndev))
aq_nic_stop(nic);
- if (deep) {
- aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol);
- aq_nic_set_power(nic);
- }
+ aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol);
+ aq_nic_set_power(nic);
rtnl_unlock();
return 0;
}
-static int atl_resume_common(struct device *dev, bool deep)
+static int atl_resume_common(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct aq_nic_s *nic;
@@ -415,11 +413,6 @@ static int atl_resume_common(struct device *dev, bool deep)
pci_set_power_state(pdev, PCI_D0);
pci_restore_state(pdev);
- if (deep) {
- /* Reinitialize Nic/Vecs objects */
- aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol);
- }
-
if (netif_running(nic->ndev)) {
ret = aq_nic_init(nic);
if (ret)
@@ -444,22 +437,22 @@ err_exit:
static int aq_pm_freeze(struct device *dev)
{
- return aq_suspend_common(dev, true);
+ return aq_suspend_common(dev);
}
static int aq_pm_suspend_poweroff(struct device *dev)
{
- return aq_suspend_common(dev, true);
+ return aq_suspend_common(dev);
}
static int aq_pm_thaw(struct device *dev)
{
- return atl_resume_common(dev, true);
+ return atl_resume_common(dev);
}
static int aq_pm_resume_restore(struct device *dev)
{
- return atl_resume_common(dev, true);
+ return atl_resume_common(dev);
}
static const struct dev_pm_ops aq_pm_ops = {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 56b46b8206a7..cf9b00576ed3 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -7790,7 +7790,7 @@ hwrm_dbg_qcaps_exit:
static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp);
-static int bnxt_hwrm_func_qcaps(struct bnxt *bp)
+int bnxt_hwrm_func_qcaps(struct bnxt *bp)
{
int rc;
@@ -10065,7 +10065,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_RESC_CHANGE)
resc_reinit = true;
- if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE)
+ if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE ||
+ test_bit(BNXT_STATE_FW_RESET_DET, &bp->state))
fw_reset = true;
else
bnxt_remap_fw_health_regs(bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index a1dca8c58f54..075c6206325c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -2314,6 +2314,7 @@ int bnxt_cancel_reservations(struct bnxt *bp, bool fw_reset);
int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp);
int bnxt_hwrm_free_wol_fltr(struct bnxt *bp);
int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all);
+int bnxt_hwrm_func_qcaps(struct bnxt *bp);
int bnxt_hwrm_fw_set_time(struct bnxt *);
int bnxt_open_nic(struct bnxt *, bool, bool);
int bnxt_half_open_nic(struct bnxt *bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 3528ce9849e6..6b3d4f4c2a75 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -979,9 +979,11 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
if (rc)
return rc;
- rc = bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_SRT_PATCH);
- if (rc)
- return rc;
+ if (BNXT_CHIP_P5(bp)) {
+ rc = bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_SRT_PATCH);
+ if (rc)
+ return rc;
+ }
return bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_CRT_PATCH);
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
index 562f8f68a47d..7f3c0875b6f5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -76,14 +76,23 @@ static int bnxt_refclk_read(struct bnxt *bp, struct ptp_system_timestamp *sts,
u64 *ns)
{
struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+ u32 high_before, high_now, low;
if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
return -EIO;
+ high_before = readl(bp->bar0 + ptp->refclk_mapped_regs[1]);
ptp_read_system_prets(sts);
- *ns = readl(bp->bar0 + ptp->refclk_mapped_regs[0]);
+ low = readl(bp->bar0 + ptp->refclk_mapped_regs[0]);
ptp_read_system_postts(sts);
- *ns |= (u64)readl(bp->bar0 + ptp->refclk_mapped_regs[1]) << 32;
+ high_now = readl(bp->bar0 + ptp->refclk_mapped_regs[1]);
+ if (high_now != high_before) {
+ ptp_read_system_prets(sts);
+ low = readl(bp->bar0 + ptp->refclk_mapped_regs[0]);
+ ptp_read_system_postts(sts);
+ }
+ *ns = ((u64)high_now << 32) | low;
+
return 0;
}
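
bnxt_refclk_read() above adopts the usual recipe for sampling a free-running counter that is exposed as two 32-bit registers: read the high word, then the low word, then the high word again, and re-read the low word if the high word moved in between, so the combined 64-bit value is never stitched together from two different epochs. The same idea as a small self-contained program; the simulated hw_counter and register helpers are illustrative, not the driver's accessors.

#include <stdint.h>
#include <stdio.h>

/* Simulated hardware: one 64-bit counter exposed as two 32-bit registers. */
static uint64_t hw_counter;

static uint32_t reg_read_lo(void) { return (uint32_t)hw_counter; }
static uint32_t reg_read_hi(void) { return (uint32_t)(hw_counter >> 32); }

/* high/low/high read: re-sample the low word if the high word moved. */
static uint64_t read_split_counter64(void)
{
	uint32_t hi_before = reg_read_hi();
	uint32_t lo = reg_read_lo();
	uint32_t hi_after = reg_read_hi();

	if (hi_after != hi_before)
		lo = reg_read_lo();	/* low word wrapped between the reads */

	return ((uint64_t)hi_after << 32) | lo;
}

int main(void)
{
	hw_counter = 0x00000001fffffffeULL;
	printf("0x%llx\n", (unsigned long long)read_split_counter64());
	return 0;
}
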
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index ddf2f3963abe..a1a2c7a64fd5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -823,8 +823,10 @@ static int bnxt_sriov_enable(struct bnxt *bp, int *num_vfs)
goto err_out2;
rc = pci_enable_sriov(bp->pdev, *num_vfs);
- if (rc)
+ if (rc) {
+ bnxt_ulp_sriov_cfg(bp, 0);
goto err_out2;
+ }
return 0;
@@ -832,6 +834,9 @@ err_out2:
/* Free the resources reserved for various VF's */
bnxt_hwrm_func_vf_resource_free(bp, *num_vfs);
+ /* Restore the max resources */
+ bnxt_hwrm_func_qcaps(bp);
+
err_out1:
bnxt_free_vf_resources(bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index f02fe906dedb..f53387ed0167 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -28,7 +28,7 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
struct xdp_buff *xdp)
{
struct skb_shared_info *sinfo;
- struct bnxt_sw_tx_bd *tx_buf, *first_buf;
+ struct bnxt_sw_tx_bd *tx_buf;
struct tx_bd *txbd;
int num_frags = 0;
u32 flags;
@@ -43,13 +43,14 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
/* fill up the first buffer */
prod = txr->tx_prod;
tx_buf = &txr->tx_buf_ring[prod];
- first_buf = tx_buf;
tx_buf->nr_frags = num_frags;
if (xdp)
tx_buf->page = virt_to_head_page(xdp->data);
txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
- flags = ((len) << TX_BD_LEN_SHIFT) | ((num_frags + 1) << TX_BD_FLAGS_BD_CNT_SHIFT);
+ flags = (len << TX_BD_LEN_SHIFT) |
+ ((num_frags + 1) << TX_BD_FLAGS_BD_CNT_SHIFT) |
+ bnxt_lhint_arr[len >> 9];
txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
txbd->tx_bd_opaque = prod;
txbd->tx_bd_haddr = cpu_to_le64(mapping);
@@ -82,7 +83,6 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
flags = frag_len << TX_BD_LEN_SHIFT;
txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
- txbd->tx_bd_opaque = prod;
txbd->tx_bd_haddr = cpu_to_le64(frag_mapping);
len = frag_len;
@@ -96,7 +96,7 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
prod = NEXT_TX(prod);
txr->tx_prod = prod;
- return first_buf;
+ return tx_buf;
}
static void __bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
index 4af5561cbfc5..ddfe9208529a 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
@@ -1236,8 +1236,8 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
csk->sndbuf = newsk->sk_sndbuf;
csk->smac_idx = ((struct port_info *)netdev_priv(ndev))->smt_idx;
RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk),
- sock_net(newsk)->
- ipv4.sysctl_tcp_window_scaling,
+ READ_ONCE(sock_net(newsk)->
+ ipv4.sysctl_tcp_window_scaling),
tp->window_clamp);
neigh_release(n);
inet_inherit_port(&tcp_hashinfo, lsk, newsk);
@@ -1384,7 +1384,7 @@ static void chtls_pass_accept_request(struct sock *sk,
#endif
}
if (req->tcpopt.wsf <= 14 &&
- sock_net(sk)->ipv4.sysctl_tcp_window_scaling) {
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
inet_rsk(oreq)->wscale_ok = 1;
inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf;
}
@@ -1392,7 +1392,7 @@ static void chtls_pass_accept_request(struct sock *sk,
th_ecn = tcph->ece && tcph->cwr;
if (th_ecn) {
ect = !INET_ECN_is_not_ect(ip_dsfield);
- ecn_ok = sock_net(sk)->ipv4.sysctl_tcp_ecn;
+ ecn_ok = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn);
if ((!ect && ecn_ok) || tcp_ca_needs_ecn(sk))
inet_rsk(oreq)->ecn_ok = 1;
}
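
The chtls hunks follow the pattern used throughout this series for per-netns sysctls that are updated without any lock held by the reader: loading them with READ_ONCE() takes one consistent snapshot and stops the compiler from re-reading or tearing the value mid-decision, and the writer pairs it with WRITE_ONCE(). A minimal sketch with a made-up knob (sysctl_foo) rather than a real sysctl:

#include <linux/compiler.h>
#include <linux/types.h>

static int sysctl_foo;		/* flipped from /proc/sys with no reader lock */

/* Reader: take one snapshot and use it for the whole decision, so the
 * compiler cannot reload the variable halfway through.
 */
static bool foo_allowed(int request)
{
	int limit = READ_ONCE(sysctl_foo);

	return request <= limit;
}

/* Writer (the proc handler): publish the new value with WRITE_ONCE() so
 * the store pairs with the READ_ONCE() above.
 */
static void foo_set(int new_limit)
{
	WRITE_ONCE(sysctl_foo, new_limit);
}
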
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 528eb0f223b1..b4f5e57d0285 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -2287,7 +2287,7 @@ err:
/* Uses sync mcc */
int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
- u8 page_num, u8 *data)
+ u8 page_num, u32 off, u32 len, u8 *data)
{
struct be_dma_mem cmd;
struct be_mcc_wrb *wrb;
@@ -2321,10 +2321,10 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
req->port = cpu_to_le32(adapter->hba_port_num);
req->page_num = cpu_to_le32(page_num);
status = be_mcc_notify_wait(adapter);
- if (!status) {
+ if (!status && len > 0) {
struct be_cmd_resp_port_type *resp = cmd.va;
- memcpy(data, resp->page_data, PAGE_DATA_LEN);
+ memcpy(data, resp->page_data + off, len);
}
err:
mutex_unlock(&adapter->mcc_lock);
@@ -2415,7 +2415,7 @@ int be_cmd_query_cable_type(struct be_adapter *adapter)
int status;
status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
- page_data);
+ 0, PAGE_DATA_LEN, page_data);
if (!status) {
switch (adapter->phy.interface_type) {
case PHY_TYPE_QSFP:
@@ -2440,7 +2440,7 @@ int be_cmd_query_sfp_info(struct be_adapter *adapter)
int status;
status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
- page_data);
+ 0, PAGE_DATA_LEN, page_data);
if (!status) {
strlcpy(adapter->phy.vendor_name, page_data +
SFP_VENDOR_NAME_OFFSET, SFP_VENDOR_NAME_LEN - 1);
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index db1f3b908582..e2085c68c0ee 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -2427,7 +2427,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, u8 beacon,
int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num,
u32 *state);
int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
- u8 page_num, u8 *data);
+ u8 page_num, u32 off, u32 len, u8 *data);
int be_cmd_query_cable_type(struct be_adapter *adapter);
int be_cmd_query_sfp_info(struct be_adapter *adapter);
int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd,
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index dfa784339781..bd0df189d871 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -1344,7 +1344,7 @@ static int be_get_module_info(struct net_device *netdev,
return -EOPNOTSUPP;
status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
- page_data);
+ 0, PAGE_DATA_LEN, page_data);
if (!status) {
if (!page_data[SFP_PLUS_SFF_8472_COMP]) {
modinfo->type = ETH_MODULE_SFF_8079;
@@ -1362,25 +1362,32 @@ static int be_get_module_eeprom(struct net_device *netdev,
{
struct be_adapter *adapter = netdev_priv(netdev);
int status;
+ u32 begin, end;
if (!check_privilege(adapter, MAX_PRIVILEGES))
return -EOPNOTSUPP;
- status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
- data);
- if (status)
- goto err;
+ begin = eeprom->offset;
+ end = eeprom->offset + eeprom->len;
+
+ if (begin < PAGE_DATA_LEN) {
+ status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, begin,
+ min_t(u32, end, PAGE_DATA_LEN) - begin,
+ data);
+ if (status)
+ goto err;
+
+ data += PAGE_DATA_LEN - begin;
+ begin = PAGE_DATA_LEN;
+ }
- if (eeprom->offset + eeprom->len > PAGE_DATA_LEN) {
- status = be_cmd_read_port_transceiver_data(adapter,
- TR_PAGE_A2,
- data +
- PAGE_DATA_LEN);
+ if (end > PAGE_DATA_LEN) {
+ status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A2,
+ begin - PAGE_DATA_LEN,
+ end - begin, data);
if (status)
goto err;
}
- if (eeprom->offset)
- memcpy(data, data + eeprom->offset, eeprom->len);
err:
return be_cmd_status(status);
}
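
be_get_module_eeprom() now converts the ethtool (offset, len) window into at most two page reads, one from page A0 and one from page A2, instead of always reading page A0 from offset 0 and shuffling the buffer afterwards. The offset arithmetic is easy to get wrong, so here it is as a stand-alone helper under the assumption of 256-byte pages; PAGE_LEN and the read_page callback are illustrative, not the benet API.

#include <stdint.h>

#define PAGE_LEN 256	/* assumed bytes per SFP EEPROM page (A0 / A2) */

/* Hypothetical page reader: copy 'len' bytes from offset 'off' of 'page'. */
typedef int (*page_read_fn)(int page, uint32_t off, uint32_t len, uint8_t *buf);

static int read_module_eeprom(page_read_fn read_page, uint32_t offset,
			      uint32_t len, uint8_t *data)
{
	uint32_t begin = offset;
	uint32_t end = offset + len;
	int err;

	/* Part (or all) of the requested window lives in page A0. */
	if (begin < PAGE_LEN) {
		uint32_t chunk = (end < PAGE_LEN ? end : PAGE_LEN) - begin;

		err = read_page(0 /* A0 */, begin, chunk, data);
		if (err)
			return err;
		data += chunk;
		begin = PAGE_LEN;
	}

	/* Whatever is left (if anything) lives in page A2. */
	if (end > PAGE_LEN)
		return read_page(2 /* A2 */, begin - PAGE_LEN, end - begin, data);

	return 0;
}
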
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 5231818943c6..c03663785a8d 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -1764,6 +1764,19 @@ cleanup_clk:
return rc;
}
+static bool ftgmac100_has_child_node(struct device_node *np, const char *name)
+{
+ struct device_node *child_np = of_get_child_by_name(np, name);
+ bool ret = false;
+
+ if (child_np) {
+ ret = true;
+ of_node_put(child_np);
+ }
+
+ return ret;
+}
+
static int ftgmac100_probe(struct platform_device *pdev)
{
struct resource *res;
@@ -1883,7 +1896,7 @@ static int ftgmac100_probe(struct platform_device *pdev)
/* Display what we found */
phy_attached_info(phy);
- } else if (np && !of_get_child_by_name(np, "mdio")) {
+ } else if (np && !ftgmac100_has_child_node(np, "mdio")) {
/* Support legacy ASPEED devicetree descriptions that describe a
* MAC with an embedded MDIO controller but have no "mdio"
* child node. Automatically scan the MDIO bus for available
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_rx.c b/drivers/net/ethernet/fungible/funeth/funeth_rx.c
index 0f6a549b9f67..29a6c2ede43a 100644
--- a/drivers/net/ethernet/fungible/funeth/funeth_rx.c
+++ b/drivers/net/ethernet/fungible/funeth/funeth_rx.c
@@ -142,6 +142,7 @@ static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va,
int ref_ok, struct funeth_txq *xdp_q)
{
struct bpf_prog *xdp_prog;
+ struct xdp_frame *xdpf;
struct xdp_buff xdp;
u32 act;
@@ -163,7 +164,9 @@ static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va,
case XDP_TX:
if (unlikely(!ref_ok))
goto pass;
- if (!fun_xdp_tx(xdp_q, xdp.data, xdp.data_end - xdp.data))
+
+ xdpf = xdp_convert_buff_to_frame(&xdp);
+ if (!xdpf || !fun_xdp_tx(xdp_q, xdpf))
goto xdp_error;
FUN_QSTAT_INC(q, xdp_tx);
q->xdp_flush |= FUN_XDP_FLUSH_TX;
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_tx.c b/drivers/net/ethernet/fungible/funeth/funeth_tx.c
index ff6e29237253..2f6698b98b03 100644
--- a/drivers/net/ethernet/fungible/funeth/funeth_tx.c
+++ b/drivers/net/ethernet/fungible/funeth/funeth_tx.c
@@ -466,7 +466,7 @@ static unsigned int fun_xdpq_clean(struct funeth_txq *q, unsigned int budget)
do {
fun_xdp_unmap(q, reclaim_idx);
- page_frag_free(q->info[reclaim_idx].vaddr);
+ xdp_return_frame(q->info[reclaim_idx].xdpf);
trace_funeth_tx_free(q, reclaim_idx, 1, head);
@@ -479,11 +479,11 @@ static unsigned int fun_xdpq_clean(struct funeth_txq *q, unsigned int budget)
return npkts;
}
-bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len)
+bool fun_xdp_tx(struct funeth_txq *q, struct xdp_frame *xdpf)
{
struct fun_eth_tx_req *req;
struct fun_dataop_gl *gle;
- unsigned int idx;
+ unsigned int idx, len;
dma_addr_t dma;
if (fun_txq_avail(q) < FUN_XDP_CLEAN_THRES)
@@ -494,7 +494,8 @@ bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len)
return false;
}
- dma = dma_map_single(q->dma_dev, data, len, DMA_TO_DEVICE);
+ len = xdpf->len;
+ dma = dma_map_single(q->dma_dev, xdpf->data, len, DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(q->dma_dev, dma))) {
FUN_QSTAT_INC(q, tx_map_err);
return false;
@@ -514,7 +515,7 @@ bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len)
gle = (struct fun_dataop_gl *)req->dataop.imm;
fun_dataop_gl_init(gle, 0, 0, len, dma);
- q->info[idx].vaddr = data;
+ q->info[idx].xdpf = xdpf;
u64_stats_update_begin(&q->syncp);
q->stats.tx_bytes += len;
@@ -545,12 +546,9 @@ int fun_xdp_xmit_frames(struct net_device *dev, int n,
if (unlikely(q_idx >= fp->num_xdpqs))
return -ENXIO;
- for (q = xdpqs[q_idx], i = 0; i < n; i++) {
- const struct xdp_frame *xdpf = frames[i];
-
- if (!fun_xdp_tx(q, xdpf->data, xdpf->len))
+ for (q = xdpqs[q_idx], i = 0; i < n; i++)
+ if (!fun_xdp_tx(q, frames[i]))
break;
- }
if (unlikely(flags & XDP_XMIT_FLUSH))
fun_txq_wr_db(q);
@@ -577,7 +575,7 @@ static void fun_xdpq_purge(struct funeth_txq *q)
unsigned int idx = q->cons_cnt & q->mask;
fun_xdp_unmap(q, idx);
- page_frag_free(q->info[idx].vaddr);
+ xdp_return_frame(q->info[idx].xdpf);
q->cons_cnt++;
}
}
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h
index 04c9f91b7489..8708e2895946 100644
--- a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h
+++ b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h
@@ -95,8 +95,8 @@ struct funeth_txq_stats { /* per Tx queue SW counters */
struct funeth_tx_info { /* per Tx descriptor state */
union {
- struct sk_buff *skb; /* associated packet */
- void *vaddr; /* start address for XDP */
+ struct sk_buff *skb; /* associated packet (sk_buff path) */
+ struct xdp_frame *xdpf; /* associated XDP frame (XDP path) */
};
};
@@ -245,7 +245,7 @@ static inline int fun_irq_node(const struct fun_irq *p)
int fun_rxq_napi_poll(struct napi_struct *napi, int budget);
int fun_txq_napi_poll(struct napi_struct *napi, int budget);
netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev);
-bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len);
+bool fun_xdp_tx(struct funeth_txq *q, struct xdp_frame *xdpf);
int fun_xdp_xmit_frames(struct net_device *dev, int n,
struct xdp_frame **frames, u32 flags);
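
The funeth changes switch the XDP_TX path from queuing raw (data, len) buffers to queuing struct xdp_frame, so the completion path can use xdp_return_frame() and honour the frame's memory model instead of calling page_frag_free(). The key step is xdp_convert_buff_to_frame(); a minimal sketch of how an Rx handler would use it, where queue_frame_for_tx() is a placeholder for the driver's ring code and the error handling is simplified.

#include <net/xdp.h>

/* Placeholder for the driver code that posts the frame to a TX ring and
 * remembers 'xdpf' so completion can call xdp_return_frame() on it.
 */
bool queue_frame_for_tx(struct xdp_frame *xdpf);

static bool handle_xdp_tx(struct xdp_buff *xdp)
{
	struct xdp_frame *xdpf;

	/* Converts in place, using the buffer's headroom for the frame
	 * metadata; returns NULL if there is not enough headroom.
	 */
	xdpf = xdp_convert_buff_to_frame(xdp);
	if (!xdpf)
		return false;		/* caller drops the packet */

	if (!queue_frame_for_tx(xdpf)) {
		xdp_return_frame(xdpf);	/* hand the buffer back to its pool */
		return false;
	}
	return true;
}
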
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
index 13382df2f2ef..bcf680e83811 100644
--- a/drivers/net/ethernet/intel/e1000e/hw.h
+++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -630,7 +630,6 @@ struct e1000_phy_info {
bool disable_polarity_correction;
bool is_mdix;
bool polarity_correction;
- bool reset_disable;
bool speed_downgraded;
bool autoneg_wait_to_complete;
};
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index e6c8e6d5234f..9466f65a6da7 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -2050,10 +2050,6 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw)
bool blocked = false;
int i = 0;
- /* Check the PHY (LCD) reset flag */
- if (hw->phy.reset_disable)
- return true;
-
while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) &&
(i++ < 30))
usleep_range(10000, 11000);
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h
index 638a3ddd7ada..2504b11c3169 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.h
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h
@@ -271,7 +271,6 @@
#define I217_CGFREG_ENABLE_MTA_RESET 0x0002
#define I217_MEMPWR PHY_REG(772, 26)
#define I217_MEMPWR_DISABLE_SMB_RELEASE 0x0010
-#define I217_MEMPWR_MOEM 0x1000
/* Receive Address Initial CRC Calculation */
#define E1000_PCH_RAICC(_n) (0x05F50 + ((_n) * 4))
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index fa06f68c8c80..f1729940e46c 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6494,6 +6494,10 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter)
if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
hw->mac.type >= e1000_pch_adp) {
+ /* Keep the GPT clock enabled for CSME */
+ mac_data = er32(FEXTNVM);
+ mac_data |= BIT(3);
+ ew32(FEXTNVM, mac_data);
/* Request ME unconfigure the device from S0ix */
mac_data = er32(H2ME);
mac_data &= ~E1000_H2ME_START_DPG;
@@ -6987,21 +6991,8 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev)
struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
struct e1000_adapter *adapter = netdev_priv(netdev);
struct pci_dev *pdev = to_pci_dev(dev);
- struct e1000_hw *hw = &adapter->hw;
- u16 phy_data;
int rc;
- if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
- hw->mac.type >= e1000_pch_adp) {
- /* Mask OEM Bits / Gig Disable / Restart AN (772_26[12] = 1) */
- e1e_rphy(hw, I217_MEMPWR, &phy_data);
- phy_data |= I217_MEMPWR_MOEM;
- e1e_wphy(hw, I217_MEMPWR, phy_data);
-
- /* Disable LCD reset */
- hw->phy.reset_disable = true;
- }
-
e1000e_flush_lpic(pdev);
e1000e_pm_freeze(dev);
@@ -7023,8 +7014,6 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev)
struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
struct e1000_adapter *adapter = netdev_priv(netdev);
struct pci_dev *pdev = to_pci_dev(dev);
- struct e1000_hw *hw = &adapter->hw;
- u16 phy_data;
int rc;
/* Introduce S0ix implementation */
@@ -7035,17 +7024,6 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev)
if (rc)
return rc;
- if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
- hw->mac.type >= e1000_pch_adp) {
- /* Unmask OEM Bits / Gig Disable / Restart AN 772_26[12] = 0 */
- e1e_rphy(hw, I217_MEMPWR, &phy_data);
- phy_data &= ~I217_MEMPWR_MOEM;
- e1e_wphy(hw, I217_MEMPWR, phy_data);
-
- /* Enable LCD reset */
- hw->phy.reset_disable = false;
- }
-
return e1000e_pm_thaw(dev);
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index aa786fd55951..685556e968f2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1925,11 +1925,15 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
* non-zero req_queue_pairs says that user requested a new
* queue count via ethtool's set_channels, so use this
* value for queues distribution across traffic classes
+ * We need at least one queue pair for the interface
+ * to be usable as we see in else statement.
*/
if (vsi->req_queue_pairs > 0)
vsi->num_queue_pairs = vsi->req_queue_pairs;
else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
vsi->num_queue_pairs = pf->num_lan_msix;
+ else
+ vsi->num_queue_pairs = 1;
}
/* Number of queues per enabled TC */
@@ -10650,7 +10654,7 @@ static int i40e_reset(struct i40e_pf *pf)
**/
static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
{
- int old_recovery_mode_bit = test_bit(__I40E_RECOVERY_MODE, pf->state);
+ const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf);
struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
struct i40e_hw *hw = &pf->hw;
i40e_status ret;
@@ -10658,13 +10662,11 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
int v;
if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
- i40e_check_recovery_mode(pf)) {
+ is_recovery_mode_reported)
i40e_set_ethtool_ops(pf->vsi[pf->lan_vsi]->netdev);
- }
if (test_bit(__I40E_DOWN, pf->state) &&
- !test_bit(__I40E_RECOVERY_MODE, pf->state) &&
- !old_recovery_mode_bit)
+ !test_bit(__I40E_RECOVERY_MODE, pf->state))
goto clear_recovery;
dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n");
@@ -10691,13 +10693,12 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
* accordingly with regard to resources initialization
* and deinitialization
*/
- if (test_bit(__I40E_RECOVERY_MODE, pf->state) ||
- old_recovery_mode_bit) {
+ if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
if (i40e_get_capabilities(pf,
i40e_aqc_opc_list_func_capabilities))
goto end_unlock;
- if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
+ if (is_recovery_mode_reported) {
/* we're staying in recovery mode so we'll reinitialize
* misc vector here
*/
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index 49aed3e506a6..0ea0361cd86b 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -64,7 +64,6 @@ struct iavf_vsi {
u16 id;
DECLARE_BITMAP(state, __IAVF_VSI_STATE_SIZE__);
int base_vector;
- u16 work_limit;
u16 qs_handle;
void *priv; /* client driver data reference. */
};
@@ -159,8 +158,12 @@ struct iavf_vlan {
struct iavf_vlan_filter {
struct list_head list;
struct iavf_vlan vlan;
- bool remove; /* filter needs to be removed */
- bool add; /* filter needs to be added */
+ struct {
+ u8 is_new_vlan:1; /* filter is new, wait for PF answer */
+ u8 remove:1; /* filter needs to be removed */
+ u8 add:1; /* filter needs to be added */
+ u8 padding:5;
+ };
};
#define IAVF_MAX_TRAFFIC_CLASS 4
@@ -461,6 +464,10 @@ static inline const char *iavf_state_str(enum iavf_state_t state)
return "__IAVF_INIT_VERSION_CHECK";
case __IAVF_INIT_GET_RESOURCES:
return "__IAVF_INIT_GET_RESOURCES";
+ case __IAVF_INIT_EXTENDED_CAPS:
+ return "__IAVF_INIT_EXTENDED_CAPS";
+ case __IAVF_INIT_CONFIG_ADAPTER:
+ return "__IAVF_INIT_CONFIG_ADAPTER";
case __IAVF_INIT_SW:
return "__IAVF_INIT_SW";
case __IAVF_INIT_FAILED:
@@ -520,6 +527,7 @@ int iavf_get_vf_config(struct iavf_adapter *adapter);
int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter);
int iavf_send_vf_offload_vlan_v2_msg(struct iavf_adapter *adapter);
void iavf_set_queue_vlan_tag_loc(struct iavf_adapter *adapter);
+u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter);
void iavf_irq_enable(struct iavf_adapter *adapter, bool flush);
void iavf_configure_queues(struct iavf_adapter *adapter);
void iavf_deconfigure_queues(struct iavf_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index 3bb56714beb0..e535d4c3da49 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -692,12 +692,8 @@ static int __iavf_get_coalesce(struct net_device *netdev,
struct ethtool_coalesce *ec, int queue)
{
struct iavf_adapter *adapter = netdev_priv(netdev);
- struct iavf_vsi *vsi = &adapter->vsi;
struct iavf_ring *rx_ring, *tx_ring;
- ec->tx_max_coalesced_frames = vsi->work_limit;
- ec->rx_max_coalesced_frames = vsi->work_limit;
-
/* Rx and Tx usecs per queue value. If user doesn't specify the
* queue, return queue 0's value to represent.
*/
@@ -825,12 +821,8 @@ static int __iavf_set_coalesce(struct net_device *netdev,
struct ethtool_coalesce *ec, int queue)
{
struct iavf_adapter *adapter = netdev_priv(netdev);
- struct iavf_vsi *vsi = &adapter->vsi;
int i;
- if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
- vsi->work_limit = ec->tx_max_coalesced_frames_irq;
-
if (ec->rx_coalesce_usecs == 0) {
if (ec->use_adaptive_rx_coalesce)
netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
@@ -1969,8 +1961,6 @@ static int iavf_set_rxfh(struct net_device *netdev, const u32 *indir,
static const struct ethtool_ops iavf_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
- ETHTOOL_COALESCE_MAX_FRAMES |
- ETHTOOL_COALESCE_MAX_FRAMES_IRQ |
ETHTOOL_COALESCE_USE_ADAPTIVE,
.get_drvinfo = iavf_get_drvinfo,
.get_link = ethtool_op_get_link,
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index f3ecb3bca33d..2e2c153ce46a 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -843,7 +843,7 @@ static void iavf_restore_filters(struct iavf_adapter *adapter)
* iavf_get_num_vlans_added - get number of VLANs added
* @adapter: board private structure
*/
-static u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter)
+u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter)
{
return bitmap_weight(adapter->vsi.active_cvlans, VLAN_N_VID) +
bitmap_weight(adapter->vsi.active_svlans, VLAN_N_VID);
@@ -906,11 +906,6 @@ static int iavf_vlan_rx_add_vid(struct net_device *netdev,
if (!iavf_add_vlan(adapter, IAVF_VLAN(vid, be16_to_cpu(proto))))
return -ENOMEM;
- if (proto == cpu_to_be16(ETH_P_8021Q))
- set_bit(vid, adapter->vsi.active_cvlans);
- else
- set_bit(vid, adapter->vsi.active_svlans);
-
return 0;
}
@@ -2245,7 +2240,6 @@ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter)
adapter->vsi.back = adapter;
adapter->vsi.base_vector = 1;
- adapter->vsi.work_limit = IAVF_DEFAULT_IRQ_WORK;
vsi->netdev = adapter->netdev;
vsi->qs_handle = adapter->vsi_res->qset_handle;
if (adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
@@ -2956,6 +2950,9 @@ continue_reset:
adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER;
iavf_misc_irq_enable(adapter);
+ bitmap_clear(adapter->vsi.active_cvlans, 0, VLAN_N_VID);
+ bitmap_clear(adapter->vsi.active_svlans, 0, VLAN_N_VID);
+
mod_delayed_work(iavf_wq, &adapter->watchdog_task, 2);
/* We were running when the reset started, so we need to restore some
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
index 978f651c6b09..06d18797d25a 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -194,7 +194,7 @@ static bool iavf_clean_tx_irq(struct iavf_vsi *vsi,
struct iavf_tx_buffer *tx_buf;
struct iavf_tx_desc *tx_desc;
unsigned int total_bytes = 0, total_packets = 0;
- unsigned int budget = vsi->work_limit;
+ unsigned int budget = IAVF_DEFAULT_IRQ_WORK;
tx_buf = &tx_ring->tx_bi[i];
tx_desc = IAVF_TX_DESC(tx_ring, i);
@@ -1285,11 +1285,10 @@ static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring,
{
struct iavf_rx_buffer *rx_buffer;
- if (!size)
- return NULL;
-
rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
prefetchw(rx_buffer->page);
+ if (!size)
+ return rx_buffer;
/* we are reusing so sync this buffer for CPU use */
dma_sync_single_range_for_cpu(rx_ring->dev,
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 782450d5c12f..1603e99bae4a 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -627,6 +627,33 @@ static void iavf_mac_add_reject(struct iavf_adapter *adapter)
}
/**
+ * iavf_vlan_add_reject
+ * @adapter: adapter structure
+ *
+ * Remove VLAN filters from list based on PF response.
+ **/
+static void iavf_vlan_add_reject(struct iavf_adapter *adapter)
+{
+ struct iavf_vlan_filter *f, *ftmp;
+
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
+ list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
+ if (f->is_new_vlan) {
+ if (f->vlan.tpid == ETH_P_8021Q)
+ clear_bit(f->vlan.vid,
+ adapter->vsi.active_cvlans);
+ else
+ clear_bit(f->vlan.vid,
+ adapter->vsi.active_svlans);
+
+ list_del(&f->list);
+ kfree(f);
+ }
+ }
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
+}
+
+/**
* iavf_add_vlans
* @adapter: adapter structure
*
@@ -683,6 +710,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
vvfl->vlan_id[i] = f->vlan.vid;
i++;
f->add = false;
+ f->is_new_vlan = true;
if (i == count)
break;
}
@@ -695,10 +723,18 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_VLAN, (u8 *)vvfl, len);
kfree(vvfl);
} else {
+ u16 max_vlans = adapter->vlan_v2_caps.filtering.max_filters;
+ u16 current_vlans = iavf_get_num_vlans_added(adapter);
struct virtchnl_vlan_filter_list_v2 *vvfl_v2;
adapter->current_op = VIRTCHNL_OP_ADD_VLAN_V2;
+ if ((count + current_vlans) > max_vlans &&
+ current_vlans < max_vlans) {
+ count = max_vlans - iavf_get_num_vlans_added(adapter);
+ more = true;
+ }
+
len = sizeof(*vvfl_v2) + ((count - 1) *
sizeof(struct virtchnl_vlan_filter));
if (len > IAVF_MAX_AQ_BUF_SIZE) {
@@ -725,6 +761,9 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
&adapter->vlan_v2_caps.filtering.filtering_support;
struct virtchnl_vlan *vlan;
+ if (i == count)
+ break;
+
/* give priority over outer if it's enabled */
if (filtering_support->outer)
vlan = &vvfl_v2->filters[i].outer;
@@ -736,8 +775,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
i++;
f->add = false;
- if (i == count)
- break;
+ f->is_new_vlan = true;
}
}
@@ -2080,6 +2118,11 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
*/
iavf_netdev_features_vlan_strip_set(netdev, true);
break;
+ case VIRTCHNL_OP_ADD_VLAN_V2:
+ iavf_vlan_add_reject(adapter);
+ dev_warn(&adapter->pdev->dev, "Failed to add VLAN filter, error %s\n",
+ iavf_stat_str(&adapter->hw, v_retval));
+ break;
default:
dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
v_retval, iavf_stat_str(&adapter->hw, v_retval),
@@ -2332,6 +2375,24 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
spin_unlock_bh(&adapter->adv_rss_lock);
}
break;
+ case VIRTCHNL_OP_ADD_VLAN_V2: {
+ struct iavf_vlan_filter *f;
+
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
+ list_for_each_entry(f, &adapter->vlan_filter_list, list) {
+ if (f->is_new_vlan) {
+ f->is_new_vlan = false;
+ if (f->vlan.tpid == ETH_P_8021Q)
+ set_bit(f->vlan.vid,
+ adapter->vsi.active_cvlans);
+ else
+ set_bit(f->vlan.vid,
+ adapter->vsi.active_svlans);
+ }
+ }
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ }
+ break;
case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
/* PF enabled vlan strip on this VF.
* Update netdev->features if needed to be in sync with ethtool.
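
The iavf changes track which VLAN filters are still waiting for the PF's answer: is_new_vlan is set when a filter is packed into a VIRTCHNL_OP_ADD_VLAN_V2 message, the active_cvlans/active_svlans bitmaps are only updated once the PF confirms, and iavf_vlan_add_reject() drops the pending entries if the PF refuses. Stripped of driver detail, the request/commit/rollback flow looks roughly like this sketch (pending_vlan and the helper names are illustrative):

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/types.h>

struct pending_vlan {
	struct list_head list;
	u16 vid;
	u8 add:1;	/* still needs to be sent to the PF */
	u8 is_new:1;	/* sent, waiting for the PF's answer */
};

/* Request: done while packing the virtchnl message. */
static void vlan_mark_sent(struct list_head *filters)
{
	struct pending_vlan *f;

	list_for_each_entry(f, filters, list) {
		if (f->add) {
			f->add = 0;
			f->is_new = 1;
		}
	}
}

/* PF confirmed: commit the pending entries into the "active" bitmap. */
static void vlan_commit(struct list_head *filters, unsigned long *active)
{
	struct pending_vlan *f;

	list_for_each_entry(f, filters, list) {
		if (f->is_new) {
			f->is_new = 0;
			set_bit(f->vid, active);
		}
	}
}

/* PF rejected: forget the entries that were never acknowledged. */
static void vlan_rollback(struct list_head *filters)
{
	struct pending_vlan *f, *tmp;

	list_for_each_entry_safe(f, tmp, filters, list) {
		if (f->is_new) {
			list_del(&f->list);
			kfree(f);
		}
	}
}
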
diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h
index 61dd2f18dee8..b41bc3dc1745 100644
--- a/drivers/net/ethernet/intel/ice/ice_devids.h
+++ b/drivers/net/ethernet/intel/ice/ice_devids.h
@@ -5,6 +5,7 @@
#define _ICE_DEVIDS_H_
/* Device IDs */
+#define ICE_DEV_ID_E822_SI_DFLT 0x1888
/* Intel(R) Ethernet Connection E823-L for backplane */
#define ICE_DEV_ID_E823L_BACKPLANE 0x124C
/* Intel(R) Ethernet Connection E823-L for SFP */
diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c
index 3991d62473bf..3337314a7b35 100644
--- a/drivers/net/ethernet/intel/ice/ice_devlink.c
+++ b/drivers/net/ethernet/intel/ice/ice_devlink.c
@@ -814,6 +814,8 @@ void ice_devlink_destroy_vf_port(struct ice_vf *vf)
devlink_port_unregister(devlink_port);
}
+#define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024)
+
/**
* ice_devlink_nvm_snapshot - Capture a snapshot of the NVM flash contents
* @devlink: the devlink instance
@@ -840,8 +842,9 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink,
struct ice_pf *pf = devlink_priv(devlink);
struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;
- void *nvm_data;
- u32 nvm_size;
+ u8 *nvm_data, *tmp, i;
+ u32 nvm_size, left;
+ s8 num_blks;
int status;
nvm_size = hw->flash.flash_size;
@@ -849,26 +852,44 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink,
if (!nvm_data)
return -ENOMEM;
- status = ice_acquire_nvm(hw, ICE_RES_READ);
- if (status) {
- dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n",
- status, hw->adminq.sq_last_status);
- NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore");
- vfree(nvm_data);
- return status;
- }
- status = ice_read_flat_nvm(hw, 0, &nvm_size, nvm_data, false);
- if (status) {
- dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n",
- nvm_size, status, hw->adminq.sq_last_status);
- NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents");
+ num_blks = DIV_ROUND_UP(nvm_size, ICE_DEVLINK_READ_BLK_SIZE);
+ tmp = nvm_data;
+ left = nvm_size;
+
+ /* Some systems take longer to read the NVM than others, which causes the
+ * FW to reclaim the NVM lock before the entire NVM has been read. Fix
+ * this by breaking the reads of the NVM into smaller chunks that will
+ * probably not take as long. This has some overhead since we are
+ * increasing the number of AQ commands, but it should always work.
+ */
+ for (i = 0; i < num_blks; i++) {
+ u32 read_sz = min_t(u32, ICE_DEVLINK_READ_BLK_SIZE, left);
+
+ status = ice_acquire_nvm(hw, ICE_RES_READ);
+ if (status) {
+ dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n",
+ status, hw->adminq.sq_last_status);
+ NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore");
+ vfree(nvm_data);
+ return -EIO;
+ }
+
+ status = ice_read_flat_nvm(hw, i * ICE_DEVLINK_READ_BLK_SIZE,
+ &read_sz, tmp, false);
+ if (status) {
+ dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n",
+ read_sz, status, hw->adminq.sq_last_status);
+ NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents");
+ ice_release_nvm(hw);
+ vfree(nvm_data);
+ return -EIO;
+ }
ice_release_nvm(hw);
- vfree(nvm_data);
- return status;
- }
- ice_release_nvm(hw);
+ tmp += read_sz;
+ left -= read_sz;
+ }
*data = nvm_data;
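
ice_devlink_nvm_snapshot() above now takes and releases the NVM semaphore once per 1 MB chunk instead of holding it across a single multi-megabyte read, so firmware cannot time the ownership out mid-transfer. The loop shape is generic; a sketch of it with stand-in helpers (nvm_acquire/nvm_release/nvm_read are assumptions, not the ice AQ functions):

#include <linux/minmax.h>
#include <linux/types.h>

#define BLK_SIZE (1024 * 1024)

/* Stand-ins for the acquire/release/read primitives. */
int nvm_acquire(void);
void nvm_release(void);
int nvm_read(u32 offset, u32 len, u8 *buf);

static int nvm_read_chunked(u32 total, u8 *buf)
{
	u32 done = 0;

	while (done < total) {
		u32 len = min_t(u32, BLK_SIZE, total - done);
		int err;

		/* Hold the semaphore only for one chunk at a time so the
		 * firmware never sees a single long-running transfer.
		 */
		err = nvm_acquire();
		if (err)
			return err;
		err = nvm_read(done, len, buf + done);
		nvm_release();
		if (err)
			return err;

		done += len;
	}
	return 0;
}
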
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 70335f6e8524..4efa5e5846e0 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -658,7 +658,8 @@ static int ice_lbtest_receive_frames(struct ice_rx_ring *rx_ring)
rx_desc = ICE_RX_DESC(rx_ring, i);
if (!(rx_desc->wb.status_error0 &
- cpu_to_le16(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)))
+ (cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) |
+ cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)))))
continue;
rx_buf = &rx_ring->rx_buf[i];
diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.c b/drivers/net/ethernet/intel/ice/ice_fw_update.c
index 665a344fb9c0..3dc5662d62a6 100644
--- a/drivers/net/ethernet/intel/ice/ice_fw_update.c
+++ b/drivers/net/ethernet/intel/ice/ice_fw_update.c
@@ -736,7 +736,87 @@ static int ice_finalize_update(struct pldmfw *context)
return 0;
}
-static const struct pldmfw_ops ice_fwu_ops = {
+struct ice_pldm_pci_record_id {
+ u32 vendor;
+ u32 device;
+ u32 subsystem_vendor;
+ u32 subsystem_device;
+};
+
+/**
+ * ice_op_pci_match_record - Check if a PCI device matches the record
+ * @context: PLDM fw update structure
+ * @record: list of records extracted from the PLDM image
+ *
+ * Determine if the PCI device associated with this device matches the record
+ * data provided.
+ *
+ * Searches the descriptor TLVs and extracts the relevant descriptor data into
+ * a pldm_pci_record_id. This is then compared against the PCI device ID
+ * information.
+ *
+ * Returns: true if the device matches the record, false otherwise.
+ */
+static bool
+ice_op_pci_match_record(struct pldmfw *context, struct pldmfw_record *record)
+{
+ struct pci_dev *pdev = to_pci_dev(context->dev);
+ struct ice_pldm_pci_record_id id = {
+ .vendor = PCI_ANY_ID,
+ .device = PCI_ANY_ID,
+ .subsystem_vendor = PCI_ANY_ID,
+ .subsystem_device = PCI_ANY_ID,
+ };
+ struct pldmfw_desc_tlv *desc;
+
+ list_for_each_entry(desc, &record->descs, entry) {
+ u16 value;
+ u32 *ptr;
+
+ switch (desc->type) {
+ case PLDM_DESC_ID_PCI_VENDOR_ID:
+ ptr = &id.vendor;
+ break;
+ case PLDM_DESC_ID_PCI_DEVICE_ID:
+ ptr = &id.device;
+ break;
+ case PLDM_DESC_ID_PCI_SUBVENDOR_ID:
+ ptr = &id.subsystem_vendor;
+ break;
+ case PLDM_DESC_ID_PCI_SUBDEV_ID:
+ ptr = &id.subsystem_device;
+ break;
+ default:
+ /* Skip unrelated TLVs */
+ continue;
+ }
+
+ value = get_unaligned_le16(desc->data);
+ /* A value of zero for one of the descriptors is sometimes
+ * used when the record should ignore this field when matching
+ * the device, for example if the record applies to any subsystem
+ * device or vendor.
+ */
+ if (value)
+ *ptr = value;
+ else
+ *ptr = PCI_ANY_ID;
+ }
+
+ /* the E822 device can have a generic device ID so check for that */
+ if ((id.vendor == PCI_ANY_ID || id.vendor == pdev->vendor) &&
+ (id.device == PCI_ANY_ID || id.device == pdev->device ||
+ id.device == ICE_DEV_ID_E822_SI_DFLT) &&
+ (id.subsystem_vendor == PCI_ANY_ID ||
+ id.subsystem_vendor == pdev->subsystem_vendor) &&
+ (id.subsystem_device == PCI_ANY_ID ||
+ id.subsystem_device == pdev->subsystem_device))
+ return true;
+
+ return false;
+}
+
+static const struct pldmfw_ops ice_fwu_ops_e810 = {
.match_record = &pldmfw_op_pci_match_record,
.send_package_data = &ice_send_package_data,
.send_component_table = &ice_send_component_table,
@@ -744,6 +824,14 @@ static const struct pldmfw_ops ice_fwu_ops = {
.finalize_update = &ice_finalize_update,
};
+static const struct pldmfw_ops ice_fwu_ops_e822 = {
+ .match_record = &ice_op_pci_match_record,
+ .send_package_data = &ice_send_package_data,
+ .send_component_table = &ice_send_component_table,
+ .flash_component = &ice_flash_component,
+ .finalize_update = &ice_finalize_update,
+};
+
/**
* ice_get_pending_updates - Check if the component has a pending update
* @pf: the PF driver structure
@@ -921,7 +1009,11 @@ int ice_devlink_flash_update(struct devlink *devlink,
memset(&priv, 0, sizeof(priv));
- priv.context.ops = &ice_fwu_ops;
+ /* the E822 device needs a slightly different ops */
+ if (hw->mac_type == ICE_MAC_GENERIC)
+ priv.context.ops = &ice_fwu_ops_e822;
+ else
+ priv.context.ops = &ice_fwu_ops_e810;
priv.context.dev = dev;
priv.extack = extack;
priv.pf = pf;
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index c1ac2f746714..9f02b60459f1 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4656,6 +4656,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
ice_set_safe_mode_caps(hw);
}
+ hw->ucast_shared = true;
+
err = ice_init_pf(pf);
if (err) {
dev_err(dev, "ice_init_pf failed: %d\n", err);
@@ -5413,6 +5415,7 @@ static const struct pci_device_id ice_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_10G_BASE_T), 0 },
{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_1GBE), 0 },
{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_QSFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822_SI_DFLT), 0 },
/* required last entry */
{ 0, }
};
@@ -6010,10 +6013,12 @@ int ice_vsi_cfg(struct ice_vsi *vsi)
if (vsi->netdev) {
ice_set_rx_mode(vsi->netdev);
- err = ice_vsi_vlan_setup(vsi);
+ if (vsi->type != ICE_VSI_LB) {
+ err = ice_vsi_vlan_setup(vsi);
- if (err)
- return err;
+ if (err)
+ return err;
+ }
}
ice_vsi_cfg_dcb_rings(vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
index bb1721f1321d..f4907a3c2d19 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.c
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -1310,39 +1310,6 @@ out_put_vf:
}
/**
- * ice_unicast_mac_exists - check if the unicast MAC exists on the PF's switch
- * @pf: PF used to reference the switch's rules
- * @umac: unicast MAC to compare against existing switch rules
- *
- * Return true on the first/any match, else return false
- */
-static bool ice_unicast_mac_exists(struct ice_pf *pf, u8 *umac)
-{
- struct ice_sw_recipe *mac_recipe_list =
- &pf->hw.switch_info->recp_list[ICE_SW_LKUP_MAC];
- struct ice_fltr_mgmt_list_entry *list_itr;
- struct list_head *rule_head;
- struct mutex *rule_lock; /* protect MAC filter list access */
-
- rule_head = &mac_recipe_list->filt_rules;
- rule_lock = &mac_recipe_list->filt_rule_lock;
-
- mutex_lock(rule_lock);
- list_for_each_entry(list_itr, rule_head, list_entry) {
- u8 *existing_mac = &list_itr->fltr_info.l_data.mac.mac_addr[0];
-
- if (ether_addr_equal(existing_mac, umac)) {
- mutex_unlock(rule_lock);
- return true;
- }
- }
-
- mutex_unlock(rule_lock);
-
- return false;
-}
-
-/**
* ice_set_vf_mac
* @netdev: network interface device structure
* @vf_id: VF identifier
@@ -1376,13 +1343,6 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
if (ret)
goto out_put_vf;
- if (ice_unicast_mac_exists(pf, mac)) {
- netdev_err(netdev, "Unicast MAC %pM already exists on this PF. Preventing setting VF %u unicast MAC address to %pM\n",
- mac, vf_id, mac);
- ret = -EINVAL;
- goto out_put_vf;
- }
-
mutex_lock(&vf->cfg_lock);
/* VF is notified of its new MAC via the PF's response to the
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 3f8b7274ed2f..836dce840712 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1751,11 +1751,13 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
protocol = vlan_get_protocol(skb);
- if (eth_p_mpls(protocol))
+ if (eth_p_mpls(protocol)) {
ip.hdr = skb_inner_network_header(skb);
- else
+ l4.hdr = skb_checksum_start(skb);
+ } else {
ip.hdr = skb_network_header(skb);
- l4.hdr = skb_checksum_start(skb);
+ l4.hdr = skb_transport_header(skb);
+ }
/* compute outer L2 header size */
l2_len = ip.hdr - skb->data;
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
index 4547bc1f7cee..24188ec594d5 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
@@ -2948,7 +2948,8 @@ ice_vc_validate_add_vlan_filter_list(struct ice_vsi *vsi,
struct virtchnl_vlan_filtering_caps *vfc,
struct virtchnl_vlan_filter_list_v2 *vfl)
{
- u16 num_requested_filters = vsi->num_vlan + vfl->num_elements;
+ u16 num_requested_filters = ice_vsi_num_non_zero_vlans(vsi) +
+ vfl->num_elements;
if (num_requested_filters > vfc->max_filters)
return false;
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index ae17af44fe02..a5ebee7df4a8 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -6171,6 +6171,9 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg)
u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
u32 value = 0;
+ if (IGC_REMOVED(hw_addr))
+ return ~value;
+
value = readl(&hw_addr[reg]);
/* reads should not return all F's */
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index e197a33d93a0..026c3b65fc37 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -306,7 +306,8 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg);
#define wr32(reg, val) \
do { \
u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \
- writel((val), &hw_addr[(reg)]); \
+ if (!IGC_REMOVED(hw_addr)) \
+ writel((val), &hw_addr[(reg)]); \
} while (0)
#define rd32(reg) (igc_rd32(hw, reg))
@@ -318,4 +319,6 @@ do { \
#define array_rd32(reg, offset) (igc_rd32(hw, (reg) + ((offset) << 2)))
+#define IGC_REMOVED(h) unlikely(!(h))
+
#endif
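
igc now does what several other Intel drivers already do for surprise removal: hw->hw_addr is cleared when the adapter drops off the bus, and every register accessor checks for that before touching the mapping, returning all-ones on reads and discarding writes. The essence of the guard, as a sketch with driver-neutral names:

#include <linux/compiler.h>
#include <linux/io.h>

struct dev_hw {
	u8 __iomem *hw_addr;	/* set to NULL when the device is gone */
};

static u32 dev_rd32(struct dev_hw *hw, u32 reg)
{
	u8 __iomem *base = READ_ONCE(hw->hw_addr);

	if (unlikely(!base))
		return ~0U;	/* a removed device reads as all F's anyway */

	return readl(base + reg);
}

static void dev_wr32(struct dev_hw *hw, u32 reg, u32 val)
{
	u8 __iomem *base = READ_ONCE(hw->hw_addr);

	if (likely(base))
		writel(val, base + reg);
	/* otherwise silently drop the write: there is nothing to write to */
}
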
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 921a4d977d65..8813b4dd6872 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -779,6 +779,7 @@ struct ixgbe_adapter {
#ifdef CONFIG_IXGBE_IPSEC
struct ixgbe_ipsec *ipsec;
#endif /* CONFIG_IXGBE_IPSEC */
+ spinlock_t vfs_lock;
};
static inline int ixgbe_determine_xdp_q_idx(int cpu)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 77c2e70b0860..55f91c9ff047 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6403,6 +6403,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter,
/* n-tuple support exists, always init our spinlock */
spin_lock_init(&adapter->fdir_perfect_lock);
+ /* init spinlock to avoid concurrency of VF resources */
+ spin_lock_init(&adapter->vfs_lock);
+
#ifdef CONFIG_IXGBE_DCB
ixgbe_init_dcb(adapter);
#endif
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index d4e63f0644c3..a1e69c734863 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -205,10 +205,13 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs)
int ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
{
unsigned int num_vfs = adapter->num_vfs, vf;
+ unsigned long flags;
int rss;
+ spin_lock_irqsave(&adapter->vfs_lock, flags);
/* set num VFs to 0 to prevent access to vfinfo */
adapter->num_vfs = 0;
+ spin_unlock_irqrestore(&adapter->vfs_lock, flags);
/* put the reference to all of the vf devices */
for (vf = 0; vf < num_vfs; ++vf) {
@@ -1355,8 +1358,10 @@ static void ixgbe_rcv_ack_from_vf(struct ixgbe_adapter *adapter, u32 vf)
void ixgbe_msg_task(struct ixgbe_adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
+ unsigned long flags;
u32 vf;
+ spin_lock_irqsave(&adapter->vfs_lock, flags);
for (vf = 0; vf < adapter->num_vfs; vf++) {
/* process any reset requests */
if (!ixgbe_check_for_rst(hw, vf))
@@ -1370,6 +1375,7 @@ void ixgbe_msg_task(struct ixgbe_adapter *adapter)
if (!ixgbe_check_for_ack(hw, vf))
ixgbe_rcv_ack_from_vf(adapter, vf);
}
+ spin_unlock_irqrestore(&adapter->vfs_lock, flags);
}
static inline void ixgbe_ping_vf(struct ixgbe_adapter *adapter, int vf)
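
The new adapter->vfs_lock closes a race between SR-IOV teardown, which zeroes num_vfs and frees vfinfo, and the mailbox service loop, which iterates vfinfo up to num_vfs from another context: both sides now hold the lock around the count and the walk. Reduced to its core (struct adapter and the helpers below are a sketch, not the ixgbe structures):

#include <linux/spinlock.h>
#include <linux/slab.h>

struct vf_state;

struct adapter {
	spinlock_t vfs_lock;		/* guards num_vfs vs. vfinfo teardown */
	unsigned int num_vfs;
	struct vf_state *vfinfo;	/* array with num_vfs entries */
};

/* Service path: only walk vfinfo while the count is known to be stable. */
static void service_vfs(struct adapter *ad)
{
	unsigned long flags;
	unsigned int vf;

	spin_lock_irqsave(&ad->vfs_lock, flags);
	for (vf = 0; vf < ad->num_vfs; vf++) {
		/* handle mailbox traffic for ad->vfinfo[vf] ... */
	}
	spin_unlock_irqrestore(&ad->vfs_lock, flags);
}

/* Teardown: publish num_vfs = 0 under the lock, then it is safe to free. */
static void disable_vfs(struct adapter *ad)
{
	unsigned long flags;

	spin_lock_irqsave(&ad->vfs_lock, flags);
	ad->num_vfs = 0;
	spin_unlock_irqrestore(&ad->vfs_lock, flags);

	kfree(ad->vfinfo);	/* no service_vfs() iteration can still use it */
	ad->vfinfo = NULL;
}
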
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index 28b19945d716..e64318c110fd 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -28,6 +28,9 @@
#define MAX_RATE_EXPONENT 0x0FULL
#define MAX_RATE_MANTISSA 0xFFULL
+#define CN10K_MAX_BURST_MANTISSA 0x7FFFULL
+#define CN10K_MAX_BURST_SIZE 8453888ULL
+
/* Bitfields in NIX_TLX_PIR register */
#define TLX_RATE_MANTISSA GENMASK_ULL(8, 1)
#define TLX_RATE_EXPONENT GENMASK_ULL(12, 9)
@@ -35,6 +38,9 @@
#define TLX_BURST_MANTISSA GENMASK_ULL(36, 29)
#define TLX_BURST_EXPONENT GENMASK_ULL(40, 37)
+#define CN10K_TLX_BURST_MANTISSA GENMASK_ULL(43, 29)
+#define CN10K_TLX_BURST_EXPONENT GENMASK_ULL(47, 44)
+
struct otx2_tc_flow_stats {
u64 bytes;
u64 pkts;
@@ -77,33 +83,42 @@ int otx2_tc_alloc_ent_bitmap(struct otx2_nic *nic)
}
EXPORT_SYMBOL(otx2_tc_alloc_ent_bitmap);
-static void otx2_get_egress_burst_cfg(u32 burst, u32 *burst_exp,
- u32 *burst_mantissa)
+static void otx2_get_egress_burst_cfg(struct otx2_nic *nic, u32 burst,
+ u32 *burst_exp, u32 *burst_mantissa)
{
+ int max_burst, max_mantissa;
unsigned int tmp;
+ if (is_dev_otx2(nic->pdev)) {
+ max_burst = MAX_BURST_SIZE;
+ max_mantissa = MAX_BURST_MANTISSA;
+ } else {
+ max_burst = CN10K_MAX_BURST_SIZE;
+ max_mantissa = CN10K_MAX_BURST_MANTISSA;
+ }
+
/* Burst is calculated as
* ((256 + BURST_MANTISSA) << (1 + BURST_EXPONENT)) / 256
* Max supported burst size is 130,816 bytes.
*/
- burst = min_t(u32, burst, MAX_BURST_SIZE);
+ burst = min_t(u32, burst, max_burst);
if (burst) {
*burst_exp = ilog2(burst) ? ilog2(burst) - 1 : 0;
tmp = burst - rounddown_pow_of_two(burst);
- if (burst < MAX_BURST_MANTISSA)
+ if (burst < max_mantissa)
*burst_mantissa = tmp * 2;
else
*burst_mantissa = tmp / (1ULL << (*burst_exp - 7));
} else {
*burst_exp = MAX_BURST_EXPONENT;
- *burst_mantissa = MAX_BURST_MANTISSA;
+ *burst_mantissa = max_mantissa;
}
}
-static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp,
+static void otx2_get_egress_rate_cfg(u64 maxrate, u32 *exp,
u32 *mantissa, u32 *div_exp)
{
- unsigned int tmp;
+ u64 tmp;
/* Rate calculation by hardware
*
@@ -132,21 +147,44 @@ static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp,
}
}
-static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 maxrate)
+static u64 otx2_get_txschq_rate_regval(struct otx2_nic *nic,
+ u64 maxrate, u32 burst)
{
- struct otx2_hw *hw = &nic->hw;
- struct nix_txschq_config *req;
u32 burst_exp, burst_mantissa;
u32 exp, mantissa, div_exp;
+ u64 regval = 0;
+
+ /* Get exponent and mantissa values from the desired rate */
+ otx2_get_egress_burst_cfg(nic, burst, &burst_exp, &burst_mantissa);
+ otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp);
+
+ if (is_dev_otx2(nic->pdev)) {
+ regval = FIELD_PREP(TLX_BURST_EXPONENT, (u64)burst_exp) |
+ FIELD_PREP(TLX_BURST_MANTISSA, (u64)burst_mantissa) |
+ FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
+ FIELD_PREP(TLX_RATE_EXPONENT, exp) |
+ FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
+ } else {
+ regval = FIELD_PREP(CN10K_TLX_BURST_EXPONENT, (u64)burst_exp) |
+ FIELD_PREP(CN10K_TLX_BURST_MANTISSA, (u64)burst_mantissa) |
+ FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
+ FIELD_PREP(TLX_RATE_EXPONENT, exp) |
+ FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
+ }
+
+ return regval;
+}
+
+static int otx2_set_matchall_egress_rate(struct otx2_nic *nic,
+ u32 burst, u64 maxrate)
+{
+ struct otx2_hw *hw = &nic->hw;
+ struct nix_txschq_config *req;
int txschq, err;
/* All SQs share the same TL4, so pick the first scheduler */
txschq = hw->txschq_list[NIX_TXSCH_LVL_TL4][0];
- /* Get exponent and mantissa values from the desired rate */
- otx2_get_egress_burst_cfg(burst, &burst_exp, &burst_mantissa);
- otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp);
-
mutex_lock(&nic->mbox.lock);
req = otx2_mbox_alloc_msg_nix_txschq_cfg(&nic->mbox);
if (!req) {
@@ -157,11 +195,7 @@ static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 ma
req->lvl = NIX_TXSCH_LVL_TL4;
req->num_regs = 1;
req->reg[0] = NIX_AF_TL4X_PIR(txschq);
- req->regval[0] = FIELD_PREP(TLX_BURST_EXPONENT, burst_exp) |
- FIELD_PREP(TLX_BURST_MANTISSA, burst_mantissa) |
- FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
- FIELD_PREP(TLX_RATE_EXPONENT, exp) |
- FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
+ req->regval[0] = otx2_get_txschq_rate_regval(nic, maxrate, burst);
err = otx2_sync_mbox_msg(&nic->mbox);
mutex_unlock(&nic->mbox.lock);
@@ -230,7 +264,7 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic,
struct netlink_ext_ack *extack = cls->common.extack;
struct flow_action *actions = &cls->rule->action;
struct flow_action_entry *entry;
- u32 rate;
+ u64 rate;
int err;
err = otx2_tc_validate_flow(nic, actions, extack);
@@ -256,7 +290,7 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic,
}
/* Convert bytes per second to Mbps */
rate = entry->police.rate_bytes_ps * 8;
- rate = max_t(u32, rate / 1000000, 1);
+ rate = max_t(u64, rate / 1000000, 1);
err = otx2_set_matchall_egress_rate(nic, entry->police.burst, rate);
if (err)
return err;
@@ -614,21 +648,27 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
flow_spec->dport = match.key->dst;
flow_mask->dport = match.mask->dst;
- if (ip_proto == IPPROTO_UDP)
- req->features |= BIT_ULL(NPC_DPORT_UDP);
- else if (ip_proto == IPPROTO_TCP)
- req->features |= BIT_ULL(NPC_DPORT_TCP);
- else if (ip_proto == IPPROTO_SCTP)
- req->features |= BIT_ULL(NPC_DPORT_SCTP);
+
+ if (flow_mask->dport) {
+ if (ip_proto == IPPROTO_UDP)
+ req->features |= BIT_ULL(NPC_DPORT_UDP);
+ else if (ip_proto == IPPROTO_TCP)
+ req->features |= BIT_ULL(NPC_DPORT_TCP);
+ else if (ip_proto == IPPROTO_SCTP)
+ req->features |= BIT_ULL(NPC_DPORT_SCTP);
+ }
flow_spec->sport = match.key->src;
flow_mask->sport = match.mask->src;
- if (ip_proto == IPPROTO_UDP)
- req->features |= BIT_ULL(NPC_SPORT_UDP);
- else if (ip_proto == IPPROTO_TCP)
- req->features |= BIT_ULL(NPC_SPORT_TCP);
- else if (ip_proto == IPPROTO_SCTP)
- req->features |= BIT_ULL(NPC_SPORT_SCTP);
+
+ if (flow_mask->sport) {
+ if (ip_proto == IPPROTO_UDP)
+ req->features |= BIT_ULL(NPC_SPORT_UDP);
+ else if (ip_proto == IPPROTO_TCP)
+ req->features |= BIT_ULL(NPC_SPORT_TCP);
+ else if (ip_proto == IPPROTO_SCTP)
+ req->features |= BIT_ULL(NPC_SPORT_SCTP);
+ }
}
return otx2_tc_parse_actions(nic, &rule->action, req, f, node);
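The burst comment above encodes burst = ((256 + BURST_MANTISSA) << (1 + BURST_EXPONENT)) / 256. A minimal standalone C sketch (illustrative only, not driver code, and assuming a maximum exponent of 15) reproduces the two limits used in this hunk: 130,816 bytes with the OcteonTX2 mantissa 0xFF and 8,453,888 bytes with the CN10K mantissa 0x7FFF. The related widening of rate to u64 avoids overflowing rate_bytes_ps * 8, which a u32 can only hold up to roughly 4.3 Gbit/s.
/* Standalone sketch, not driver code: evaluate the burst formula from the
 * comment above for the largest mantissa of each silicon family, assuming
 * the maximum exponent is 15.
 */
#include <stdio.h>
#include <stdint.h>
static uint64_t burst_bytes(uint64_t mantissa, unsigned int exponent)
{
	return ((256 + mantissa) << (1 + exponent)) / 256;
}
int main(void)
{
	printf("otx2 max burst:  %llu\n",
	       (unsigned long long)burst_bytes(0xFFULL, 15));   /* 130816 */
	printf("cn10k max burst: %llu\n",
	       (unsigned long long)burst_bytes(0x7FFFULL, 15)); /* 8453888 */
	return 0;
}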
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flower.c b/drivers/net/ethernet/marvell/prestera/prestera_flower.c
index d43e503c644f..4d93ad6a284c 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_flower.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_flower.c
@@ -167,12 +167,12 @@ static int prestera_flower_parse_meta(struct prestera_acl_rule *rule,
}
port = netdev_priv(ingress_dev);
- mask = htons(0x1FFF);
- key = htons(port->hw_id);
+ mask = htons(0x1FFF << 3);
+ key = htons(port->hw_id << 3);
rule_match_set(r_match->key, SYS_PORT, key);
rule_match_set(r_match->mask, SYS_PORT, mask);
- mask = htons(0x1FF);
+ mask = htons(0x3FF);
key = htons(port->dev_id);
rule_match_set(r_match->key, SYS_DEV, key);
rule_match_set(r_match->mask, SYS_DEV, mask);
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c
index 3754d8aec76d..3c8116f16b4d 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_router.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c
@@ -588,6 +588,7 @@ err_router_lib_init:
void prestera_router_fini(struct prestera_switch *sw)
{
+ unregister_fib_notifier(&init_net, &sw->router->fib_nb);
unregister_inetaddr_notifier(&sw->router->inetaddr_nb);
unregister_inetaddr_validator_notifier(&sw->router->inetaddr_valid_nb);
rhashtable_destroy(&sw->router->kern_fib_cache_ht);
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
index 90e7dfd011c9..5d457bc9acc1 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -93,6 +93,9 @@ mtk_flow_get_wdma_info(struct net_device *dev, const u8 *addr, struct mtk_wdma_i
};
struct net_device_path path = {};
+ if (!ctx.dev)
+ return -ENODEV;
+
memcpy(ctx.daddr, addr, sizeof(ctx.daddr));
if (!IS_ENABLED(CONFIG_NET_MEDIATEK_SOC_WED))
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c
index 8f0cd3196aac..29be2fcafea3 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed.c
@@ -651,7 +651,7 @@ mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs)
* WDMA RX.
*/
- BUG_ON(idx > ARRAY_SIZE(dev->tx_ring));
+ BUG_ON(idx >= ARRAY_SIZE(dev->tx_ring));
if (mtk_wed_ring_alloc(dev, ring, MTK_WED_TX_RING_SIZE))
return -ENOMEM;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 25f51f80a9b4..ba171c7f0a67 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -76,6 +76,7 @@ struct mlx5_tc_ct_priv {
struct mlx5_ct_fs *fs;
struct mlx5_ct_fs_ops *fs_ops;
spinlock_t ht_lock; /* protects ft entries */
+ struct workqueue_struct *wq;
struct mlx5_tc_ct_debugfs debugfs;
};
@@ -941,14 +942,11 @@ static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
static void
__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
- struct mlx5e_priv *priv;
-
if (!refcount_dec_and_test(&entry->refcnt))
return;
- priv = netdev_priv(entry->ct_priv->netdev);
INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
- queue_work(priv->wq, &entry->work);
+ queue_work(entry->ct_priv->wq, &entry->work);
}
static struct mlx5_ct_counter *
@@ -1759,19 +1757,16 @@ mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
static void
mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
{
- struct mlx5e_priv *priv;
-
if (!refcount_dec_and_test(&ft->refcount))
return;
+ flush_workqueue(ct_priv->wq);
nf_flow_table_offload_del_cb(ft->nf_ft,
mlx5_tc_ct_block_flow_offload, ft);
rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
rhashtable_free_and_destroy(&ft->ct_entries_ht,
mlx5_tc_ct_flush_ft_entry,
ct_priv);
- priv = netdev_priv(ct_priv->netdev);
- flush_workqueue(priv->wq);
mlx5_tc_ct_free_pre_ct_tables(ft);
mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
kfree(ft);
@@ -2176,6 +2171,12 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params))
goto err_ct_tuples_nat_ht;
+ ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0);
+ if (!ct_priv->wq) {
+ err = -ENOMEM;
+ goto err_wq;
+ }
+
err = mlx5_tc_ct_fs_init(ct_priv);
if (err)
goto err_init_fs;
@@ -2184,6 +2185,8 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
return ct_priv;
err_init_fs:
+ destroy_workqueue(ct_priv->wq);
+err_wq:
rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
err_ct_tuples_nat_ht:
rhashtable_destroy(&ct_priv->ct_tuples_ht);
@@ -2213,6 +2216,7 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
if (!ct_priv)
return;
+ destroy_workqueue(ct_priv->wq);
mlx5_ct_tc_remove_dbgfs(ct_priv);
chains = ct_priv->chains;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index 0bb0633b7542..27483aa7be8a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -231,8 +231,7 @@ mlx5e_set_ktls_rx_priv_ctx(struct tls_context *tls_ctx,
struct mlx5e_ktls_offload_context_rx **ctx =
__tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_RX);
- BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_rx *) >
- TLS_OFFLOAD_CONTEXT_SIZE_RX);
+ BUILD_BUG_ON(sizeof(priv_rx) > TLS_DRIVER_STATE_SIZE_RX);
*ctx = priv_rx;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index 4b6f0d1ea59a..f239fb2e832f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -68,8 +68,7 @@ mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx,
struct mlx5e_ktls_offload_context_tx **ctx =
__tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX);
- BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_tx *) >
- TLS_OFFLOAD_CONTEXT_SIZE_TX);
+ BUILD_BUG_ON(sizeof(priv_tx) > TLS_DRIVER_STATE_SIZE_TX);
*ctx = priv_tx;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 57fa0489eeb8..1e87bb2b7541 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -688,7 +688,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vnic_env)
u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
struct mlx5_core_dev *mdev = priv->mdev;
- if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
+ if (!mlx5e_stats_grp_vnic_env_num_stats(priv))
return;
MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 3a39a50146dd..9ca2c8763237 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -3793,7 +3793,7 @@ static bool is_lag_dev(struct mlx5e_priv *priv,
static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev)
{
- if (mlx5e_eswitch_uplink_rep(out_dev) &&
+ if (same_hw_reps(priv, out_dev) &&
MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) &&
MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up))
return true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 50d14cec4894..9a7250be229f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -341,6 +341,26 @@ static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq)
}
}
+static void mlx5e_tx_flush(struct mlx5e_txqsq *sq)
+{
+ struct mlx5e_tx_wqe_info *wi;
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi;
+
+ /* Must not be called when a MPWQE session is active but empty. */
+ mlx5e_tx_mpwqe_ensure_complete(sq);
+
+ pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+ wi = &sq->db.wqe_info[pi];
+
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .num_wqebbs = 1,
+ };
+
+ wqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl);
+}
+
static inline void
mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
const struct mlx5e_tx_attr *attr,
@@ -459,6 +479,7 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
err_drop:
stats->dropped++;
dev_kfree_skb_any(skb);
+ mlx5e_tx_flush(sq);
}
static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr)
@@ -560,6 +581,13 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5_wqe_ctrl_seg *cseg;
struct mlx5e_xmit_data txd;
+ txd.data = skb->data;
+ txd.len = skb->len;
+
+ txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr)))
+ goto err_unmap;
+
if (!mlx5e_tx_mpwqe_session_is_active(sq)) {
mlx5e_tx_mpwqe_session_start(sq, eseg);
} else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) {
@@ -569,18 +597,9 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
sq->stats->xmit_more += xmit_more;
- txd.data = skb->data;
- txd.len = skb->len;
-
- txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr)))
- goto err_unmap;
mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE);
-
mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb);
-
mlx5e_tx_mpwqe_add_dseg(sq, &txd);
-
mlx5e_tx_skb_update_hwts_flags(skb);
if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) {
@@ -602,6 +621,7 @@ err_unmap:
mlx5e_dma_unmap_wqe_err(sq, 1);
sq->stats->dropped++;
dev_kfree_skb_any(skb);
+ mlx5e_tx_flush(sq);
}
void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
@@ -1006,5 +1026,6 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
err_drop:
stats->dropped++;
dev_kfree_skb_any(skb);
+ mlx5e_tx_flush(sq);
}
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
index 9d17206d1625..fabe49a35a5c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
@@ -11,6 +11,7 @@
#include "mlx5_core.h"
#include "eswitch.h"
#include "fs_core.h"
+#include "fs_ft_pool.h"
#include "esw/qos.h"
enum {
@@ -95,8 +96,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
if (!flow_group_in)
return -ENOMEM;
- table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
- ft_attr.max_fte = table_size;
+ ft_attr.max_fte = POOL_NEXT_SIZE;
ft_attr.prio = LEGACY_FDB_PRIO;
fdb = mlx5_create_flow_table(root_ns, &ft_attr);
if (IS_ERR(fdb)) {
@@ -105,6 +105,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
goto out;
}
esw->fdb_table.legacy.fdb = fdb;
+ table_size = fdb->max_fte;
/* Addresses group : Full match unicast/multicast addresses */
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
index 15e41dc84d53..b8feaf0f5c4c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
@@ -44,7 +44,7 @@ static int port_sel_mode_show(struct seq_file *file, void *priv)
ldev = dev->priv.lag;
mutex_lock(&ldev->lock);
if (__mlx5_lag_is_active(ldev))
- mode = mlx5_get_str_port_sel_mode(ldev);
+ mode = mlx5_get_str_port_sel_mode(ldev->mode, ldev->mode_flags);
else
ret = -EINVAL;
mutex_unlock(&ldev->lock);
@@ -72,6 +72,7 @@ static int state_show(struct seq_file *file, void *priv)
static int flags_show(struct seq_file *file, void *priv)
{
struct mlx5_core_dev *dev = file->private;
+ bool fdb_sel_mode_native;
struct mlx5_lag *ldev;
bool shared_fdb;
bool lag_active;
@@ -79,14 +80,21 @@ static int flags_show(struct seq_file *file, void *priv)
ldev = dev->priv.lag;
mutex_lock(&ldev->lock);
lag_active = __mlx5_lag_is_active(ldev);
- if (lag_active)
- shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
+ if (!lag_active)
+ goto unlock;
+
+ shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
+ fdb_sel_mode_native = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
+ &ldev->mode_flags);
+unlock:
mutex_unlock(&ldev->lock);
if (!lag_active)
return -EINVAL;
seq_printf(file, "%s:%s\n", "shared_fdb", shared_fdb ? "on" : "off");
+ seq_printf(file, "%s:%s\n", "fdb_selection_mode",
+ fdb_sel_mode_native ? "native" : "affinity");
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index 2a8fc547eb37..5d41e19378e0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -68,14 +68,15 @@ static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
unsigned long flags)
{
- bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
+ bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
+ &flags);
int port_sel_mode = get_port_sel_mode(mode, flags);
u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
void *lag_ctx;
lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
- MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);
+ MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
if (port_sel_mode == MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY) {
MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
@@ -471,8 +472,13 @@ static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
*flags = 0;
- if (shared_fdb)
+ if (shared_fdb) {
set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
+ set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
+ }
+
+ if (mode == MLX5_LAG_MODE_MPESW)
+ set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
if (roce_lag)
return mlx5_lag_set_port_sel_mode_roce(ldev, flags);
@@ -481,9 +487,9 @@ static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
return 0;
}
-char *mlx5_get_str_port_sel_mode(struct mlx5_lag *ldev)
+char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
- int port_sel_mode = get_port_sel_mode(ldev->mode, ldev->mode_flags);
+ int port_sel_mode = get_port_sel_mode(mode, flags);
switch (port_sel_mode) {
case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
@@ -507,7 +513,7 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
if (tracker)
mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
- shared_fdb, mlx5_get_str_port_sel_mode(ldev));
+ shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
if (err) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
index c81b173156d2..ce2ce8ccbd70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -24,6 +24,7 @@ enum {
enum {
MLX5_LAG_MODE_FLAG_HASH_BASED,
MLX5_LAG_MODE_FLAG_SHARED_FDB,
+ MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
};
enum mlx5_lag_mode {
@@ -114,7 +115,7 @@ bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev);
void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev);
int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev);
-char *mlx5_get_str_port_sel_mode(struct mlx5_lag *ldev);
+char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags);
void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
u8 *ports, int *num_enabled);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
index ee4b25a50315..f643202b29c6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
@@ -41,7 +41,6 @@ void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev)
int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev = dev->priv.lag;
- bool shared_fdb;
int err = 0;
if (!ldev)
@@ -55,8 +54,8 @@ int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev)
err = -EINVAL;
goto out;
}
- shared_fdb = mlx5_shared_fdb_supported(ldev);
- err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, shared_fdb);
+
+ err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false);
if (err)
mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 0d8a0068e4ca..ce33dbde124d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -5384,7 +5384,7 @@ static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
{
const struct fib_nh *nh = fib_info_nh(fi, 0);
- return nh->fib_nh_scope == RT_SCOPE_LINK ||
+ return nh->fib_nh_gw_family ||
mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
}
@@ -10324,7 +10324,7 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
unsigned long *fields = config->fields;
u32 hash_fields;
- switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
+ switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
case 0:
mlxsw_sp_mp4_hash_outer_addr(config);
break;
@@ -10342,7 +10342,7 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_mp_hash_inner_l3(config);
break;
case 3:
- hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
/* Outer */
MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
@@ -10523,13 +10523,14 @@ static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
struct net *net = mlxsw_sp_net(mlxsw_sp);
- bool usp = net->ipv4.sysctl_ip_fwd_update_priority;
char rgcr_pl[MLXSW_REG_RGCR_LEN];
u64 max_rifs;
+ bool usp;
if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
return -EIO;
max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
+ usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority);
mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
index 005e56ea5da1..5893770bfd94 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
@@ -75,6 +75,9 @@ static int __lan966x_mac_learn(struct lan966x *lan966x, int pgid,
unsigned int vid,
enum macaccess_entry_type type)
{
+ int ret;
+
+ spin_lock(&lan966x->mac_lock);
lan966x_mac_select(lan966x, mac, vid);
/* Issue a write command */
@@ -86,7 +89,10 @@ static int __lan966x_mac_learn(struct lan966x *lan966x, int pgid,
ANA_MACACCESS_MAC_TABLE_CMD_SET(MACACCESS_CMD_LEARN),
lan966x, ANA_MACACCESS);
- return lan966x_mac_wait_for_completion(lan966x);
+ ret = lan966x_mac_wait_for_completion(lan966x);
+ spin_unlock(&lan966x->mac_lock);
+
+ return ret;
}
/* The mask of the front ports is encoded inside the mac parameter via a call
@@ -113,11 +119,13 @@ int lan966x_mac_learn(struct lan966x *lan966x, int port,
return __lan966x_mac_learn(lan966x, port, false, mac, vid, type);
}
-int lan966x_mac_forget(struct lan966x *lan966x,
- const unsigned char mac[ETH_ALEN],
- unsigned int vid,
- enum macaccess_entry_type type)
+static int lan966x_mac_forget_locked(struct lan966x *lan966x,
+ const unsigned char mac[ETH_ALEN],
+ unsigned int vid,
+ enum macaccess_entry_type type)
{
+ lockdep_assert_held(&lan966x->mac_lock);
+
lan966x_mac_select(lan966x, mac, vid);
/* Issue a forget command */
@@ -128,6 +136,20 @@ int lan966x_mac_forget(struct lan966x *lan966x,
return lan966x_mac_wait_for_completion(lan966x);
}
+int lan966x_mac_forget(struct lan966x *lan966x,
+ const unsigned char mac[ETH_ALEN],
+ unsigned int vid,
+ enum macaccess_entry_type type)
+{
+ int ret;
+
+ spin_lock(&lan966x->mac_lock);
+ ret = lan966x_mac_forget_locked(lan966x, mac, vid, type);
+ spin_unlock(&lan966x->mac_lock);
+
+ return ret;
+}
+
int lan966x_mac_cpu_learn(struct lan966x *lan966x, const char *addr, u16 vid)
{
return lan966x_mac_learn(lan966x, PGID_CPU, addr, vid, ENTRYTYPE_LOCKED);
@@ -161,7 +183,7 @@ static struct lan966x_mac_entry *lan966x_mac_alloc_entry(const unsigned char *ma
{
struct lan966x_mac_entry *mac_entry;
- mac_entry = kzalloc(sizeof(*mac_entry), GFP_KERNEL);
+ mac_entry = kzalloc(sizeof(*mac_entry), GFP_ATOMIC);
if (!mac_entry)
return NULL;
@@ -179,7 +201,6 @@ static struct lan966x_mac_entry *lan966x_mac_find_entry(struct lan966x *lan966x,
struct lan966x_mac_entry *res = NULL;
struct lan966x_mac_entry *mac_entry;
- spin_lock(&lan966x->mac_lock);
list_for_each_entry(mac_entry, &lan966x->mac_entries, list) {
if (mac_entry->vid == vid &&
ether_addr_equal(mac, mac_entry->mac) &&
@@ -188,7 +209,6 @@ static struct lan966x_mac_entry *lan966x_mac_find_entry(struct lan966x *lan966x,
break;
}
}
- spin_unlock(&lan966x->mac_lock);
return res;
}
@@ -231,8 +251,11 @@ int lan966x_mac_add_entry(struct lan966x *lan966x, struct lan966x_port *port,
{
struct lan966x_mac_entry *mac_entry;
- if (lan966x_mac_lookup(lan966x, addr, vid, ENTRYTYPE_NORMAL))
+ spin_lock(&lan966x->mac_lock);
+ if (lan966x_mac_lookup(lan966x, addr, vid, ENTRYTYPE_NORMAL)) {
+ spin_unlock(&lan966x->mac_lock);
return 0;
+ }
/* In case the entry already exists, don't add it again to SW,
* just update HW, but we need to look in the actual HW because
@@ -241,21 +264,25 @@ int lan966x_mac_add_entry(struct lan966x *lan966x, struct lan966x_port *port,
* add the entry but without the extern_learn flag.
*/
mac_entry = lan966x_mac_find_entry(lan966x, addr, vid, port->chip_port);
- if (mac_entry)
- return lan966x_mac_learn(lan966x, port->chip_port,
- addr, vid, ENTRYTYPE_LOCKED);
+ if (mac_entry) {
+ spin_unlock(&lan966x->mac_lock);
+ goto mac_learn;
+ }
mac_entry = lan966x_mac_alloc_entry(addr, vid, port->chip_port);
- if (!mac_entry)
+ if (!mac_entry) {
+ spin_unlock(&lan966x->mac_lock);
return -ENOMEM;
+ }
- spin_lock(&lan966x->mac_lock);
list_add_tail(&mac_entry->list, &lan966x->mac_entries);
spin_unlock(&lan966x->mac_lock);
- lan966x_mac_learn(lan966x, port->chip_port, addr, vid, ENTRYTYPE_LOCKED);
lan966x_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, addr, vid, port->dev);
+mac_learn:
+ lan966x_mac_learn(lan966x, port->chip_port, addr, vid, ENTRYTYPE_LOCKED);
+
return 0;
}
@@ -269,8 +296,9 @@ int lan966x_mac_del_entry(struct lan966x *lan966x, const unsigned char *addr,
list) {
if (mac_entry->vid == vid &&
ether_addr_equal(addr, mac_entry->mac)) {
- lan966x_mac_forget(lan966x, mac_entry->mac, mac_entry->vid,
- ENTRYTYPE_LOCKED);
+ lan966x_mac_forget_locked(lan966x, mac_entry->mac,
+ mac_entry->vid,
+ ENTRYTYPE_LOCKED);
list_del(&mac_entry->list);
kfree(mac_entry);
@@ -288,8 +316,8 @@ void lan966x_mac_purge_entries(struct lan966x *lan966x)
spin_lock(&lan966x->mac_lock);
list_for_each_entry_safe(mac_entry, tmp, &lan966x->mac_entries,
list) {
- lan966x_mac_forget(lan966x, mac_entry->mac, mac_entry->vid,
- ENTRYTYPE_LOCKED);
+ lan966x_mac_forget_locked(lan966x, mac_entry->mac,
+ mac_entry->vid, ENTRYTYPE_LOCKED);
list_del(&mac_entry->list);
kfree(mac_entry);
@@ -325,10 +353,13 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
{
struct lan966x_mac_entry *mac_entry, *tmp;
unsigned char mac[ETH_ALEN] __aligned(2);
+ struct list_head mac_deleted_entries;
u32 dest_idx;
u32 column;
u16 vid;
+ INIT_LIST_HEAD(&mac_deleted_entries);
+
spin_lock(&lan966x->mac_lock);
list_for_each_entry_safe(mac_entry, tmp, &lan966x->mac_entries, list) {
bool found = false;
@@ -362,20 +393,26 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
}
if (!found) {
- /* Notify the bridge that the entry doesn't exist
- * anymore in the HW and remove the entry from the SW
- * list
- */
- lan966x_mac_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
- mac_entry->mac, mac_entry->vid,
- lan966x->ports[mac_entry->port_index]->dev);
-
list_del(&mac_entry->list);
- kfree(mac_entry);
+ /* Move the entry from the SW list to a temporary list so that
+ * it can be deleted later
+ */
+ list_add_tail(&mac_entry->list, &mac_deleted_entries);
}
}
spin_unlock(&lan966x->mac_lock);
+ list_for_each_entry_safe(mac_entry, tmp, &mac_deleted_entries, list) {
+ /* Notify the bridge that the entry doesn't exist
+ * anymore in the HW
+ */
+ lan966x_mac_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
+ mac_entry->mac, mac_entry->vid,
+ lan966x->ports[mac_entry->port_index]->dev);
+ list_del(&mac_entry->list);
+ kfree(mac_entry);
+ }
+
/* Now go to the list of columns and see if any entry was not in the SW
* list, then that means that the entry is new so it needs to notify the
* bridge.
@@ -396,13 +433,20 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
if (WARN_ON(dest_idx >= lan966x->num_phys_ports))
continue;
+ spin_lock(&lan966x->mac_lock);
+ mac_entry = lan966x_mac_find_entry(lan966x, mac, vid, dest_idx);
+ if (mac_entry) {
+ spin_unlock(&lan966x->mac_lock);
+ continue;
+ }
+
mac_entry = lan966x_mac_alloc_entry(mac, vid, dest_idx);
- if (!mac_entry)
+ if (!mac_entry) {
+ spin_unlock(&lan966x->mac_lock);
return;
+ }
mac_entry->row = row;
-
- spin_lock(&lan966x->mac_lock);
list_add_tail(&mac_entry->list, &lan966x->mac_entries);
spin_unlock(&lan966x->mac_lock);
@@ -424,6 +468,7 @@ irqreturn_t lan966x_mac_irq_handler(struct lan966x *lan966x)
lan966x, ANA_MACTINDX);
while (1) {
+ spin_lock(&lan966x->mac_lock);
lan_rmw(ANA_MACACCESS_MAC_TABLE_CMD_SET(MACACCESS_CMD_SYNC_GET_NEXT),
ANA_MACACCESS_MAC_TABLE_CMD,
lan966x, ANA_MACACCESS);
@@ -447,12 +492,15 @@ irqreturn_t lan966x_mac_irq_handler(struct lan966x *lan966x)
stop = false;
if (column == LAN966X_MAC_COLUMNS - 1 &&
- index == 0 && stop)
+ index == 0 && stop) {
+ spin_unlock(&lan966x->mac_lock);
break;
+ }
entry[column].mach = lan_rd(lan966x, ANA_MACHDATA);
entry[column].macl = lan_rd(lan966x, ANA_MACLDATA);
entry[column].maca = lan_rd(lan966x, ANA_MACACCESS);
+ spin_unlock(&lan966x->mac_lock);
/* Once all the columns are read process them */
if (column == LAN966X_MAC_COLUMNS - 1) {
diff --git a/drivers/net/ethernet/mscc/ocelot_fdma.c b/drivers/net/ethernet/mscc/ocelot_fdma.c
index 083fddd263ec..8e3894cf5f7c 100644
--- a/drivers/net/ethernet/mscc/ocelot_fdma.c
+++ b/drivers/net/ethernet/mscc/ocelot_fdma.c
@@ -94,19 +94,18 @@ static void ocelot_fdma_activate_chan(struct ocelot *ocelot, dma_addr_t dma,
ocelot_fdma_writel(ocelot, MSCC_FDMA_CH_ACTIVATE, BIT(chan));
}
+static u32 ocelot_fdma_read_ch_safe(struct ocelot *ocelot)
+{
+ return ocelot_fdma_readl(ocelot, MSCC_FDMA_CH_SAFE);
+}
+
static int ocelot_fdma_wait_chan_safe(struct ocelot *ocelot, int chan)
{
- unsigned long timeout;
u32 safe;
- timeout = jiffies + usecs_to_jiffies(OCELOT_FDMA_CH_SAFE_TIMEOUT_US);
- do {
- safe = ocelot_fdma_readl(ocelot, MSCC_FDMA_CH_SAFE);
- if (safe & BIT(chan))
- return 0;
- } while (time_after(jiffies, timeout));
-
- return -ETIMEDOUT;
+ return readx_poll_timeout_atomic(ocelot_fdma_read_ch_safe, ocelot, safe,
+ safe & BIT(chan), 0,
+ OCELOT_FDMA_CH_SAFE_TIMEOUT_US);
}
static void ocelot_fdma_dcb_set_data(struct ocelot_fdma_dcb *dcb,
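The open-coded loop removed above appears to give up after a single read, since its do/while condition used time_after() where time_before() was intended; readx_poll_timeout_atomic() supplies the correct bounded retry. A userspace sketch of that corrected loop shape (fake register read, illustrative names only):
/* Standalone sketch, not driver code: keep polling while the deadline has
 * NOT yet passed, return 0 on success or -ETIMEDOUT otherwise. The
 * register read is faked; only the loop structure mirrors the helper.
 */
#include <stdio.h>
#include <time.h>
#include <errno.h>
static unsigned int fake_read_ch_safe(void) { return 0x1; } /* channel 0 "safe" */
static int poll_ch_safe(int chan, long timeout_us)
{
	struct timespec start, now;
	unsigned int safe;
	clock_gettime(CLOCK_MONOTONIC, &start);
	for (;;) {
		safe = fake_read_ch_safe();
		if (safe & (1u << chan))
			return 0;
		clock_gettime(CLOCK_MONOTONIC, &now);
		if ((now.tv_sec - start.tv_sec) * 1000000L +
		    (now.tv_nsec - start.tv_nsec) / 1000L > timeout_us)
			return -ETIMEDOUT;
	}
}
int main(void)
{
	printf("chan 0: %d\n", poll_ch_safe(0, 100)); /* 0: bit already set */
	printf("chan 1: %d\n", poll_ch_safe(1, 100)); /* -ETIMEDOUT after ~100us */
	return 0;
}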
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index e31f8fbbc696..df2ab5cbd49b 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -4233,7 +4233,7 @@ static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
}
/* If the chain is ended by an load/store pair then this
- * could serve as the new head of the the next chain.
+ * could serve as the new head of the next chain.
*/
if (curr_pair_is_memcpy(meta1, meta2)) {
head_ld_meta = meta1;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index 0147de405365..ffb6f6d05a07 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -474,7 +474,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun,
set_tun->ttl = ip4_dst_hoplimit(&rt->dst);
ip_rt_put(rt);
} else {
- set_tun->ttl = net->ipv4.sysctl_ip_default_ttl;
+ set_tun->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
}
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
index 6bf3ec448e7e..97dcf8db7ed2 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -447,7 +447,8 @@ void nfp_tun_unlink_and_update_nn_entries(struct nfp_app *app,
static void
nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app,
- void *flow, struct neighbour *neigh, bool is_ipv6)
+ void *flow, struct neighbour *neigh, bool is_ipv6,
+ bool override)
{
bool neigh_invalid = !(neigh->nud_state & NUD_VALID) || neigh->dead;
size_t neigh_size = is_ipv6 ? sizeof(struct nfp_tun_neigh_v6) :
@@ -546,6 +547,13 @@ nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app,
if (nn_entry->flow)
list_del(&nn_entry->list_head);
kfree(nn_entry);
+ } else if (nn_entry && !neigh_invalid && override) {
+ mtype = is_ipv6 ? NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 :
+ NFP_FLOWER_CMSG_TYPE_TUN_NEIGH;
+ nfp_tun_link_predt_entries(app, nn_entry);
+ nfp_flower_xmit_tun_conf(app, mtype, neigh_size,
+ nn_entry->payload,
+ GFP_ATOMIC);
}
spin_unlock_bh(&priv->predt_lock);
@@ -610,7 +618,7 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event,
dst_release(dst);
}
- nfp_tun_write_neigh(n->dev, app, &flow6, n, true);
+ nfp_tun_write_neigh(n->dev, app, &flow6, n, true, false);
#else
return NOTIFY_DONE;
#endif /* CONFIG_IPV6 */
@@ -633,7 +641,7 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event,
ip_rt_put(rt);
}
- nfp_tun_write_neigh(n->dev, app, &flow4, n, false);
+ nfp_tun_write_neigh(n->dev, app, &flow4, n, false, false);
}
#else
return NOTIFY_DONE;
@@ -676,7 +684,7 @@ void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb)
ip_rt_put(rt);
if (!n)
goto fail_rcu_unlock;
- nfp_tun_write_neigh(n->dev, app, &flow, n, false);
+ nfp_tun_write_neigh(n->dev, app, &flow, n, false, true);
neigh_release(n);
rcu_read_unlock();
return;
@@ -718,7 +726,7 @@ void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb)
if (!n)
goto fail_rcu_unlock;
- nfp_tun_write_neigh(n->dev, app, &flow, n, true);
+ nfp_tun_write_neigh(n->dev, app, &flow, n, true, true);
neigh_release(n);
rcu_read_unlock();
return;
diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
index e509d6dcba5c..805071d64a20 100644
--- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
@@ -125,17 +125,18 @@ nfp_nfdk_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
static int
nfp_nfdk_tx_maybe_close_block(struct nfp_net_tx_ring *tx_ring,
- unsigned int nr_frags, struct sk_buff *skb)
+ struct sk_buff *skb)
{
unsigned int n_descs, wr_p, nop_slots;
const skb_frag_t *frag, *fend;
struct nfp_nfdk_tx_desc *txd;
+ unsigned int nr_frags;
unsigned int wr_idx;
int err;
recount_descs:
n_descs = nfp_nfdk_headlen_to_segs(skb_headlen(skb));
-
+ nr_frags = skb_shinfo(skb)->nr_frags;
frag = skb_shinfo(skb)->frags;
fend = frag + nr_frags;
for (; frag < fend; frag++)
@@ -281,10 +282,13 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev)
if (unlikely((int)metadata < 0))
goto err_flush;
- nr_frags = skb_shinfo(skb)->nr_frags;
- if (nfp_nfdk_tx_maybe_close_block(tx_ring, nr_frags, skb))
+ if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb))
goto err_flush;
+ /* nr_frags can change if nfp_nfdk_tx_maybe_close_block() linearizes
+ * the skb, so read it only after that call
+ */
+ nr_frags = skb_shinfo(skb)->nr_frags;
/* DMA map all */
wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
txd = &tx_ring->ktxds[wr_idx];
@@ -310,7 +314,16 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev)
/* FIELD_PREP() implicitly truncates to chunk */
dma_len -= 1;
- dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) |
+
+ /* Pack as much data as possible into each descriptor, but make
+ * sure the first descriptor covers the whole head, as the firmware
+ * requires it. Without the clamp, dma_len masked with
+ * NFDK_DESC_TX_DMA_LEN_HEAD could otherwise end up smaller than
+ * headlen.
+ */
+ dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD,
+ dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ?
+ NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) |
FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);
txd->dma_len_type = cpu_to_le16(dlen_type);
@@ -925,7 +938,9 @@ nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
/* FIELD_PREP() implicitly truncates to chunk */
dma_len -= 1;
- dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) |
+ dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD,
+ dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ?
+ NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) |
FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);
txd->dma_len_type = cpu_to_le16(dlen_type);
@@ -1303,7 +1318,7 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
skb_push(skb, 4));
}
- if (nfp_nfdk_tx_maybe_close_block(tx_ring, 0, skb))
+ if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb))
goto err_free;
/* DMA map all */
@@ -1328,7 +1343,9 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
txbuf++;
dma_len -= 1;
- dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) |
+ dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD,
+ dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ?
+ NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) |
FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);
txd->dma_len_type = cpu_to_le16(dlen_type);
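The clamp added in these hunks matters because FIELD_PREP() truncates dma_len to the width of NFDK_DESC_TX_DMA_LEN_HEAD, so an over-long head would silently wrap instead of being capped. A standalone sketch of truncation versus clamping; the 12-bit field width here is hypothetical, the real mask is defined in the driver headers:
/* Standalone sketch, not driver code: show why clamping to the field
 * maximum differs from letting the value be truncated to the field width.
 */
#include <stdio.h>
#include <stdint.h>
#define LEN_HEAD_MASK 0x0FFFu   /* hypothetical 12-bit field mask */
int main(void)
{
	uint32_t dma_len = 0x1800; /* longer than the field can hold */
	printf("truncated: 0x%x\n", dma_len & LEN_HEAD_MASK);      /* wraps to 0x800 */
	printf("clamped:   0x%x\n",
	       dma_len > LEN_HEAD_MASK ? LEN_HEAD_MASK : dma_len); /* 0xfff */
	return 0;
}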
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 186cb28c03bd..8b62ce21aff3 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -1932,7 +1932,10 @@ static int efx_ef10_try_update_nic_stats_vf(struct efx_nic *efx)
efx_update_sw_stats(efx, stats);
out:
+ /* releasing a DMA coherent buffer with BH disabled can panic */
+ spin_unlock_bh(&efx->stats_lock);
efx_nic_free_buffer(efx, &stats_buf);
+ spin_lock_bh(&efx->stats_lock);
return rc;
}
diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c
index 7f5aa4a8c451..92550c7e85ce 100644
--- a/drivers/net/ethernet/sfc/ef10_sriov.c
+++ b/drivers/net/ethernet/sfc/ef10_sriov.c
@@ -408,8 +408,9 @@ fail1:
static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force)
{
struct pci_dev *dev = efx->pci_dev;
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
unsigned int vfs_assigned = pci_vfs_assigned(dev);
- int rc = 0;
+ int i, rc = 0;
if (vfs_assigned && !force) {
netif_info(efx, drv, efx->net_dev, "VFs are assigned to guests; "
@@ -417,10 +418,13 @@ static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force)
return -EBUSY;
}
- if (!vfs_assigned)
+ if (!vfs_assigned) {
+ for (i = 0; i < efx->vf_count; i++)
+ nic_data->vf[i].pci_dev = NULL;
pci_disable_sriov(dev);
- else
+ } else {
rc = -EBUSY;
+ }
efx_ef10_sriov_free_vf_vswitching(efx);
efx->vf_count = 0;
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index 4625f85acab2..10ad0b93d283 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -1100,7 +1100,29 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb)
tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type);
if (tx_queue && tx_queue->timestamping) {
+ /* This code invokes normal driver TX code which is always
+ * protected from softirqs when called from generic TX code,
+ * which in turn disables preemption. Look at __dev_queue_xmit
+ * which uses rcu_read_lock_bh disabling preemption for RCU
+ * plus disabling softirqs. We do not need RCU reader
+ * protection here.
+ *
+ * Although it is theoretically safe for the current PTP TX/RX
+ * code to run without disabling softirqs, there are three good
+ * reasons for doing so:
+ *
+ * 1) The code invoked is mainly implemented for non-PTP
+ * packets and it is always executed with softirqs
+ * disabled.
+ * 2) This being a single PTP packet, better to not
+ * interrupt its processing by softirqs which can lead
+ * to high latencies.
+ * 3) netdev_xmit_more checks preemption is disabled and
+ * triggers a BUG_ON if not.
+ */
+ local_bh_disable();
efx_enqueue_skb(tx_queue, skb);
+ local_bh_enable();
} else {
WARN_ONCE(1, "PTP channel has no timestamped tx queue\n");
dev_kfree_skb_any(skb);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
index bc91fd867dcd..358fc26f8d1f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
@@ -361,6 +361,7 @@ bypass_clk_reset_gpio:
data->fix_mac_speed = tegra_eqos_fix_speed;
data->init = tegra_eqos_init;
data->bsp_priv = eqos;
+ data->sph_disable = 1;
err = tegra_eqos_init(pdev, eqos);
if (err < 0)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
index 9a6d819b84ae..378b4dd826bb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
@@ -273,7 +273,8 @@ static int ingenic_mac_probe(struct platform_device *pdev)
mac->tx_delay = tx_delay_ps * 1000;
} else {
dev_err(&pdev->dev, "Invalid TX clock delay: %dps\n", tx_delay_ps);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_remove_config_dt;
}
}
@@ -283,7 +284,8 @@ static int ingenic_mac_probe(struct platform_device *pdev)
mac->rx_delay = rx_delay_ps * 1000;
} else {
dev_err(&pdev->dev, "Invalid RX clock delay: %dps\n", rx_delay_ps);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_remove_config_dt;
}
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 38fe77d1035e..3fe720c5dc9f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -298,6 +298,11 @@ static void get_arttime(struct mii_bus *mii, int intel_adhoc_addr,
*art_time = ns;
}
+static int stmmac_cross_ts_isr(struct stmmac_priv *priv)
+{
+ return (readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE);
+}
+
static int intel_crosststamp(ktime_t *device,
struct system_counterval_t *system,
void *ctx)
@@ -313,8 +318,6 @@ static int intel_crosststamp(ktime_t *device,
u32 num_snapshot;
u32 gpio_value;
u32 acr_value;
- int ret;
- u32 v;
int i;
if (!boot_cpu_has(X86_FEATURE_ART))
@@ -328,6 +331,8 @@ static int intel_crosststamp(ktime_t *device,
if (priv->plat->ext_snapshot_en)
return -EBUSY;
+ priv->plat->int_snapshot_en = 1;
+
mutex_lock(&priv->aux_ts_lock);
/* Enable Internal snapshot trigger */
acr_value = readl(ptpaddr + PTP_ACR);
@@ -347,6 +352,7 @@ static int intel_crosststamp(ktime_t *device,
break;
default:
mutex_unlock(&priv->aux_ts_lock);
+ priv->plat->int_snapshot_en = 0;
return -EINVAL;
}
writel(acr_value, ptpaddr + PTP_ACR);
@@ -368,13 +374,12 @@ static int intel_crosststamp(ktime_t *device,
gpio_value |= GMAC_GPO1;
writel(gpio_value, ioaddr + GMAC_GPIO_STATUS);
- /* Poll for time sync operation done */
- ret = readl_poll_timeout(priv->ioaddr + GMAC_INT_STATUS, v,
- (v & GMAC_INT_TSIE), 100, 10000);
-
- if (ret == -ETIMEDOUT) {
- pr_err("%s: Wait for time sync operation timeout\n", __func__);
- return ret;
+ /* Wait for the time sync done indication (interrupt driven) */
+ if (!wait_event_interruptible_timeout(priv->tstamp_busy_wait,
+ stmmac_cross_ts_isr(priv),
+ HZ / 100)) {
+ priv->plat->int_snapshot_en = 0;
+ return -ETIMEDOUT;
}
num_snapshot = (readl(ioaddr + GMAC_TIMESTAMP_STATUS) &
@@ -392,6 +397,7 @@ static int intel_crosststamp(ktime_t *device,
}
system->cycles *= intel_priv->crossts_adj;
+ priv->plat->int_snapshot_en = 0;
return 0;
}
@@ -576,6 +582,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
plat->has_crossts = true;
plat->crosststamp = intel_crosststamp;
+ plat->int_snapshot_en = 0;
/* Setup MSI vector offset specific to Intel mGbE controller */
plat->msi_mac_vec = 29;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
index 6ff88df58767..d42e1afb6521 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
@@ -576,32 +576,7 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv)
}
}
- ret = clk_bulk_prepare_enable(variant->num_clks, plat->clks);
- if (ret) {
- dev_err(plat->dev, "failed to enable clks, err = %d\n", ret);
- return ret;
- }
-
- ret = clk_prepare_enable(plat->rmii_internal_clk);
- if (ret) {
- dev_err(plat->dev, "failed to enable rmii internal clk, err = %d\n", ret);
- goto err_clk;
- }
-
return 0;
-
-err_clk:
- clk_bulk_disable_unprepare(variant->num_clks, plat->clks);
- return ret;
-}
-
-static void mediatek_dwmac_exit(struct platform_device *pdev, void *priv)
-{
- struct mediatek_dwmac_plat_data *plat = priv;
- const struct mediatek_dwmac_variant *variant = plat->variant;
-
- clk_disable_unprepare(plat->rmii_internal_clk);
- clk_bulk_disable_unprepare(variant->num_clks, plat->clks);
}
static int mediatek_dwmac_clks_config(void *priv, bool enabled)
@@ -643,7 +618,6 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev,
plat->addr64 = priv_plat->variant->dma_bit_mask;
plat->bsp_priv = priv_plat;
plat->init = mediatek_dwmac_init;
- plat->exit = mediatek_dwmac_exit;
plat->clks_config = mediatek_dwmac_clks_config;
if (priv_plat->variant->dwmac_fix_mac_speed)
plat->fix_mac_speed = priv_plat->variant->dwmac_fix_mac_speed;
@@ -712,13 +686,33 @@ static int mediatek_dwmac_probe(struct platform_device *pdev)
mediatek_dwmac_common_data(pdev, plat_dat, priv_plat);
mediatek_dwmac_init(pdev, priv_plat);
+ ret = mediatek_dwmac_clks_config(priv_plat, true);
+ if (ret)
+ goto err_remove_config_dt;
+
ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
- if (ret) {
- stmmac_remove_config_dt(pdev, plat_dat);
- return ret;
- }
+ if (ret)
+ goto err_drv_probe;
return 0;
+
+err_drv_probe:
+ mediatek_dwmac_clks_config(priv_plat, false);
+err_remove_config_dt:
+ stmmac_remove_config_dt(pdev, plat_dat);
+
+ return ret;
+}
+
+static int mediatek_dwmac_remove(struct platform_device *pdev)
+{
+ struct mediatek_dwmac_plat_data *priv_plat = get_stmmac_bsp_priv(&pdev->dev);
+ int ret;
+
+ ret = stmmac_pltfr_remove(pdev);
+ mediatek_dwmac_clks_config(priv_plat, false);
+
+ return ret;
}
static const struct of_device_id mediatek_dwmac_match[] = {
@@ -733,7 +727,7 @@ MODULE_DEVICE_TABLE(of, mediatek_dwmac_match);
static struct platform_driver mediatek_dwmac_driver = {
.probe = mediatek_dwmac_probe,
- .remove = stmmac_pltfr_remove,
+ .remove = mediatek_dwmac_remove,
.driver = {
.name = "dwmac-mediatek",
.pm = &stmmac_pltfr_pm_ops,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index 462ca7ed095a..71dad409f78b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -150,7 +150,8 @@
#define GMAC_PCS_IRQ_DEFAULT (GMAC_INT_RGSMIIS | GMAC_INT_PCS_LINK | \
GMAC_INT_PCS_ANE)
-#define GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN)
+#define GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN | \
+ GMAC_INT_TSIE)
enum dwmac4_irq_status {
time_stamp_irq = 0x00001000,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index fd41db65fe1d..d8f1fbc25bdd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -23,6 +23,7 @@
static void dwmac4_core_init(struct mac_device_info *hw,
struct net_device *dev)
{
+ struct stmmac_priv *priv = netdev_priv(dev);
void __iomem *ioaddr = hw->pcsr;
u32 value = readl(ioaddr + GMAC_CONFIG);
@@ -58,6 +59,9 @@ static void dwmac4_core_init(struct mac_device_info *hw,
value |= GMAC_INT_FPE_EN;
writel(value, ioaddr + GMAC_INT_EN);
+
+ if (GMAC_INT_DEFAULT_ENABLE & GMAC_INT_TSIE)
+ init_waitqueue_head(&priv->tstamp_busy_wait);
}
static void dwmac4_rx_queue_enable(struct mac_device_info *hw,
@@ -219,6 +223,9 @@ static void dwmac4_map_mtl_dma(struct mac_device_info *hw, u32 queue, u32 chan)
if (queue == 0 || queue == 4) {
value &= ~MTL_RXQ_DMA_Q04MDMACH_MASK;
value |= MTL_RXQ_DMA_Q04MDMACH(chan);
+ } else if (queue > 4) {
+ value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue - 4);
+ value |= MTL_RXQ_DMA_QXMDMACH(chan, queue - 4);
} else {
value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue);
value |= MTL_RXQ_DMA_QXMDMACH(chan, queue);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 57970ae2178d..f9e83964aa7e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -266,6 +266,7 @@ struct stmmac_priv {
rwlock_t ptp_lock;
/* Protects auxiliary snapshot registers from concurrent access. */
struct mutex aux_ts_lock;
+ wait_queue_head_t tstamp_busy_wait;
void __iomem *mmcaddr;
void __iomem *ptpaddr;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index abfb3cd5958d..9c3055ee2608 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -803,14 +803,6 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev,
netdev_warn(priv->dev,
"Setting EEE tx-lpi is not supported\n");
- if (priv->hw->xpcs) {
- ret = xpcs_config_eee(priv->hw->xpcs,
- priv->plat->mult_fact_100ns,
- edata->eee_enabled);
- if (ret)
- return ret;
- }
-
if (!edata->eee_enabled)
stmmac_disable_eee_mode(priv);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 92d32940aff0..764832f4dae1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -179,6 +179,11 @@ static void timestamp_interrupt(struct stmmac_priv *priv)
u64 ptp_time;
int i;
+ if (priv->plat->int_snapshot_en) {
+ wake_up(&priv->tstamp_busy_wait);
+ return;
+ }
+
tsync_int = readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE;
if (!tsync_int)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d1a7cf4567bc..c5f33630e771 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -834,19 +834,10 @@ int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags)
struct timespec64 now;
u32 sec_inc = 0;
u64 temp = 0;
- int ret;
if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp))
return -EOPNOTSUPP;
- ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
- if (ret < 0) {
- netdev_warn(priv->dev,
- "failed to enable PTP reference clock: %pe\n",
- ERR_PTR(ret));
- return ret;
- }
-
stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags);
priv->systime_flags = systime_flags;
@@ -3270,6 +3261,14 @@ static int stmmac_hw_setup(struct net_device *dev, bool ptp_register)
stmmac_mmc_setup(priv);
+ if (ptp_register) {
+ ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
+ if (ret < 0)
+ netdev_warn(priv->dev,
+ "failed to enable PTP reference clock: %pe\n",
+ ERR_PTR(ret));
+ }
+
ret = stmmac_init_ptp(priv);
if (ret == -EOPNOTSUPP)
netdev_info(priv->dev, "PTP not supported by HW\n");
@@ -7213,8 +7212,6 @@ int stmmac_dvr_remove(struct device *dev)
netdev_info(priv->dev, "%s: removing driver", __func__);
pm_runtime_get_sync(dev);
- pm_runtime_disable(dev);
- pm_runtime_put_noidle(dev);
stmmac_stop_all_dma(priv);
stmmac_mac_set(priv, priv->ioaddr, false);
@@ -7241,6 +7238,9 @@ int stmmac_dvr_remove(struct device *dev)
mutex_destroy(&priv->lock);
bitmap_free(priv->af_xdp_zc_qps);
+ pm_runtime_disable(dev);
+ pm_runtime_put_noidle(dev);
+
return 0;
}
EXPORT_SYMBOL_GPL(stmmac_dvr_remove);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 11e1055e8260..9f5cac4000da 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -815,7 +815,13 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev)
if (ret)
return ret;
- stmmac_init_tstamp_counter(priv, priv->systime_flags);
+ ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
+ if (ret < 0) {
+ netdev_warn(priv->dev,
+ "failed to enable PTP reference clock: %pe\n",
+ ERR_PTR(ret));
+ return ret;
+ }
}
return 0;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index e45fb191d8e6..4d11980dcd64 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -175,11 +175,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
struct stmmac_priv *priv =
container_of(ptp, struct stmmac_priv, ptp_clock_ops);
void __iomem *ptpaddr = priv->ptpaddr;
- void __iomem *ioaddr = priv->hw->pcsr;
struct stmmac_pps_cfg *cfg;
- u32 intr_value, acr_value;
int ret = -EOPNOTSUPP;
unsigned long flags;
+ u32 acr_value;
switch (rq->type) {
case PTP_CLK_REQ_PEROUT:
@@ -213,19 +212,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
netdev_dbg(priv->dev, "Auxiliary Snapshot %d enabled.\n",
priv->plat->ext_snapshot_num >>
PTP_ACR_ATSEN_SHIFT);
- /* Enable Timestamp Interrupt */
- intr_value = readl(ioaddr + GMAC_INT_EN);
- intr_value |= GMAC_INT_TSIE;
- writel(intr_value, ioaddr + GMAC_INT_EN);
-
} else {
netdev_dbg(priv->dev, "Auxiliary Snapshot %d disabled.\n",
priv->plat->ext_snapshot_num >>
PTP_ACR_ATSEN_SHIFT);
- /* Disable Timestamp Interrupt */
- intr_value = readl(ioaddr + GMAC_INT_EN);
- intr_value &= ~GMAC_INT_TSIE;
- writel(intr_value, ioaddr + GMAC_INT_EN);
}
writel(acr_value, ptpaddr + PTP_ACR);
mutex_unlock(&priv->aux_ts_lock);
diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c
index 77e5dffb558f..8594ee839628 100644
--- a/drivers/net/ethernet/sun/sunhme.c
+++ b/drivers/net/ethernet/sun/sunhme.c
@@ -545,43 +545,24 @@ static int try_next_permutation(struct happy_meal *hp, void __iomem *tregs)
static void display_link_mode(struct happy_meal *hp, void __iomem *tregs)
{
- printk(KERN_INFO "%s: Link is up using ", hp->dev->name);
- if (hp->tcvr_type == external)
- printk("external ");
- else
- printk("internal ");
- printk("transceiver at ");
hp->sw_lpa = happy_meal_tcvr_read(hp, tregs, MII_LPA);
- if (hp->sw_lpa & (LPA_100HALF | LPA_100FULL)) {
- if (hp->sw_lpa & LPA_100FULL)
- printk("100Mb/s, Full Duplex.\n");
- else
- printk("100Mb/s, Half Duplex.\n");
- } else {
- if (hp->sw_lpa & LPA_10FULL)
- printk("10Mb/s, Full Duplex.\n");
- else
- printk("10Mb/s, Half Duplex.\n");
- }
+
+ netdev_info(hp->dev,
+ "Link is up using %s transceiver at %dMb/s, %s Duplex.\n",
+ hp->tcvr_type == external ? "external" : "internal",
+ hp->sw_lpa & (LPA_100HALF | LPA_100FULL) ? 100 : 10,
+ hp->sw_lpa & (LPA_100FULL | LPA_10FULL) ? "Full" : "Half");
}
static void display_forced_link_mode(struct happy_meal *hp, void __iomem *tregs)
{
- printk(KERN_INFO "%s: Link has been forced up using ", hp->dev->name);
- if (hp->tcvr_type == external)
- printk("external ");
- else
- printk("internal ");
- printk("transceiver at ");
hp->sw_bmcr = happy_meal_tcvr_read(hp, tregs, MII_BMCR);
- if (hp->sw_bmcr & BMCR_SPEED100)
- printk("100Mb/s, ");
- else
- printk("10Mb/s, ");
- if (hp->sw_bmcr & BMCR_FULLDPLX)
- printk("Full Duplex.\n");
- else
- printk("Half Duplex.\n");
+
+ netdev_info(hp->dev,
+ "Link has been forced up using %s transceiver at %dMb/s, %s Duplex.\n",
+ hp->tcvr_type == external ? "external" : "internal",
+ hp->sw_bmcr & BMCR_SPEED100 ? 100 : 10,
+ hp->sw_bmcr & BMCR_FULLDPLX ? "Full" : "Half");
}
static int set_happy_link_modes(struct happy_meal *hp, void __iomem *tregs)
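[editor's note] Both link reports above collapse a chain of printk() continuations into a single netdev_info() call, which cannot be interleaved by concurrent logging and automatically carries the interface name. The shape of the conversion, with placeholder flags instead of the driver's register reads:

#include <linux/netdevice.h>

static void example_report_link(struct net_device *dev, bool external,
                                bool is_100mbit, bool full_duplex)
{
        netdev_info(dev, "Link is up using %s transceiver at %dMb/s, %s Duplex.\n",
                    external ? "external" : "internal",
                    is_100mbit ? 100 : 10,
                    full_duplex ? "Full" : "Half");
}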
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index fb92d4c1547d..f4a6b590a1e3 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -2467,7 +2467,6 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common)
port->port_id, ret);
goto dl_port_unreg;
}
- devlink_port_type_eth_set(dl_port, port->ndev);
}
devlink_register(common->devlink);
return ret;
@@ -2511,6 +2510,7 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common)
static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
{
struct device *dev = common->dev;
+ struct devlink_port *dl_port;
struct am65_cpsw_port *port;
int ret = 0, i;
@@ -2527,6 +2527,10 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
return ret;
}
+ ret = am65_cpsw_nuss_register_devlink(common);
+ if (ret)
+ return ret;
+
for (i = 0; i < common->port_num; i++) {
port = &common->ports[i];
@@ -2539,25 +2543,24 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
i, ret);
goto err_cleanup_ndev;
}
+
+ dl_port = &port->devlink_port;
+ devlink_port_type_eth_set(dl_port, port->ndev);
}
ret = am65_cpsw_register_notifiers(common);
if (ret)
goto err_cleanup_ndev;
- ret = am65_cpsw_nuss_register_devlink(common);
- if (ret)
- goto clean_unregister_notifiers;
-
/* can't auto unregister ndev using devm_add_action() due to
* devres release sequence in DD core for DMA
*/
return 0;
-clean_unregister_notifiers:
- am65_cpsw_unregister_notifiers(common);
+
err_cleanup_ndev:
am65_cpsw_nuss_cleanup_ndev(common);
+ am65_cpsw_unregister_devlink(common);
return ret;
}
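[editor's note] The devlink instance is now registered before the net devices, so devlink_port_type_eth_set() can run in the same loop that brings each ndev up, and the error path unwinds both registrations. A rough sketch of the new ordering; every helper and type below is a placeholder, not the driver's real function:

struct example_common;

int example_register_devlink(struct example_common *common);
int example_register_netdevs(struct example_common *common);
void example_cleanup_netdevs(struct example_common *common);
void example_unregister_devlink(struct example_common *common);

static int example_register_ndevs(struct example_common *common)
{
        int ret;

        ret = example_register_devlink(common);   /* ports first */
        if (ret)
                return ret;

        ret = example_register_netdevs(common);   /* then ndevs + port typing */
        if (ret)
                goto err_cleanup;

        return 0;

err_cleanup:
        example_cleanup_netdevs(common);
        example_unregister_devlink(common);
        return ret;
}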
diff --git a/drivers/net/ipa/ipa_qmi_msg.h b/drivers/net/ipa/ipa_qmi_msg.h
index 3233d145fd87..495e85abe50b 100644
--- a/drivers/net/ipa/ipa_qmi_msg.h
+++ b/drivers/net/ipa/ipa_qmi_msg.h
@@ -214,7 +214,7 @@ struct ipa_init_modem_driver_req {
/* The response to a IPA_QMI_INIT_DRIVER request begins with a standard
* QMI response, but contains other information as well. Currently we
- * simply wait for the the INIT_DRIVER transaction to complete and
+ * simply wait for the INIT_DRIVER transaction to complete and
* ignore any other data that might be returned.
*/
struct ipa_init_modem_driver_rsp {
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 817577e713d7..f354fad05714 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -243,6 +243,7 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb)
#define DEFAULT_SEND_SCI true
#define DEFAULT_ENCRYPT false
#define DEFAULT_ENCODING_SA 0
+#define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1))
static bool send_sci(const struct macsec_secy *secy)
{
@@ -1697,7 +1698,7 @@ static bool validate_add_rxsa(struct nlattr **attrs)
return false;
if (attrs[MACSEC_SA_ATTR_PN] &&
- *(u64 *)nla_data(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
return false;
if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
@@ -1753,7 +1754,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
}
pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN;
- if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
+ if (tb_sa[MACSEC_SA_ATTR_PN] &&
+ nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n",
nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len);
rtnl_unlock();
@@ -1769,7 +1771,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n",
nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
- MACSEC_SA_ATTR_SALT);
+ MACSEC_SALT_LEN);
rtnl_unlock();
return -EINVAL;
}
@@ -1842,7 +1844,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
return 0;
cleanup:
- kfree(rx_sa);
+ macsec_rxsa_put(rx_sa);
rtnl_unlock();
return err;
}
@@ -1939,7 +1941,7 @@ static bool validate_add_txsa(struct nlattr **attrs)
if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
return false;
- if (nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
return false;
if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
@@ -2011,7 +2013,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n",
nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
- MACSEC_SA_ATTR_SALT);
+ MACSEC_SALT_LEN);
rtnl_unlock();
return -EINVAL;
}
@@ -2085,7 +2087,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
cleanup:
secy->operational = was_operational;
- kfree(tx_sa);
+ macsec_txsa_put(tx_sa);
rtnl_unlock();
return err;
}
@@ -2293,7 +2295,7 @@ static bool validate_upd_sa(struct nlattr **attrs)
if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
return false;
- if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
return false;
if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
@@ -3745,9 +3747,6 @@ static int macsec_changelink_common(struct net_device *dev,
secy->operational = tx_sa && tx_sa->active;
}
- if (data[IFLA_MACSEC_WINDOW])
- secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
-
if (data[IFLA_MACSEC_ENCRYPT])
tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]);
@@ -3793,6 +3792,16 @@ static int macsec_changelink_common(struct net_device *dev,
}
}
+ if (data[IFLA_MACSEC_WINDOW]) {
+ secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
+
+ /* IEEE 802.1AEbw-2013 10.7.8 - maximum replay window
+ * for XPN cipher suites */
+ if (secy->xpn &&
+ secy->replay_window > MACSEC_XPN_MAX_REPLAY_WINDOW)
+ return -EINVAL;
+ }
+
return 0;
}
@@ -3822,7 +3831,7 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[],
ret = macsec_changelink_common(dev, data);
if (ret)
- return ret;
+ goto cleanup;
/* If h/w offloading is available, propagate to the device */
if (macsec_is_offloaded(macsec)) {
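[editor's note] The macsec hunks above bundle several fixes: the 64-bit packet number is read with nla_get_u64() instead of casting nla_data(), SAs that failed setup are released through their refcount helpers rather than kfree(), and the replay window is capped at 2^30 - 1 for XPN cipher suites. A hedged sketch of the attribute-reading part only (attribute and helper names are illustrative):

#include <linux/types.h>
#include <net/netlink.h>

/* nla_get_u64() copies the value out of the attribute, which may be
 * unaligned, instead of dereferencing nla_data() through a u64 pointer.
 */
static int example_parse_pn(struct nlattr *pn_attr, u64 *pn)
{
        if (!pn_attr || nla_len(pn_attr) != sizeof(u64))
                return -EINVAL;

        *pn = nla_get_u64(pn_attr);
        return *pn ? 0 : -EINVAL;       /* a zero PN is rejected */
}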
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 4cfd05c15aee..d25fbb9caeba 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -896,7 +896,7 @@ static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs,
*/
ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS);
if (ret < 0)
- return false;
+ return ret;
if (ret & DW_VR_MII_C37_ANSGM_SP_LNKSTS) {
int speed_value;
diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c
index ff22b6b1c686..36803d932dff 100644
--- a/drivers/net/sungem_phy.c
+++ b/drivers/net/sungem_phy.c
@@ -450,6 +450,7 @@ static int bcm5421_init(struct mii_phy* phy)
int can_low_power = 1;
if (np == NULL || of_get_property(np, "no-autolowpower", NULL))
can_low_power = 0;
+ of_node_put(np);
if (can_low_power) {
/* Enable automatic low-power */
sungem_phy_write(phy, 0x1c, 0x9002);
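[editor's note] The node looked up earlier in this function comes from a reference-taking OF helper, so the added of_node_put() drops that reference once the property check is done. A minimal sketch of the lookup/put pairing (the surrounding function is illustrative):

#include <linux/of.h>

static bool example_allows_low_power(struct device_node *parent)
{
        struct device_node *np = of_get_parent(parent);
        bool allowed;

        allowed = np && !of_get_property(np, "no-autolowpower", NULL);
        of_node_put(np);        /* of_node_put(NULL) is a no-op */

        return allowed;
}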
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 7389d6ef8569..0f6efaabaa32 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -32,7 +32,7 @@
#define NETNEXT_VERSION "12"
/* Information for net */
-#define NET_VERSION "12"
+#define NET_VERSION "13"
#define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION
#define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
@@ -2156,7 +2156,7 @@ static inline void rtl_rx_vlan_tag(struct rx_desc *desc, struct sk_buff *skb)
}
static int r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc,
- struct sk_buff *skb, u32 len, u32 transport_offset)
+ struct sk_buff *skb, u32 len)
{
u32 mss = skb_shinfo(skb)->gso_size;
u32 opts1, opts2 = 0;
@@ -2167,6 +2167,8 @@ static int r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc,
opts1 = len | TX_FS | TX_LS;
if (mss) {
+ u32 transport_offset = (u32)skb_transport_offset(skb);
+
if (transport_offset > GTTCPHO_MAX) {
netif_warn(tp, tx_err, tp->netdev,
"Invalid transport offset 0x%x for TSO\n",
@@ -2197,6 +2199,7 @@ static int r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc,
opts1 |= transport_offset << GTTCPHO_SHIFT;
opts2 |= min(mss, MSS_MAX) << MSS_SHIFT;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ u32 transport_offset = (u32)skb_transport_offset(skb);
u8 ip_protocol;
if (transport_offset > TCPHO_MAX) {
@@ -2260,7 +2263,6 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
struct tx_desc *tx_desc;
struct sk_buff *skb;
unsigned int len;
- u32 offset;
skb = __skb_dequeue(&skb_head);
if (!skb)
@@ -2276,9 +2278,7 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
tx_data = tx_agg_align(tx_data);
tx_desc = (struct tx_desc *)tx_data;
- offset = (u32)skb_transport_offset(skb);
-
- if (r8152_tx_csum(tp, tx_desc, skb, skb->len, offset)) {
+ if (r8152_tx_csum(tp, tx_desc, skb, skb->len)) {
r8152_csum_workaround(tp, skb, &skb_head);
continue;
}
@@ -2759,9 +2759,9 @@ rtl8152_features_check(struct sk_buff *skb, struct net_device *dev,
{
u32 mss = skb_shinfo(skb)->gso_size;
int max_offset = mss ? GTTCPHO_MAX : TCPHO_MAX;
- int offset = skb_transport_offset(skb);
- if ((mss || skb->ip_summed == CHECKSUM_PARTIAL) && offset > max_offset)
+ if ((mss || skb->ip_summed == CHECKSUM_PARTIAL) &&
+ skb_transport_offset(skb) > max_offset)
features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
else if ((skb->len + sizeof(struct tx_desc)) > agg_buf_sz)
features &= ~NETIF_F_GSO_MASK;
@@ -5917,7 +5917,8 @@ static void r8153_enter_oob(struct r8152 *tp)
wait_oob_link_list_ready(tp);
- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, mtu_to_size(tp->netdev->mtu));
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, 1522);
+ ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_DEFAULT);
switch (tp->version) {
case RTL_VER_03:
@@ -5953,6 +5954,10 @@ static void r8153_enter_oob(struct r8152 *tp)
ocp_data |= NOW_IS_OOB | DIS_MCU_CLROOB;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
+ ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
+ ocp_data |= MCU_BORW_EN;
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
+
rxdy_gated_en(tp, false);
ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
@@ -6555,6 +6560,9 @@ static void rtl8156_down(struct r8152 *tp)
rtl_disable(tp);
rtl_reset_bmu(tp);
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, 1522);
+ ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_DEFAULT);
+
/* Clear teredo wake event. bit[15:8] is the teredo wakeup
* type. Set it to zero. bits[7:0] are the W1C bits about
* the events. Set them to all 1 to clear them.
@@ -6565,6 +6573,10 @@ static void rtl8156_down(struct r8152 *tp)
ocp_data |= NOW_IS_OOB;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
+ ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
+ ocp_data |= MCU_BORW_EN;
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
+
rtl_rx_vlan_en(tp, true);
rxdy_gated_en(tp, false);
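[editor's note] In the r8152 hunks, skb_transport_offset() is now computed inside r8152_tx_csum() on the TSO and checksum-offload branches that actually need it, rather than unconditionally by the caller. Illustrative shape of the range check, with a generic limit standing in for the driver's GTTCPHO_MAX/TCPHO_MAX constants:

#include <linux/skbuff.h>

static bool example_offset_fits(const struct sk_buff *skb, u32 max_offset)
{
        u32 transport_offset = (u32)skb_transport_offset(skb);

        /* Hardware encodes the transport header offset in a narrow
         * descriptor field, so anything beyond max_offset must fall
         * back to software checksumming/segmentation.
         */
        return transport_offset <= max_offset;
}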
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 356cf8dd4164..ec8e1b3108c3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -242,9 +242,15 @@ struct virtnet_info {
/* Packet virtio header size */
u8 hdr_len;
- /* Work struct for refilling if we run low on memory. */
+ /* Work struct for delayed refilling if we run low on memory. */
struct delayed_work refill;
+ /* Is delayed refill enabled? */
+ bool refill_enabled;
+
+ /* The lock to synchronize the access to refill_enabled */
+ spinlock_t refill_lock;
+
/* Work struct for config space updates */
struct work_struct config_work;
@@ -348,6 +354,20 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
return p;
}
+static void enable_delayed_refill(struct virtnet_info *vi)
+{
+ spin_lock_bh(&vi->refill_lock);
+ vi->refill_enabled = true;
+ spin_unlock_bh(&vi->refill_lock);
+}
+
+static void disable_delayed_refill(struct virtnet_info *vi)
+{
+ spin_lock_bh(&vi->refill_lock);
+ vi->refill_enabled = false;
+ spin_unlock_bh(&vi->refill_lock);
+}
+
static void virtqueue_napi_schedule(struct napi_struct *napi,
struct virtqueue *vq)
{
@@ -1527,8 +1547,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
}
if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
- if (!try_fill_recv(vi, rq, GFP_ATOMIC))
- schedule_delayed_work(&vi->refill, 0);
+ if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
+ spin_lock(&vi->refill_lock);
+ if (vi->refill_enabled)
+ schedule_delayed_work(&vi->refill, 0);
+ spin_unlock(&vi->refill_lock);
+ }
}
u64_stats_update_begin(&rq->stats.syncp);
@@ -1651,6 +1675,8 @@ static int virtnet_open(struct net_device *dev)
struct virtnet_info *vi = netdev_priv(dev);
int i, err;
+ enable_delayed_refill(vi);
+
for (i = 0; i < vi->max_queue_pairs; i++) {
if (i < vi->curr_queue_pairs)
/* Make sure we have some buffers: if oom use wq. */
@@ -2033,6 +2059,8 @@ static int virtnet_close(struct net_device *dev)
struct virtnet_info *vi = netdev_priv(dev);
int i;
+ /* Make sure NAPI doesn't schedule refill work */
+ disable_delayed_refill(vi);
/* Make sure refill_work doesn't re-enable napi! */
cancel_delayed_work_sync(&vi->refill);
@@ -2792,6 +2820,8 @@ static int virtnet_restore_up(struct virtio_device *vdev)
virtio_device_ready(vdev);
+ enable_delayed_refill(vi);
+
if (netif_running(vi->dev)) {
err = virtnet_open(vi->dev);
if (err)
@@ -3535,6 +3565,7 @@ static int virtnet_probe(struct virtio_device *vdev)
vdev->priv = vi;
INIT_WORK(&vi->config_work, virtnet_config_changed_work);
+ spin_lock_init(&vi->refill_lock);
/* If we can receive ANY GSO packets, we must allocate large ones. */
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
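[editor's note] The virtio_net change only schedules the refill work while refill_enabled is true, and the flag flips under refill_lock in open/close/restore; this closes the race where NAPI could re-queue the delayed work after virtnet_close() had already cancelled it. A condensed sketch of the guard (struct and helper names are illustrative; the lock is assumed to be spin_lock_init()'d at probe time):

#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct example_info {
        spinlock_t refill_lock;
        bool refill_enabled;
        struct delayed_work refill;
};

/* Called from NAPI context (BH already disabled), hence plain spin_lock(). */
static void example_maybe_schedule_refill(struct example_info *vi)
{
        spin_lock(&vi->refill_lock);
        if (vi->refill_enabled)
                schedule_delayed_work(&vi->refill, 0);
        spin_unlock(&vi->refill_lock);
}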
diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
index 84d1c7054013..7b1dc19c565e 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.c
+++ b/drivers/net/wireless/ath/ath11k/wmi.c
@@ -3822,7 +3822,8 @@ ath11k_wmi_obss_color_collision_event(struct ath11k_base *ab, struct sk_buff *sk
switch (ev->evt_type) {
case WMI_BSS_COLOR_COLLISION_DETECTION:
- ieeee80211_obss_color_collision_notify(arvif->vif, ev->obss_color_bitmap);
+ ieeee80211_obss_color_collision_notify(arvif->vif, ev->obss_color_bitmap,
+ GFP_KERNEL);
ath11k_dbg(ab, ATH11K_DBG_WMI,
"OBSS color collision detected vdev:%d, event:%d, bitmap:%08llx\n",
ev->vdev_id, ev->evt_type, ev->obss_color_bitmap);
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 2f746eb64507..6f83af849f2e 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -4912,6 +4912,8 @@ static int hwsim_virtio_probe(struct virtio_device *vdev)
if (err)
return err;
+ virtio_device_ready(vdev);
+
err = fill_vq(hwsim_vqs[HWSIM_VQ_RX]);
if (err)
goto out_remove;
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index c02be4ac159e..7db627fc26be 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -1233,9 +1233,6 @@ struct rtw_chip_info {
const struct wiphy_wowlan_support *wowlan_stub;
const u8 max_sched_scan_ssids;
- /* for 8821c set channel */
- u32 ch_param[3];
-
/* coex paras */
u32 coex_para_ver;
u8 bt_desired_ver;
@@ -1937,6 +1934,9 @@ struct rtw_hal {
enum rtw_sar_bands sar_band;
struct rtw_sar sar;
+
+ /* for 8821c set channel */
+ u32 ch_param[3];
};
struct rtw_path_div {
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c
index ffee39ea5df6..488a7ddd507c 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c
@@ -125,6 +125,7 @@ static void rtw8821c_phy_bf_init(struct rtw_dev *rtwdev)
static void rtw8821c_phy_set_param(struct rtw_dev *rtwdev)
{
+ struct rtw_hal *hal = &rtwdev->hal;
u8 crystal_cap, val;
/* power on BB/RF domain */
@@ -159,9 +160,9 @@ static void rtw8821c_phy_set_param(struct rtw_dev *rtwdev)
/* post init after header files config */
rtw_write32_set(rtwdev, REG_RXPSEL, BIT_RX_PSEL_RST);
- rtwdev->chip->ch_param[0] = rtw_read32_mask(rtwdev, REG_TXSF2, MASKDWORD);
- rtwdev->chip->ch_param[1] = rtw_read32_mask(rtwdev, REG_TXSF6, MASKDWORD);
- rtwdev->chip->ch_param[2] = rtw_read32_mask(rtwdev, REG_TXFILTER, MASKDWORD);
+ hal->ch_param[0] = rtw_read32_mask(rtwdev, REG_TXSF2, MASKDWORD);
+ hal->ch_param[1] = rtw_read32_mask(rtwdev, REG_TXSF6, MASKDWORD);
+ hal->ch_param[2] = rtw_read32_mask(rtwdev, REG_TXFILTER, MASKDWORD);
rtw_phy_init(rtwdev);
rtwdev->dm_info.cck_pd_default = rtw_read8(rtwdev, REG_CSRATIO) & 0x1f;
@@ -351,6 +352,7 @@ static void rtw8821c_set_channel_rxdfir(struct rtw_dev *rtwdev, u8 bw)
static void rtw8821c_set_channel_bb(struct rtw_dev *rtwdev, u8 channel, u8 bw,
u8 primary_ch_idx)
{
+ struct rtw_hal *hal = &rtwdev->hal;
u32 val32;
if (channel <= 14) {
@@ -367,11 +369,11 @@ static void rtw8821c_set_channel_bb(struct rtw_dev *rtwdev, u8 channel, u8 bw,
rtw_write32_mask(rtwdev, REG_TXFILTER, MASKDWORD, 0x00003667);
} else {
rtw_write32_mask(rtwdev, REG_TXSF2, MASKDWORD,
- rtwdev->chip->ch_param[0]);
+ hal->ch_param[0]);
rtw_write32_mask(rtwdev, REG_TXSF6, MASKLWORD,
- rtwdev->chip->ch_param[1] & MASKLWORD);
+ hal->ch_param[1] & MASKLWORD);
rtw_write32_mask(rtwdev, REG_TXFILTER, MASKDWORD,
- rtwdev->chip->ch_param[2]);
+ hal->ch_param[2]);
}
} else if (channel > 35) {
rtw_write32_mask(rtwdev, REG_ENTXCCK, BIT(18), 0x1);
diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
index dbac4c03d21a..a0335407be42 100644
--- a/drivers/net/xen-netback/rx.c
+++ b/drivers/net/xen-netback/rx.c
@@ -495,6 +495,7 @@ void xenvif_rx_action(struct xenvif_queue *queue)
queue->rx_copy.completed = &completed_skbs;
while (xenvif_rx_ring_slots_available(queue) &&
+ !skb_queue_empty(&queue->rx_queue) &&
work_done < RX_BATCH_SIZE) {
xenvif_rx_skb(queue);
work_done++;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index ec6ac298d8de..6a12a906a11e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3786,7 +3786,7 @@ static int nvme_add_ns_cdev(struct nvme_ns *ns)
}
static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
- unsigned nsid, struct nvme_ns_ids *ids)
+ unsigned nsid, struct nvme_ns_ids *ids, bool is_shared)
{
struct nvme_ns_head *head;
size_t size = sizeof(*head);
@@ -3810,6 +3810,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
head->subsys = ctrl->subsys;
head->ns_id = nsid;
head->ids = *ids;
+ head->shared = is_shared;
kref_init(&head->ref);
if (head->ids.csi) {
@@ -3891,12 +3892,11 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
nsid);
goto out_unlock;
}
- head = nvme_alloc_ns_head(ctrl, nsid, ids);
+ head = nvme_alloc_ns_head(ctrl, nsid, ids, is_shared);
if (IS_ERR(head)) {
ret = PTR_ERR(head);
goto out_unlock;
}
- head->shared = is_shared;
} else {
ret = -EINVAL;
if (!is_shared || !head->shared) {
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 193b44755662..73d9fcba3b1c 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2690,8 +2690,13 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
struct pci_dev *pdev = to_pci_dev(dev->dev);
mutex_lock(&dev->shutdown_lock);
- if (pci_device_is_present(pdev) && pci_is_enabled(pdev)) {
- u32 csts = readl(dev->bar + NVME_REG_CSTS);
+ if (pci_is_enabled(pdev)) {
+ u32 csts;
+
+ if (pci_device_is_present(pdev))
+ csts = readl(dev->bar + NVME_REG_CSTS);
+ else
+ csts = ~0;
if (dev->ctrl.state == NVME_CTRL_LIVE ||
dev->ctrl.state == NVME_CTRL_RESETTING) {
@@ -3510,6 +3515,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1e49, 0x0041), /* ZHITAI TiPro7000 NVMe SSD */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
+ { PCI_DEVICE(0xc0a9, 0x540a), /* Crucial P2 */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
.driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065),
diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c
index 8d374cc552be..f2e58ddfaed2 100644
--- a/drivers/of/kexec.c
+++ b/drivers/of/kexec.c
@@ -9,6 +9,7 @@
* Copyright (C) 2016 IBM Corporation
*/
+#include <linux/ima.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/memblock.h>
@@ -115,6 +116,7 @@ static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr,
return 0;
}
+#ifdef CONFIG_HAVE_IMA_KEXEC
/**
* ima_get_kexec_buffer - get IMA buffer from the previous kernel
* @addr: On successful return, set to point to the buffer contents.
@@ -122,16 +124,13 @@ static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr,
*
* Return: 0 on success, negative errno on error.
*/
-int ima_get_kexec_buffer(void **addr, size_t *size)
+int __init ima_get_kexec_buffer(void **addr, size_t *size)
{
int ret, len;
unsigned long tmp_addr;
size_t tmp_size;
const void *prop;
- if (!IS_ENABLED(CONFIG_HAVE_IMA_KEXEC))
- return -ENOTSUPP;
-
prop = of_get_property(of_chosen, "linux,ima-kexec-buffer", &len);
if (!prop)
return -ENOENT;
@@ -149,16 +148,13 @@ int ima_get_kexec_buffer(void **addr, size_t *size)
/**
* ima_free_kexec_buffer - free memory used by the IMA buffer
*/
-int ima_free_kexec_buffer(void)
+int __init ima_free_kexec_buffer(void)
{
int ret;
unsigned long addr;
size_t size;
struct property *prop;
- if (!IS_ENABLED(CONFIG_HAVE_IMA_KEXEC))
- return -ENOTSUPP;
-
prop = of_find_property(of_chosen, "linux,ima-kexec-buffer", NULL);
if (!prop)
return -ENOENT;
@@ -173,6 +169,7 @@ int ima_free_kexec_buffer(void)
return memblock_phys_free(addr, size);
}
+#endif
/**
* remove_ima_buffer - remove the IMA buffer property and reservation from @fdt
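[editor's note] Wrapping the two IMA helpers in #ifdef CONFIG_HAVE_IMA_KEXEC (instead of the IS_ENABLED() early returns) lets them be marked __init, since their only callers run during boot; on configs without the feature the functions simply do not exist. A small sketch contrasting the two idioms, using a hypothetical config symbol:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>

/* Compile-time removal: the body only exists when the feature is
 * configured, so it may safely be __init.
 */
#ifdef CONFIG_EXAMPLE_FEATURE
int __init example_setup(void)
{
        return 0;
}
#endif

/* Runtime check: the function always exists, so it cannot be __init if
 * anything might call it after boot; dead code elimination is left to
 * the compiler.
 */
int example_setup_runtime(void)
{
        if (!IS_ENABLED(CONFIG_EXAMPLE_FEATURE))
                return -EOPNOTSUPP;
        return 0;
}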
diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c
index 8a3b0c3a1e92..3a8c98615634 100644
--- a/drivers/parisc/iosapic.c
+++ b/drivers/parisc/iosapic.c
@@ -677,7 +677,7 @@ static int iosapic_set_affinity_irq(struct irq_data *d,
if (dest_cpu < 0)
return -1;
- cpumask_copy(irq_data_get_affinity_mask(d), cpumask_of(dest_cpu));
+ irq_data_update_affinity(d, cpumask_of(dest_cpu));
vi->txn_addr = txn_affinity_addr(d->irq, dest_cpu);
spin_lock_irqsave(&iosapic_lock, flags);
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index db814f7b93ba..e7c6f6629e7c 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -642,7 +642,7 @@ static void hv_arch_irq_unmask(struct irq_data *data)
struct hv_retarget_device_interrupt *params;
struct tran_int_desc *int_desc;
struct hv_pcibus_device *hbus;
- struct cpumask *dest;
+ const struct cpumask *dest;
cpumask_var_t tmp;
struct pci_bus *pbus;
struct pci_dev *pdev;
@@ -1613,7 +1613,7 @@ out:
}
static u32 hv_compose_msi_req_v1(
- struct pci_create_interrupt *int_pkt, struct cpumask *affinity,
+ struct pci_create_interrupt *int_pkt, const struct cpumask *affinity,
u32 slot, u8 vector, u8 vector_count)
{
int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE;
@@ -1635,13 +1635,13 @@ static u32 hv_compose_msi_req_v1(
* Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
* by subsequent retarget in hv_irq_unmask().
*/
-static int hv_compose_msi_req_get_cpu(struct cpumask *affinity)
+static int hv_compose_msi_req_get_cpu(const struct cpumask *affinity)
{
return cpumask_first_and(affinity, cpu_online_mask);
}
static u32 hv_compose_msi_req_v2(
- struct pci_create_interrupt2 *int_pkt, struct cpumask *affinity,
+ struct pci_create_interrupt2 *int_pkt, const struct cpumask *affinity,
u32 slot, u8 vector, u8 vector_count)
{
int cpu;
@@ -1660,7 +1660,7 @@ static u32 hv_compose_msi_req_v2(
}
static u32 hv_compose_msi_req_v3(
- struct pci_create_interrupt3 *int_pkt, struct cpumask *affinity,
+ struct pci_create_interrupt3 *int_pkt, const struct cpumask *affinity,
u32 slot, u32 vector, u8 vector_count)
{
int cpu;
@@ -1697,7 +1697,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
struct hv_pci_dev *hpdev;
struct pci_bus *pbus;
struct pci_dev *pdev;
- struct cpumask *dest;
+ const struct cpumask *dest;
struct compose_comp_ctxt comp;
struct tran_int_desc *int_desc;
struct msi_desc *msi_desc;
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index 96e09fa40909..03b1309875ae 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -1139,7 +1139,7 @@ static void cci_pmu_start(struct perf_event *event, int pmu_flags)
/*
* To handle interrupt latency, we always reprogram the period
- * regardlesss of PERF_EF_RELOAD.
+ * regardless of PERF_EF_RELOAD.
*/
if (pmu_flags & PERF_EF_RELOAD)
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
@@ -1261,7 +1261,7 @@ static int validate_group(struct perf_event *event)
*/
.used_mask = mask,
};
- memset(mask, 0, BITS_TO_LONGS(cci_pmu->num_cntrs) * sizeof(unsigned long));
+ bitmap_zero(mask, cci_pmu->num_cntrs);
if (!validate_event(event->pmu, &fake_pmu, leader))
return -EINVAL;
@@ -1629,10 +1629,9 @@ static struct cci_pmu *cci_pmu_alloc(struct device *dev)
GFP_KERNEL);
if (!cci_pmu->hw_events.events)
return ERR_PTR(-ENOMEM);
- cci_pmu->hw_events.used_mask = devm_kcalloc(dev,
- BITS_TO_LONGS(CCI_PMU_MAX_HW_CNTRS(model)),
- sizeof(*cci_pmu->hw_events.used_mask),
- GFP_KERNEL);
+ cci_pmu->hw_events.used_mask = devm_bitmap_zalloc(dev,
+ CCI_PMU_MAX_HW_CNTRS(model),
+ GFP_KERNEL);
if (!cci_pmu->hw_events.used_mask)
return ERR_PTR(-ENOMEM);
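[editor's note] Both arm-cci changes switch from open-coded length arithmetic to the bitmap helpers: bitmap_zero() takes the number of bits directly, and devm_bitmap_zalloc() allocates a zeroed bitmap sized in bits with device-managed lifetime. An equivalent sketch (function name and parameters are illustrative):

#include <linux/bitmap.h>
#include <linux/device.h>
#include <linux/gfp.h>

static unsigned long *example_alloc_used_mask(struct device *dev,
                                              unsigned int num_counters)
{
        /* Replaces devm_kcalloc(BITS_TO_LONGS(n), sizeof(long), ...) */
        unsigned long *mask = devm_bitmap_zalloc(dev, num_counters, GFP_KERNEL);

        if (!mask)
                return NULL;

        /* Replaces memset(mask, 0, BITS_TO_LONGS(n) * sizeof(long)) */
        bitmap_zero(mask, num_counters);
        return mask;
}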
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index 40b352e8aa7f..728d13d8e98a 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -1250,7 +1250,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn)
ccn->dt.cmp_mask[CCN_IDX_MASK_OPCODE].h = ~(0x1f << 9);
/* Get a convenient /sys/event_source/devices/ name */
- ccn->dt.id = ida_simple_get(&arm_ccn_pmu_ida, 0, 0, GFP_KERNEL);
+ ccn->dt.id = ida_alloc(&arm_ccn_pmu_ida, GFP_KERNEL);
if (ccn->dt.id == 0) {
name = "ccn";
} else {
@@ -1312,7 +1312,7 @@ error_pmu_register:
&ccn->dt.node);
error_set_affinity:
error_choose_name:
- ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id);
+ ida_free(&arm_ccn_pmu_ida, ccn->dt.id);
for (i = 0; i < ccn->num_xps; i++)
writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL);
writel(0, ccn->dt.base + CCN_DT_PMCR);
@@ -1329,7 +1329,7 @@ static void arm_ccn_pmu_cleanup(struct arm_ccn *ccn)
writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL);
writel(0, ccn->dt.base + CCN_DT_PMCR);
perf_pmu_unregister(&ccn->dt.pmu);
- ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id);
+ ida_free(&arm_ccn_pmu_ida, ccn->dt.id);
}
static int arm_ccn_for_each_valid_region(struct arm_ccn *ccn,
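[editor's note] ida_simple_get(&ida, 0, 0, gfp) and ida_simple_remove() are the deprecated spellings; ida_alloc()/ida_free() allocate the smallest free ID starting at 0 with the same semantics. Sketch of the replacement pair (names are illustrative):

#include <linux/gfp.h>
#include <linux/idr.h>

static DEFINE_IDA(example_ida);

static int example_get_instance_id(void)
{
        /* Equivalent to ida_simple_get(&example_ida, 0, 0, GFP_KERNEL);
         * returns a non-negative ID or a negative errno.
         */
        return ida_alloc(&example_ida, GFP_KERNEL);
}

static void example_put_instance_id(int id)
{
        ida_free(&example_ida, id);     /* was ida_simple_remove() */
}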
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index db670b265897..b65a7d9640e1 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -39,6 +39,24 @@
#include <asm/mmu.h>
#include <asm/sysreg.h>
+/*
+ * Cache if the event is allowed to trace Context information.
+ * This allows us to perform the check, i.e, perfmon_capable(),
+ * in the context of the event owner, once, during the event_init().
+ */
+#define SPE_PMU_HW_FLAGS_CX BIT(0)
+
+static void set_spe_event_has_cx(struct perf_event *event)
+{
+ if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && perfmon_capable())
+ event->hw.flags |= SPE_PMU_HW_FLAGS_CX;
+}
+
+static bool get_spe_event_has_cx(struct perf_event *event)
+{
+ return !!(event->hw.flags & SPE_PMU_HW_FLAGS_CX);
+}
+
#define ARM_SPE_BUF_PAD_BYTE 0
struct arm_spe_pmu_buf {
@@ -272,7 +290,7 @@ static u64 arm_spe_event_to_pmscr(struct perf_event *event)
if (!attr->exclude_kernel)
reg |= BIT(SYS_PMSCR_EL1_E1SPE_SHIFT);
- if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && perfmon_capable())
+ if (get_spe_event_has_cx(event))
reg |= BIT(SYS_PMSCR_EL1_CX_SHIFT);
return reg;
@@ -709,10 +727,10 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
!(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT))
return -EOPNOTSUPP;
+ set_spe_event_has_cx(event);
reg = arm_spe_event_to_pmscr(event);
if (!perfmon_capable() &&
(reg & (BIT(SYS_PMSCR_EL1_PA_SHIFT) |
- BIT(SYS_PMSCR_EL1_CX_SHIFT) |
BIT(SYS_PMSCR_EL1_PCT_SHIFT))))
return -EACCES;
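[editor's note] Whether CONTEXTIDR tracing is permitted is now decided once in event_init(), in the context of the task opening the event, and cached in the event's hw flags; arm_spe_event_to_pmscr() later just tests the cached bit, so the PMSCR value no longer depends on who happens to be current at add/start time. The cache-then-test pattern reduced to its essentials (flag bit and helper names are illustrative):

#include <linux/bits.h>
#include <linux/perf_event.h>

#define EXAMPLE_HW_FLAG_CX      BIT(0)

/* event_init() context: runs as the task opening the event, so a
 * capability check made here reflects the right credentials.
 */
static void example_cache_cx(struct perf_event *event, bool allowed)
{
        if (allowed)
                event->hw.flags |= EXAMPLE_HW_FLAG_CX;
}

/* Any later context (add/start/IRQ): only consult the cached flag. */
static bool example_event_has_cx(struct perf_event *event)
{
        return !!(event->hw.flags & EXAMPLE_HW_FLAG_CX);
}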
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index b1b2a55de77f..8e058e08fe81 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -611,7 +611,7 @@ static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
.dev = dev,
};
- pmu->id = ida_simple_get(&ddr_ida, 0, 0, GFP_KERNEL);
+ pmu->id = ida_alloc(&ddr_ida, GFP_KERNEL);
return pmu->id;
}
@@ -765,7 +765,7 @@ ddr_perf_err:
cpuhp_instance_err:
cpuhp_remove_multi_state(pmu->cpuhp_state);
cpuhp_state_err:
- ida_simple_remove(&ddr_ida, pmu->id);
+ ida_free(&ddr_ida, pmu->id);
dev_warn(&pdev->dev, "i.MX8 DDR Perf PMU failed (%d), disabled\n", ret);
return ret;
}
@@ -779,7 +779,7 @@ static int ddr_perf_remove(struct platform_device *pdev)
perf_pmu_unregister(&pmu->pmu);
- ida_simple_remove(&ddr_ida, pmu->id);
+ ida_free(&ddr_ida, pmu->id);
return 0;
}
diff --git a/drivers/perf/hisilicon/Kconfig b/drivers/perf/hisilicon/Kconfig
index 5546218b5598..171bfc1b6bc2 100644
--- a/drivers/perf/hisilicon/Kconfig
+++ b/drivers/perf/hisilicon/Kconfig
@@ -14,3 +14,13 @@ config HISI_PCIE_PMU
RCiEP devices.
Adds the PCIe PMU into perf events system for monitoring latency,
bandwidth etc.
+
+config HNS3_PMU
+ tristate "HNS3 PERF PMU"
+ depends on ARM64 || COMPILE_TEST
+ depends on PCI
+ help
+ Provide support for HNS3 performance monitoring unit (PMU) RCiEP
+ devices.
+ Adds the HNS3 PMU into perf events system for monitoring latency,
+ bandwidth etc.
diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile
index 6be83517acaa..4d2c9abe3372 100644
--- a/drivers/perf/hisilicon/Makefile
+++ b/drivers/perf/hisilicon/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \
hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o
obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o
+obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o
diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
index 62299ab5a9be..50d0c0a2f1fe 100644
--- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -516,21 +516,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev)
"hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id,
ddrc_pmu->index_id);
- ddrc_pmu->pmu = (struct pmu) {
- .name = name,
- .module = THIS_MODULE,
- .task_ctx_nr = perf_invalid_context,
- .event_init = hisi_uncore_pmu_event_init,
- .pmu_enable = hisi_uncore_pmu_enable,
- .pmu_disable = hisi_uncore_pmu_disable,
- .add = hisi_uncore_pmu_add,
- .del = hisi_uncore_pmu_del,
- .start = hisi_uncore_pmu_start,
- .stop = hisi_uncore_pmu_stop,
- .read = hisi_uncore_pmu_read,
- .attr_groups = ddrc_pmu->pmu_events.attr_groups,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
- };
+ hisi_pmu_init(&ddrc_pmu->pmu, name, ddrc_pmu->pmu_events.attr_groups, THIS_MODULE);
ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1);
if (ret) {
diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
index 393513150106..13017b3412a5 100644
--- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
@@ -519,21 +519,7 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev)
name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u",
hha_pmu->sccl_id, hha_pmu->index_id);
- hha_pmu->pmu = (struct pmu) {
- .name = name,
- .module = THIS_MODULE,
- .task_ctx_nr = perf_invalid_context,
- .event_init = hisi_uncore_pmu_event_init,
- .pmu_enable = hisi_uncore_pmu_enable,
- .pmu_disable = hisi_uncore_pmu_disable,
- .add = hisi_uncore_pmu_add,
- .del = hisi_uncore_pmu_del,
- .start = hisi_uncore_pmu_start,
- .stop = hisi_uncore_pmu_stop,
- .read = hisi_uncore_pmu_read,
- .attr_groups = hha_pmu->pmu_events.attr_groups,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
- };
+ hisi_pmu_init(&hha_pmu->pmu, name, hha_pmu->pmu_events.attr_groups, THIS_MODULE);
ret = perf_pmu_register(&hha_pmu->pmu, name, -1);
if (ret) {
diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index 560ab964c8b5..2995f3630d49 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -557,21 +557,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev)
*/
name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u",
l3c_pmu->sccl_id, l3c_pmu->ccl_id);
- l3c_pmu->pmu = (struct pmu) {
- .name = name,
- .module = THIS_MODULE,
- .task_ctx_nr = perf_invalid_context,
- .event_init = hisi_uncore_pmu_event_init,
- .pmu_enable = hisi_uncore_pmu_enable,
- .pmu_disable = hisi_uncore_pmu_disable,
- .add = hisi_uncore_pmu_add,
- .del = hisi_uncore_pmu_del,
- .start = hisi_uncore_pmu_start,
- .stop = hisi_uncore_pmu_stop,
- .read = hisi_uncore_pmu_read,
- .attr_groups = l3c_pmu->pmu_events.attr_groups,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
- };
+ hisi_pmu_init(&l3c_pmu->pmu, name, l3c_pmu->pmu_events.attr_groups, THIS_MODULE);
ret = perf_pmu_register(&l3c_pmu->pmu, name, -1);
if (ret) {
diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
index a0ee84d97c41..47d3cc9b6eec 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
@@ -412,21 +412,7 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev)
return ret;
}
- pa_pmu->pmu = (struct pmu) {
- .module = THIS_MODULE,
- .task_ctx_nr = perf_invalid_context,
- .event_init = hisi_uncore_pmu_event_init,
- .pmu_enable = hisi_uncore_pmu_enable,
- .pmu_disable = hisi_uncore_pmu_disable,
- .add = hisi_uncore_pmu_add,
- .del = hisi_uncore_pmu_del,
- .start = hisi_uncore_pmu_start,
- .stop = hisi_uncore_pmu_stop,
- .read = hisi_uncore_pmu_read,
- .attr_groups = pa_pmu->pmu_events.attr_groups,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
- };
-
+ hisi_pmu_init(&pa_pmu->pmu, name, pa_pmu->pmu_events.attr_groups, THIS_MODULE);
ret = perf_pmu_register(&pa_pmu->pmu, name, -1);
if (ret) {
dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret);
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 980b9ee6eb14..fbc8a93d5eac 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -531,4 +531,22 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
}
EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu);
+void hisi_pmu_init(struct pmu *pmu, const char *name,
+ const struct attribute_group **attr_groups, struct module *module)
+{
+ pmu->name = name;
+ pmu->module = module;
+ pmu->task_ctx_nr = perf_invalid_context;
+ pmu->event_init = hisi_uncore_pmu_event_init;
+ pmu->pmu_enable = hisi_uncore_pmu_enable;
+ pmu->pmu_disable = hisi_uncore_pmu_disable;
+ pmu->add = hisi_uncore_pmu_add;
+ pmu->del = hisi_uncore_pmu_del;
+ pmu->start = hisi_uncore_pmu_start;
+ pmu->stop = hisi_uncore_pmu_stop;
+ pmu->read = hisi_uncore_pmu_read;
+ pmu->attr_groups = attr_groups;
+}
+EXPORT_SYMBOL_GPL(hisi_pmu_init);
+
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index 96eeddad55ff..b59de33cd059 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -121,4 +121,6 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
struct platform_device *pdev);
+void hisi_pmu_init(struct pmu *pmu, const char *name,
+ const struct attribute_group **attr_groups, struct module *module);
#endif /* __HISI_UNCORE_PMU_H__ */
diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
index 6aedc303ff56..b9c79f17230c 100644
--- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
@@ -445,20 +445,7 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev)
return ret;
}
- sllc_pmu->pmu = (struct pmu) {
- .module = THIS_MODULE,
- .task_ctx_nr = perf_invalid_context,
- .event_init = hisi_uncore_pmu_event_init,
- .pmu_enable = hisi_uncore_pmu_enable,
- .pmu_disable = hisi_uncore_pmu_disable,
- .add = hisi_uncore_pmu_add,
- .del = hisi_uncore_pmu_del,
- .start = hisi_uncore_pmu_start,
- .stop = hisi_uncore_pmu_stop,
- .read = hisi_uncore_pmu_read,
- .attr_groups = sllc_pmu->pmu_events.attr_groups,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
- };
+ hisi_pmu_init(&sllc_pmu->pmu, name, sllc_pmu->pmu_events.attr_groups, THIS_MODULE);
ret = perf_pmu_register(&sllc_pmu->pmu, name, -1);
if (ret) {
diff --git a/drivers/perf/hisilicon/hns3_pmu.c b/drivers/perf/hisilicon/hns3_pmu.c
new file mode 100644
index 000000000000..e0457d84af6b
--- /dev/null
+++ b/drivers/perf/hisilicon/hns3_pmu.c
@@ -0,0 +1,1671 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This driver adds support for HNS3 PMU iEP device. Related perf events are
+ * bandwidth, latency, packet rate, interrupt rate etc.
+ *
+ * Copyright (C) 2022 HiSilicon Limited
+ */
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpumask.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pci-epf.h>
+#include <linux/perf_event.h>
+#include <linux/smp.h>
+
+/* registers offset address */
+#define HNS3_PMU_REG_GLOBAL_CTRL 0x0000
+#define HNS3_PMU_REG_CLOCK_FREQ 0x0020
+#define HNS3_PMU_REG_BDF 0x0fe0
+#define HNS3_PMU_REG_VERSION 0x0fe4
+#define HNS3_PMU_REG_DEVICE_ID 0x0fe8
+
+#define HNS3_PMU_REG_EVENT_OFFSET 0x1000
+#define HNS3_PMU_REG_EVENT_SIZE 0x1000
+#define HNS3_PMU_REG_EVENT_CTRL_LOW 0x00
+#define HNS3_PMU_REG_EVENT_CTRL_HIGH 0x04
+#define HNS3_PMU_REG_EVENT_INTR_STATUS 0x08
+#define HNS3_PMU_REG_EVENT_INTR_MASK 0x0c
+#define HNS3_PMU_REG_EVENT_COUNTER 0x10
+#define HNS3_PMU_REG_EVENT_EXT_COUNTER 0x18
+#define HNS3_PMU_REG_EVENT_QID_CTRL 0x28
+#define HNS3_PMU_REG_EVENT_QID_PARA 0x2c
+
+#define HNS3_PMU_FILTER_SUPPORT_GLOBAL BIT(0)
+#define HNS3_PMU_FILTER_SUPPORT_PORT BIT(1)
+#define HNS3_PMU_FILTER_SUPPORT_PORT_TC BIT(2)
+#define HNS3_PMU_FILTER_SUPPORT_FUNC BIT(3)
+#define HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE BIT(4)
+#define HNS3_PMU_FILTER_SUPPORT_FUNC_INTR BIT(5)
+
+#define HNS3_PMU_FILTER_ALL_TC 0xf
+#define HNS3_PMU_FILTER_ALL_QUEUE 0xffff
+
+#define HNS3_PMU_CTRL_SUBEVENT_S 4
+#define HNS3_PMU_CTRL_FILTER_MODE_S 24
+
+#define HNS3_PMU_GLOBAL_START BIT(0)
+
+#define HNS3_PMU_EVENT_STATUS_RESET BIT(11)
+#define HNS3_PMU_EVENT_EN BIT(12)
+#define HNS3_PMU_EVENT_OVERFLOW_RESTART BIT(15)
+
+#define HNS3_PMU_QID_PARA_FUNC_S 0
+#define HNS3_PMU_QID_PARA_QUEUE_S 16
+
+#define HNS3_PMU_QID_CTRL_REQ_ENABLE BIT(0)
+#define HNS3_PMU_QID_CTRL_DONE BIT(1)
+#define HNS3_PMU_QID_CTRL_MISS BIT(2)
+
+#define HNS3_PMU_INTR_MASK_OVERFLOW BIT(1)
+
+#define HNS3_PMU_MAX_HW_EVENTS 8
+
+/*
+ * Each hardware event contains two registers (counter and ext_counter) for
+ * bandwidth, packet rate, latency and interrupt rate. These two registers will
+ * be triggered to run at the same when a hardware event is enabled. The meaning
+ * of counter and ext_counter of different event type are different, their
+ * meaning show as follow:
+ *
+ * +----------------+------------------+---------------+
+ * | event type | counter | ext_counter |
+ * +----------------+------------------+---------------+
+ * | bandwidth | byte number | cycle number |
+ * +----------------+------------------+---------------+
+ * | packet rate | packet number | cycle number |
+ * +----------------+------------------+---------------+
+ * | latency | cycle number | packet number |
+ * +----------------+------------------+---------------+
+ * | interrupt rate | interrupt number | cycle number |
+ * +----------------+------------------+---------------+
+ *
+ * The cycle number indicates increment of counter of hardware timer, the
+ * frequency of hardware timer can be read from hw_clk_freq file.
+ *
+ * Performance of each hardware event is calculated by: counter / ext_counter.
+ *
+ * Since processing of data is preferred to be done in userspace, we expose
+ * ext_counter as a separate event for userspace and use bit 16 to indicate it.
+ * For example, event 0x00001 and 0x10001 are actually one event for hardware
+ * because bit 0-15 are same. If the bit 16 of one event is 0 means to read
+ * counter register, otherwise means to read ext_counter register.
+ */
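[editor's note] A worked example of the counter/ext_counter arithmetic described in the comment above; the numbers are made up purely for illustration:

        with hw_clk_freq = 250,000,000 Hz, a bandwidth event reading
                counter     = 2,000,000,000 bytes   (event 0x00001)
                ext_counter =   500,000,000 cycles  (event 0x10001)
        ran for 500e6 / 250e6 = 2 s, so the measured bandwidth is
        2,000,000,000 bytes / 2 s = 1 GB/s
        (equivalently, counter / ext_counter * hw_clk_freq = 4 * 250e6 B/s).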
+/* bandwidth events */
+#define HNS3_PMU_EVT_BW_SSU_EGU_BYTE_NUM 0x00001
+#define HNS3_PMU_EVT_BW_SSU_EGU_TIME 0x10001
+#define HNS3_PMU_EVT_BW_SSU_RPU_BYTE_NUM 0x00002
+#define HNS3_PMU_EVT_BW_SSU_RPU_TIME 0x10002
+#define HNS3_PMU_EVT_BW_SSU_ROCE_BYTE_NUM 0x00003
+#define HNS3_PMU_EVT_BW_SSU_ROCE_TIME 0x10003
+#define HNS3_PMU_EVT_BW_ROCE_SSU_BYTE_NUM 0x00004
+#define HNS3_PMU_EVT_BW_ROCE_SSU_TIME 0x10004
+#define HNS3_PMU_EVT_BW_TPU_SSU_BYTE_NUM 0x00005
+#define HNS3_PMU_EVT_BW_TPU_SSU_TIME 0x10005
+#define HNS3_PMU_EVT_BW_RPU_RCBRX_BYTE_NUM 0x00006
+#define HNS3_PMU_EVT_BW_RPU_RCBRX_TIME 0x10006
+#define HNS3_PMU_EVT_BW_RCBTX_TXSCH_BYTE_NUM 0x00008
+#define HNS3_PMU_EVT_BW_RCBTX_TXSCH_TIME 0x10008
+#define HNS3_PMU_EVT_BW_WR_FBD_BYTE_NUM 0x00009
+#define HNS3_PMU_EVT_BW_WR_FBD_TIME 0x10009
+#define HNS3_PMU_EVT_BW_WR_EBD_BYTE_NUM 0x0000a
+#define HNS3_PMU_EVT_BW_WR_EBD_TIME 0x1000a
+#define HNS3_PMU_EVT_BW_RD_FBD_BYTE_NUM 0x0000b
+#define HNS3_PMU_EVT_BW_RD_FBD_TIME 0x1000b
+#define HNS3_PMU_EVT_BW_RD_EBD_BYTE_NUM 0x0000c
+#define HNS3_PMU_EVT_BW_RD_EBD_TIME 0x1000c
+#define HNS3_PMU_EVT_BW_RD_PAY_M0_BYTE_NUM 0x0000d
+#define HNS3_PMU_EVT_BW_RD_PAY_M0_TIME 0x1000d
+#define HNS3_PMU_EVT_BW_RD_PAY_M1_BYTE_NUM 0x0000e
+#define HNS3_PMU_EVT_BW_RD_PAY_M1_TIME 0x1000e
+#define HNS3_PMU_EVT_BW_WR_PAY_M0_BYTE_NUM 0x0000f
+#define HNS3_PMU_EVT_BW_WR_PAY_M0_TIME 0x1000f
+#define HNS3_PMU_EVT_BW_WR_PAY_M1_BYTE_NUM 0x00010
+#define HNS3_PMU_EVT_BW_WR_PAY_M1_TIME 0x10010
+
+/* packet rate events */
+#define HNS3_PMU_EVT_PPS_IGU_SSU_PACKET_NUM 0x00100
+#define HNS3_PMU_EVT_PPS_IGU_SSU_TIME 0x10100
+#define HNS3_PMU_EVT_PPS_SSU_EGU_PACKET_NUM 0x00101
+#define HNS3_PMU_EVT_PPS_SSU_EGU_TIME 0x10101
+#define HNS3_PMU_EVT_PPS_SSU_RPU_PACKET_NUM 0x00102
+#define HNS3_PMU_EVT_PPS_SSU_RPU_TIME 0x10102
+#define HNS3_PMU_EVT_PPS_SSU_ROCE_PACKET_NUM 0x00103
+#define HNS3_PMU_EVT_PPS_SSU_ROCE_TIME 0x10103
+#define HNS3_PMU_EVT_PPS_ROCE_SSU_PACKET_NUM 0x00104
+#define HNS3_PMU_EVT_PPS_ROCE_SSU_TIME 0x10104
+#define HNS3_PMU_EVT_PPS_TPU_SSU_PACKET_NUM 0x00105
+#define HNS3_PMU_EVT_PPS_TPU_SSU_TIME 0x10105
+#define HNS3_PMU_EVT_PPS_RPU_RCBRX_PACKET_NUM 0x00106
+#define HNS3_PMU_EVT_PPS_RPU_RCBRX_TIME 0x10106
+#define HNS3_PMU_EVT_PPS_RCBTX_TPU_PACKET_NUM 0x00107
+#define HNS3_PMU_EVT_PPS_RCBTX_TPU_TIME 0x10107
+#define HNS3_PMU_EVT_PPS_RCBTX_TXSCH_PACKET_NUM 0x00108
+#define HNS3_PMU_EVT_PPS_RCBTX_TXSCH_TIME 0x10108
+#define HNS3_PMU_EVT_PPS_WR_FBD_PACKET_NUM 0x00109
+#define HNS3_PMU_EVT_PPS_WR_FBD_TIME 0x10109
+#define HNS3_PMU_EVT_PPS_WR_EBD_PACKET_NUM 0x0010a
+#define HNS3_PMU_EVT_PPS_WR_EBD_TIME 0x1010a
+#define HNS3_PMU_EVT_PPS_RD_FBD_PACKET_NUM 0x0010b
+#define HNS3_PMU_EVT_PPS_RD_FBD_TIME 0x1010b
+#define HNS3_PMU_EVT_PPS_RD_EBD_PACKET_NUM 0x0010c
+#define HNS3_PMU_EVT_PPS_RD_EBD_TIME 0x1010c
+#define HNS3_PMU_EVT_PPS_RD_PAY_M0_PACKET_NUM 0x0010d
+#define HNS3_PMU_EVT_PPS_RD_PAY_M0_TIME 0x1010d
+#define HNS3_PMU_EVT_PPS_RD_PAY_M1_PACKET_NUM 0x0010e
+#define HNS3_PMU_EVT_PPS_RD_PAY_M1_TIME 0x1010e
+#define HNS3_PMU_EVT_PPS_WR_PAY_M0_PACKET_NUM 0x0010f
+#define HNS3_PMU_EVT_PPS_WR_PAY_M0_TIME 0x1010f
+#define HNS3_PMU_EVT_PPS_WR_PAY_M1_PACKET_NUM 0x00110
+#define HNS3_PMU_EVT_PPS_WR_PAY_M1_TIME 0x10110
+#define HNS3_PMU_EVT_PPS_NICROH_TX_PRE_PACKET_NUM 0x00111
+#define HNS3_PMU_EVT_PPS_NICROH_TX_PRE_TIME 0x10111
+#define HNS3_PMU_EVT_PPS_NICROH_RX_PRE_PACKET_NUM 0x00112
+#define HNS3_PMU_EVT_PPS_NICROH_RX_PRE_TIME 0x10112
+
+/* latency events */
+#define HNS3_PMU_EVT_DLY_TX_PUSH_TIME 0x00202
+#define HNS3_PMU_EVT_DLY_TX_PUSH_PACKET_NUM 0x10202
+#define HNS3_PMU_EVT_DLY_TX_TIME 0x00204
+#define HNS3_PMU_EVT_DLY_TX_PACKET_NUM 0x10204
+#define HNS3_PMU_EVT_DLY_SSU_TX_NIC_TIME 0x00206
+#define HNS3_PMU_EVT_DLY_SSU_TX_NIC_PACKET_NUM 0x10206
+#define HNS3_PMU_EVT_DLY_SSU_TX_ROCE_TIME 0x00207
+#define HNS3_PMU_EVT_DLY_SSU_TX_ROCE_PACKET_NUM 0x10207
+#define HNS3_PMU_EVT_DLY_SSU_RX_NIC_TIME 0x00208
+#define HNS3_PMU_EVT_DLY_SSU_RX_NIC_PACKET_NUM 0x10208
+#define HNS3_PMU_EVT_DLY_SSU_RX_ROCE_TIME 0x00209
+#define HNS3_PMU_EVT_DLY_SSU_RX_ROCE_PACKET_NUM 0x10209
+#define HNS3_PMU_EVT_DLY_RPU_TIME 0x0020e
+#define HNS3_PMU_EVT_DLY_RPU_PACKET_NUM 0x1020e
+#define HNS3_PMU_EVT_DLY_TPU_TIME 0x0020f
+#define HNS3_PMU_EVT_DLY_TPU_PACKET_NUM 0x1020f
+#define HNS3_PMU_EVT_DLY_RPE_TIME 0x00210
+#define HNS3_PMU_EVT_DLY_RPE_PACKET_NUM 0x10210
+#define HNS3_PMU_EVT_DLY_TPE_TIME 0x00211
+#define HNS3_PMU_EVT_DLY_TPE_PACKET_NUM 0x10211
+#define HNS3_PMU_EVT_DLY_TPE_PUSH_TIME 0x00212
+#define HNS3_PMU_EVT_DLY_TPE_PUSH_PACKET_NUM 0x10212
+#define HNS3_PMU_EVT_DLY_WR_FBD_TIME 0x00213
+#define HNS3_PMU_EVT_DLY_WR_FBD_PACKET_NUM 0x10213
+#define HNS3_PMU_EVT_DLY_WR_EBD_TIME 0x00214
+#define HNS3_PMU_EVT_DLY_WR_EBD_PACKET_NUM 0x10214
+#define HNS3_PMU_EVT_DLY_RD_FBD_TIME 0x00215
+#define HNS3_PMU_EVT_DLY_RD_FBD_PACKET_NUM 0x10215
+#define HNS3_PMU_EVT_DLY_RD_EBD_TIME 0x00216
+#define HNS3_PMU_EVT_DLY_RD_EBD_PACKET_NUM 0x10216
+#define HNS3_PMU_EVT_DLY_RD_PAY_M0_TIME 0x00217
+#define HNS3_PMU_EVT_DLY_RD_PAY_M0_PACKET_NUM 0x10217
+#define HNS3_PMU_EVT_DLY_RD_PAY_M1_TIME 0x00218
+#define HNS3_PMU_EVT_DLY_RD_PAY_M1_PACKET_NUM 0x10218
+#define HNS3_PMU_EVT_DLY_WR_PAY_M0_TIME 0x00219
+#define HNS3_PMU_EVT_DLY_WR_PAY_M0_PACKET_NUM 0x10219
+#define HNS3_PMU_EVT_DLY_WR_PAY_M1_TIME 0x0021a
+#define HNS3_PMU_EVT_DLY_WR_PAY_M1_PACKET_NUM 0x1021a
+#define HNS3_PMU_EVT_DLY_MSIX_WRITE_TIME 0x0021c
+#define HNS3_PMU_EVT_DLY_MSIX_WRITE_PACKET_NUM 0x1021c
+
+/* interrupt rate events */
+#define HNS3_PMU_EVT_PPS_MSIX_NIC_INTR_NUM 0x00300
+#define HNS3_PMU_EVT_PPS_MSIX_NIC_TIME 0x10300
+
+/* filter mode supported by each bandwidth event */
+#define HNS3_PMU_FILTER_BW_SSU_EGU 0x07
+#define HNS3_PMU_FILTER_BW_SSU_RPU 0x1f
+#define HNS3_PMU_FILTER_BW_SSU_ROCE 0x0f
+#define HNS3_PMU_FILTER_BW_ROCE_SSU 0x0f
+#define HNS3_PMU_FILTER_BW_TPU_SSU 0x1f
+#define HNS3_PMU_FILTER_BW_RPU_RCBRX 0x11
+#define HNS3_PMU_FILTER_BW_RCBTX_TXSCH 0x11
+#define HNS3_PMU_FILTER_BW_WR_FBD 0x1b
+#define HNS3_PMU_FILTER_BW_WR_EBD 0x11
+#define HNS3_PMU_FILTER_BW_RD_FBD 0x01
+#define HNS3_PMU_FILTER_BW_RD_EBD 0x1b
+#define HNS3_PMU_FILTER_BW_RD_PAY_M0 0x01
+#define HNS3_PMU_FILTER_BW_RD_PAY_M1 0x01
+#define HNS3_PMU_FILTER_BW_WR_PAY_M0 0x01
+#define HNS3_PMU_FILTER_BW_WR_PAY_M1 0x01
+
+/* filter mode supported by each packet rate event */
+#define HNS3_PMU_FILTER_PPS_IGU_SSU 0x07
+#define HNS3_PMU_FILTER_PPS_SSU_EGU 0x07
+#define HNS3_PMU_FILTER_PPS_SSU_RPU 0x1f
+#define HNS3_PMU_FILTER_PPS_SSU_ROCE 0x0f
+#define HNS3_PMU_FILTER_PPS_ROCE_SSU 0x0f
+#define HNS3_PMU_FILTER_PPS_TPU_SSU 0x1f
+#define HNS3_PMU_FILTER_PPS_RPU_RCBRX 0x11
+#define HNS3_PMU_FILTER_PPS_RCBTX_TPU 0x1f
+#define HNS3_PMU_FILTER_PPS_RCBTX_TXSCH 0x11
+#define HNS3_PMU_FILTER_PPS_WR_FBD 0x1b
+#define HNS3_PMU_FILTER_PPS_WR_EBD 0x11
+#define HNS3_PMU_FILTER_PPS_RD_FBD 0x01
+#define HNS3_PMU_FILTER_PPS_RD_EBD 0x1b
+#define HNS3_PMU_FILTER_PPS_RD_PAY_M0 0x01
+#define HNS3_PMU_FILTER_PPS_RD_PAY_M1 0x01
+#define HNS3_PMU_FILTER_PPS_WR_PAY_M0 0x01
+#define HNS3_PMU_FILTER_PPS_WR_PAY_M1 0x01
+#define HNS3_PMU_FILTER_PPS_NICROH_TX_PRE 0x01
+#define HNS3_PMU_FILTER_PPS_NICROH_RX_PRE 0x01
+
+/* filter mode supported by each latency event */
+#define HNS3_PMU_FILTER_DLY_TX_PUSH 0x01
+#define HNS3_PMU_FILTER_DLY_TX 0x01
+#define HNS3_PMU_FILTER_DLY_SSU_TX_NIC 0x07
+#define HNS3_PMU_FILTER_DLY_SSU_TX_ROCE 0x07
+#define HNS3_PMU_FILTER_DLY_SSU_RX_NIC 0x07
+#define HNS3_PMU_FILTER_DLY_SSU_RX_ROCE 0x07
+#define HNS3_PMU_FILTER_DLY_RPU 0x11
+#define HNS3_PMU_FILTER_DLY_TPU 0x1f
+#define HNS3_PMU_FILTER_DLY_RPE 0x01
+#define HNS3_PMU_FILTER_DLY_TPE 0x0b
+#define HNS3_PMU_FILTER_DLY_TPE_PUSH 0x1b
+#define HNS3_PMU_FILTER_DLY_WR_FBD 0x1b
+#define HNS3_PMU_FILTER_DLY_WR_EBD 0x11
+#define HNS3_PMU_FILTER_DLY_RD_FBD 0x01
+#define HNS3_PMU_FILTER_DLY_RD_EBD 0x1b
+#define HNS3_PMU_FILTER_DLY_RD_PAY_M0 0x01
+#define HNS3_PMU_FILTER_DLY_RD_PAY_M1 0x01
+#define HNS3_PMU_FILTER_DLY_WR_PAY_M0 0x01
+#define HNS3_PMU_FILTER_DLY_WR_PAY_M1 0x01
+#define HNS3_PMU_FILTER_DLY_MSIX_WRITE 0x01
+
+/* filter mode supported by each interrupt rate event */
+#define HNS3_PMU_FILTER_INTR_MSIX_NIC 0x01
+
+enum hns3_pmu_hw_filter_mode {
+ HNS3_PMU_HW_FILTER_GLOBAL,
+ HNS3_PMU_HW_FILTER_PORT,
+ HNS3_PMU_HW_FILTER_PORT_TC,
+ HNS3_PMU_HW_FILTER_FUNC,
+ HNS3_PMU_HW_FILTER_FUNC_QUEUE,
+ HNS3_PMU_HW_FILTER_FUNC_INTR,
+};
+
+struct hns3_pmu_event_attr {
+ u32 event;
+ u16 filter_support;
+};
+
+struct hns3_pmu {
+ struct perf_event *hw_events[HNS3_PMU_MAX_HW_EVENTS];
+ struct hlist_node node;
+ struct pci_dev *pdev;
+ struct pmu pmu;
+ void __iomem *base;
+ int irq;
+ int on_cpu;
+ u32 identifier;
+ u32 hw_clk_freq; /* hardware clock frequency of PMU */
+ /* maximum and minimum bdf allowed by PMU */
+ u16 bdf_min;
+ u16 bdf_max;
+};
+
+#define to_hns3_pmu(p) (container_of((p), struct hns3_pmu, pmu))
+
+#define GET_PCI_DEVFN(bdf) ((bdf) & 0xff)
+
+#define FILTER_CONDITION_PORT(port) ((1 << (port)) & 0xff)
+#define FILTER_CONDITION_PORT_TC(port, tc) (((port) << 3) | ((tc) & 0x07))
+#define FILTER_CONDITION_FUNC_INTR(func, intr) (((intr) << 8) | (func))
+
+#define HNS3_PMU_FILTER_ATTR(_name, _config, _start, _end) \
+ static inline u64 hns3_pmu_get_##_name(struct perf_event *event) \
+ { \
+ return FIELD_GET(GENMASK_ULL(_end, _start), \
+ event->attr._config); \
+ }
+
+HNS3_PMU_FILTER_ATTR(subevent, config, 0, 7);
+HNS3_PMU_FILTER_ATTR(event_type, config, 8, 15);
+HNS3_PMU_FILTER_ATTR(ext_counter_used, config, 16, 16);
+HNS3_PMU_FILTER_ATTR(port, config1, 0, 3);
+HNS3_PMU_FILTER_ATTR(tc, config1, 4, 7);
+HNS3_PMU_FILTER_ATTR(bdf, config1, 8, 23);
+HNS3_PMU_FILTER_ATTR(queue, config1, 24, 39);
+HNS3_PMU_FILTER_ATTR(intr, config1, 40, 51);
+HNS3_PMU_FILTER_ATTR(global, config1, 52, 52);
+
+#define HNS3_BW_EVT_BYTE_NUM(_name) (&(struct hns3_pmu_event_attr) {\
+ HNS3_PMU_EVT_BW_##_name##_BYTE_NUM, \
+ HNS3_PMU_FILTER_BW_##_name})
+#define HNS3_BW_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\
+ HNS3_PMU_EVT_BW_##_name##_TIME, \
+ HNS3_PMU_FILTER_BW_##_name})
+#define HNS3_PPS_EVT_PACKET_NUM(_name) (&(struct hns3_pmu_event_attr) {\
+ HNS3_PMU_EVT_PPS_##_name##_PACKET_NUM, \
+ HNS3_PMU_FILTER_PPS_##_name})
+#define HNS3_PPS_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\
+ HNS3_PMU_EVT_PPS_##_name##_TIME, \
+ HNS3_PMU_FILTER_PPS_##_name})
+#define HNS3_DLY_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\
+ HNS3_PMU_EVT_DLY_##_name##_TIME, \
+ HNS3_PMU_FILTER_DLY_##_name})
+#define HNS3_DLY_EVT_PACKET_NUM(_name) (&(struct hns3_pmu_event_attr) {\
+ HNS3_PMU_EVT_DLY_##_name##_PACKET_NUM, \
+ HNS3_PMU_FILTER_DLY_##_name})
+#define HNS3_INTR_EVT_INTR_NUM(_name) (&(struct hns3_pmu_event_attr) {\
+ HNS3_PMU_EVT_PPS_##_name##_INTR_NUM, \
+ HNS3_PMU_FILTER_INTR_##_name})
+#define HNS3_INTR_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\
+ HNS3_PMU_EVT_PPS_##_name##_TIME, \
+ HNS3_PMU_FILTER_INTR_##_name})
+
+static ssize_t hns3_pmu_format_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dev_ext_attribute *eattr;
+
+ eattr = container_of(attr, struct dev_ext_attribute, attr);
+
+ return sysfs_emit(buf, "%s\n", (char *)eattr->var);
+}
+
+static ssize_t hns3_pmu_event_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hns3_pmu_event_attr *event;
+ struct dev_ext_attribute *eattr;
+
+ eattr = container_of(attr, struct dev_ext_attribute, attr);
+ event = eattr->var;
+
+ return sysfs_emit(buf, "config=0x%x\n", event->event);
+}
+
+static ssize_t hns3_pmu_filter_mode_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct hns3_pmu_event_attr *event;
+ struct dev_ext_attribute *eattr;
+ int len;
+
+ eattr = container_of(attr, struct dev_ext_attribute, attr);
+ event = eattr->var;
+
+ len = sysfs_emit_at(buf, 0, "filter mode supported: ");
+ if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_GLOBAL)
+ len += sysfs_emit_at(buf, len, "global ");
+ if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT)
+ len += sysfs_emit_at(buf, len, "port ");
+ if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT_TC)
+ len += sysfs_emit_at(buf, len, "port-tc ");
+ if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC)
+ len += sysfs_emit_at(buf, len, "func ");
+ if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE)
+ len += sysfs_emit_at(buf, len, "func-queue ");
+ if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_INTR)
+ len += sysfs_emit_at(buf, len, "func-intr ");
+
+ len += sysfs_emit_at(buf, len, "\n");
+
+ return len;
+}
+
+#define HNS3_PMU_ATTR(_name, _func, _config) \
+ (&((struct dev_ext_attribute[]) { \
+ { __ATTR(_name, 0444, _func, NULL), (void *)_config } \
+ })[0].attr.attr)
+
+#define HNS3_PMU_FORMAT_ATTR(_name, _format) \
+ HNS3_PMU_ATTR(_name, hns3_pmu_format_show, (void *)_format)
+#define HNS3_PMU_EVENT_ATTR(_name, _event) \
+ HNS3_PMU_ATTR(_name, hns3_pmu_event_show, (void *)_event)
+#define HNS3_PMU_FLT_MODE_ATTR(_name, _event) \
+ HNS3_PMU_ATTR(_name, hns3_pmu_filter_mode_show, (void *)_event)
+
+#define HNS3_PMU_BW_EVT_PAIR(_name, _macro) \
+ HNS3_PMU_EVENT_ATTR(_name##_byte_num, HNS3_BW_EVT_BYTE_NUM(_macro)), \
+ HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_BW_EVT_TIME(_macro))
+#define HNS3_PMU_PPS_EVT_PAIR(_name, _macro) \
+ HNS3_PMU_EVENT_ATTR(_name##_packet_num, HNS3_PPS_EVT_PACKET_NUM(_macro)), \
+ HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_PPS_EVT_TIME(_macro))
+#define HNS3_PMU_DLY_EVT_PAIR(_name, _macro) \
+ HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_DLY_EVT_TIME(_macro)), \
+ HNS3_PMU_EVENT_ATTR(_name##_packet_num, HNS3_DLY_EVT_PACKET_NUM(_macro))
+#define HNS3_PMU_INTR_EVT_PAIR(_name, _macro) \
+ HNS3_PMU_EVENT_ATTR(_name##_intr_num, HNS3_INTR_EVT_INTR_NUM(_macro)), \
+ HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_INTR_EVT_TIME(_macro))
+
+#define HNS3_PMU_BW_FLT_MODE_PAIR(_name, _macro) \
+ HNS3_PMU_FLT_MODE_ATTR(_name##_byte_num, HNS3_BW_EVT_BYTE_NUM(_macro)), \
+ HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_BW_EVT_TIME(_macro))
+#define HNS3_PMU_PPS_FLT_MODE_PAIR(_name, _macro) \
+ HNS3_PMU_FLT_MODE_ATTR(_name##_packet_num, HNS3_PPS_EVT_PACKET_NUM(_macro)), \
+ HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_PPS_EVT_TIME(_macro))
+#define HNS3_PMU_DLY_FLT_MODE_PAIR(_name, _macro) \
+ HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_DLY_EVT_TIME(_macro)), \
+ HNS3_PMU_FLT_MODE_ATTR(_name##_packet_num, HNS3_DLY_EVT_PACKET_NUM(_macro))
+#define HNS3_PMU_INTR_FLT_MODE_PAIR(_name, _macro) \
+ HNS3_PMU_FLT_MODE_ATTR(_name##_intr_num, HNS3_INTR_EVT_INTR_NUM(_macro)), \
+ HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_INTR_EVT_TIME(_macro))
+
+static u8 hns3_pmu_hw_filter_modes[] = {
+ HNS3_PMU_HW_FILTER_GLOBAL,
+ HNS3_PMU_HW_FILTER_PORT,
+ HNS3_PMU_HW_FILTER_PORT_TC,
+ HNS3_PMU_HW_FILTER_FUNC,
+ HNS3_PMU_HW_FILTER_FUNC_QUEUE,
+ HNS3_PMU_HW_FILTER_FUNC_INTR,
+};
+
+#define HNS3_PMU_SET_HW_FILTER(_hwc, _mode) \
+ ((_hwc)->addr_filters = (void *)&hns3_pmu_hw_filter_modes[(_mode)])
+
+static ssize_t identifier_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev));
+
+ return sysfs_emit(buf, "0x%x\n", hns3_pmu->identifier);
+}
+static DEVICE_ATTR_RO(identifier);
+
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev));
+
+ return sysfs_emit(buf, "%d\n", hns3_pmu->on_cpu);
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static ssize_t bdf_min_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev));
+ u16 bdf = hns3_pmu->bdf_min;
+
+ return sysfs_emit(buf, "%02x:%02x.%x\n", PCI_BUS_NUM(bdf),
+ PCI_SLOT(bdf), PCI_FUNC(bdf));
+}
+static DEVICE_ATTR_RO(bdf_min);
+
+static ssize_t bdf_max_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev));
+ u16 bdf = hns3_pmu->bdf_max;
+
+ return sysfs_emit(buf, "%02x:%02x.%x\n", PCI_BUS_NUM(bdf),
+ PCI_SLOT(bdf), PCI_FUNC(bdf));
+}
+static DEVICE_ATTR_RO(bdf_max);
+
+static ssize_t hw_clk_freq_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev));
+
+ return sysfs_emit(buf, "%u\n", hns3_pmu->hw_clk_freq);
+}
+static DEVICE_ATTR_RO(hw_clk_freq);
+
+static struct attribute *hns3_pmu_events_attr[] = {
+ /* bandwidth events */
+ HNS3_PMU_BW_EVT_PAIR(bw_ssu_egu, SSU_EGU),
+ HNS3_PMU_BW_EVT_PAIR(bw_ssu_rpu, SSU_RPU),
+ HNS3_PMU_BW_EVT_PAIR(bw_ssu_roce, SSU_ROCE),
+ HNS3_PMU_BW_EVT_PAIR(bw_roce_ssu, ROCE_SSU),
+ HNS3_PMU_BW_EVT_PAIR(bw_tpu_ssu, TPU_SSU),
+ HNS3_PMU_BW_EVT_PAIR(bw_rpu_rcbrx, RPU_RCBRX),
+ HNS3_PMU_BW_EVT_PAIR(bw_rcbtx_txsch, RCBTX_TXSCH),
+ HNS3_PMU_BW_EVT_PAIR(bw_wr_fbd, WR_FBD),
+ HNS3_PMU_BW_EVT_PAIR(bw_wr_ebd, WR_EBD),
+ HNS3_PMU_BW_EVT_PAIR(bw_rd_fbd, RD_FBD),
+ HNS3_PMU_BW_EVT_PAIR(bw_rd_ebd, RD_EBD),
+ HNS3_PMU_BW_EVT_PAIR(bw_rd_pay_m0, RD_PAY_M0),
+ HNS3_PMU_BW_EVT_PAIR(bw_rd_pay_m1, RD_PAY_M1),
+ HNS3_PMU_BW_EVT_PAIR(bw_wr_pay_m0, WR_PAY_M0),
+ HNS3_PMU_BW_EVT_PAIR(bw_wr_pay_m1, WR_PAY_M1),
+
+ /* packet rate events */
+ HNS3_PMU_PPS_EVT_PAIR(pps_igu_ssu, IGU_SSU),
+ HNS3_PMU_PPS_EVT_PAIR(pps_ssu_egu, SSU_EGU),
+ HNS3_PMU_PPS_EVT_PAIR(pps_ssu_rpu, SSU_RPU),
+ HNS3_PMU_PPS_EVT_PAIR(pps_ssu_roce, SSU_ROCE),
+ HNS3_PMU_PPS_EVT_PAIR(pps_roce_ssu, ROCE_SSU),
+ HNS3_PMU_PPS_EVT_PAIR(pps_tpu_ssu, TPU_SSU),
+ HNS3_PMU_PPS_EVT_PAIR(pps_rpu_rcbrx, RPU_RCBRX),
+ HNS3_PMU_PPS_EVT_PAIR(pps_rcbtx_tpu, RCBTX_TPU),
+ HNS3_PMU_PPS_EVT_PAIR(pps_rcbtx_txsch, RCBTX_TXSCH),
+ HNS3_PMU_PPS_EVT_PAIR(pps_wr_fbd, WR_FBD),
+ HNS3_PMU_PPS_EVT_PAIR(pps_wr_ebd, WR_EBD),
+ HNS3_PMU_PPS_EVT_PAIR(pps_rd_fbd, RD_FBD),
+ HNS3_PMU_PPS_EVT_PAIR(pps_rd_ebd, RD_EBD),
+ HNS3_PMU_PPS_EVT_PAIR(pps_rd_pay_m0, RD_PAY_M0),
+ HNS3_PMU_PPS_EVT_PAIR(pps_rd_pay_m1, RD_PAY_M1),
+ HNS3_PMU_PPS_EVT_PAIR(pps_wr_pay_m0, WR_PAY_M0),
+ HNS3_PMU_PPS_EVT_PAIR(pps_wr_pay_m1, WR_PAY_M1),
+ HNS3_PMU_PPS_EVT_PAIR(pps_intr_nicroh_tx_pre, NICROH_TX_PRE),
+ HNS3_PMU_PPS_EVT_PAIR(pps_intr_nicroh_rx_pre, NICROH_RX_PRE),
+
+ /* latency events */
+ HNS3_PMU_DLY_EVT_PAIR(dly_tx_push_to_mac, TX_PUSH),
+ HNS3_PMU_DLY_EVT_PAIR(dly_tx_normal_to_mac, TX),
+ HNS3_PMU_DLY_EVT_PAIR(dly_ssu_tx_th_nic, SSU_TX_NIC),
+ HNS3_PMU_DLY_EVT_PAIR(dly_ssu_tx_th_roce, SSU_TX_ROCE),
+ HNS3_PMU_DLY_EVT_PAIR(dly_ssu_rx_th_nic, SSU_RX_NIC),
+ HNS3_PMU_DLY_EVT_PAIR(dly_ssu_rx_th_roce, SSU_RX_ROCE),
+ HNS3_PMU_DLY_EVT_PAIR(dly_rpu, RPU),
+ HNS3_PMU_DLY_EVT_PAIR(dly_tpu, TPU),
+ HNS3_PMU_DLY_EVT_PAIR(dly_rpe, RPE),
+ HNS3_PMU_DLY_EVT_PAIR(dly_tpe_normal, TPE),
+ HNS3_PMU_DLY_EVT_PAIR(dly_tpe_push, TPE_PUSH),
+ HNS3_PMU_DLY_EVT_PAIR(dly_wr_fbd, WR_FBD),
+ HNS3_PMU_DLY_EVT_PAIR(dly_wr_ebd, WR_EBD),
+ HNS3_PMU_DLY_EVT_PAIR(dly_rd_fbd, RD_FBD),
+ HNS3_PMU_DLY_EVT_PAIR(dly_rd_ebd, RD_EBD),
+ HNS3_PMU_DLY_EVT_PAIR(dly_rd_pay_m0, RD_PAY_M0),
+ HNS3_PMU_DLY_EVT_PAIR(dly_rd_pay_m1, RD_PAY_M1),
+ HNS3_PMU_DLY_EVT_PAIR(dly_wr_pay_m0, WR_PAY_M0),
+ HNS3_PMU_DLY_EVT_PAIR(dly_wr_pay_m1, WR_PAY_M1),
+ HNS3_PMU_DLY_EVT_PAIR(dly_msix_write, MSIX_WRITE),
+
+ /* interrupt rate events */
+ HNS3_PMU_INTR_EVT_PAIR(pps_intr_msix_nic, MSIX_NIC),
+
+ NULL
+};
+
+static struct attribute *hns3_pmu_filter_mode_attr[] = {
+ /* bandwidth events */
+ HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_egu, SSU_EGU),
+ HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_rpu, SSU_RPU),
+ HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_roce, SSU_ROCE),
+ HNS3_PMU_BW_FLT_MODE_PAIR(bw_roce_ssu, ROCE_SSU),
+ HNS3_PMU_BW_FLT_MODE_PAIR(bw_tpu_ssu, TPU_SSU),
+ HNS3_PMU_BW_FLT_MODE_PAIR(bw_rpu_rcbrx, RPU_RCBRX),
+ HNS3_PMU_BW_FLT_MODE_PAIR(bw_rcbtx_txsch, RCBTX_TXSCH),
+ HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_fbd, WR_FBD),
+ HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_ebd, WR_EBD),
+ HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_fbd, RD_FBD),
+ HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_ebd, RD_EBD),
+ HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_pay_m0, RD_PAY_M0),
+ HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_pay_m1, RD_PAY_M1),
+ HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_pay_m0, WR_PAY_M0),
+ HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_pay_m1, WR_PAY_M1),
+
+ /* packet rate events */
+ HNS3_PMU_PPS_FLT_MODE_PAIR(pps_igu_ssu, IGU_SSU),
+ HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_egu, SSU_EGU),
+ HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_rpu, SSU_RPU),
+ HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_roce, SSU_ROCE),
+ HNS3_PMU_PPS_FLT_MODE_PAIR(pps_roce_ssu, ROCE_SSU),
+ HNS3_PMU_PPS_FLT_MODE_PAIR(pps_tpu_ssu, TPU_SSU),
+ HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rpu_rcbrx, RPU_RCBRX),
+ HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rcbtx_tpu, RCBTX_TPU),
+ HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rcbtx_txsch, RCBTX_TXSCH),
+ HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_fbd, WR_FBD),
+ HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_ebd, WR_EBD),
+ HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_fbd, RD_FBD),
+ HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_ebd, RD_EBD),
+ HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_pay_m0, RD_PAY_M0),
+ HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_pay_m1, RD_PAY_M1),
+ HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_pay_m0, WR_PAY_M0),
+ HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_pay_m1, WR_PAY_M1),
+ HNS3_PMU_PPS_FLT_MODE_PAIR(pps_intr_nicroh_tx_pre, NICROH_TX_PRE),
+ HNS3_PMU_PPS_FLT_MODE_PAIR(pps_intr_nicroh_rx_pre, NICROH_RX_PRE),
+
+ /* latency events */
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tx_push_to_mac, TX_PUSH),
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tx_normal_to_mac, TX),
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_tx_th_nic, SSU_TX_NIC),
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_tx_th_roce, SSU_TX_ROCE),
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_rx_th_nic, SSU_RX_NIC),
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_rx_th_roce, SSU_RX_ROCE),
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rpu, RPU),
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpu, TPU),
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rpe, RPE),
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpe_normal, TPE),
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpe_push, TPE_PUSH),
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_fbd, WR_FBD),
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_ebd, WR_EBD),
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_fbd, RD_FBD),
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_ebd, RD_EBD),
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_pay_m0, RD_PAY_M0),
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_pay_m1, RD_PAY_M1),
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_pay_m0, WR_PAY_M0),
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_pay_m1, WR_PAY_M1),
+ HNS3_PMU_DLY_FLT_MODE_PAIR(dly_msix_write, MSIX_WRITE),
+
+ /* interrupt rate events */
+ HNS3_PMU_INTR_FLT_MODE_PAIR(pps_intr_msix_nic, MSIX_NIC),
+
+ NULL
+};
+
+static struct attribute_group hns3_pmu_events_group = {
+ .name = "events",
+ .attrs = hns3_pmu_events_attr,
+};
+
+static struct attribute_group hns3_pmu_filter_mode_group = {
+ .name = "filtermode",
+ .attrs = hns3_pmu_filter_mode_attr,
+};
+
+static struct attribute *hns3_pmu_format_attr[] = {
+ HNS3_PMU_FORMAT_ATTR(subevent, "config:0-7"),
+ HNS3_PMU_FORMAT_ATTR(event_type, "config:8-15"),
+ HNS3_PMU_FORMAT_ATTR(ext_counter_used, "config:16"),
+ HNS3_PMU_FORMAT_ATTR(port, "config1:0-3"),
+ HNS3_PMU_FORMAT_ATTR(tc, "config1:4-7"),
+ HNS3_PMU_FORMAT_ATTR(bdf, "config1:8-23"),
+ HNS3_PMU_FORMAT_ATTR(queue, "config1:24-39"),
+ HNS3_PMU_FORMAT_ATTR(intr, "config1:40-51"),
+ HNS3_PMU_FORMAT_ATTR(global, "config1:52"),
+ NULL
+};
+
+static struct attribute_group hns3_pmu_format_group = {
+ .name = "format",
+ .attrs = hns3_pmu_format_attr,
+};
+
+static struct attribute *hns3_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL
+};
+
+static struct attribute_group hns3_pmu_cpumask_attr_group = {
+ .attrs = hns3_pmu_cpumask_attrs,
+};
+
+static struct attribute *hns3_pmu_identifier_attrs[] = {
+ &dev_attr_identifier.attr,
+ NULL
+};
+
+static struct attribute_group hns3_pmu_identifier_attr_group = {
+ .attrs = hns3_pmu_identifier_attrs,
+};
+
+static struct attribute *hns3_pmu_bdf_range_attrs[] = {
+ &dev_attr_bdf_min.attr,
+ &dev_attr_bdf_max.attr,
+ NULL
+};
+
+static struct attribute_group hns3_pmu_bdf_range_attr_group = {
+ .attrs = hns3_pmu_bdf_range_attrs,
+};
+
+static struct attribute *hns3_pmu_hw_clk_freq_attrs[] = {
+ &dev_attr_hw_clk_freq.attr,
+ NULL
+};
+
+static struct attribute_group hns3_pmu_hw_clk_freq_attr_group = {
+ .attrs = hns3_pmu_hw_clk_freq_attrs,
+};
+
+static const struct attribute_group *hns3_pmu_attr_groups[] = {
+ &hns3_pmu_events_group,
+ &hns3_pmu_filter_mode_group,
+ &hns3_pmu_format_group,
+ &hns3_pmu_cpumask_attr_group,
+ &hns3_pmu_identifier_attr_group,
+ &hns3_pmu_bdf_range_attr_group,
+ &hns3_pmu_hw_clk_freq_attr_group,
+ NULL
+};
+
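+/*
+ * The event encoding mirrors the "format" attributes: config[7:0] is the
+ * subevent, config[15:8] the event type and config[16] selects the
+ * extended counter.
+ */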
+static u32 hns3_pmu_get_event(struct perf_event *event)
+{
+ return hns3_pmu_get_ext_counter_used(event) << 16 |
+ hns3_pmu_get_event_type(event) << 8 |
+ hns3_pmu_get_subevent(event);
+}
+
+static u32 hns3_pmu_get_real_event(struct perf_event *event)
+{
+ return hns3_pmu_get_event_type(event) << 8 |
+ hns3_pmu_get_subevent(event);
+}
+
+static u32 hns3_pmu_get_offset(u32 offset, u32 idx)
+{
+ return offset + HNS3_PMU_REG_EVENT_OFFSET +
+ HNS3_PMU_REG_EVENT_SIZE * idx;
+}
+
+static u32 hns3_pmu_readl(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx)
+{
+ u32 offset = hns3_pmu_get_offset(reg_offset, idx);
+
+ return readl(hns3_pmu->base + offset);
+}
+
+static void hns3_pmu_writel(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx,
+ u32 val)
+{
+ u32 offset = hns3_pmu_get_offset(reg_offset, idx);
+
+ writel(val, hns3_pmu->base + offset);
+}
+
+static u64 hns3_pmu_readq(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx)
+{
+ u32 offset = hns3_pmu_get_offset(reg_offset, idx);
+
+ return readq(hns3_pmu->base + offset);
+}
+
+static void hns3_pmu_writeq(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx,
+ u64 val)
+{
+ u32 offset = hns3_pmu_get_offset(reg_offset, idx);
+
+ writeq(val, hns3_pmu->base + offset);
+}
+
+static bool hns3_pmu_cmp_event(struct perf_event *target,
+ struct perf_event *event)
+{
+ return hns3_pmu_get_real_event(target) == hns3_pmu_get_real_event(event);
+}
+
+static int hns3_pmu_find_related_event_idx(struct hns3_pmu *hns3_pmu,
+ struct perf_event *event)
+{
+ struct perf_event *sibling;
+ int hw_event_used = 0;
+ int idx;
+
+ for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) {
+ sibling = hns3_pmu->hw_events[idx];
+ if (!sibling)
+ continue;
+
+ hw_event_used++;
+
+ if (!hns3_pmu_cmp_event(sibling, event))
+ continue;
+
+ /* Related event is used in the same group */
+ if (sibling->group_leader == event->group_leader)
+ return idx;
+ }
+
+ /* No related event and all hardware events are used up */
+ if (hw_event_used >= HNS3_PMU_MAX_HW_EVENTS)
+ return -EBUSY;
+
+ /* No related event and there are spare hardware events that can be used */
+ return -ENOENT;
+}
+
+static int hns3_pmu_get_event_idx(struct hns3_pmu *hns3_pmu)
+{
+ int idx;
+
+ for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) {
+ if (!hns3_pmu->hw_events[idx])
+ return idx;
+ }
+
+ return -EBUSY;
+}
+
+static bool hns3_pmu_valid_bdf(struct hns3_pmu *hns3_pmu, u16 bdf)
+{
+ struct pci_dev *pdev;
+
+ if (bdf < hns3_pmu->bdf_min || bdf > hns3_pmu->bdf_max) {
+ pci_err(hns3_pmu->pdev, "Invalid EP device: %#x!\n", bdf);
+ return false;
+ }
+
+ pdev = pci_get_domain_bus_and_slot(pci_domain_nr(hns3_pmu->pdev->bus),
+ PCI_BUS_NUM(bdf),
+ GET_PCI_DEVFN(bdf));
+ if (!pdev) {
+ pci_err(hns3_pmu->pdev, "Nonexistent EP device: %#x!\n", bdf);
+ return false;
+ }
+
+ pci_dev_put(pdev);
+ return true;
+}
+
+static void hns3_pmu_set_qid_para(struct hns3_pmu *hns3_pmu, u32 idx, u16 bdf,
+ u16 queue)
+{
+ u32 val;
+
+ val = GET_PCI_DEVFN(bdf);
+ val |= (u32)queue << HNS3_PMU_QID_PARA_QUEUE_S;
+ hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_PARA, idx, val);
+}
+
+static bool hns3_pmu_qid_req_start(struct hns3_pmu *hns3_pmu, u32 idx)
+{
+ bool queue_id_valid = false;
+ u32 reg_qid_ctrl, val;
+ int err;
+
+ /* enable queue id request */
+ hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_CTRL, idx,
+ HNS3_PMU_QID_CTRL_REQ_ENABLE);
+
+ reg_qid_ctrl = hns3_pmu_get_offset(HNS3_PMU_REG_EVENT_QID_CTRL, idx);
+ err = readl_poll_timeout(hns3_pmu->base + reg_qid_ctrl, val,
+ val & HNS3_PMU_QID_CTRL_DONE, 1, 1000);
+ if (err == -ETIMEDOUT) {
+ pci_err(hns3_pmu->pdev, "QID request timeout!\n");
+ goto out;
+ }
+
+ queue_id_valid = !(val & HNS3_PMU_QID_CTRL_MISS);
+
+out:
+ /* disable qid request and clear status */
+ hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_CTRL, idx, 0);
+
+ return queue_id_valid;
+}
+
+static bool hns3_pmu_valid_queue(struct hns3_pmu *hns3_pmu, u32 idx, u16 bdf,
+ u16 queue)
+{
+ hns3_pmu_set_qid_para(hns3_pmu, idx, bdf, queue);
+
+ return hns3_pmu_qid_req_start(hns3_pmu, idx);
+}
+
+static struct hns3_pmu_event_attr *hns3_pmu_get_pmu_event(u32 event)
+{
+ struct hns3_pmu_event_attr *pmu_event;
+ struct dev_ext_attribute *eattr;
+ struct device_attribute *dattr;
+ struct attribute *attr;
+ u32 i;
+
+ for (i = 0; i < ARRAY_SIZE(hns3_pmu_events_attr) - 1; i++) {
+ attr = hns3_pmu_events_attr[i];
+ dattr = container_of(attr, struct device_attribute, attr);
+ eattr = container_of(dattr, struct dev_ext_attribute, attr);
+ pmu_event = eattr->var;
+
+ if (event == pmu_event->event)
+ return pmu_event;
+ }
+
+ return NULL;
+}
+
+static int hns3_pmu_set_func_mode(struct perf_event *event,
+ struct hns3_pmu *hns3_pmu)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u16 bdf = hns3_pmu_get_bdf(event);
+
+ if (!hns3_pmu_valid_bdf(hns3_pmu, bdf))
+ return -ENOENT;
+
+ HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC);
+
+ return 0;
+}
+
+static int hns3_pmu_set_func_queue_mode(struct perf_event *event,
+ struct hns3_pmu *hns3_pmu)
+{
+ u16 queue_id = hns3_pmu_get_queue(event);
+ struct hw_perf_event *hwc = &event->hw;
+ u16 bdf = hns3_pmu_get_bdf(event);
+
+ if (!hns3_pmu_valid_bdf(hns3_pmu, bdf))
+ return -ENOENT;
+
+ if (!hns3_pmu_valid_queue(hns3_pmu, hwc->idx, bdf, queue_id)) {
+ pci_err(hns3_pmu->pdev, "Invalid queue: %u\n", queue_id);
+ return -ENOENT;
+ }
+
+ HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC_QUEUE);
+
+ return 0;
+}
+
+static bool
+hns3_pmu_is_enabled_global_mode(struct perf_event *event,
+ struct hns3_pmu_event_attr *pmu_event)
+{
+ u8 global = hns3_pmu_get_global(event);
+
+ if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_GLOBAL))
+ return false;
+
+ return global;
+}
+
+static bool hns3_pmu_is_enabled_func_mode(struct perf_event *event,
+ struct hns3_pmu_event_attr *pmu_event)
+{
+ u16 queue_id = hns3_pmu_get_queue(event);
+ u16 bdf = hns3_pmu_get_bdf(event);
+
+ if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC))
+ return false;
+ else if (queue_id != HNS3_PMU_FILTER_ALL_QUEUE)
+ return false;
+
+ return bdf;
+}
+
+static bool
+hns3_pmu_is_enabled_func_queue_mode(struct perf_event *event,
+ struct hns3_pmu_event_attr *pmu_event)
+{
+ u16 queue_id = hns3_pmu_get_queue(event);
+ u16 bdf = hns3_pmu_get_bdf(event);
+
+ if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE))
+ return false;
+ else if (queue_id == HNS3_PMU_FILTER_ALL_QUEUE)
+ return false;
+
+ return bdf;
+}
+
+static bool hns3_pmu_is_enabled_port_mode(struct perf_event *event,
+ struct hns3_pmu_event_attr *pmu_event)
+{
+ u8 tc_id = hns3_pmu_get_tc(event);
+
+ if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT))
+ return false;
+
+ return tc_id == HNS3_PMU_FILTER_ALL_TC;
+}
+
+static bool
+hns3_pmu_is_enabled_port_tc_mode(struct perf_event *event,
+ struct hns3_pmu_event_attr *pmu_event)
+{
+ u8 tc_id = hns3_pmu_get_tc(event);
+
+ if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT_TC))
+ return false;
+
+ return tc_id != HNS3_PMU_FILTER_ALL_TC;
+}
+
+static bool
+hns3_pmu_is_enabled_func_intr_mode(struct perf_event *event,
+ struct hns3_pmu *hns3_pmu,
+ struct hns3_pmu_event_attr *pmu_event)
+{
+ u16 bdf = hns3_pmu_get_bdf(event);
+
+ if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_INTR))
+ return false;
+
+ return hns3_pmu_valid_bdf(hns3_pmu, bdf);
+}
+
+static int hns3_pmu_select_filter_mode(struct perf_event *event,
+ struct hns3_pmu *hns3_pmu)
+{
+ u32 event_id = hns3_pmu_get_event(event);
+ struct hw_perf_event *hwc = &event->hw;
+ struct hns3_pmu_event_attr *pmu_event;
+
+ pmu_event = hns3_pmu_get_pmu_event(event_id);
+ if (!pmu_event) {
+ pci_err(hns3_pmu->pdev, "Invalid pmu event\n");
+ return -ENOENT;
+ }
+
+ if (hns3_pmu_is_enabled_global_mode(event, pmu_event)) {
+ HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_GLOBAL);
+ return 0;
+ }
+
+ if (hns3_pmu_is_enabled_func_mode(event, pmu_event))
+ return hns3_pmu_set_func_mode(event, hns3_pmu);
+
+ if (hns3_pmu_is_enabled_func_queue_mode(event, pmu_event))
+ return hns3_pmu_set_func_queue_mode(event, hns3_pmu);
+
+ if (hns3_pmu_is_enabled_port_mode(event, pmu_event)) {
+ HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_PORT);
+ return 0;
+ }
+
+ if (hns3_pmu_is_enabled_port_tc_mode(event, pmu_event)) {
+ HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_PORT_TC);
+ return 0;
+ }
+
+ if (hns3_pmu_is_enabled_func_intr_mode(event, hns3_pmu, pmu_event)) {
+ HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC_INTR);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
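+/*
+ * Count the distinct hardware events in the group (events that compare
+ * equal share a counter) and reject the group if it needs more than
+ * HNS3_PMU_MAX_HW_EVENTS counters.
+ */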
+static bool hns3_pmu_validate_event_group(struct perf_event *event)
+{
+ struct perf_event *sibling, *leader = event->group_leader;
+ struct perf_event *event_group[HNS3_PMU_MAX_HW_EVENTS];
+ int counters = 1;
+ int num;
+
+ event_group[0] = leader;
+ if (!is_software_event(leader)) {
+ if (leader->pmu != event->pmu)
+ return false;
+
+ if (leader != event && !hns3_pmu_cmp_event(leader, event))
+ event_group[counters++] = event;
+ }
+
+ for_each_sibling_event(sibling, event->group_leader) {
+ if (is_software_event(sibling))
+ continue;
+
+ if (sibling->pmu != event->pmu)
+ return false;
+
+ for (num = 0; num < counters; num++) {
+ if (hns3_pmu_cmp_event(event_group[num], sibling))
+ break;
+ }
+
+ if (num == counters)
+ event_group[counters++] = sibling;
+ }
+
+ return counters <= HNS3_PMU_MAX_HW_EVENTS;
+}
+
+static u32 hns3_pmu_get_filter_condition(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u16 intr_id = hns3_pmu_get_intr(event);
+ u8 port_id = hns3_pmu_get_port(event);
+ u16 bdf = hns3_pmu_get_bdf(event);
+ u8 tc_id = hns3_pmu_get_tc(event);
+ u8 filter_mode;
+
+ filter_mode = *(u8 *)hwc->addr_filters;
+ switch (filter_mode) {
+ case HNS3_PMU_HW_FILTER_PORT:
+ return FILTER_CONDITION_PORT(port_id);
+ case HNS3_PMU_HW_FILTER_PORT_TC:
+ return FILTER_CONDITION_PORT_TC(port_id, tc_id);
+ case HNS3_PMU_HW_FILTER_FUNC:
+ case HNS3_PMU_HW_FILTER_FUNC_QUEUE:
+ return GET_PCI_DEVFN(bdf);
+ case HNS3_PMU_HW_FILTER_FUNC_INTR:
+ return FILTER_CONDITION_FUNC_INTR(GET_PCI_DEVFN(bdf), intr_id);
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void hns3_pmu_config_filter(struct perf_event *event)
+{
+ struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+ u8 event_type = hns3_pmu_get_event_type(event);
+ u8 subevent_id = hns3_pmu_get_subevent(event);
+ u16 queue_id = hns3_pmu_get_queue(event);
+ struct hw_perf_event *hwc = &event->hw;
+ u8 filter_mode = *(u8 *)hwc->addr_filters;
+ u16 bdf = hns3_pmu_get_bdf(event);
+ u32 idx = hwc->idx;
+ u32 val;
+
+ val = event_type;
+ val |= subevent_id << HNS3_PMU_CTRL_SUBEVENT_S;
+ val |= filter_mode << HNS3_PMU_CTRL_FILTER_MODE_S;
+ val |= HNS3_PMU_EVENT_OVERFLOW_RESTART;
+ hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val);
+
+ val = hns3_pmu_get_filter_condition(event);
+ hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_HIGH, idx, val);
+
+ if (filter_mode == HNS3_PMU_HW_FILTER_FUNC_QUEUE)
+ hns3_pmu_set_qid_para(hns3_pmu, idx, bdf, queue_id);
+}
+
+static void hns3_pmu_enable_counter(struct hns3_pmu *hns3_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 idx = hwc->idx;
+ u32 val;
+
+ val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx);
+ val |= HNS3_PMU_EVENT_EN;
+ hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val);
+}
+
+static void hns3_pmu_disable_counter(struct hns3_pmu *hns3_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 idx = hwc->idx;
+ u32 val;
+
+ val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx);
+ val &= ~HNS3_PMU_EVENT_EN;
+ hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val);
+}
+
+static void hns3_pmu_enable_intr(struct hns3_pmu *hns3_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 idx = hwc->idx;
+ u32 val;
+
+ val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx);
+ val &= ~HNS3_PMU_INTR_MASK_OVERFLOW;
+ hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx, val);
+}
+
+static void hns3_pmu_disable_intr(struct hns3_pmu *hns3_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 idx = hwc->idx;
+ u32 val;
+
+ val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx);
+ val |= HNS3_PMU_INTR_MASK_OVERFLOW;
+ hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx, val);
+}
+
+static void hns3_pmu_clear_intr_status(struct hns3_pmu *hns3_pmu, u32 idx)
+{
+ u32 val;
+
+ val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx);
+ val |= HNS3_PMU_EVENT_STATUS_RESET;
+ hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val);
+
+ val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx);
+ val &= ~HNS3_PMU_EVENT_STATUS_RESET;
+ hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val);
+}
+
+static u64 hns3_pmu_read_counter(struct perf_event *event)
+{
+ struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+
+ return hns3_pmu_readq(hns3_pmu, event->hw.event_base, event->hw.idx);
+}
+
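+/*
+ * Write both the normal and the extended counter; hwc->event_base
+ * selects which one is read back.
+ */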
+static void hns3_pmu_write_counter(struct perf_event *event, u64 value)
+{
+ struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+ u32 idx = event->hw.idx;
+
+ hns3_pmu_writeq(hns3_pmu, HNS3_PMU_REG_EVENT_COUNTER, idx, value);
+ hns3_pmu_writeq(hns3_pmu, HNS3_PMU_REG_EVENT_EXT_COUNTER, idx, value);
+}
+
+static void hns3_pmu_init_counter(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ local64_set(&hwc->prev_count, 0);
+ hns3_pmu_write_counter(event, 0);
+}
+
+static int hns3_pmu_event_init(struct perf_event *event)
+{
+ struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx;
+ int ret;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* Sampling is not supported */
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EOPNOTSUPP;
+
+ event->cpu = hns3_pmu->on_cpu;
+
+ idx = hns3_pmu_get_event_idx(hns3_pmu);
+ if (idx < 0) {
+ pci_err(hns3_pmu->pdev, "Up to %u events are supported!\n",
+ HNS3_PMU_MAX_HW_EVENTS);
+ return -EBUSY;
+ }
+
+ hwc->idx = idx;
+
+ ret = hns3_pmu_select_filter_mode(event, hns3_pmu);
+ if (ret) {
+ pci_err(hns3_pmu->pdev, "Invalid filter, ret = %d.\n", ret);
+ return ret;
+ }
+
+ if (!hns3_pmu_validate_event_group(event)) {
+ pci_err(hns3_pmu->pdev, "Invalid event group.\n");
+ return -EINVAL;
+ }
+
+ if (hns3_pmu_get_ext_counter_used(event))
+ hwc->event_base = HNS3_PMU_REG_EVENT_EXT_COUNTER;
+ else
+ hwc->event_base = HNS3_PMU_REG_EVENT_COUNTER;
+
+ return 0;
+}
+
+static void hns3_pmu_read(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 new_cnt, prev_cnt, delta;
+
+ do {
+ prev_cnt = local64_read(&hwc->prev_count);
+ new_cnt = hns3_pmu_read_counter(event);
+ } while (local64_cmpxchg(&hwc->prev_count, prev_cnt, new_cnt) !=
+ prev_cnt);
+
+ delta = new_cnt - prev_cnt;
+ local64_add(delta, &event->count);
+}
+
+static void hns3_pmu_start(struct perf_event *event, int flags)
+{
+ struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+ hwc->state = 0;
+
+ hns3_pmu_config_filter(event);
+ hns3_pmu_init_counter(event);
+ hns3_pmu_enable_intr(hns3_pmu, hwc);
+ hns3_pmu_enable_counter(hns3_pmu, hwc);
+
+ perf_event_update_userpage(event);
+}
+
+static void hns3_pmu_stop(struct perf_event *event, int flags)
+{
+ struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ hns3_pmu_disable_counter(hns3_pmu, hwc);
+ hns3_pmu_disable_intr(hns3_pmu, hwc);
+
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ /* Read hardware counter and update the perf counter statistics */
+ hns3_pmu_read(event);
+ hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int hns3_pmu_add(struct perf_event *event, int flags)
+{
+ struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx;
+
+ hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+ /* Check all working events to find a related event. */
+ idx = hns3_pmu_find_related_event_idx(hns3_pmu, event);
+ if (idx < 0 && idx != -ENOENT)
+ return idx;
+
+ /* Current event shares an enabled hardware event with a related event */
+ if (idx >= 0 && idx < HNS3_PMU_MAX_HW_EVENTS) {
+ hwc->idx = idx;
+ goto start_count;
+ }
+
+ idx = hns3_pmu_get_event_idx(hns3_pmu);
+ if (idx < 0)
+ return idx;
+
+ hwc->idx = idx;
+ hns3_pmu->hw_events[idx] = event;
+
+start_count:
+ if (flags & PERF_EF_START)
+ hns3_pmu_start(event, PERF_EF_RELOAD);
+
+ return 0;
+}
+
+static void hns3_pmu_del(struct perf_event *event, int flags)
+{
+ struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ hns3_pmu_stop(event, PERF_EF_UPDATE);
+ hns3_pmu->hw_events[hwc->idx] = NULL;
+ perf_event_update_userpage(event);
+}
+
+static void hns3_pmu_enable(struct pmu *pmu)
+{
+ struct hns3_pmu *hns3_pmu = to_hns3_pmu(pmu);
+ u32 val;
+
+ val = readl(hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL);
+ val |= HNS3_PMU_GLOBAL_START;
+ writel(val, hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL);
+}
+
+static void hns3_pmu_disable(struct pmu *pmu)
+{
+ struct hns3_pmu *hns3_pmu = to_hns3_pmu(pmu);
+ u32 val;
+
+ val = readl(hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL);
+ val &= ~HNS3_PMU_GLOBAL_START;
+ writel(val, hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL);
+}
+
+static int hns3_pmu_alloc_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu)
+{
+ u16 device_id;
+ char *name;
+ u32 val;
+
+ hns3_pmu->base = pcim_iomap_table(pdev)[BAR_2];
+ if (!hns3_pmu->base) {
+ pci_err(pdev, "ioremap failed\n");
+ return -ENOMEM;
+ }
+
+ hns3_pmu->hw_clk_freq = readl(hns3_pmu->base + HNS3_PMU_REG_CLOCK_FREQ);
+
+ val = readl(hns3_pmu->base + HNS3_PMU_REG_BDF);
+ hns3_pmu->bdf_min = val & 0xffff;
+ hns3_pmu->bdf_max = val >> 16;
+
+ val = readl(hns3_pmu->base + HNS3_PMU_REG_DEVICE_ID);
+ device_id = val & 0xffff;
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hns3_pmu_sicl_%u", device_id);
+ if (!name)
+ return -ENOMEM;
+
+ hns3_pmu->pdev = pdev;
+ hns3_pmu->on_cpu = -1;
+ hns3_pmu->identifier = readl(hns3_pmu->base + HNS3_PMU_REG_VERSION);
+ hns3_pmu->pmu = (struct pmu) {
+ .name = name,
+ .module = THIS_MODULE,
+ .event_init = hns3_pmu_event_init,
+ .pmu_enable = hns3_pmu_enable,
+ .pmu_disable = hns3_pmu_disable,
+ .add = hns3_pmu_add,
+ .del = hns3_pmu_del,
+ .start = hns3_pmu_start,
+ .stop = hns3_pmu_stop,
+ .read = hns3_pmu_read,
+ .task_ctx_nr = perf_invalid_context,
+ .attr_groups = hns3_pmu_attr_groups,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ };
+
+ return 0;
+}
+
+static irqreturn_t hns3_pmu_irq(int irq, void *data)
+{
+ struct hns3_pmu *hns3_pmu = data;
+ u32 intr_status, idx;
+
+ for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) {
+ intr_status = hns3_pmu_readl(hns3_pmu,
+ HNS3_PMU_REG_EVENT_INTR_STATUS,
+ idx);
+
+ /*
+ * As each counter restarts from 0 when it overflows, no extra
+ * processing is needed; just clear the interrupt status.
+ */
+ if (intr_status)
+ hns3_pmu_clear_intr_status(hns3_pmu, idx);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int hns3_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct hns3_pmu *hns3_pmu;
+
+ hns3_pmu = hlist_entry_safe(node, struct hns3_pmu, node);
+ if (!hns3_pmu)
+ return -ENODEV;
+
+ if (hns3_pmu->on_cpu == -1) {
+ hns3_pmu->on_cpu = cpu;
+ irq_set_affinity(hns3_pmu->irq, cpumask_of(cpu));
+ }
+
+ return 0;
+}
+
+static int hns3_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct hns3_pmu *hns3_pmu;
+ unsigned int target;
+
+ hns3_pmu = hlist_entry_safe(node, struct hns3_pmu, node);
+ if (!hns3_pmu)
+ return -ENODEV;
+
+ /* Nothing to do if this CPU doesn't own the PMU */
+ if (hns3_pmu->on_cpu != cpu)
+ return 0;
+
+ /* Choose a new CPU from all online CPUs */
+ target = cpumask_any_but(cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ return 0;
+
+ perf_pmu_migrate_context(&hns3_pmu->pmu, cpu, target);
+ hns3_pmu->on_cpu = target;
+ irq_set_affinity(hns3_pmu->irq, cpumask_of(target));
+
+ return 0;
+}
+
+static void hns3_pmu_free_irq(void *data)
+{
+ struct pci_dev *pdev = data;
+
+ pci_free_irq_vectors(pdev);
+}
+
+static int hns3_pmu_irq_register(struct pci_dev *pdev,
+ struct hns3_pmu *hns3_pmu)
+{
+ int irq, ret;
+
+ ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+ if (ret < 0) {
+ pci_err(pdev, "failed to enable MSI vectors, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = devm_add_action(&pdev->dev, hns3_pmu_free_irq, pdev);
+ if (ret) {
+ pci_err(pdev, "failed to add free irq action, ret = %d.\n", ret);
+ return ret;
+ }
+
+ irq = pci_irq_vector(pdev, 0);
+ ret = devm_request_irq(&pdev->dev, irq, hns3_pmu_irq, 0,
+ hns3_pmu->pmu.name, hns3_pmu);
+ if (ret) {
+ pci_err(pdev, "failed to register irq, ret = %d.\n", ret);
+ return ret;
+ }
+
+ hns3_pmu->irq = irq;
+
+ return 0;
+}
+
+static int hns3_pmu_init_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu)
+{
+ int ret;
+
+ ret = hns3_pmu_alloc_pmu(pdev, hns3_pmu);
+ if (ret)
+ return ret;
+
+ ret = hns3_pmu_irq_register(pdev, hns3_pmu);
+ if (ret)
+ return ret;
+
+ ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
+ &hns3_pmu->node);
+ if (ret) {
+ pci_err(pdev, "failed to register hotplug, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = perf_pmu_register(&hns3_pmu->pmu, hns3_pmu->pmu.name, -1);
+ if (ret) {
+ pci_err(pdev, "failed to register perf PMU, ret = %d.\n", ret);
+ cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
+ &hns3_pmu->node);
+ }
+
+ return ret;
+}
+
+static void hns3_pmu_uninit_pmu(struct pci_dev *pdev)
+{
+ struct hns3_pmu *hns3_pmu = pci_get_drvdata(pdev);
+
+ perf_pmu_unregister(&hns3_pmu->pmu);
+ cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
+ &hns3_pmu->node);
+}
+
+static int hns3_pmu_init_dev(struct pci_dev *pdev)
+{
+ int ret;
+
+ ret = pcim_enable_device(pdev);
+ if (ret) {
+ pci_err(pdev, "failed to enable pci device, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = pcim_iomap_regions(pdev, BIT(BAR_2), "hns3_pmu");
+ if (ret < 0) {
+ pci_err(pdev, "failed to request pci region, ret = %d.\n", ret);
+ return ret;
+ }
+
+ pci_set_master(pdev);
+
+ return 0;
+}
+
+static int hns3_pmu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct hns3_pmu *hns3_pmu;
+ int ret;
+
+ hns3_pmu = devm_kzalloc(&pdev->dev, sizeof(*hns3_pmu), GFP_KERNEL);
+ if (!hns3_pmu)
+ return -ENOMEM;
+
+ ret = hns3_pmu_init_dev(pdev);
+ if (ret)
+ return ret;
+
+ ret = hns3_pmu_init_pmu(pdev, hns3_pmu);
+ if (ret) {
+ pci_clear_master(pdev);
+ return ret;
+ }
+
+ pci_set_drvdata(pdev, hns3_pmu);
+
+ return ret;
+}
+
+static void hns3_pmu_remove(struct pci_dev *pdev)
+{
+ hns3_pmu_uninit_pmu(pdev);
+ pci_clear_master(pdev);
+ pci_set_drvdata(pdev, NULL);
+}
+
+static const struct pci_device_id hns3_pmu_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa22b) },
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, hns3_pmu_ids);
+
+static struct pci_driver hns3_pmu_driver = {
+ .name = "hns3_pmu",
+ .id_table = hns3_pmu_ids,
+ .probe = hns3_pmu_probe,
+ .remove = hns3_pmu_remove,
+};
+
+static int __init hns3_pmu_module_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
+ "AP_PERF_ARM_HNS3_PMU_ONLINE",
+ hns3_pmu_online_cpu,
+ hns3_pmu_offline_cpu);
+ if (ret) {
+ pr_err("failed to setup HNS3 PMU hotplug, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = pci_register_driver(&hns3_pmu_driver);
+ if (ret) {
+ pr_err("failed to register pci driver, ret = %d.\n", ret);
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE);
+ }
+
+ return ret;
+}
+module_init(hns3_pmu_module_init);
+
+static void __exit hns3_pmu_module_exit(void)
+{
+ pci_unregister_driver(&hns3_pmu_driver);
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE);
+}
+module_exit(hns3_pmu_module_exit);
+
+MODULE_DESCRIPTION("HNS3 PMU driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
index 282d3a071a67..69c3050a4348 100644
--- a/drivers/perf/marvell_cn10k_tad_pmu.c
+++ b/drivers/perf/marvell_cn10k_tad_pmu.c
@@ -2,10 +2,6 @@
/* Marvell CN10K LLC-TAD perf driver
*
* Copyright (C) 2021 Marvell
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#define pr_fmt(fmt) "tad_pmu: " fmt
@@ -18,9 +14,9 @@
#include <linux/perf_event.h>
#include <linux/platform_device.h>
-#define TAD_PFC_OFFSET 0x0
+#define TAD_PFC_OFFSET 0x800
#define TAD_PFC(counter) (TAD_PFC_OFFSET | (counter << 3))
-#define TAD_PRF_OFFSET 0x100
+#define TAD_PRF_OFFSET 0x900
#define TAD_PRF(counter) (TAD_PRF_OFFSET | (counter << 3))
#define TAD_PRF_CNTSEL_MASK 0xFF
#define TAD_MAX_COUNTERS 8
@@ -100,9 +96,7 @@ static void tad_pmu_event_counter_start(struct perf_event *event, int flags)
* which sets TAD()_PRF()[CNTSEL] != 0
*/
for (i = 0; i < tad_pmu->region_cnt; i++) {
- reg_val = readq_relaxed(tad_pmu->regions[i].base +
- TAD_PRF(counter_idx));
- reg_val |= (event_idx & 0xFF);
+ reg_val = event_idx & 0xFF;
writeq_relaxed(reg_val, tad_pmu->regions[i].base +
TAD_PRF(counter_idx));
}
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
index b2b8d2074ed0..2c961839903d 100644
--- a/drivers/perf/riscv_pmu.c
+++ b/drivers/perf/riscv_pmu.c
@@ -121,7 +121,7 @@ u64 riscv_pmu_event_update(struct perf_event *event)
return delta;
}
-static void riscv_pmu_stop(struct perf_event *event, int flags)
+void riscv_pmu_stop(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
@@ -175,7 +175,7 @@ int riscv_pmu_event_set_period(struct perf_event *event)
return overflow;
}
-static void riscv_pmu_start(struct perf_event *event, int flags)
+void riscv_pmu_start(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
index dca3537a8dcc..79a3de515ece 100644
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -17,10 +17,30 @@
#include <linux/irqdomain.h>
#include <linux/of_irq.h>
#include <linux/of.h>
+#include <linux/cpu_pm.h>
#include <asm/sbi.h>
#include <asm/hwcap.h>
+PMU_FORMAT_ATTR(event, "config:0-47");
+PMU_FORMAT_ATTR(firmware, "config:63");
+
+static struct attribute *riscv_arch_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_firmware.attr,
+ NULL,
+};
+
+static struct attribute_group riscv_pmu_format_group = {
+ .name = "format",
+ .attrs = riscv_arch_formats_attr,
+};
+
+static const struct attribute_group *riscv_pmu_attr_groups[] = {
+ &riscv_pmu_format_group,
+ NULL,
+};
+
union sbi_pmu_ctr_info {
unsigned long value;
struct {
@@ -666,12 +686,15 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde
child = of_get_compatible_child(cpu, "riscv,cpu-intc");
if (!child) {
pr_err("Failed to find INTC node\n");
+ of_node_put(cpu);
return -ENODEV;
}
domain = irq_find_host(child);
of_node_put(child);
- if (domain)
+ if (domain) {
+ of_node_put(cpu);
break;
+ }
}
if (!domain) {
pr_err("Failed to find INTC IRQ root domain\n");
@@ -693,6 +716,73 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde
return 0;
}
+#ifdef CONFIG_CPU_PM
+static int riscv_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
+ void *v)
+{
+ struct riscv_pmu *rvpmu = container_of(b, struct riscv_pmu, riscv_pm_nb);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
+ int enabled = bitmap_weight(cpuc->used_hw_ctrs, RISCV_MAX_COUNTERS);
+ struct perf_event *event;
+ int idx;
+
+ if (!enabled)
+ return NOTIFY_OK;
+
+ for (idx = 0; idx < RISCV_MAX_COUNTERS; idx++) {
+ event = cpuc->events[idx];
+ if (!event)
+ continue;
+
+ switch (cmd) {
+ case CPU_PM_ENTER:
+ /*
+ * Stop and update the counter
+ */
+ riscv_pmu_stop(event, PERF_EF_UPDATE);
+ break;
+ case CPU_PM_EXIT:
+ case CPU_PM_ENTER_FAILED:
+ /*
+ * Restore and enable the counter.
+ *
+ * This requires RCU read locking to be functional, so wrap
+ * the call within RCU_NONIDLE to make the RCU subsystem aware
+ * that this CPU is not idle from an RCU perspective for the
+ * duration of the riscv_pmu_start() call.
+ */
+ RCU_NONIDLE(riscv_pmu_start(event, PERF_EF_RELOAD));
+ break;
+ default:
+ break;
+ }
+ }
+
+ return NOTIFY_OK;
+}
+
+static int riscv_pm_pmu_register(struct riscv_pmu *pmu)
+{
+ pmu->riscv_pm_nb.notifier_call = riscv_pm_pmu_notify;
+ return cpu_pm_register_notifier(&pmu->riscv_pm_nb);
+}
+
+static void riscv_pm_pmu_unregister(struct riscv_pmu *pmu)
+{
+ cpu_pm_unregister_notifier(&pmu->riscv_pm_nb);
+}
+#else
+static inline int riscv_pm_pmu_register(struct riscv_pmu *pmu) { return 0; }
+static inline void riscv_pm_pmu_unregister(struct riscv_pmu *pmu) { }
+#endif
+
+static void riscv_pmu_destroy(struct riscv_pmu *pmu)
+{
+ riscv_pm_pmu_unregister(pmu);
+ cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
+}
+
static int pmu_sbi_device_probe(struct platform_device *pdev)
{
struct riscv_pmu *pmu = NULL;
@@ -720,6 +810,7 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
}
+ pmu->pmu.attr_groups = riscv_pmu_attr_groups;
pmu->num_counters = num_counters;
pmu->ctr_start = pmu_sbi_ctr_start;
pmu->ctr_stop = pmu_sbi_ctr_stop;
@@ -733,14 +824,19 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
if (ret)
return ret;
+ ret = riscv_pm_pmu_register(pmu);
+ if (ret)
+ goto out_unregister;
+
ret = perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
- if (ret) {
- cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
- return ret;
- }
+ if (ret)
+ goto out_unregister;
return 0;
+out_unregister:
+ riscv_pmu_destroy(pmu);
+
out_free:
kfree(pmu);
return ret;
diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig
index f52960d2dfbe..bff144c97e66 100644
--- a/drivers/pinctrl/Kconfig
+++ b/drivers/pinctrl/Kconfig
@@ -32,7 +32,7 @@ config DEBUG_PINCTRL
Say Y here to add some extra checks and diagnostics to PINCTRL calls.
config PINCTRL_AMD
- tristate "AMD GPIO pin control"
+ bool "AMD GPIO pin control"
depends on HAS_IOMEM
depends on ACPI || COMPILE_TEST
select GPIOLIB
diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
index a140b6bfbfaa..bcde042d29dc 100644
--- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
+++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
@@ -102,7 +102,7 @@ struct armada_37xx_pinctrl {
struct device *dev;
struct gpio_chip gpio_chip;
struct irq_chip irq_chip;
- spinlock_t irq_lock;
+ raw_spinlock_t irq_lock;
struct pinctrl_desc pctl;
struct pinctrl_dev *pctl_dev;
struct armada_37xx_pin_group *groups;
@@ -523,9 +523,9 @@ static void armada_37xx_irq_ack(struct irq_data *d)
unsigned long flags;
armada_37xx_irq_update_reg(&reg, d);
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
writel(d->mask, info->base + reg);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
}
static void armada_37xx_irq_mask(struct irq_data *d)
@@ -536,10 +536,10 @@ static void armada_37xx_irq_mask(struct irq_data *d)
unsigned long flags;
armada_37xx_irq_update_reg(&reg, d);
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
val = readl(info->base + reg);
writel(val & ~d->mask, info->base + reg);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
}
static void armada_37xx_irq_unmask(struct irq_data *d)
@@ -550,10 +550,10 @@ static void armada_37xx_irq_unmask(struct irq_data *d)
unsigned long flags;
armada_37xx_irq_update_reg(&reg, d);
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
val = readl(info->base + reg);
writel(val | d->mask, info->base + reg);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
}
static int armada_37xx_irq_set_wake(struct irq_data *d, unsigned int on)
@@ -564,14 +564,14 @@ static int armada_37xx_irq_set_wake(struct irq_data *d, unsigned int on)
unsigned long flags;
armada_37xx_irq_update_reg(&reg, d);
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
val = readl(info->base + reg);
if (on)
val |= (BIT(d->hwirq % GPIO_PER_REG));
else
val &= ~(BIT(d->hwirq % GPIO_PER_REG));
writel(val, info->base + reg);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
return 0;
}
@@ -583,7 +583,7 @@ static int armada_37xx_irq_set_type(struct irq_data *d, unsigned int type)
u32 val, reg = IRQ_POL;
unsigned long flags;
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
armada_37xx_irq_update_reg(&reg, d);
val = readl(info->base + reg);
switch (type) {
@@ -607,11 +607,11 @@ static int armada_37xx_irq_set_type(struct irq_data *d, unsigned int type)
break;
}
default:
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
return -EINVAL;
}
writel(val, info->base + reg);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
return 0;
}
@@ -626,7 +626,7 @@ static int armada_37xx_edge_both_irq_swap_pol(struct armada_37xx_pinctrl *info,
regmap_read(info->regmap, INPUT_VAL + 4*reg_idx, &l);
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
p = readl(info->base + IRQ_POL + 4 * reg_idx);
if ((p ^ l) & (1 << bit_num)) {
/*
@@ -647,7 +647,7 @@ static int armada_37xx_edge_both_irq_swap_pol(struct armada_37xx_pinctrl *info,
ret = -1;
}
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
return ret;
}
@@ -664,11 +664,11 @@ static void armada_37xx_irq_handler(struct irq_desc *desc)
u32 status;
unsigned long flags;
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
status = readl_relaxed(info->base + IRQ_STATUS + 4 * i);
/* Manage only the interrupt that was enabled */
status &= readl_relaxed(info->base + IRQ_EN + 4 * i);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
while (status) {
u32 hwirq = ffs(status) - 1;
u32 virq = irq_find_mapping(d, hwirq +
@@ -695,12 +695,12 @@ static void armada_37xx_irq_handler(struct irq_desc *desc)
update_status:
/* Update status in case a new IRQ appears */
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
status = readl_relaxed(info->base +
IRQ_STATUS + 4 * i);
/* Manage only the interrupt that was enabled */
status &= readl_relaxed(info->base + IRQ_EN + 4 * i);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
}
}
chained_irq_exit(chip, desc);
@@ -731,7 +731,7 @@ static int armada_37xx_irqchip_register(struct platform_device *pdev,
struct device *dev = &pdev->dev;
unsigned int i, nr_irq_parent;
- spin_lock_init(&info->irq_lock);
+ raw_spin_lock_init(&info->irq_lock);
nr_irq_parent = of_irq_count(np);
if (!nr_irq_parent) {
@@ -1107,25 +1107,40 @@ static const struct of_device_id armada_37xx_pinctrl_of_match[] = {
{ },
};
+static const struct regmap_config armada_37xx_pinctrl_regmap_config = {
+ .reg_bits = 32,
+ .val_bits = 32,
+ .reg_stride = 4,
+ .use_raw_spinlock = true,
+};
+
static int __init armada_37xx_pinctrl_probe(struct platform_device *pdev)
{
struct armada_37xx_pinctrl *info;
struct device *dev = &pdev->dev;
- struct device_node *np = dev->of_node;
struct regmap *regmap;
+ void __iomem *base;
int ret;
+ base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
+ if (IS_ERR(base)) {
+ dev_err(dev, "failed to ioremap base address: %pe\n", base);
+ return PTR_ERR(base);
+ }
+
+ regmap = devm_regmap_init_mmio(dev, base,
+ &armada_37xx_pinctrl_regmap_config);
+ if (IS_ERR(regmap)) {
+ dev_err(dev, "failed to create regmap: %pe\n", regmap);
+ return PTR_ERR(regmap);
+ }
+
info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
info->dev = dev;
-
- regmap = syscon_node_to_regmap(np);
- if (IS_ERR(regmap))
- return dev_err_probe(dev, PTR_ERR(regmap), "cannot get regmap\n");
info->regmap = regmap;
-
info->data = of_device_get_match_data(dev);
ret = armada_37xx_pinctrl_register(pdev, info);
diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c
index 5f4a8c5c6650..771dd1f4fbe0 100644
--- a/drivers/pinctrl/pinctrl-ocelot.c
+++ b/drivers/pinctrl/pinctrl-ocelot.c
@@ -29,19 +29,12 @@
#define ocelot_clrsetbits(addr, clear, set) \
writel((readl(addr) & ~(clear)) | (set), (addr))
-/* PINCONFIG bits (sparx5 only) */
enum {
PINCONF_BIAS,
PINCONF_SCHMITT,
PINCONF_DRIVE_STRENGTH,
};
-#define BIAS_PD_BIT BIT(4)
-#define BIAS_PU_BIT BIT(3)
-#define BIAS_BITS (BIAS_PD_BIT|BIAS_PU_BIT)
-#define SCHMITT_BIT BIT(2)
-#define DRIVE_BITS GENMASK(1, 0)
-
/* GPIO standard registers */
#define OCELOT_GPIO_OUT_SET 0x0
#define OCELOT_GPIO_OUT_CLR 0x4
@@ -321,6 +314,13 @@ struct ocelot_pin_caps {
unsigned char a_functions[OCELOT_FUNC_PER_PIN]; /* Additional functions */
};
+struct ocelot_pincfg_data {
+ u8 pd_bit;
+ u8 pu_bit;
+ u8 drive_bits;
+ u8 schmitt_bit;
+};
+
struct ocelot_pinctrl {
struct device *dev;
struct pinctrl_dev *pctl;
@@ -328,10 +328,16 @@ struct ocelot_pinctrl {
struct regmap *map;
struct regmap *pincfg;
struct pinctrl_desc *desc;
+ const struct ocelot_pincfg_data *pincfg_data;
struct ocelot_pmx_func func[FUNC_MAX];
u8 stride;
};
+struct ocelot_match_data {
+ struct pinctrl_desc desc;
+ struct ocelot_pincfg_data pincfg_data;
+};
+
#define LUTON_P(p, f0, f1) \
static struct ocelot_pin_caps luton_pin_##p = { \
.pin = p, \
@@ -1325,24 +1331,27 @@ static int ocelot_hw_get_value(struct ocelot_pinctrl *info,
int ret = -EOPNOTSUPP;
if (info->pincfg) {
+ const struct ocelot_pincfg_data *opd = info->pincfg_data;
u32 regcfg;
- ret = regmap_read(info->pincfg, pin, &regcfg);
+ ret = regmap_read(info->pincfg,
+ pin * regmap_get_reg_stride(info->pincfg),
+ &regcfg);
if (ret)
return ret;
ret = 0;
switch (reg) {
case PINCONF_BIAS:
- *val = regcfg & BIAS_BITS;
+ *val = regcfg & (opd->pd_bit | opd->pu_bit);
break;
case PINCONF_SCHMITT:
- *val = regcfg & SCHMITT_BIT;
+ *val = regcfg & opd->schmitt_bit;
break;
case PINCONF_DRIVE_STRENGTH:
- *val = regcfg & DRIVE_BITS;
+ *val = regcfg & opd->drive_bits;
break;
default:
@@ -1359,14 +1368,18 @@ static int ocelot_pincfg_clrsetbits(struct ocelot_pinctrl *info, u32 regaddr,
u32 val;
int ret;
- ret = regmap_read(info->pincfg, regaddr, &val);
+ ret = regmap_read(info->pincfg,
+ regaddr * regmap_get_reg_stride(info->pincfg),
+ &val);
if (ret)
return ret;
val &= ~clrbits;
val |= setbits;
- ret = regmap_write(info->pincfg, regaddr, val);
+ ret = regmap_write(info->pincfg,
+ regaddr * regmap_get_reg_stride(info->pincfg),
+ val);
return ret;
}
@@ -1379,23 +1392,27 @@ static int ocelot_hw_set_value(struct ocelot_pinctrl *info,
int ret = -EOPNOTSUPP;
if (info->pincfg) {
+ const struct ocelot_pincfg_data *opd = info->pincfg_data;
ret = 0;
switch (reg) {
case PINCONF_BIAS:
- ret = ocelot_pincfg_clrsetbits(info, pin, BIAS_BITS,
+ ret = ocelot_pincfg_clrsetbits(info, pin,
+ opd->pd_bit | opd->pu_bit,
val);
break;
case PINCONF_SCHMITT:
- ret = ocelot_pincfg_clrsetbits(info, pin, SCHMITT_BIT,
+ ret = ocelot_pincfg_clrsetbits(info, pin,
+ opd->schmitt_bit,
val);
break;
case PINCONF_DRIVE_STRENGTH:
if (val <= 3)
ret = ocelot_pincfg_clrsetbits(info, pin,
- DRIVE_BITS, val);
+ opd->drive_bits,
+ val);
else
ret = -EINVAL;
break;
@@ -1425,17 +1442,20 @@ static int ocelot_pinconf_get(struct pinctrl_dev *pctldev,
if (param == PIN_CONFIG_BIAS_DISABLE)
val = (val == 0);
else if (param == PIN_CONFIG_BIAS_PULL_DOWN)
- val = (val & BIAS_PD_BIT ? true : false);
+ val = !!(val & info->pincfg_data->pd_bit);
else /* PIN_CONFIG_BIAS_PULL_UP */
- val = (val & BIAS_PU_BIT ? true : false);
+ val = !!(val & info->pincfg_data->pu_bit);
break;
case PIN_CONFIG_INPUT_SCHMITT_ENABLE:
+ if (!info->pincfg_data->schmitt_bit)
+ return -EOPNOTSUPP;
+
err = ocelot_hw_get_value(info, pin, PINCONF_SCHMITT, &val);
if (err)
return err;
- val = (val & SCHMITT_BIT ? true : false);
+ val = !!(val & info->pincfg_data->schmitt_bit);
break;
case PIN_CONFIG_DRIVE_STRENGTH:
@@ -1479,6 +1499,7 @@ static int ocelot_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
unsigned long *configs, unsigned int num_configs)
{
struct ocelot_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
+ const struct ocelot_pincfg_data *opd = info->pincfg_data;
u32 param, arg, p;
int cfg, err = 0;
@@ -1491,8 +1512,8 @@ static int ocelot_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
case PIN_CONFIG_BIAS_PULL_UP:
case PIN_CONFIG_BIAS_PULL_DOWN:
arg = (param == PIN_CONFIG_BIAS_DISABLE) ? 0 :
- (param == PIN_CONFIG_BIAS_PULL_UP) ? BIAS_PU_BIT :
- BIAS_PD_BIT;
+ (param == PIN_CONFIG_BIAS_PULL_UP) ?
+ opd->pu_bit : opd->pd_bit;
err = ocelot_hw_set_value(info, pin, PINCONF_BIAS, arg);
if (err)
@@ -1501,7 +1522,10 @@ static int ocelot_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
break;
case PIN_CONFIG_INPUT_SCHMITT_ENABLE:
- arg = arg ? SCHMITT_BIT : 0;
+ if (!opd->schmitt_bit)
+ return -EOPNOTSUPP;
+
+ arg = arg ? opd->schmitt_bit : 0;
err = ocelot_hw_set_value(info, pin, PINCONF_SCHMITT,
arg);
if (err)
@@ -1562,69 +1586,94 @@ static const struct pinctrl_ops ocelot_pctl_ops = {
.dt_free_map = pinconf_generic_dt_free_map,
};
-static struct pinctrl_desc luton_desc = {
- .name = "luton-pinctrl",
- .pins = luton_pins,
- .npins = ARRAY_SIZE(luton_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &ocelot_pmx_ops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data luton_desc = {
+ .desc = {
+ .name = "luton-pinctrl",
+ .pins = luton_pins,
+ .npins = ARRAY_SIZE(luton_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &ocelot_pmx_ops,
+ .owner = THIS_MODULE,
+ },
};
-static struct pinctrl_desc serval_desc = {
- .name = "serval-pinctrl",
- .pins = serval_pins,
- .npins = ARRAY_SIZE(serval_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &ocelot_pmx_ops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data serval_desc = {
+ .desc = {
+ .name = "serval-pinctrl",
+ .pins = serval_pins,
+ .npins = ARRAY_SIZE(serval_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &ocelot_pmx_ops,
+ .owner = THIS_MODULE,
+ },
};
-static struct pinctrl_desc ocelot_desc = {
- .name = "ocelot-pinctrl",
- .pins = ocelot_pins,
- .npins = ARRAY_SIZE(ocelot_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &ocelot_pmx_ops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data ocelot_desc = {
+ .desc = {
+ .name = "ocelot-pinctrl",
+ .pins = ocelot_pins,
+ .npins = ARRAY_SIZE(ocelot_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &ocelot_pmx_ops,
+ .owner = THIS_MODULE,
+ },
};
-static struct pinctrl_desc jaguar2_desc = {
- .name = "jaguar2-pinctrl",
- .pins = jaguar2_pins,
- .npins = ARRAY_SIZE(jaguar2_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &ocelot_pmx_ops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data jaguar2_desc = {
+ .desc = {
+ .name = "jaguar2-pinctrl",
+ .pins = jaguar2_pins,
+ .npins = ARRAY_SIZE(jaguar2_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &ocelot_pmx_ops,
+ .owner = THIS_MODULE,
+ },
};
-static struct pinctrl_desc servalt_desc = {
- .name = "servalt-pinctrl",
- .pins = servalt_pins,
- .npins = ARRAY_SIZE(servalt_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &ocelot_pmx_ops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data servalt_desc = {
+ .desc = {
+ .name = "servalt-pinctrl",
+ .pins = servalt_pins,
+ .npins = ARRAY_SIZE(servalt_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &ocelot_pmx_ops,
+ .owner = THIS_MODULE,
+ },
};
-static struct pinctrl_desc sparx5_desc = {
- .name = "sparx5-pinctrl",
- .pins = sparx5_pins,
- .npins = ARRAY_SIZE(sparx5_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &ocelot_pmx_ops,
- .confops = &ocelot_confops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data sparx5_desc = {
+ .desc = {
+ .name = "sparx5-pinctrl",
+ .pins = sparx5_pins,
+ .npins = ARRAY_SIZE(sparx5_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &ocelot_pmx_ops,
+ .confops = &ocelot_confops,
+ .owner = THIS_MODULE,
+ },
+ .pincfg_data = {
+ .pd_bit = BIT(4),
+ .pu_bit = BIT(3),
+ .drive_bits = GENMASK(1, 0),
+ .schmitt_bit = BIT(2),
+ },
};
-static struct pinctrl_desc lan966x_desc = {
- .name = "lan966x-pinctrl",
- .pins = lan966x_pins,
- .npins = ARRAY_SIZE(lan966x_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &lan966x_pmx_ops,
- .confops = &ocelot_confops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data lan966x_desc = {
+ .desc = {
+ .name = "lan966x-pinctrl",
+ .pins = lan966x_pins,
+ .npins = ARRAY_SIZE(lan966x_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &lan966x_pmx_ops,
+ .confops = &ocelot_confops,
+ .owner = THIS_MODULE,
+ },
+ .pincfg_data = {
+ .pd_bit = BIT(3),
+ .pu_bit = BIT(2),
+ .drive_bits = GENMASK(1, 0),
+ },
};
static int ocelot_create_group_func_map(struct device *dev,
@@ -1761,6 +1810,7 @@ static void ocelot_irq_mask(struct irq_data *data)
regmap_update_bits(info->map, REG(OCELOT_GPIO_INTR_ENA, info, gpio),
BIT(gpio % 32), 0);
+ gpiochip_disable_irq(chip, gpio);
}
static void ocelot_irq_unmask(struct irq_data *data)
@@ -1769,6 +1819,7 @@ static void ocelot_irq_unmask(struct irq_data *data)
struct ocelot_pinctrl *info = gpiochip_get_data(chip);
unsigned int gpio = irqd_to_hwirq(data);
+ gpiochip_enable_irq(chip, gpio);
regmap_update_bits(info->map, REG(OCELOT_GPIO_INTR_ENA, info, gpio),
BIT(gpio % 32), BIT(gpio % 32));
}
@@ -1790,8 +1841,10 @@ static struct irq_chip ocelot_eoi_irqchip = {
.irq_mask = ocelot_irq_mask,
.irq_eoi = ocelot_irq_ack,
.irq_unmask = ocelot_irq_unmask,
- .flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED,
+ .flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED |
+ IRQCHIP_IMMUTABLE,
.irq_set_type = ocelot_irq_set_type,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS
};
static struct irq_chip ocelot_irqchip = {
@@ -1800,6 +1853,8 @@ static struct irq_chip ocelot_irqchip = {
.irq_ack = ocelot_irq_ack,
.irq_unmask = ocelot_irq_unmask,
.irq_set_type = ocelot_irq_set_type,
+ .flags = IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS
};
static int ocelot_irq_set_type(struct irq_data *data, unsigned int type)
@@ -1863,7 +1918,7 @@ static int ocelot_gpiochip_register(struct platform_device *pdev,
irq = platform_get_irq_optional(pdev, 0);
if (irq > 0) {
girq = &gc->irq;
- girq->chip = &ocelot_irqchip;
+ gpio_irq_chip_set_chip(girq, &ocelot_irqchip);
girq->parent_handler = ocelot_irq_handler;
girq->num_parents = 1;
girq->parents = devm_kcalloc(&pdev->dev, 1,
@@ -1890,7 +1945,8 @@ static const struct of_device_id ocelot_pinctrl_of_match[] = {
{},
};
-static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev)
+static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev,
+ const struct ocelot_pinctrl *info)
{
void __iomem *base;
@@ -1898,7 +1954,7 @@ static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev)
.reg_bits = 32,
.val_bits = 32,
.reg_stride = 4,
- .max_register = 32,
+ .max_register = info->desc->npins * 4,
.name = "pincfg",
};
@@ -1913,6 +1969,7 @@ static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev)
static int ocelot_pinctrl_probe(struct platform_device *pdev)
{
+ const struct ocelot_match_data *data;
struct device *dev = &pdev->dev;
struct ocelot_pinctrl *info;
struct reset_control *reset;
@@ -1929,7 +1986,16 @@ static int ocelot_pinctrl_probe(struct platform_device *pdev)
if (!info)
return -ENOMEM;
- info->desc = (struct pinctrl_desc *)device_get_match_data(dev);
+ data = device_get_match_data(dev);
+ if (!data)
+ return -EINVAL;
+
+ info->desc = devm_kmemdup(dev, &data->desc, sizeof(*info->desc),
+ GFP_KERNEL);
+ if (!info->desc)
+ return -ENOMEM;
+
+ info->pincfg_data = &data->pincfg_data;
reset = devm_reset_control_get_optional_shared(dev, "switch");
if (IS_ERR(reset))
@@ -1956,7 +2022,7 @@ static int ocelot_pinctrl_probe(struct platform_device *pdev)
/* Pinconf registers */
if (info->desc->confops) {
- pincfg = ocelot_pinctrl_create_pincfg(pdev);
+ pincfg = ocelot_pinctrl_create_pincfg(pdev, info);
if (IS_ERR(pincfg))
dev_dbg(dev, "Failed to create pincfg regmap\n");
else
diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
index fd5fff9adff0..3be2a08ae3a6 100644
--- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
+++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
@@ -966,16 +966,13 @@ static int pmic_gpio_child_to_parent_hwirq(struct gpio_chip *chip,
return 0;
}
-static void *pmic_gpio_populate_parent_fwspec(struct gpio_chip *chip,
- unsigned int parent_hwirq,
- unsigned int parent_type)
+static int pmic_gpio_populate_parent_fwspec(struct gpio_chip *chip,
+ union gpio_irq_fwspec *gfwspec,
+ unsigned int parent_hwirq,
+ unsigned int parent_type)
{
struct pmic_gpio_state *state = gpiochip_get_data(chip);
- struct irq_fwspec *fwspec;
-
- fwspec = kzalloc(sizeof(*fwspec), GFP_KERNEL);
- if (!fwspec)
- return NULL;
+ struct irq_fwspec *fwspec = &gfwspec->fwspec;
fwspec->fwnode = chip->irq.parent_domain->fwnode;
@@ -985,7 +982,7 @@ static void *pmic_gpio_populate_parent_fwspec(struct gpio_chip *chip,
/* param[2] must be left as 0 */
fwspec->param[3] = parent_type;
- return fwspec;
+ return 0;
}
static int pmic_gpio_probe(struct platform_device *pdev)
diff --git a/drivers/pinctrl/ralink/pinctrl-ralink.c b/drivers/pinctrl/ralink/pinctrl-ralink.c
index 63429a287434..770862f45b3f 100644
--- a/drivers/pinctrl/ralink/pinctrl-ralink.c
+++ b/drivers/pinctrl/ralink/pinctrl-ralink.c
@@ -266,6 +266,8 @@ static int ralink_pinctrl_pins(struct ralink_priv *p)
p->func[i]->pin_count,
sizeof(int),
GFP_KERNEL);
+ if (!p->func[i]->pins)
+ return -ENOMEM;
for (j = 0; j < p->func[i]->pin_count; j++)
p->func[i]->pins[j] = p->func[i]->pin_first + j;
diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c
index a48cac55152c..c47eed9d948f 100644
--- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c
+++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c
@@ -9,8 +9,10 @@
#include <linux/clk.h>
#include <linux/gpio/driver.h>
#include <linux/io.h>
+#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/of_device.h>
+#include <linux/of_irq.h>
#include <linux/pinctrl/pinconf-generic.h>
#include <linux/pinctrl/pinconf.h>
#include <linux/pinctrl/pinctrl.h>
@@ -89,6 +91,7 @@
#define PIN(n) (0x0800 + 0x10 + (n))
#define IOLH(n) (0x1000 + (n) * 8)
#define IEN(n) (0x1800 + (n) * 8)
+#define ISEL(n) (0x2c80 + (n) * 8)
#define PWPR (0x3014)
#define SD_CH(n) (0x3000 + (n) * 4)
#define QSPI (0x3008)
@@ -112,6 +115,10 @@
#define RZG2L_PIN_ID_TO_PORT_OFFSET(id) (RZG2L_PIN_ID_TO_PORT(id) + 0x10)
#define RZG2L_PIN_ID_TO_PIN(id) ((id) % RZG2L_PINS_PER_PORT)
+#define RZG2L_TINT_MAX_INTERRUPT 32
+#define RZG2L_TINT_IRQ_START_INDEX 9
+#define RZG2L_PACK_HWIRQ(t, i) (((t) << 16) | (i))
+
struct rzg2l_dedicated_configs {
const char *name;
u32 config;
@@ -137,6 +144,9 @@ struct rzg2l_pinctrl {
struct gpio_chip gpio_chip;
struct pinctrl_gpio_range gpio_range;
+ DECLARE_BITMAP(tint_slot, RZG2L_TINT_MAX_INTERRUPT);
+ spinlock_t bitmap_lock;
+ unsigned int hwirq[RZG2L_TINT_MAX_INTERRUPT];
spinlock_t lock;
};
@@ -883,8 +893,14 @@ static int rzg2l_gpio_get(struct gpio_chip *chip, unsigned int offset)
static void rzg2l_gpio_free(struct gpio_chip *chip, unsigned int offset)
{
+ unsigned int virq;
+
pinctrl_gpio_free(chip->base + offset);
+ virq = irq_find_mapping(chip->irq.domain, offset);
+ if (virq)
+ irq_dispose_mapping(virq);
+
/*
* Set the GPIO as an input to ensure that the next GPIO request won't
* drive the GPIO pin as an output.
@@ -1104,14 +1120,221 @@ static struct {
}
};
+static int rzg2l_gpio_get_gpioint(unsigned int virq)
+{
+ unsigned int gpioint;
+ unsigned int i;
+ u32 port, bit;
+
+ port = virq / 8;
+ bit = virq % 8;
+
+ if (port >= ARRAY_SIZE(rzg2l_gpio_configs) ||
+ bit >= RZG2L_GPIO_PORT_GET_PINCNT(rzg2l_gpio_configs[port]))
+ return -EINVAL;
+
+ gpioint = bit;
+ for (i = 0; i < port; i++)
+ gpioint += RZG2L_GPIO_PORT_GET_PINCNT(rzg2l_gpio_configs[i]);
+
+ return gpioint;
+}
+
+static void rzg2l_gpio_irq_disable(struct irq_data *d)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct rzg2l_pinctrl *pctrl = container_of(gc, struct rzg2l_pinctrl, gpio_chip);
+ unsigned int hwirq = irqd_to_hwirq(d);
+ unsigned long flags;
+ void __iomem *addr;
+ u32 port;
+ u8 bit;
+
+ port = RZG2L_PIN_ID_TO_PORT(hwirq);
+ bit = RZG2L_PIN_ID_TO_PIN(hwirq);
+
+ addr = pctrl->base + ISEL(port);
+ if (bit >= 4) {
+ bit -= 4;
+ addr += 4;
+ }
+
+ spin_lock_irqsave(&pctrl->lock, flags);
+ writel(readl(addr) & ~BIT(bit * 8), addr);
+ spin_unlock_irqrestore(&pctrl->lock, flags);
+
+ gpiochip_disable_irq(gc, hwirq);
+ irq_chip_disable_parent(d);
+}
+
+static void rzg2l_gpio_irq_enable(struct irq_data *d)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct rzg2l_pinctrl *pctrl = container_of(gc, struct rzg2l_pinctrl, gpio_chip);
+ unsigned int hwirq = irqd_to_hwirq(d);
+ unsigned long flags;
+ void __iomem *addr;
+ u32 port;
+ u8 bit;
+
+ gpiochip_enable_irq(gc, hwirq);
+
+ port = RZG2L_PIN_ID_TO_PORT(hwirq);
+ bit = RZG2L_PIN_ID_TO_PIN(hwirq);
+
+ addr = pctrl->base + ISEL(port);
+ if (bit >= 4) {
+ bit -= 4;
+ addr += 4;
+ }
+
+ spin_lock_irqsave(&pctrl->lock, flags);
+ writel(readl(addr) | BIT(bit * 8), addr);
+ spin_unlock_irqrestore(&pctrl->lock, flags);
+
+ irq_chip_enable_parent(d);
+}
+
+static int rzg2l_gpio_irq_set_type(struct irq_data *d, unsigned int type)
+{
+ return irq_chip_set_type_parent(d, type);
+}
+
+static void rzg2l_gpio_irqc_eoi(struct irq_data *d)
+{
+ irq_chip_eoi_parent(d);
+}
+
+static void rzg2l_gpio_irq_print_chip(struct irq_data *data, struct seq_file *p)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+
+ seq_printf(p, dev_name(gc->parent));
+}
+
+static const struct irq_chip rzg2l_gpio_irqchip = {
+ .name = "rzg2l-gpio",
+ .irq_disable = rzg2l_gpio_irq_disable,
+ .irq_enable = rzg2l_gpio_irq_enable,
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_set_type = rzg2l_gpio_irq_set_type,
+ .irq_eoi = rzg2l_gpio_irqc_eoi,
+ .irq_print_chip = rzg2l_gpio_irq_print_chip,
+ .flags = IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
+};
+
+static int rzg2l_gpio_child_to_parent_hwirq(struct gpio_chip *gc,
+ unsigned int child,
+ unsigned int child_type,
+ unsigned int *parent,
+ unsigned int *parent_type)
+{
+ struct rzg2l_pinctrl *pctrl = gpiochip_get_data(gc);
+ unsigned long flags;
+ int gpioint, irq;
+
+ gpioint = rzg2l_gpio_get_gpioint(child);
+ if (gpioint < 0)
+ return gpioint;
+
+ spin_lock_irqsave(&pctrl->bitmap_lock, flags);
+ irq = bitmap_find_free_region(pctrl->tint_slot, RZG2L_TINT_MAX_INTERRUPT, get_order(1));
+ spin_unlock_irqrestore(&pctrl->bitmap_lock, flags);
+ if (irq < 0)
+ return -ENOSPC;
+ pctrl->hwirq[irq] = child;
+ irq += RZG2L_TINT_IRQ_START_INDEX;
+
+ /* All these interrupts are level high in the CPU */
+ *parent_type = IRQ_TYPE_LEVEL_HIGH;
+ *parent = RZG2L_PACK_HWIRQ(gpioint, irq);
+ return 0;
+}
+
+static int rzg2l_gpio_populate_parent_fwspec(struct gpio_chip *chip,
+ union gpio_irq_fwspec *gfwspec,
+ unsigned int parent_hwirq,
+ unsigned int parent_type)
+{
+ struct irq_fwspec *fwspec = &gfwspec->fwspec;
+
+ fwspec->fwnode = chip->irq.parent_domain->fwnode;
+ fwspec->param_count = 2;
+ fwspec->param[0] = parent_hwirq;
+ fwspec->param[1] = parent_type;
+
+ return 0;
+}
+
+static void rzg2l_gpio_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct irq_data *d;
+
+ d = irq_domain_get_irq_data(domain, virq);
+ if (d) {
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct rzg2l_pinctrl *pctrl = container_of(gc, struct rzg2l_pinctrl, gpio_chip);
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
+ unsigned long flags;
+ unsigned int i;
+
+ for (i = 0; i < RZG2L_TINT_MAX_INTERRUPT; i++) {
+ if (pctrl->hwirq[i] == hwirq) {
+ spin_lock_irqsave(&pctrl->bitmap_lock, flags);
+ bitmap_release_region(pctrl->tint_slot, i, get_order(1));
+ spin_unlock_irqrestore(&pctrl->bitmap_lock, flags);
+ pctrl->hwirq[i] = 0;
+ break;
+ }
+ }
+ }
+ irq_domain_free_irqs_common(domain, virq, nr_irqs);
+}
+
+static void rzg2l_init_irq_valid_mask(struct gpio_chip *gc,
+ unsigned long *valid_mask,
+ unsigned int ngpios)
+{
+ struct rzg2l_pinctrl *pctrl = gpiochip_get_data(gc);
+ struct gpio_chip *chip = &pctrl->gpio_chip;
+ unsigned int offset;
+
+ /* Forbid unused lines to be mapped as IRQs */
+ for (offset = 0; offset < chip->ngpio; offset++) {
+ u32 port, bit;
+
+ port = offset / 8;
+ bit = offset % 8;
+
+ if (port >= ARRAY_SIZE(rzg2l_gpio_configs) ||
+ bit >= RZG2L_GPIO_PORT_GET_PINCNT(rzg2l_gpio_configs[port]))
+ clear_bit(offset, valid_mask);
+ }
+}
+
static int rzg2l_gpio_register(struct rzg2l_pinctrl *pctrl)
{
struct device_node *np = pctrl->dev->of_node;
struct gpio_chip *chip = &pctrl->gpio_chip;
const char *name = dev_name(pctrl->dev);
+ struct irq_domain *parent_domain;
struct of_phandle_args of_args;
+ struct device_node *parent_np;
+ struct gpio_irq_chip *girq;
int ret;
+ parent_np = of_irq_find_parent(np);
+ if (!parent_np)
+ return -ENXIO;
+
+ parent_domain = irq_find_host(parent_np);
+ of_node_put(parent_np);
+ if (!parent_domain)
+ return -EPROBE_DEFER;
+
ret = of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0, &of_args);
if (ret) {
dev_err(pctrl->dev, "Unable to parse gpio-ranges\n");
@@ -1138,6 +1361,15 @@ static int rzg2l_gpio_register(struct rzg2l_pinctrl *pctrl)
chip->base = -1;
chip->ngpio = of_args.args[2];
+ girq = &chip->irq;
+ gpio_irq_chip_set_chip(girq, &rzg2l_gpio_irqchip);
+ girq->fwnode = of_node_to_fwnode(np);
+ girq->parent_domain = parent_domain;
+ girq->child_to_parent_hwirq = rzg2l_gpio_child_to_parent_hwirq;
+ girq->populate_parent_alloc_arg = rzg2l_gpio_populate_parent_fwspec;
+ girq->child_irq_domain_ops.free = rzg2l_gpio_irq_domain_free;
+ girq->init_valid_mask = rzg2l_init_irq_valid_mask;
+
pctrl->gpio_range.id = 0;
pctrl->gpio_range.pin_base = 0;
pctrl->gpio_range.base = 0;
@@ -1253,6 +1485,7 @@ static int rzg2l_pinctrl_probe(struct platform_device *pdev)
}
spin_lock_init(&pctrl->lock);
+ spin_lock_init(&pctrl->bitmap_lock);
platform_set_drvdata(pdev, pctrl);
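For orientation: the RZ/G2L change above routes GPIO interrupts through the parent IRQC domain by packing the GPIOINT number and the allocated TINT slot into a single parent hwirq (GPIOINT in the upper 16 bits, TINT index in the lower 16), as done by RZG2L_PACK_HWIRQ() in rzg2l_gpio_child_to_parent_hwirq(). The standalone sketch below only illustrates that encoding; the unpack macros are assumptions added for the example (the driver itself only packs).

#include <stdint.h>
#include <stdio.h>

/* Mirrors RZG2L_PACK_HWIRQ(t, i): GPIOINT in bits 31:16, TINT input in bits 15:0. */
#define PACK_HWIRQ(gpioint, tint)   (((uint32_t)(gpioint) << 16) | (tint))
#define HWIRQ_GPIOINT(hwirq)        ((hwirq) >> 16)        /* illustrative unpack */
#define HWIRQ_TINT(hwirq)           ((hwirq) & 0xffff)     /* illustrative unpack */

#define TINT_IRQ_START_INDEX 9      /* first usable TINT input, as in the patch */

int main(void)
{
	/* Example: GPIOINT 42 routed through the third free TINT slot (slot 2). */
	uint32_t hwirq = PACK_HWIRQ(42, 2 + TINT_IRQ_START_INDEX);

	printf("hwirq=0x%x gpioint=%u tint=%u\n",
	       (unsigned)hwirq, (unsigned)HWIRQ_GPIOINT(hwirq),
	       (unsigned)HWIRQ_TINT(hwirq));
	return 0;
}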
diff --git a/drivers/pinctrl/sunplus/sppctl.c b/drivers/pinctrl/sunplus/sppctl.c
index 3ba47040ac42..2b3335ab56c6 100644
--- a/drivers/pinctrl/sunplus/sppctl.c
+++ b/drivers/pinctrl/sunplus/sppctl.c
@@ -871,6 +871,9 @@ static int sppctl_dt_node_to_map(struct pinctrl_dev *pctldev, struct device_node
}
*map = kcalloc(*num_maps + nmG, sizeof(**map), GFP_KERNEL);
+ if (*map == NULL)
+ return -ENOMEM;
+
for (i = 0; i < (*num_maps); i++) {
dt_pin = be32_to_cpu(list[i]);
pin_num = FIELD_GET(GENMASK(31, 24), dt_pin);
diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c
index f11d18beac18..700eb19e8450 100644
--- a/drivers/platform/x86/amd-pmc.c
+++ b/drivers/platform/x86/amd-pmc.c
@@ -91,6 +91,8 @@
#define AMD_CPU_ID_PCO AMD_CPU_ID_RV
#define AMD_CPU_ID_CZN AMD_CPU_ID_RN
#define AMD_CPU_ID_YC 0x14B5
+#define AMD_CPU_ID_CB 0x14D8
+#define AMD_CPU_ID_PS 0x14E8
#define PMC_MSG_DELAY_MIN_US 50
#define RESPONSE_REGISTER_LOOP_MAX 20000
@@ -318,6 +320,8 @@ static int amd_pmc_idlemask_read(struct amd_pmc_dev *pdev, struct device *dev,
val = amd_pmc_reg_read(pdev, AMD_PMC_SCRATCH_REG_CZN);
break;
case AMD_CPU_ID_YC:
+ case AMD_CPU_ID_CB:
+ case AMD_CPU_ID_PS:
val = amd_pmc_reg_read(pdev, AMD_PMC_SCRATCH_REG_YC);
break;
default:
@@ -491,7 +495,8 @@ static void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev)
&amd_pmc_idlemask_fops);
/* Enable STB only when the module_param is set */
if (enable_stb) {
- if (dev->cpu_id == AMD_CPU_ID_YC)
+ if (dev->cpu_id == AMD_CPU_ID_YC || dev->cpu_id == AMD_CPU_ID_CB ||
+ dev->cpu_id == AMD_CPU_ID_PS)
debugfs_create_file("stb_read", 0644, dev->dbgfs_dir, dev,
&amd_pmc_stb_debugfs_fops_v2);
else
@@ -615,6 +620,8 @@ static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev)
return MSG_OS_HINT_PCO;
case AMD_CPU_ID_RN:
case AMD_CPU_ID_YC:
+ case AMD_CPU_ID_CB:
+ case AMD_CPU_ID_PS:
return MSG_OS_HINT_RN;
}
return -EINVAL;
@@ -735,6 +742,8 @@ static struct acpi_s2idle_dev_ops amd_pmc_s2idle_dev_ops = {
#endif
static const struct pci_device_id pmc_pci_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_PS) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_CB) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_YC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_CZN) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RN) },
@@ -877,7 +886,7 @@ static int amd_pmc_probe(struct platform_device *pdev)
mutex_init(&dev->lock);
- if (enable_stb && dev->cpu_id == AMD_CPU_ID_YC) {
+ if (enable_stb && (dev->cpu_id == AMD_CPU_ID_YC || dev->cpu_id == AMD_CPU_ID_CB)) {
err = amd_pmc_s2d_init(dev);
if (err)
return err;
@@ -915,6 +924,7 @@ static const struct acpi_device_id amd_pmc_acpi_ids[] = {
{"AMDI0005", 0},
{"AMDI0006", 0},
{"AMDI0007", 0},
+ {"AMDI0008", 0},
{"AMD0004", 0},
{"AMD0005", 0},
{ }
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index 57a07db659cb..478dd300b9c9 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -522,6 +522,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = {
{ KE_KEY, 0x31, { KEY_VOLUMEDOWN } },
{ KE_KEY, 0x32, { KEY_MUTE } },
{ KE_KEY, 0x35, { KEY_SCREENLOCK } },
+ { KE_KEY, 0x38, { KEY_PROG3 } }, /* Armoury Crate */
{ KE_KEY, 0x40, { KEY_PREVIOUSSONG } },
{ KE_KEY, 0x41, { KEY_NEXTSONG } },
{ KE_KEY, 0x43, { KEY_STOPCD } }, /* Stop/Eject */
@@ -574,6 +575,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = {
{ KE_KEY, 0xA5, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV + HDMI */
{ KE_KEY, 0xA6, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV + HDMI */
{ KE_KEY, 0xA7, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV + HDMI */
+ { KE_KEY, 0xB3, { KEY_PROG4 } }, /* AURA */
{ KE_KEY, 0xB5, { KEY_CALC } },
{ KE_KEY, 0xC4, { KEY_KBDILLUMUP } },
{ KE_KEY, 0xC5, { KEY_KBDILLUMDOWN } },
diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c
index 497ad2f64a51..5e7e6659a849 100644
--- a/drivers/platform/x86/gigabyte-wmi.c
+++ b/drivers/platform/x86/gigabyte-wmi.c
@@ -150,6 +150,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B660 GAMING X DDR4"),
+ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B660I AORUS PRO DDR4"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z490 AORUS ELITE AC"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 AORUS ELITE"),
diff --git a/drivers/platform/x86/intel/atomisp2/led.c b/drivers/platform/x86/intel/atomisp2/led.c
index 5935dfca166f..10077a61d8c5 100644
--- a/drivers/platform/x86/intel/atomisp2/led.c
+++ b/drivers/platform/x86/intel/atomisp2/led.c
@@ -50,7 +50,8 @@ static const struct dmi_system_id atomisp2_led_systems[] __initconst = {
{
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TA"),
+ /* Non-exact match to also match T100TAF */
+ DMI_MATCH(DMI_PRODUCT_NAME, "T100TA"),
+ /* Non-exact match to also match T100TAF */
},
.driver_data = &asus_t100ta_lookup,
},
diff --git a/drivers/platform/x86/intel/ifs/Kconfig b/drivers/platform/x86/intel/ifs/Kconfig
index 7ce896434b8f..c341a27cc1a3 100644
--- a/drivers/platform/x86/intel/ifs/Kconfig
+++ b/drivers/platform/x86/intel/ifs/Kconfig
@@ -1,6 +1,9 @@
config INTEL_IFS
tristate "Intel In Field Scan"
depends on X86 && CPU_SUP_INTEL && 64BIT && SMP
+ # Discussion on the list has shown that the sysfs API needs a bit
+ # more work, mark this as broken for now
+ depends on BROKEN
select INTEL_IFS_DEVICE
help
Enable support for the In Field Scan capability in select
diff --git a/drivers/platform/x86/x86-android-tablets.c b/drivers/platform/x86/x86-android-tablets.c
index f446be72e539..480375977435 100644
--- a/drivers/platform/x86/x86-android-tablets.c
+++ b/drivers/platform/x86/x86-android-tablets.c
@@ -27,8 +27,8 @@
#include <linux/pinctrl/machine.h>
#include <linux/platform_data/lp855x.h>
#include <linux/platform_device.h>
-#include <linux/pm.h>
#include <linux/power/bq24190_charger.h>
+#include <linux/reboot.h>
#include <linux/rmi.h>
#include <linux/serdev.h>
#include <linux/spi/spi.h>
@@ -889,6 +889,7 @@ static const struct pinctrl_map lenovo_yoga_tab2_830_1050_codec_pinctrl_map =
"INT33FC:02", "pmu_clk2_grp", "pmu_clk");
static struct pinctrl *lenovo_yoga_tab2_830_1050_codec_pinctrl;
+static struct sys_off_handler *lenovo_yoga_tab2_830_1050_sys_off_handler;
static int __init lenovo_yoga_tab2_830_1050_init_codec(void)
{
@@ -933,9 +934,11 @@ err_put_device:
* followed by a normal 3 second press to recover. Avoid this by doing an EFI
* poweroff instead.
*/
-static void lenovo_yoga_tab2_830_1050_power_off(void)
+static int lenovo_yoga_tab2_830_1050_power_off(struct sys_off_data *data)
{
efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL);
+
+ return NOTIFY_DONE;
}
static int __init lenovo_yoga_tab2_830_1050_init(void)
@@ -950,13 +953,19 @@ static int __init lenovo_yoga_tab2_830_1050_init(void)
if (ret)
return ret;
- pm_power_off = lenovo_yoga_tab2_830_1050_power_off;
+ /* SYS_OFF_PRIO_FIRMWARE + 1 so that it runs before acpi_power_off */
+ lenovo_yoga_tab2_830_1050_sys_off_handler =
+ register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, SYS_OFF_PRIO_FIRMWARE + 1,
+ lenovo_yoga_tab2_830_1050_power_off, NULL);
+ if (IS_ERR(lenovo_yoga_tab2_830_1050_sys_off_handler))
+ return PTR_ERR(lenovo_yoga_tab2_830_1050_sys_off_handler);
+
return 0;
}
static void lenovo_yoga_tab2_830_1050_exit(void)
{
- pm_power_off = NULL; /* Just turn poweroff into halt on module unload */
+ unregister_sys_off_handler(lenovo_yoga_tab2_830_1050_sys_off_handler);
if (lenovo_yoga_tab2_830_1050_codec_pinctrl) {
pinctrl_put(lenovo_yoga_tab2_830_1050_codec_pinctrl);
diff --git a/drivers/power/reset/arm-versatile-reboot.c b/drivers/power/reset/arm-versatile-reboot.c
index 08d0a07b58ef..c7624d7611a7 100644
--- a/drivers/power/reset/arm-versatile-reboot.c
+++ b/drivers/power/reset/arm-versatile-reboot.c
@@ -146,6 +146,7 @@ static int __init versatile_reboot_probe(void)
versatile_reboot_type = (enum versatile_reboot)reboot_id->data;
syscon_regmap = syscon_node_to_regmap(np);
+ of_node_put(np);
if (IS_ERR(syscon_regmap))
return PTR_ERR(syscon_regmap);
diff --git a/drivers/power/supply/ab8500_fg.c b/drivers/power/supply/ab8500_fg.c
index ec8a404d71b4..4339fa9ff009 100644
--- a/drivers/power/supply/ab8500_fg.c
+++ b/drivers/power/supply/ab8500_fg.c
@@ -3148,6 +3148,7 @@ static int ab8500_fg_probe(struct platform_device *pdev)
ret = ab8500_fg_init_hw_registers(di);
if (ret) {
dev_err(dev, "failed to initialize registers\n");
+ destroy_workqueue(di->fg_wq);
return ret;
}
@@ -3159,6 +3160,7 @@ static int ab8500_fg_probe(struct platform_device *pdev)
di->fg_psy = devm_power_supply_register(dev, &ab8500_fg_desc, &psy_cfg);
if (IS_ERR(di->fg_psy)) {
dev_err(dev, "failed to register FG psy\n");
+ destroy_workqueue(di->fg_wq);
return PTR_ERR(di->fg_psy);
}
@@ -3174,8 +3176,10 @@ static int ab8500_fg_probe(struct platform_device *pdev)
/* Register primary interrupt handlers */
for (i = 0; i < ARRAY_SIZE(ab8500_fg_irq); i++) {
irq = platform_get_irq_byname(pdev, ab8500_fg_irq[i].name);
- if (irq < 0)
+ if (irq < 0) {
+ destroy_workqueue(di->fg_wq);
return irq;
+ }
ret = devm_request_threaded_irq(dev, irq, NULL,
ab8500_fg_irq[i].isr,
@@ -3185,6 +3189,7 @@ static int ab8500_fg_probe(struct platform_device *pdev)
if (ret != 0) {
dev_err(dev, "failed to request %s IRQ %d: %d\n",
ab8500_fg_irq[i].name, irq, ret);
+ destroy_workqueue(di->fg_wq);
return ret;
}
dev_dbg(dev, "Requested %s IRQ %d: %d\n",
@@ -3200,6 +3205,7 @@ static int ab8500_fg_probe(struct platform_device *pdev)
ret = ab8500_fg_sysfs_init(di);
if (ret) {
dev_err(dev, "failed to create sysfs entry\n");
+ destroy_workqueue(di->fg_wq);
return ret;
}
@@ -3207,6 +3213,7 @@ static int ab8500_fg_probe(struct platform_device *pdev)
if (ret) {
dev_err(dev, "failed to create FG psy\n");
ab8500_fg_sysfs_exit(di);
+ destroy_workqueue(di->fg_wq);
return ret;
}
diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index fad5890c899e..470253c337c7 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -846,17 +846,17 @@ int power_supply_temp2resist_simple(struct power_supply_resistance_temp_table *t
{
int i, high, low;
- /* Break loop at table_len - 1 because that is the highest index */
- for (i = 0; i < table_len - 1; i++)
+ for (i = 0; i < table_len; i++)
if (temp > table[i].temp)
break;
/* The library function will deal with high == low */
- if ((i == 0) || (i == (table_len - 1)))
- high = i;
+ if (i == 0)
+ high = low = i;
+ else if (i == table_len)
+ high = low = i - 1;
else
- high = i - 1;
- low = i;
+ high = (low = i) - 1;
return fixp_linear_interpolate(table[low].temp,
table[low].resistance,
@@ -958,17 +958,17 @@ int power_supply_ocv2cap_simple(struct power_supply_battery_ocv_table *table,
{
int i, high, low;
- /* Break loop at table_len - 1 because that is the highest index */
- for (i = 0; i < table_len - 1; i++)
+ for (i = 0; i < table_len; i++)
if (ocv > table[i].ocv)
break;
/* The library function will deal with high == low */
- if ((i == 0) || (i == (table_len - 1)))
- high = i - 1;
+ if (i == 0)
+ high = low = i;
+ else if (i == table_len)
+ high = low = i - 1;
else
- high = i; /* i.e. i == 0 */
- low = i;
+ high = (low = i) - 1;
return fixp_linear_interpolate(table[low].ocv,
table[low].capacity,
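The two power_supply lookup fixes above share one idea: scan the whole table, then clamp the index pair at both ends and interpolate between neighbours otherwise. A standalone sketch of that clamping follows; lerp() is a simplified integer stand-in for the kernel's fixp_linear_interpolate(), and the table values are invented for the example.

#include <stdio.h>

struct rt { int temp, resistance; };

/* Simplified integer interpolation (stand-in for fixp_linear_interpolate()). */
static int lerp(int x0, int y0, int x1, int y1, int x)
{
	if (x0 == x1)
		return y0;
	return y0 + (y1 - y0) * (x - x0) / (x1 - x0);
}

/* Mirrors the fixed lookup: scan the full table, then clamp at both ends. */
static int temp2resist(const struct rt *t, int len, int temp)
{
	int i, high, low;

	for (i = 0; i < len; i++)
		if (temp > t[i].temp)
			break;

	if (i == 0)
		high = low = 0;            /* hotter than the whole table */
	else if (i == len)
		high = low = len - 1;      /* colder than the whole table */
	else
		high = (low = i) - 1;      /* interpolate between neighbours */

	return lerp(t[low].temp, t[low].resistance,
		    t[high].temp, t[high].resistance, temp);
}

int main(void)
{
	/* Temperatures in descending order, as the kernel tables are. */
	const struct rt table[] = { { 60, 300 }, { 40, 600 }, { 20, 1200 } };

	printf("%d %d %d\n",
	       temp2resist(table, 3, 70),   /* clamped high end: 300 */
	       temp2resist(table, 3, 30),   /* interpolated: 900 */
	       temp2resist(table, 3, 0));   /* clamped low end: 1200 */
	return 0;
}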
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index f5eced0842b3..6a88eb7e9f75 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -71,34 +71,19 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
static u64 scale_pd_power_uw(struct cpumask *pd_mask, u64 power)
{
- unsigned long max = 0, sum_util = 0;
+ unsigned long max, sum_util = 0;
int cpu;
- for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {
-
- /*
- * The capacity is the same for all CPUs belonging to
- * the same perf domain, so a single call to
- * arch_scale_cpu_capacity() is enough. However, we
- * need the CPU parameter to be initialized by the
- * loop, so the call ends up in this block.
- *
- * We can initialize 'max' with a cpumask_first() call
- * before the loop but the bits computation is not
- * worth given the arch_scale_cpu_capacity() just
- * returns a value where the resulting assembly code
- * will be optimized by the compiler.
- */
- max = arch_scale_cpu_capacity(cpu);
- sum_util += sched_cpu_util(cpu, max);
- }
-
/*
- * In the improbable case where all the CPUs of the perf
- * domain are offline, 'max' will be zero and will lead to an
- * illegal operation with a zero division.
+ * The capacity is the same for all CPUs belonging to
+ * the same perf domain.
*/
- return max ? (power * ((sum_util << 10) / max)) >> 10 : 0;
+ max = arch_scale_cpu_capacity(cpumask_first(pd_mask));
+
+ for_each_cpu_and(cpu, pd_mask, cpu_online_mask)
+ sum_util += sched_cpu_util(cpu);
+
+ return (power * ((sum_util << 10) / max)) >> 10;
}
static u64 get_pd_power_uw(struct dtpm *dtpm)
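The dtpm change above scales power by the perf domain's aggregate utilization relative to one CPU's capacity, using a 10-bit fixed-point intermediate instead of floating point. A standalone illustration follows; treating "power" as the per-CPU power at the current operating point is an assumption made for the example, not something stated in the hunk.

#include <stdint.h>
#include <stdio.h>

/* Mirrors the patch's expression: power * ((sum_util << 10) / max) >> 10. */
static uint64_t scale_power(uint64_t power, uint64_t sum_util, uint64_t max_capacity)
{
	return (power * ((sum_util << 10) / max_capacity)) >> 10;
}

int main(void)
{
	/* Per-CPU power 2000000 uW; utilization worth two fully busy CPUs of capacity 1024. */
	printf("%llu\n",
	       (unsigned long long)scale_power(2000000, 2048, 1024)); /* 4000000 */
	return 0;
}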
diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
index 458218f88c5e..fe4971b65c64 100644
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -176,6 +176,7 @@ config PTP_1588_CLOCK_OCP
depends on !S390
depends on COMMON_CLK
select NET_DEVLINK
+ select CRC16
help
This driver adds support for an OpenCompute time card.
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 5c13d2079d96..0a9045b49c50 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -1435,7 +1435,7 @@ static int __verify_queue_reservations(struct device_driver *drv, void *data)
if (ap_drv->in_use) {
rc = ap_drv->in_use(ap_perms.apm, newaqm);
if (rc)
- return -EBUSY;
+ rc = -EBUSY;
}
/* release the driver's module */
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 9e54fe76a9b2..35d4b398c197 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -3565,7 +3565,7 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
if (!atomic_read(&queue->set_pci_flags_count)) {
/*
* there's no outstanding PCI any more, so we
- * have to request a PCI to be sure the the PCI
+ * have to request a PCI to be sure the PCI
* will wake at some time in the future then we
* can flush packed buffers that might still be
* hanging around, which can happen if no
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index c95360a3c186..0917b05059b4 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -3195,6 +3195,9 @@ static int megasas_map_queues(struct Scsi_Host *shost)
qoff += map->nr_queues;
offset += map->nr_queues;
+ /* we never use READ queue, so can't cheat blk-mq */
+ shost->tag_set.map[HCTX_TYPE_READ].nr_queues = 0;
+
/* Setup Poll hctx */
map = &shost->tag_set.map[HCTX_TYPE_POLL];
map->nr_queues = instance->iopoll_q_count;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index b519f4b59d30..5e8887fa02c8 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -11386,6 +11386,7 @@ scsih_shutdown(struct pci_dev *pdev)
_scsih_ir_shutdown(ioc);
_scsih_nvme_shutdown(ioc);
mpt3sas_base_mask_interrupts(ioc);
+ mpt3sas_base_stop_watchdog(ioc);
ioc->shost_recovery = 1;
mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET);
ioc->shost_recovery = 0;
diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c
index f7466a895d3b..991eb01bb1e0 100644
--- a/drivers/scsi/pm8001/pm8001_hwi.c
+++ b/drivers/scsi/pm8001/pm8001_hwi.c
@@ -3145,15 +3145,6 @@ void pm8001_bytes_dmaed(struct pm8001_hba_info *pm8001_ha, int i)
if (!phy->phy_attached)
return;
- if (sas_phy->phy) {
- struct sas_phy *sphy = sas_phy->phy;
- sphy->negotiated_linkrate = sas_phy->linkrate;
- sphy->minimum_linkrate = phy->minimum_linkrate;
- sphy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
- sphy->maximum_linkrate = phy->maximum_linkrate;
- sphy->maximum_linkrate_hw = phy->maximum_linkrate;
- }
-
if (phy->phy_type & PORT_TYPE_SAS) {
struct sas_identify_frame *id;
id = (struct sas_identify_frame *)phy->frame_rcvd;
@@ -3177,26 +3168,22 @@ void pm8001_get_lrate_mode(struct pm8001_phy *phy, u8 link_rate)
switch (link_rate) {
case PHY_SPEED_120:
phy->sas_phy.linkrate = SAS_LINK_RATE_12_0_GBPS;
- phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_12_0_GBPS;
break;
case PHY_SPEED_60:
phy->sas_phy.linkrate = SAS_LINK_RATE_6_0_GBPS;
- phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_6_0_GBPS;
break;
case PHY_SPEED_30:
phy->sas_phy.linkrate = SAS_LINK_RATE_3_0_GBPS;
- phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_3_0_GBPS;
break;
case PHY_SPEED_15:
phy->sas_phy.linkrate = SAS_LINK_RATE_1_5_GBPS;
- phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_1_5_GBPS;
break;
}
sas_phy->negotiated_linkrate = phy->sas_phy.linkrate;
- sas_phy->maximum_linkrate_hw = SAS_LINK_RATE_6_0_GBPS;
+ sas_phy->maximum_linkrate_hw = phy->maximum_linkrate;
sas_phy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
- sas_phy->maximum_linkrate = SAS_LINK_RATE_6_0_GBPS;
- sas_phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS;
+ sas_phy->maximum_linkrate = phy->maximum_linkrate;
+ sas_phy->minimum_linkrate = phy->minimum_linkrate;
}
/**
diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c
index 9b04f1a6a67d..01f2f41928eb 100644
--- a/drivers/scsi/pm8001/pm8001_init.c
+++ b/drivers/scsi/pm8001/pm8001_init.c
@@ -143,6 +143,8 @@ static void pm8001_phy_init(struct pm8001_hba_info *pm8001_ha, int phy_id)
struct asd_sas_phy *sas_phy = &phy->sas_phy;
phy->phy_state = PHY_LINK_DISABLE;
phy->pm8001_ha = pm8001_ha;
+ phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS;
+ phy->maximum_linkrate = SAS_LINK_RATE_6_0_GBPS;
sas_phy->enabled = (phy_id < pm8001_ha->chip->n_phy) ? 1 : 0;
sas_phy->class = SAS;
sas_phy->iproto = SAS_PROTOCOL_ALL;
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index 01c5e8ff4cc5..303cd05fec50 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -3723,8 +3723,12 @@ static int mpi_phy_stop_resp(struct pm8001_hba_info *pm8001_ha, void *piomb)
pm8001_dbg(pm8001_ha, MSG, "phy:0x%x status:0x%x\n",
phyid, status);
if (status == PHY_STOP_SUCCESS ||
- status == PHY_STOP_ERR_DEVICE_ATTACHED)
+ status == PHY_STOP_ERR_DEVICE_ATTACHED) {
phy->phy_state = PHY_LINK_DISABLE;
+ phy->sas_phy.phy->negotiated_linkrate = SAS_PHY_DISABLED;
+ phy->sas_phy.linkrate = SAS_PHY_DISABLED;
+ }
+
return 0;
}
diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
index a480c4d589f5..729e309e6034 100644
--- a/drivers/scsi/scsi_ioctl.c
+++ b/drivers/scsi/scsi_ioctl.c
@@ -450,7 +450,7 @@ static int sg_io(struct scsi_device *sdev, struct sg_io_hdr *hdr, fmode_t mode)
goto out_put_request;
ret = 0;
- if (hdr->iovec_count) {
+ if (hdr->iovec_count && hdr->dxfer_len) {
struct iov_iter i;
struct iovec *iov = NULL;
diff --git a/drivers/sh/intc/chip.c b/drivers/sh/intc/chip.c
index 358df7510186..828d81e02b37 100644
--- a/drivers/sh/intc/chip.c
+++ b/drivers/sh/intc/chip.c
@@ -72,7 +72,7 @@ static int intc_set_affinity(struct irq_data *data,
if (!cpumask_intersects(cpumask, cpu_online_mask))
return -1;
- cpumask_copy(irq_data_get_affinity_mask(data), cpumask);
+ irq_data_update_affinity(data, cpumask);
return IRQ_SET_MASK_OK_NOCOPY;
}
diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c
index cba6a4486c24..efdcbe6c4c26 100644
--- a/drivers/spi/spi-amd.c
+++ b/drivers/spi/spi-amd.c
@@ -33,6 +33,7 @@
#define AMD_SPI_RX_COUNT_REG 0x4B
#define AMD_SPI_STATUS_REG 0x4C
+#define AMD_SPI_FIFO_SIZE 70
#define AMD_SPI_MEM_SIZE 200
/* M_CMD OP codes for SPI */
@@ -270,6 +271,11 @@ static int amd_spi_master_transfer(struct spi_master *master,
return 0;
}
+static size_t amd_spi_max_transfer_size(struct spi_device *spi)
+{
+ return AMD_SPI_FIFO_SIZE;
+}
+
static int amd_spi_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@@ -302,6 +308,8 @@ static int amd_spi_probe(struct platform_device *pdev)
master->flags = SPI_MASTER_HALF_DUPLEX;
master->setup = amd_spi_master_setup;
master->transfer_one_message = amd_spi_master_transfer;
+ master->max_transfer_size = amd_spi_max_transfer_size;
+ master->max_message_size = amd_spi_max_transfer_size;
/* Register the controller with SPI framework */
err = devm_spi_register_master(dev, master);
diff --git a/drivers/spi/spi-aspeed-smc.c b/drivers/spi/spi-aspeed-smc.c
index 496f3e1e9079..3e891bf22470 100644
--- a/drivers/spi/spi-aspeed-smc.c
+++ b/drivers/spi/spi-aspeed-smc.c
@@ -558,6 +558,14 @@ static int aspeed_spi_dirmap_create(struct spi_mem_dirmap_desc *desc)
u32 ctl_val;
int ret = 0;
+ dev_dbg(aspi->dev,
+ "CE%d %s dirmap [ 0x%.8llx - 0x%.8llx ] OP %#x mode:%d.%d.%d.%d naddr:%#x ndummies:%#x\n",
+ chip->cs, op->data.dir == SPI_MEM_DATA_IN ? "read" : "write",
+ desc->info.offset, desc->info.offset + desc->info.length,
+ op->cmd.opcode, op->cmd.buswidth, op->addr.buswidth,
+ op->dummy.buswidth, op->data.buswidth,
+ op->addr.nbytes, op->dummy.nbytes);
+
chip->clk_freq = desc->mem->spi->max_speed_hz;
/* Only for reads */
@@ -574,9 +582,11 @@ static int aspeed_spi_dirmap_create(struct spi_mem_dirmap_desc *desc)
ctl_val = readl(chip->ctl) & ~CTRL_IO_CMD_MASK;
ctl_val |= aspeed_spi_get_io_mode(op) |
op->cmd.opcode << CTRL_COMMAND_SHIFT |
- CTRL_IO_DUMMY_SET(op->dummy.nbytes / op->dummy.buswidth) |
CTRL_IO_MODE_READ;
+ if (op->dummy.nbytes)
+ ctl_val |= CTRL_IO_DUMMY_SET(op->dummy.nbytes / op->dummy.buswidth);
+
/* Tune 4BYTE address mode */
if (op->addr.nbytes) {
u32 addr_mode = readl(aspi->regs + CE_CTRL_REG);
diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index 775c0bf2f923..0933948d7df3 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -1138,10 +1138,14 @@ static void bcm2835_spi_handle_err(struct spi_controller *ctlr,
struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
/* if an error occurred and we have an active dma, then terminate */
- dmaengine_terminate_sync(ctlr->dma_tx);
- bs->tx_dma_active = false;
- dmaengine_terminate_sync(ctlr->dma_rx);
- bs->rx_dma_active = false;
+ if (ctlr->dma_tx) {
+ dmaengine_terminate_sync(ctlr->dma_tx);
+ bs->tx_dma_active = false;
+ }
+ if (ctlr->dma_rx) {
+ dmaengine_terminate_sync(ctlr->dma_rx);
+ bs->rx_dma_active = false;
+ }
bcm2835_spi_undo_prologue(bs);
/* and reset */
diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index 2b9fc8449a62..72b1a5a2298c 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -1578,8 +1578,7 @@ static int cqspi_probe(struct platform_device *pdev)
ret = cqspi_of_get_pdata(cqspi);
if (ret) {
dev_err(dev, "Cannot get mandatory OF data.\n");
- ret = -ENODEV;
- goto probe_master_put;
+ return -ENODEV;
}
/* Obtain QSPI clock. */
@@ -1587,7 +1586,7 @@ static int cqspi_probe(struct platform_device *pdev)
if (IS_ERR(cqspi->clk)) {
dev_err(dev, "Cannot claim QSPI clock.\n");
ret = PTR_ERR(cqspi->clk);
- goto probe_master_put;
+ return ret;
}
/* Obtain and remap controller address. */
@@ -1596,7 +1595,7 @@ static int cqspi_probe(struct platform_device *pdev)
if (IS_ERR(cqspi->iobase)) {
dev_err(dev, "Cannot remap controller address.\n");
ret = PTR_ERR(cqspi->iobase);
- goto probe_master_put;
+ return ret;
}
/* Obtain and remap AHB address. */
@@ -1605,7 +1604,7 @@ static int cqspi_probe(struct platform_device *pdev)
if (IS_ERR(cqspi->ahb_base)) {
dev_err(dev, "Cannot remap AHB address.\n");
ret = PTR_ERR(cqspi->ahb_base);
- goto probe_master_put;
+ return ret;
}
cqspi->mmap_phys_base = (dma_addr_t)res_ahb->start;
cqspi->ahb_size = resource_size(res_ahb);
@@ -1614,15 +1613,13 @@ static int cqspi_probe(struct platform_device *pdev)
/* Obtain IRQ line. */
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- ret = -ENXIO;
- goto probe_master_put;
- }
+ if (irq < 0)
+ return -ENXIO;
pm_runtime_enable(dev);
ret = pm_runtime_resume_and_get(dev);
if (ret < 0)
- goto probe_master_put;
+ return ret;
ret = clk_prepare_enable(cqspi->clk);
if (ret) {
@@ -1716,8 +1713,6 @@ probe_reset_failed:
probe_clk_failed:
pm_runtime_put_sync(dev);
pm_runtime_disable(dev);
-probe_master_put:
- spi_master_put(master);
return ret;
}
diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c
index 31d778e9d255..6a7f7df1e776 100644
--- a/drivers/spi/spi-cadence.c
+++ b/drivers/spi/spi-cadence.c
@@ -69,7 +69,7 @@
#define CDNS_SPI_BAUD_DIV_SHIFT 3 /* Baud rate divisor shift in CR */
#define CDNS_SPI_SS_SHIFT 10 /* Slave Select field shift in CR */
#define CDNS_SPI_SS0 0x1 /* Slave Select zero */
-#define CDNS_SPI_NOSS 0x3C /* No Slave select */
+#define CDNS_SPI_NOSS 0xF /* No Slave select */
/*
* SPI Interrupt Registers bit Masks
diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index 7a014eeec2d0..411b1307b7fd 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -613,6 +613,10 @@ static int rspi_dma_transfer(struct rspi_data *rspi, struct sg_table *tx,
rspi->dma_callbacked, HZ);
if (ret > 0 && rspi->dma_callbacked) {
ret = 0;
+ if (tx)
+ dmaengine_synchronize(rspi->ctlr->dma_tx);
+ if (rx)
+ dmaengine_synchronize(rspi->ctlr->dma_rx);
} else {
if (!ret) {
dev_err(&rspi->ctlr->dev, "DMA timeout\n");
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index e68f1cc8ef98..6c8d8b051bfd 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -448,6 +448,9 @@ fd_execute_write_same(struct se_cmd *cmd)
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
}
+ if (!cmd->t_data_nents)
+ return TCM_INVALID_CDB_FIELD;
+
if (cmd->t_data_nents > 1 ||
cmd->t_data_sg[0].length != cmd->se_dev->dev_attrib.block_size) {
pr_err("WRITE_SAME: Illegal SGL t_data_nents: %u length: %u"
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 378c80313a0f..1ed9381751e6 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -494,6 +494,10 @@ iblock_execute_write_same(struct se_cmd *cmd)
" backends not supported\n");
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
}
+
+ if (!cmd->t_data_nents)
+ return TCM_INVALID_CDB_FIELD;
+
sg = &cmd->t_data_sg[0];
if (cmd->t_data_nents > 1 ||
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index ca1b2312d6e7..f6132836eb38 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -312,6 +312,12 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char flags, struct sbc_ops *op
pr_warn("WRITE SAME with ANCHOR not supported\n");
return TCM_INVALID_CDB_FIELD;
}
+
+ if (flags & 0x01) {
+ pr_warn("WRITE SAME with NDOB not supported\n");
+ return TCM_INVALID_CDB_FIELD;
+ }
+
/*
* Special case for WRITE_SAME w/ UNMAP=1 that ends up getting
* translated into block discard requests within backend code.
diff --git a/drivers/tee/optee/optee_smc.h b/drivers/tee/optee/optee_smc.h
index c60896cf71cb..73b5e7760d10 100644
--- a/drivers/tee/optee/optee_smc.h
+++ b/drivers/tee/optee/optee_smc.h
@@ -189,7 +189,7 @@ struct optee_smc_call_get_os_revision_result {
* Have config return register usage:
* a0 OPTEE_SMC_RETURN_OK
* a1 Physical address of start of SHM
- * a2 Size of of SHM
+ * a2 Size of SHM
* a3 Cache settings of memory, as defined by the
* OPTEE_SMC_SHM_* values above
* a4-7 Preserved
diff --git a/drivers/tee/optee/smc_abi.c b/drivers/tee/optee/smc_abi.c
index 385cb0aee610..a1c1fa1a9c28 100644
--- a/drivers/tee/optee/smc_abi.c
+++ b/drivers/tee/optee/smc_abi.c
@@ -884,8 +884,8 @@ static int optee_smc_do_call_with_arg(struct tee_context *ctx,
rpc_arg_offs = OPTEE_MSG_GET_ARG_SIZE(arg->num_params);
rpc_arg = tee_shm_get_va(shm, offs + rpc_arg_offs);
- if (IS_ERR(arg))
- return PTR_ERR(arg);
+ if (IS_ERR(rpc_arg))
+ return PTR_ERR(rpc_arg);
}
if (rpc_arg && tee_shm_is_dynamic(shm)) {
diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c
index af0f7c603fa4..98da206cd761 100644
--- a/drivers/tee/tee_core.c
+++ b/drivers/tee/tee_core.c
@@ -1073,7 +1073,7 @@ EXPORT_SYMBOL_GPL(tee_device_unregister);
/**
* tee_get_drvdata() - Return driver_data pointer
* @teedev: Device containing the driver_data pointer
- * @returns the driver_data pointer supplied to tee_register().
+ * @returns the driver_data pointer supplied to tee_device_alloc().
*/
void *tee_get_drvdata(struct tee_device *teedev)
{
diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c
index b8151d95a806..b263b0fde03c 100644
--- a/drivers/thermal/cpufreq_cooling.c
+++ b/drivers/thermal/cpufreq_cooling.c
@@ -137,11 +137,9 @@ static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu,
int cpu_idx)
{
- unsigned long max = arch_scale_cpu_capacity(cpu);
- unsigned long util;
+ unsigned long util = sched_cpu_util(cpu);
- util = sched_cpu_util(cpu, max);
- return (util * 100) / max;
+ return (util * 100) / arch_scale_cpu_capacity(cpu);
}
#else /* !CONFIG_SMP */
static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu,
diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c
index 43eb25b167bc..ccdf8a24ddc7 100644
--- a/drivers/thermal/rcar_gen3_thermal.c
+++ b/drivers/thermal/rcar_gen3_thermal.c
@@ -399,6 +399,10 @@ static const struct of_device_id rcar_gen3_thermal_dt_ids[] = {
.compatible = "renesas,r8a779a0-thermal",
.data = &rcar_gen3_ths_tj_1,
},
+ {
+ .compatible = "renesas,r8a779f0-thermal",
+ .data = &rcar_gen3_ths_tj_1,
+ },
{},
};
MODULE_DEVICE_TABLE(of, rcar_gen3_thermal_dt_ids);
diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 74bfabe5b453..752dab3356d7 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -111,21 +111,11 @@ static void pty_unthrottle(struct tty_struct *tty)
static int pty_write(struct tty_struct *tty, const unsigned char *buf, int c)
{
struct tty_struct *to = tty->link;
- unsigned long flags;
- if (tty->flow.stopped)
+ if (tty->flow.stopped || !c)
return 0;
- if (c > 0) {
- spin_lock_irqsave(&to->port->lock, flags);
- /* Stuff the data into the input queue of the other end */
- c = tty_insert_flip_string(to->port, buf, c);
- spin_unlock_irqrestore(&to->port->lock, flags);
- /* And shovel */
- if (c)
- tty_flip_buffer_push(to->port);
- }
- return c;
+ return tty_insert_flip_string_and_push_buffer(to->port, buf, c);
}
/**
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index cfbd2de0ca6e..3f56dbc9432b 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -23,6 +23,7 @@
#include <linux/sysrq.h>
#include <linux/delay.h>
#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#include <linux/tty.h>
#include <linux/ratelimit.h>
#include <linux/tty_flip.h>
@@ -559,6 +560,9 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev)
up->port.dev = dev;
+ if (uart_console_enabled(&up->port))
+ pm_runtime_get_sync(up->port.dev);
+
serial8250_apply_quirks(up);
uart_add_one_port(drv, &up->port);
}
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index 7133fceed35e..a8dba4a0a8fb 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -106,10 +106,10 @@ int serial8250_tx_dma(struct uart_8250_port *p)
UART_XMIT_SIZE, DMA_TO_DEVICE);
dma_async_issue_pending(dma->txchan);
- if (dma->tx_err) {
+ serial8250_clear_THRI(p);
+ if (dma->tx_err)
dma->tx_err = 0;
- serial8250_clear_THRI(p);
- }
+
return 0;
err:
dma->tx_err = 1;
diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index f57bbd32ef11..bb6aca07ab56 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -47,7 +47,7 @@
#define RZN1_UART_xDMACR_DMA_EN BIT(0)
#define RZN1_UART_xDMACR_1_WORD_BURST (0 << 1)
#define RZN1_UART_xDMACR_4_WORD_BURST (1 << 1)
-#define RZN1_UART_xDMACR_8_WORD_BURST (3 << 1)
+#define RZN1_UART_xDMACR_8_WORD_BURST (2 << 1)
#define RZN1_UART_xDMACR_BLK_SZ(x) ((x) << 3)
/* Quirks */
@@ -773,18 +773,18 @@ static const struct of_device_id dw8250_of_match[] = {
MODULE_DEVICE_TABLE(of, dw8250_of_match);
static const struct acpi_device_id dw8250_acpi_match[] = {
- { "INT33C4", 0 },
- { "INT33C5", 0 },
- { "INT3434", 0 },
- { "INT3435", 0 },
- { "80860F0A", 0 },
- { "8086228A", 0 },
- { "APMC0D08", 0},
- { "AMD0020", 0 },
- { "AMDI0020", 0 },
- { "AMDI0022", 0 },
- { "BRCM2032", 0 },
- { "HISI0031", 0 },
+ { "80860F0A", (kernel_ulong_t)&dw8250_dw_apb },
+ { "8086228A", (kernel_ulong_t)&dw8250_dw_apb },
+ { "AMD0020", (kernel_ulong_t)&dw8250_dw_apb },
+ { "AMDI0020", (kernel_ulong_t)&dw8250_dw_apb },
+ { "AMDI0022", (kernel_ulong_t)&dw8250_dw_apb },
+ { "APMC0D08", (kernel_ulong_t)&dw8250_dw_apb},
+ { "BRCM2032", (kernel_ulong_t)&dw8250_dw_apb },
+ { "HISI0031", (kernel_ulong_t)&dw8250_dw_apb },
+ { "INT33C4", (kernel_ulong_t)&dw8250_dw_apb },
+ { "INT33C5", (kernel_ulong_t)&dw8250_dw_apb },
+ { "INT3434", (kernel_ulong_t)&dw8250_dw_apb },
+ { "INT3435", (kernel_ulong_t)&dw8250_dw_apb },
{ },
};
MODULE_DEVICE_TABLE(acpi, dw8250_acpi_match);
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 8f32fe9e149e..3c36a06a20b0 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -1949,7 +1949,7 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir)
if ((status & UART_LSR_THRE) && (up->ier & UART_IER_THRI)) {
if (!up->dma || up->dma->tx_err)
serial8250_tx_chars(up);
- else
+ else if (!up->dma->tx_running)
__stop_tx(up);
}
@@ -2975,8 +2975,10 @@ static int serial8250_request_std_resource(struct uart_8250_port *up)
case UPIO_MEM32BE:
case UPIO_MEM16:
case UPIO_MEM:
- if (!port->mapbase)
+ if (!port->mapbase) {
+ ret = -EINVAL;
break;
+ }
if (!request_mem_region(port->mapbase, size, "serial")) {
ret = -EBUSY;
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 97ef41cb2721..16a21422ddce 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -1367,6 +1367,15 @@ static void pl011_stop_rx(struct uart_port *port)
pl011_dma_rx_stop(uap);
}
+static void pl011_throttle_rx(struct uart_port *port)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ pl011_stop_rx(port);
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
static void pl011_enable_ms(struct uart_port *port)
{
struct uart_amba_port *uap =
@@ -1788,9 +1797,10 @@ static int pl011_allocate_irq(struct uart_amba_port *uap)
*/
static void pl011_enable_interrupts(struct uart_amba_port *uap)
{
+ unsigned long flags;
unsigned int i;
- spin_lock_irq(&uap->port.lock);
+ spin_lock_irqsave(&uap->port.lock, flags);
/* Clear out any spuriously appearing RX interrupts */
pl011_write(UART011_RTIS | UART011_RXIS, uap, REG_ICR);
@@ -1812,7 +1822,14 @@ static void pl011_enable_interrupts(struct uart_amba_port *uap)
if (!pl011_dma_rx_running(uap))
uap->im |= UART011_RXIM;
pl011_write(uap->im, uap, REG_IMSC);
- spin_unlock_irq(&uap->port.lock);
+ spin_unlock_irqrestore(&uap->port.lock, flags);
+}
+
+static void pl011_unthrottle_rx(struct uart_port *port)
+{
+ struct uart_amba_port *uap = container_of(port, struct uart_amba_port, port);
+
+ pl011_enable_interrupts(uap);
}
static int pl011_startup(struct uart_port *port)
@@ -2225,6 +2242,8 @@ static const struct uart_ops amba_pl011_pops = {
.stop_tx = pl011_stop_tx,
.start_tx = pl011_start_tx,
.stop_rx = pl011_stop_rx,
+ .throttle = pl011_throttle_rx,
+ .unthrottle = pl011_unthrottle_rx,
.enable_ms = pl011_enable_ms,
.break_ctl = pl011_break_ctl,
.startup = pl011_startup,
diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c
index 0429c2a54290..93489fe334d0 100644
--- a/drivers/tty/serial/mvebu-uart.c
+++ b/drivers/tty/serial/mvebu-uart.c
@@ -470,14 +470,14 @@ static void mvebu_uart_shutdown(struct uart_port *port)
}
}
-static int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud)
+static unsigned int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud)
{
unsigned int d_divisor, m_divisor;
unsigned long flags;
u32 brdv, osamp;
if (!port->uartclk)
- return -EOPNOTSUPP;
+ return 0;
/*
* The baudrate is derived from the UART clock thanks to divisors:
@@ -548,7 +548,7 @@ static int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud)
(m_divisor << 16) | (m_divisor << 24);
writel(osamp, port->membase + UART_OSAMP);
- return 0;
+ return DIV_ROUND_CLOSEST(port->uartclk, d_divisor * m_divisor);
}
static void mvebu_uart_set_termios(struct uart_port *port,
@@ -587,15 +587,11 @@ static void mvebu_uart_set_termios(struct uart_port *port,
max_baud = port->uartclk / 80;
baud = uart_get_baud_rate(port, termios, old, min_baud, max_baud);
- if (mvebu_uart_baud_rate_set(port, baud)) {
- /* No clock available, baudrate cannot be changed */
- if (old)
- baud = uart_get_baud_rate(port, old, NULL,
- min_baud, max_baud);
- } else {
- tty_termios_encode_baud_rate(termios, baud, baud);
- uart_update_timeout(port, termios->c_cflag, baud);
- }
+ baud = mvebu_uart_baud_rate_set(port, baud);
+
+ /* In case baudrate cannot be changed, report previous old value */
+ if (baud == 0 && old)
+ baud = tty_termios_baud_rate(old);
/* Only the following flag changes are supported */
if (old) {
@@ -606,6 +602,11 @@ static void mvebu_uart_set_termios(struct uart_port *port,
termios->c_cflag |= CS8;
}
+ if (baud != 0) {
+ tty_termios_encode_baud_rate(termios, baud, baud);
+ uart_update_timeout(port, termios->c_cflag, baud);
+ }
+
spin_unlock_irqrestore(&port->lock, flags);
}
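The mvebu-uart change above makes the baud-rate setter report the rate actually achieved from the UART clock and the two divisors, so set_termios can encode that instead of the requested value. A standalone sketch of that rounding follows; the divisor values are made up for the example.

#include <stdio.h>

#define DIV_ROUND_CLOSEST(x, d)  (((x) + (d) / 2) / (d))

/* Baud rate actually achieved, as returned by the reworked rate setter. */
static unsigned int achieved_baud(unsigned int uartclk,
				  unsigned int d_divisor, unsigned int m_divisor)
{
	return DIV_ROUND_CLOSEST(uartclk, d_divisor * m_divisor);
}

int main(void)
{
	/* 25 MHz UART clock with divisors 14 and 16 -> ~111607 instead of 115200. */
	printf("%u\n", achieved_baud(25000000, 14, 16));
	return 0;
}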
diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c
index d5ca904def34..1afe47b62ad5 100644
--- a/drivers/tty/serial/samsung_tty.c
+++ b/drivers/tty/serial/samsung_tty.c
@@ -377,8 +377,7 @@ static void enable_tx_dma(struct s3c24xx_uart_port *ourport)
/* Enable tx dma mode */
ucon = rd_regl(port, S3C2410_UCON);
ucon &= ~(S3C64XX_UCON_TXBURST_MASK | S3C64XX_UCON_TXMODE_MASK);
- ucon |= (dma_get_cache_alignment() >= 16) ?
- S3C64XX_UCON_TXBURST_16 : S3C64XX_UCON_TXBURST_1;
+ ucon |= S3C64XX_UCON_TXBURST_1;
ucon |= S3C64XX_UCON_TXMODE_DMA;
wr_regl(port, S3C2410_UCON, ucon);
@@ -674,7 +673,7 @@ static void enable_rx_dma(struct s3c24xx_uart_port *ourport)
S3C64XX_UCON_DMASUS_EN |
S3C64XX_UCON_TIMEOUT_EN |
S3C64XX_UCON_RXMODE_MASK);
- ucon |= S3C64XX_UCON_RXBURST_16 |
+ ucon |= S3C64XX_UCON_RXBURST_1 |
0xf << S3C64XX_UCON_TIMEOUT_SHIFT |
S3C64XX_UCON_EMPTYINT_EN |
S3C64XX_UCON_TIMEOUT_EN |
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 338ebadfd44b..3dc926d6c00a 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -1941,11 +1941,6 @@ static int uart_proc_show(struct seq_file *m, void *v)
}
#endif
-static inline bool uart_console_enabled(struct uart_port *port)
-{
- return uart_console(port) && (port->cons->flags & CON_ENABLED);
-}
-
static void uart_port_spin_lock_init(struct uart_port *port)
{
spin_lock_init(&port->lock);
diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
index b7b44f4050d4..0973b03eeeaa 100644
--- a/drivers/tty/serial/stm32-usart.c
+++ b/drivers/tty/serial/stm32-usart.c
@@ -72,6 +72,8 @@ static void stm32_usart_config_reg_rs485(u32 *cr1, u32 *cr3, u32 delay_ADE,
*cr3 |= USART_CR3_DEM;
over8 = *cr1 & USART_CR1_OVER8;
+ *cr1 &= ~(USART_CR1_DEDT_MASK | USART_CR1_DEAT_MASK);
+
if (over8)
rs485_deat_dedt = delay_ADE * baud * 8;
else
diff --git a/drivers/tty/tty.h b/drivers/tty/tty.h
index b710c5ef89ab..f310a8274df1 100644
--- a/drivers/tty/tty.h
+++ b/drivers/tty/tty.h
@@ -111,4 +111,7 @@ static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch)
ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *);
+int tty_insert_flip_string_and_push_buffer(struct tty_port *port,
+ const unsigned char *chars, size_t cnt);
+
#endif
diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
index bfa431a8e690..595d8b49c745 100644
--- a/drivers/tty/tty_buffer.c
+++ b/drivers/tty/tty_buffer.c
@@ -532,6 +532,15 @@ static void flush_to_ldisc(struct work_struct *work)
}
+static inline void tty_flip_buffer_commit(struct tty_buffer *tail)
+{
+ /*
+ * Paired w/ acquire in flush_to_ldisc(); ensures flush_to_ldisc() sees
+ * buffer data.
+ */
+ smp_store_release(&tail->commit, tail->used);
+}
+
/**
* tty_flip_buffer_push - push terminal buffers
* @port: tty port to push
@@ -546,16 +555,43 @@ void tty_flip_buffer_push(struct tty_port *port)
{
struct tty_bufhead *buf = &port->buf;
- /*
- * Paired w/ acquire in flush_to_ldisc(); ensures flush_to_ldisc() sees
- * buffer data.
- */
- smp_store_release(&buf->tail->commit, buf->tail->used);
+ tty_flip_buffer_commit(buf->tail);
queue_work(system_unbound_wq, &buf->work);
}
EXPORT_SYMBOL(tty_flip_buffer_push);
/**
+ * tty_insert_flip_string_and_push_buffer - add characters to the tty buffer and
+ * push
+ * @port: tty port
+ * @chars: characters
+ * @size: size
+ *
+ * The function combines tty_insert_flip_string() and tty_flip_buffer_push(),
+ * except that it properly holds @port->lock while doing so.
+ *
+ * To be used only internally (by pty currently).
+ *
+ * Returns: the number added.
+ */
+int tty_insert_flip_string_and_push_buffer(struct tty_port *port,
+ const unsigned char *chars, size_t size)
+{
+ struct tty_bufhead *buf = &port->buf;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ size = tty_insert_flip_string(port, chars, size);
+ if (size)
+ tty_flip_buffer_commit(buf->tail);
+ spin_unlock_irqrestore(&port->lock, flags);
+
+ queue_work(system_unbound_wq, &buf->work);
+
+ return size;
+}
+
+/**
* tty_buffer_init - prepare a tty buffer structure
* @port: tty port to initialise
*
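The tty_flip_buffer_commit() helper above publishes buffer contents with a store-release on the commit index, paired with an acquire load in flush_to_ldisc(), so the consumer never observes the index advance without also seeing the data written before it. The following is a userspace analogy of that pairing using C11 atomics, not the tty code itself.

#include <stdatomic.h>
#include <stdio.h>

static char data[64];
static atomic_size_t commit;

static void producer_add(char c)
{
	size_t used = atomic_load_explicit(&commit, memory_order_relaxed);

	data[used] = c;                           /* fill the buffer ...          */
	atomic_store_explicit(&commit, used + 1,
			      memory_order_release); /* ... then publish it      */
}

static int consumer_read(size_t *pos, char *out)
{
	size_t avail = atomic_load_explicit(&commit, memory_order_acquire);

	if (*pos >= avail)
		return 0;                         /* nothing published yet        */
	*out = data[(*pos)++];                    /* ordered after the data store */
	return 1;
}

int main(void)
{
	size_t pos = 0;
	char c;

	producer_add('x');
	while (consumer_read(&pos, &c))
		putchar(c);
	putchar('\n');
	return 0;
}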
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index f8c87c4d7399..dfc1f4b445f3 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -855,7 +855,7 @@ static void delete_char(struct vc_data *vc, unsigned int nr)
unsigned short *p = (unsigned short *) vc->vc_pos;
vc_uniscr_delete(vc, nr);
- scr_memcpyw(p, p + nr, (vc->vc_cols - vc->state.x - nr) * 2);
+ scr_memmovew(p, p + nr, (vc->vc_cols - vc->state.x - nr) * 2);
scr_memsetw(p + vc->vc_cols - vc->state.x - nr, vc->vc_video_erase_char,
nr * 2);
vc->vc_need_wrap = 0;
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index ce86d1b790c0..3d367be71728 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -2953,37 +2953,59 @@ ufshcd_dev_cmd_completion(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
struct ufshcd_lrb *lrbp, int max_timeout)
{
- int err = 0;
- unsigned long time_left;
+ unsigned long time_left = msecs_to_jiffies(max_timeout);
unsigned long flags;
+ bool pending;
+ int err;
+retry:
time_left = wait_for_completion_timeout(hba->dev_cmd.complete,
- msecs_to_jiffies(max_timeout));
+ time_left);
- spin_lock_irqsave(hba->host->host_lock, flags);
- hba->dev_cmd.complete = NULL;
if (likely(time_left)) {
+ /*
+ * The completion handler called complete() and the caller of
+ * this function still owns the @lrbp tag so the code below does
+ * not trigger any race conditions.
+ */
+ hba->dev_cmd.complete = NULL;
err = ufshcd_get_tr_ocs(lrbp);
if (!err)
err = ufshcd_dev_cmd_completion(hba, lrbp);
- }
- spin_unlock_irqrestore(hba->host->host_lock, flags);
-
- if (!time_left) {
+ } else {
err = -ETIMEDOUT;
dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n",
__func__, lrbp->task_tag);
- if (!ufshcd_clear_cmds(hba, 1U << lrbp->task_tag))
+ if (ufshcd_clear_cmds(hba, 1U << lrbp->task_tag) == 0) {
/* successfully cleared the command, retry if needed */
err = -EAGAIN;
- /*
- * in case of an error, after clearing the doorbell,
- * we also need to clear the outstanding_request
- * field in hba
- */
- spin_lock_irqsave(&hba->outstanding_lock, flags);
- __clear_bit(lrbp->task_tag, &hba->outstanding_reqs);
- spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+ /*
+ * Since clearing the command succeeded we also need to
+ * clear the task tag bit from the outstanding_reqs
+ * variable.
+ */
+ spin_lock_irqsave(&hba->outstanding_lock, flags);
+ pending = test_bit(lrbp->task_tag,
+ &hba->outstanding_reqs);
+ if (pending) {
+ hba->dev_cmd.complete = NULL;
+ __clear_bit(lrbp->task_tag,
+ &hba->outstanding_reqs);
+ }
+ spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+
+ if (!pending) {
+ /*
+ * The completion handler ran while we tried to
+ * clear the command.
+ */
+ time_left = 1;
+ goto retry;
+ }
+ } else {
+ dev_err(hba->dev, "%s: failed to clear tag %d\n",
+ __func__, lrbp->task_tag);
+ }
}
return err;
@@ -5738,7 +5760,7 @@ int ufshcd_wb_toggle(struct ufs_hba *hba, bool enable)
}
hba->dev_info.wb_enabled = enable;
- dev_info(hba->dev, "%s Write Booster %s\n",
+ dev_dbg(hba->dev, "%s Write Booster %s\n",
__func__, enable ? "enabled" : "disabled");
return ret;
@@ -7253,7 +7275,7 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba)
hba->silence_err_logs = false;
/* scale up clocks to max frequency before full reinitialization */
- ufshcd_set_clk_freq(hba, true);
+ ufshcd_scale_clks(hba, true);
err = ufshcd_hba_enable(hba);
diff --git a/drivers/ufs/host/ufshcd-pltfrm.c b/drivers/ufs/host/ufshcd-pltfrm.c
index e7332cc65b1f..173aea8e9997 100644
--- a/drivers/ufs/host/ufshcd-pltfrm.c
+++ b/drivers/ufs/host/ufshcd-pltfrm.c
@@ -108,9 +108,20 @@ out:
return ret;
}
+static bool phandle_exists(const struct device_node *np,
+ const char *phandle_name, int index)
+{
+ struct device_node *parse_np = of_parse_phandle(np, phandle_name, index);
+
+ if (parse_np)
+ of_node_put(parse_np);
+
+ return parse_np != NULL;
+}
+
#define MAX_PROP_SIZE 32
static int ufshcd_populate_vreg(struct device *dev, const char *name,
- struct ufs_vreg **out_vreg)
+ struct ufs_vreg **out_vreg)
{
char prop_name[MAX_PROP_SIZE];
struct ufs_vreg *vreg = NULL;
@@ -122,7 +133,7 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name,
}
snprintf(prop_name, MAX_PROP_SIZE, "%s-supply", name);
- if (!of_parse_phandle(np, prop_name, 0)) {
+ if (!phandle_exists(np, prop_name, 0)) {
dev_info(dev, "%s: Unable to find %s regulator, assuming enabled\n",
__func__, prop_name);
goto out;
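
The new phandle_exists() helper lets the probe path ask whether a "<name>-supply" phandle is present without leaking the reference that of_parse_phandle() takes on the target node: the node is put straight away and only a yes/no answer is returned. Below is a minimal userspace sketch of that acquire/release/report shape; the toy_* names are illustrative stand-ins for the OF APIs, not real kernel functions.

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

struct toy_node { int refcount; };

static struct toy_node registry = { .refcount = 1 };

/* Stand-in for of_parse_phandle(): return the node with an extra
 * reference taken, or NULL if the property is absent. */
static struct toy_node *toy_parse_phandle(const char *prop)
{
	if (!prop)
		return NULL;
	registry.refcount++;
	return &registry;
}

/* Stand-in for of_node_put(). */
static void toy_node_put(struct toy_node *n)
{
	if (n)
		n->refcount--;
}

/* Same shape as phandle_exists(): drop the reference immediately and
 * only report whether the lookup succeeded. */
static bool toy_phandle_exists(const char *prop)
{
	struct toy_node *n = toy_parse_phandle(prop);

	toy_node_put(n);
	return n != NULL;
}

int main(void)
{
	printf("vcc-supply present: %d\n", toy_phandle_exists("vcc-supply"));
	printf("missing prop present: %d\n", toy_phandle_exists(NULL));
	printf("refcount still balanced: %d\n", registry.refcount == 1);
	return 0;
}
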
diff --git a/drivers/usb/dwc3/dwc3-am62.c b/drivers/usb/dwc3/dwc3-am62.c
index fea7aca35dc8..173cf3579c55 100644
--- a/drivers/usb/dwc3/dwc3-am62.c
+++ b/drivers/usb/dwc3/dwc3-am62.c
@@ -195,8 +195,7 @@ static int dwc3_ti_probe(struct platform_device *pdev)
if (i == ARRAY_SIZE(dwc3_ti_rate_table)) {
dev_err(dev, "unsupported usb2_refclk rate: %lu KHz\n", rate);
- ret = -EINVAL;
- goto err_clk_disable;
+ return -EINVAL;
}
data->rate_code = i;
@@ -204,7 +203,7 @@ static int dwc3_ti_probe(struct platform_device *pdev)
/* Read the syscon property and set the rate code */
ret = phy_syscon_pll_refclk(data);
if (ret)
- goto err_clk_disable;
+ return ret;
/* VBUS divider select */
data->vbus_divider = device_property_read_bool(dev, "ti,vbus-divider");
@@ -245,8 +244,6 @@ err_pm_disable:
clk_disable_unprepare(data->usb2_refclk);
pm_runtime_disable(dev);
pm_runtime_set_suspended(dev);
-err_clk_disable:
- clk_put(data->usb2_refclk);
return ret;
}
@@ -276,7 +273,6 @@ static int dwc3_ti_remove(struct platform_device *pdev)
pm_runtime_disable(dev);
pm_runtime_set_suspended(dev);
- clk_put(data->usb2_refclk);
platform_set_drvdata(pdev, NULL);
return 0;
}
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 8716bece1072..0d89dfa6eef5 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -4249,7 +4249,6 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt)
}
evt->count = 0;
- evt->flags &= ~DWC3_EVENT_PENDING;
ret = IRQ_HANDLED;
/* Unmask interrupt */
@@ -4261,6 +4260,9 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt)
dwc3_writel(dwc->regs, DWC3_DEV_IMOD(0), dwc->imod_interval);
}
+ /* Keep the clearing of DWC3_EVENT_PENDING at the end */
+ evt->flags &= ~DWC3_EVENT_PENDING;
+
return ret;
}
diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c
index e5a6b6e36b3d..4303a3283ba0 100644
--- a/drivers/usb/gadget/function/uvc_configfs.c
+++ b/drivers/usb/gadget/function/uvc_configfs.c
@@ -2371,6 +2371,7 @@ static ssize_t f_uvc_opts_string_##cname##_store(struct config_item *item,\
const char *page, size_t len) \
{ \
struct f_uvc_opts *opts = to_f_uvc_opts(item); \
+ int size = min(sizeof(opts->aname), len + 1); \
int ret = 0; \
\
mutex_lock(&opts->lock); \
@@ -2379,8 +2380,9 @@ static ssize_t f_uvc_opts_string_##cname##_store(struct config_item *item,\
goto end; \
} \
\
- ret = snprintf(opts->aname, min(sizeof(opts->aname), len), \
- "%s", page); \
+ ret = strscpy(opts->aname, page, size); \
+ if (ret == -E2BIG) \
+ ret = size - 1; \
\
end: \
mutex_unlock(&opts->lock); \
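
The store helper above replaces snprintf() with strscpy(), which copies at most size - 1 bytes, always NUL-terminates, and reports truncation with -E2BIG; the macro then maps -E2BIG back to size - 1 so the caller still sees the number of bytes kept. A minimal userspace sketch of that return-value mapping follows; strscpy_sketch() is a simplified stand-in written for illustration, not the kernel implementation.

#include <stdio.h>
#include <string.h>

#define E2BIG_ERR (-7)	/* stand-in for -E2BIG */

/* Simplified stand-in for strscpy(): copy at most size - 1 bytes,
 * always NUL-terminate, return the number of characters copied or
 * E2BIG_ERR when the source had to be truncated. */
static long strscpy_sketch(char *dst, const char *src, size_t size)
{
	size_t len = strlen(src);

	if (size == 0)
		return E2BIG_ERR;
	if (len >= size) {
		memcpy(dst, src, size - 1);
		dst[size - 1] = '\0';
		return E2BIG_ERR;
	}
	memcpy(dst, src, len + 1);
	return (long)len;
}

int main(void)
{
	char buf[8];
	long ret = strscpy_sketch(buf, "longer than eight bytes", sizeof(buf));

	/* Mirror the configfs store helper: report the stored length
	 * instead of the truncation error. */
	if (ret == E2BIG_ERR)
		ret = sizeof(buf) - 1;
	printf("stored \"%s\", reported length %ld\n", buf, ret);
	return 0;
}
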
diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c
index 385be30baad3..896c0d107f72 100644
--- a/drivers/usb/host/ehci-fsl.c
+++ b/drivers/usb/host/ehci-fsl.c
@@ -76,14 +76,9 @@ static int fsl_ehci_drv_probe(struct platform_device *pdev)
return -ENODEV;
}
- res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
- if (!res) {
- dev_err(&pdev->dev,
- "Found HC with no IRQ. Check %s setup!\n",
- dev_name(&pdev->dev));
- return -ENODEV;
- }
- irq = res->start;
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
hcd = __usb_create_hcd(&fsl_ehci_hc_driver, pdev->dev.parent,
&pdev->dev, dev_name(&pdev->dev), NULL);
diff --git a/drivers/usb/host/fsl-mph-dr-of.c b/drivers/usb/host/fsl-mph-dr-of.c
index 44a7e58a26e3..e5df17522892 100644
--- a/drivers/usb/host/fsl-mph-dr-of.c
+++ b/drivers/usb/host/fsl-mph-dr-of.c
@@ -112,6 +112,9 @@ static struct platform_device *fsl_usb2_device_register(
goto error;
}
+ pdev->dev.of_node = ofdev->dev.of_node;
+ pdev->dev.of_node_reused = true;
+
retval = platform_device_add(pdev);
if (retval)
goto error;
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index b440d338a895..d5a3986dfee7 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -1023,6 +1023,9 @@ static const struct usb_device_id id_table_combined[] = {
{ USB_DEVICE(FTDI_VID, CHETCO_SEASMART_DISPLAY_PID) },
{ USB_DEVICE(FTDI_VID, CHETCO_SEASMART_LITE_PID) },
{ USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ANALOG_PID) },
+ /* Belimo Automation devices */
+ { USB_DEVICE(FTDI_VID, BELIMO_ZTH_PID) },
+ { USB_DEVICE(FTDI_VID, BELIMO_ZIP_PID) },
/* ICP DAS I-756xU devices */
{ USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) },
{ USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) },
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index d1a9564697a4..4e92c165c86b 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -1569,6 +1569,12 @@
#define CHETCO_SEASMART_ANALOG_PID 0xA5AF /* SeaSmart Analog Adapter */
/*
+ * Belimo Automation
+ */
+#define BELIMO_ZTH_PID 0x8050
+#define BELIMO_ZIP_PID 0xC811
+
+/*
* Unjo AB
*/
#define UNJO_VID 0x22B7
diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index ee0e520707dd..c4724750c81a 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -1718,6 +1718,7 @@ void typec_set_pwr_opmode(struct typec_port *port,
partner->usb_pd = 1;
sysfs_notify(&partner_dev->kobj, NULL,
"supports_usb_power_delivery");
+ kobject_uevent(&partner_dev->kobj, KOBJ_CHANGE);
}
put_device(partner_dev);
}
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 61e71c1154be..e60b06f2ac22 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -549,6 +549,16 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
if (!iommu_group)
return ERR_PTR(-EINVAL);
+ /*
+ * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
+ * restore cache coherency. It has to be checked here because it is only
+ * valid for cases where we are using iommu groups.
+ */
+ if (!iommu_capable(dev->bus, IOMMU_CAP_CACHE_COHERENCY)) {
+ iommu_group_put(iommu_group);
+ return ERR_PTR(-EINVAL);
+ }
+
group = vfio_group_get_from_iommu(iommu_group);
if (!group)
group = vfio_create_group(iommu_group, VFIO_IOMMU);
@@ -601,13 +611,6 @@ static int __vfio_register_dev(struct vfio_device *device,
int vfio_register_group_dev(struct vfio_device *device)
{
- /*
- * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
- * restore cache coherency.
- */
- if (!iommu_capable(device->dev->bus, IOMMU_CAP_CACHE_COHERENCY))
- return -EINVAL;
-
return __vfio_register_dev(device,
vfio_group_find_or_alloc(device->dev));
}
diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c
index 90ce16b6e05f..f422f9c58ba7 100644
--- a/drivers/virt/coco/sev-guest/sev-guest.c
+++ b/drivers/virt/coco/sev-guest/sev-guest.c
@@ -632,16 +632,19 @@ static int __init sev_guest_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct snp_guest_dev *snp_dev;
struct miscdevice *misc;
+ void __iomem *mapping;
int ret;
if (!dev->platform_data)
return -ENODEV;
data = (struct sev_guest_platform_data *)dev->platform_data;
- layout = (__force void *)ioremap_encrypted(data->secrets_gpa, PAGE_SIZE);
- if (!layout)
+ mapping = ioremap_encrypted(data->secrets_gpa, PAGE_SIZE);
+ if (!mapping)
return -ENODEV;
+ layout = (__force void *)mapping;
+
ret = -ENOMEM;
snp_dev = devm_kzalloc(&pdev->dev, sizeof(struct snp_guest_dev), GFP_KERNEL);
if (!snp_dev)
@@ -706,7 +709,7 @@ e_free_response:
e_free_request:
free_shared_pages(snp_dev->request, sizeof(struct snp_guest_msg));
e_unmap:
- iounmap(layout);
+ iounmap(mapping);
return ret;
}
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 46d9295d9a6e..5e8321f43cbd 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -528,9 +528,10 @@ static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
BUG_ON(irq == -1);
if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
- cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
- cpumask_copy(irq_get_effective_affinity_mask(irq),
- cpumask_of(cpu));
+ struct irq_data *data = irq_get_irq_data(irq);
+
+ irq_data_update_affinity(data, cpumask_of(cpu));
+ irq_data_update_effective_affinity(data, cpumask_of(cpu));
}
xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 4b56c39f766d..84b143eef395 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -396,13 +396,15 @@ static void __unmap_grant_pages_done(int result,
unsigned int offset = data->unmap_ops - map->unmap_ops;
for (i = 0; i < data->count; i++) {
- WARN_ON(map->unmap_ops[offset+i].status);
+ WARN_ON(map->unmap_ops[offset + i].status != GNTST_okay &&
+ map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
pr_debug("unmap handle=%d st=%d\n",
map->unmap_ops[offset+i].handle,
map->unmap_ops[offset+i].status);
map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
if (use_ptemod) {
- WARN_ON(map->kunmap_ops[offset+i].status);
+ WARN_ON(map->kunmap_ops[offset + i].status != GNTST_okay &&
+ map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
pr_debug("kunmap handle=%u st=%d\n",
map->kunmap_ops[offset+i].handle,
map->kunmap_ops[offset+i].status);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 42118a4f3383..d1cfb235c4b9 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -375,7 +375,7 @@ static int afs_begin_cache_operation(struct netfs_io_request *rreq)
}
static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len,
- struct folio *folio, void **_fsdata)
+ struct folio **foliop, void **_fsdata)
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
diff --git a/fs/attr.c b/fs/attr.c
index dbe996b0dedf..b5b8835ddf15 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -22,7 +22,7 @@
* chown_ok - verify permissions to chown inode
* @mnt_userns: user namespace of the mount @inode was found from
* @inode: inode to check permissions on
- * @uid: uid to chown @inode to
+ * @ia_vfsuid: uid to chown @inode to
*
* If the inode has been found through an idmapped mount the user namespace of
* the vfsmount must be passed through @mnt_userns. This function will then
@@ -31,15 +31,15 @@
* performed on the raw inode simply pass init_user_ns.
*/
static bool chown_ok(struct user_namespace *mnt_userns,
- const struct inode *inode,
- kuid_t uid)
+ const struct inode *inode, vfsuid_t ia_vfsuid)
{
- kuid_t kuid = i_uid_into_mnt(mnt_userns, inode);
- if (uid_eq(current_fsuid(), kuid) && uid_eq(uid, inode->i_uid))
+ vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
+ if (vfsuid_eq_kuid(vfsuid, current_fsuid()) &&
+ vfsuid_eq(ia_vfsuid, vfsuid))
return true;
if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
return true;
- if (uid_eq(kuid, INVALID_UID) &&
+ if (!vfsuid_valid(vfsuid) &&
ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
return true;
return false;
@@ -49,7 +49,7 @@ static bool chown_ok(struct user_namespace *mnt_userns,
* chgrp_ok - verify permissions to chgrp inode
* @mnt_userns: user namespace of the mount @inode was found from
* @inode: inode to check permissions on
- * @gid: gid to chown @inode to
+ * @ia_vfsgid: gid to chown @inode to
*
* If the inode has been found through an idmapped mount the user namespace of
* the vfsmount must be passed through @mnt_userns. This function will then
@@ -58,21 +58,19 @@ static bool chown_ok(struct user_namespace *mnt_userns,
* performed on the raw inode simply pass init_user_ns.
*/
static bool chgrp_ok(struct user_namespace *mnt_userns,
- const struct inode *inode, kgid_t gid)
+ const struct inode *inode, vfsgid_t ia_vfsgid)
{
- kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
- if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode))) {
- kgid_t mapped_gid;
-
- if (gid_eq(gid, inode->i_gid))
+ vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+ vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
+ if (vfsuid_eq_kuid(vfsuid, current_fsuid())) {
+ if (vfsgid_eq(ia_vfsgid, vfsgid))
return true;
- mapped_gid = mapped_kgid_fs(mnt_userns, i_user_ns(inode), gid);
- if (in_group_p(mapped_gid))
+ if (vfsgid_in_group_p(ia_vfsgid))
return true;
}
if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
return true;
- if (gid_eq(kgid, INVALID_GID) &&
+ if (!vfsgid_valid(vfsgid) &&
ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
return true;
return false;
@@ -120,28 +118,29 @@ int setattr_prepare(struct user_namespace *mnt_userns, struct dentry *dentry,
goto kill_priv;
/* Make sure a caller can chown. */
- if ((ia_valid & ATTR_UID) && !chown_ok(mnt_userns, inode, attr->ia_uid))
+ if ((ia_valid & ATTR_UID) &&
+ !chown_ok(mnt_userns, inode, attr->ia_vfsuid))
return -EPERM;
/* Make sure caller can chgrp. */
- if ((ia_valid & ATTR_GID) && !chgrp_ok(mnt_userns, inode, attr->ia_gid))
+ if ((ia_valid & ATTR_GID) &&
+ !chgrp_ok(mnt_userns, inode, attr->ia_vfsgid))
return -EPERM;
/* Make sure a caller can chmod. */
if (ia_valid & ATTR_MODE) {
- kgid_t mapped_gid;
+ vfsgid_t vfsgid;
if (!inode_owner_or_capable(mnt_userns, inode))
return -EPERM;
if (ia_valid & ATTR_GID)
- mapped_gid = mapped_kgid_fs(mnt_userns,
- i_user_ns(inode), attr->ia_gid);
+ vfsgid = attr->ia_vfsgid;
else
- mapped_gid = i_gid_into_mnt(mnt_userns, inode);
+ vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
/* Also check the setgid bit! */
- if (!in_group_p(mapped_gid) &&
+ if (!vfsgid_in_group_p(vfsgid) &&
!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
attr->ia_mode &= ~S_ISGID;
}
@@ -219,9 +218,7 @@ EXPORT_SYMBOL(inode_newsize_ok);
* setattr_copy must be called with i_mutex held.
*
* setattr_copy updates the inode's metadata with that specified
- * in attr on idmapped mounts. If file ownership is changed setattr_copy
- * doesn't map ia_uid and ia_gid. It will asssume the caller has already
- * provided the intended values. Necessary permission checks to determine
+ * in attr on idmapped mounts. Necessary permission checks to determine
* whether or not the S_ISGID property needs to be removed are performed with
* the correct idmapped mount permission helpers.
* Noticeably missing is inode size update, which is more complex
@@ -242,10 +239,8 @@ void setattr_copy(struct user_namespace *mnt_userns, struct inode *inode,
{
unsigned int ia_valid = attr->ia_valid;
- if (ia_valid & ATTR_UID)
- inode->i_uid = attr->ia_uid;
- if (ia_valid & ATTR_GID)
- inode->i_gid = attr->ia_gid;
+ i_uid_update(mnt_userns, attr, inode);
+ i_gid_update(mnt_userns, attr, inode);
if (ia_valid & ATTR_ATIME)
inode->i_atime = attr->ia_atime;
if (ia_valid & ATTR_MTIME)
@@ -254,8 +249,8 @@ void setattr_copy(struct user_namespace *mnt_userns, struct inode *inode,
inode->i_ctime = attr->ia_ctime;
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
- kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
- if (!in_group_p(kgid) &&
+ vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+ if (!vfsgid_in_group_p(vfsgid) &&
!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
mode &= ~S_ISGID;
inode->i_mode = mode;
@@ -306,9 +301,6 @@ EXPORT_SYMBOL(may_setattr);
* retry. Because breaking a delegation may take a long time, the
* caller should drop the i_mutex before doing so.
*
- * If file ownership is changed notify_change() doesn't map ia_uid and
- * ia_gid. It will asssume the caller has already provided the intended values.
- *
* Alternatively, a caller may pass NULL for delegated_inode. This may
* be appropriate for callers that expect the underlying filesystem not
* to be NFS exported. Also, passing NULL is fine for callers holding
@@ -397,23 +389,25 @@ int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
* namespace of the superblock.
*/
if (ia_valid & ATTR_UID &&
- !kuid_has_mapping(inode->i_sb->s_user_ns, attr->ia_uid))
+ !vfsuid_has_fsmapping(mnt_userns, inode->i_sb->s_user_ns,
+ attr->ia_vfsuid))
return -EOVERFLOW;
if (ia_valid & ATTR_GID &&
- !kgid_has_mapping(inode->i_sb->s_user_ns, attr->ia_gid))
+ !vfsgid_has_fsmapping(mnt_userns, inode->i_sb->s_user_ns,
+ attr->ia_vfsgid))
return -EOVERFLOW;
/* Don't allow modifications of files with invalid uids or
* gids unless those uids & gids are being made valid.
*/
if (!(ia_valid & ATTR_UID) &&
- !uid_valid(i_uid_into_mnt(mnt_userns, inode)))
+ !vfsuid_valid(i_uid_into_vfsuid(mnt_userns, inode)))
return -EOVERFLOW;
if (!(ia_valid & ATTR_GID) &&
- !gid_valid(i_gid_into_mnt(mnt_userns, inode)))
+ !vfsgid_valid(i_gid_into_vfsgid(mnt_userns, inode)))
return -EOVERFLOW;
- error = security_inode_setattr(dentry, attr);
+ error = security_inode_setattr(mnt_userns, dentry, attr);
if (error)
return error;
error = try_break_deleg(inode, delegated_inode);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 415bf1823fb3..9c21e214d29e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -675,9 +675,8 @@ struct btrfs_fs_info {
rwlock_t global_root_lock;
struct rb_root global_root_tree;
- /* The xarray that holds all the FS roots */
- spinlock_t fs_roots_lock;
- struct xarray fs_roots;
+ spinlock_t fs_roots_radix_lock;
+ struct radix_tree_root fs_roots_radix;
/* block group cache stuff */
rwlock_t block_group_cache_lock;
@@ -995,10 +994,10 @@ struct btrfs_fs_info {
struct btrfs_delayed_root *delayed_root;
- /* Extent buffer xarray */
+ /* Extent buffer radix tree */
spinlock_t buffer_lock;
/* Entries are eb->start / sectorsize */
- struct xarray extent_buffers;
+ struct radix_tree_root buffer_radix;
/* next backup root to be overwritten */
int backup_root_index;
@@ -1119,8 +1118,7 @@ enum {
*/
BTRFS_ROOT_SHAREABLE,
BTRFS_ROOT_TRACK_DIRTY,
- /* The root is tracked in fs_info::fs_roots */
- BTRFS_ROOT_REGISTERED,
+ BTRFS_ROOT_IN_RADIX,
BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
BTRFS_ROOT_DEFRAG_RUNNING,
BTRFS_ROOT_FORCE_COW,
@@ -1224,10 +1222,10 @@ struct btrfs_root {
struct rb_root inode_tree;
/*
- * Xarray that keeps track of delayed nodes of every inode, protected
- * by inode_lock
+ * radix tree that keeps track of delayed nodes of every inode,
+ * protected by inode_lock
*/
- struct xarray delayed_nodes;
+ struct radix_tree_root delayed_nodes_tree;
/*
* right now this just gets used so that a root has its own devid
* for stat. It may be used for more later
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 66779ab3ed4a..748bf6b0d860 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -78,7 +78,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
}
spin_lock(&root->inode_lock);
- node = xa_load(&root->delayed_nodes, ino);
+ node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
if (node) {
if (btrfs_inode->delayed_node) {
@@ -90,9 +90,9 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
/*
* It's possible that we're racing into the middle of removing
- * this node from the xarray. In this case, the refcount
+ * this node from the radix tree. In this case, the refcount
* was zero and it should never go back to one. Just return
- * NULL like it was never in the xarray at all; our release
+ * NULL like it was never in the radix at all; our release
* function is in the process of removing it.
*
* Some implementations of refcount_inc refuse to bump the
@@ -100,7 +100,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
* here, refcount_inc() may decide to just WARN_ONCE() instead
* of actually bumping the refcount.
*
- * If this node is properly in the xarray, we want to bump the
+ * If this node is properly in the radix, we want to bump the
* refcount twice, once for the inode and once for this get
* operation.
*/
@@ -128,30 +128,36 @@ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
u64 ino = btrfs_ino(btrfs_inode);
int ret;
- do {
- node = btrfs_get_delayed_node(btrfs_inode);
- if (node)
- return node;
+again:
+ node = btrfs_get_delayed_node(btrfs_inode);
+ if (node)
+ return node;
- node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
- if (!node)
- return ERR_PTR(-ENOMEM);
- btrfs_init_delayed_node(node, root, ino);
+ node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+ btrfs_init_delayed_node(node, root, ino);
- /* Cached in the inode and can be accessed */
- refcount_set(&node->refs, 2);
+ /* cached in the btrfs inode and can be accessed */
+ refcount_set(&node->refs, 2);
- spin_lock(&root->inode_lock);
- ret = xa_insert(&root->delayed_nodes, ino, node, GFP_NOFS);
- if (ret) {
- spin_unlock(&root->inode_lock);
- kmem_cache_free(delayed_node_cache, node);
- if (ret != -EBUSY)
- return ERR_PTR(ret);
- }
- } while (ret);
+ ret = radix_tree_preload(GFP_NOFS);
+ if (ret) {
+ kmem_cache_free(delayed_node_cache, node);
+ return ERR_PTR(ret);
+ }
+
+ spin_lock(&root->inode_lock);
+ ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
+ if (ret == -EEXIST) {
+ spin_unlock(&root->inode_lock);
+ kmem_cache_free(delayed_node_cache, node);
+ radix_tree_preload_end();
+ goto again;
+ }
btrfs_inode->delayed_node = node;
spin_unlock(&root->inode_lock);
+ radix_tree_preload_end();
return node;
}
@@ -270,7 +276,8 @@ static void __btrfs_release_delayed_node(
* back up. We can delete it now.
*/
ASSERT(refcount_read(&delayed_node->refs) == 0);
- xa_erase(&root->delayed_nodes, delayed_node->inode_id);
+ radix_tree_delete(&root->delayed_nodes_tree,
+ delayed_node->inode_id);
spin_unlock(&root->inode_lock);
kmem_cache_free(delayed_node_cache, delayed_node);
}
@@ -1863,35 +1870,34 @@ void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
{
- unsigned long index = 0;
- struct btrfs_delayed_node *delayed_node;
+ u64 inode_id = 0;
struct btrfs_delayed_node *delayed_nodes[8];
+ int i, n;
while (1) {
- int n = 0;
-
spin_lock(&root->inode_lock);
- if (xa_empty(&root->delayed_nodes)) {
+ n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
+ (void **)delayed_nodes, inode_id,
+ ARRAY_SIZE(delayed_nodes));
+ if (!n) {
spin_unlock(&root->inode_lock);
- return;
+ break;
}
- xa_for_each_start(&root->delayed_nodes, index, delayed_node, index) {
+ inode_id = delayed_nodes[n - 1]->inode_id + 1;
+ for (i = 0; i < n; i++) {
/*
* Don't increase refs in case the node is dead and
* about to be removed from the tree in the loop below
*/
- if (refcount_inc_not_zero(&delayed_node->refs)) {
- delayed_nodes[n] = delayed_node;
- n++;
- }
- if (n >= ARRAY_SIZE(delayed_nodes))
- break;
+ if (!refcount_inc_not_zero(&delayed_nodes[i]->refs))
+ delayed_nodes[i] = NULL;
}
- index++;
spin_unlock(&root->inode_lock);
- for (int i = 0; i < n; i++) {
+ for (i = 0; i < n; i++) {
+ if (!delayed_nodes[i])
+ continue;
__btrfs_kill_delayed_node(delayed_nodes[i]);
btrfs_release_delayed_node(delayed_nodes[i]);
}
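
Several of the restored btrfs loops (btrfs_kill_all_delayed_nodes() above, and the fs_roots_radix walks further down) share the same batched iteration idiom: radix_tree_gang_lookup() fills an array with up to N entries starting at a key, the caller processes the batch, advances the start key past the last entry returned, and repeats until a lookup comes back empty. Here is a self-contained C sketch of that idiom over a plain sorted table; gang_lookup() below is an illustrative stand-in, not the kernel API.

#include <stdio.h>
#include <stddef.h>

struct node { unsigned long key; };

/* Illustrative stand-in for radix_tree_gang_lookup(): copy out up to
 * max entries whose key is >= start, return how many were found. */
static int gang_lookup(const struct node *tbl, size_t n, unsigned long start,
		       const struct node **out, int max)
{
	int found = 0;

	for (size_t i = 0; i < n && found < max; i++)
		if (tbl[i].key >= start)
			out[found++] = &tbl[i];
	return found;
}

int main(void)
{
	const struct node tbl[] = { {1}, {4}, {9}, {16}, {25}, {36}, {49} };
	const struct node *gang[3];
	unsigned long start = 0;
	int ret;

	/* Same loop shape as the kernel code: fetch a batch, advance the
	 * start key past the last entry seen, stop on an empty batch. */
	while ((ret = gang_lookup(tbl, sizeof(tbl) / sizeof(tbl[0]), start,
				  gang, 3)) != 0) {
		for (int i = 0; i < ret; i++)
			printf("visit key %lu\n", gang[i]->key);
		start = gang[ret - 1]->key + 1;
	}
	return 0;
}
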
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4ba005c41983..de440ebf5648 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -5,6 +5,7 @@
#include <linux/fs.h>
#include <linux/blkdev.h>
+#include <linux/radix-tree.h>
#include <linux/writeback.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
@@ -485,7 +486,7 @@ static int csum_dirty_subpage_buffers(struct btrfs_fs_info *fs_info,
uptodate = btrfs_subpage_test_uptodate(fs_info, page, cur,
fs_info->nodesize);
- /* A dirty eb shouldn't disappear from extent_buffers */
+ /* A dirty eb shouldn't disappear from buffer_radix */
if (WARN_ON(!eb))
return -EUCLEAN;
@@ -1158,7 +1159,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
root->nr_delalloc_inodes = 0;
root->nr_ordered_extents = 0;
root->inode_tree = RB_ROOT;
- xa_init_flags(&root->delayed_nodes, GFP_ATOMIC);
+ INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
btrfs_init_root_block_rsv(root);
@@ -1210,9 +1211,9 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
btrfs_qgroup_init_swapped_blocks(&root->swapped_blocks);
#ifdef CONFIG_BTRFS_DEBUG
INIT_LIST_HEAD(&root->leak_list);
- spin_lock(&fs_info->fs_roots_lock);
+ spin_lock(&fs_info->fs_roots_radix_lock);
list_add_tail(&root->leak_list, &fs_info->allocated_roots);
- spin_unlock(&fs_info->fs_roots_lock);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
#endif
}
@@ -1659,11 +1660,12 @@ static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
{
struct btrfs_root *root;
- spin_lock(&fs_info->fs_roots_lock);
- root = xa_load(&fs_info->fs_roots, (unsigned long)root_id);
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ root = radix_tree_lookup(&fs_info->fs_roots_radix,
+ (unsigned long)root_id);
if (root)
root = btrfs_grab_root(root);
- spin_unlock(&fs_info->fs_roots_lock);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
return root;
}
@@ -1705,14 +1707,20 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
{
int ret;
- spin_lock(&fs_info->fs_roots_lock);
- ret = xa_insert(&fs_info->fs_roots, (unsigned long)root->root_key.objectid,
- root, GFP_NOFS);
+ ret = radix_tree_preload(GFP_NOFS);
+ if (ret)
+ return ret;
+
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ ret = radix_tree_insert(&fs_info->fs_roots_radix,
+ (unsigned long)root->root_key.objectid,
+ root);
if (ret == 0) {
btrfs_grab_root(root);
- set_bit(BTRFS_ROOT_REGISTERED, &root->state);
+ set_bit(BTRFS_ROOT_IN_RADIX, &root->state);
}
- spin_unlock(&fs_info->fs_roots_lock);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
+ radix_tree_preload_end();
return ret;
}
@@ -2342,9 +2350,9 @@ void btrfs_put_root(struct btrfs_root *root)
btrfs_drew_lock_destroy(&root->snapshot_lock);
free_root_extent_buffers(root);
#ifdef CONFIG_BTRFS_DEBUG
- spin_lock(&root->fs_info->fs_roots_lock);
+ spin_lock(&root->fs_info->fs_roots_radix_lock);
list_del_init(&root->leak_list);
- spin_unlock(&root->fs_info->fs_roots_lock);
+ spin_unlock(&root->fs_info->fs_roots_radix_lock);
#endif
kfree(root);
}
@@ -2352,21 +2360,28 @@ void btrfs_put_root(struct btrfs_root *root)
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
{
- struct btrfs_root *root;
- unsigned long index = 0;
+ int ret;
+ struct btrfs_root *gang[8];
+ int i;
while (!list_empty(&fs_info->dead_roots)) {
- root = list_entry(fs_info->dead_roots.next,
- struct btrfs_root, root_list);
- list_del(&root->root_list);
+ gang[0] = list_entry(fs_info->dead_roots.next,
+ struct btrfs_root, root_list);
+ list_del(&gang[0]->root_list);
- if (test_bit(BTRFS_ROOT_REGISTERED, &root->state))
- btrfs_drop_and_free_fs_root(fs_info, root);
- btrfs_put_root(root);
+ if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state))
+ btrfs_drop_and_free_fs_root(fs_info, gang[0]);
+ btrfs_put_root(gang[0]);
}
- xa_for_each(&fs_info->fs_roots, index, root) {
- btrfs_drop_and_free_fs_root(fs_info, root);
+ while (1) {
+ ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+ (void **)gang, 0,
+ ARRAY_SIZE(gang));
+ if (!ret)
+ break;
+ for (i = 0; i < ret; i++)
+ btrfs_drop_and_free_fs_root(fs_info, gang[i]);
}
}
@@ -3134,8 +3149,8 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
{
- xa_init_flags(&fs_info->fs_roots, GFP_ATOMIC);
- xa_init_flags(&fs_info->extent_buffers, GFP_ATOMIC);
+ INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
+ INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
INIT_LIST_HEAD(&fs_info->trans_list);
INIT_LIST_HEAD(&fs_info->dead_roots);
INIT_LIST_HEAD(&fs_info->delayed_iputs);
@@ -3143,7 +3158,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
INIT_LIST_HEAD(&fs_info->caching_block_groups);
spin_lock_init(&fs_info->delalloc_root_lock);
spin_lock_init(&fs_info->trans_lock);
- spin_lock_init(&fs_info->fs_roots_lock);
+ spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
spin_lock_init(&fs_info->super_lock);
@@ -3374,7 +3389,7 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
/*
* btrfs_find_orphan_roots() is responsible for finding all the dead
* roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load
- * them into the fs_info->fs_roots. This must be done before
+ * them into the fs_info->fs_roots_radix tree. This must be done before
* calling btrfs_orphan_cleanup() on the tree root. If we don't do it
* first, then btrfs_orphan_cleanup() will delete a dead root's orphan
* item before the root's tree is deleted - this means that if we unmount
@@ -4499,11 +4514,12 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
{
bool drop_ref = false;
- spin_lock(&fs_info->fs_roots_lock);
- xa_erase(&fs_info->fs_roots, (unsigned long)root->root_key.objectid);
- if (test_and_clear_bit(BTRFS_ROOT_REGISTERED, &root->state))
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ radix_tree_delete(&fs_info->fs_roots_radix,
+ (unsigned long)root->root_key.objectid);
+ if (test_and_clear_bit(BTRFS_ROOT_IN_RADIX, &root->state))
drop_ref = true;
- spin_unlock(&fs_info->fs_roots_lock);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
if (BTRFS_FS_ERROR(fs_info)) {
ASSERT(root->log_root == NULL);
@@ -4519,48 +4535,50 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
{
- struct btrfs_root *roots[8];
- unsigned long index = 0;
- int i;
+ u64 root_objectid = 0;
+ struct btrfs_root *gang[8];
+ int i = 0;
int err = 0;
- int grabbed;
+ unsigned int ret = 0;
while (1) {
- struct btrfs_root *root;
-
- spin_lock(&fs_info->fs_roots_lock);
- if (!xa_find(&fs_info->fs_roots, &index, ULONG_MAX, XA_PRESENT)) {
- spin_unlock(&fs_info->fs_roots_lock);
- return err;
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+ (void **)gang, root_objectid,
+ ARRAY_SIZE(gang));
+ if (!ret) {
+ spin_unlock(&fs_info->fs_roots_radix_lock);
+ break;
}
+ root_objectid = gang[ret - 1]->root_key.objectid + 1;
- grabbed = 0;
- xa_for_each_start(&fs_info->fs_roots, index, root, index) {
- /* Avoid grabbing roots in dead_roots */
- if (btrfs_root_refs(&root->root_item) > 0)
- roots[grabbed++] = btrfs_grab_root(root);
- if (grabbed >= ARRAY_SIZE(roots))
- break;
+ for (i = 0; i < ret; i++) {
+ /* Avoid grabbing roots in dead_roots */
+ if (btrfs_root_refs(&gang[i]->root_item) == 0) {
+ gang[i] = NULL;
+ continue;
+ }
+ /* grab all the search results for later use */
+ gang[i] = btrfs_grab_root(gang[i]);
}
- spin_unlock(&fs_info->fs_roots_lock);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
- for (i = 0; i < grabbed; i++) {
- if (!roots[i])
+ for (i = 0; i < ret; i++) {
+ if (!gang[i])
continue;
- index = roots[i]->root_key.objectid;
- err = btrfs_orphan_cleanup(roots[i]);
+ root_objectid = gang[i]->root_key.objectid;
+ err = btrfs_orphan_cleanup(gang[i]);
if (err)
- goto out;
- btrfs_put_root(roots[i]);
+ break;
+ btrfs_put_root(gang[i]);
}
- index++;
+ root_objectid++;
}
-out:
- /* Release the roots that remain uncleaned due to error */
- for (; i < grabbed; i++) {
- if (roots[i])
- btrfs_put_root(roots[i]);
+ /* release the uncleaned roots due to error */
+ for (; i < ret; i++) {
+ if (gang[i])
+ btrfs_put_root(gang[i]);
}
return err;
}
@@ -4879,28 +4897,31 @@ static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info)
{
- unsigned long index = 0;
- int grabbed = 0;
- struct btrfs_root *roots[8];
+ struct btrfs_root *gang[8];
+ u64 root_objectid = 0;
+ int ret;
+
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ while ((ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+ (void **)gang, root_objectid,
+ ARRAY_SIZE(gang))) != 0) {
+ int i;
- spin_lock(&fs_info->fs_roots_lock);
- while ((grabbed = xa_extract(&fs_info->fs_roots, (void **)roots, index,
- ULONG_MAX, 8, XA_PRESENT))) {
- for (int i = 0; i < grabbed; i++)
- roots[i] = btrfs_grab_root(roots[i]);
- spin_unlock(&fs_info->fs_roots_lock);
+ for (i = 0; i < ret; i++)
+ gang[i] = btrfs_grab_root(gang[i]);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
- for (int i = 0; i < grabbed; i++) {
- if (!roots[i])
+ for (i = 0; i < ret; i++) {
+ if (!gang[i])
continue;
- index = roots[i]->root_key.objectid;
- btrfs_free_log(NULL, roots[i]);
- btrfs_put_root(roots[i]);
+ root_objectid = gang[i]->root_key.objectid;
+ btrfs_free_log(NULL, gang[i]);
+ btrfs_put_root(gang[i]);
}
- index++;
- spin_lock(&fs_info->fs_roots_lock);
+ root_objectid++;
+ spin_lock(&fs_info->fs_roots_radix_lock);
}
- spin_unlock(&fs_info->fs_roots_lock);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
btrfs_free_log_root_tree(NULL, fs_info);
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4157ecc27d4b..a3afc15430ce 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5829,7 +5829,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
btrfs_qgroup_free_meta_all_pertrans(root);
- if (test_bit(BTRFS_ROOT_REGISTERED, &root->state))
+ if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
btrfs_add_dropped_root(trans, root);
else
btrfs_put_root(root);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 04e36343da3a..f03ab5dbda7a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2966,7 +2966,7 @@ static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page)
}
/*
- * Find extent buffer for a given bytenr.
+ * Find extent buffer for a given bytenr.
*
* This is for end_bio_extent_readpage(), thus we can't do any unsafe locking
* in endio context.
@@ -2985,9 +2985,11 @@ static struct extent_buffer *find_extent_buffer_readpage(
return (struct extent_buffer *)page->private;
}
- /* For subpage case, we need to lookup extent buffer xarray */
- eb = xa_load(&fs_info->extent_buffers,
- bytenr >> fs_info->sectorsize_bits);
+ /* For subpage case, we need to lookup buffer radix tree */
+ rcu_read_lock();
+ eb = radix_tree_lookup(&fs_info->buffer_radix,
+ bytenr >> fs_info->sectorsize_bits);
+ rcu_read_unlock();
ASSERT(eb);
return eb;
}
@@ -4435,8 +4437,8 @@ static struct extent_buffer *find_extent_buffer_nolock(
struct extent_buffer *eb;
rcu_read_lock();
- eb = xa_load(&fs_info->extent_buffers,
- start >> fs_info->sectorsize_bits);
+ eb = radix_tree_lookup(&fs_info->buffer_radix,
+ start >> fs_info->sectorsize_bits);
if (eb && atomic_inc_not_zero(&eb->refs)) {
rcu_read_unlock();
return eb;
@@ -6129,22 +6131,24 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
if (!eb)
return ERR_PTR(-ENOMEM);
eb->fs_info = fs_info;
-
- do {
- ret = xa_insert(&fs_info->extent_buffers,
- start >> fs_info->sectorsize_bits,
- eb, GFP_NOFS);
- if (ret == -ENOMEM) {
- exists = ERR_PTR(ret);
+again:
+ ret = radix_tree_preload(GFP_NOFS);
+ if (ret) {
+ exists = ERR_PTR(ret);
+ goto free_eb;
+ }
+ spin_lock(&fs_info->buffer_lock);
+ ret = radix_tree_insert(&fs_info->buffer_radix,
+ start >> fs_info->sectorsize_bits, eb);
+ spin_unlock(&fs_info->buffer_lock);
+ radix_tree_preload_end();
+ if (ret == -EEXIST) {
+ exists = find_extent_buffer(fs_info, start);
+ if (exists)
goto free_eb;
- }
- if (ret == -EBUSY) {
- exists = find_extent_buffer(fs_info, start);
- if (exists)
- goto free_eb;
- }
- } while (ret);
-
+ else
+ goto again;
+ }
check_buffer_tree_ref(eb);
set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
@@ -6319,22 +6323,25 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
}
if (uptodate)
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-
- do {
- ret = xa_insert(&fs_info->extent_buffers,
- start >> fs_info->sectorsize_bits,
- eb, GFP_NOFS);
- if (ret == -ENOMEM) {
- exists = ERR_PTR(ret);
+again:
+ ret = radix_tree_preload(GFP_NOFS);
+ if (ret) {
+ exists = ERR_PTR(ret);
+ goto free_eb;
+ }
+
+ spin_lock(&fs_info->buffer_lock);
+ ret = radix_tree_insert(&fs_info->buffer_radix,
+ start >> fs_info->sectorsize_bits, eb);
+ spin_unlock(&fs_info->buffer_lock);
+ radix_tree_preload_end();
+ if (ret == -EEXIST) {
+ exists = find_extent_buffer(fs_info, start);
+ if (exists)
goto free_eb;
- }
- if (ret == -EBUSY) {
- exists = find_extent_buffer(fs_info, start);
- if (exists)
- goto free_eb;
- }
- } while (ret);
-
+ else
+ goto again;
+ }
/* add one reference for the tree */
check_buffer_tree_ref(eb);
set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
@@ -6379,8 +6386,10 @@ static int release_extent_buffer(struct extent_buffer *eb)
spin_unlock(&eb->refs_lock);
- xa_erase(&fs_info->extent_buffers,
- eb->start >> fs_info->sectorsize_bits);
+ spin_lock(&fs_info->buffer_lock);
+ radix_tree_delete(&fs_info->buffer_radix,
+ eb->start >> fs_info->sectorsize_bits);
+ spin_unlock(&fs_info->buffer_lock);
} else {
spin_unlock(&eb->refs_lock);
}
@@ -7325,25 +7334,42 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
}
}
+#define GANG_LOOKUP_SIZE 16
static struct extent_buffer *get_next_extent_buffer(
struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
{
- struct extent_buffer *eb;
- unsigned long index;
+ struct extent_buffer *gang[GANG_LOOKUP_SIZE];
+ struct extent_buffer *found = NULL;
u64 page_start = page_offset(page);
+ u64 cur = page_start;
ASSERT(in_range(bytenr, page_start, PAGE_SIZE));
lockdep_assert_held(&fs_info->buffer_lock);
- xa_for_each_start(&fs_info->extent_buffers, index, eb,
- page_start >> fs_info->sectorsize_bits) {
- if (in_range(eb->start, page_start, PAGE_SIZE))
- return eb;
- else if (eb->start >= page_start + PAGE_SIZE)
- /* Already beyond page end */
- return NULL;
+ while (cur < page_start + PAGE_SIZE) {
+ int ret;
+ int i;
+
+ ret = radix_tree_gang_lookup(&fs_info->buffer_radix,
+ (void **)gang, cur >> fs_info->sectorsize_bits,
+ min_t(unsigned int, GANG_LOOKUP_SIZE,
+ PAGE_SIZE / fs_info->nodesize));
+ if (ret == 0)
+ goto out;
+ for (i = 0; i < ret; i++) {
+ /* Already beyond page end */
+ if (gang[i]->start >= page_start + PAGE_SIZE)
+ goto out;
+ /* Found one */
+ if (gang[i]->start >= bytenr) {
+ found = gang[i];
+ goto out;
+ }
+ }
+ cur = gang[ret - 1]->start + gang[ret - 1]->len;
}
- return NULL;
+out:
+ return found;
}
static int try_release_subpage_extent_buffer(struct page *page)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 05e0c4a5affd..d50448bf8eed 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3578,7 +3578,6 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
u64 last_objectid = 0;
int ret = 0, nr_unlink = 0;
- /* Bail out if the cleanup is already running. */
if (test_and_set_bit(BTRFS_ROOT_ORPHAN_CLEANUP, &root->state))
return 0;
@@ -3661,17 +3660,17 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
*
* btrfs_find_orphan_roots() ran before us, which has
* found all deleted roots and loaded them into
- * fs_info->fs_roots. So here we can find if an
+ * fs_info->fs_roots_radix. So here we can find if an
* orphan item corresponds to a deleted root by looking
- * up the root from that xarray.
+ * up the root from that radix tree.
*/
- spin_lock(&fs_info->fs_roots_lock);
- dead_root = xa_load(&fs_info->fs_roots,
- (unsigned long)found_key.objectid);
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ dead_root = radix_tree_lookup(&fs_info->fs_roots_radix,
+ (unsigned long)found_key.objectid);
if (dead_root && btrfs_root_refs(&dead_root->root_item) == 0)
is_dead_root = 1;
- spin_unlock(&fs_info->fs_roots_lock);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
if (is_dead_root) {
/* prevent this orphan from being found again */
@@ -3911,7 +3910,7 @@ cache_index:
* cache.
*
* This is required for both inode re-read from disk and delayed inode
- * in the delayed_nodes xarray.
+ * in delayed_nodes_tree.
*/
if (BTRFS_I(inode)->last_trans == fs_info->generation)
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
@@ -7681,7 +7680,19 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
em->block_start == EXTENT_MAP_INLINE) {
free_extent_map(em);
- ret = -ENOTBLK;
+ /*
+ * If we are in a NOWAIT context, return -EAGAIN in order to
+ * fall back to buffered IO. This is not only because we can
+ * block with buffered IO (no support for NOWAIT semantics at
+ * the moment) but also to avoid returning short reads to user
+ * space - this happens if we were able to read some data from
+ * previous non-compressed extents and then when we fall back to
+ * buffered IO, at btrfs_file_read_iter() by calling
+ * filemap_read(), we fail to fault in pages for the read buffer,
+ * in which case filemap_read() returns a short read (the number
+ * of bytes previously read is > 0, so it does not return -EFAULT).
+ */
+ ret = (flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOTBLK;
goto unlock_err;
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index fa56890ff81f..c7dea639a56f 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -10,6 +10,7 @@
#include <linux/mount.h>
#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
+#include <linux/radix-tree.h>
#include <linux/vmalloc.h>
#include <linux/string.h>
#include <linux/compat.h>
@@ -127,7 +128,7 @@ struct send_ctx {
struct list_head new_refs;
struct list_head deleted_refs;
- struct xarray name_cache;
+ struct radix_tree_root name_cache;
struct list_head name_cache_list;
int name_cache_size;
@@ -268,13 +269,14 @@ struct orphan_dir_info {
struct name_cache_entry {
struct list_head list;
/*
- * On 32bit kernels, xarray has only 32bit indices, but we need to
- * handle 64bit inums. We use the lower 32bit of the 64bit inum to store
- * it in the tree. If more than one inum would fall into the same entry,
- * we use inum_aliases to store the additional entries. inum_aliases is
- * also used to store entries with the same inum but different generations.
+ * radix_tree has only 32bit entries but we need to handle 64bit inums.
+ * We use the lower 32bit of the 64bit inum to store it in the tree. If
+ * more than one inum would fall into the same entry, we use radix_list
+ * to store the additional entries. radix_list is also used to store
+ * entries that have the same inum but different
+ * generations.
*/
- struct list_head inum_aliases;
+ struct list_head radix_list;
u64 ino;
u64 gen;
u64 parent_ino;
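
The restored comment describes the collision handling in send's name cache: the radix tree is indexed by the low 32 bits of a 64-bit inode number, and entries that collide on that index (or that share an inum across generations) are chained on radix_list and matched on the full key. A minimal userspace sketch of that narrow-index-plus-chaining scheme follows; the fixed-size bucket array stands in for the radix tree and is purely illustrative.

#include <stdio.h>
#include <stdint.h>

struct entry {
	uint64_t ino;		/* full 64-bit key */
	uint64_t gen;
	struct entry *next;	/* stand-in for the radix_list chaining */
};

/* Illustrative stand-in for the radix tree: index by the low bits only. */
#define NBUCKETS 16
static struct entry *buckets[NBUCKETS];

static void cache_insert(struct entry *e)
{
	uint32_t idx = (uint32_t)e->ino % NBUCKETS;	/* low bits of the inum */

	e->next = buckets[idx];
	buckets[idx] = e;
}

static struct entry *cache_search(uint64_t ino, uint64_t gen)
{
	uint32_t idx = (uint32_t)ino % NBUCKETS;

	/* Walk the collision chain and match on the full key, the way
	 * name_cache_search() walks radix_list. */
	for (struct entry *e = buckets[idx]; e; e = e->next)
		if (e->ino == ino && e->gen == gen)
			return e;
	return NULL;
}

int main(void)
{
	struct entry a = { .ino = 5, .gen = 1 };
	struct entry b = { .ino = 5 + (1ULL << 32), .gen = 1 };	/* same low 32 bits as a */

	cache_insert(&a);
	cache_insert(&b);
	printf("found b: %s\n", cache_search(b.ino, b.gen) == &b ? "yes" : "no");
	return 0;
}
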
@@ -2024,9 +2026,9 @@ out:
}
/*
- * Insert a name cache entry. On 32bit kernels the xarray index is 32bit,
+ * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
* so we need to do some special handling in case we have clashes. This function
- * takes care of this with the help of name_cache_entry::inum_aliases.
+ * takes care of this with the help of name_cache_entry::radix_list.
* In case of error, nce is kfreed.
*/
static int name_cache_insert(struct send_ctx *sctx,
@@ -2035,7 +2037,8 @@ static int name_cache_insert(struct send_ctx *sctx,
int ret = 0;
struct list_head *nce_head;
- nce_head = xa_load(&sctx->name_cache, (unsigned long)nce->ino);
+ nce_head = radix_tree_lookup(&sctx->name_cache,
+ (unsigned long)nce->ino);
if (!nce_head) {
nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL);
if (!nce_head) {
@@ -2044,14 +2047,14 @@ static int name_cache_insert(struct send_ctx *sctx,
}
INIT_LIST_HEAD(nce_head);
- ret = xa_insert(&sctx->name_cache, nce->ino, nce_head, GFP_KERNEL);
+ ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
if (ret < 0) {
kfree(nce_head);
kfree(nce);
return ret;
}
}
- list_add_tail(&nce->inum_aliases, nce_head);
+ list_add_tail(&nce->radix_list, nce_head);
list_add_tail(&nce->list, &sctx->name_cache_list);
sctx->name_cache_size++;
@@ -2063,14 +2066,15 @@ static void name_cache_delete(struct send_ctx *sctx,
{
struct list_head *nce_head;
- nce_head = xa_load(&sctx->name_cache, (unsigned long)nce->ino);
+ nce_head = radix_tree_lookup(&sctx->name_cache,
+ (unsigned long)nce->ino);
if (!nce_head) {
btrfs_err(sctx->send_root->fs_info,
"name_cache_delete lookup failed ino %llu cache size %d, leaking memory",
nce->ino, sctx->name_cache_size);
}
- list_del(&nce->inum_aliases);
+ list_del(&nce->radix_list);
list_del(&nce->list);
sctx->name_cache_size--;
@@ -2078,7 +2082,7 @@ static void name_cache_delete(struct send_ctx *sctx,
* We may not get to the final release of nce_head if the lookup fails
*/
if (nce_head && list_empty(nce_head)) {
- xa_erase(&sctx->name_cache, (unsigned long)nce->ino);
+ radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
kfree(nce_head);
}
}
@@ -2089,11 +2093,11 @@ static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
struct list_head *nce_head;
struct name_cache_entry *cur;
- nce_head = xa_load(&sctx->name_cache, (unsigned long)ino);
+ nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
if (!nce_head)
return NULL;
- list_for_each_entry(cur, nce_head, inum_aliases) {
+ list_for_each_entry(cur, nce_head, radix_list) {
if (cur->ino == ino && cur->gen == gen)
return cur;
}
@@ -7518,7 +7522,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
INIT_LIST_HEAD(&sctx->new_refs);
INIT_LIST_HEAD(&sctx->deleted_refs);
- xa_init_flags(&sctx->name_cache, GFP_KERNEL);
+ INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL);
INIT_LIST_HEAD(&sctx->name_cache_list);
sctx->flags = arg->flags;
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 1591bfa55bcc..d8e56edd6991 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -150,8 +150,8 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
{
- unsigned long index;
- struct extent_buffer *eb;
+ struct radix_tree_iter iter;
+ void **slot;
struct btrfs_device *dev, *tmp;
if (!fs_info)
@@ -163,9 +163,25 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
test_mnt->mnt_sb->s_fs_info = NULL;
- xa_for_each(&fs_info->extent_buffers, index, eb) {
+ spin_lock(&fs_info->buffer_lock);
+ radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) {
+ struct extent_buffer *eb;
+
+ eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock);
+ if (!eb)
+ continue;
+ /* Shouldn't happen but that kind of thinking creates CVEs */
+ if (radix_tree_exception(eb)) {
+ if (radix_tree_deref_retry(eb))
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+ slot = radix_tree_iter_resume(slot, &iter);
+ spin_unlock(&fs_info->buffer_lock);
free_extent_buffer_stale(eb);
+ spin_lock(&fs_info->buffer_lock);
}
+ spin_unlock(&fs_info->buffer_lock);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
list_for_each_entry_safe(dev, tmp, &fs_info->fs_devices->devices,
@@ -186,7 +202,7 @@ void btrfs_free_dummy_root(struct btrfs_root *root)
if (!root)
return;
/* Will be freed by btrfs_free_fs_roots */
- if (WARN_ON(test_bit(BTRFS_ROOT_REGISTERED, &root->state)))
+ if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state)))
return;
btrfs_global_root_delete(root);
btrfs_put_root(root);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 06c0a958d114..875b801ab3d7 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -23,7 +23,7 @@
#include "space-info.h"
#include "zoned.h"
-#define BTRFS_ROOT_TRANS_TAG XA_MARK_0
+#define BTRFS_ROOT_TRANS_TAG 0
/*
* Transaction states and transitions
@@ -437,15 +437,15 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
*/
smp_wmb();
- spin_lock(&fs_info->fs_roots_lock);
+ spin_lock(&fs_info->fs_roots_radix_lock);
if (root->last_trans == trans->transid && !force) {
- spin_unlock(&fs_info->fs_roots_lock);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
return 0;
}
- xa_set_mark(&fs_info->fs_roots,
- (unsigned long)root->root_key.objectid,
- BTRFS_ROOT_TRANS_TAG);
- spin_unlock(&fs_info->fs_roots_lock);
+ radix_tree_tag_set(&fs_info->fs_roots_radix,
+ (unsigned long)root->root_key.objectid,
+ BTRFS_ROOT_TRANS_TAG);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
root->last_trans = trans->transid;
/* this is pretty tricky. We don't want to
@@ -487,9 +487,11 @@ void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
spin_unlock(&cur_trans->dropped_roots_lock);
/* Make sure we don't try to update the root at commit time */
- xa_clear_mark(&fs_info->fs_roots,
- (unsigned long)root->root_key.objectid,
- BTRFS_ROOT_TRANS_TAG);
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ radix_tree_tag_clear(&fs_info->fs_roots_radix,
+ (unsigned long)root->root_key.objectid,
+ BTRFS_ROOT_TRANS_TAG);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
}
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
@@ -1402,8 +1404,9 @@ void btrfs_add_dead_root(struct btrfs_root *root)
static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_root *root;
- unsigned long index;
+ struct btrfs_root *gang[8];
+ int i;
+ int ret;
/*
* At this point no one can be using this transaction to modify any tree
@@ -1411,46 +1414,57 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
*/
ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING);
- spin_lock(&fs_info->fs_roots_lock);
- xa_for_each_marked(&fs_info->fs_roots, index, root, BTRFS_ROOT_TRANS_TAG) {
- int ret;
-
- /*
- * At this point we can neither have tasks logging inodes
- * from a root nor trying to commit a log tree.
- */
- ASSERT(atomic_read(&root->log_writers) == 0);
- ASSERT(atomic_read(&root->log_commit[0]) == 0);
- ASSERT(atomic_read(&root->log_commit[1]) == 0);
-
- xa_clear_mark(&fs_info->fs_roots,
- (unsigned long)root->root_key.objectid,
- BTRFS_ROOT_TRANS_TAG);
- spin_unlock(&fs_info->fs_roots_lock);
-
- btrfs_free_log(trans, root);
- ret = btrfs_update_reloc_root(trans, root);
- if (ret)
- return ret;
-
- /* See comments in should_cow_block() */
- clear_bit(BTRFS_ROOT_FORCE_COW, &root->state);
- smp_mb__after_atomic();
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ while (1) {
+ ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
+ (void **)gang, 0,
+ ARRAY_SIZE(gang),
+ BTRFS_ROOT_TRANS_TAG);
+ if (ret == 0)
+ break;
+ for (i = 0; i < ret; i++) {
+ struct btrfs_root *root = gang[i];
+ int ret2;
+
+ /*
+ * At this point we can neither have tasks logging inodes
+ * from a root nor trying to commit a log tree.
+ */
+ ASSERT(atomic_read(&root->log_writers) == 0);
+ ASSERT(atomic_read(&root->log_commit[0]) == 0);
+ ASSERT(atomic_read(&root->log_commit[1]) == 0);
+
+ radix_tree_tag_clear(&fs_info->fs_roots_radix,
+ (unsigned long)root->root_key.objectid,
+ BTRFS_ROOT_TRANS_TAG);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
+
+ btrfs_free_log(trans, root);
+ ret2 = btrfs_update_reloc_root(trans, root);
+ if (ret2)
+ return ret2;
+
+ /* see comments in should_cow_block() */
+ clear_bit(BTRFS_ROOT_FORCE_COW, &root->state);
+ smp_mb__after_atomic();
+
+ if (root->commit_root != root->node) {
+ list_add_tail(&root->dirty_list,
+ &trans->transaction->switch_commits);
+ btrfs_set_root_node(&root->root_item,
+ root->node);
+ }
- if (root->commit_root != root->node) {
- list_add_tail(&root->dirty_list,
- &trans->transaction->switch_commits);
- btrfs_set_root_node(&root->root_item, root->node);
+ ret2 = btrfs_update_root(trans, fs_info->tree_root,
+ &root->root_key,
+ &root->root_item);
+ if (ret2)
+ return ret2;
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ btrfs_qgroup_free_meta_all_pertrans(root);
}
-
- ret = btrfs_update_root(trans, fs_info->tree_root,
- &root->root_key, &root->root_item);
- if (ret)
- return ret;
- spin_lock(&fs_info->fs_roots_lock);
- btrfs_qgroup_free_meta_all_pertrans(root);
}
- spin_unlock(&fs_info->fs_roots_lock);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
return 0;
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 79e8c8cd75ed..d99026df6f67 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1735,12 +1735,14 @@ static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical,
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
&mapped_length, &bioc);
if (ret || !bioc || mapped_length < PAGE_SIZE) {
- btrfs_put_bioc(bioc);
- return -EIO;
+ ret = -EIO;
+ goto out_put_bioc;
}
- if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK)
- return -EINVAL;
+ if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+ ret = -EINVAL;
+ goto out_put_bioc;
+ }
nofs_flag = memalloc_nofs_save();
nmirrors = (int)bioc->num_stripes;
@@ -1759,7 +1761,8 @@ static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical,
break;
}
memalloc_nofs_restore(nofs_flag);
-
+out_put_bioc:
+ btrfs_put_bioc(bioc);
return ret;
}
@@ -1885,7 +1888,6 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct map_lookup *map;
- bool need_zone_finish;
int ret = 0;
int i;
@@ -1942,12 +1944,6 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
}
}
- /*
- * The block group is not fully allocated, so not fully written yet. We
- * need to send ZONE_FINISH command to free up an active zone.
- */
- need_zone_finish = !btrfs_zoned_bg_is_full(block_group);
-
block_group->zone_is_active = 0;
block_group->alloc_offset = block_group->zone_capacity;
block_group->free_space_ctl->free_space = 0;
@@ -1963,15 +1959,13 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
if (device->zone_info->max_active_zones == 0)
continue;
- if (need_zone_finish) {
- ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
- physical >> SECTOR_SHIFT,
- device->zone_info->zone_size >> SECTOR_SHIFT,
- GFP_NOFS);
+ ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
+ physical >> SECTOR_SHIFT,
+ device->zone_info->zone_size >> SECTOR_SHIFT,
+ GFP_NOFS);
- if (ret)
- return ret;
- }
+ if (ret)
+ return ret;
btrfs_dev_clear_active_zone(device, physical);
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 6dee88815491..d6e5916138e4 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -63,7 +63,7 @@
(CONGESTION_ON_THRESH(congestion_kb) >> 2))
static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
- struct folio *folio, void **_fsdata);
+ struct folio **foliop, void **_fsdata);
static inline struct ceph_snap_context *page_snap_context(struct page *page)
{
@@ -1288,18 +1288,19 @@ ceph_find_incompatible(struct page *page)
}
static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
- struct folio *folio, void **_fsdata)
+ struct folio **foliop, void **_fsdata)
{
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_snap_context *snapc;
- snapc = ceph_find_incompatible(folio_page(folio, 0));
+ snapc = ceph_find_incompatible(folio_page(*foliop, 0));
if (snapc) {
int r;
- folio_unlock(folio);
- folio_put(folio);
+ folio_unlock(*foliop);
+ folio_put(*foliop);
+ *foliop = NULL;
if (IS_ERR(snapc))
return PTR_ERR(snapc);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index fa29c9aae24b..386bb523c69e 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1918,7 +1918,6 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
list_del_init(&ses->smb_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
- spin_lock(&ses->chan_lock);
chan_count = ses->chan_count;
/* close any extra channels */
@@ -1934,7 +1933,6 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
ses->chans[i].server = NULL;
}
}
- spin_unlock(&ses->chan_lock);
sesInfoFree(ses);
cifs_put_tcp_session(server, 0);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index b85718f32b53..02c8b2906196 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -474,6 +474,14 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
out:
if (rc && chan->server) {
+ /*
+ * Avoid racing with these delayed works before we remove
+ * this channel.
+ */
+ cancel_delayed_work_sync(&chan->server->echo);
+ cancel_delayed_work_sync(&chan->server->resolve);
+ cancel_delayed_work_sync(&chan->server->reconnect);
+
spin_lock(&ses->chan_lock);
/* we rely on all bits beyond chan_count to be clear */
cifs_chan_clear_need_reconnect(ses, chan->server);
@@ -484,10 +492,9 @@ out:
*/
WARN_ON(ses->chan_count < 1);
spin_unlock(&ses->chan_lock);
- }
- if (rc && chan->server)
cifs_put_tcp_session(chan->server, 0);
+ }
return rc;
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 12b4dddaedb0..c705de32e225 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -571,10 +571,6 @@ assemble_neg_contexts(struct smb2_negotiate_req *req,
*total_len += ctxt_len;
pneg_ctxt += ctxt_len;
- build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt);
- *total_len += sizeof(struct smb2_posix_neg_context);
- pneg_ctxt += sizeof(struct smb2_posix_neg_context);
-
/*
* secondary channels don't have the hostname field populated
* use the hostname field in the primary channel instead
@@ -586,9 +582,14 @@ assemble_neg_contexts(struct smb2_negotiate_req *req,
hostname);
*total_len += ctxt_len;
pneg_ctxt += ctxt_len;
- neg_context_count = 4;
- } else /* second channels do not have a hostname */
neg_context_count = 3;
+ } else
+ neg_context_count = 2;
+
+ build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt);
+ *total_len += sizeof(struct smb2_posix_neg_context);
+ pneg_ctxt += sizeof(struct smb2_posix_neg_context);
+ neg_context_count++;
if (server->compress_algorithm) {
build_compression_ctxt((struct smb2_compression_capabilities_context *)
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index ee92634196a8..1105ce3c80cb 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -9,6 +9,15 @@ menuconfig DLM
A general purpose distributed lock manager for kernel or userspace
applications.
+config DLM_DEPRECATED_API
+ bool "DLM deprecated API"
+ depends on DLM
+ help
+ Enables deprecated DLM timeout features that will be removed in
+ later Linux kernel releases.
+
+ If you are unsure, say N.
+
config DLM_DEBUG
bool "DLM debugging"
depends on DLM
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile
index 3545fdafc6fb..71dab733cf9a 100644
--- a/fs/dlm/Makefile
+++ b/fs/dlm/Makefile
@@ -9,7 +9,6 @@ dlm-y := ast.o \
member.o \
memory.o \
midcomms.o \
- netlink.o \
lowcomms.o \
plock.o \
rcom.o \
@@ -18,5 +17,6 @@ dlm-y := ast.o \
requestqueue.o \
user.o \
util.o
+dlm-$(CONFIG_DLM_DEPRECATED_API) += netlink.o
dlm-$(CONFIG_DLM_DEBUG) += debug_fs.o
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index bfac462dd3e8..19ef136f9e4f 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -255,13 +255,13 @@ void dlm_callback_work(struct work_struct *work)
if (callbacks[i].flags & DLM_CB_SKIP) {
continue;
} else if (callbacks[i].flags & DLM_CB_BAST) {
- bastfn(lkb->lkb_astparam, callbacks[i].mode);
trace_dlm_bast(ls, lkb, callbacks[i].mode);
+ bastfn(lkb->lkb_astparam, callbacks[i].mode);
} else if (callbacks[i].flags & DLM_CB_CAST) {
lkb->lkb_lksb->sb_status = callbacks[i].sb_status;
lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags;
+ trace_dlm_ast(ls, lkb);
castfn(lkb->lkb_astparam);
- trace_dlm_ast(ls, lkb, lkb->lkb_lksb);
}
}
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 42eee2783756..ac8b62106ce0 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -75,8 +75,9 @@ struct dlm_cluster {
unsigned int cl_log_info;
unsigned int cl_protocol;
unsigned int cl_mark;
+#ifdef CONFIG_DLM_DEPRECATED_API
unsigned int cl_timewarn_cs;
- unsigned int cl_waitwarn_us;
+#endif
unsigned int cl_new_rsb_count;
unsigned int cl_recover_callbacks;
char cl_cluster_name[DLM_LOCKSPACE_LEN];
@@ -102,8 +103,9 @@ enum {
CLUSTER_ATTR_LOG_INFO,
CLUSTER_ATTR_PROTOCOL,
CLUSTER_ATTR_MARK,
+#ifdef CONFIG_DLM_DEPRECATED_API
CLUSTER_ATTR_TIMEWARN_CS,
- CLUSTER_ATTR_WAITWARN_US,
+#endif
CLUSTER_ATTR_NEW_RSB_COUNT,
CLUSTER_ATTR_RECOVER_CALLBACKS,
CLUSTER_ATTR_CLUSTER_NAME,
@@ -224,8 +226,9 @@ CLUSTER_ATTR(log_debug, NULL);
CLUSTER_ATTR(log_info, NULL);
CLUSTER_ATTR(protocol, dlm_check_protocol_and_dlm_running);
CLUSTER_ATTR(mark, NULL);
+#ifdef CONFIG_DLM_DEPRECATED_API
CLUSTER_ATTR(timewarn_cs, dlm_check_zero);
-CLUSTER_ATTR(waitwarn_us, NULL);
+#endif
CLUSTER_ATTR(new_rsb_count, NULL);
CLUSTER_ATTR(recover_callbacks, NULL);
@@ -240,8 +243,9 @@ static struct configfs_attribute *cluster_attrs[] = {
[CLUSTER_ATTR_LOG_INFO] = &cluster_attr_log_info,
[CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol,
[CLUSTER_ATTR_MARK] = &cluster_attr_mark,
+#ifdef CONFIG_DLM_DEPRECATED_API
[CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs,
- [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us,
+#endif
[CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count,
[CLUSTER_ATTR_RECOVER_CALLBACKS] = &cluster_attr_recover_callbacks,
[CLUSTER_ATTR_CLUSTER_NAME] = &cluster_attr_cluster_name,
@@ -432,8 +436,9 @@ static struct config_group *make_cluster(struct config_group *g,
cl->cl_log_debug = dlm_config.ci_log_debug;
cl->cl_log_info = dlm_config.ci_log_info;
cl->cl_protocol = dlm_config.ci_protocol;
+#ifdef CONFIG_DLM_DEPRECATED_API
cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
- cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
+#endif
cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count;
cl->cl_recover_callbacks = dlm_config.ci_recover_callbacks;
memcpy(cl->cl_cluster_name, dlm_config.ci_cluster_name,
@@ -954,8 +959,9 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
#define DEFAULT_LOG_INFO 1
#define DEFAULT_PROTOCOL DLM_PROTO_TCP
#define DEFAULT_MARK 0
+#ifdef CONFIG_DLM_DEPRECATED_API
#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
-#define DEFAULT_WAITWARN_US 0
+#endif
#define DEFAULT_NEW_RSB_COUNT 128
#define DEFAULT_RECOVER_CALLBACKS 0
#define DEFAULT_CLUSTER_NAME ""
@@ -971,8 +977,9 @@ struct dlm_config_info dlm_config = {
.ci_log_info = DEFAULT_LOG_INFO,
.ci_protocol = DEFAULT_PROTOCOL,
.ci_mark = DEFAULT_MARK,
+#ifdef CONFIG_DLM_DEPRECATED_API
.ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
- .ci_waitwarn_us = DEFAULT_WAITWARN_US,
+#endif
.ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT,
.ci_recover_callbacks = DEFAULT_RECOVER_CALLBACKS,
.ci_cluster_name = DEFAULT_CLUSTER_NAME
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index df92b0a07fc6..55c5f2c13ebd 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -37,8 +37,9 @@ struct dlm_config_info {
int ci_log_info;
int ci_protocol;
int ci_mark;
+#ifdef CONFIG_DLM_DEPRECATED_API
int ci_timewarn_cs;
- int ci_waitwarn_us;
+#endif
int ci_new_rsb_count;
int ci_recover_callbacks;
char ci_cluster_name[DLM_LOCKSPACE_LEN];
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 776c3ed519f0..8aca8085d24e 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -145,7 +145,9 @@ struct dlm_args {
void (*bastfn) (void *astparam, int mode);
int mode;
struct dlm_lksb *lksb;
+#ifdef CONFIG_DLM_DEPRECATED_API
unsigned long timeout;
+#endif
};
@@ -203,10 +205,20 @@ struct dlm_args {
#define DLM_IFL_OVERLAP_UNLOCK 0x00080000
#define DLM_IFL_OVERLAP_CANCEL 0x00100000
#define DLM_IFL_ENDOFLIFE 0x00200000
+#ifdef CONFIG_DLM_DEPRECATED_API
#define DLM_IFL_WATCH_TIMEWARN 0x00400000
#define DLM_IFL_TIMEOUT_CANCEL 0x00800000
+#endif
#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
#define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */
+/* only the least significant 2 bytes are changed by messages; all bytes
+ * are transmitted, but on the receive side only the 2 LSB bytes will be set.
+ *
+ * Even the wireshark dlm dissector only evaluates the lower bytes. Since
+ * the upper bytes may not be used on the transceiver side, we assume they
+ * are for internal use or reserved as long as they are not parsed on the
+ * receiver side.
+ */
#define DLM_IFL_USER 0x00000001
#define DLM_IFL_ORPHAN 0x00000002
@@ -249,10 +261,12 @@ struct dlm_lkb {
struct list_head lkb_rsb_lookup; /* waiting for rsb lookup */
struct list_head lkb_wait_reply; /* waiting for remote reply */
struct list_head lkb_ownqueue; /* list of locks for a process */
- struct list_head lkb_time_list;
ktime_t lkb_timestamp;
- ktime_t lkb_wait_time;
+
+#ifdef CONFIG_DLM_DEPRECATED_API
+ struct list_head lkb_time_list;
unsigned long lkb_timeout_cs;
+#endif
struct mutex lkb_cb_mutex;
struct work_struct lkb_cb_work;
@@ -568,8 +582,10 @@ struct dlm_ls {
struct mutex ls_orphans_mutex;
struct list_head ls_orphans;
+#ifdef CONFIG_DLM_DEPRECATED_API
struct mutex ls_timeout_mutex;
struct list_head ls_timeout;
+#endif
spinlock_t ls_new_rsb_spin;
int ls_new_rsb_count;
@@ -606,8 +622,8 @@ struct dlm_ls {
wait_queue_head_t ls_uevent_wait; /* user part of join/leave */
int ls_uevent_result;
- struct completion ls_members_done;
- int ls_members_result;
+ struct completion ls_recovery_done;
+ int ls_recovery_result;
struct miscdevice ls_device;
@@ -688,7 +704,9 @@ struct dlm_ls {
#define LSFL_RCOM_READY 5
#define LSFL_RCOM_WAIT 6
#define LSFL_UEVENT_WAIT 7
+#ifdef CONFIG_DLM_DEPRECATED_API
#define LSFL_TIMEWARN 8
+#endif
#define LSFL_CB_DELAY 9
#define LSFL_NODIR 10
@@ -741,9 +759,15 @@ static inline int dlm_no_directory(struct dlm_ls *ls)
return test_bit(LSFL_NODIR, &ls->ls_flags);
}
+#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_netlink_init(void);
void dlm_netlink_exit(void);
void dlm_timeout_warn(struct dlm_lkb *lkb);
+#else
+static inline int dlm_netlink_init(void) { return 0; }
+static inline void dlm_netlink_exit(void) { };
+static inline void dlm_timeout_warn(struct dlm_lkb *lkb) { };
+#endif
int dlm_plock_init(void);
void dlm_plock_exit(void);
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 226822f49d30..dac7eb75dba9 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -296,12 +296,14 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););
+#ifdef CONFIG_DLM_DEPRECATED_API
/* if the operation was a cancel, then return -DLM_ECANCEL, if a
timeout caused the cancel then return -ETIMEDOUT */
if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) {
lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL;
rv = -ETIMEDOUT;
}
+#endif
if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) {
lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL;
@@ -1210,7 +1212,9 @@ static int _create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret,
kref_init(&lkb->lkb_ref);
INIT_LIST_HEAD(&lkb->lkb_ownqueue);
INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
+#ifdef CONFIG_DLM_DEPRECATED_API
INIT_LIST_HEAD(&lkb->lkb_time_list);
+#endif
INIT_LIST_HEAD(&lkb->lkb_cb_list);
mutex_init(&lkb->lkb_cb_mutex);
INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work);
@@ -1306,6 +1310,13 @@ static inline void hold_lkb(struct dlm_lkb *lkb)
kref_get(&lkb->lkb_ref);
}
+static void unhold_lkb_assert(struct kref *kref)
+{
+ struct dlm_lkb *lkb = container_of(kref, struct dlm_lkb, lkb_ref);
+
+ DLM_ASSERT(false, dlm_print_lkb(lkb););
+}
+
/* This is called when we need to remove a reference and are certain
it's not the last ref. e.g. del_lkb is always called between a
find_lkb/put_lkb and is always the inverse of a previous add_lkb.
@@ -1313,9 +1324,7 @@ static inline void hold_lkb(struct dlm_lkb *lkb)
static inline void unhold_lkb(struct dlm_lkb *lkb)
{
- int rv;
- rv = kref_put(&lkb->lkb_ref, kill_lkb);
- DLM_ASSERT(!rv, dlm_print_lkb(lkb););
+ kref_put(&lkb->lkb_ref, unhold_lkb_assert);
}
static void lkb_add_ordered(struct list_head *new, struct list_head *head,
@@ -1402,75 +1411,6 @@ static int msg_reply_type(int mstype)
return -1;
}
-static int nodeid_warned(int nodeid, int num_nodes, int *warned)
-{
- int i;
-
- for (i = 0; i < num_nodes; i++) {
- if (!warned[i]) {
- warned[i] = nodeid;
- return 0;
- }
- if (warned[i] == nodeid)
- return 1;
- }
- return 0;
-}
-
-void dlm_scan_waiters(struct dlm_ls *ls)
-{
- struct dlm_lkb *lkb;
- s64 us;
- s64 debug_maxus = 0;
- u32 debug_scanned = 0;
- u32 debug_expired = 0;
- int num_nodes = 0;
- int *warned = NULL;
-
- if (!dlm_config.ci_waitwarn_us)
- return;
-
- mutex_lock(&ls->ls_waiters_mutex);
-
- list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
- if (!lkb->lkb_wait_time)
- continue;
-
- debug_scanned++;
-
- us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time));
-
- if (us < dlm_config.ci_waitwarn_us)
- continue;
-
- lkb->lkb_wait_time = 0;
-
- debug_expired++;
- if (us > debug_maxus)
- debug_maxus = us;
-
- if (!num_nodes) {
- num_nodes = ls->ls_num_nodes;
- warned = kcalloc(num_nodes, sizeof(int), GFP_KERNEL);
- }
- if (!warned)
- continue;
- if (nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned))
- continue;
-
- log_error(ls, "waitwarn %x %lld %d us check connection to "
- "node %d", lkb->lkb_id, (long long)us,
- dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
- }
- mutex_unlock(&ls->ls_waiters_mutex);
- kfree(warned);
-
- if (debug_expired)
- log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
- debug_scanned, debug_expired,
- dlm_config.ci_waitwarn_us, (long long)debug_maxus);
-}
-
/* add/remove lkb from global waiters list of lkb's waiting for
a reply from a remote node */
@@ -1514,7 +1454,6 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
lkb->lkb_wait_count++;
lkb->lkb_wait_type = mstype;
- lkb->lkb_wait_time = ktime_get();
lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
hold_lkb(lkb);
list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
@@ -1842,6 +1781,7 @@ void dlm_scan_rsbs(struct dlm_ls *ls)
}
}
+#ifdef CONFIG_DLM_DEPRECATED_API
static void add_timeout(struct dlm_lkb *lkb)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
@@ -1962,17 +1902,11 @@ void dlm_adjust_timeouts(struct dlm_ls *ls)
list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us);
mutex_unlock(&ls->ls_timeout_mutex);
-
- if (!dlm_config.ci_waitwarn_us)
- return;
-
- mutex_lock(&ls->ls_waiters_mutex);
- list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
- if (ktime_to_us(lkb->lkb_wait_time))
- lkb->lkb_wait_time = ktime_get();
- }
- mutex_unlock(&ls->ls_waiters_mutex);
}
+#else
+static void add_timeout(struct dlm_lkb *lkb) { }
+static void del_timeout(struct dlm_lkb *lkb) { }
+#endif
/* lkb is master or local copy */
@@ -2837,12 +2771,20 @@ static void confirm_master(struct dlm_rsb *r, int error)
}
}
+#ifdef CONFIG_DLM_DEPRECATED_API
static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
int namelen, unsigned long timeout_cs,
void (*ast) (void *astparam),
void *astparam,
void (*bast) (void *astparam, int mode),
struct dlm_args *args)
+#else
+static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
+ int namelen, void (*ast)(void *astparam),
+ void *astparam,
+ void (*bast)(void *astparam, int mode),
+ struct dlm_args *args)
+#endif
{
int rv = -EINVAL;
@@ -2895,7 +2837,9 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
args->astfn = ast;
args->astparam = astparam;
args->bastfn = bast;
+#ifdef CONFIG_DLM_DEPRECATED_API
args->timeout = timeout_cs;
+#endif
args->mode = mode;
args->lksb = lksb;
rv = 0;
@@ -2951,7 +2895,9 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
lkb->lkb_lksb = args->lksb;
lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
lkb->lkb_ownpid = (int) current->pid;
+#ifdef CONFIG_DLM_DEPRECATED_API
lkb->lkb_timeout_cs = args->timeout;
+#endif
rv = 0;
out:
if (rv)
@@ -3472,10 +3418,15 @@ int dlm_lock(dlm_lockspace_t *lockspace,
if (error)
goto out;
- trace_dlm_lock_start(ls, lkb, mode, flags);
+ trace_dlm_lock_start(ls, lkb, name, namelen, mode, flags);
+#ifdef CONFIG_DLM_DEPRECATED_API
error = set_lock_args(mode, lksb, flags, namelen, 0, ast,
astarg, bast, &args);
+#else
+ error = set_lock_args(mode, lksb, flags, namelen, ast, astarg, bast,
+ &args);
+#endif
if (error)
goto out_put;
@@ -3487,7 +3438,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
if (error == -EINPROGRESS)
error = 0;
out_put:
- trace_dlm_lock_end(ls, lkb, mode, flags, error);
+ trace_dlm_lock_end(ls, lkb, name, namelen, mode, flags, error);
if (convert || error)
__put_lkb(ls, lkb);
@@ -5839,9 +5790,14 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
return 0;
}
+#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
int mode, uint32_t flags, void *name, unsigned int namelen,
unsigned long timeout_cs)
+#else
+int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
+ int mode, uint32_t flags, void *name, unsigned int namelen)
+#endif
{
struct dlm_lkb *lkb;
struct dlm_args args;
@@ -5864,8 +5820,13 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
goto out;
}
}
+#ifdef CONFIG_DLM_DEPRECATED_API
error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
fake_astfn, ua, fake_bastfn, &args);
+#else
+ error = set_lock_args(mode, &ua->lksb, flags, namelen, fake_astfn, ua,
+ fake_bastfn, &args);
+#endif
if (error) {
kfree(ua->lksb.sb_lvbptr);
ua->lksb.sb_lvbptr = NULL;
@@ -5904,9 +5865,14 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
return error;
}
+#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
unsigned long timeout_cs)
+#else
+int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
+ int mode, uint32_t flags, uint32_t lkid, char *lvb_in)
+#endif
{
struct dlm_lkb *lkb;
struct dlm_args args;
@@ -5941,8 +5907,13 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
ua->bastaddr = ua_tmp->bastaddr;
ua->user_lksb = ua_tmp->user_lksb;
+#ifdef CONFIG_DLM_DEPRECATED_API
error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs,
fake_astfn, ua, fake_bastfn, &args);
+#else
+ error = set_lock_args(mode, &ua->lksb, flags, 0, fake_astfn, ua,
+ fake_bastfn, &args);
+#endif
if (error)
goto out_put;
@@ -5966,7 +5937,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, void *name, unsigned int namelen,
- unsigned long timeout_cs, uint32_t *lkid)
+ uint32_t *lkid)
{
struct dlm_lkb *lkb = NULL, *iter;
struct dlm_user_args *ua;
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 252a5898f908..a7b6474f009d 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -24,9 +24,15 @@ int dlm_put_lkb(struct dlm_lkb *lkb);
void dlm_scan_rsbs(struct dlm_ls *ls);
int dlm_lock_recovery_try(struct dlm_ls *ls);
void dlm_unlock_recovery(struct dlm_ls *ls);
-void dlm_scan_waiters(struct dlm_ls *ls);
+
+#ifdef CONFIG_DLM_DEPRECATED_API
void dlm_scan_timeout(struct dlm_ls *ls);
void dlm_adjust_timeouts(struct dlm_ls *ls);
+#else
+static inline void dlm_scan_timeout(struct dlm_ls *ls) { }
+static inline void dlm_adjust_timeouts(struct dlm_ls *ls) { }
+#endif
+
int dlm_master_lookup(struct dlm_ls *ls, int nodeid, char *name, int len,
unsigned int flags, int *r_nodeid, int *result);
@@ -41,15 +47,22 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls);
int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
+#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
uint32_t flags, void *name, unsigned int namelen,
unsigned long timeout_cs);
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
unsigned long timeout_cs);
+#else
+int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
+ uint32_t flags, void *name, unsigned int namelen);
+int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
+ int mode, uint32_t flags, uint32_t lkid, char *lvb_in);
+#endif
int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, void *name, unsigned int namelen,
- unsigned long timeout_cs, uint32_t *lkid);
+ uint32_t *lkid);
int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
uint32_t flags, uint32_t lkid, char *lvb_in);
int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 19ed41a5da93..3972f4d86c75 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -275,7 +275,6 @@ static int dlm_scand(void *data)
ls->ls_scan_time = jiffies;
dlm_scan_rsbs(ls);
dlm_scan_timeout(ls);
- dlm_scan_waiters(ls);
dlm_unlock_recovery(ls);
} else {
ls->ls_scan_time += HZ;
@@ -490,13 +489,28 @@ static int new_lockspace(const char *name, const char *cluster,
ls->ls_ops_arg = ops_arg;
}
- if (flags & DLM_LSFL_TIMEWARN)
+#ifdef CONFIG_DLM_DEPRECATED_API
+ if (flags & DLM_LSFL_TIMEWARN) {
+ pr_warn_once("===============================================================\n"
+ "WARNING: the dlm DLM_LSFL_TIMEWARN flag is being deprecated and\n"
+ " will be removed in v6.2!\n"
+ " Inclusive DLM_LSFL_TIMEWARN define in UAPI header!\n"
+ "===============================================================\n");
+
set_bit(LSFL_TIMEWARN, &ls->ls_flags);
+ }
/* ls_exflags are forced to match among nodes, and we don't
- need to require all nodes to have some flags set */
+ * need to require all nodes to have some flags set
+ */
ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
DLM_LSFL_NEWEXCL));
+#else
+ /* ls_exflags are forced to match among nodes, and we don't
+ * need to require all nodes to have some flags set
+ */
+ ls->ls_exflags = (flags & ~(DLM_LSFL_FS | DLM_LSFL_NEWEXCL));
+#endif
size = READ_ONCE(dlm_config.ci_rsbtbl_size);
ls->ls_rsbtbl_size = size;
@@ -527,8 +541,10 @@ static int new_lockspace(const char *name, const char *cluster,
mutex_init(&ls->ls_waiters_mutex);
INIT_LIST_HEAD(&ls->ls_orphans);
mutex_init(&ls->ls_orphans_mutex);
+#ifdef CONFIG_DLM_DEPRECATED_API
INIT_LIST_HEAD(&ls->ls_timeout);
mutex_init(&ls->ls_timeout_mutex);
+#endif
INIT_LIST_HEAD(&ls->ls_new_rsb);
spin_lock_init(&ls->ls_new_rsb_spin);
@@ -548,8 +564,8 @@ static int new_lockspace(const char *name, const char *cluster,
init_waitqueue_head(&ls->ls_uevent_wait);
ls->ls_uevent_result = 0;
- init_completion(&ls->ls_members_done);
- ls->ls_members_result = -1;
+ init_completion(&ls->ls_recovery_done);
+ ls->ls_recovery_result = -1;
mutex_init(&ls->ls_cb_mutex);
INIT_LIST_HEAD(&ls->ls_cb_delay);
@@ -645,8 +661,9 @@ static int new_lockspace(const char *name, const char *cluster,
if (error)
goto out_recoverd;
- wait_for_completion(&ls->ls_members_done);
- error = ls->ls_members_result;
+ /* wait until recovery succeeds or fails */
+ wait_for_completion(&ls->ls_recovery_done);
+ error = ls->ls_recovery_result;
if (error)
goto out_members;
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 19e82f08c0e0..a4e84e8d94c8 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -529,7 +529,7 @@ static void lowcomms_write_space(struct sock *sk)
return;
if (!test_and_set_bit(CF_CONNECTED, &con->flags)) {
- log_print("successful connected to node %d", con->nodeid);
+ log_print("connected to node %d", con->nodeid);
queue_work(send_workqueue, &con->swork);
return;
}
@@ -1931,7 +1931,7 @@ static int dlm_sctp_connect(struct connection *con, struct socket *sock,
return ret;
if (!test_and_set_bit(CF_CONNECTED, &con->flags))
- log_print("successful connected to node %d", con->nodeid);
+ log_print("connected to node %d", con->nodeid);
return 0;
}
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 98084e0cfccf..2af2ccfe43a9 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -534,7 +534,11 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
int i, error, neg = 0, low = -1;
/* previously removed members that we've not finished removing need to
- count as a negative change so the "neg" recovery steps will happen */
+ * count as a negative change so the "neg" recovery steps will happen
+ *
+ * This function must report all member changes to the lsops or
+ * midcomms layer and must never return before doing so.
+ */
list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
log_rinfo(ls, "prev removed member %d", memb->nodeid);
@@ -583,19 +587,6 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
*neg_out = neg;
error = ping_members(ls);
- /* error -EINTR means that a new recovery action is triggered.
- * We ignore this recovery action and let run the new one which might
- * have new member configuration.
- */
- if (error == -EINTR)
- error = 0;
-
- /* new_lockspace() may be waiting to know if the config
- * is good or bad
- */
- ls->ls_members_result = error;
- complete(&ls->ls_members_done);
-
log_rinfo(ls, "dlm_recover_members %d nodes", ls->ls_num_nodes);
return error;
}
@@ -675,7 +666,16 @@ int dlm_ls_stop(struct dlm_ls *ls)
if (!ls->ls_recover_begin)
ls->ls_recover_begin = jiffies;
- dlm_lsop_recover_prep(ls);
+ /* call the recover_prep ops only once, not multiple times
+ * for each possible dlm_ls_stop() call while recovery is
+ * already stopped.
+ *
+ * If we were able to clear the LSFL_RUNNING bit (i.e. it was
+ * set), we know this is the first dlm_ls_stop() call.
+ */
+ if (new)
+ dlm_lsop_recover_prep(ls);
+
return 0;
}
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 0993eebf2060..737f185aad8d 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -29,6 +29,8 @@ struct plock_async_data {
struct plock_op {
struct list_head list;
int done;
+ /* set if the lock op got interrupted while waiting for the dlm_controld reply */
+ bool sigint;
struct dlm_plock_info info;
/* if set indicates async handling */
struct plock_async_data *data;
@@ -79,8 +81,7 @@ static void send_op(struct plock_op *op)
abandoned waiter. So, we have to insert the unlock-close when the
lock call is interrupted. */
-static void do_unlock_close(struct dlm_ls *ls, u64 number,
- struct file *file, struct file_lock *fl)
+static void do_unlock_close(const struct dlm_plock_info *info)
{
struct plock_op *op;
@@ -89,15 +90,12 @@ static void do_unlock_close(struct dlm_ls *ls, u64 number,
return;
op->info.optype = DLM_PLOCK_OP_UNLOCK;
- op->info.pid = fl->fl_pid;
- op->info.fsid = ls->ls_global_id;
- op->info.number = number;
+ op->info.pid = info->pid;
+ op->info.fsid = info->fsid;
+ op->info.number = info->number;
op->info.start = 0;
op->info.end = OFFSET_MAX;
- if (fl->fl_lmops && fl->fl_lmops->lm_grant)
- op->info.owner = (__u64) fl->fl_pid;
- else
- op->info.owner = (__u64)(long) fl->fl_owner;
+ op->info.owner = info->owner;
op->info.flags |= DLM_PLOCK_FL_CLOSE;
send_op(op);
@@ -161,16 +159,24 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
rv = wait_event_interruptible(recv_wq, (op->done != 0));
if (rv == -ERESTARTSYS) {
spin_lock(&ops_lock);
- list_del(&op->list);
+ /* recheck under ops_lock whether done became non-zero;
+ * if so, this interrupt case should be ignored
+ */
+ if (op->done != 0) {
+ spin_unlock(&ops_lock);
+ goto do_lock_wait;
+ }
+
+ op->sigint = true;
spin_unlock(&ops_lock);
- log_print("%s: wait interrupted %x %llx, op removed",
+ log_debug(ls, "%s: wait interrupted %x %llx pid %d",
__func__, ls->ls_global_id,
- (unsigned long long)number);
- dlm_release_plock_op(op);
- do_unlock_close(ls, number, file, fl);
+ (unsigned long long)number, op->info.pid);
goto out;
}
+do_lock_wait:
+
WARN_ON(!list_empty(&op->list));
rv = op->info.rv;
@@ -378,7 +384,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
spin_lock(&ops_lock);
if (!list_empty(&send_list)) {
- op = list_entry(send_list.next, struct plock_op, list);
+ op = list_first_entry(&send_list, struct plock_op, list);
if (op->info.flags & DLM_PLOCK_FL_CLOSE)
list_del(&op->list);
else
@@ -425,6 +431,19 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
if (iter->info.fsid == info.fsid &&
iter->info.number == info.number &&
iter->info.owner == info.owner) {
+ if (iter->sigint) {
+ list_del(&iter->list);
+ spin_unlock(&ops_lock);
+
+ pr_debug("%s: sigint cleanup %x %llx pid %d",
+ __func__, iter->info.fsid,
+ (unsigned long long)iter->info.number,
+ iter->info.pid);
+ do_unlock_close(&iter->info);
+ memcpy(&iter->info, &info, sizeof(info));
+ dlm_release_plock_op(iter);
+ return count;
+ }
list_del_init(&iter->list);
memcpy(&iter->info, &info, sizeof(info));
if (iter->data)
@@ -443,7 +462,7 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
else
wake_up(&recv_wq);
} else
- log_print("%s: no op %x %llx - may got interrupted?", __func__,
+ log_print("%s: no op %x %llx", __func__,
info.fsid, (unsigned long long)info.number);
return count;
}
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index a55dfce705dd..e15eb511b04b 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -70,6 +70,10 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
/*
* Add or remove nodes from the lockspace's ls_nodes list.
+ *
+ * Because we must report all membership changes to the lsops or
+ * midcomms layer, ls_recover() must not be aborted until this is
+ * done.
*/
error = dlm_recover_members(ls, rv, &neg);
@@ -239,14 +243,12 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
jiffies_to_msecs(jiffies - start));
mutex_unlock(&ls->ls_recoverd_active);
- dlm_lsop_recover_done(ls);
return 0;
fail:
dlm_release_root_list(ls);
- log_rinfo(ls, "dlm_recover %llu error %d",
- (unsigned long long)rv->seq, error);
mutex_unlock(&ls->ls_recoverd_active);
+
return error;
}
@@ -257,6 +259,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
static void do_ls_recovery(struct dlm_ls *ls)
{
struct dlm_recover *rv = NULL;
+ int error;
spin_lock(&ls->ls_recover_lock);
rv = ls->ls_recover_args;
@@ -266,7 +269,31 @@ static void do_ls_recovery(struct dlm_ls *ls)
spin_unlock(&ls->ls_recover_lock);
if (rv) {
- ls_recover(ls, rv);
+ error = ls_recover(ls, rv);
+ switch (error) {
+ case 0:
+ ls->ls_recovery_result = 0;
+ complete(&ls->ls_recovery_done);
+
+ dlm_lsop_recover_done(ls);
+ break;
+ case -EINTR:
+ /* if recovery was interrupted (-EINTR), wait for the next
+ * ls_recover() iteration, which will hopefully succeed.
+ */
+ log_rinfo(ls, "%s %llu interrupted and should be queued to run again",
+ __func__, (unsigned long long)rv->seq);
+ break;
+ default:
+ log_rinfo(ls, "%s %llu error %d", __func__,
+ (unsigned long long)rv->seq, error);
+
+ /* let new_lockspace() get aware of critical error */
+ ls->ls_recovery_result = error;
+ complete(&ls->ls_recovery_done);
+ break;
+ }
+
kfree(rv->nodes);
kfree(rv);
}
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 1060b24f18d4..99e8f0744513 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -250,6 +250,14 @@ static int device_user_lock(struct dlm_user_proc *proc,
goto out;
}
+#ifdef CONFIG_DLM_DEPRECATED_API
+ if (params->timeout)
+ pr_warn_once("========================================================\n"
+ "WARNING: the lkb timeout feature is being deprecated and\n"
+ " will be removed in v6.2!\n"
+ "========================================================\n");
+#endif
+
ua = kzalloc(sizeof(struct dlm_user_args), GFP_NOFS);
if (!ua)
goto out;
@@ -262,23 +270,34 @@ static int device_user_lock(struct dlm_user_proc *proc,
ua->xid = params->xid;
if (params->flags & DLM_LKF_CONVERT) {
+#ifdef CONFIG_DLM_DEPRECATED_API
error = dlm_user_convert(ls, ua,
params->mode, params->flags,
params->lkid, params->lvb,
(unsigned long) params->timeout);
+#else
+ error = dlm_user_convert(ls, ua,
+ params->mode, params->flags,
+ params->lkid, params->lvb);
+#endif
} else if (params->flags & DLM_LKF_ORPHAN) {
error = dlm_user_adopt_orphan(ls, ua,
params->mode, params->flags,
params->name, params->namelen,
- (unsigned long) params->timeout,
&lkid);
if (!error)
error = lkid;
} else {
+#ifdef CONFIG_DLM_DEPRECATED_API
error = dlm_user_request(ls, ua,
params->mode, params->flags,
params->name, params->namelen,
(unsigned long) params->timeout);
+#else
+ error = dlm_user_request(ls, ua,
+ params->mode, params->flags,
+ params->name, params->namelen);
+#endif
if (!error)
error = ua->lksb.sb_lkid;
}
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 19e6c56a9f47..26fa170090b8 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -17,7 +17,7 @@ struct z_erofs_decompress_req {
/* indicate the algorithm will be used for decompression */
unsigned int alg;
- bool inplace_io, partial_decoding;
+ bool inplace_io, partial_decoding, fillgaps;
};
struct z_erofs_decompressor {
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index fbb037ba326e..fe8ac0e163f7 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -366,42 +366,33 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
return iomap_bmap(mapping, block, &erofs_iomap_ops);
}
-static int erofs_prepare_dio(struct kiocb *iocb, struct iov_iter *to)
+static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = file_inode(iocb->ki_filp);
- loff_t align = iocb->ki_pos | iov_iter_count(to) |
- iov_iter_alignment(to);
- struct block_device *bdev = inode->i_sb->s_bdev;
- unsigned int blksize_mask;
-
- if (bdev)
- blksize_mask = (1 << ilog2(bdev_logical_block_size(bdev))) - 1;
- else
- blksize_mask = (1 << inode->i_blkbits) - 1;
- if (align & blksize_mask)
- return -EINVAL;
- return 0;
-}
-
-static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
-{
/* no need taking (shared) inode lock since it's a ro filesystem */
if (!iov_iter_count(to))
return 0;
#ifdef CONFIG_FS_DAX
- if (IS_DAX(iocb->ki_filp->f_mapping->host))
+ if (IS_DAX(inode))
return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
#endif
if (iocb->ki_flags & IOCB_DIRECT) {
- int err = erofs_prepare_dio(iocb, to);
+ struct block_device *bdev = inode->i_sb->s_bdev;
+ unsigned int blksize_mask;
+
+ if (bdev)
+ blksize_mask = bdev_logical_block_size(bdev) - 1;
+ else
+ blksize_mask = (1 << inode->i_blkbits) - 1;
+
+ if ((iocb->ki_pos | iov_iter_count(to) |
+ iov_iter_alignment(to)) & blksize_mask)
+ return -EINVAL;
- if (!err)
- return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
- NULL, 0, NULL, 0);
- if (err < 0)
- return err;
+ return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
+ NULL, 0, NULL, 0);
}
return filemap_read(iocb, to, 0);
}
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 6dca1900c733..2d55569f96ac 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -83,7 +83,7 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
j = 0;
/* 'valid' bounced can only be tested after a complete round */
- if (test_bit(j, bounced)) {
+ if (!rq->fillgaps && test_bit(j, bounced)) {
DBG_BUGON(i < lz4_max_distance_pages);
DBG_BUGON(top >= lz4_max_distance_pages);
availables[top++] = rq->out[i - lz4_max_distance_pages];
@@ -91,14 +91,18 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
if (page) {
__clear_bit(j, bounced);
- if (kaddr) {
- if (kaddr + PAGE_SIZE == page_address(page))
+ if (!PageHighMem(page)) {
+ if (!i) {
+ kaddr = page_address(page);
+ continue;
+ }
+ if (kaddr &&
+ kaddr + PAGE_SIZE == page_address(page)) {
kaddr += PAGE_SIZE;
- else
- kaddr = NULL;
- } else if (!i) {
- kaddr = page_address(page);
+ continue;
+ }
}
+ kaddr = NULL;
continue;
}
kaddr = NULL;
diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c
index 05a3063cf2bc..5e59b3f523eb 100644
--- a/fs/erofs/decompressor_lzma.c
+++ b/fs/erofs/decompressor_lzma.c
@@ -143,6 +143,7 @@ again:
DBG_BUGON(z_erofs_lzma_head);
z_erofs_lzma_head = head;
spin_unlock(&z_erofs_lzma_lock);
+ wake_up_all(&z_erofs_lzma_wq);
z_erofs_lzma_max_dictsize = dict_size;
mutex_unlock(&lzma_resize_mutex);
diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c
index 18e59821c597..ecf28f66b97d 100644
--- a/fs/erofs/dir.c
+++ b/fs/erofs/dir.c
@@ -22,10 +22,9 @@ static void debug_one_dentry(unsigned char d_type, const char *de_name,
}
static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
- void *dentry_blk, unsigned int *ofs,
+ void *dentry_blk, struct erofs_dirent *de,
unsigned int nameoff, unsigned int maxsize)
{
- struct erofs_dirent *de = dentry_blk + *ofs;
const struct erofs_dirent *end = dentry_blk + nameoff;
while (de < end) {
@@ -59,9 +58,8 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
/* stopped by some reason */
return 1;
++de;
- *ofs += sizeof(struct erofs_dirent);
+ ctx->pos += sizeof(struct erofs_dirent);
}
- *ofs = maxsize;
return 0;
}
@@ -90,33 +88,33 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
nameoff = le16_to_cpu(de->nameoff);
if (nameoff < sizeof(struct erofs_dirent) ||
- nameoff >= PAGE_SIZE) {
+ nameoff >= EROFS_BLKSIZ) {
erofs_err(dir->i_sb,
"invalid de[0].nameoff %u @ nid %llu",
nameoff, EROFS_I(dir)->nid);
err = -EFSCORRUPTED;
- goto skip_this;
+ break;
}
maxsize = min_t(unsigned int,
- dirsize - ctx->pos + ofs, PAGE_SIZE);
+ dirsize - ctx->pos + ofs, EROFS_BLKSIZ);
/* search dirents at the arbitrary position */
if (initial) {
initial = false;
ofs = roundup(ofs, sizeof(struct erofs_dirent));
+ ctx->pos = blknr_to_addr(i) + ofs;
if (ofs >= nameoff)
goto skip_this;
}
- err = erofs_fill_dentries(dir, ctx, de, &ofs,
+ err = erofs_fill_dentries(dir, ctx, de, (void *)de + ofs,
nameoff, maxsize);
-skip_this:
- ctx->pos = blknr_to_addr(i) + ofs;
-
if (err)
break;
+skip_this:
+ ctx->pos = blknr_to_addr(i) + maxsize;
++i;
ofs = 0;
}
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 724bb57075f6..5792ca9e0d5e 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -2,6 +2,7 @@
/*
* Copyright (C) 2018 HUAWEI, Inc.
* https://www.huawei.com/
+ * Copyright (C) 2022 Alibaba Cloud
*/
#include "zdata.h"
#include "compress.h"
@@ -26,6 +27,82 @@ static struct z_erofs_pcluster_slab pcluster_pool[] __read_mostly = {
_PCLP(Z_EROFS_PCLUSTER_MAX_PAGES)
};
+struct z_erofs_bvec_iter {
+ struct page *bvpage;
+ struct z_erofs_bvset *bvset;
+ unsigned int nr, cur;
+};
+
+static struct page *z_erofs_bvec_iter_end(struct z_erofs_bvec_iter *iter)
+{
+ if (iter->bvpage)
+ kunmap_local(iter->bvset);
+ return iter->bvpage;
+}
+
+static struct page *z_erofs_bvset_flip(struct z_erofs_bvec_iter *iter)
+{
+ unsigned long base = (unsigned long)((struct z_erofs_bvset *)0)->bvec;
+ /* have to access nextpage in advance, otherwise it will be unmapped */
+ struct page *nextpage = iter->bvset->nextpage;
+ struct page *oldpage;
+
+ DBG_BUGON(!nextpage);
+ oldpage = z_erofs_bvec_iter_end(iter);
+ iter->bvpage = nextpage;
+ iter->bvset = kmap_local_page(nextpage);
+ iter->nr = (PAGE_SIZE - base) / sizeof(struct z_erofs_bvec);
+ iter->cur = 0;
+ return oldpage;
+}
+
+static void z_erofs_bvec_iter_begin(struct z_erofs_bvec_iter *iter,
+ struct z_erofs_bvset_inline *bvset,
+ unsigned int bootstrap_nr,
+ unsigned int cur)
+{
+ *iter = (struct z_erofs_bvec_iter) {
+ .nr = bootstrap_nr,
+ .bvset = (struct z_erofs_bvset *)bvset,
+ };
+
+ while (cur > iter->nr) {
+ cur -= iter->nr;
+ z_erofs_bvset_flip(iter);
+ }
+ iter->cur = cur;
+}
+
+static int z_erofs_bvec_enqueue(struct z_erofs_bvec_iter *iter,
+ struct z_erofs_bvec *bvec,
+ struct page **candidate_bvpage)
+{
+ if (iter->cur == iter->nr) {
+ if (!*candidate_bvpage)
+ return -EAGAIN;
+
+ DBG_BUGON(iter->bvset->nextpage);
+ iter->bvset->nextpage = *candidate_bvpage;
+ z_erofs_bvset_flip(iter);
+
+ iter->bvset->nextpage = NULL;
+ *candidate_bvpage = NULL;
+ }
+ iter->bvset->bvec[iter->cur++] = *bvec;
+ return 0;
+}
+
+static void z_erofs_bvec_dequeue(struct z_erofs_bvec_iter *iter,
+ struct z_erofs_bvec *bvec,
+ struct page **old_bvpage)
+{
+ if (iter->cur == iter->nr)
+ *old_bvpage = z_erofs_bvset_flip(iter);
+ else
+ *old_bvpage = NULL;
+ *bvec = iter->bvset->bvec[iter->cur++];
+}
+
static void z_erofs_destroy_pcluster_pool(void)
{
int i;
@@ -46,7 +123,7 @@ static int z_erofs_create_pcluster_pool(void)
for (pcs = pcluster_pool;
pcs < pcluster_pool + ARRAY_SIZE(pcluster_pool); ++pcs) {
- size = struct_size(a, compressed_pages, pcs->maxpages);
+ size = struct_size(a, compressed_bvecs, pcs->maxpages);
sprintf(pcs->name, "erofs_pcluster-%u", pcs->maxpages);
pcs->slab = kmem_cache_create(pcs->name, size, 0,
@@ -150,30 +227,29 @@ int __init z_erofs_init_zip_subsystem(void)
return err;
}
-enum z_erofs_collectmode {
- COLLECT_SECONDARY,
- COLLECT_PRIMARY,
+enum z_erofs_pclustermode {
+ Z_EROFS_PCLUSTER_INFLIGHT,
/*
- * The current collection was the tail of an exist chain, in addition
- * that the previous processed chained collections are all decided to
+ * The current pcluster was the tail of an existing chain; in addition,
+ * the previously processed chained pclusters have all been decided to
* be hooked up to it.
- * A new chain will be created for the remaining collections which are
- * not processed yet, therefore different from COLLECT_PRIMARY_FOLLOWED,
- * the next collection cannot reuse the whole page safely in
- * the following scenario:
+ * A new chain will be created for the remaining pclusters which are
+ * not processed yet, so different from Z_EROFS_PCLUSTER_FOLLOWED,
+ * the next pcluster cannot reuse the whole page safely for inplace I/O
+ * in the following scenario:
* ________________________________________________________________
* | tail (partial) page | head (partial) page |
- * | (belongs to the next cl) | (belongs to the current cl) |
- * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________|
+ * | (belongs to the next pcl) | (belongs to the current pcl) |
+ * |_______PCLUSTER_FOLLOWED______|________PCLUSTER_HOOKED__________|
*/
- COLLECT_PRIMARY_HOOKED,
+ Z_EROFS_PCLUSTER_HOOKED,
/*
- * a weak form of COLLECT_PRIMARY_FOLLOWED, the difference is that it
+ * a weak form of Z_EROFS_PCLUSTER_FOLLOWED, the difference is that it
* could be dispatched into bypass queue later due to uptodated managed
* pages. All related online pages cannot be reused for inplace I/O (or
- * pagevec) since it can be directly decoded without I/O submission.
+ * bvpage) since it can be directly decoded without I/O submission.
*/
- COLLECT_PRIMARY_FOLLOWED_NOINPLACE,
+ Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE,
/*
* The current collection has been linked with the owned chain, and
* could also be linked with the remaining collections, which means
@@ -184,39 +260,36 @@ enum z_erofs_collectmode {
* ________________________________________________________________
* | tail (partial) page | head (partial) page |
* | (of the current cl) | (of the previous collection) |
- * | PRIMARY_FOLLOWED or | |
- * |_____PRIMARY_HOOKED___|____________PRIMARY_FOLLOWED____________|
+ * | PCLUSTER_FOLLOWED or | |
+ * |_____PCLUSTER_HOOKED__|___________PCLUSTER_FOLLOWED____________|
*
* [ (*) the above page can be used as inplace I/O. ]
*/
- COLLECT_PRIMARY_FOLLOWED,
+ Z_EROFS_PCLUSTER_FOLLOWED,
};
struct z_erofs_decompress_frontend {
struct inode *const inode;
struct erofs_map_blocks map;
+ struct z_erofs_bvec_iter biter;
- struct z_erofs_pagevec_ctor vector;
-
+ struct page *candidate_bvpage;
struct z_erofs_pcluster *pcl, *tailpcl;
- /* a pointer used to pick up inplace I/O pages */
- struct page **icpage_ptr;
z_erofs_next_pcluster_t owned_head;
-
- enum z_erofs_collectmode mode;
+ enum z_erofs_pclustermode mode;
bool readahead;
/* used for applying cache strategy on the fly */
bool backmost;
erofs_off_t headoffset;
+
+ /* a pointer used to pick up inplace I/O pages */
+ unsigned int icur;
};
#define DECOMPRESS_FRONTEND_INIT(__i) { \
.inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \
- .mode = COLLECT_PRIMARY_FOLLOWED, .backmost = true }
-
-static struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES];
-static DEFINE_MUTEX(z_pagemap_global_lock);
+ .mode = Z_EROFS_PCLUSTER_FOLLOWED, .backmost = true }
static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
enum z_erofs_cache_alloctype type,
@@ -231,24 +304,21 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
*/
gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
- struct page **pages;
- pgoff_t index;
+ unsigned int i;
- if (fe->mode < COLLECT_PRIMARY_FOLLOWED)
+ if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED)
return;
- pages = pcl->compressed_pages;
- index = pcl->obj.index;
- for (; index < pcl->obj.index + pcl->pclusterpages; ++index, ++pages) {
+ for (i = 0; i < pcl->pclusterpages; ++i) {
struct page *page;
compressed_page_t t;
struct page *newpage = NULL;
/* the compressed page was loaded before */
- if (READ_ONCE(*pages))
+ if (READ_ONCE(pcl->compressed_bvecs[i].page))
continue;
- page = find_get_page(mc, index);
+ page = find_get_page(mc, pcl->obj.index + i);
if (page) {
t = tag_compressed_page_justfound(page);
@@ -269,7 +339,8 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
}
}
- if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
+ if (!cmpxchg_relaxed(&pcl->compressed_bvecs[i].page, NULL,
+ tagptr_cast_ptr(t)))
continue;
if (page)
@@ -283,7 +354,7 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
* managed cache since it can be moved to the bypass queue instead.
*/
if (standalone)
- fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
+ fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
}
/* called by erofs_shrinker to get rid of all compressed_pages */
@@ -300,7 +371,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
* therefore no need to worry about available decompression users.
*/
for (i = 0; i < pcl->pclusterpages; ++i) {
- struct page *page = pcl->compressed_pages[i];
+ struct page *page = pcl->compressed_bvecs[i].page;
if (!page)
continue;
@@ -313,7 +384,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
continue;
/* barrier is implied in the following 'unlock_page' */
- WRITE_ONCE(pcl->compressed_pages[i], NULL);
+ WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
detach_page_private(page);
unlock_page(page);
}
@@ -323,56 +394,59 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
int erofs_try_to_free_cached_page(struct page *page)
{
struct z_erofs_pcluster *const pcl = (void *)page_private(page);
- int ret = 0; /* 0 - busy */
+ int ret, i;
- if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) {
- unsigned int i;
+ if (!erofs_workgroup_try_to_freeze(&pcl->obj, 1))
+ return 0;
- DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
- for (i = 0; i < pcl->pclusterpages; ++i) {
- if (pcl->compressed_pages[i] == page) {
- WRITE_ONCE(pcl->compressed_pages[i], NULL);
- ret = 1;
- break;
- }
+ ret = 0;
+ DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
+ for (i = 0; i < pcl->pclusterpages; ++i) {
+ if (pcl->compressed_bvecs[i].page == page) {
+ WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
+ ret = 1;
+ break;
}
- erofs_workgroup_unfreeze(&pcl->obj, 1);
-
- if (ret)
- detach_page_private(page);
}
+ erofs_workgroup_unfreeze(&pcl->obj, 1);
+ if (ret)
+ detach_page_private(page);
return ret;
}
-/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */
static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe,
- struct page *page)
+ struct z_erofs_bvec *bvec)
{
struct z_erofs_pcluster *const pcl = fe->pcl;
- while (fe->icpage_ptr > pcl->compressed_pages)
- if (!cmpxchg(--fe->icpage_ptr, NULL, page))
+ while (fe->icur > 0) {
+ if (!cmpxchg(&pcl->compressed_bvecs[--fe->icur].page,
+ NULL, bvec->page)) {
+ pcl->compressed_bvecs[fe->icur] = *bvec;
return true;
+ }
+ }
return false;
}
/* callers must be with pcluster lock held */
static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
- struct page *page, enum z_erofs_page_type type,
- bool pvec_safereuse)
+ struct z_erofs_bvec *bvec, bool exclusive)
{
int ret;
- /* give priority for inplaceio */
- if (fe->mode >= COLLECT_PRIMARY &&
- type == Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
- z_erofs_try_inplace_io(fe, page))
- return 0;
-
- ret = z_erofs_pagevec_enqueue(&fe->vector, page, type,
- pvec_safereuse);
- fe->pcl->vcnt += (unsigned int)ret;
- return ret ? 0 : -EAGAIN;
+ if (exclusive) {
+ /* give priority for inplaceio to use file pages first */
+ if (z_erofs_try_inplace_io(fe, bvec))
+ return 0;
+ /* otherwise, check if it can be used as a bvpage */
+ if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED &&
+ !fe->candidate_bvpage)
+ fe->candidate_bvpage = bvec->page;
+ }
+ ret = z_erofs_bvec_enqueue(&fe->biter, bvec, &fe->candidate_bvpage);
+ fe->pcl->vcnt += (ret >= 0);
+ return ret;
}
static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f)
@@ -385,7 +459,7 @@ static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f)
*owned_head) == Z_EROFS_PCLUSTER_NIL) {
*owned_head = &pcl->next;
/* so we can attach this pcluster to our submission chain. */
- f->mode = COLLECT_PRIMARY_FOLLOWED;
+ f->mode = Z_EROFS_PCLUSTER_FOLLOWED;
return;
}
@@ -393,66 +467,21 @@ static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f)
* type 2, link to the end of an existing open chain, be careful
* that its submission is controlled by the original attached chain.
*/
- if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
+ if (*owned_head != &pcl->next && pcl != f->tailpcl &&
+ cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
*owned_head) == Z_EROFS_PCLUSTER_TAIL) {
*owned_head = Z_EROFS_PCLUSTER_TAIL;
- f->mode = COLLECT_PRIMARY_HOOKED;
+ f->mode = Z_EROFS_PCLUSTER_HOOKED;
f->tailpcl = NULL;
return;
}
/* type 3, it belongs to a chain, but it isn't the end of the chain */
- f->mode = COLLECT_PRIMARY;
+ f->mode = Z_EROFS_PCLUSTER_INFLIGHT;
}
-static int z_erofs_lookup_pcluster(struct z_erofs_decompress_frontend *fe,
- struct inode *inode,
- struct erofs_map_blocks *map)
-{
- struct z_erofs_pcluster *pcl = fe->pcl;
- unsigned int length;
-
- /* to avoid unexpected loop formed by corrupted images */
- if (fe->owned_head == &pcl->next || pcl == fe->tailpcl) {
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
-
- if (pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) {
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
-
- length = READ_ONCE(pcl->length);
- if (length & Z_EROFS_PCLUSTER_FULL_LENGTH) {
- if ((map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) > length) {
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
- } else {
- unsigned int llen = map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT;
-
- if (map->m_flags & EROFS_MAP_FULL_MAPPED)
- llen |= Z_EROFS_PCLUSTER_FULL_LENGTH;
-
- while (llen > length &&
- length != cmpxchg_relaxed(&pcl->length, length, llen)) {
- cpu_relax();
- length = READ_ONCE(pcl->length);
- }
- }
- mutex_lock(&pcl->lock);
- /* used to check tail merging loop due to corrupted images */
- if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL)
- fe->tailpcl = pcl;
-
- z_erofs_try_to_claim_pcluster(fe);
- return 0;
-}
-
-static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe,
- struct inode *inode,
- struct erofs_map_blocks *map)
+static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
{
+ struct erofs_map_blocks *map = &fe->map;
bool ztailpacking = map->m_flags & EROFS_MAP_META;
struct z_erofs_pcluster *pcl;
struct erofs_workgroup *grp;
@@ -471,14 +500,13 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe,
atomic_set(&pcl->obj.refcount, 1);
pcl->algorithmformat = map->m_algorithmformat;
- pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
- (map->m_flags & EROFS_MAP_FULL_MAPPED ?
- Z_EROFS_PCLUSTER_FULL_LENGTH : 0);
+ pcl->length = 0;
+ pcl->partial = true;
/* new pclusters should be claimed as type 1, primary and followed */
pcl->next = fe->owned_head;
pcl->pageofs_out = map->m_la & ~PAGE_MASK;
- fe->mode = COLLECT_PRIMARY_FOLLOWED;
+ fe->mode = Z_EROFS_PCLUSTER_FOLLOWED;
/*
* lock all primary followed works before visible to others
@@ -494,7 +522,7 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe,
} else {
pcl->obj.index = map->m_pa >> PAGE_SHIFT;
- grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
+ grp = erofs_insert_workgroup(fe->inode->i_sb, &pcl->obj);
if (IS_ERR(grp)) {
err = PTR_ERR(grp);
goto err_out;
@@ -520,11 +548,10 @@ err_out:
return err;
}
-static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe,
- struct inode *inode,
- struct erofs_map_blocks *map)
+static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe)
{
- struct erofs_workgroup *grp;
+ struct erofs_map_blocks *map = &fe->map;
+ struct erofs_workgroup *grp = NULL;
int ret;
DBG_BUGON(fe->pcl);
@@ -533,38 +560,35 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe,
DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL);
DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
- if (map->m_flags & EROFS_MAP_META) {
- if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
- goto tailpacking;
+ if (!(map->m_flags & EROFS_MAP_META)) {
+ grp = erofs_find_workgroup(fe->inode->i_sb,
+ map->m_pa >> PAGE_SHIFT);
+ } else if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
}
- grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
if (grp) {
fe->pcl = container_of(grp, struct z_erofs_pcluster, obj);
+ ret = -EEXIST;
} else {
-tailpacking:
- ret = z_erofs_register_pcluster(fe, inode, map);
- if (!ret)
- goto out;
- if (ret != -EEXIST)
- return ret;
+ ret = z_erofs_register_pcluster(fe);
}
- ret = z_erofs_lookup_pcluster(fe, inode, map);
- if (ret) {
- erofs_workgroup_put(&fe->pcl->obj);
+ if (ret == -EEXIST) {
+ mutex_lock(&fe->pcl->lock);
+ /* used to check tail merging loop due to corrupted images */
+ if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL)
+ fe->tailpcl = fe->pcl;
+
+ z_erofs_try_to_claim_pcluster(fe);
+ } else if (ret) {
return ret;
}
-
-out:
- z_erofs_pagevec_ctor_init(&fe->vector, Z_EROFS_NR_INLINE_PAGEVECS,
- fe->pcl->pagevec, fe->pcl->vcnt);
+ z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset,
+ Z_EROFS_INLINE_BVECS, fe->pcl->vcnt);
/* since file-backed online pages are traversed in reverse order */
- fe->icpage_ptr = fe->pcl->compressed_pages +
- z_erofs_pclusterpages(fe->pcl);
+ fe->icur = z_erofs_pclusterpages(fe->pcl);
return 0;
}
@@ -593,14 +617,19 @@ static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe)
if (!pcl)
return false;
- z_erofs_pagevec_ctor_exit(&fe->vector, false);
+ z_erofs_bvec_iter_end(&fe->biter);
mutex_unlock(&pcl->lock);
+ if (fe->candidate_bvpage) {
+ DBG_BUGON(z_erofs_is_shortlived_page(fe->candidate_bvpage));
+ fe->candidate_bvpage = NULL;
+ }
+
/*
* if all pending pages are added, don't hold its reference
* any longer if the pcluster isn't hosted by ourselves.
*/
- if (fe->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE)
+ if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE)
erofs_workgroup_put(&pcl->obj);
fe->pcl = NULL;
@@ -628,11 +657,10 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
struct erofs_map_blocks *const map = &fe->map;
const loff_t offset = page_offset(page);
- bool tight = true;
+ bool tight = true, exclusive;
enum z_erofs_cache_alloctype cache_strategy;
- enum z_erofs_page_type page_type;
- unsigned int cur, end, spiltted, index;
+ unsigned int cur, end, spiltted;
int err = 0;
/* register locked file pages as online pages in pack */
@@ -653,7 +681,7 @@ repeat:
map->m_llen = 0;
err = z_erofs_map_blocks_iter(inode, map, 0);
if (err)
- goto err_out;
+ goto out;
} else {
if (fe->pcl)
goto hitted;
@@ -663,9 +691,9 @@ repeat:
if (!(map->m_flags & EROFS_MAP_MAPPED))
goto hitted;
- err = z_erofs_collector_begin(fe, inode, map);
+ err = z_erofs_collector_begin(fe);
if (err)
- goto err_out;
+ goto out;
if (z_erofs_is_inline_pcluster(fe->pcl)) {
void *mp;
@@ -676,11 +704,12 @@ repeat:
err = PTR_ERR(mp);
erofs_err(inode->i_sb,
"failed to get inline page, err %d", err);
- goto err_out;
+ goto out;
}
get_page(fe->map.buf.page);
- WRITE_ONCE(fe->pcl->compressed_pages[0], fe->map.buf.page);
- fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
+ WRITE_ONCE(fe->pcl->compressed_bvecs[0].page,
+ fe->map.buf.page);
+ fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
} else {
/* bind cache first when cached decompression is preferred */
if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy,
@@ -696,10 +725,10 @@ hitted:
* Ensure the current partial page belongs to this submit chain rather
* than other concurrent submit chains or the noio(bypass) chain since
* those chains are handled asynchronously thus the page cannot be used
- * for inplace I/O or pagevec (should be processed in strict order.)
+ * for inplace I/O or bvpage (should be processed in a strict order.)
*/
- tight &= (fe->mode >= COLLECT_PRIMARY_HOOKED &&
- fe->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE);
+ tight &= (fe->mode >= Z_EROFS_PCLUSTER_HOOKED &&
+ fe->mode != Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
cur = end - min_t(unsigned int, offset + end - map->m_la, end);
if (!(map->m_flags & EROFS_MAP_MAPPED)) {
@@ -707,60 +736,59 @@ hitted:
goto next_part;
}
- /* let's derive page type */
- page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD :
- (!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
- (tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
- Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));
-
+ exclusive = (!cur && (!spiltted || tight));
if (cur)
- tight &= (fe->mode >= COLLECT_PRIMARY_FOLLOWED);
+ tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED);
retry:
- err = z_erofs_attach_page(fe, page, page_type,
- fe->mode >= COLLECT_PRIMARY_FOLLOWED);
- /* should allocate an additional short-lived page for pagevec */
- if (err == -EAGAIN) {
- struct page *const newpage =
- alloc_page(GFP_NOFS | __GFP_NOFAIL);
-
- set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
- err = z_erofs_attach_page(fe, newpage,
- Z_EROFS_PAGE_TYPE_EXCLUSIVE, true);
- if (!err)
- goto retry;
+ err = z_erofs_attach_page(fe, &((struct z_erofs_bvec) {
+ .page = page,
+ .offset = offset - map->m_la,
+ .end = end,
+ }), exclusive);
+ /* should allocate an additional short-lived page for bvset */
+ if (err == -EAGAIN && !fe->candidate_bvpage) {
+ fe->candidate_bvpage = alloc_page(GFP_NOFS | __GFP_NOFAIL);
+ set_page_private(fe->candidate_bvpage,
+ Z_EROFS_SHORTLIVED_PAGE);
+ goto retry;
}
- if (err)
- goto err_out;
-
- index = page->index - (map->m_la >> PAGE_SHIFT);
-
- z_erofs_onlinepage_fixup(page, index, true);
+ if (err) {
+ DBG_BUGON(err == -EAGAIN && fe->candidate_bvpage);
+ goto out;
+ }
+ z_erofs_onlinepage_split(page);
/* bump up the number of spiltted parts of a page */
++spiltted;
- /* also update nr_pages */
- fe->pcl->nr_pages = max_t(pgoff_t, fe->pcl->nr_pages, index + 1);
+ if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK))
+ fe->pcl->multibases = true;
+
+ if ((map->m_flags & EROFS_MAP_FULL_MAPPED) &&
+ fe->pcl->length == map->m_llen)
+ fe->pcl->partial = false;
+ if (fe->pcl->length < offset + end - map->m_la) {
+ fe->pcl->length = offset + end - map->m_la;
+ fe->pcl->pageofs_out = map->m_la & ~PAGE_MASK;
+ }
next_part:
- /* can be used for verification */
+ /* shorten the remaining extent to update progress */
map->m_llen = offset + cur - map->m_la;
+ map->m_flags &= ~EROFS_MAP_FULL_MAPPED;
end = cur;
if (end > 0)
goto repeat;
out:
+ if (err)
+ z_erofs_page_mark_eio(page);
z_erofs_onlinepage_endio(page);
erofs_dbg("%s, finish page: %pK spiltted: %u map->m_llen %llu",
__func__, page, spiltted, map->m_llen);
return err;
-
- /* if some error occurred while processing this page */
-err_out:
- SetPageError(page);
- goto out;
}
static bool z_erofs_get_sync_decompress_policy(struct erofs_sb_info *sbi,
@@ -783,97 +811,137 @@ static bool z_erofs_page_is_invalidated(struct page *page)
return !page->mapping && !z_erofs_is_shortlived_page(page);
}
-static int z_erofs_decompress_pcluster(struct super_block *sb,
- struct z_erofs_pcluster *pcl,
- struct page **pagepool)
-{
- struct erofs_sb_info *const sbi = EROFS_SB(sb);
- unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
- struct z_erofs_pagevec_ctor ctor;
- unsigned int i, inputsize, outputsize, llen, nr_pages;
- struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES];
- struct page **pages, **compressed_pages, *page;
+struct z_erofs_decompress_backend {
+ struct page *onstack_pages[Z_EROFS_ONSTACK_PAGES];
+ struct super_block *sb;
+ struct z_erofs_pcluster *pcl;
- enum z_erofs_page_type page_type;
- bool overlapped, partial;
- int err;
+ /* pages with the longest decompressed length for deduplication */
+ struct page **decompressed_pages;
+ /* pages to keep the compressed data */
+ struct page **compressed_pages;
- might_sleep();
- DBG_BUGON(!READ_ONCE(pcl->nr_pages));
+ struct list_head decompressed_secondary_bvecs;
+ struct page **pagepool;
+ unsigned int onstack_used, nr_pages;
+};
- mutex_lock(&pcl->lock);
- nr_pages = pcl->nr_pages;
+struct z_erofs_bvec_item {
+ struct z_erofs_bvec bvec;
+ struct list_head list;
+};
- if (nr_pages <= Z_EROFS_VMAP_ONSTACK_PAGES) {
- pages = pages_onstack;
- } else if (nr_pages <= Z_EROFS_VMAP_GLOBAL_PAGES &&
- mutex_trylock(&z_pagemap_global_lock)) {
- pages = z_pagemap_global;
- } else {
- gfp_t gfp_flags = GFP_KERNEL;
+static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be,
+ struct z_erofs_bvec *bvec)
+{
+ struct z_erofs_bvec_item *item;
- if (nr_pages > Z_EROFS_VMAP_GLOBAL_PAGES)
- gfp_flags |= __GFP_NOFAIL;
+ if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK)) {
+ unsigned int pgnr;
+ struct page *oldpage;
- pages = kvmalloc_array(nr_pages, sizeof(struct page *),
- gfp_flags);
+ pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT;
+ DBG_BUGON(pgnr >= be->nr_pages);
+ oldpage = be->decompressed_pages[pgnr];
+ be->decompressed_pages[pgnr] = bvec->page;
- /* fallback to global pagemap for the lowmem scenario */
- if (!pages) {
- mutex_lock(&z_pagemap_global_lock);
- pages = z_pagemap_global;
- }
+ if (!oldpage)
+ return;
}
- for (i = 0; i < nr_pages; ++i)
- pages[i] = NULL;
-
- err = 0;
- z_erofs_pagevec_ctor_init(&ctor, Z_EROFS_NR_INLINE_PAGEVECS,
- pcl->pagevec, 0);
-
- for (i = 0; i < pcl->vcnt; ++i) {
- unsigned int pagenr;
+ /* (cold path) one pcluster is requested multiple times */
+ item = kmalloc(sizeof(*item), GFP_KERNEL | __GFP_NOFAIL);
+ item->bvec = *bvec;
+ list_add(&item->list, &be->decompressed_secondary_bvecs);
+}
- page = z_erofs_pagevec_dequeue(&ctor, &page_type);
+static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be,
+ int err)
+{
+ unsigned int off0 = be->pcl->pageofs_out;
+ struct list_head *p, *n;
+
+ list_for_each_safe(p, n, &be->decompressed_secondary_bvecs) {
+ struct z_erofs_bvec_item *bvi;
+ unsigned int end, cur;
+ void *dst, *src;
+
+ bvi = container_of(p, struct z_erofs_bvec_item, list);
+ cur = bvi->bvec.offset < 0 ? -bvi->bvec.offset : 0;
+ end = min_t(unsigned int, be->pcl->length - bvi->bvec.offset,
+ bvi->bvec.end);
+ dst = kmap_local_page(bvi->bvec.page);
+ while (cur < end) {
+ unsigned int pgnr, scur, len;
+
+ pgnr = (bvi->bvec.offset + cur + off0) >> PAGE_SHIFT;
+ DBG_BUGON(pgnr >= be->nr_pages);
+
+ scur = bvi->bvec.offset + cur -
+ ((pgnr << PAGE_SHIFT) - off0);
+ len = min_t(unsigned int, end - cur, PAGE_SIZE - scur);
+ if (!be->decompressed_pages[pgnr]) {
+ err = -EFSCORRUPTED;
+ cur += len;
+ continue;
+ }
+ src = kmap_local_page(be->decompressed_pages[pgnr]);
+ memcpy(dst + cur, src + scur, len);
+ kunmap_local(src);
+ cur += len;
+ }
+ kunmap_local(dst);
+ if (err)
+ z_erofs_page_mark_eio(bvi->bvec.page);
+ z_erofs_onlinepage_endio(bvi->bvec.page);
+ list_del(p);
+ kfree(bvi);
+ }
+}
- /* all pages in pagevec ought to be valid */
- DBG_BUGON(!page);
- DBG_BUGON(z_erofs_page_is_invalidated(page));
+static void z_erofs_parse_out_bvecs(struct z_erofs_decompress_backend *be)
+{
+ struct z_erofs_pcluster *pcl = be->pcl;
+ struct z_erofs_bvec_iter biter;
+ struct page *old_bvpage;
+ int i;
- if (z_erofs_put_shortlivedpage(pagepool, page))
- continue;
+ z_erofs_bvec_iter_begin(&biter, &pcl->bvset, Z_EROFS_INLINE_BVECS, 0);
+ for (i = 0; i < pcl->vcnt; ++i) {
+ struct z_erofs_bvec bvec;
- if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
- pagenr = 0;
- else
- pagenr = z_erofs_onlinepage_index(page);
+ z_erofs_bvec_dequeue(&biter, &bvec, &old_bvpage);
- DBG_BUGON(pagenr >= nr_pages);
+ if (old_bvpage)
+ z_erofs_put_shortlivedpage(be->pagepool, old_bvpage);
- /*
- * currently EROFS doesn't support multiref(dedup),
- * so here erroring out one multiref page.
- */
- if (pages[pagenr]) {
- DBG_BUGON(1);
- SetPageError(pages[pagenr]);
- z_erofs_onlinepage_endio(pages[pagenr]);
- err = -EFSCORRUPTED;
- }
- pages[pagenr] = page;
+ DBG_BUGON(z_erofs_page_is_invalidated(bvec.page));
+ z_erofs_do_decompressed_bvec(be, &bvec);
}
- z_erofs_pagevec_ctor_exit(&ctor, true);
- overlapped = false;
- compressed_pages = pcl->compressed_pages;
+ old_bvpage = z_erofs_bvec_iter_end(&biter);
+ if (old_bvpage)
+ z_erofs_put_shortlivedpage(be->pagepool, old_bvpage);
+}
+static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be,
+ bool *overlapped)
+{
+ struct z_erofs_pcluster *pcl = be->pcl;
+ unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
+ int i, err = 0;
+
+ *overlapped = false;
for (i = 0; i < pclusterpages; ++i) {
- unsigned int pagenr;
+ struct z_erofs_bvec *bvec = &pcl->compressed_bvecs[i];
+ struct page *page = bvec->page;
- page = compressed_pages[i];
- /* all compressed pages ought to be valid */
- DBG_BUGON(!page);
+ /* compressed pages ought to be present before decompressing */
+ if (!page) {
+ DBG_BUGON(1);
+ continue;
+ }
+ be->compressed_pages[i] = page;
if (z_erofs_is_inline_pcluster(pcl)) {
if (!PageUptodate(page))
@@ -883,109 +951,129 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
DBG_BUGON(z_erofs_page_is_invalidated(page));
if (!z_erofs_is_shortlived_page(page)) {
- if (erofs_page_is_managed(sbi, page)) {
+ if (erofs_page_is_managed(EROFS_SB(be->sb), page)) {
if (!PageUptodate(page))
err = -EIO;
continue;
}
+ z_erofs_do_decompressed_bvec(be, bvec);
+ *overlapped = true;
+ }
+ }
- /*
- * only if non-head page can be selected
- * for inplace decompression
- */
- pagenr = z_erofs_onlinepage_index(page);
-
- DBG_BUGON(pagenr >= nr_pages);
- if (pages[pagenr]) {
- DBG_BUGON(1);
- SetPageError(pages[pagenr]);
- z_erofs_onlinepage_endio(pages[pagenr]);
- err = -EFSCORRUPTED;
- }
- pages[pagenr] = page;
+ if (err)
+ return err;
+ return 0;
+}
- overlapped = true;
- }
+static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
+ int err)
+{
+ struct erofs_sb_info *const sbi = EROFS_SB(be->sb);
+ struct z_erofs_pcluster *pcl = be->pcl;
+ unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
+ unsigned int i, inputsize;
+ int err2;
+ struct page *page;
+ bool overlapped;
- /* PG_error needs checking for all non-managed pages */
- if (PageError(page)) {
- DBG_BUGON(PageUptodate(page));
- err = -EIO;
- }
+ mutex_lock(&pcl->lock);
+ be->nr_pages = PAGE_ALIGN(pcl->length + pcl->pageofs_out) >> PAGE_SHIFT;
+
+ /* allocate (de)compressed page arrays if cannot be kept on stack */
+ be->decompressed_pages = NULL;
+ be->compressed_pages = NULL;
+ be->onstack_used = 0;
+ if (be->nr_pages <= Z_EROFS_ONSTACK_PAGES) {
+ be->decompressed_pages = be->onstack_pages;
+ be->onstack_used = be->nr_pages;
+ memset(be->decompressed_pages, 0,
+ sizeof(struct page *) * be->nr_pages);
}
+ if (pclusterpages + be->onstack_used <= Z_EROFS_ONSTACK_PAGES)
+ be->compressed_pages = be->onstack_pages + be->onstack_used;
+
+ if (!be->decompressed_pages)
+ be->decompressed_pages =
+ kvcalloc(be->nr_pages, sizeof(struct page *),
+ GFP_KERNEL | __GFP_NOFAIL);
+ if (!be->compressed_pages)
+ be->compressed_pages =
+ kvcalloc(pclusterpages, sizeof(struct page *),
+ GFP_KERNEL | __GFP_NOFAIL);
+
+ z_erofs_parse_out_bvecs(be);
+ err2 = z_erofs_parse_in_bvecs(be, &overlapped);
+ if (err2)
+ err = err2;
if (err)
goto out;
- llen = pcl->length >> Z_EROFS_PCLUSTER_LENGTH_BIT;
- if (nr_pages << PAGE_SHIFT >= pcl->pageofs_out + llen) {
- outputsize = llen;
- partial = !(pcl->length & Z_EROFS_PCLUSTER_FULL_LENGTH);
- } else {
- outputsize = (nr_pages << PAGE_SHIFT) - pcl->pageofs_out;
- partial = true;
- }
-
if (z_erofs_is_inline_pcluster(pcl))
inputsize = pcl->tailpacking_size;
else
inputsize = pclusterpages * PAGE_SIZE;
err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
- .sb = sb,
- .in = compressed_pages,
- .out = pages,
+ .sb = be->sb,
+ .in = be->compressed_pages,
+ .out = be->decompressed_pages,
.pageofs_in = pcl->pageofs_in,
.pageofs_out = pcl->pageofs_out,
.inputsize = inputsize,
- .outputsize = outputsize,
+ .outputsize = pcl->length,
.alg = pcl->algorithmformat,
.inplace_io = overlapped,
- .partial_decoding = partial
- }, pagepool);
+ .partial_decoding = pcl->partial,
+ .fillgaps = pcl->multibases,
+ }, be->pagepool);
out:
/* must handle all compressed pages before actual file pages */
if (z_erofs_is_inline_pcluster(pcl)) {
- page = compressed_pages[0];
- WRITE_ONCE(compressed_pages[0], NULL);
+ page = pcl->compressed_bvecs[0].page;
+ WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL);
put_page(page);
} else {
for (i = 0; i < pclusterpages; ++i) {
- page = compressed_pages[i];
+ page = pcl->compressed_bvecs[i].page;
if (erofs_page_is_managed(sbi, page))
continue;
/* recycle all individual short-lived pages */
- (void)z_erofs_put_shortlivedpage(pagepool, page);
- WRITE_ONCE(compressed_pages[i], NULL);
+ (void)z_erofs_put_shortlivedpage(be->pagepool, page);
+ WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
}
}
+ if (be->compressed_pages < be->onstack_pages ||
+ be->compressed_pages >= be->onstack_pages + Z_EROFS_ONSTACK_PAGES)
+ kvfree(be->compressed_pages);
+ z_erofs_fill_other_copies(be, err);
- for (i = 0; i < nr_pages; ++i) {
- page = pages[i];
+ for (i = 0; i < be->nr_pages; ++i) {
+ page = be->decompressed_pages[i];
if (!page)
continue;
DBG_BUGON(z_erofs_page_is_invalidated(page));
/* recycle all individual short-lived pages */
- if (z_erofs_put_shortlivedpage(pagepool, page))
+ if (z_erofs_put_shortlivedpage(be->pagepool, page))
continue;
-
- if (err < 0)
- SetPageError(page);
-
+ if (err)
+ z_erofs_page_mark_eio(page);
z_erofs_onlinepage_endio(page);
}
- if (pages == z_pagemap_global)
- mutex_unlock(&z_pagemap_global_lock);
- else if (pages != pages_onstack)
- kvfree(pages);
+ if (be->decompressed_pages != be->onstack_pages)
+ kvfree(be->decompressed_pages);
- pcl->nr_pages = 0;
+ pcl->length = 0;
+ pcl->partial = true;
+ pcl->multibases = false;
+ pcl->bvset.nextpage = NULL;
pcl->vcnt = 0;
/* pcluster lock MUST be taken before the following line */
@@ -997,22 +1085,25 @@ out:
static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
struct page **pagepool)
{
+ struct z_erofs_decompress_backend be = {
+ .sb = io->sb,
+ .pagepool = pagepool,
+ .decompressed_secondary_bvecs =
+ LIST_HEAD_INIT(be.decompressed_secondary_bvecs),
+ };
z_erofs_next_pcluster_t owned = io->head;
while (owned != Z_EROFS_PCLUSTER_TAIL_CLOSED) {
- struct z_erofs_pcluster *pcl;
-
- /* no possible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */
+ /* impossible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */
DBG_BUGON(owned == Z_EROFS_PCLUSTER_TAIL);
-
- /* no possible that 'owned' equals NULL */
+ /* impossible that 'owned' equals Z_EROFS_PCLUSTER_NIL */
DBG_BUGON(owned == Z_EROFS_PCLUSTER_NIL);
- pcl = container_of(owned, struct z_erofs_pcluster, next);
- owned = READ_ONCE(pcl->next);
+ be.pcl = container_of(owned, struct z_erofs_pcluster, next);
+ owned = READ_ONCE(be.pcl->next);
- z_erofs_decompress_pcluster(io->sb, pcl, pagepool);
- erofs_workgroup_put(&pcl->obj);
+ z_erofs_decompress_pcluster(&be, io->eio ? -EIO : 0);
+ erofs_workgroup_put(&be.pcl->obj);
}
}
@@ -1038,7 +1129,6 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
if (sync) {
if (!atomic_add_return(bios, &io->pending_bios))
complete(&io->u.done);
-
return;
}
@@ -1071,7 +1161,7 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
int justfound;
repeat:
- page = READ_ONCE(pcl->compressed_pages[nr]);
+ page = READ_ONCE(pcl->compressed_bvecs[nr].page);
oldpage = page;
if (!page)
@@ -1087,7 +1177,7 @@ repeat:
* otherwise, it will go inplace I/O path instead.
*/
if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
- WRITE_ONCE(pcl->compressed_pages[nr], page);
+ WRITE_ONCE(pcl->compressed_bvecs[nr].page, page);
set_page_private(page, 0);
tocache = true;
goto out_tocache;
@@ -1113,14 +1203,13 @@ repeat:
/* the page is still in manage cache */
if (page->mapping == mc) {
- WRITE_ONCE(pcl->compressed_pages[nr], page);
+ WRITE_ONCE(pcl->compressed_bvecs[nr].page, page);
- ClearPageError(page);
if (!PagePrivate(page)) {
/*
* impossible to be !PagePrivate(page) for
* the current restriction as well if
- * the page is already in compressed_pages[].
+ * the page is already in compressed_bvecs[].
*/
DBG_BUGON(!justfound);
@@ -1149,7 +1238,8 @@ repeat:
put_page(page);
out_allocpage:
page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
- if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) {
+ if (oldpage != cmpxchg(&pcl->compressed_bvecs[nr].page,
+ oldpage, page)) {
erofs_pagepool_add(pagepool, page);
cond_resched();
goto repeat;
@@ -1186,6 +1276,7 @@ fg_out:
q = fgq;
init_completion(&fgq->u.done);
atomic_set(&fgq->pending_bios, 0);
+ q->eio = false;
}
q->sb = sb;
q->head = Z_EROFS_PCLUSTER_TAIL_CLOSED;
@@ -1246,26 +1337,25 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
DBG_BUGON(PageUptodate(page));
DBG_BUGON(z_erofs_page_is_invalidated(page));
- if (err)
- SetPageError(page);
-
if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
if (!err)
SetPageUptodate(page);
unlock_page(page);
}
}
+ if (err)
+ q->eio = true;
z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1);
bio_put(bio);
}
-static void z_erofs_submit_queue(struct super_block *sb,
- struct z_erofs_decompress_frontend *f,
+static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
struct page **pagepool,
struct z_erofs_decompressqueue *fgq,
bool *force_fg)
{
- struct erofs_sb_info *const sbi = EROFS_SB(sb);
+ struct super_block *sb = f->inode->i_sb;
+ struct address_space *mc = MNGD_MAPPING(EROFS_SB(sb));
z_erofs_next_pcluster_t qtail[NR_JOBQUEUES];
struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
void *bi_private;
@@ -1317,7 +1407,7 @@ static void z_erofs_submit_queue(struct super_block *sb,
struct page *page;
page = pickup_page_for_submission(pcl, i++, pagepool,
- MNGD_MAPPING(sbi));
+ mc);
if (!page)
continue;
@@ -1369,15 +1459,14 @@ submit_bio_retry:
z_erofs_decompress_kickoff(q[JQ_SUBMIT], *force_fg, nr_bios);
}
-static void z_erofs_runqueue(struct super_block *sb,
- struct z_erofs_decompress_frontend *f,
+static void z_erofs_runqueue(struct z_erofs_decompress_frontend *f,
struct page **pagepool, bool force_fg)
{
struct z_erofs_decompressqueue io[NR_JOBQUEUES];
if (f->owned_head == Z_EROFS_PCLUSTER_TAIL)
return;
- z_erofs_submit_queue(sb, f, pagepool, io, &force_fg);
+ z_erofs_submit_queue(f, pagepool, io, &force_fg);
/* handle bypass queue (no i/o pclusters) immediately */
z_erofs_decompress_queue(&io[JQ_BYPASS], pagepool);
@@ -1475,7 +1564,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
(void)z_erofs_collector_end(&f);
/* if some compressed cluster ready, need submit them anyway */
- z_erofs_runqueue(inode->i_sb, &f, &pagepool,
+ z_erofs_runqueue(&f, &pagepool,
z_erofs_get_sync_decompress_policy(sbi, 0));
if (err)
@@ -1524,7 +1613,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
z_erofs_pcluster_readmore(&f, rac, 0, &pagepool, false);
(void)z_erofs_collector_end(&f);
- z_erofs_runqueue(inode->i_sb, &f, &pagepool,
+ z_erofs_runqueue(&f, &pagepool,
z_erofs_get_sync_decompress_policy(sbi, nr_pages));
erofs_put_metabuf(&f.map.buf);
erofs_release_pages(&pagepool);
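The z_erofs_decompress_pcluster() rework earlier in this zdata.c diff keeps both page arrays in one small on-stack buffer (Z_EROFS_ONSTACK_PAGES slots) and only falls back to kvcalloc(..., GFP_KERNEL | __GFP_NOFAIL) when a pcluster needs more entries. A minimal userspace sketch of the same on-stack-first pattern, for illustration only (names are invented, and plain calloc() can fail where __GFP_NOFAIL cannot):

#include <stdlib.h>
#include <string.h>

#define ONSTACK_SLOTS 32			/* mirrors Z_EROFS_ONSTACK_PAGES */

/* Process n slots, preferring a stack buffer and falling back to the heap. */
static int process_slots(size_t n)
{
	void *onstack[ONSTACK_SLOTS];
	void **slots = onstack;
	int ret = 0;

	if (n > ONSTACK_SLOTS) {
		slots = calloc(n, sizeof(*slots));
		if (!slots)
			return -1;		/* kernel side avoids this via __GFP_NOFAIL */
	} else {
		memset(onstack, 0, n * sizeof(*onstack));
	}

	/* ... fill and consume slots[0..n-1] ... */

	if (slots != onstack)
		free(slots);
	return ret;
}

int main(void)
{
	return process_slots(8) || process_slots(100);
}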
diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
index 58053bb5066f..e7f04c4fbb81 100644
--- a/fs/erofs/zdata.h
+++ b/fs/erofs/zdata.h
@@ -7,13 +7,10 @@
#define __EROFS_FS_ZDATA_H
#include "internal.h"
-#include "zpvec.h"
+#include "tagptr.h"
#define Z_EROFS_PCLUSTER_MAX_PAGES (Z_EROFS_PCLUSTER_MAX_SIZE / PAGE_SIZE)
-#define Z_EROFS_NR_INLINE_PAGEVECS 3
-
-#define Z_EROFS_PCLUSTER_FULL_LENGTH 0x00000001
-#define Z_EROFS_PCLUSTER_LENGTH_BIT 1
+#define Z_EROFS_INLINE_BVECS 2
/*
* let's leave a type here in case of introducing
@@ -21,6 +18,21 @@
*/
typedef void *z_erofs_next_pcluster_t;
+struct z_erofs_bvec {
+ struct page *page;
+ int offset;
+ unsigned int end;
+};
+
+#define __Z_EROFS_BVSET(name, total) \
+struct name { \
+ /* point to the next page which contains the following bvecs */ \
+ struct page *nextpage; \
+ struct z_erofs_bvec bvec[total]; \
+}
+__Z_EROFS_BVSET(z_erofs_bvset,);
+__Z_EROFS_BVSET(z_erofs_bvset_inline, Z_EROFS_INLINE_BVECS);
+
/*
* Structure fields follow one of the following exclusion rules.
*
@@ -38,24 +50,21 @@ struct z_erofs_pcluster {
/* A: point to next chained pcluster or TAILs */
z_erofs_next_pcluster_t next;
- /* A: lower limit of decompressed length and if full length or not */
+ /* L: the maximum decompression size of this round */
unsigned int length;
+ /* L: total number of bvecs */
+ unsigned int vcnt;
+
/* I: page offset of start position of decompression */
unsigned short pageofs_out;
/* I: page offset of inline compressed data */
unsigned short pageofs_in;
- /* L: maximum relative page index in pagevec[] */
- unsigned short nr_pages;
-
- /* L: total number of pages in pagevec[] */
- unsigned int vcnt;
-
union {
- /* L: inline a certain number of pagevecs for bootstrap */
- erofs_vtptr_t pagevec[Z_EROFS_NR_INLINE_PAGEVECS];
+ /* L: inline a certain number of bvec for bootstrap */
+ struct z_erofs_bvset_inline bvset;
/* I: can be used to free the pcluster by RCU. */
struct rcu_head rcu;
@@ -72,8 +81,14 @@ struct z_erofs_pcluster {
/* I: compression algorithm format */
unsigned char algorithmformat;
- /* A: compressed pages (can be cached or inplaced pages) */
- struct page *compressed_pages[];
+ /* L: whether partial decompression or not */
+ bool partial;
+
+ /* L: indicate several pageofs_outs or not */
+ bool multibases;
+
+ /* A: compressed bvecs (can be cached or inplaced pages) */
+ struct z_erofs_bvec compressed_bvecs[];
};
/* let's avoid the valid 32-bit kernel addresses */
@@ -94,6 +109,8 @@ struct z_erofs_decompressqueue {
struct completion done;
struct work_struct work;
} u;
+
+ bool eio;
};
static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl)
@@ -108,38 +125,17 @@ static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
return pcl->pclusterpages;
}
-#define Z_EROFS_ONLINEPAGE_COUNT_BITS 2
-#define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
-#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS)
-
/*
- * waiters (aka. ongoing_packs): # to unlock the page
- * sub-index: 0 - for partial page, >= 1 full page sub-index
+ * bit 31: I/O error occurred on this page
+ * bit 0 - 30: remaining parts to complete this page
*/
-typedef atomic_t z_erofs_onlinepage_t;
-
-/* type punning */
-union z_erofs_onlinepage_converter {
- z_erofs_onlinepage_t *o;
- unsigned long *v;
-};
-
-static inline unsigned int z_erofs_onlinepage_index(struct page *page)
-{
- union z_erofs_onlinepage_converter u;
-
- DBG_BUGON(!PagePrivate(page));
- u.v = &page_private(page);
-
- return atomic_read(u.o) >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
-}
+#define Z_EROFS_PAGE_EIO (1 << 31)
static inline void z_erofs_onlinepage_init(struct page *page)
{
union {
- z_erofs_onlinepage_t o;
+ atomic_t o;
unsigned long v;
- /* keep from being unlocked in advance */
} u = { .o = ATOMIC_INIT(1) };
set_page_private(page, u.v);
@@ -147,49 +143,36 @@ static inline void z_erofs_onlinepage_init(struct page *page)
SetPagePrivate(page);
}
-static inline void z_erofs_onlinepage_fixup(struct page *page,
- uintptr_t index, bool down)
+static inline void z_erofs_onlinepage_split(struct page *page)
{
- union z_erofs_onlinepage_converter u = { .v = &page_private(page) };
- int orig, orig_index, val;
-
-repeat:
- orig = atomic_read(u.o);
- orig_index = orig >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
- if (orig_index) {
- if (!index)
- return;
+ atomic_inc((atomic_t *)&page->private);
+}
- DBG_BUGON(orig_index != index);
- }
+static inline void z_erofs_page_mark_eio(struct page *page)
+{
+ int orig;
- val = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) |
- ((orig & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down);
- if (atomic_cmpxchg(u.o, orig, val) != orig)
- goto repeat;
+ do {
+ orig = atomic_read((atomic_t *)&page->private);
+ } while (atomic_cmpxchg((atomic_t *)&page->private, orig,
+ orig | Z_EROFS_PAGE_EIO) != orig);
}
static inline void z_erofs_onlinepage_endio(struct page *page)
{
- union z_erofs_onlinepage_converter u;
unsigned int v;
DBG_BUGON(!PagePrivate(page));
- u.v = &page_private(page);
-
- v = atomic_dec_return(u.o);
- if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
+ v = atomic_dec_return((atomic_t *)&page->private);
+ if (!(v & ~Z_EROFS_PAGE_EIO)) {
set_page_private(page, 0);
ClearPagePrivate(page);
- if (!PageError(page))
+ if (!(v & Z_EROFS_PAGE_EIO))
SetPageUptodate(page);
unlock_page(page);
}
- erofs_dbg("%s, page %p value %x", __func__, page, atomic_read(u.o));
}
-#define Z_EROFS_VMAP_ONSTACK_PAGES \
- min_t(unsigned int, THREAD_SIZE / 8 / sizeof(struct page *), 96U)
-#define Z_EROFS_VMAP_GLOBAL_PAGES 2048
+#define Z_EROFS_ONSTACK_PAGES 32
#endif
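The zdata.h hunks above drop the old index/count packing in page->private and track decompression progress as a plain reference count, with bit 31 (Z_EROFS_PAGE_EIO) recording an I/O error on that page. A rough userspace model of the state word, using C11 atomics and a simple fetch-or instead of the kernel's cmpxchg loop (illustration only):

#include <stdatomic.h>
#include <stdio.h>

#define PAGE_EIO (1u << 31)		/* mirrors Z_EROFS_PAGE_EIO */

static atomic_uint state = 1;		/* onlinepage_init(): one pending part */

static void page_split(void)		/* cf. z_erofs_onlinepage_split() */
{
	atomic_fetch_add(&state, 1);
}

static void page_mark_eio(void)		/* cf. z_erofs_page_mark_eio() */
{
	atomic_fetch_or(&state, PAGE_EIO);
}

static void page_endio(void)		/* cf. z_erofs_onlinepage_endio() */
{
	unsigned int v = atomic_fetch_sub(&state, 1) - 1;

	if (!(v & ~PAGE_EIO))		/* last part completed */
		puts(v & PAGE_EIO ? "error" : "uptodate");
}

int main(void)
{
	page_split();			/* page covered by two extents */
	page_endio();
	page_mark_eio();
	page_endio();			/* prints "error" */
	return 0;
}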
diff --git a/fs/erofs/zpvec.h b/fs/erofs/zpvec.h
deleted file mode 100644
index b05464f4a808..000000000000
--- a/fs/erofs/zpvec.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2018 HUAWEI, Inc.
- * https://www.huawei.com/
- */
-#ifndef __EROFS_FS_ZPVEC_H
-#define __EROFS_FS_ZPVEC_H
-
-#include "tagptr.h"
-
-/* page type in pagevec for decompress subsystem */
-enum z_erofs_page_type {
- /* including Z_EROFS_VLE_PAGE_TAIL_EXCLUSIVE */
- Z_EROFS_PAGE_TYPE_EXCLUSIVE,
-
- Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED,
-
- Z_EROFS_VLE_PAGE_TYPE_HEAD,
- Z_EROFS_VLE_PAGE_TYPE_MAX
-};
-
-extern void __compiletime_error("Z_EROFS_PAGE_TYPE_EXCLUSIVE != 0")
- __bad_page_type_exclusive(void);
-
-/* pagevec tagged pointer */
-typedef tagptr2_t erofs_vtptr_t;
-
-/* pagevec collector */
-struct z_erofs_pagevec_ctor {
- struct page *curr, *next;
- erofs_vtptr_t *pages;
-
- unsigned int nr, index;
-};
-
-static inline void z_erofs_pagevec_ctor_exit(struct z_erofs_pagevec_ctor *ctor,
- bool atomic)
-{
- if (!ctor->curr)
- return;
-
- if (atomic)
- kunmap_atomic(ctor->pages);
- else
- kunmap(ctor->curr);
-}
-
-static inline struct page *
-z_erofs_pagevec_ctor_next_page(struct z_erofs_pagevec_ctor *ctor,
- unsigned int nr)
-{
- unsigned int index;
-
- /* keep away from occupied pages */
- if (ctor->next)
- return ctor->next;
-
- for (index = 0; index < nr; ++index) {
- const erofs_vtptr_t t = ctor->pages[index];
- const unsigned int tags = tagptr_unfold_tags(t);
-
- if (tags == Z_EROFS_PAGE_TYPE_EXCLUSIVE)
- return tagptr_unfold_ptr(t);
- }
- DBG_BUGON(nr >= ctor->nr);
- return NULL;
-}
-
-static inline void
-z_erofs_pagevec_ctor_pagedown(struct z_erofs_pagevec_ctor *ctor,
- bool atomic)
-{
- struct page *next = z_erofs_pagevec_ctor_next_page(ctor, ctor->nr);
-
- z_erofs_pagevec_ctor_exit(ctor, atomic);
-
- ctor->curr = next;
- ctor->next = NULL;
- ctor->pages = atomic ?
- kmap_atomic(ctor->curr) : kmap(ctor->curr);
-
- ctor->nr = PAGE_SIZE / sizeof(struct page *);
- ctor->index = 0;
-}
-
-static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor,
- unsigned int nr,
- erofs_vtptr_t *pages,
- unsigned int i)
-{
- ctor->nr = nr;
- ctor->curr = ctor->next = NULL;
- ctor->pages = pages;
-
- if (i >= nr) {
- i -= nr;
- z_erofs_pagevec_ctor_pagedown(ctor, false);
- while (i > ctor->nr) {
- i -= ctor->nr;
- z_erofs_pagevec_ctor_pagedown(ctor, false);
- }
- }
- ctor->next = z_erofs_pagevec_ctor_next_page(ctor, i);
- ctor->index = i;
-}
-
-static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor,
- struct page *page,
- enum z_erofs_page_type type,
- bool pvec_safereuse)
-{
- if (!ctor->next) {
- /* some pages cannot be reused as pvec safely without I/O */
- if (type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && !pvec_safereuse)
- type = Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED;
-
- if (type != Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
- ctor->index + 1 == ctor->nr)
- return false;
- }
-
- if (ctor->index >= ctor->nr)
- z_erofs_pagevec_ctor_pagedown(ctor, false);
-
- /* exclusive page type must be 0 */
- if (Z_EROFS_PAGE_TYPE_EXCLUSIVE != (uintptr_t)NULL)
- __bad_page_type_exclusive();
-
- /* should remind that collector->next never equal to 1, 2 */
- if (type == (uintptr_t)ctor->next) {
- ctor->next = page;
- }
- ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, page, type);
- return true;
-}
-
-static inline struct page *
-z_erofs_pagevec_dequeue(struct z_erofs_pagevec_ctor *ctor,
- enum z_erofs_page_type *type)
-{
- erofs_vtptr_t t;
-
- if (ctor->index >= ctor->nr) {
- DBG_BUGON(!ctor->next);
- z_erofs_pagevec_ctor_pagedown(ctor, true);
- }
-
- t = ctor->pages[ctor->index];
-
- *type = tagptr_unfold_tags(t);
-
- /* should remind that collector->next never equal to 1, 2 */
- if (*type == (uintptr_t)ctor->next)
- ctor->next = tagptr_unfold_ptr(t);
-
- ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, NULL, 0);
- return tagptr_unfold_ptr(t);
-}
-#endif
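With zpvec.h gone, the 2-bit page type that used to be folded into the low bits of each page pointer (erofs_vtptr_t, built on tagptr2_t) is no longer needed: struct z_erofs_bvec in the zdata.h diff above carries the page, offset and end explicitly. For reference, a simplified standalone illustration of the low-bit pointer tagging the deleted code relied on (the real helpers live in fs/erofs/tagptr.h; this is not a copy of them):

#include <stdint.h>
#include <assert.h>

/* Fold a 2-bit tag into the low bits of a 4-byte-aligned pointer. */
static uintptr_t tag_fold(void *ptr, unsigned int tag)
{
	assert(((uintptr_t)ptr & 3) == 0 && tag < 4);
	return (uintptr_t)ptr | tag;
}

static void *tag_unfold_ptr(uintptr_t v)         { return (void *)(v & ~(uintptr_t)3); }
static unsigned int tag_unfold_tags(uintptr_t v) { return v & 3; }

int main(void)
{
	static _Alignas(4) int dummy;
	uintptr_t v = tag_fold(&dummy, 2);

	assert(tag_unfold_ptr(v) == &dummy && tag_unfold_tags(v) == 2);
	return 0;
}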
diff --git a/fs/exec.c b/fs/exec.c
index 0989fb8472a1..778123259e42 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1301,7 +1301,7 @@ int begin_new_exec(struct linux_binprm * bprm)
bprm->mm = NULL;
#ifdef CONFIG_POSIX_TIMERS
- exit_itimers(me->signal);
+ exit_itimers(me);
flush_itimer_signals();
#endif
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index e6b932219803..7a192e4e7fa9 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1679,14 +1679,14 @@ int ext2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (error)
return error;
- if (is_quota_modification(inode, iattr)) {
+ if (is_quota_modification(mnt_userns, inode, iattr)) {
error = dquot_initialize(inode);
if (error)
return error;
}
- if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
- (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
- error = dquot_transfer(inode, iattr);
+ if (i_uid_needs_update(mnt_userns, iattr, inode) ||
+ i_gid_needs_update(mnt_userns, iattr, inode)) {
+ error = dquot_transfer(mnt_userns, inode, iattr);
if (error)
return error;
}
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index f6a19f6d9f6d..6f475d2e3b18 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1059,9 +1059,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_frags_per_group);
goto failed_mount;
}
- if (sbi->s_inodes_per_group > sb->s_blocksize * 8) {
+ if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
+ sbi->s_inodes_per_group > sb->s_blocksize * 8) {
ext2_msg(sb, KERN_ERR,
- "error: #inodes per group too big: %lu",
+ "error: invalid #inodes per group: %lu",
sbi->s_inodes_per_group);
goto failed_mount;
}
@@ -1071,6 +1072,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
le32_to_cpu(es->s_first_data_block) - 1)
/ EXT2_BLOCKS_PER_GROUP(sb)) + 1;
+ if ((u64)sbi->s_groups_count * sbi->s_inodes_per_group !=
+ le32_to_cpu(es->s_inodes_count)) {
+ ext2_msg(sb, KERN_ERR, "error: invalid #inodes: %u vs computed %llu",
+ le32_to_cpu(es->s_inodes_count),
+ (u64)sbi->s_groups_count * sbi->s_inodes_per_group);
+ goto failed_mount;
+ }
db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
EXT2_DESC_PER_BLOCK(sb);
sbi->s_group_desc = kmalloc_array(db_count,
@@ -1490,8 +1498,7 @@ static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data,
len = i_size-off;
toread = len;
while (toread > 0) {
- tocopy = sb->s_blocksize - offset < toread ?
- sb->s_blocksize - offset : toread;
+ tocopy = min_t(size_t, sb->s_blocksize - offset, toread);
tmp_bh.b_state = 0;
tmp_bh.b_size = sb->s_blocksize;
@@ -1529,8 +1536,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
struct buffer_head *bh;
while (towrite > 0) {
- tocopy = sb->s_blocksize - offset < towrite ?
- sb->s_blocksize - offset : towrite;
+ tocopy = min_t(size_t, sb->s_blocksize - offset, towrite);
tmp_bh.b_state = 0;
tmp_bh.b_size = sb->s_blocksize;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 84c0eb55071d..3dcc1dd1f179 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5350,14 +5350,14 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (error)
return error;
- if (is_quota_modification(inode, attr)) {
+ if (is_quota_modification(mnt_userns, inode, attr)) {
error = dquot_initialize(inode);
if (error)
return error;
}
- if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
- (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
+ if (i_uid_needs_update(mnt_userns, attr, inode) ||
+ i_gid_needs_update(mnt_userns, attr, inode)) {
handle_t *handle;
/* (user+group)*(old+new) structure, inode write (sb,
@@ -5374,7 +5374,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
* counts xattr inode references.
*/
down_read(&EXT4_I(inode)->xattr_sem);
- error = dquot_transfer(inode, attr);
+ error = dquot_transfer(mnt_userns, inode, attr);
up_read(&EXT4_I(inode)->xattr_sem);
if (error) {
@@ -5383,10 +5383,8 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
}
/* Update corresponding info in inode so that everything is in
* one transaction */
- if (attr->ia_valid & ATTR_UID)
- inode->i_uid = attr->ia_uid;
- if (attr->ia_valid & ATTR_GID)
- inode->i_gid = attr->ia_gid;
+ i_uid_update(mnt_userns, attr, inode);
+ i_gid_update(mnt_userns, attr, inode);
error = ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
if (unlikely(error)) {
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index bd14cef1b08f..d66e37d80a2d 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -861,10 +861,8 @@ static void __setattr_copy(struct user_namespace *mnt_userns,
{
unsigned int ia_valid = attr->ia_valid;
- if (ia_valid & ATTR_UID)
- inode->i_uid = attr->ia_uid;
- if (ia_valid & ATTR_GID)
- inode->i_gid = attr->ia_gid;
+ i_uid_update(mnt_userns, attr, inode);
+ i_gid_update(mnt_userns, attr, inode);
if (ia_valid & ATTR_ATIME)
inode->i_atime = attr->ia_atime;
if (ia_valid & ATTR_MTIME)
@@ -917,17 +915,15 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (err)
return err;
- if (is_quota_modification(inode, attr)) {
+ if (is_quota_modification(mnt_userns, inode, attr)) {
err = f2fs_dquot_initialize(inode);
if (err)
return err;
}
- if ((attr->ia_valid & ATTR_UID &&
- !uid_eq(attr->ia_uid, inode->i_uid)) ||
- (attr->ia_valid & ATTR_GID &&
- !gid_eq(attr->ia_gid, inode->i_gid))) {
+ if (i_uid_needs_update(mnt_userns, attr, inode) ||
+ i_gid_needs_update(mnt_userns, attr, inode)) {
f2fs_lock_op(F2FS_I_SB(inode));
- err = dquot_transfer(inode, attr);
+ err = dquot_transfer(mnt_userns, inode, attr);
if (err) {
set_sbi_flag(F2FS_I_SB(inode),
SBI_QUOTA_NEED_REPAIR);
@@ -938,10 +934,8 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
* update uid/gid under lock_op(), so that dquot and inode can
* be updated atomically.
*/
- if (attr->ia_valid & ATTR_UID)
- inode->i_uid = attr->ia_uid;
- if (attr->ia_valid & ATTR_GID)
- inode->i_gid = attr->ia_gid;
+ i_uid_update(mnt_userns, attr, inode);
+ i_gid_update(mnt_userns, attr, inode);
f2fs_mark_inode_dirty_sync(inode, true);
f2fs_unlock_op(F2FS_I_SB(inode));
}
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 3cb7f8a43b4d..dcd0a1e35095 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -255,18 +255,18 @@ static int recover_quota_data(struct inode *inode, struct page *page)
memset(&attr, 0, sizeof(attr));
- attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid);
- attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid);
+ attr.ia_vfsuid = VFSUIDT_INIT(make_kuid(inode->i_sb->s_user_ns, i_uid));
+ attr.ia_vfsgid = VFSGIDT_INIT(make_kgid(inode->i_sb->s_user_ns, i_gid));
- if (!uid_eq(attr.ia_uid, inode->i_uid))
+ if (!vfsuid_eq(attr.ia_vfsuid, i_uid_into_vfsuid(&init_user_ns, inode)))
attr.ia_valid |= ATTR_UID;
- if (!gid_eq(attr.ia_gid, inode->i_gid))
+ if (!vfsgid_eq(attr.ia_vfsgid, i_gid_into_vfsgid(&init_user_ns, inode)))
attr.ia_valid |= ATTR_GID;
if (!attr.ia_valid)
return 0;
- err = dquot_transfer(inode, &attr);
+ err = dquot_transfer(&init_user_ns, inode, &attr);
if (err)
set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
return err;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 3dae3ed60f3a..3e4eb3467cb4 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -90,7 +90,8 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
* out the RO attribute for checking by the security
* module, just because it maps to a file mode.
*/
- err = security_inode_setattr(file->f_path.dentry, &ia);
+ err = security_inode_setattr(file_mnt_user_ns(file),
+ file->f_path.dentry, &ia);
if (err)
goto out_unlock_inode;
@@ -516,9 +517,11 @@ int fat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
}
if (((attr->ia_valid & ATTR_UID) &&
- (!uid_eq(attr->ia_uid, sbi->options.fs_uid))) ||
+ (!uid_eq(from_vfsuid(mnt_userns, i_user_ns(inode), attr->ia_vfsuid),
+ sbi->options.fs_uid))) ||
((attr->ia_valid & ATTR_GID) &&
- (!gid_eq(attr->ia_gid, sbi->options.fs_gid))) ||
+ (!gid_eq(from_vfsgid(mnt_userns, i_user_ns(inode), attr->ia_vfsgid),
+ sbi->options.fs_gid))) ||
((attr->ia_valid & ATTR_MODE) &&
(attr->ia_mode & ~FAT_VALID_MODE)))
error = -EPERM;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a01ea49f3017..e8e769be9ed0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1738,6 +1738,14 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
return;
/*
+ * READV uses fields in `struct io_rw` (len/addr) to stash the selected
+ * buffer data. However if that buffer is recycled the original request
+ * data stored in addr is lost. Therefore forbid recycling for now.
+ */
+ if (req->opcode == IORING_OP_READV)
+ return;
+
+ /*
* We don't need to recycle for REQ_F_BUFFER_RING, we can just clear
* the flag and hence ensure that bl->head doesn't get incremented.
* If the tail has already been incremented, hang on to it.
@@ -12931,7 +12939,7 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
{
struct io_uring_buf_ring *br;
struct io_uring_buf_reg reg;
- struct io_buffer_list *bl;
+ struct io_buffer_list *bl, *free_bl = NULL;
struct page **pages;
int nr_pages;
@@ -12963,7 +12971,7 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
if (bl->buf_nr_pages || !list_empty(&bl->buf_list))
return -EEXIST;
} else {
- bl = kzalloc(sizeof(*bl), GFP_KERNEL);
+ free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
if (!bl)
return -ENOMEM;
}
@@ -12972,7 +12980,7 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
struct_size(br, bufs, reg.ring_entries),
&nr_pages);
if (IS_ERR(pages)) {
- kfree(bl);
+ kfree(free_bl);
return PTR_ERR(pages);
}
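The io_register_pbuf_ring() hunk above fixes an error path that could free a buffer list the function merely looked up: a separate free_bl pointer now tracks only the list allocated inside the function, so the failure path never frees an object the ring still owns. The same pattern reduced to a standalone sketch (names invented for illustration):

#include <stdlib.h>

struct buf_list { int nr_pages; };

static struct buf_list *registered;	/* stands in for the ctx lookup */

static int register_ring(int fail_mapping)
{
	struct buf_list *bl, *free_bl = NULL;

	bl = registered;		/* look up an existing list first */
	if (!bl) {
		free_bl = bl = calloc(1, sizeof(*bl));	/* allocation we own */
		if (!bl)
			return -1;
	}

	if (fail_mapping) {		/* e.g. pinning the ring pages failed */
		free(free_bl);		/* frees only what this call allocated */
		return -1;
	}
	registered = bl;
	return 0;
}

int main(void)
{
	register_ring(0);
	/* the failing call must leave the previously registered list intact */
	return register_ring(1) && registered ? 0 : 1;
}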
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 1d732fd223d4..332dc9ac47a9 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -95,14 +95,14 @@ int jfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (rc)
return rc;
- if (is_quota_modification(inode, iattr)) {
+ if (is_quota_modification(mnt_userns, inode, iattr)) {
rc = dquot_initialize(inode);
if (rc)
return rc;
}
if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
(iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
- rc = dquot_transfer(inode, iattr);
+ rc = dquot_transfer(mnt_userns, inode, iattr);
if (rc)
return rc;
}
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
index 05efcdf7a4a7..7c849024999f 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/ksmbd/vfs.c
@@ -963,7 +963,7 @@ ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns,
*/
int ksmbd_vfs_setxattr(struct user_namespace *user_ns,
struct dentry *dentry, const char *attr_name,
- const void *attr_value, size_t attr_size, int flags)
+ void *attr_value, size_t attr_size, int flags)
{
int err;
diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h
index 8c37aaf936ab..70da4c0ba7ad 100644
--- a/fs/ksmbd/vfs.h
+++ b/fs/ksmbd/vfs.h
@@ -109,7 +109,7 @@ ssize_t ksmbd_vfs_casexattr_len(struct user_namespace *user_ns,
int attr_name_len);
int ksmbd_vfs_setxattr(struct user_namespace *user_ns,
struct dentry *dentry, const char *attr_name,
- const void *attr_value, size_t attr_size, int flags);
+ void *attr_value, size_t attr_size, int flags);
int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name,
size_t *xattr_stream_name_size, int s_type);
int ksmbd_vfs_remove_xattr(struct user_namespace *user_ns,
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 0a22a2faf552..e1c4617de771 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -176,7 +176,7 @@ nlm_delete_file(struct nlm_file *file)
}
}
-static int nlm_unlock_files(struct nlm_file *file)
+static int nlm_unlock_files(struct nlm_file *file, fl_owner_t owner)
{
struct file_lock lock;
@@ -184,6 +184,7 @@ static int nlm_unlock_files(struct nlm_file *file)
lock.fl_type = F_UNLCK;
lock.fl_start = 0;
lock.fl_end = OFFSET_MAX;
+ lock.fl_owner = owner;
if (file->f_file[O_RDONLY] &&
vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, &lock, NULL))
goto out_err;
@@ -225,7 +226,7 @@ again:
if (match(lockhost, host)) {
spin_unlock(&flctx->flc_lock);
- if (nlm_unlock_files(file))
+ if (nlm_unlock_files(file, fl->fl_owner))
return 1;
goto again;
}
@@ -282,11 +283,10 @@ nlm_file_inuse(struct nlm_file *file)
static void nlm_close_files(struct nlm_file *file)
{
- struct file *f;
-
- for (f = file->f_file[0]; f <= file->f_file[1]; f++)
- if (f)
- nlmsvc_ops->fclose(f);
+ if (file->f_file[O_RDONLY])
+ nlmsvc_ops->fclose(file->f_file[O_RDONLY]);
+ if (file->f_file[O_WRONLY])
+ nlmsvc_ops->fclose(file->f_file[O_WRONLY]);
}
/*
diff --git a/fs/locks.c b/fs/locks.c
index ca28e0e50e56..c266cfdc3291 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -425,21 +425,9 @@ static inline int flock_translate_cmd(int cmd) {
}
/* Fill in a file_lock structure with an appropriate FLOCK lock. */
-static struct file_lock *
-flock_make_lock(struct file *filp, unsigned int cmd, struct file_lock *fl)
+static void flock_make_lock(struct file *filp, struct file_lock *fl, int type)
{
- int type = flock_translate_cmd(cmd);
-
- if (type < 0)
- return ERR_PTR(type);
-
- if (fl == NULL) {
- fl = locks_alloc_lock();
- if (fl == NULL)
- return ERR_PTR(-ENOMEM);
- } else {
- locks_init_lock(fl);
- }
+ locks_init_lock(fl);
fl->fl_file = filp;
fl->fl_owner = filp;
@@ -447,8 +435,6 @@ flock_make_lock(struct file *filp, unsigned int cmd, struct file_lock *fl)
fl->fl_flags = FL_FLOCK;
fl->fl_type = type;
fl->fl_end = OFFSET_MAX;
-
- return fl;
}
static int assign_type(struct file_lock *fl, long type)
@@ -2097,21 +2083,9 @@ EXPORT_SYMBOL(locks_lock_inode_wait);
*/
SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
{
- struct fd f = fdget(fd);
- struct file_lock *lock;
- int can_sleep, unlock;
- int error;
-
- error = -EBADF;
- if (!f.file)
- goto out;
-
- can_sleep = !(cmd & LOCK_NB);
- cmd &= ~LOCK_NB;
- unlock = (cmd == LOCK_UN);
-
- if (!unlock && !(f.file->f_mode & (FMODE_READ|FMODE_WRITE)))
- goto out_putf;
+ int can_sleep, error, type;
+ struct file_lock fl;
+ struct fd f;
/*
* LOCK_MAND locks were broken for a long time in that they never
@@ -2123,36 +2097,41 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
*/
if (cmd & LOCK_MAND) {
pr_warn_once("Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n");
- error = 0;
- goto out_putf;
+ return 0;
}
- lock = flock_make_lock(f.file, cmd, NULL);
- if (IS_ERR(lock)) {
- error = PTR_ERR(lock);
+ type = flock_translate_cmd(cmd & ~LOCK_NB);
+ if (type < 0)
+ return type;
+
+ error = -EBADF;
+ f = fdget(fd);
+ if (!f.file)
+ return error;
+
+ if (type != F_UNLCK && !(f.file->f_mode & (FMODE_READ | FMODE_WRITE)))
goto out_putf;
- }
- if (can_sleep)
- lock->fl_flags |= FL_SLEEP;
+ flock_make_lock(f.file, &fl, type);
- error = security_file_lock(f.file, lock->fl_type);
+ error = security_file_lock(f.file, fl.fl_type);
if (error)
- goto out_free;
+ goto out_putf;
+
+ can_sleep = !(cmd & LOCK_NB);
+ if (can_sleep)
+ fl.fl_flags |= FL_SLEEP;
if (f.file->f_op->flock)
error = f.file->f_op->flock(f.file,
- (can_sleep) ? F_SETLKW : F_SETLK,
- lock);
+ (can_sleep) ? F_SETLKW : F_SETLK,
+ &fl);
else
- error = locks_lock_file_wait(f.file, lock);
-
- out_free:
- locks_free_lock(lock);
+ error = locks_lock_file_wait(f.file, &fl);
out_putf:
fdput(f);
- out:
+
return error;
}
@@ -2614,7 +2593,7 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
if (list_empty(&flctx->flc_flock))
return;
- flock_make_lock(filp, LOCK_UN, &fl);
+ flock_make_lock(filp, &fl, F_UNLCK);
fl.fl_flags |= FL_CLOSE;
if (filp->f_op->flock)
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 42f892c5712e..0ce535852151 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -319,8 +319,9 @@ zero_out:
* conflicting writes once the folio is grabbed and locked. It is passed a
* pointer to the fsdata cookie that gets returned to the VM to be passed to
* write_end. It is permitted to sleep. It should return 0 if the request
- * should go ahead; unlock the folio and return -EAGAIN to cause the folio to
- * be regot; or return an error.
+ * should go ahead or it may return an error. It may also unlock and put the
+ * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
+ * will cause the folio to be re-got and the process to be retried.
*
* The calling netfs must initialise a netfs context contiguous to the vfs
* inode before calling this.
@@ -348,13 +349,13 @@ retry:
if (ctx->ops->check_write_begin) {
/* Allow the netfs (eg. ceph) to flush conflicts. */
- ret = ctx->ops->check_write_begin(file, pos, len, folio, _fsdata);
+ ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
if (ret < 0) {
trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
- if (ret == -EAGAIN)
- goto retry;
goto error;
}
+ if (!folio)
+ goto retry;
}
if (folio_test_uptodate(folio))
@@ -416,8 +417,10 @@ have_folio_no_wait:
error_put:
netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
error:
- folio_unlock(folio);
- folio_put(folio);
+ if (folio) {
+ folio_unlock(folio);
+ folio_put(folio);
+ }
_leave(" = %d", ret);
return ret;
}
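The buffered_read.c hunk above changes the ->check_write_begin() contract: rather than returning -EAGAIN to ask for a retry, the hook now receives a struct folio ** and may unlock and put the folio itself, setting *foliop to NULL so that a 0 return makes netfs re-get the folio and retry. A hedged sketch of a hook following that contract (a kernel-side fragment, not standalone; parameter types are approximated from the call site above, and flush_conflicts() is an invented stand-in for filesystem-specific work):

static int example_check_write_begin(struct file *file, loff_t pos,
				     unsigned int len, struct folio **foliop,
				     void **_fsdata)
{
	if (!flush_conflicts(file, pos, len))
		return 0;		/* no conflict: write_begin proceeds with this folio */

	/* Conflict found: drop the folio and ask the caller to start over. */
	folio_unlock(*foliop);
	folio_put(*foliop);
	*foliop = NULL;			/* 0 + NULL folio => re-get and retry */
	return 0;
}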
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 61b2aae81abb..2acea7792bb2 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -470,6 +470,15 @@ nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen,
return nfserr_bad_xdr;
}
}
+ if (bmval[1] & FATTR4_WORD1_TIME_CREATE) {
+ struct timespec64 ts;
+
+ /* No Linux filesystem supports setting this attribute. */
+ bmval[1] &= ~FATTR4_WORD1_TIME_CREATE;
+ status = nfsd4_decode_nfstime4(argp, &ts);
+ if (status)
+ return status;
+ }
if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
u32 set_it;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 847b482155ae..9a8b09afc173 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -465,7 +465,8 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL)
#define NFSD_WRITEABLE_ATTRS_WORD1 \
(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
- | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
+ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_CREATE \
+ | FATTR4_WORD1_TIME_MODIFY_SET)
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
#define MAYBE_FATTR4_WORD2_SECURITY_LABEL \
FATTR4_WORD2_SECURITY_LABEL
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 1344f7d475d3..aecda4fc95f5 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -198,6 +198,9 @@ static inline int nilfs_acl_chmod(struct inode *inode)
static inline int nilfs_init_acl(struct inode *inode, struct inode *dir)
{
+ if (S_ISLNK(inode->i_mode))
+ return 0;
+
inode->i_mode &= ~current_umask();
return 0;
}
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 4f897e109547..cd7d09a569ff 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -295,12 +295,13 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
const void *data, int data_type,
struct inode *dir)
{
- __u32 marks_mask = 0, marks_ignored_mask = 0;
+ __u32 marks_mask = 0, marks_ignore_mask = 0;
__u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS |
FANOTIFY_EVENT_FLAGS;
const struct path *path = fsnotify_data_path(data, data_type);
unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
struct fsnotify_mark *mark;
+ bool ondir = event_mask & FAN_ONDIR;
int type;
pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
@@ -315,19 +316,21 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
return 0;
} else if (!(fid_mode & FAN_REPORT_FID)) {
/* Do we have a directory inode to report? */
- if (!dir && !(event_mask & FS_ISDIR))
+ if (!dir && !ondir)
return 0;
}
fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
- /* Apply ignore mask regardless of mark's ISDIR flag */
- marks_ignored_mask |= mark->ignored_mask;
+ /*
+ * Apply ignore mask depending on event flags in ignore mask.
+ */
+ marks_ignore_mask |=
+ fsnotify_effective_ignore_mask(mark, ondir, type);
/*
- * If the event is on dir and this mark doesn't care about
- * events on dir, don't send it!
+ * Send the event depending on event flags in mark mask.
*/
- if (event_mask & FS_ISDIR && !(mark->mask & FS_ISDIR))
+ if (!fsnotify_mask_applicable(mark->mask, ondir, type))
continue;
marks_mask |= mark->mask;
@@ -336,7 +339,7 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
*match_mask |= 1U << type;
}
- test_mask = event_mask & marks_mask & ~marks_ignored_mask;
+ test_mask = event_mask & marks_mask & ~marks_ignore_mask;
/*
* For dirent modification events (create/delete/move) that do not carry
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 80e0ec95b113..1d9f11255c64 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -499,6 +499,8 @@ static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark)
mflags |= FAN_MARK_IGNORED_SURV_MODIFY;
if (mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)
mflags |= FAN_MARK_EVICTABLE;
+ if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS)
+ mflags |= FAN_MARK_IGNORE;
return mflags;
}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index b08ce0d821a7..f0e49a406ffa 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1009,10 +1009,10 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
mask &= ~umask;
spin_lock(&fsn_mark->lock);
oldmask = fsnotify_calc_mask(fsn_mark);
- if (!(flags & FAN_MARK_IGNORED_MASK)) {
+ if (!(flags & FANOTIFY_MARK_IGNORE_BITS)) {
fsn_mark->mask &= ~mask;
} else {
- fsn_mark->ignored_mask &= ~mask;
+ fsn_mark->ignore_mask &= ~mask;
}
newmask = fsnotify_calc_mask(fsn_mark);
/*
@@ -1021,7 +1021,7 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
* changes to the mask.
* Destroy mark when only umask bits remain.
*/
- *destroy = !((fsn_mark->mask | fsn_mark->ignored_mask) & ~umask);
+ *destroy = !((fsn_mark->mask | fsn_mark->ignore_mask) & ~umask);
spin_unlock(&fsn_mark->lock);
return oldmask & ~newmask;
@@ -1085,15 +1085,24 @@ static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark,
unsigned int fan_flags)
{
bool want_iref = !(fan_flags & FAN_MARK_EVICTABLE);
+ unsigned int ignore = fan_flags & FANOTIFY_MARK_IGNORE_BITS;
bool recalc = false;
/*
+ * When using FAN_MARK_IGNORE for the first time, mark starts using
+ * independent event flags in ignore mask. After that, trying to
+ * update the ignore mask with the old FAN_MARK_IGNORED_MASK API
+ * will result in EEXIST error.
+ */
+ if (ignore == FAN_MARK_IGNORE)
+ fsn_mark->flags |= FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS;
+
+ /*
* Setting FAN_MARK_IGNORED_SURV_MODIFY for the first time may lead to
* the removal of the FS_MODIFY bit in calculated mask if it was set
- * because of an ignored mask that is now going to survive FS_MODIFY.
+ * because of an ignore mask that is now going to survive FS_MODIFY.
*/
- if ((fan_flags & FAN_MARK_IGNORED_MASK) &&
- (fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
+ if (ignore && (fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
!(fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) {
fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
if (!(fsn_mark->mask & FS_MODIFY))
@@ -1120,10 +1129,10 @@ static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
bool recalc;
spin_lock(&fsn_mark->lock);
- if (!(fan_flags & FAN_MARK_IGNORED_MASK))
+ if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS))
fsn_mark->mask |= mask;
else
- fsn_mark->ignored_mask |= mask;
+ fsn_mark->ignore_mask |= mask;
recalc = fsnotify_calc_mask(fsn_mark) &
~fsnotify_conn_mask(fsn_mark->connector);
@@ -1187,6 +1196,37 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group)
sizeof(struct fanotify_error_event));
}
+static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
+ unsigned int fan_flags)
+{
+ /*
+ * Non evictable mark cannot be downgraded to evictable mark.
+ */
+ if (fan_flags & FAN_MARK_EVICTABLE &&
+ !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
+ return -EEXIST;
+
+ /*
+ * New ignore mask semantics cannot be downgraded to old semantics.
+ */
+ if (fan_flags & FAN_MARK_IGNORED_MASK &&
+ fsn_mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS)
+ return -EEXIST;
+
+ /*
+ * An ignore mask that survives modify could never be downgraded to not
+ * survive modify. With new FAN_MARK_IGNORE semantics we make that rule
+ * explicit and return an error when trying to update the ignore mask
+ * without the original FAN_MARK_IGNORED_SURV_MODIFY value.
+ */
+ if (fan_flags & FAN_MARK_IGNORE &&
+ !(fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
+ fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
+ return -EEXIST;
+
+ return 0;
+}
+
static int fanotify_add_mark(struct fsnotify_group *group,
fsnotify_connp_t *connp, unsigned int obj_type,
__u32 mask, unsigned int fan_flags,
@@ -1208,19 +1248,18 @@ static int fanotify_add_mark(struct fsnotify_group *group,
}
/*
- * Non evictable mark cannot be downgraded to evictable mark.
+ * Check if requested mark flags conflict with an existing mark flags.
*/
- if (fan_flags & FAN_MARK_EVICTABLE &&
- !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) {
- ret = -EEXIST;
+ ret = fanotify_may_update_existing_mark(fsn_mark, fan_flags);
+ if (ret)
goto out;
- }
/*
* Error events are pre-allocated per group, only if strictly
* needed (i.e. FAN_FS_ERROR was requested).
*/
- if (!(fan_flags & FAN_MARK_IGNORED_MASK) && (mask & FAN_FS_ERROR)) {
+ if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS) &&
+ (mask & FAN_FS_ERROR)) {
ret = fanotify_group_init_error_pool(group);
if (ret)
goto out;
@@ -1261,10 +1300,10 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
/*
* If some other task has this inode open for write we should not add
- * an ignored mark, unless that ignored mark is supposed to survive
+ * an ignore mask, unless that ignore mask is supposed to survive
* modification changes anyway.
*/
- if ((flags & FAN_MARK_IGNORED_MASK) &&
+ if ((flags & FANOTIFY_MARK_IGNORE_BITS) &&
!(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
inode_is_open_for_write(inode))
return 0;
@@ -1520,7 +1559,8 @@ static int fanotify_events_supported(struct fsnotify_group *group,
unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
/* Strict validation of events in non-dir inode mask with v5.17+ APIs */
bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) ||
- (mask & FAN_RENAME);
+ (mask & FAN_RENAME) ||
+ (flags & FAN_MARK_IGNORE);
/*
* Some filesystems such as 'proc' acquire unusual locks when opening
@@ -1557,7 +1597,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
__kernel_fsid_t __fsid, *fsid = NULL;
u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
- bool ignored = flags & FAN_MARK_IGNORED_MASK;
+ unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
+ unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS;
unsigned int obj_type, fid_mode;
u32 umask = 0;
int ret;
@@ -1586,7 +1627,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
return -EINVAL;
}
- switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
+ switch (mark_cmd) {
case FAN_MARK_ADD:
case FAN_MARK_REMOVE:
if (!mask)
@@ -1606,9 +1647,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
if (mask & ~valid_mask)
return -EINVAL;
- /* Event flags (ONDIR, ON_CHILD) are meaningless in ignored mask */
- if (ignored)
+
+ /* We don't allow FAN_MARK_IGNORE & FAN_MARK_IGNORED_MASK together */
+ if (ignore == (FAN_MARK_IGNORE | FAN_MARK_IGNORED_MASK))
+ return -EINVAL;
+
+ /*
+ * Event flags (FAN_ONDIR, FAN_EVENT_ON_CHILD) have no effect with
+ * FAN_MARK_IGNORED_MASK.
+ */
+ if (ignore == FAN_MARK_IGNORED_MASK) {
mask &= ~FANOTIFY_EVENT_FLAGS;
+ umask = FANOTIFY_EVENT_FLAGS;
+ }
f = fdget(fanotify_fd);
if (unlikely(!f.file))
@@ -1672,7 +1723,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME))
goto fput_and_out;
- if (flags & FAN_MARK_FLUSH) {
+ if (mark_cmd == FAN_MARK_FLUSH) {
ret = 0;
if (mark_type == FAN_MARK_MOUNT)
fsnotify_clear_vfsmount_marks_by_group(group);
@@ -1688,7 +1739,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
if (ret)
goto fput_and_out;
- if (flags & FAN_MARK_ADD) {
+ if (mark_cmd == FAN_MARK_ADD) {
ret = fanotify_events_supported(group, &path, mask, flags);
if (ret)
goto path_put_and_out;
@@ -1712,6 +1763,13 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
else
mnt = path.mnt;
+ ret = mnt ? -EINVAL : -EISDIR;
+ /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */
+ if (mark_cmd == FAN_MARK_ADD && ignore == FAN_MARK_IGNORE &&
+ (mnt || S_ISDIR(inode->i_mode)) &&
+ !(flags & FAN_MARK_IGNORED_SURV_MODIFY))
+ goto path_put_and_out;
+
/* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
if (mnt || !S_ISDIR(inode->i_mode)) {
mask &= ~FAN_EVENT_ON_CHILD;
@@ -1721,12 +1779,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
* events with parent/name info for non-directory.
*/
if ((fid_mode & FAN_REPORT_DIR_FID) &&
- (flags & FAN_MARK_ADD) && !ignored)
+ (flags & FAN_MARK_ADD) && !ignore)
mask |= FAN_EVENT_ON_CHILD;
}
/* create/update an inode mark */
- switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
+ switch (mark_cmd) {
case FAN_MARK_ADD:
if (mark_type == FAN_MARK_MOUNT)
ret = fanotify_add_vfsmount_mark(group, mnt, mask,
@@ -1804,7 +1862,7 @@ static int __init fanotify_user_setup(void)
BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12);
- BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 10);
+ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11);
fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
SLAB_PANIC|SLAB_ACCOUNT);
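A minimal userspace sketch (not part of the patch) of the FAN_MARK_IGNORE behaviour validated above: directory marks must pair it with FAN_MARK_IGNORED_SURV_MODIFY, and event flags such as FAN_ONDIR survive in the ignore mask. The flag value is an assumption taken from this series' UAPI addition, in case the installed <sys/fanotify.h> predates it.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/fanotify.h>

#ifndef FAN_MARK_IGNORE
#define FAN_MARK_IGNORE 0x00000400	/* assumed value from this series */
#endif

int main(void)
{
	int fd = fanotify_init(FAN_CLASS_NOTIF, O_RDONLY);

	if (fd < 0) {
		perror("fanotify_init");
		return EXIT_FAILURE;
	}

	/* Watch the whole filesystem backing /tmp for opens. */
	if (fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM,
			  FAN_OPEN | FAN_ONDIR | FAN_EVENT_ON_CHILD,
			  AT_FDCWD, "/tmp") < 0) {
		perror("fanotify_mark(add)");
		return EXIT_FAILURE;
	}

	/*
	 * Suppress events below /tmp/scratch. For a directory mark the
	 * kernel above insists on FAN_MARK_IGNORED_SURV_MODIFY together
	 * with FAN_MARK_IGNORE, and unlike FAN_MARK_IGNORED_MASK the
	 * FAN_ONDIR/FAN_EVENT_ON_CHILD flags are kept in the ignore mask.
	 */
	if (fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_IGNORE |
			  FAN_MARK_IGNORED_SURV_MODIFY,
			  FAN_OPEN | FAN_ONDIR | FAN_EVENT_ON_CHILD,
			  AT_FDCWD, "/tmp/scratch") < 0)
		perror("fanotify_mark(ignore)");

	return EXIT_SUCCESS;
}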
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 59fb40abe33d..55081ae3a6ec 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -113,7 +113,7 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
return;
seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ",
inode->i_ino, inode->i_sb->s_dev,
- mflags, mark->mask, mark->ignored_mask);
+ mflags, mark->mask, mark->ignore_mask);
show_mark_fhandle(m, inode);
seq_putc(m, '\n');
iput(inode);
@@ -121,12 +121,12 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
struct mount *mnt = fsnotify_conn_mount(mark->connector);
seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n",
- mnt->mnt_id, mflags, mark->mask, mark->ignored_mask);
+ mnt->mnt_id, mflags, mark->mask, mark->ignore_mask);
} else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_SB) {
struct super_block *sb = fsnotify_conn_sb(mark->connector);
seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n",
- sb->s_dev, mflags, mark->mask, mark->ignored_mask);
+ sb->s_dev, mflags, mark->mask, mark->ignore_mask);
}
}
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 0b3e74935cb4..7974e91ffe13 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -100,7 +100,7 @@ void fsnotify_sb_delete(struct super_block *sb)
* Given an inode, first check if we care what happens to our children. Inotify
* and dnotify both tell their parents about events. If we care about any event
* on a child we run all of our children and set a dentry flag saying that the
- * parent cares. Thus when an event happens on a child it can quickly tell if
+ * parent cares. Thus when an event happens on a child it can quickly tell
* if there is a need to find a parent and send the event to the parent.
*/
void __fsnotify_update_child_dentry_flags(struct inode *inode)
@@ -324,7 +324,8 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
struct fsnotify_group *group = NULL;
__u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
__u32 marks_mask = 0;
- __u32 marks_ignored_mask = 0;
+ __u32 marks_ignore_mask = 0;
+ bool is_dir = mask & FS_ISDIR;
struct fsnotify_mark *mark;
int type;
@@ -336,7 +337,7 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
if (!(mark->flags &
FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
- mark->ignored_mask = 0;
+ mark->ignore_mask = 0;
}
}
@@ -344,14 +345,15 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
group = mark->group;
marks_mask |= mark->mask;
- marks_ignored_mask |= mark->ignored_mask;
+ marks_ignore_mask |=
+ fsnotify_effective_ignore_mask(mark, is_dir, type);
}
- pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignored_mask=%x data=%p data_type=%d dir=%p cookie=%d\n",
- __func__, group, mask, marks_mask, marks_ignored_mask,
+ pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignore_mask=%x data=%p data_type=%d dir=%p cookie=%d\n",
+ __func__, group, mask, marks_mask, marks_ignore_mask,
data, data_type, dir, cookie);
- if (!(test_mask & marks_mask & ~marks_ignored_mask))
+ if (!(test_mask & marks_mask & ~marks_ignore_mask))
return 0;
if (group->ops->handle_event) {
@@ -423,7 +425,8 @@ static bool fsnotify_iter_select_report_types(
* But is *this mark* watching children?
*/
if (type == FSNOTIFY_ITER_TYPE_PARENT &&
- !(mark->mask & FS_EVENT_ON_CHILD))
+ !(mark->mask & FS_EVENT_ON_CHILD) &&
+ !(fsnotify_ignore_mask(mark) & FS_EVENT_ON_CHILD))
continue;
fsnotify_iter_set_report_type(iter_info, type);
@@ -532,8 +535,8 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
/*
- * If this is a modify event we may need to clear some ignored masks.
- * In that case, the object with ignored masks will have the FS_MODIFY
+ * If this is a modify event we may need to clear some ignore masks.
+ * In that case, the object with ignore masks will have the FS_MODIFY
* event in its mask.
* Otherwise, return if none of the marks care about this type of event.
*/
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index ed42a189faa2..1c4bfdab008d 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -136,7 +136,7 @@ static inline u32 inotify_mask_to_arg(__u32 mask)
IN_Q_OVERFLOW);
}
-/* intofiy userspace file descriptor functions */
+/* inotify userspace file descriptor functions */
static __poll_t inotify_poll(struct file *file, poll_table *wait)
{
struct fsnotify_group *group = file->private_data;
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 4de597a83b88..52615e6090e1 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -592,8 +592,12 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name,
a = (ATTR_RECORD*)((u8*)ctx->attr +
le32_to_cpu(ctx->attr->length));
for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) {
- if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec +
- le32_to_cpu(ctx->mrec->bytes_allocated))
+ u8 *mrec_end = (u8 *)ctx->mrec +
+ le32_to_cpu(ctx->mrec->bytes_allocated);
+ u8 *name_end = (u8 *)a + le16_to_cpu(a->name_offset) +
+ a->name_length * sizeof(ntfschar);
+ if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end ||
+ name_end > mrec_end)
break;
ctx->attr = a;
if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) ||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 7497cd592258..9c67edd215d5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1146,7 +1146,7 @@ int ocfs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (status)
return status;
- if (is_quota_modification(inode, attr)) {
+ if (is_quota_modification(mnt_userns, inode, attr)) {
status = dquot_initialize(inode);
if (status)
return status;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 337527571461..740b64238312 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -277,7 +277,6 @@ enum ocfs2_mount_options
OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */
OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */
OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */
- OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */
};
#define OCFS2_OSB_SOFT_RO 0x0001
@@ -673,8 +672,7 @@ static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb)
static inline int ocfs2_mount_local(struct ocfs2_super *osb)
{
- return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)
- || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER));
+ return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
}
static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 0b0ae3ebb0cf..da7718cef735 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -252,16 +252,14 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
int i, ret = -ENOSPC;
if ((preferred >= 0) && (preferred < si->si_num_slots)) {
- if (!si->si_slots[preferred].sl_valid ||
- !si->si_slots[preferred].sl_node_num) {
+ if (!si->si_slots[preferred].sl_valid) {
ret = preferred;
goto out;
}
}
for(i = 0; i < si->si_num_slots; i++) {
- if (!si->si_slots[i].sl_valid ||
- !si->si_slots[i].sl_node_num) {
+ if (!si->si_slots[i].sl_valid) {
ret = i;
break;
}
@@ -456,30 +454,24 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
spin_lock(&osb->osb_lock);
ocfs2_update_slot_info(si);
- if (ocfs2_mount_local(osb))
- /* use slot 0 directly in local mode */
- slot = 0;
- else {
- /* search for ourselves first and take the slot if it already
- * exists. Perhaps we need to mark this in a variable for our
- * own journal recovery? Possibly not, though we certainly
- * need to warn to the user */
- slot = __ocfs2_node_num_to_slot(si, osb->node_num);
+ /* search for ourselves first and take the slot if it already
+ * exists. Perhaps we need to mark this in a variable for our
+ * own journal recovery? Possibly not, though we certainly
+ * need to warn to the user */
+ slot = __ocfs2_node_num_to_slot(si, osb->node_num);
+ if (slot < 0) {
+ /* if no slot yet, then just take 1st available
+ * one. */
+ slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
if (slot < 0) {
- /* if no slot yet, then just take 1st available
- * one. */
- slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
- if (slot < 0) {
- spin_unlock(&osb->osb_lock);
- mlog(ML_ERROR, "no free slots available!\n");
- status = -EINVAL;
- goto bail;
- }
- } else
- printk(KERN_INFO "ocfs2: Slot %d on device (%s) was "
- "already allocated to this node!\n",
- slot, osb->dev_str);
- }
+ spin_unlock(&osb->osb_lock);
+ mlog(ML_ERROR, "no free slots available!\n");
+ status = -EINVAL;
+ goto bail;
+ }
+ } else
+ printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
+ "allocated to this node!\n", slot, osb->dev_str);
ocfs2_set_slot(si, slot, osb->node_num);
osb->slot_num = slot;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index f7298816d8d9..438be028935d 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -172,7 +172,6 @@ enum {
Opt_dir_resv_level,
Opt_journal_async_commit,
Opt_err_cont,
- Opt_nocluster,
Opt_err,
};
@@ -206,7 +205,6 @@ static const match_table_t tokens = {
{Opt_dir_resv_level, "dir_resv_level=%u"},
{Opt_journal_async_commit, "journal_async_commit"},
{Opt_err_cont, "errors=continue"},
- {Opt_nocluster, "nocluster"},
{Opt_err, NULL}
};
@@ -618,13 +616,6 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
goto out;
}
- tmp = OCFS2_MOUNT_NOCLUSTER;
- if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
- ret = -EINVAL;
- mlog(ML_ERROR, "Cannot change nocluster option on remount\n");
- goto out;
- }
-
tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
OCFS2_MOUNT_HB_NONE;
if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
@@ -865,7 +856,6 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
}
if (ocfs2_userspace_stack(osb) &&
- !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
strncmp(osb->osb_cluster_stack, mopt->cluster_stack,
OCFS2_STACK_LABEL_LEN)) {
mlog(ML_ERROR,
@@ -1137,11 +1127,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" :
"ordered");
- if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
- !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT))
- printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted "
- "without cluster aware mode.\n", osb->dev_str);
-
atomic_set(&osb->vol_state, VOLUME_MOUNTED);
wake_up(&osb->osb_mount_event);
@@ -1452,9 +1437,6 @@ static int ocfs2_parse_options(struct super_block *sb,
case Opt_journal_async_commit:
mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
break;
- case Opt_nocluster:
- mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER;
- break;
default:
mlog(ML_ERROR,
"Unrecognized mount option \"%s\" "
@@ -1566,9 +1548,6 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
seq_printf(s, ",journal_async_commit");
- if (opts & OCFS2_MOUNT_NOCLUSTER)
- seq_printf(s, ",nocluster");
-
return 0;
}
diff --git a/fs/open.c b/fs/open.c
index 1d57fbde2feb..2790aac66e58 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -663,6 +663,42 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
return do_fchmodat(AT_FDCWD, filename, mode);
}
+/**
+ * setattr_vfsuid - check and set ia_vfsuid attribute
+ * @kuid: new inode owner
+ *
+ * Check whether @kuid is valid and if so generate and set vfsuid_t in
+ * ia_vfsuid.
+ *
+ * Return: true if @kuid is valid, false if not.
+ */
+static inline bool setattr_vfsuid(struct iattr *attr, kuid_t kuid)
+{
+ if (!uid_valid(kuid))
+ return false;
+ attr->ia_valid |= ATTR_UID;
+ attr->ia_vfsuid = VFSUIDT_INIT(kuid);
+ return true;
+}
+
+/**
+ * setattr_vfsgid - check and set ia_vfsgid attribute
+ * @kgid: new inode owner
+ *
+ * Check whether @kgid is valid and if so generate and set vfsgid_t in
+ * ia_vfsgid.
+ *
+ * Return: true if @kgid is valid, false if not.
+ */
+static inline bool setattr_vfsgid(struct iattr *attr, kgid_t kgid)
+{
+ if (!gid_valid(kgid))
+ return false;
+ attr->ia_valid |= ATTR_GID;
+ attr->ia_vfsgid = VFSGIDT_INIT(kgid);
+ return true;
+}
+
int chown_common(const struct path *path, uid_t user, gid_t group)
{
struct user_namespace *mnt_userns, *fs_userns;
@@ -678,28 +714,22 @@ int chown_common(const struct path *path, uid_t user, gid_t group)
mnt_userns = mnt_user_ns(path->mnt);
fs_userns = i_user_ns(inode);
- uid = mapped_kuid_user(mnt_userns, fs_userns, uid);
- gid = mapped_kgid_user(mnt_userns, fs_userns, gid);
retry_deleg:
newattrs.ia_valid = ATTR_CTIME;
- if (user != (uid_t) -1) {
- if (!uid_valid(uid))
- return -EINVAL;
- newattrs.ia_valid |= ATTR_UID;
- newattrs.ia_uid = uid;
- }
- if (group != (gid_t) -1) {
- if (!gid_valid(gid))
- return -EINVAL;
- newattrs.ia_valid |= ATTR_GID;
- newattrs.ia_gid = gid;
- }
+ if ((user != (uid_t)-1) && !setattr_vfsuid(&newattrs, uid))
+ return -EINVAL;
+ if ((group != (gid_t)-1) && !setattr_vfsgid(&newattrs, gid))
+ return -EINVAL;
if (!S_ISDIR(inode->i_mode))
newattrs.ia_valid |=
ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
inode_lock(inode);
- error = security_path_chown(path, uid, gid);
+ /* Continue to send actual fs values, not the mount values. */
+ error = security_path_chown(
+ path,
+ from_vfsuid(mnt_userns, fs_userns, newattrs.ia_vfsuid),
+ from_vfsgid(mnt_userns, fs_userns, newattrs.ia_vfsgid));
if (!error)
error = notify_change(mnt_userns, path->dentry, &newattrs,
&delegated_inode);
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 714ec569d25b..245e2cb62708 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -331,8 +331,8 @@ int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upperdentry,
if (!err) {
struct iattr attr = {
.ia_valid = ATTR_UID | ATTR_GID,
- .ia_uid = stat->uid,
- .ia_gid = stat->gid,
+ .ia_vfsuid = VFSUIDT_INIT(stat->uid),
+ .ia_vfsgid = VFSGIDT_INIT(stat->gid),
};
err = ovl_do_notify_change(ofs, upperdentry, &attr);
}
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 492eddeb481f..7922b619f6c8 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -454,23 +454,94 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
return res;
}
+/*
+ * Apply the idmapping of the layer to POSIX ACLs. The caller must pass a clone
+ * of the POSIX ACLs retrieved from the lower layer to this function to not
+ * alter the POSIX ACLs for the underlying filesystem.
+ */
+static void ovl_idmap_posix_acl(struct user_namespace *mnt_userns,
+ struct posix_acl *acl)
+{
+ for (unsigned int i = 0; i < acl->a_count; i++) {
+ vfsuid_t vfsuid;
+ vfsgid_t vfsgid;
+
+ struct posix_acl_entry *e = &acl->a_entries[i];
+ switch (e->e_tag) {
+ case ACL_USER:
+ vfsuid = make_vfsuid(mnt_userns, &init_user_ns, e->e_uid);
+ e->e_uid = vfsuid_into_kuid(vfsuid);
+ break;
+ case ACL_GROUP:
+ vfsgid = make_vfsgid(mnt_userns, &init_user_ns, e->e_gid);
+ e->e_gid = vfsgid_into_kgid(vfsgid);
+ break;
+ }
+ }
+}
+
+/*
+ * When the relevant layer is an idmapped mount we need to take the idmapping
+ * of the layer into account and translate any ACL_{GROUP,USER} values
+ * according to the idmapped mount.
+ *
+ * We cannot alter the ACLs returned from the relevant layer as that would
+ * alter the cached values filesystem wide for the lower filesystem. Instead we
+ * can clone the ACLs and then apply the relevant idmapping of the layer.
+ *
+ * This is obviously only relevant when idmapped layers are used.
+ */
struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu)
{
struct inode *realinode = ovl_inode_real(inode);
- const struct cred *old_cred;
- struct posix_acl *acl;
+ struct posix_acl *acl, *clone;
+ struct path realpath;
if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode))
return NULL;
- if (rcu)
- return get_cached_acl_rcu(realinode, type);
+ /* Careful in RCU walk mode */
+ ovl_i_path_real(inode, &realpath);
+ if (!realpath.dentry) {
+ WARN_ON(!rcu);
+ return ERR_PTR(-ECHILD);
+ }
- old_cred = ovl_override_creds(inode->i_sb);
- acl = get_acl(realinode, type);
- revert_creds(old_cred);
+ if (rcu) {
+ acl = get_cached_acl_rcu(realinode, type);
+ } else {
+ const struct cred *old_cred;
+
+ old_cred = ovl_override_creds(inode->i_sb);
+ acl = get_acl(realinode, type);
+ revert_creds(old_cred);
+ }
+ /*
+ * If there are no POSIX ACLs, or we encountered an error,
+ * or the layer isn't idmapped we don't need to do anything.
+ */
+ if (!is_idmapped_mnt(realpath.mnt) || IS_ERR_OR_NULL(acl))
+ return acl;
- return acl;
+ /*
+ * We only get here if the layer is idmapped. So drop out of RCU path
+ * walk so we can clone the ACLs. There's no need to release the ACLs
+ * since get_cached_acl_rcu() doesn't take a reference on the ACLs.
+ */
+ if (rcu)
+ return ERR_PTR(-ECHILD);
+
+ clone = posix_acl_clone(acl, GFP_KERNEL);
+ if (!clone)
+ clone = ERR_PTR(-ENOMEM);
+ else
+ ovl_idmap_posix_acl(mnt_user_ns(realpath.mnt), clone);
+ /*
+ * Since we're not in RCU path walk we always need to release the
+ * original ACLs.
+ */
+ posix_acl_release(acl);
+ return clone;
}
int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 4f34b7e02eee..6ec815b84d48 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -139,17 +139,7 @@ static inline int ovl_do_notify_change(struct ovl_fs *ofs,
struct dentry *upperdentry,
struct iattr *attr)
{
- struct user_namespace *upper_mnt_userns = ovl_upper_mnt_userns(ofs);
- struct user_namespace *fs_userns = i_user_ns(d_inode(upperdentry));
-
- if (attr->ia_valid & ATTR_UID)
- attr->ia_uid = mapped_kuid_user(upper_mnt_userns,
- fs_userns, attr->ia_uid);
- if (attr->ia_valid & ATTR_GID)
- attr->ia_gid = mapped_kgid_user(upper_mnt_userns,
- fs_userns, attr->ia_gid);
-
- return notify_change(upper_mnt_userns, upperdentry, attr, NULL);
+ return notify_change(ovl_upper_mnt_userns(ofs), upperdentry, attr, NULL);
}
static inline int ovl_do_rmdir(struct ovl_fs *ofs,
@@ -259,7 +249,8 @@ static inline int ovl_do_setxattr(struct ovl_fs *ofs, struct dentry *dentry,
const char *name, const void *value,
size_t size, int flags)
{
- int err = vfs_setxattr(ovl_upper_mnt_userns(ofs), dentry, name, value, size, flags);
+ int err = vfs_setxattr(ovl_upper_mnt_userns(ofs), dentry, name,
+ (void *)value, size, flags);
pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, %d) = %i\n",
dentry, name, min((int)size, 48), value, size, flags, err);
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 962d32468eb4..1d17d7b13dcd 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -199,7 +199,7 @@ EXPORT_SYMBOL(posix_acl_alloc);
/*
* Clone an ACL.
*/
-static struct posix_acl *
+struct posix_acl *
posix_acl_clone(const struct posix_acl *acl, gfp_t flags)
{
struct posix_acl *clone = NULL;
@@ -213,6 +213,7 @@ posix_acl_clone(const struct posix_acl *acl, gfp_t flags)
}
return clone;
}
+EXPORT_SYMBOL_GPL(posix_acl_clone);
/*
* Check if an acl is valid. Returns 0 if it is, or -E... otherwise.
@@ -361,8 +362,8 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
{
const struct posix_acl_entry *pa, *pe, *mask_obj;
int found = 0;
- kuid_t uid;
- kgid_t gid;
+ vfsuid_t vfsuid;
+ vfsgid_t vfsgid;
want &= MAY_READ | MAY_WRITE | MAY_EXEC;
@@ -370,30 +371,28 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
switch(pa->e_tag) {
case ACL_USER_OBJ:
/* (May have been checked already) */
- uid = i_uid_into_mnt(mnt_userns, inode);
- if (uid_eq(uid, current_fsuid()))
+ vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
+ if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
goto check_perm;
break;
case ACL_USER:
- uid = mapped_kuid_fs(mnt_userns,
- i_user_ns(inode),
+ vfsuid = make_vfsuid(mnt_userns, &init_user_ns,
pa->e_uid);
- if (uid_eq(uid, current_fsuid()))
+ if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
goto mask;
break;
case ACL_GROUP_OBJ:
- gid = i_gid_into_mnt(mnt_userns, inode);
- if (in_group_p(gid)) {
+ vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+ if (vfsgid_in_group_p(vfsgid)) {
found = 1;
if ((pa->e_perm & want) == want)
goto mask;
}
break;
case ACL_GROUP:
- gid = mapped_kgid_fs(mnt_userns,
- i_user_ns(inode),
+ vfsgid = make_vfsgid(mnt_userns, &init_user_ns,
pa->e_gid);
- if (in_group_p(gid)) {
+ if (vfsgid_in_group_p(vfsgid)) {
found = 1;
if ((pa->e_perm & want) == want)
goto mask;
@@ -699,7 +698,7 @@ int posix_acl_update_mode(struct user_namespace *mnt_userns,
return error;
if (error == 0)
*acl = NULL;
- if (!in_group_p(i_gid_into_mnt(mnt_userns, inode)) &&
+ if (!vfsgid_in_group_p(i_gid_into_vfsgid(mnt_userns, inode)) &&
!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
mode &= ~S_ISGID;
*mode_p = mode;
@@ -710,46 +709,127 @@ EXPORT_SYMBOL(posix_acl_update_mode);
/*
* Fix up the uids and gids in posix acl extended attributes in place.
*/
-static void posix_acl_fix_xattr_userns(
- struct user_namespace *to, struct user_namespace *from,
- struct user_namespace *mnt_userns,
- void *value, size_t size, bool from_user)
+static int posix_acl_fix_xattr_common(void *value, size_t size)
+{
+ struct posix_acl_xattr_header *header = value;
+ int count;
+
+ if (!header)
+ return -EINVAL;
+ if (size < sizeof(struct posix_acl_xattr_header))
+ return -EINVAL;
+ if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
+ return -EINVAL;
+
+ count = posix_acl_xattr_count(size);
+ if (count < 0)
+ return -EINVAL;
+ if (count == 0)
+ return -EINVAL;
+
+ return count;
+}
+
+void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns,
+ const struct inode *inode,
+ void *value, size_t size)
{
struct posix_acl_xattr_header *header = value;
struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
int count;
+ vfsuid_t vfsuid;
+ vfsgid_t vfsgid;
kuid_t uid;
kgid_t gid;
- if (!value)
+ if (no_idmapping(mnt_userns, i_user_ns(inode)))
return;
- if (size < sizeof(struct posix_acl_xattr_header))
+
+ count = posix_acl_fix_xattr_common(value, size);
+ if (count < 0)
return;
- if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
+
+ for (end = entry + count; entry != end; entry++) {
+ switch (le16_to_cpu(entry->e_tag)) {
+ case ACL_USER:
+ uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
+ vfsuid = make_vfsuid(mnt_userns, &init_user_ns, uid);
+ entry->e_id = cpu_to_le32(from_kuid(&init_user_ns,
+ vfsuid_into_kuid(vfsuid)));
+ break;
+ case ACL_GROUP:
+ gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
+ vfsgid = make_vfsgid(mnt_userns, &init_user_ns, gid);
+ entry->e_id = cpu_to_le32(from_kgid(&init_user_ns,
+ vfsgid_into_kgid(vfsgid)));
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns,
+ const struct inode *inode,
+ void *value, size_t size)
+{
+ struct posix_acl_xattr_header *header = value;
+ struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
+ int count;
+ vfsuid_t vfsuid;
+ vfsgid_t vfsgid;
+ kuid_t uid;
+ kgid_t gid;
+
+ if (no_idmapping(mnt_userns, i_user_ns(inode)))
return;
- count = posix_acl_xattr_count(size);
+ count = posix_acl_fix_xattr_common(value, size);
if (count < 0)
return;
- if (count == 0)
+
+ for (end = entry + count; entry != end; entry++) {
+ switch (le16_to_cpu(entry->e_tag)) {
+ case ACL_USER:
+ uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
+ vfsuid = VFSUIDT_INIT(uid);
+ uid = from_vfsuid(mnt_userns, &init_user_ns, vfsuid);
+ entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, uid));
+ break;
+ case ACL_GROUP:
+ gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
+ vfsgid = VFSGIDT_INIT(gid);
+ gid = from_vfsgid(mnt_userns, &init_user_ns, vfsgid);
+ entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, gid));
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void posix_acl_fix_xattr_userns(
+ struct user_namespace *to, struct user_namespace *from,
+ void *value, size_t size)
+{
+ struct posix_acl_xattr_header *header = value;
+ struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
+ int count;
+ kuid_t uid;
+ kgid_t gid;
+
+ count = posix_acl_fix_xattr_common(value, size);
+ if (count < 0)
return;
for (end = entry + count; entry != end; entry++) {
switch(le16_to_cpu(entry->e_tag)) {
case ACL_USER:
uid = make_kuid(from, le32_to_cpu(entry->e_id));
- if (from_user)
- uid = mapped_kuid_user(mnt_userns, &init_user_ns, uid);
- else
- uid = mapped_kuid_fs(mnt_userns, &init_user_ns, uid);
entry->e_id = cpu_to_le32(from_kuid(to, uid));
break;
case ACL_GROUP:
gid = make_kgid(from, le32_to_cpu(entry->e_id));
- if (from_user)
- gid = mapped_kgid_user(mnt_userns, &init_user_ns, gid);
- else
- gid = mapped_kgid_fs(mnt_userns, &init_user_ns, gid);
entry->e_id = cpu_to_le32(from_kgid(to, gid));
break;
default:
@@ -758,34 +838,20 @@ static void posix_acl_fix_xattr_userns(
}
}
-void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
- struct inode *inode,
- void *value, size_t size)
+void posix_acl_fix_xattr_from_user(void *value, size_t size)
{
struct user_namespace *user_ns = current_user_ns();
-
- /* Leave ids untouched on non-idmapped mounts. */
- if (no_idmapping(mnt_userns, i_user_ns(inode)))
- mnt_userns = &init_user_ns;
- if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns))
+ if (user_ns == &init_user_ns)
return;
- posix_acl_fix_xattr_userns(&init_user_ns, user_ns, mnt_userns, value,
- size, true);
+ posix_acl_fix_xattr_userns(&init_user_ns, user_ns, value, size);
}
-void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns,
- struct inode *inode,
- void *value, size_t size)
+void posix_acl_fix_xattr_to_user(void *value, size_t size)
{
struct user_namespace *user_ns = current_user_ns();
-
- /* Leave ids untouched on non-idmapped mounts. */
- if (no_idmapping(mnt_userns, i_user_ns(inode)))
- mnt_userns = &init_user_ns;
- if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns))
+ if (user_ns == &init_user_ns)
return;
- posix_acl_fix_xattr_userns(user_ns, &init_user_ns, mnt_userns, value,
- size, false);
+ posix_acl_fix_xattr_userns(user_ns, &init_user_ns, value, size);
}
/*
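The qualified ACL_USER/ACL_GROUP entries that posix_acl_setxattr_idmapped_mnt() and posix_acl_getxattr_idmapped_mnt() now translate originate from ordinary userspace ACL updates. A sketch of producing such an entry follows, assuming libacl is available (link with -lacl); the path and uid are illustrative only.

/* Set an access ACL carrying an ACL_USER entry; on an idmapped mount the
 * uid qualifier in the resulting system.posix_acl_access xattr is what the
 * new kernel helpers above remap. */
#include <stdio.h>
#include <stdlib.h>
#include <sys/acl.h>

int main(int argc, char **argv)
{
	acl_t acl;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return EXIT_FAILURE;
	}

	/* user::rw-, group::r--, other::---, plus uid 1000 and a mask. */
	acl = acl_from_text("u::rw-,g::r--,o::---,u:1000:rw-,m::rw-");
	if (!acl) {
		perror("acl_from_text");
		return EXIT_FAILURE;
	}

	if (acl_set_file(argv[1], ACL_TYPE_ACCESS, acl)) {
		perror("acl_set_file");
		acl_free(acl);
		return EXIT_FAILURE;
	}

	acl_free(acl);
	return EXIT_SUCCESS;
}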
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 09d1307959d0..28966da7834e 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2085,7 +2085,8 @@ EXPORT_SYMBOL(__dquot_transfer);
/* Wrapper for transferring ownership of an inode for uid/gid only
* Called from FSXXX_setattr()
*/
-int dquot_transfer(struct inode *inode, struct iattr *iattr)
+int dquot_transfer(struct user_namespace *mnt_userns, struct inode *inode,
+ struct iattr *iattr)
{
struct dquot *transfer_to[MAXQUOTAS] = {};
struct dquot *dquot;
@@ -2095,8 +2096,11 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
if (!dquot_active(inode))
return 0;
- if (iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)){
- dquot = dqget(sb, make_kqid_uid(iattr->ia_uid));
+ if (i_uid_needs_update(mnt_userns, iattr, inode)) {
+ kuid_t kuid = from_vfsuid(mnt_userns, i_user_ns(inode),
+ iattr->ia_vfsuid);
+
+ dquot = dqget(sb, make_kqid_uid(kuid));
if (IS_ERR(dquot)) {
if (PTR_ERR(dquot) != -ESRCH) {
ret = PTR_ERR(dquot);
@@ -2106,8 +2110,11 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
}
transfer_to[USRQUOTA] = dquot;
}
- if (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid)){
- dquot = dqget(sb, make_kqid_gid(iattr->ia_gid));
+ if (i_gid_needs_update(mnt_userns, iattr, inode)) {
+ kgid_t kgid = from_vfsgid(mnt_userns, i_user_ns(inode),
+ iattr->ia_vfsgid);
+
+ dquot = dqget(sb, make_kqid_gid(kgid));
if (IS_ERR(dquot)) {
if (PTR_ERR(dquot) != -ESRCH) {
ret = PTR_ERR(dquot);
diff --git a/fs/read_write.c b/fs/read_write.c
index e0777eefd846..397da0236607 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1263,6 +1263,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
count, fl);
file_end_write(out.file);
} else {
+ if (out.file->f_flags & O_NONBLOCK)
+ fl |= SPLICE_F_NONBLOCK;
+
retval = splice_file_to_pipe(in.file, opipe, &pos, count, fl);
}
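With the hunk above, sendfile() into a pipe honours the pipe's O_NONBLOCK flag instead of blocking inside splice_file_to_pipe(). A userspace sketch of handling the resulting EAGAIN; the helper name is made up.

#include <errno.h>
#include <poll.h>
#include <sys/sendfile.h>
#include <sys/types.h>

static int copy_file_to_pipe(int in_fd, int pipe_wr, size_t len)
{
	off_t off = 0;

	while (len > 0) {
		ssize_t n = sendfile(pipe_wr, in_fd, &off, len);

		if (n > 0) {
			len -= n;
			continue;
		}
		if (n < 0 && errno == EAGAIN) {
			/* Pipe full: wait until the reader drains it. */
			struct pollfd pfd = { .fd = pipe_wr, .events = POLLOUT };

			if (poll(&pfd, 1, -1) < 0)
				return -1;
			continue;
		}
		return n; /* 0 on EOF, -1 on other errors */
	}
	return 0;
}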
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0cffe054b78e..0df48d176732 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -290,7 +290,7 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
struct buffer_head *bh;
struct item_head *ih, tmp_ih;
b_blocknr_t blocknr;
- char *p = NULL;
+ char *p;
int chars;
int ret;
int result;
@@ -305,8 +305,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
result = search_for_position_by_key(inode->i_sb, &key, &path);
if (result != POSITION_FOUND) {
pathrelse(&path);
- if (p)
- kunmap(bh_result->b_page);
if (result == IO_ERROR)
return -EIO;
/*
@@ -352,8 +350,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
}
pathrelse(&path);
- if (p)
- kunmap(bh_result->b_page);
return ret;
}
/* requested data are in direct item(s) */
@@ -363,8 +359,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
* when it is stored in direct item(s)
*/
pathrelse(&path);
- if (p)
- kunmap(bh_result->b_page);
return -ENOENT;
}
@@ -396,9 +390,7 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
* sure we need to. But, this means the item might move if
* kmap schedules
*/
- if (!p)
- p = (char *)kmap(bh_result->b_page);
-
+ p = (char *)kmap(bh_result->b_page);
p += offset;
memset(p, 0, inode->i_sb->s_blocksize);
do {
@@ -3284,7 +3276,7 @@ int reiserfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
/* must be turned off for recursive notify_change calls */
ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
- if (is_quota_modification(inode, attr)) {
+ if (is_quota_modification(mnt_userns, inode, attr)) {
error = dquot_initialize(inode);
if (error)
return error;
@@ -3367,7 +3359,7 @@ int reiserfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
reiserfs_write_unlock(inode->i_sb);
if (error)
goto out;
- error = dquot_transfer(inode, attr);
+ error = dquot_transfer(mnt_userns, inode, attr);
reiserfs_write_lock(inode->i_sb);
if (error) {
journal_end(&th);
diff --git a/fs/remap_range.c b/fs/remap_range.c
index e112b5424cdb..881a306ee247 100644
--- a/fs/remap_range.c
+++ b/fs/remap_range.c
@@ -71,7 +71,8 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in,
* Otherwise, make sure the count is also block-aligned, having
* already confirmed the starting offsets' block alignment.
*/
- if (pos_in + count == size_in) {
+ if (pos_in + count == size_in &&
+ (!(remap_flags & REMAP_FILE_DEDUP) || pos_out + count == size_out)) {
bcount = ALIGN(size_in, bs) - pos_in;
} else {
if (!IS_ALIGNED(count, bs))
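The relaxed check above lets a deduplication request cover an unaligned tail when the range ends at EOF in both files. A sketch of issuing such a whole-file request with FIDEDUPERANGE; the helper name is made up.

#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <linux/fs.h>

static int dedupe_whole(int src_fd, int dst_fd)
{
	struct file_dedupe_range *arg;
	struct stat st;
	int ret = -1;

	if (fstat(src_fd, &st))
		return -1;

	/* One destination entry follows the fixed header. */
	arg = calloc(1, sizeof(*arg) + sizeof(struct file_dedupe_range_info));
	if (!arg)
		return -1;

	arg->src_offset = 0;
	arg->src_length = st.st_size;	/* may end on an unaligned EOF */
	arg->dest_count = 1;
	arg->info[0].dest_fd = dst_fd;
	arg->info[0].dest_offset = 0;

	if (ioctl(src_fd, FIDEDUPERANGE, arg) == 0 &&
	    arg->info[0].status == FILE_DEDUPE_RANGE_SAME)
		ret = 0;

	free(arg);
	return ret;
}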
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index e943370107d0..de86f5b2859f 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -192,17 +192,19 @@ static inline void msg_init(struct uffd_msg *msg)
}
static inline struct uffd_msg userfault_msg(unsigned long address,
+ unsigned long real_address,
unsigned int flags,
unsigned long reason,
unsigned int features)
{
struct uffd_msg msg;
+
msg_init(&msg);
msg.event = UFFD_EVENT_PAGEFAULT;
- if (!(features & UFFD_FEATURE_EXACT_ADDRESS))
- address &= PAGE_MASK;
- msg.arg.pagefault.address = address;
+ msg.arg.pagefault.address = (features & UFFD_FEATURE_EXACT_ADDRESS) ?
+ real_address : address;
+
/*
* These flags indicate why the userfault occurred:
* - UFFD_PAGEFAULT_FLAG_WP indicates a write protect fault.
@@ -488,8 +490,8 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
uwq.wq.private = current;
- uwq.msg = userfault_msg(vmf->real_address, vmf->flags, reason,
- ctx->features);
+ uwq.msg = userfault_msg(vmf->address, vmf->real_address, vmf->flags,
+ reason, ctx->features);
uwq.ctx = ctx;
uwq.waken = false;
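A sketch of the userspace side of UFFD_FEATURE_EXACT_ADDRESS, whose kernel plumbing is reworked above: request the feature during the UFFDIO_API handshake, after which msg.arg.pagefault.address carries the exact fault address rather than the page-aligned one. The feature bit value is an assumption for older headers.

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/userfaultfd.h>

#ifndef UFFD_FEATURE_EXACT_ADDRESS
#define UFFD_FEATURE_EXACT_ADDRESS (1 << 11)	/* assumed value */
#endif

static int open_uffd_exact(void)
{
	struct uffdio_api api;
	int fd;

	fd = syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	if (fd < 0)
		return -1;

	memset(&api, 0, sizeof(api));
	api.api = UFFD_API;
	api.features = UFFD_FEATURE_EXACT_ADDRESS;
	if (ioctl(fd, UFFDIO_API, &api) ||
	    !(api.features & UFFD_FEATURE_EXACT_ADDRESS)) {
		close(fd);
		return -1;
	}
	return fd;
}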
diff --git a/fs/xattr.c b/fs/xattr.c
index e8dd03e4561e..a1f4998bc6be 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -282,9 +282,15 @@ out:
}
EXPORT_SYMBOL_GPL(__vfs_setxattr_locked);
+static inline bool is_posix_acl_xattr(const char *name)
+{
+ return (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
+ (strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0);
+}
+
int
vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
- const char *name, const void *value, size_t size, int flags)
+ const char *name, void *value, size_t size, int flags)
{
struct inode *inode = dentry->d_inode;
struct inode *delegated_inode = NULL;
@@ -292,12 +298,16 @@ vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
int error;
if (size && strcmp(name, XATTR_NAME_CAPS) == 0) {
- error = cap_convert_nscap(mnt_userns, dentry, &value, size);
+ error = cap_convert_nscap(mnt_userns, dentry,
+ (const void **)&value, size);
if (error < 0)
return error;
size = error;
}
+ if (size && is_posix_acl_xattr(name))
+ posix_acl_setxattr_idmapped_mnt(mnt_userns, inode, value, size);
+
retry_deleg:
inode_lock(inode);
error = __vfs_setxattr_locked(mnt_userns, dentry, name, value, size,
@@ -431,7 +441,10 @@ vfs_getxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
return ret;
}
nolsm:
- return __vfs_getxattr(dentry, inode, name, value, size);
+ error = __vfs_getxattr(dentry, inode, name, value, size);
+ if (error > 0 && is_posix_acl_xattr(name))
+ posix_acl_getxattr_idmapped_mnt(mnt_userns, inode, value, size);
+ return error;
}
EXPORT_SYMBOL_GPL(vfs_getxattr);
@@ -577,8 +590,7 @@ static void setxattr_convert(struct user_namespace *mnt_userns,
if (ctx->size &&
((strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
(strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)))
- posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d),
- ctx->kvalue, ctx->size);
+ posix_acl_fix_xattr_from_user(ctx->kvalue, ctx->size);
}
int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
@@ -695,8 +707,7 @@ do_getxattr(struct user_namespace *mnt_userns, struct dentry *d,
if (error > 0) {
if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
(strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
- posix_acl_fix_xattr_to_user(mnt_userns, d_inode(d),
- ctx->kvalue, error);
+ posix_acl_fix_xattr_to_user(ctx->kvalue, error);
if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error))
error = -EFAULT;
} else if (error == -ERANGE && ctx->size >= XATTR_SIZE_MAX) {
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 29f5b8b8aca6..a7402f6ea510 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -667,13 +667,15 @@ xfs_setattr_nonsize(
uint qflags = 0;
if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
- uid = iattr->ia_uid;
+ uid = from_vfsuid(mnt_userns, i_user_ns(inode),
+ iattr->ia_vfsuid);
qflags |= XFS_QMOPT_UQUOTA;
} else {
uid = inode->i_uid;
}
if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
- gid = iattr->ia_gid;
+ gid = from_vfsgid(mnt_userns, i_user_ns(inode),
+ iattr->ia_vfsgid);
qflags |= XFS_QMOPT_GQUOTA;
} else {
gid = inode->i_gid;
@@ -704,13 +706,13 @@ xfs_setattr_nonsize(
* didn't have the inode locked, inode's dquot(s) would have changed
* also.
*/
- if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp) &&
- !uid_eq(inode->i_uid, iattr->ia_uid)) {
+ if (XFS_IS_UQUOTA_ON(mp) &&
+ i_uid_needs_update(mnt_userns, iattr, inode)) {
ASSERT(udqp);
old_udqp = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp);
}
- if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp) &&
- !gid_eq(inode->i_gid, iattr->ia_gid)) {
+ if (XFS_IS_GQUOTA_ON(mp) &&
+ i_gid_needs_update(mnt_userns, iattr, inode)) {
ASSERT(xfs_has_pquotino(mp) || !XFS_IS_PQUOTA_ON(mp));
ASSERT(gdqp);
old_gdqp = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp);
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 053299758deb..f5d8338967cb 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -616,7 +616,7 @@ static int zonefs_inode_setattr(struct user_namespace *mnt_userns,
!uid_eq(iattr->ia_uid, inode->i_uid)) ||
((iattr->ia_valid & ATTR_GID) &&
!gid_eq(iattr->ia_gid, inode->i_gid))) {
- ret = dquot_transfer(inode, iattr);
+ ret = dquot_transfer(mnt_userns, inode, iattr);
if (ret)
return ret;
}
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index fd7e8fbaeef1..961f4d88f9ef 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -38,6 +38,10 @@
#define wmb() do { kcsan_wmb(); __wmb(); } while (0)
#endif
+#ifdef __dma_mb
+#define dma_mb() do { kcsan_mb(); __dma_mb(); } while (0)
+#endif
+
#ifdef __dma_rmb
#define dma_rmb() do { kcsan_rmb(); __dma_rmb(); } while (0)
#endif
@@ -65,6 +69,10 @@
#define wmb() mb()
#endif
+#ifndef dma_mb
+#define dma_mb() mb()
+#endif
+
#ifndef dma_rmb
#define dma_rmb() rmb()
#endif
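A hypothetical driver fragment (not from the patch) showing where the newly wired-up dma_mb() helps: ordering a store to coherent DMA memory before a subsequent load from it, which dma_wmb() or dma_rmb() alone do not guarantee. The ring layout and names are made up.

#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/barrier.h>

struct demo_ring {
	u32 tail;	/* written by the CPU, read by the device */
	u32 head;	/* written by the device, read by the CPU */
};

static bool demo_ring_post(struct demo_ring *ring, u32 new_tail, u32 size)
{
	/* Publish the new tail in the shared, coherent descriptor area... */
	WRITE_ONCE(ring->tail, new_tail);

	/* ...and only then sample how far the device has consumed. */
	dma_mb();

	return (new_tail - READ_ONCE(ring->head)) < size;
}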
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index 7ce93aaf69f8..72974cb81343 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -964,7 +964,34 @@ static inline void iounmap(volatile void __iomem *addr)
#elif defined(CONFIG_GENERIC_IOREMAP)
#include <linux/pgtable.h>
-void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot);
+/*
+ * Arch code can implement the following two hooks when using GENERIC_IOREMAP
+ * ioremap_allowed() return a bool,
+ * - true means continue to remap
+ * - false means skip remap and return directly
+ * iounmap_allowed() return a bool,
+ * - true means continue to vunmap
+ * - false means skip vunmap and return directly
+ */
+#ifndef ioremap_allowed
+#define ioremap_allowed ioremap_allowed
+static inline bool ioremap_allowed(phys_addr_t phys_addr, size_t size,
+ unsigned long prot)
+{
+ return true;
+}
+#endif
+
+#ifndef iounmap_allowed
+#define iounmap_allowed iounmap_allowed
+static inline bool iounmap_allowed(void *addr)
+{
+ return true;
+}
+#endif
+
+void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
+ unsigned long prot);
void iounmap(volatile void __iomem *addr);
static inline void __iomem *ioremap(phys_addr_t addr, size_t size)
@@ -1125,9 +1152,7 @@ static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer,
}
#endif
-#ifndef CONFIG_GENERIC_DEVMEM_IS_ALLOWED
extern int devmem_is_allowed(unsigned long pfn);
-#endif
#endif /* __KERNEL__ */
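A hypothetical example of an architecture hooking the new GENERIC_IOREMAP callback declared above; arch_ram_pfn() is a stand-in for whatever RAM test the architecture actually provides.

/* arch/example/include/asm/io.h fragment (illustrative only) */
#define ioremap_allowed ioremap_allowed
static inline bool ioremap_allowed(phys_addr_t phys_addr, size_t size,
				   unsigned long prot)
{
	unsigned long last = phys_addr + size - 1;

	/* Refuse to establish MMIO mappings over system RAM. */
	return !arch_ram_pfn(PHYS_PFN(phys_addr)) &&
	       !arch_ram_pfn(PHYS_PFN(last));
}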
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index ff3e82553a76..492dce43236e 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -158,9 +158,24 @@
* Useful if your architecture doesn't use IPIs for remote TLB invalidates
* and therefore doesn't naturally serialize with software page-table walkers.
*
+ * MMU_GATHER_NO_FLUSH_CACHE
+ *
+ * Indicates the architecture has flush_cache_range() but it need *NOT* be called
+ * before unmapping a VMA.
+ *
+ * NOTE: strictly speaking we shouldn't have this knob and instead rely on
+ * flush_cache_range() being a NOP, except Sparc64 seems to be
+ * different here.
+ *
+ * MMU_GATHER_MERGE_VMAS
+ *
+ * Indicates the architecture wants to merge ranges over VMAs; typical when
+ * multiple range invalidates are more expensive than a full invalidate.
+ *
* MMU_GATHER_NO_RANGE
*
- * Use this if your architecture lacks an efficient flush_tlb_range().
+ * Use this if your architecture lacks an efficient flush_tlb_range(). This
+ * option implies MMU_GATHER_MERGE_VMAS above.
*
* MMU_GATHER_NO_GATHER
*
@@ -288,6 +303,7 @@ struct mmu_gather {
*/
unsigned int vma_exec : 1;
unsigned int vma_huge : 1;
+ unsigned int vma_pfn : 1;
unsigned int batch_count;
@@ -334,8 +350,8 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb)
#ifdef CONFIG_MMU_GATHER_NO_RANGE
-#if defined(tlb_flush) || defined(tlb_start_vma) || defined(tlb_end_vma)
-#error MMU_GATHER_NO_RANGE relies on default tlb_flush(), tlb_start_vma() and tlb_end_vma()
+#if defined(tlb_flush)
+#error MMU_GATHER_NO_RANGE relies on default tlb_flush()
#endif
/*
@@ -352,20 +368,9 @@ static inline void tlb_flush(struct mmu_gather *tlb)
flush_tlb_mm(tlb->mm);
}
-static inline void
-tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
-
-#define tlb_end_vma tlb_end_vma
-static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
-
#else /* CONFIG_MMU_GATHER_NO_RANGE */
#ifndef tlb_flush
-
-#if defined(tlb_start_vma) || defined(tlb_end_vma)
-#error Default tlb_flush() relies on default tlb_start_vma() and tlb_end_vma()
-#endif
-
/*
* When an architecture does not provide its own tlb_flush() implementation
* but does have a reasonably efficient flush_vma_range() implementation
@@ -385,6 +390,9 @@ static inline void tlb_flush(struct mmu_gather *tlb)
flush_tlb_range(&vma, tlb->start, tlb->end);
}
}
+#endif
+
+#endif /* CONFIG_MMU_GATHER_NO_RANGE */
static inline void
tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
@@ -402,17 +410,9 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
*/
tlb->vma_huge = is_vm_hugetlb_page(vma);
tlb->vma_exec = !!(vma->vm_flags & VM_EXEC);
+ tlb->vma_pfn = !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP));
}
-#else
-
-static inline void
-tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
-
-#endif
-
-#endif /* CONFIG_MMU_GATHER_NO_RANGE */
-
static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
{
/*
@@ -486,32 +486,36 @@ static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb)
* case where we're doing a full MM flush. When we're doing a munmap,
* the vmas are adjusted to only cover the region to be torn down.
*/
-#ifndef tlb_start_vma
static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
if (tlb->fullmm)
return;
tlb_update_vma_flags(tlb, vma);
+#ifndef CONFIG_MMU_GATHER_NO_FLUSH_CACHE
flush_cache_range(vma, vma->vm_start, vma->vm_end);
-}
#endif
+}
-#ifndef tlb_end_vma
static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
if (tlb->fullmm)
return;
/*
- * Do a TLB flush and reset the range at VMA boundaries; this avoids
- * the ranges growing with the unused space between consecutive VMAs,
- * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on
- * this.
+ * VM_PFNMAP is more fragile because the core mm will not track the
+ * page mapcount -- there might not be page-frames for these PFNs after
+ * all. Force flush TLBs for such ranges to avoid munmap() vs
+ * unmap_mapping_range() races.
*/
- tlb_flush_mmu_tlbonly(tlb);
+ if (tlb->vma_pfn || !IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS)) {
+ /*
+ * Do a TLB flush and reset the range at VMA boundaries; this avoids
+ * the ranges growing with the unused space between consecutive VMAs.
+ */
+ tlb_flush_mmu_tlbonly(tlb);
+ }
}
-#endif
/*
* tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end,
diff --git a/include/clocksource/timer-ti-dm.h b/include/clocksource/timer-ti-dm.h
index f6da8a132639..b0f80cfd2a26 100644
--- a/include/clocksource/timer-ti-dm.h
+++ b/include/clocksource/timer-ti-dm.h
@@ -247,148 +247,4 @@ int omap_dm_timers_active(void);
#define OMAP_TIMER_TICK_INT_MASK_COUNT_REG \
(_OMAP_TIMER_TICK_INT_MASK_COUNT_OFFSET | (WP_TOWR << WPSHIFT))
-/*
- * The below are inlined to optimize code size for system timers. Other code
- * should not need these at all.
- */
-#if defined(CONFIG_ARCH_OMAP1) || defined(CONFIG_ARCH_OMAP2PLUS)
-static inline u32 __omap_dm_timer_read(struct omap_dm_timer *timer, u32 reg,
- int posted)
-{
- if (posted)
- while (readl_relaxed(timer->pend) & (reg >> WPSHIFT))
- cpu_relax();
-
- return readl_relaxed(timer->func_base + (reg & 0xff));
-}
-
-static inline void __omap_dm_timer_write(struct omap_dm_timer *timer,
- u32 reg, u32 val, int posted)
-{
- if (posted)
- while (readl_relaxed(timer->pend) & (reg >> WPSHIFT))
- cpu_relax();
-
- writel_relaxed(val, timer->func_base + (reg & 0xff));
-}
-
-static inline void __omap_dm_timer_init_regs(struct omap_dm_timer *timer)
-{
- u32 tidr;
-
- /* Assume v1 ip if bits [31:16] are zero */
- tidr = readl_relaxed(timer->io_base);
- if (!(tidr >> 16)) {
- timer->revision = 1;
- timer->irq_stat = timer->io_base + OMAP_TIMER_V1_STAT_OFFSET;
- timer->irq_ena = timer->io_base + OMAP_TIMER_V1_INT_EN_OFFSET;
- timer->irq_dis = timer->io_base + OMAP_TIMER_V1_INT_EN_OFFSET;
- timer->pend = timer->io_base + _OMAP_TIMER_WRITE_PEND_OFFSET;
- timer->func_base = timer->io_base;
- } else {
- timer->revision = 2;
- timer->irq_stat = timer->io_base + OMAP_TIMER_V2_IRQSTATUS;
- timer->irq_ena = timer->io_base + OMAP_TIMER_V2_IRQENABLE_SET;
- timer->irq_dis = timer->io_base + OMAP_TIMER_V2_IRQENABLE_CLR;
- timer->pend = timer->io_base +
- _OMAP_TIMER_WRITE_PEND_OFFSET +
- OMAP_TIMER_V2_FUNC_OFFSET;
- timer->func_base = timer->io_base + OMAP_TIMER_V2_FUNC_OFFSET;
- }
-}
-
-/*
- * __omap_dm_timer_enable_posted - enables write posted mode
- * @timer: pointer to timer instance handle
- *
- * Enables the write posted mode for the timer. When posted mode is enabled
- * writes to certain timer registers are immediately acknowledged by the
- * internal bus and hence prevents stalling the CPU waiting for the write to
- * complete. Enabling this feature can improve performance for writing to the
- * timer registers.
- */
-static inline void __omap_dm_timer_enable_posted(struct omap_dm_timer *timer)
-{
- if (timer->posted)
- return;
-
- if (timer->errata & OMAP_TIMER_ERRATA_I103_I767) {
- timer->posted = OMAP_TIMER_NONPOSTED;
- __omap_dm_timer_write(timer, OMAP_TIMER_IF_CTRL_REG, 0, 0);
- return;
- }
-
- __omap_dm_timer_write(timer, OMAP_TIMER_IF_CTRL_REG,
- OMAP_TIMER_CTRL_POSTED, 0);
- timer->context.tsicr = OMAP_TIMER_CTRL_POSTED;
- timer->posted = OMAP_TIMER_POSTED;
-}
-
-/**
- * __omap_dm_timer_override_errata - override errata flags for a timer
- * @timer: pointer to timer handle
- * @errata: errata flags to be ignored
- *
- * For a given timer, override a timer errata by clearing the flags
- * specified by the errata argument. A specific erratum should only be
- * overridden for a timer if the timer is used in such a way the erratum
- * has no impact.
- */
-static inline void __omap_dm_timer_override_errata(struct omap_dm_timer *timer,
- u32 errata)
-{
- timer->errata &= ~errata;
-}
-
-static inline void __omap_dm_timer_stop(struct omap_dm_timer *timer,
- int posted, unsigned long rate)
-{
- u32 l;
-
- l = __omap_dm_timer_read(timer, OMAP_TIMER_CTRL_REG, posted);
- if (l & OMAP_TIMER_CTRL_ST) {
- l &= ~0x1;
- __omap_dm_timer_write(timer, OMAP_TIMER_CTRL_REG, l, posted);
-#ifdef CONFIG_ARCH_OMAP2PLUS
- /* Readback to make sure write has completed */
- __omap_dm_timer_read(timer, OMAP_TIMER_CTRL_REG, posted);
- /*
- * Wait for functional clock period x 3.5 to make sure that
- * timer is stopped
- */
- udelay(3500000 / rate + 1);
-#endif
- }
-
- /* Ack possibly pending interrupt */
- writel_relaxed(OMAP_TIMER_INT_OVERFLOW, timer->irq_stat);
-}
-
-static inline void __omap_dm_timer_load_start(struct omap_dm_timer *timer,
- u32 ctrl, unsigned int load,
- int posted)
-{
- __omap_dm_timer_write(timer, OMAP_TIMER_COUNTER_REG, load, posted);
- __omap_dm_timer_write(timer, OMAP_TIMER_CTRL_REG, ctrl, posted);
-}
-
-static inline void __omap_dm_timer_int_enable(struct omap_dm_timer *timer,
- unsigned int value)
-{
- writel_relaxed(value, timer->irq_ena);
- __omap_dm_timer_write(timer, OMAP_TIMER_WAKEUP_EN_REG, value, 0);
-}
-
-static inline unsigned int
-__omap_dm_timer_read_counter(struct omap_dm_timer *timer, int posted)
-{
- return __omap_dm_timer_read(timer, OMAP_TIMER_COUNTER_REG, posted);
-}
-
-static inline void __omap_dm_timer_write_status(struct omap_dm_timer *timer,
- unsigned int value)
-{
- writel_relaxed(value, timer->irq_stat);
-}
-#endif /* CONFIG_ARCH_OMAP1 || CONFIG_ARCH_OMAP2PLUS */
#endif /* __CLOCKSOURCE_DMTIMER_H */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 44975c1bbe12..7e7a33b6c8d7 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -105,6 +105,7 @@ enum acpi_irq_model_id {
ACPI_IRQ_MODEL_IOSAPIC,
ACPI_IRQ_MODEL_PLATFORM,
ACPI_IRQ_MODEL_GIC,
+ ACPI_IRQ_MODEL_LPIC,
ACPI_IRQ_MODEL_COUNT
};
@@ -356,7 +357,8 @@ int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi);
void acpi_set_irq_model(enum acpi_irq_model_id model,
- struct fwnode_handle *fwnode);
+ struct fwnode_handle *(*)(u32));
+void acpi_set_gsi_to_irq_fallback(u32 (*)(u32));
struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags,
unsigned int size,
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 1bfcfb1af352..187b54a9567e 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -264,7 +264,8 @@ struct css_set {
* List of csets participating in the on-going migration either as
* source or destination. Protected by cgroup_mutex.
*/
- struct list_head mg_preload_node;
+ struct list_head mg_src_preload_node;
+ struct list_head mg_dst_preload_node;
struct list_head mg_node;
/*
@@ -287,6 +288,10 @@ struct css_set {
struct cgroup_base_stat {
struct task_cputime cputime;
+
+#ifdef CONFIG_SCHED_CORE
+ u64 forceidle_sum;
+#endif
};
/*
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 2c7477354744..314802f98b9d 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -68,6 +68,8 @@ extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr,
extern ssize_t cpu_show_mmio_stale_data(struct device *dev,
struct device_attribute *attr,
char *buf);
+extern ssize_t cpu_show_retbleed(struct device *dev,
+ struct device_attribute *attr, char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 19f0dbfdd7fe..f61447913db9 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -130,7 +130,6 @@ enum cpuhp_state {
CPUHP_ZCOMP_PREPARE,
CPUHP_TIMERS_PREPARE,
CPUHP_MIPS_SOC_PREPARE,
- CPUHP_LOONGARCH_SOC_PREPARE,
CPUHP_BP_PREPARE_DYN,
CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20,
CPUHP_BRINGUP_CPU,
@@ -151,6 +150,7 @@ enum cpuhp_state {
CPUHP_AP_IRQ_BCM2836_STARTING,
CPUHP_AP_IRQ_MIPS_GIC_STARTING,
CPUHP_AP_IRQ_RISCV_STARTING,
+ CPUHP_AP_IRQ_LOONGARCH_STARTING,
CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
CPUHP_AP_ARM_MVEBU_COHERENCY,
CPUHP_AP_MICROCODE_LOADER,
@@ -230,6 +230,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE,
+ CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
CPUHP_AP_PERF_ARM_L2X0_ONLINE,
CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
diff --git a/include/linux/evm.h b/include/linux/evm.h
index 4c374be70247..aa63e0b3c0a2 100644
--- a/include/linux/evm.h
+++ b/include/linux/evm.h
@@ -21,7 +21,8 @@ extern enum integrity_status evm_verifyxattr(struct dentry *dentry,
void *xattr_value,
size_t xattr_value_len,
struct integrity_iint_cache *iint);
-extern int evm_inode_setattr(struct dentry *dentry, struct iattr *attr);
+extern int evm_inode_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr);
extern void evm_inode_post_setattr(struct dentry *dentry, int ia_valid);
extern int evm_inode_setxattr(struct user_namespace *mnt_userns,
struct dentry *dentry, const char *name,
@@ -68,7 +69,8 @@ static inline enum integrity_status evm_verifyxattr(struct dentry *dentry,
}
#endif
-static inline int evm_inode_setattr(struct dentry *dentry, struct iattr *attr)
+static inline int evm_inode_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr)
{
return 0;
}
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index e517dbcf74ed..8ad743def6f3 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -59,15 +59,19 @@
#define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \
FAN_MARK_FILESYSTEM)
+#define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \
+ FAN_MARK_FLUSH)
+
+#define FANOTIFY_MARK_IGNORE_BITS (FAN_MARK_IGNORED_MASK | \
+ FAN_MARK_IGNORE)
+
#define FANOTIFY_MARK_FLAGS (FANOTIFY_MARK_TYPE_BITS | \
- FAN_MARK_ADD | \
- FAN_MARK_REMOVE | \
+ FANOTIFY_MARK_CMD_BITS | \
+ FANOTIFY_MARK_IGNORE_BITS | \
FAN_MARK_DONT_FOLLOW | \
FAN_MARK_ONLYDIR | \
- FAN_MARK_IGNORED_MASK | \
FAN_MARK_IGNORED_SURV_MODIFY | \
- FAN_MARK_EVICTABLE | \
- FAN_MARK_FLUSH)
+ FAN_MARK_EVICTABLE)
/*
* Events that can be reported with data type FSNOTIFY_EVENT_PATH.
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9ad5e3520fae..ec2e35886779 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -221,8 +221,26 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
struct iattr {
unsigned int ia_valid;
umode_t ia_mode;
- kuid_t ia_uid;
- kgid_t ia_gid;
+ /*
+ * The two anonymous unions wrap structures with the same member.
+ *
+ * Filesystems raising FS_ALLOW_IDMAP need to use ia_vfs{g,u}id which
+ * are a dedicated type requiring the filesystem to use the dedicated
+ * helpers. Other filesystems can continue to use ia_{g,u}id until they
+ * have been ported.
+ *
+ * They always contain the same value. In other words FS_ALLOW_IDMAP
+ * pass down the same value on idmapped mounts as they would on regular
+ * mounts.
+ */
+ union {
+ kuid_t ia_uid;
+ vfsuid_t ia_vfsuid;
+ };
+ union {
+ kgid_t ia_gid;
+ vfsgid_t ia_vfsgid;
+ };
loff_t ia_size;
struct timespec64 ia_atime;
struct timespec64 ia_mtime;
@@ -1600,13 +1618,68 @@ static inline void i_gid_write(struct inode *inode, gid_t gid)
* @mnt_userns: user namespace of the mount the inode was found from
* @inode: inode to map
*
+ * Note, this will eventually be removed completely in favor of the type-safe
+ * i_uid_into_vfsuid().
+ *
* Return: the inode's i_uid mapped down according to @mnt_userns.
* If the inode's i_uid has no mapping INVALID_UID is returned.
*/
static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
const struct inode *inode)
{
- return mapped_kuid_fs(mnt_userns, i_user_ns(inode), inode->i_uid);
+ return AS_KUIDT(make_vfsuid(mnt_userns, i_user_ns(inode), inode->i_uid));
+}
+
+/**
+ * i_uid_into_vfsuid - map an inode's i_uid down into a mnt_userns
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @inode: inode to map
+ *
+ * Return: the inode's i_uid mapped down according to @mnt_userns.
+ * If the inode's i_uid has no mapping INVALID_VFSUID is returned.
+ */
+static inline vfsuid_t i_uid_into_vfsuid(struct user_namespace *mnt_userns,
+ const struct inode *inode)
+{
+ return make_vfsuid(mnt_userns, i_user_ns(inode), inode->i_uid);
+}
+
+/**
+ * i_uid_needs_update - check whether inode's i_uid needs to be updated
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @attr: the new attributes of @inode
+ * @inode: the inode to update
+ *
+ * Check whether the @inode's i_uid field needs to be updated taking idmapped
+ * mounts into account if the filesystem supports it.
+ *
+ * Return: true if @inode's i_uid field needs to be updated, false if not.
+ */
+static inline bool i_uid_needs_update(struct user_namespace *mnt_userns,
+ const struct iattr *attr,
+ const struct inode *inode)
+{
+ return ((attr->ia_valid & ATTR_UID) &&
+ !vfsuid_eq(attr->ia_vfsuid,
+ i_uid_into_vfsuid(mnt_userns, inode)));
+}
+
+/**
+ * i_uid_update - update @inode's i_uid field
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @attr: the new attributes of @inode
+ * @inode: the inode to update
+ *
+ * Safely update @inode's i_uid field translating the vfsuid of any idmapped
+ * mount into the filesystem kuid.
+ */
+static inline void i_uid_update(struct user_namespace *mnt_userns,
+ const struct iattr *attr,
+ struct inode *inode)
+{
+ if (attr->ia_valid & ATTR_UID)
+ inode->i_uid = from_vfsuid(mnt_userns, i_user_ns(inode),
+ attr->ia_vfsuid);
}
/**
@@ -1614,13 +1687,68 @@ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
* @mnt_userns: user namespace of the mount the inode was found from
* @inode: inode to map
*
+ * Note, this will eventually be removed completely in favor of the type-safe
+ * i_gid_into_vfsgid().
+ *
* Return: the inode's i_gid mapped down according to @mnt_userns.
* If the inode's i_gid has no mapping INVALID_GID is returned.
*/
static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns,
const struct inode *inode)
{
- return mapped_kgid_fs(mnt_userns, i_user_ns(inode), inode->i_gid);
+ return AS_KGIDT(make_vfsgid(mnt_userns, i_user_ns(inode), inode->i_gid));
+}
+
+/**
+ * i_gid_into_vfsgid - map an inode's i_gid down into a mnt_userns
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @inode: inode to map
+ *
+ * Return: the inode's i_gid mapped down according to @mnt_userns.
+ * If the inode's i_gid has no mapping INVALID_VFSGID is returned.
+ */
+static inline vfsgid_t i_gid_into_vfsgid(struct user_namespace *mnt_userns,
+ const struct inode *inode)
+{
+ return make_vfsgid(mnt_userns, i_user_ns(inode), inode->i_gid);
+}
+
+/**
+ * i_gid_needs_update - check whether inode's i_gid needs to be updated
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @attr: the new attributes of @inode
+ * @inode: the inode to update
+ *
+ * Check whether @inode's i_gid field needs to be updated taking idmapped
+ * mounts into account if the filesystem supports it.
+ *
+ * Return: true if @inode's i_gid field needs to be updated, false if not.
+ */
+static inline bool i_gid_needs_update(struct user_namespace *mnt_userns,
+ const struct iattr *attr,
+ const struct inode *inode)
+{
+ return ((attr->ia_valid & ATTR_GID) &&
+ !vfsgid_eq(attr->ia_vfsgid,
+ i_gid_into_vfsgid(mnt_userns, inode)));
+}
+
+/**
+ * i_gid_update - update @inode's i_gid field
+ * @mnt_userns: user namespace of the mount the inode was found from
+ * @attr: the new attributes of @inode
+ * @inode: the inode to update
+ *
+ * Safely update @inode's i_gid field translating the vfsgid of any idmapped
+ * mount into the filesystem kgid.
+ */
+static inline void i_gid_update(struct user_namespace *mnt_userns,
+ const struct iattr *attr,
+ struct inode *inode)
+{
+ if (attr->ia_valid & ATTR_GID)
+ inode->i_gid = from_vfsgid(mnt_userns, i_user_ns(inode),
+ attr->ia_vfsgid);
}
/**
@@ -2195,8 +2323,8 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
static inline bool HAS_UNMAPPED_ID(struct user_namespace *mnt_userns,
struct inode *inode)
{
- return !uid_valid(i_uid_into_mnt(mnt_userns, inode)) ||
- !gid_valid(i_gid_into_mnt(mnt_userns, inode));
+ return !vfsuid_valid(i_uid_into_vfsuid(mnt_userns, inode)) ||
+ !vfsgid_valid(i_gid_into_vfsgid(mnt_userns, inode));
}
static inline int iocb_flags(struct file *file);
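A minimal sketch of how a filesystem's ->setattr might consume the new iattr helpers instead of touching ia_uid/ia_gid directly; the foofs_* name and the surrounding error handling are illustrative only, not part of this patch:

static int foofs_setattr(struct user_namespace *mnt_userns,
                         struct dentry *dentry, struct iattr *attr)
{
        struct inode *inode = d_inode(dentry);
        int err;

        err = setattr_prepare(mnt_userns, dentry, attr);
        if (err)
                return err;

        /*
         * Compare against the mount's view of the owner and only write it
         * back when it actually changes; the vfsuid/vfsgid is translated
         * into a filesystem kuid/kgid inside i_uid_update()/i_gid_update().
         */
        if (i_uid_needs_update(mnt_userns, attr, inode))
                i_uid_update(mnt_userns, attr, inode);
        if (i_gid_needs_update(mnt_userns, attr, inode))
                i_gid_update(mnt_userns, attr, inode);

        /* mode/size/time handling omitted from this sketch */
        mark_inode_dirty(inode);
        return 0;
}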
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 9560734759fa..d7d96c806bff 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -518,8 +518,8 @@ struct fsnotify_mark {
struct hlist_node obj_list;
/* Head of list of marks for an object [mark ref] */
struct fsnotify_mark_connector *connector;
- /* Events types to ignore [mark->lock, group->mark_mutex] */
- __u32 ignored_mask;
+ /* Event types and flags to ignore [mark->lock, group->mark_mutex] */
+ __u32 ignore_mask;
/* General fsnotify mark flags */
#define FSNOTIFY_MARK_FLAG_ALIVE 0x0001
#define FSNOTIFY_MARK_FLAG_ATTACHED 0x0002
@@ -529,6 +529,7 @@ struct fsnotify_mark {
/* fanotify mark flags */
#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x0100
#define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200
+#define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS 0x0400
unsigned int flags; /* flags [mark->lock] */
};
@@ -655,15 +656,91 @@ extern void fsnotify_remove_queued_event(struct fsnotify_group *group,
/* functions used to manipulate the marks attached to inodes */
-/* Get mask for calculating object interest taking ignored mask into account */
+/*
+ * Canonical "ignore mask" including event flags.
+ *
+ * Note the subtle semantic difference from the legacy ->ignored_mask.
+ * ->ignored_mask traditionally only meant which events should be ignored,
+ * while ->ignore_mask also includes flags regarding the type of objects on
+ * which events should be ignored.
+ */
+static inline __u32 fsnotify_ignore_mask(struct fsnotify_mark *mark)
+{
+ __u32 ignore_mask = mark->ignore_mask;
+
+ /* The event flags in ignore mask take effect */
+ if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS)
+ return ignore_mask;
+
+ /*
+ * Legacy behavior:
+ * - Always ignore events on dir
+ * - Ignore events on child if parent is watching children
+ */
+ ignore_mask |= FS_ISDIR;
+ ignore_mask &= ~FS_EVENT_ON_CHILD;
+ ignore_mask |= mark->mask & FS_EVENT_ON_CHILD;
+
+ return ignore_mask;
+}
+
+/* Legacy ignored_mask - only event types to ignore */
+static inline __u32 fsnotify_ignored_events(struct fsnotify_mark *mark)
+{
+ return mark->ignore_mask & ALL_FSNOTIFY_EVENTS;
+}
+
+/*
+ * Check if mask (or ignore mask) should be applied depending if victim is a
+ * directory and whether it is reported to a watching parent.
+ */
+static inline bool fsnotify_mask_applicable(__u32 mask, bool is_dir,
+ int iter_type)
+{
+ /* Should mask be applied to a directory? */
+ if (is_dir && !(mask & FS_ISDIR))
+ return false;
+
+ /* Should mask be applied to a child? */
+ if (iter_type == FSNOTIFY_ITER_TYPE_PARENT &&
+ !(mask & FS_EVENT_ON_CHILD))
+ return false;
+
+ return true;
+}
+
+/*
+ * Effective ignore mask taking into account if event victim is a
+ * directory and whether it is reported to a watching parent.
+ */
+static inline __u32 fsnotify_effective_ignore_mask(struct fsnotify_mark *mark,
+ bool is_dir, int iter_type)
+{
+ __u32 ignore_mask = fsnotify_ignored_events(mark);
+
+ if (!ignore_mask)
+ return 0;
+
+ /* For non-dir and non-child, no need to consult the event flags */
+ if (!is_dir && iter_type != FSNOTIFY_ITER_TYPE_PARENT)
+ return ignore_mask;
+
+ ignore_mask = fsnotify_ignore_mask(mark);
+ if (!fsnotify_mask_applicable(ignore_mask, is_dir, iter_type))
+ return 0;
+
+ return ignore_mask & ALL_FSNOTIFY_EVENTS;
+}
+
+/* Get mask for calculating object interest taking ignore mask into account */
static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark)
{
__u32 mask = mark->mask;
- if (!mark->ignored_mask)
+ if (!fsnotify_ignored_events(mark))
return mask;
- /* Interest in FS_MODIFY may be needed for clearing ignored mask */
+ /* Interest in FS_MODIFY may be needed for clearing ignore mask */
if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
mask |= FS_MODIFY;
@@ -671,7 +748,7 @@ static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark)
* If mark is interested in ignoring events on children, the object must
* show interest in those events for fsnotify_parent() to notice it.
*/
- return mask | (mark->ignored_mask & ALL_FSNOTIFY_EVENTS);
+ return mask | mark->ignore_mask;
}
/* Get mask of events for a list of marks */
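A minimal sketch (an assumed caller, not part of this patch) of how event delivery can consult the new helper; fsnotify_effective_ignore_mask() already folds in the FS_ISDIR/FS_EVENT_ON_CHILD handling, so the caller only masks event bits:

static bool mark_suppresses_event(struct fsnotify_mark *mark, __u32 event_mask,
                                  bool is_dir, int iter_type)
{
        __u32 ignored = fsnotify_effective_ignore_mask(mark, is_dir, iter_type);

        return (event_mask & ignored & ALL_FSNOTIFY_EVENTS) != 0;
}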
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 2d2ccae933c2..0ace7759acd2 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -348,7 +348,7 @@ struct vm_area_struct;
#define GFP_DMA32 __GFP_DMA32
#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM)
#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | \
- __GFP_SKIP_KASAN_POISON)
+ __GFP_SKIP_KASAN_POISON | __GFP_SKIP_KASAN_UNPOISON)
#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
__GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
#define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 54c3c6506503..6aeea1071b1b 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -12,6 +12,8 @@
#include <linux/property.h>
#include <linux/types.h>
+#include <asm/msi.h>
+
struct gpio_desc;
struct of_phandle_args;
struct device_node;
@@ -23,6 +25,13 @@ enum gpio_lookup_flags;
struct gpio_chip;
+union gpio_irq_fwspec {
+ struct irq_fwspec fwspec;
+#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+ msi_alloc_info_t msiinfo;
+#endif
+};
+
#define GPIO_LINE_DIRECTION_IN 1
#define GPIO_LINE_DIRECTION_OUT 0
@@ -103,9 +112,10 @@ struct gpio_irq_chip {
* variant named &gpiochip_populate_parent_fwspec_fourcell is also
* available.
*/
- void *(*populate_parent_alloc_arg)(struct gpio_chip *gc,
- unsigned int parent_hwirq,
- unsigned int parent_type);
+ int (*populate_parent_alloc_arg)(struct gpio_chip *gc,
+ union gpio_irq_fwspec *fwspec,
+ unsigned int parent_hwirq,
+ unsigned int parent_type);
/**
* @child_offset_to_irq:
@@ -649,28 +659,14 @@ struct bgpio_pdata {
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
-void *gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *gc,
+int gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *gc,
+ union gpio_irq_fwspec *gfwspec,
+ unsigned int parent_hwirq,
+ unsigned int parent_type);
+int gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc,
+ union gpio_irq_fwspec *gfwspec,
unsigned int parent_hwirq,
unsigned int parent_type);
-void *gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc,
- unsigned int parent_hwirq,
- unsigned int parent_type);
-
-#else
-
-static inline void *gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *gc,
- unsigned int parent_hwirq,
- unsigned int parent_type)
-{
- return NULL;
-}
-
-static inline void *gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc,
- unsigned int parent_hwirq,
- unsigned int parent_type)
-{
- return NULL;
-}
#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
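Under the new contract the driver no longer kzalloc()s and returns a fwspec; it fills the union supplied by gpiolib and returns 0 or a negative errno. A sketch with placeholder foo_gpio naming follows; drivers whose parent interrupt controller uses a standard two- or four-cell binding can instead point .populate_parent_alloc_arg at gpiochip_populate_parent_fwspec_twocell()/_fourcell() directly:

static int foo_gpio_populate_parent_fwspec(struct gpio_chip *gc,
                                           union gpio_irq_fwspec *gfwspec,
                                           unsigned int parent_hwirq,
                                           unsigned int parent_type)
{
        struct irq_fwspec *fwspec = &gfwspec->fwspec;

        /* describe the parent interrupt with a two-cell fwspec */
        fwspec->fwnode = gc->irq.parent_domain->fwnode;
        fwspec->param_count = 2;
        fwspec->param[0] = parent_hwirq;
        fwspec->param[1] = parent_type;

        return 0;
}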
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 3af34de54330..56d6a0196534 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -149,19 +149,19 @@ static inline void *kmap_local_folio(struct folio *folio, size_t offset);
* It is used in atomic context when code wants to access the contents of a
* page that might be allocated from high memory (see __GFP_HIGHMEM), for
* example a page in the pagecache. The API has two functions, and they
- * can be used in a manner similar to the following:
+ * can be used in a manner similar to the following::
*
- * -- Find the page of interest. --
- * struct page *page = find_get_page(mapping, offset);
+ * // Find the page of interest.
+ * struct page *page = find_get_page(mapping, offset);
*
- * -- Gain access to the contents of that page. --
- * void *vaddr = kmap_atomic(page);
+ * // Gain access to the contents of that page.
+ * void *vaddr = kmap_atomic(page);
*
- * -- Do something to the contents of that page. --
- * memset(vaddr, 0, PAGE_SIZE);
+ * // Do something to the contents of that page.
+ * memset(vaddr, 0, PAGE_SIZE);
*
- * -- Unmap that page. --
- * kunmap_atomic(vaddr);
+ * // Unmap that page.
+ * kunmap_atomic(vaddr);
*
* Note that the kunmap_atomic() call takes the result of the kmap_atomic()
* call, not the argument.
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index de29821231c9..4ddaf6ad73ef 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -461,4 +461,16 @@ static inline int split_folio_to_list(struct folio *folio,
return split_huge_page_to_list(&folio->page, list);
}
+/*
+ * Architectures that select ARCH_WANTS_THP_SWAP but cannot support THP
+ * swap due to limitations in their implementation (e.g. arm64 with MTE)
+ * can override this to false.
+ */
+#ifndef arch_thp_swp_supported
+static inline bool arch_thp_swp_supported(void)
+{
+ return true;
+}
+#endif
+
#endif /* _LINUX_HUGE_MM_H */
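A hedged sketch of how an architecture would opt out in its asm/pgtable.h; the per-page-metadata reasoning is an assumption drawn from the comment above, not taken from this patch:

#define arch_thp_swp_supported arch_thp_swp_supported
static inline bool arch_thp_swp_supported(void)
{
        /*
         * Refuse THP swap when per-page metadata (such as MTE tags)
         * cannot be preserved across swap-out of a whole huge page.
         */
        return false;
}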
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 426b1744215e..81708ca0ebc7 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -140,6 +140,11 @@ static inline int ima_measure_critical_data(const char *event_label,
#endif /* CONFIG_IMA */
+#ifdef CONFIG_HAVE_IMA_KEXEC
+int __init ima_free_kexec_buffer(void);
+int __init ima_get_kexec_buffer(void **addr, size_t *size);
+#endif
+
#ifdef CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT
extern bool arch_ima_get_secureboot(void);
extern const char * const *arch_get_ima_policy(void);
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 505308253d23..c3eb89606c2b 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -151,7 +151,9 @@ struct irq_common_data {
#endif
void *handler_data;
struct msi_desc *msi_desc;
+#ifdef CONFIG_SMP
cpumask_var_t affinity;
+#endif
#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
cpumask_var_t effective_affinity;
#endif
@@ -879,21 +881,34 @@ static inline int irq_data_get_node(struct irq_data *d)
return irq_common_data_get_node(d->common);
}
-static inline struct cpumask *irq_get_affinity_mask(int irq)
+static inline
+const struct cpumask *irq_data_get_affinity_mask(struct irq_data *d)
{
- struct irq_data *d = irq_get_irq_data(irq);
+#ifdef CONFIG_SMP
+ return d->common->affinity;
+#else
+ return cpumask_of(0);
+#endif
+}
- return d ? d->common->affinity : NULL;
+static inline void irq_data_update_affinity(struct irq_data *d,
+ const struct cpumask *m)
+{
+#ifdef CONFIG_SMP
+ cpumask_copy(d->common->affinity, m);
+#endif
}
-static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d)
+static inline const struct cpumask *irq_get_affinity_mask(int irq)
{
- return d->common->affinity;
+ struct irq_data *d = irq_get_irq_data(irq);
+
+ return d ? irq_data_get_affinity_mask(d) : NULL;
}
#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
static inline
-struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
+const struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
{
return d->common->effective_affinity;
}
@@ -908,13 +923,14 @@ static inline void irq_data_update_effective_affinity(struct irq_data *d,
{
}
static inline
-struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
+const struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
{
- return d->common->affinity;
+ return irq_data_get_affinity_mask(d);
}
#endif
-static inline struct cpumask *irq_get_effective_affinity_mask(unsigned int irq)
+static inline
+const struct cpumask *irq_get_effective_affinity_mask(unsigned int irq)
{
struct irq_data *d = irq_get_irq_data(irq);
@@ -1121,6 +1137,7 @@ int irq_gc_set_wake(struct irq_data *d, unsigned int on);
/* Setup functions for irq_chip_generic */
int irq_map_generic_chip(struct irq_domain *d, unsigned int virq,
irq_hw_number_t hw_irq);
+void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq);
struct irq_chip_generic *
irq_alloc_generic_chip(const char *name, int nr_ct, unsigned int irq_base,
void __iomem *reg_base, irq_flow_handler_t handler);
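With irq_data_get_affinity_mask() now returning a const cpumask, irqchip code that used to cpumask_copy() into it should switch to irq_data_update_affinity(). A minimal sketch with placeholder foo_irq naming:

static int foo_irq_set_affinity(struct irq_data *d,
                                const struct cpumask *mask_val, bool force)
{
        /* ... program the hardware to target cpumask_first(mask_val) ... */

        /* record the requested and effective affinity via the helpers */
        irq_data_update_affinity(d, mask_val);
        irq_data_update_effective_affinity(d, cpumask_of(cpumask_first(mask_val)));

        return IRQ_SET_MASK_OK_NOCOPY;
}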
diff --git a/include/linux/irqchip/mmp.h b/include/linux/irqchip/mmp.h
index cb8455c87c8a..aa1813749a4f 100644
--- a/include/linux/irqchip/mmp.h
+++ b/include/linux/irqchip/mmp.h
@@ -4,4 +4,7 @@
extern struct irq_chip icu_irq_chip;
+extern void icu_init_irq(void);
+extern void mmp2_init_icu(void);
+
#endif /* __IRQCHIP_MMP_H */
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index a77584593f7d..1cd4e36890fb 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -209,14 +209,15 @@ static inline void irq_set_handler_locked(struct irq_data *data,
* Must be called with irq_desc locked and valid parameters.
*/
static inline void
-irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip,
+irq_set_chip_handler_name_locked(struct irq_data *data,
+ const struct irq_chip *chip,
irq_flow_handler_t handler, const char *name)
{
struct irq_desc *desc = irq_data_to_desc(data);
desc->handle_irq = handler;
desc->name = name;
- data->chip = chip;
+ data->chip = (struct irq_chip *)chip;
}
bool irq_check_status_bit(unsigned int irq, unsigned int bitmask);
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index bf1eef337a07..570831ca9951 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -220,8 +220,6 @@ extern void jump_label_lock(void);
extern void jump_label_unlock(void);
extern void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type);
-extern void arch_jump_label_transform_static(struct jump_entry *entry,
- enum jump_label_type type);
extern bool arch_jump_label_transform_queue(struct jump_entry *entry,
enum jump_label_type type);
extern void arch_jump_label_transform_apply(void);
@@ -230,12 +228,12 @@ extern void static_key_slow_inc(struct static_key *key);
extern void static_key_slow_dec(struct static_key *key);
extern void static_key_slow_inc_cpuslocked(struct static_key *key);
extern void static_key_slow_dec_cpuslocked(struct static_key *key);
-extern void jump_label_apply_nops(struct module *mod);
extern int static_key_count(struct static_key *key);
extern void static_key_enable(struct static_key *key);
extern void static_key_disable(struct static_key *key);
extern void static_key_enable_cpuslocked(struct static_key *key);
extern void static_key_disable_cpuslocked(struct static_key *key);
+extern enum jump_label_type jump_label_init_type(struct jump_entry *entry);
/*
* We should be using ATOMIC_INIT() for initializing .enabled, but
@@ -303,11 +301,6 @@ static inline int jump_label_text_reserved(void *start, void *end)
static inline void jump_label_lock(void) {}
static inline void jump_label_unlock(void) {}
-static inline int jump_label_apply_nops(struct module *mod)
-{
- return 0;
-}
-
static inline void static_key_enable(struct static_key *key)
{
STATIC_KEY_CHECK_USE(key);
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 69ae6b278464..ddb5a358fd82 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -28,6 +28,9 @@ enum cpu_usage_stat {
CPUTIME_STEAL,
CPUTIME_GUEST,
CPUTIME_GUEST_NICE,
+#ifdef CONFIG_SCHED_CORE
+ CPUTIME_FORCEIDLE,
+#endif
NR_STATS,
};
@@ -115,4 +118,8 @@ extern void account_process_tick(struct task_struct *, int user);
extern void account_idle_ticks(unsigned long ticks);
+#ifdef CONFIG_SCHED_CORE
+extern void __account_forceidle_time(struct task_struct *tsk, u64 delta);
+#endif
+
#endif /* _LINUX_KERNEL_STAT_H */
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index ce6536f1d269..475683cd67f1 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -452,6 +452,12 @@ static inline int kexec_crash_loaded(void) { return 0; }
#define kexec_in_progress false
#endif /* CONFIG_KEXEC_CORE */
+#ifdef CONFIG_KEXEC_SIG
+void set_kexec_sig_enforced(void);
+#else
+static inline void set_kexec_sig_enforced(void) {}
+#endif
+
#endif /* !defined(__ASSEBMLY__) */
#endif /* LINUX_KEXEC_H */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c20f2d55840c..90a45ef7203b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1513,7 +1513,7 @@ static inline void kvm_arch_end_assignment(struct kvm *kvm)
{
}
-static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
+static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
{
return false;
}
@@ -1822,6 +1822,15 @@ struct _kvm_stats_desc {
STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_NONE, \
KVM_STATS_BASE_POW10, 0)
+/* Instantaneous boolean value, read only */
+#define STATS_DESC_IBOOLEAN(SCOPE, name) \
+ STATS_DESC_INSTANT(SCOPE, name, KVM_STATS_UNIT_BOOLEAN, \
+ KVM_STATS_BASE_POW10, 0)
+/* Peak (sticky) boolean value, read/write */
+#define STATS_DESC_PBOOLEAN(SCOPE, name) \
+ STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_BOOLEAN, \
+ KVM_STATS_BASE_POW10, 0)
+
/* Cumulative time in nanosecond */
#define STATS_DESC_TIME_NSEC(SCOPE, name) \
STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_SECONDS, \
@@ -1853,7 +1862,7 @@ struct _kvm_stats_desc {
HALT_POLL_HIST_COUNT), \
STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_wait_hist, \
HALT_POLL_HIST_COUNT), \
- STATS_DESC_ICOUNTER(VCPU_GENERIC, blocking)
+ STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking)
extern struct dentry *kvm_debugfs_dir;
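A sketch of how an architecture could declare a boolean statistic with the new macro; the layout mirrors the existing generic/arch split, but the hypothetical architecture and its in_guest_mode field are illustrative only:

/* hypothetical arch's asm/kvm_host.h */
struct kvm_vcpu_stat {
        struct kvm_vcpu_stat_generic generic;
        u64 in_guest_mode;              /* holds 0 or 1 */
};

/* hypothetical arch's stats table */
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
        KVM_GENERIC_VCPU_STATS(),
        STATS_DESC_IBOOLEAN(VCPU, in_guest_mode),
};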
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index b6829b970093..1f1099dac3f0 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -188,7 +188,7 @@ static inline void
lockdep_init_map_waits(struct lockdep_map *lock, const char *name,
struct lock_class_key *key, int subclass, u8 inner, u8 outer)
{
- lockdep_init_map_type(lock, name, key, subclass, inner, LD_WAIT_INV, LD_LOCK_NORMAL);
+ lockdep_init_map_type(lock, name, key, subclass, inner, outer, LD_LOCK_NORMAL);
}
static inline void
@@ -211,24 +211,28 @@ static inline void lockdep_init_map(struct lockdep_map *lock, const char *name,
* or they are too narrow (they suffer from a false class-split):
*/
#define lockdep_set_class(lock, key) \
- lockdep_init_map_waits(&(lock)->dep_map, #key, key, 0, \
- (lock)->dep_map.wait_type_inner, \
- (lock)->dep_map.wait_type_outer)
+ lockdep_init_map_type(&(lock)->dep_map, #key, key, 0, \
+ (lock)->dep_map.wait_type_inner, \
+ (lock)->dep_map.wait_type_outer, \
+ (lock)->dep_map.lock_type)
#define lockdep_set_class_and_name(lock, key, name) \
- lockdep_init_map_waits(&(lock)->dep_map, name, key, 0, \
- (lock)->dep_map.wait_type_inner, \
- (lock)->dep_map.wait_type_outer)
+ lockdep_init_map_type(&(lock)->dep_map, name, key, 0, \
+ (lock)->dep_map.wait_type_inner, \
+ (lock)->dep_map.wait_type_outer, \
+ (lock)->dep_map.lock_type)
#define lockdep_set_class_and_subclass(lock, key, sub) \
- lockdep_init_map_waits(&(lock)->dep_map, #key, key, sub,\
- (lock)->dep_map.wait_type_inner, \
- (lock)->dep_map.wait_type_outer)
+ lockdep_init_map_type(&(lock)->dep_map, #key, key, sub, \
+ (lock)->dep_map.wait_type_inner, \
+ (lock)->dep_map.wait_type_outer, \
+ (lock)->dep_map.lock_type)
#define lockdep_set_subclass(lock, sub) \
- lockdep_init_map_waits(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\
- (lock)->dep_map.wait_type_inner, \
- (lock)->dep_map.wait_type_outer)
+ lockdep_init_map_type(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\
+ (lock)->dep_map.wait_type_inner, \
+ (lock)->dep_map.wait_type_outer, \
+ (lock)->dep_map.lock_type)
#define lockdep_set_novalidate_class(lock) \
lockdep_set_class_and_name(lock, &__lockdep_no_validate__, #lock)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index cf3d0d673f6b..7898e29bcfb5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1130,23 +1130,27 @@ static inline bool is_zone_movable_page(const struct page *page)
#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX)
DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
-bool __put_devmap_managed_page(struct page *page);
-static inline bool put_devmap_managed_page(struct page *page)
+bool __put_devmap_managed_page_refs(struct page *page, int refs);
+static inline bool put_devmap_managed_page_refs(struct page *page, int refs)
{
if (!static_branch_unlikely(&devmap_managed_key))
return false;
if (!is_zone_device_page(page))
return false;
- return __put_devmap_managed_page(page);
+ return __put_devmap_managed_page_refs(page, refs);
}
-
#else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
-static inline bool put_devmap_managed_page(struct page *page)
+static inline bool put_devmap_managed_page_refs(struct page *page, int refs)
{
return false;
}
#endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
+static inline bool put_devmap_managed_page(struct page *page)
+{
+ return put_devmap_managed_page_refs(page, 1);
+}
+
/* 127: arbitrary random number, small enough to assemble well */
#define folio_ref_zero_or_close_to_overflow(folio) \
((unsigned int) folio_ref_count(folio) + 127u <= 127u)
diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h
index ee5a217de2a8..f6e5369d2928 100644
--- a/include/linux/mnt_idmapping.h
+++ b/include/linux/mnt_idmapping.h
@@ -13,6 +13,129 @@ struct user_namespace;
*/
extern struct user_namespace init_user_ns;
+typedef struct {
+ uid_t val;
+} vfsuid_t;
+
+typedef struct {
+ gid_t val;
+} vfsgid_t;
+
+static_assert(sizeof(vfsuid_t) == sizeof(kuid_t));
+static_assert(sizeof(vfsgid_t) == sizeof(kgid_t));
+static_assert(offsetof(vfsuid_t, val) == offsetof(kuid_t, val));
+static_assert(offsetof(vfsgid_t, val) == offsetof(kgid_t, val));
+
+#ifdef CONFIG_MULTIUSER
+static inline uid_t __vfsuid_val(vfsuid_t uid)
+{
+ return uid.val;
+}
+
+static inline gid_t __vfsgid_val(vfsgid_t gid)
+{
+ return gid.val;
+}
+#else
+static inline uid_t __vfsuid_val(vfsuid_t uid)
+{
+ return 0;
+}
+
+static inline gid_t __vfsgid_val(vfsgid_t gid)
+{
+ return 0;
+}
+#endif
+
+static inline bool vfsuid_valid(vfsuid_t uid)
+{
+ return __vfsuid_val(uid) != (uid_t)-1;
+}
+
+static inline bool vfsgid_valid(vfsgid_t gid)
+{
+ return __vfsgid_val(gid) != (gid_t)-1;
+}
+
+static inline bool vfsuid_eq(vfsuid_t left, vfsuid_t right)
+{
+ return vfsuid_valid(left) && __vfsuid_val(left) == __vfsuid_val(right);
+}
+
+static inline bool vfsgid_eq(vfsgid_t left, vfsgid_t right)
+{
+ return vfsgid_valid(left) && __vfsgid_val(left) == __vfsgid_val(right);
+}
+
+/**
+ * vfsuid_eq_kuid - check whether kuid and vfsuid have the same value
+ * @vfsuid: the vfsuid to compare
+ * @kuid: the kuid to compare
+ *
+ * Check whether @vfsuid and @kuid have the same values.
+ *
+ * Return: true if @vfsuid and @kuid have the same value, false if not.
+ * Comparison between two invalid uids returns false.
+ */
+static inline bool vfsuid_eq_kuid(vfsuid_t vfsuid, kuid_t kuid)
+{
+ return vfsuid_valid(vfsuid) && __vfsuid_val(vfsuid) == __kuid_val(kuid);
+}
+
+/**
+ * vfsgid_eq_kgid - check whether kgid and vfsgid have the same value
+ * @vfsgid: the vfsgid to compare
+ * @kgid: the kgid to compare
+ *
+ * Check whether @vfsgid and @kgid have the same values.
+ *
+ * Return: true if @vfsgid and @kgid have the same value, false if not.
+ * Comparison between two invalid gids returns false.
+ */
+static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid)
+{
+ return vfsgid_valid(vfsgid) && __vfsgid_val(vfsgid) == __kgid_val(kgid);
+}
+
+/*
+ * vfs{g,u}ids are created from k{g,u}ids.
+ * We don't allow them to be created from regular {u,g}id.
+ */
+#define VFSUIDT_INIT(val) (vfsuid_t){ __kuid_val(val) }
+#define VFSGIDT_INIT(val) (vfsgid_t){ __kgid_val(val) }
+
+#define INVALID_VFSUID VFSUIDT_INIT(INVALID_UID)
+#define INVALID_VFSGID VFSGIDT_INIT(INVALID_GID)
+
+/*
+ * Allow a vfs{g,u}id to be used as a k{g,u}id where we want to compare
+ * whether the mapped value is identical to the value of a k{g,u}id.
+ */
+#define AS_KUIDT(val) (kuid_t){ __vfsuid_val(val) }
+#define AS_KGIDT(val) (kgid_t){ __vfsgid_val(val) }
+
+#ifdef CONFIG_MULTIUSER
+/**
+ * vfsgid_in_group_p() - check whether a vfsgid matches the caller's groups
+ * @vfsgid: the mnt gid to match
+ *
+ * This function can be used to determine whether @vfsgid matches any of the
+ * caller's groups.
+ *
+ * Return: 1 if vfsgid matches caller's groups, 0 if not.
+ */
+static inline int vfsgid_in_group_p(vfsgid_t vfsgid)
+{
+ return in_group_p(AS_KGIDT(vfsgid));
+}
+#else
+static inline int vfsgid_in_group_p(vfsgid_t vfsgid)
+{
+ return 1;
+}
+#endif
+
/**
* initial_idmapping - check whether this is the initial mapping
* @ns: idmapping to check
@@ -48,7 +171,7 @@ static inline bool no_idmapping(const struct user_namespace *mnt_userns,
}
/**
- * mapped_kuid_fs - map a filesystem kuid into a mnt_userns
+ * make_vfsuid - map a filesystem kuid into a mnt_userns
* @mnt_userns: the mount's idmapping
* @fs_userns: the filesystem's idmapping
* @kuid : kuid to be mapped
@@ -67,25 +190,33 @@ static inline bool no_idmapping(const struct user_namespace *mnt_userns,
* If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is
* returned.
*/
-static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns,
- struct user_namespace *fs_userns,
- kuid_t kuid)
+
+static inline vfsuid_t make_vfsuid(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ kuid_t kuid)
{
uid_t uid;
if (no_idmapping(mnt_userns, fs_userns))
- return kuid;
+ return VFSUIDT_INIT(kuid);
if (initial_idmapping(fs_userns))
uid = __kuid_val(kuid);
else
uid = from_kuid(fs_userns, kuid);
if (uid == (uid_t)-1)
- return INVALID_UID;
- return make_kuid(mnt_userns, uid);
+ return INVALID_VFSUID;
+ return VFSUIDT_INIT(make_kuid(mnt_userns, uid));
+}
+
+static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ kuid_t kuid)
+{
+ return AS_KUIDT(make_vfsuid(mnt_userns, fs_userns, kuid));
}
/**
- * mapped_kgid_fs - map a filesystem kgid into a mnt_userns
+ * make_vfsgid - map a filesystem kgid into a mnt_userns
* @mnt_userns: the mount's idmapping
* @fs_userns: the filesystem's idmapping
* @kgid : kgid to be mapped
@@ -104,21 +235,56 @@ static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns,
* If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is
* returned.
*/
-static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns,
- struct user_namespace *fs_userns,
- kgid_t kgid)
+
+static inline vfsgid_t make_vfsgid(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ kgid_t kgid)
{
gid_t gid;
if (no_idmapping(mnt_userns, fs_userns))
- return kgid;
+ return VFSGIDT_INIT(kgid);
if (initial_idmapping(fs_userns))
gid = __kgid_val(kgid);
else
gid = from_kgid(fs_userns, kgid);
if (gid == (gid_t)-1)
- return INVALID_GID;
- return make_kgid(mnt_userns, gid);
+ return INVALID_VFSGID;
+ return VFSGIDT_INIT(make_kgid(mnt_userns, gid));
+}
+
+static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ kgid_t kgid)
+{
+ return AS_KGIDT(make_vfsgid(mnt_userns, fs_userns, kgid));
+}
+
+/**
+ * from_vfsuid - map a vfsuid into the filesystem idmapping
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @vfsuid : vfsuid to be mapped
+ *
+ * Map @vfsuid into the filesystem idmapping. This function has to be used in
+ * order to e.g. write @vfsuid to inode->i_uid.
+ *
+ * Return: @vfsuid mapped into the filesystem idmapping
+ */
+static inline kuid_t from_vfsuid(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ vfsuid_t vfsuid)
+{
+ uid_t uid;
+
+ if (no_idmapping(mnt_userns, fs_userns))
+ return AS_KUIDT(vfsuid);
+ uid = from_kuid(mnt_userns, AS_KUIDT(vfsuid));
+ if (uid == (uid_t)-1)
+ return INVALID_UID;
+ if (initial_idmapping(fs_userns))
+ return KUIDT_INIT(uid);
+ return make_kuid(fs_userns, uid);
}
/**
@@ -145,16 +311,66 @@ static inline kuid_t mapped_kuid_user(struct user_namespace *mnt_userns,
struct user_namespace *fs_userns,
kuid_t kuid)
{
- uid_t uid;
+ return from_vfsuid(mnt_userns, fs_userns, VFSUIDT_INIT(kuid));
+}
+
+/**
+ * vfsuid_has_fsmapping - check whether a vfsuid maps into the filesystem
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @vfsuid: vfsuid to be mapped
+ *
+ * Check whether @vfsuid has a mapping in the filesystem idmapping. Use this
+ * function to check whether the filesystem idmapping has a mapping for
+ * @vfsuid.
+ *
+ * Return: true if @vfsuid has a mapping in the filesystem, false if not.
+ */
+static inline bool vfsuid_has_fsmapping(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ vfsuid_t vfsuid)
+{
+ return uid_valid(from_vfsuid(mnt_userns, fs_userns, vfsuid));
+}
+
+/**
+ * vfsuid_into_kuid - convert vfsuid into kuid
+ * @vfsuid: the vfsuid to convert
+ *
+ * This can be used when a vfsuid is committed as a kuid.
+ *
+ * Return: a kuid with the value of @vfsuid
+ */
+static inline kuid_t vfsuid_into_kuid(vfsuid_t vfsuid)
+{
+ return AS_KUIDT(vfsuid);
+}
+
+/**
+ * from_vfsgid - map a vfsgid into the filesystem idmapping
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @vfsgid : vfsgid to be mapped
+ *
+ * Map @vfsgid into the filesystem idmapping. This function has to be used in
+ * order to e.g. write @vfsgid to inode->i_gid.
+ *
+ * Return: @vfsgid mapped into the filesystem idmapping
+ */
+static inline kgid_t from_vfsgid(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ vfsgid_t vfsgid)
+{
+ gid_t gid;
if (no_idmapping(mnt_userns, fs_userns))
- return kuid;
- uid = from_kuid(mnt_userns, kuid);
- if (uid == (uid_t)-1)
- return INVALID_UID;
+ return AS_KGIDT(vfsgid);
+ gid = from_kgid(mnt_userns, AS_KGIDT(vfsgid));
+ if (gid == (gid_t)-1)
+ return INVALID_GID;
if (initial_idmapping(fs_userns))
- return KUIDT_INIT(uid);
- return make_kuid(fs_userns, uid);
+ return KGIDT_INIT(gid);
+ return make_kgid(fs_userns, gid);
}
/**
@@ -181,16 +397,39 @@ static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns,
struct user_namespace *fs_userns,
kgid_t kgid)
{
- gid_t gid;
+ return from_vfsgid(mnt_userns, fs_userns, VFSGIDT_INIT(kgid));
+}
- if (no_idmapping(mnt_userns, fs_userns))
- return kgid;
- gid = from_kgid(mnt_userns, kgid);
- if (gid == (gid_t)-1)
- return INVALID_GID;
- if (initial_idmapping(fs_userns))
- return KGIDT_INIT(gid);
- return make_kgid(fs_userns, gid);
+/**
+ * vfsgid_has_fsmapping - check whether a vfsgid maps into the filesystem
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @vfsgid: vfsgid to be mapped
+ *
+ * Check whether @vfsgid has a mapping in the filesystem idmapping. Use this
+ * function to check whether the filesystem idmapping has a mapping for
+ * @vfsgid.
+ *
+ * Return: true if @vfsgid has a mapping in the filesystem, false if not.
+ */
+static inline bool vfsgid_has_fsmapping(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ vfsgid_t vfsgid)
+{
+ return gid_valid(from_vfsgid(mnt_userns, fs_userns, vfsgid));
+}
+
+/**
+ * vfsgid_into_kgid - convert vfsgid into kgid
+ * @vfsgid: the vfsgid to convert
+ *
+ * This can be used when a vfsgid is committed as a kgid.
+ *
+ * Return: a kgid with the value of @vfsgid
+ */
+static inline kgid_t vfsgid_into_kgid(vfsgid_t vfsgid)
+{
+ return AS_KGIDT(vfsgid);
}
/**
@@ -209,7 +448,8 @@ static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns,
static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns,
struct user_namespace *fs_userns)
{
- return mapped_kuid_user(mnt_userns, fs_userns, current_fsuid());
+ return from_vfsuid(mnt_userns, fs_userns,
+ VFSUIDT_INIT(current_fsuid()));
}
/**
@@ -228,7 +468,8 @@ static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns,
static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns,
struct user_namespace *fs_userns)
{
- return mapped_kgid_user(mnt_userns, fs_userns, current_fsgid());
+ return from_vfsgid(mnt_userns, fs_userns,
+ VFSGIDT_INIT(current_fsgid()));
}
#endif /* _LINUX_MNT_IDMAPPING_H */
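A minimal sketch of the intended round trip, using placeholder foo_* helpers: map the inode owner into the mount's view for comparison, and translate a mount-view uid back into the filesystem idmapping before committing it:

static bool foo_owner_matches_caller(struct user_namespace *mnt_userns,
                                     const struct inode *inode)
{
        vfsuid_t vfsuid = make_vfsuid(mnt_userns, i_user_ns(inode),
                                      inode->i_uid);

        /* compare the mount's view of the owner with the caller's fsuid */
        return vfsuid_eq_kuid(vfsuid, current_fsuid());
}

static void foo_commit_owner(struct user_namespace *mnt_userns,
                             struct inode *inode, vfsuid_t vfsuid)
{
        /* map the mount-view uid back into a filesystem kuid */
        inode->i_uid = from_vfsuid(mnt_userns, i_user_ns(inode), vfsuid);
}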
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 1773e5df8e65..1b18dfa52e48 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -214,7 +214,7 @@ struct netfs_request_ops {
void (*issue_read)(struct netfs_io_subrequest *subreq);
bool (*is_still_valid)(struct netfs_io_request *rreq);
int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
- struct folio *folio, void **_fsdata);
+ struct folio **foliop, void **_fsdata);
void (*done)(struct netfs_io_request *rreq);
};
diff --git a/include/linux/objtool.h b/include/linux/objtool.h
index 15b940ec1eac..10bc88cc3bf6 100644
--- a/include/linux/objtool.h
+++ b/include/linux/objtool.h
@@ -32,11 +32,16 @@ struct unwind_hint {
*
* UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
* Useful for code which doesn't have an ELF function annotation.
+ *
+ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc.
*/
#define UNWIND_HINT_TYPE_CALL 0
#define UNWIND_HINT_TYPE_REGS 1
#define UNWIND_HINT_TYPE_REGS_PARTIAL 2
#define UNWIND_HINT_TYPE_FUNC 3
+#define UNWIND_HINT_TYPE_ENTRY 4
+#define UNWIND_HINT_TYPE_SAVE 5
+#define UNWIND_HINT_TYPE_RESTORE 6
#ifdef CONFIG_OBJTOOL
@@ -124,7 +129,7 @@ struct unwind_hint {
* the debuginfo as necessary. It will also warn if it sees any
* inconsistencies.
*/
-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
.Lunwind_hint_ip_\@:
.pushsection .discard.unwind_hints
/* struct unwind_hint */
@@ -177,7 +182,7 @@ struct unwind_hint {
#define ASM_REACHABLE
#else
#define ANNOTATE_INTRA_FUNCTION_CALL
-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
.endm
.macro STACK_FRAME_NON_STANDARD func:req
.endm
diff --git a/include/linux/of.h b/include/linux/of.h
index f0a5d6b10c5a..20a4e7cb7afe 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -441,8 +441,6 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image,
unsigned long initrd_load_addr,
unsigned long initrd_len,
const char *cmdline, size_t extra_fdt_size);
-int ima_get_kexec_buffer(void **addr, size_t *size);
-int ima_free_kexec_buffer(void);
#else /* CONFIG_OF */
static inline void of_core_init(void)
diff --git a/include/linux/once_lite.h b/include/linux/once_lite.h
index 861e606b820f..b7bce4983638 100644
--- a/include/linux/once_lite.h
+++ b/include/linux/once_lite.h
@@ -9,15 +9,27 @@
*/
#define DO_ONCE_LITE(func, ...) \
DO_ONCE_LITE_IF(true, func, ##__VA_ARGS__)
-#define DO_ONCE_LITE_IF(condition, func, ...) \
+
+#define __ONCE_LITE_IF(condition) \
({ \
static bool __section(".data.once") __already_done; \
- bool __ret_do_once = !!(condition); \
+ bool __ret_cond = !!(condition); \
+ bool __ret_once = false; \
\
- if (unlikely(__ret_do_once && !__already_done)) { \
+ if (unlikely(__ret_cond && !__already_done)) { \
__already_done = true; \
- func(__VA_ARGS__); \
+ __ret_once = true; \
} \
+ unlikely(__ret_once); \
+ })
+
+#define DO_ONCE_LITE_IF(condition, func, ...) \
+ ({ \
+ bool __ret_do_once = !!(condition); \
+ \
+ if (__ONCE_LITE_IF(__ret_do_once)) \
+ func(__VA_ARGS__); \
+ \
unlikely(__ret_do_once); \
})
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0178823ce8c2..7fa460ccf7fa 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -556,10 +556,13 @@
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F3 0x1493
#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F3 0x144b
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F3 0x1443
+#define PCI_DEVICE_ID_AMD_17H_MA0H_DF_F3 0x1727
#define PCI_DEVICE_ID_AMD_19H_DF_F3 0x1653
#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F3 0x14b0
#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F3 0x167c
#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F3 0x166d
+#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F3 0x14e3
+#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F3 0x14f3
#define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703
#define PCI_DEVICE_ID_AMD_LANCE 0x2000
#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
index 46f9b6fe306e..bf66fe011fa8 100644
--- a/include/linux/perf/riscv_pmu.h
+++ b/include/linux/perf/riscv_pmu.h
@@ -56,9 +56,13 @@ struct riscv_pmu {
struct cpu_hw_events __percpu *hw_events;
struct hlist_node node;
+ struct notifier_block riscv_pm_nb;
};
#define to_riscv_pmu(p) (container_of(p, struct riscv_pmu, pmu))
+
+void riscv_pmu_start(struct perf_event *event, int flags);
+void riscv_pmu_stop(struct perf_event *event, int flags);
unsigned long riscv_pmu_ctr_read_csr(unsigned long csr);
int riscv_pmu_event_set_period(struct perf_event *event);
uint64_t riscv_pmu_ctr_get_width_mask(struct perf_event *event);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index da759560eec5..ee8b9ecdc03b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -759,6 +759,8 @@ struct perf_event {
struct pid_namespace *ns;
u64 id;
+ atomic64_t lost_samples;
+
u64 (*clock)(void);
perf_overflow_handler_t overflow_handler;
void *overflow_handler_context;
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index b65c877d92b8..7d1e604c1325 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -73,6 +73,7 @@ extern int set_posix_acl(struct user_namespace *, struct inode *, int,
struct posix_acl *);
struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type);
+struct posix_acl *posix_acl_clone(const struct posix_acl *acl, gfp_t flags);
#ifdef CONFIG_FS_POSIX_ACL
int posix_acl_chmod(struct user_namespace *, struct inode *, umode_t);
diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h
index 1766e1de6956..b6bd3eac2bcc 100644
--- a/include/linux/posix_acl_xattr.h
+++ b/include/linux/posix_acl_xattr.h
@@ -33,21 +33,31 @@ posix_acl_xattr_count(size_t size)
}
#ifdef CONFIG_FS_POSIX_ACL
-void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
- struct inode *inode,
- void *value, size_t size);
-void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns,
- struct inode *inode,
- void *value, size_t size);
+void posix_acl_fix_xattr_from_user(void *value, size_t size);
+void posix_acl_fix_xattr_to_user(void *value, size_t size);
+void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns,
+ const struct inode *inode,
+ void *value, size_t size);
+void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns,
+ const struct inode *inode,
+ void *value, size_t size);
#else
-static inline void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
- struct inode *inode,
- void *value, size_t size)
+static inline void posix_acl_fix_xattr_from_user(void *value, size_t size)
{
}
-static inline void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns,
- struct inode *inode,
- void *value, size_t size)
+static inline void posix_acl_fix_xattr_to_user(void *value, size_t size)
+{
+}
+static inline void
+posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns,
+ const struct inode *inode, void *value,
+ size_t size)
+{
+}
+static inline void
+posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns,
+ const struct inode *inode, void *value,
+ size_t size)
{
}
#endif
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index a0f6668924d3..0d8625d71733 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -20,11 +20,12 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb)
}
/* i_mutex must being held */
-static inline bool is_quota_modification(struct inode *inode, struct iattr *ia)
+static inline bool is_quota_modification(struct user_namespace *mnt_userns,
+ struct inode *inode, struct iattr *ia)
{
- return (ia->ia_valid & ATTR_SIZE) ||
- (ia->ia_valid & ATTR_UID && !uid_eq(ia->ia_uid, inode->i_uid)) ||
- (ia->ia_valid & ATTR_GID && !gid_eq(ia->ia_gid, inode->i_gid));
+ return ((ia->ia_valid & ATTR_SIZE) ||
+ i_uid_needs_update(mnt_userns, ia, inode) ||
+ i_gid_needs_update(mnt_userns, ia, inode));
}
#if defined(CONFIG_QUOTA)
@@ -115,7 +116,8 @@ int dquot_set_dqblk(struct super_block *sb, struct kqid id,
struct qc_dqblk *di);
int __dquot_transfer(struct inode *inode, struct dquot **transfer_to);
-int dquot_transfer(struct inode *inode, struct iattr *iattr);
+int dquot_transfer(struct user_namespace *mnt_userns, struct inode *inode,
+ struct iattr *iattr);
static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type)
{
@@ -234,7 +236,8 @@ static inline void dquot_free_inode(struct inode *inode)
{
}
-static inline int dquot_transfer(struct inode *inode, struct iattr *iattr)
+static inline int dquot_transfer(struct user_namespace *mnt_userns,
+ struct inode *inode, struct iattr *iattr)
{
return 0;
}
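A sketch of a filesystem ->setattr forwarding the mount's userns to the quota code so that it can perform the vfsuid/vfsgid comparison itself; barfs_* is a placeholder:

static int barfs_setattr(struct user_namespace *mnt_userns,
                         struct dentry *dentry, struct iattr *attr)
{
        struct inode *inode = d_inode(dentry);
        int err;

        if (is_quota_modification(mnt_userns, inode, attr)) {
                err = dquot_initialize(inode);
                if (err)
                        return err;
        }

        if (i_uid_needs_update(mnt_userns, attr, inode) ||
            i_gid_needs_update(mnt_userns, attr, inode)) {
                err = dquot_transfer(mnt_userns, inode, attr);
                if (err)
                        return err;
        }

        /* remaining attribute handling omitted from this sketch */
        return 0;
}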
diff --git a/include/linux/reset.h b/include/linux/reset.h
index 8a21b5756c3e..514ddf003efc 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -731,7 +731,7 @@ static inline int __must_check
devm_reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs,
struct reset_control_bulk_data *rstcs)
{
- return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, true);
+ return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, true);
}
/**
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c46f3a63b758..88b8817b827d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2257,7 +2257,7 @@ static inline bool owner_on_cpu(struct task_struct *owner)
}
/* Returns effective CPU energy utilization, as seen by the scheduler */
-unsigned long sched_cpu_util(int cpu, unsigned long max);
+unsigned long sched_cpu_util(int cpu);
#endif /* CONFIG_SMP */
#ifdef CONFIG_RSEQ
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index e5af028c08b4..994c25640e15 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -39,20 +39,12 @@ static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p)
}
extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task);
extern void rt_mutex_adjust_pi(struct task_struct *p);
-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
-{
- return tsk->pi_blocked_on != NULL;
-}
#else
static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
{
return NULL;
}
# define rt_mutex_adjust_pi(p) do { } while (0)
-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
-{
- return false;
-}
#endif
extern void normalize_rt_tasks(void);
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 505aaf9fe477..81cab4b01edc 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -85,7 +85,7 @@ static inline void exit_thread(struct task_struct *tsk)
extern __noreturn void do_group_exit(int);
extern void exit_files(struct task_struct *);
-extern void exit_itimers(struct signal_struct *);
+extern void exit_itimers(struct task_struct *);
extern pid_t kernel_clone(struct kernel_clone_args *kargs);
struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 56cffe42abbc..816df6cc444e 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -81,6 +81,7 @@ struct sched_domain_shared {
atomic_t ref;
atomic_t nr_busy_cpus;
int has_idle_cores;
+ int nr_idle_scan;
};
struct sched_domain {
diff --git a/include/linux/security.h b/include/linux/security.h
index 7fc4e9f49f54..4d0baf30266e 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -353,7 +353,8 @@ int security_inode_readlink(struct dentry *dentry);
int security_inode_follow_link(struct dentry *dentry, struct inode *inode,
bool rcu);
int security_inode_permission(struct inode *inode, int mask);
-int security_inode_setattr(struct dentry *dentry, struct iattr *attr);
+int security_inode_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr);
int security_inode_getattr(const struct path *path);
int security_inode_setxattr(struct user_namespace *mnt_userns,
struct dentry *dentry, const char *name,
@@ -848,8 +849,9 @@ static inline int security_inode_permission(struct inode *inode, int mask)
return 0;
}
-static inline int security_inode_setattr(struct dentry *dentry,
- struct iattr *attr)
+static inline int security_inode_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry,
+ struct iattr *attr)
{
return 0;
}
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 657a0fc68a3f..fde258b3decd 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -390,6 +390,11 @@ static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED;
static inline int setup_earlycon(char *buf) { return 0; }
#endif
+static inline bool uart_console_enabled(struct uart_port *port)
+{
+ return uart_console(port) && (port->cons->flags & CON_ENABLED);
+}
+
struct uart_port *uart_get_console(struct uart_port *ports, int nr,
struct console *c);
int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr,
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 29917850f079..8df475db88c0 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -260,6 +260,7 @@ struct plat_stmmacenet_data {
bool has_crossts;
int int_snapshot_num;
int ext_snapshot_num;
+ bool int_snapshot_en;
bool ext_snapshot_en;
bool multi_msi_en;
int msi_mac_vec;
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 851e07da2583..58cfbf81447c 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -544,10 +544,11 @@ do { \
\
hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC, \
HRTIMER_MODE_REL); \
- if ((timeout) != KTIME_MAX) \
- hrtimer_start_range_ns(&__t.timer, timeout, \
- current->timer_slack_ns, \
- HRTIMER_MODE_REL); \
+ if ((timeout) != KTIME_MAX) { \
+ hrtimer_set_expires_range_ns(&__t.timer, timeout, \
+ current->timer_slack_ns); \
+ hrtimer_sleeper_start_expires(&__t, HRTIMER_MODE_REL); \
+ } \
\
__ret = ___wait_event(wq_head, condition, state, 0, 0, \
if (!__t.task) { \
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 4c379d23ec6e..979a9d3e5bfb 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -61,7 +61,7 @@ int __vfs_setxattr_locked(struct user_namespace *, struct dentry *,
const char *, const void *, size_t, int,
struct inode **);
int vfs_setxattr(struct user_namespace *, struct dentry *, const char *,
- const void *, size_t, int);
+ void *, size_t, int);
int __vfs_removexattr(struct user_namespace *, struct dentry *, const char *);
int __vfs_removexattr_locked(struct user_namespace *, struct dentry *,
const char *, struct inode **);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index f7506f08e505..c04f359655b8 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -405,6 +405,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev)
{
const struct inet6_dev *idev = __in6_dev_get(dev);
+ if (unlikely(!idev))
+ return true;
+
return !!idev->cnf.ignore_routes_with_linkdown;
}
diff --git a/include/net/amt.h b/include/net/amt.h
index 0e40c3d64fcf..08fc30cf2f34 100644
--- a/include/net/amt.h
+++ b/include/net/amt.h
@@ -78,6 +78,15 @@ enum amt_status {
#define AMT_STATUS_MAX (__AMT_STATUS_MAX - 1)
+/* Gateway events only */
+enum amt_event {
+ AMT_EVENT_NONE,
+ AMT_EVENT_RECEIVE,
+ AMT_EVENT_SEND_DISCOVERY,
+ AMT_EVENT_SEND_REQUEST,
+ __AMT_EVENT_MAX,
+};
+
struct amt_header {
#if defined(__LITTLE_ENDIAN_BITFIELD)
u8 type:4,
@@ -292,6 +301,12 @@ struct amt_group_node {
struct hlist_head sources[];
};
+#define AMT_MAX_EVENTS 16
+struct amt_events {
+ enum amt_event event;
+ struct sk_buff *skb;
+};
+
struct amt_dev {
struct net_device *dev;
struct net_device *stream_dev;
@@ -308,6 +323,7 @@ struct amt_dev {
struct delayed_work req_wq;
/* Protected by RTNL */
struct delayed_work secret_wq;
+ struct work_struct event_wq;
/* AMT status */
enum amt_status status;
/* Generated key */
@@ -345,6 +361,10 @@ struct amt_dev {
/* Used only in gateway mode */
u64 mac:48,
reserved:16;
+ /* AMT gateway side message handler queue */
+ struct amt_events events[AMT_MAX_EVENTS];
+ u8 event_idx;
+ u8 nr_events;
};
#define AMT_TOS 0xc0
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 3c4f550e5a8b..2f766e3437ce 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -847,6 +847,7 @@ enum {
};
void l2cap_chan_hold(struct l2cap_chan *c);
+struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c);
void l2cap_chan_put(struct l2cap_chan *c);
static inline void l2cap_chan_lock(struct l2cap_chan *chan)
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 6d02e12e4702..80f41446b1f0 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -8462,11 +8462,12 @@ int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp,
* cfg80211_obss_color_collision_notify - notify about bss color collision
* @dev: network device
* @color_bitmap: representations of the colors that the local BSS is aware of
+ * @gfp: allocation flags
*/
static inline int cfg80211_obss_color_collision_notify(struct net_device *dev,
- u64 color_bitmap)
+ u64 color_bitmap, gfp_t gfp)
{
- return cfg80211_bss_color_notify(dev, GFP_KERNEL,
+ return cfg80211_bss_color_notify(dev, gfp,
NL80211_CMD_OBSS_COLOR_COLLISION,
0, color_bitmap);
}
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 85cd695e7fd1..ee88f0f1350f 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -321,7 +321,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
-#define TCP_PINGPONG_THRESH 3
+#define TCP_PINGPONG_THRESH 1
static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
{
@@ -338,14 +338,6 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
}
-static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
-
- if (icsk->icsk_ack.pingpong < U8_MAX)
- icsk->icsk_ack.pingpong++;
-}
-
static inline bool inet_csk_has_ulp(struct sock *sk)
{
return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index ebfa3df6f8dc..fd6b510d114b 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -179,7 +179,7 @@ static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if,
int dif, int sdif)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept,
+ return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept),
bound_dev_if, dif, sdif);
#else
return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index daead5fb389a..6395f6b9a5d2 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -107,7 +107,8 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
{
- if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
+ if (!sk->sk_mark &&
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
return skb->mark;
return sk->sk_mark;
@@ -120,7 +121,7 @@ static inline int inet_request_bound_dev_if(const struct sock *sk,
#ifdef CONFIG_NET_L3_MASTER_DEV
struct net *net = sock_net(sk);
- if (!bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept)
+ if (!bound_dev_if && READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
return l3mdev_master_ifindex_by_index(net, skb->skb_iif);
#endif
@@ -132,7 +133,7 @@ static inline int inet_sk_bound_l3mdev(const struct sock *sk)
#ifdef CONFIG_NET_L3_MASTER_DEV
struct net *net = sock_net(sk);
- if (!net->ipv4.sysctl_tcp_l3mdev_accept)
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
return l3mdev_master_ifindex_by_index(net,
sk->sk_bound_dev_if);
#endif
@@ -374,7 +375,7 @@ static inline bool inet_get_convert_csum(struct sock *sk)
static inline bool inet_can_nonlocal_bind(struct net *net,
struct inet_sock *inet)
{
- return net->ipv4.sysctl_ip_nonlocal_bind ||
+ return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) ||
inet->freebind || inet->transparent;
}
diff --git a/include/net/ip.h b/include/net/ip.h
index 26fffda78cca..1c979fd1904c 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -357,7 +357,7 @@ static inline bool sysctl_dev_name_is_allowed(const char *name)
static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port)
{
- return port < net->ipv4.sysctl_ip_prot_sock;
+ return port < READ_ONCE(net->ipv4.sysctl_ip_prot_sock);
}
#else
@@ -384,7 +384,7 @@ void ipfrag_init(void);
void ip_static_sysctl_init(void);
#define IP4_REPLY_MARK(net, mark) \
- ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)
+ (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0)
static inline bool ip_is_fragment(const struct iphdr *iph)
{
@@ -446,7 +446,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
struct net *net = dev_net(dst->dev);
unsigned int mtu;
- if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
+ if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) ||
ip_mtu_locked(dst) ||
!forwarding) {
mtu = rt->rt_pmtu;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index ebadb2103968..47642b020706 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6960,10 +6960,11 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw,
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
* @color_bitmap: a 64 bit bitmap representing the colors that the local BSS is
* aware of.
+ * @gfp: allocation flags
*/
void
ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
- u64 color_bitmap);
+ u64 color_bitmap, gfp_t gfp);
/**
* ieee80211_is_tx_data - check if frame is a data frame
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 5c4e5a96a984..64cf655c818c 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -657,18 +657,22 @@ static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl)
tmpl->len = sizeof(struct nft_set_ext);
}
-static inline void nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id,
- unsigned int len)
+static inline int nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id,
+ unsigned int len)
{
tmpl->len = ALIGN(tmpl->len, nft_set_ext_types[id].align);
- BUG_ON(tmpl->len > U8_MAX);
+ if (tmpl->len > U8_MAX)
+ return -EINVAL;
+
tmpl->offset[id] = tmpl->len;
tmpl->len += nft_set_ext_types[id].len + len;
+
+ return 0;
}
-static inline void nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id)
+static inline int nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id)
{
- nft_set_ext_add_length(tmpl, id, 0);
+ return nft_set_ext_add_length(tmpl, id, 0);
}
static inline void nft_set_ext_init(struct nft_set_ext *ext,
diff --git a/include/net/protocol.h b/include/net/protocol.h
index f51c06ae365f..6aef8cb11cc8 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -35,8 +35,6 @@
/* This is used to register protocols. */
struct net_protocol {
- int (*early_demux)(struct sk_buff *skb);
- int (*early_demux_handler)(struct sk_buff *skb);
int (*handler)(struct sk_buff *skb);
/* This returns an error if we weren't able to handle the error. */
@@ -52,8 +50,6 @@ struct net_protocol {
#if IS_ENABLED(CONFIG_IPV6)
struct inet6_protocol {
- void (*early_demux)(struct sk_buff *skb);
- void (*early_demux_handler)(struct sk_buff *skb);
int (*handler)(struct sk_buff *skb);
/* This returns an error if we weren't able to handle the error. */
diff --git a/include/net/raw.h b/include/net/raw.h
index 8ad8df594853..c51a635671a7 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -75,7 +75,7 @@ static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if,
int dif, int sdif)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept,
+ return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept),
bound_dev_if, dif, sdif);
#else
return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
diff --git a/include/net/route.h b/include/net/route.h
index 991a3985712d..bbcf2aba149f 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -373,7 +373,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
struct net *net = dev_net(dst->dev);
if (hoplimit == 0)
- hoplimit = net->ipv4.sysctl_ip_default_ttl;
+ hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
return hoplimit;
}
diff --git a/include/net/sock.h b/include/net/sock.h
index 72ca97ccb460..7a48991cdb19 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1529,7 +1529,7 @@ void __sk_mem_reclaim(struct sock *sk, int amount);
/* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */
static inline long sk_prot_mem_limits(const struct sock *sk, int index)
{
- long val = sk->sk_prot->sysctl_mem[index];
+ long val = READ_ONCE(sk->sk_prot->sysctl_mem[index]);
#if PAGE_SIZE > SK_MEM_QUANTUM
val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT;
@@ -2843,18 +2843,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
{
/* Does this proto have per netns sysctl_wmem ? */
if (proto->sysctl_wmem_offset)
- return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
+ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset));
- return *proto->sysctl_wmem;
+ return READ_ONCE(*proto->sysctl_wmem);
}
static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
{
/* Does this proto have per netns sysctl_rmem ? */
if (proto->sysctl_rmem_offset)
- return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
+ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset));
- return *proto->sysctl_rmem;
+ return READ_ONCE(*proto->sysctl_rmem);
}
/* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1e99f5c61f84..78a64e1b33a7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -932,7 +932,7 @@ extern const struct inet_connection_sock_af_ops ipv6_specific;
INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb));
+void tcp_v6_early_demux(struct sk_buff *skb);
#endif
@@ -1403,8 +1403,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
s32 delta;
- if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out ||
- ca_ops->cong_control)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) ||
+ tp->packets_out || ca_ops->cong_control)
return;
delta = tcp_jiffies32 - tp->lsndtime;
if (delta > inet_csk(sk)->icsk_rto)
@@ -1419,7 +1419,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space,
static inline int tcp_win_from_space(const struct sock *sk, int space)
{
- int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
+ int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
return tcp_adv_win_scale <= 0 ?
(space>>(-tcp_adv_win_scale)) :
@@ -1493,21 +1493,24 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl;
+ return tp->keepalive_intvl ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
}
static inline int keepalive_time_when(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time;
+ return tp->keepalive_time ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
}
static inline int keepalive_probes(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes;
+ return tp->keepalive_probes ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
}
static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
@@ -1520,7 +1523,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
static inline int tcp_fin_time(const struct sock *sk)
{
- int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
+ int fin_timeout = tcp_sk(sk)->linger2 ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout);
const int rto = inet_csk(sk)->icsk_rto;
if (fin_timeout < (rto << 2) - (rto >> 1))
@@ -2023,7 +2027,7 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat;
+ return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
}
bool tcp_stream_memory_free(const struct sock *sk, int wake);
diff --git a/include/net/tls.h b/include/net/tls.h
index 8017f1703447..8bd938f98bdd 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -704,7 +704,7 @@ int tls_sw_fallback_init(struct sock *sk,
struct tls_crypto_info *crypto_info);
#ifdef CONFIG_TLS_DEVICE
-void tls_device_init(void);
+int tls_device_init(void);
void tls_device_cleanup(void);
void tls_device_sk_destruct(struct sock *sk);
int tls_set_device_offload(struct sock *sk, struct tls_context *ctx);
@@ -724,7 +724,7 @@ static inline bool tls_is_sk_rx_device_offloaded(struct sock *sk)
return tls_get_ctx(sk)->rx_conf == TLS_HW;
}
#else
-static inline void tls_device_init(void) {}
+static inline int tls_device_init(void) { return 0; }
static inline void tls_device_cleanup(void) {}
static inline int
diff --git a/include/net/udp.h b/include/net/udp.h
index b83a00330566..8dd4aa1485a6 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -167,7 +167,7 @@ static inline void udp_csum_pull_header(struct sk_buff *skb)
typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport,
__be16 dport);
-INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *));
+void udp_v6_early_demux(struct sk_buff *skb);
INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
@@ -238,7 +238,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if,
int dif, int sdif)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept,
+ return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept),
bound_dev_if, dif, sdif);
#else
return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
diff --git a/include/trace/events/dlm.h b/include/trace/events/dlm.h
index 32088c603244..bad21222130e 100644
--- a/include/trace/events/dlm.h
+++ b/include/trace/events/dlm.h
@@ -49,38 +49,52 @@
/* note: we begin tracing dlm_lock_start() only if ls and lkb are found */
TRACE_EVENT(dlm_lock_start,
- TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode,
- __u32 flags),
+ TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, void *name,
+ unsigned int namelen, int mode, __u32 flags),
- TP_ARGS(ls, lkb, mode, flags),
+ TP_ARGS(ls, lkb, name, namelen, mode, flags),
TP_STRUCT__entry(
__field(__u32, ls_id)
__field(__u32, lkb_id)
__field(int, mode)
__field(__u32, flags)
+ __dynamic_array(unsigned char, res_name,
+ lkb->lkb_resource ? lkb->lkb_resource->res_length : namelen)
),
TP_fast_assign(
+ struct dlm_rsb *r;
+
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->mode = mode;
__entry->flags = flags;
+
+ r = lkb->lkb_resource;
+ if (r)
+ memcpy(__get_dynamic_array(res_name), r->res_name,
+ __get_dynamic_array_len(res_name));
+ else if (name)
+ memcpy(__get_dynamic_array(res_name), name,
+ __get_dynamic_array_len(res_name));
),
- TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s",
+ TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s res_name=%s",
__entry->ls_id, __entry->lkb_id,
show_lock_mode(__entry->mode),
- show_lock_flags(__entry->flags))
+ show_lock_flags(__entry->flags),
+ __print_hex_str(__get_dynamic_array(res_name),
+ __get_dynamic_array_len(res_name)))
);
TRACE_EVENT(dlm_lock_end,
- TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode, __u32 flags,
- int error),
+ TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, void *name,
+ unsigned int namelen, int mode, __u32 flags, int error),
- TP_ARGS(ls, lkb, mode, flags, error),
+ TP_ARGS(ls, lkb, name, namelen, mode, flags, error),
TP_STRUCT__entry(
__field(__u32, ls_id)
@@ -88,14 +102,26 @@ TRACE_EVENT(dlm_lock_end,
__field(int, mode)
__field(__u32, flags)
__field(int, error)
+ __dynamic_array(unsigned char, res_name,
+ lkb->lkb_resource ? lkb->lkb_resource->res_length : namelen)
),
TP_fast_assign(
+ struct dlm_rsb *r;
+
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->mode = mode;
__entry->flags = flags;
+ r = lkb->lkb_resource;
+ if (r)
+ memcpy(__get_dynamic_array(res_name), r->res_name,
+ __get_dynamic_array_len(res_name));
+ else if (name)
+ memcpy(__get_dynamic_array(res_name), name,
+ __get_dynamic_array_len(res_name));
+
/* return value will be zeroed in those cases by dlm_lock()
* we do it here again to not introduce more overhead if
* trace isn't running and error reflects the return value.
@@ -104,12 +130,15 @@ TRACE_EVENT(dlm_lock_end,
__entry->error = 0;
else
__entry->error = error;
+
),
- TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s error=%d",
+ TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s error=%d res_name=%s",
__entry->ls_id, __entry->lkb_id,
show_lock_mode(__entry->mode),
- show_lock_flags(__entry->flags), __entry->error)
+ show_lock_flags(__entry->flags), __entry->error,
+ __print_hex_str(__get_dynamic_array(res_name),
+ __get_dynamic_array_len(res_name)))
);
@@ -123,42 +152,65 @@ TRACE_EVENT(dlm_bast,
__field(__u32, ls_id)
__field(__u32, lkb_id)
__field(int, mode)
+ __dynamic_array(unsigned char, res_name,
+ lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
),
TP_fast_assign(
+ struct dlm_rsb *r;
+
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->mode = mode;
+
+ r = lkb->lkb_resource;
+ if (r)
+ memcpy(__get_dynamic_array(res_name), r->res_name,
+ __get_dynamic_array_len(res_name));
),
- TP_printk("ls_id=%u lkb_id=%x mode=%s", __entry->ls_id,
- __entry->lkb_id, show_lock_mode(__entry->mode))
+ TP_printk("ls_id=%u lkb_id=%x mode=%s res_name=%s",
+ __entry->ls_id, __entry->lkb_id,
+ show_lock_mode(__entry->mode),
+ __print_hex_str(__get_dynamic_array(res_name),
+ __get_dynamic_array_len(res_name)))
);
TRACE_EVENT(dlm_ast,
- TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_lksb *lksb),
+ TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb),
- TP_ARGS(ls, lkb, lksb),
+ TP_ARGS(ls, lkb),
TP_STRUCT__entry(
__field(__u32, ls_id)
__field(__u32, lkb_id)
__field(u8, sb_flags)
__field(int, sb_status)
+ __dynamic_array(unsigned char, res_name,
+ lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
),
TP_fast_assign(
+ struct dlm_rsb *r;
+
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
- __entry->sb_flags = lksb->sb_flags;
- __entry->sb_status = lksb->sb_status;
+ __entry->sb_flags = lkb->lkb_lksb->sb_flags;
+ __entry->sb_status = lkb->lkb_lksb->sb_status;
+
+ r = lkb->lkb_resource;
+ if (r)
+ memcpy(__get_dynamic_array(res_name), r->res_name,
+ __get_dynamic_array_len(res_name));
),
- TP_printk("ls_id=%u lkb_id=%x sb_flags=%s sb_status=%d",
+ TP_printk("ls_id=%u lkb_id=%x sb_flags=%s sb_status=%d res_name=%s",
__entry->ls_id, __entry->lkb_id,
- show_dlm_sb_flags(__entry->sb_flags), __entry->sb_status)
+ show_dlm_sb_flags(__entry->sb_flags), __entry->sb_status,
+ __print_hex_str(__get_dynamic_array(res_name),
+ __get_dynamic_array_len(res_name)))
);
@@ -173,17 +225,28 @@ TRACE_EVENT(dlm_unlock_start,
__field(__u32, ls_id)
__field(__u32, lkb_id)
__field(__u32, flags)
+ __dynamic_array(unsigned char, res_name,
+ lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
),
TP_fast_assign(
+ struct dlm_rsb *r;
+
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->flags = flags;
+
+ r = lkb->lkb_resource;
+ if (r)
+ memcpy(__get_dynamic_array(res_name), r->res_name,
+ __get_dynamic_array_len(res_name));
),
- TP_printk("ls_id=%u lkb_id=%x flags=%s",
+ TP_printk("ls_id=%u lkb_id=%x flags=%s res_name=%s",
__entry->ls_id, __entry->lkb_id,
- show_lock_flags(__entry->flags))
+ show_lock_flags(__entry->flags),
+ __print_hex_str(__get_dynamic_array(res_name),
+ __get_dynamic_array_len(res_name)))
);
@@ -199,18 +262,29 @@ TRACE_EVENT(dlm_unlock_end,
__field(__u32, lkb_id)
__field(__u32, flags)
__field(int, error)
+ __dynamic_array(unsigned char, res_name,
+ lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
),
TP_fast_assign(
+ struct dlm_rsb *r;
+
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->flags = flags;
__entry->error = error;
+
+ r = lkb->lkb_resource;
+ if (r)
+ memcpy(__get_dynamic_array(res_name), r->res_name,
+ __get_dynamic_array_len(res_name));
),
- TP_printk("ls_id=%u lkb_id=%x flags=%s error=%d",
+ TP_printk("ls_id=%u lkb_id=%x flags=%s error=%d res_name=%s",
__entry->ls_id, __entry->lkb_id,
- show_lock_flags(__entry->flags), __entry->error)
+ show_lock_flags(__entry->flags), __entry->error,
+ __print_hex_str(__get_dynamic_array(res_name),
+ __get_dynamic_array_len(res_name)))
);
diff --git a/include/trace/events/iocost.h b/include/trace/events/iocost.h
index e282ce02fa2d..6d1626e7a4ce 100644
--- a/include/trace/events/iocost.h
+++ b/include/trace/events/iocost.h
@@ -160,7 +160,7 @@ TRACE_EVENT(iocost_ioc_vrate_adj,
TP_fast_assign(
__assign_str(devname, ioc_name(ioc));
- __entry->old_vrate = atomic64_read(&ioc->vtime_rate);;
+ __entry->old_vrate = atomic64_read(&ioc->vtime_rate);
__entry->new_vrate = new_vrate;
__entry->busy_level = ioc->busy_level;
__entry->read_missed_ppm = missed_ppm[READ];
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index f76668305ac5..4cb51ace600d 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -13,11 +13,12 @@ DECLARE_EVENT_CLASS(kmem_alloc,
TP_PROTO(unsigned long call_site,
const void *ptr,
+ struct kmem_cache *s,
size_t bytes_req,
size_t bytes_alloc,
gfp_t gfp_flags),
- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
TP_STRUCT__entry(
__field( unsigned long, call_site )
@@ -25,6 +26,7 @@ DECLARE_EVENT_CLASS(kmem_alloc,
__field( size_t, bytes_req )
__field( size_t, bytes_alloc )
__field( unsigned long, gfp_flags )
+ __field( bool, accounted )
),
TP_fast_assign(
@@ -33,42 +35,47 @@ DECLARE_EVENT_CLASS(kmem_alloc,
__entry->bytes_req = bytes_req;
__entry->bytes_alloc = bytes_alloc;
__entry->gfp_flags = (__force unsigned long)gfp_flags;
+ __entry->accounted = IS_ENABLED(CONFIG_MEMCG_KMEM) ?
+ ((gfp_flags & __GFP_ACCOUNT) ||
+ (s && s->flags & SLAB_ACCOUNT)) : false;
),
- TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+ TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
(void *)__entry->call_site,
__entry->ptr,
__entry->bytes_req,
__entry->bytes_alloc,
- show_gfp_flags(__entry->gfp_flags))
+ show_gfp_flags(__entry->gfp_flags),
+ __entry->accounted ? "true" : "false")
);
DEFINE_EVENT(kmem_alloc, kmalloc,
- TP_PROTO(unsigned long call_site, const void *ptr,
+ TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
);
DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
- TP_PROTO(unsigned long call_site, const void *ptr,
+ TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
);
DECLARE_EVENT_CLASS(kmem_alloc_node,
TP_PROTO(unsigned long call_site,
const void *ptr,
+ struct kmem_cache *s,
size_t bytes_req,
size_t bytes_alloc,
gfp_t gfp_flags,
int node),
- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node),
TP_STRUCT__entry(
__field( unsigned long, call_site )
@@ -77,6 +84,7 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
__field( size_t, bytes_alloc )
__field( unsigned long, gfp_flags )
__field( int, node )
+ __field( bool, accounted )
),
TP_fast_assign(
@@ -86,33 +94,37 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
__entry->bytes_alloc = bytes_alloc;
__entry->gfp_flags = (__force unsigned long)gfp_flags;
__entry->node = node;
+ __entry->accounted = IS_ENABLED(CONFIG_MEMCG_KMEM) ?
+ ((gfp_flags & __GFP_ACCOUNT) ||
+ (s && s->flags & SLAB_ACCOUNT)) : false;
),
- TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
+ TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",
(void *)__entry->call_site,
__entry->ptr,
__entry->bytes_req,
__entry->bytes_alloc,
show_gfp_flags(__entry->gfp_flags),
- __entry->node)
+ __entry->node,
+ __entry->accounted ? "true" : "false")
);
DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
TP_PROTO(unsigned long call_site, const void *ptr,
- size_t bytes_req, size_t bytes_alloc,
+ struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
gfp_t gfp_flags, int node),
- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
);
DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
TP_PROTO(unsigned long call_site, const void *ptr,
- size_t bytes_req, size_t bytes_alloc,
+ struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
gfp_t gfp_flags, int node),
- TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+ TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
);
TRACE_EVENT(kfree,
diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
index 12c315782766..777ee6cbe933 100644
--- a/include/trace/events/sock.h
+++ b/include/trace/events/sock.h
@@ -98,7 +98,7 @@ TRACE_EVENT(sock_exceed_buf_limit,
TP_STRUCT__entry(
__array(char, name, 32)
- __field(long *, sysctl_mem)
+ __array(long, sysctl_mem, 3)
__field(long, allocated)
__field(int, sysctl_rmem)
__field(int, rmem_alloc)
@@ -110,7 +110,9 @@ TRACE_EVENT(sock_exceed_buf_limit,
TP_fast_assign(
strncpy(__entry->name, prot->name, 32);
- __entry->sysctl_mem = prot->sysctl_mem;
+ __entry->sysctl_mem[0] = READ_ONCE(prot->sysctl_mem[0]);
+ __entry->sysctl_mem[1] = READ_ONCE(prot->sysctl_mem[1]);
+ __entry->sysctl_mem[2] = READ_ONCE(prot->sysctl_mem[2]);
__entry->allocated = allocated;
__entry->sysctl_rmem = sk_get_rmem0(sk, prot);
__entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index f13d37b60775..1ecdb911add8 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -192,6 +192,7 @@ struct f_owner_ex {
#define F_LINUX_SPECIFIC_BASE 1024
+#ifndef HAVE_ARCH_STRUCT_FLOCK
struct flock {
short l_type;
short l_whence;
@@ -216,5 +217,6 @@ struct flock64 {
__ARCH_FLOCK64_PAD
#endif
};
+#endif /* HAVE_ARCH_STRUCT_FLOCK */
#endif /* _ASM_GENERIC_FCNTL_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f4009dbdf62d..ef78e0e1a754 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5222,22 +5222,25 @@ union bpf_attr {
* Return
* Nothing. Always succeeds.
*
- * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset)
+ * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags)
* Description
* Read *len* bytes from *src* into *dst*, starting from *offset*
* into *src*.
+ * *flags* is currently unused.
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
- * of *src*'s data, -EINVAL if *src* is an invalid dynptr.
+ * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
+ * *flags* is not 0.
*
- * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len)
+ * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
+ * *flags* is currently unused.
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
- * is a read-only dynptr.
+ * is a read-only dynptr or if *flags* is not 0.
*
* void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len)
* Description
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index f1f89132d60e..d8536d77fea1 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -83,12 +83,20 @@
#define FAN_MARK_FLUSH 0x00000080
/* FAN_MARK_FILESYSTEM is 0x00000100 */
#define FAN_MARK_EVICTABLE 0x00000200
+/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */
+#define FAN_MARK_IGNORE 0x00000400
/* These are NOT bitwise flags. Both bits can be used together. */
#define FAN_MARK_INODE 0x00000000
#define FAN_MARK_MOUNT 0x00000010
#define FAN_MARK_FILESYSTEM 0x00000100
+/*
+ * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY
+ * for non-inode mark types.
+ */
+#define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY)
+
/* Deprecated - do not use this in programs and do not add new flags here! */
#define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\
FAN_MARK_REMOVE |\
diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h
index ef4257ab3026..2557eb7b0561 100644
--- a/include/uapi/linux/input.h
+++ b/include/uapi/linux/input.h
@@ -78,10 +78,13 @@ struct input_id {
* Note that input core does not clamp reported values to the
* [minimum, maximum] limits, such task is left to userspace.
*
- * The default resolution for main axes (ABS_X, ABS_Y, ABS_Z)
- * is reported in units per millimeter (units/mm), resolution
- * for rotational axes (ABS_RX, ABS_RY, ABS_RZ) is reported
- * in units per radian.
+ * The default resolution for main axes (ABS_X, ABS_Y, ABS_Z,
+ * ABS_MT_POSITION_X, ABS_MT_POSITION_Y) is reported in units
+ * per millimeter (units/mm), resolution for rotational axes
+ * (ABS_RX, ABS_RY, ABS_RZ) is reported in units per radian.
+ * The resolution for the size axes (ABS_MT_TOUCH_MAJOR,
+ * ABS_MT_TOUCH_MINOR, ABS_MT_WIDTH_MAJOR, ABS_MT_WIDTH_MINOR)
+ * is reported in units per millimeter (units/mm).
* When INPUT_PROP_ACCELEROMETER is set the resolution changes.
* The main axes (ABS_X, ABS_Y, ABS_Z) are then reported in
* units per g (units/g) and in units per degree per second
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 5088bd9f1922..860f867c50c0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -2083,7 +2083,8 @@ struct kvm_stats_header {
#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT)
-#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES
+#define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN
#define KVM_STATS_BASE_SHIFT 8
#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT)
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index d37629dbad72..0474ee362151 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -301,6 +301,7 @@ enum {
* { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
* { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
* { u64 id; } && PERF_FORMAT_ID
+ * { u64 lost; } && PERF_FORMAT_LOST
* } && !PERF_FORMAT_GROUP
*
* { u64 nr;
@@ -308,6 +309,7 @@ enum {
* { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
* { u64 value;
* { u64 id; } && PERF_FORMAT_ID
+ * { u64 lost; } && PERF_FORMAT_LOST
* } cntr[nr];
* } && PERF_FORMAT_GROUP
* };
@@ -317,8 +319,9 @@ enum perf_event_read_format {
PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
PERF_FORMAT_ID = 1U << 2,
PERF_FORMAT_GROUP = 1U << 3,
+ PERF_FORMAT_LOST = 1U << 4,
- PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
+ PERF_FORMAT_MAX = 1U << 5, /* non-ABI */
};
#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
diff --git a/include/uapi/linux/tty.h b/include/uapi/linux/tty.h
index 9d0f06bfbac3..68aeae2addec 100644
--- a/include/uapi/linux/tty.h
+++ b/include/uapi/linux/tty.h
@@ -38,8 +38,9 @@
#define N_NULL 27 /* Null ldisc used for error handling */
#define N_MCTP 28 /* MCTP-over-serial */
#define N_DEVELOPMENT 29 /* Manual out-of-tree testing */
+#define N_CAN327 30 /* ELM327 based OBD-II interfaces */
/* Always the newest line discipline + 1 */
-#define NR_LDISCS 30
+#define NR_LDISCS 31
#endif /* _UAPI_LINUX_TTY_H */
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 754f3237194a..e1fcaedba4fa 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -64,7 +64,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
goto fail_put;
if (!setup_ipc_sysctls(ns))
- goto fail_put;
+ goto fail_mq;
sem_init_ns(ns);
msg_init_ns(ns);
@@ -72,6 +72,9 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
return ns;
+fail_mq:
+ retire_mq_sysctls(ns);
+
fail_put:
put_user_ns(ns->user_ns);
ns_free_inum(&ns->ns);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 5f6f3f829b36..e7961508a47d 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -68,11 +68,13 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
{
u8 *ptr = NULL;
- if (k >= SKF_NET_OFF)
+ if (k >= SKF_NET_OFF) {
ptr = skb_network_header(skb) + k - SKF_NET_OFF;
- else if (k >= SKF_LL_OFF)
+ } else if (k >= SKF_LL_OFF) {
+ if (unlikely(!skb_mac_header_was_set(skb)))
+ return NULL;
ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
-
+ }
if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
return ptr;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 225806a02efb..bb1254f07667 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1497,11 +1497,12 @@ const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
.arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT,
};
-BPF_CALL_4(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src, u32, offset)
+BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src,
+ u32, offset, u64, flags)
{
int err;
- if (!src->data)
+ if (!src->data || flags)
return -EINVAL;
err = bpf_dynptr_check_off_len(src, offset, len);
@@ -1521,13 +1522,15 @@ const struct bpf_func_proto bpf_dynptr_read_proto = {
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_PTR_TO_DYNPTR,
.arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
};
-BPF_CALL_4(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src, u32, len)
+BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
+ u32, len, u64, flags)
{
int err;
- if (!dst->data || bpf_dynptr_is_rdonly(dst))
+ if (!dst->data || flags || bpf_dynptr_is_rdonly(dst))
return -EINVAL;
err = bpf_dynptr_check_off_len(dst, offset, len);
@@ -1547,6 +1550,7 @@ const struct bpf_func_proto bpf_dynptr_write_proto = {
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg5_type = ARG_ANYTHING,
};
BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 1779ccddb734..13c8e91d7862 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -765,7 +765,8 @@ struct css_set init_css_set = {
.task_iters = LIST_HEAD_INIT(init_css_set.task_iters),
.threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets),
.cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links),
- .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node),
+ .mg_src_preload_node = LIST_HEAD_INIT(init_css_set.mg_src_preload_node),
+ .mg_dst_preload_node = LIST_HEAD_INIT(init_css_set.mg_dst_preload_node),
.mg_node = LIST_HEAD_INIT(init_css_set.mg_node),
/*
@@ -1240,7 +1241,8 @@ static struct css_set *find_css_set(struct css_set *old_cset,
INIT_LIST_HEAD(&cset->threaded_csets);
INIT_HLIST_NODE(&cset->hlist);
INIT_LIST_HEAD(&cset->cgrp_links);
- INIT_LIST_HEAD(&cset->mg_preload_node);
+ INIT_LIST_HEAD(&cset->mg_src_preload_node);
+ INIT_LIST_HEAD(&cset->mg_dst_preload_node);
INIT_LIST_HEAD(&cset->mg_node);
/* Copy the set of subsystem state objects generated in
@@ -2597,21 +2599,27 @@ int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp)
*/
void cgroup_migrate_finish(struct cgroup_mgctx *mgctx)
{
- LIST_HEAD(preloaded);
struct css_set *cset, *tmp_cset;
lockdep_assert_held(&cgroup_mutex);
spin_lock_irq(&css_set_lock);
- list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded);
- list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded);
+ list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_src_csets,
+ mg_src_preload_node) {
+ cset->mg_src_cgrp = NULL;
+ cset->mg_dst_cgrp = NULL;
+ cset->mg_dst_cset = NULL;
+ list_del_init(&cset->mg_src_preload_node);
+ put_css_set_locked(cset);
+ }
- list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) {
+ list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_dst_csets,
+ mg_dst_preload_node) {
cset->mg_src_cgrp = NULL;
cset->mg_dst_cgrp = NULL;
cset->mg_dst_cset = NULL;
- list_del_init(&cset->mg_preload_node);
+ list_del_init(&cset->mg_dst_preload_node);
put_css_set_locked(cset);
}
@@ -2651,7 +2659,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset,
if (src_cset->dead)
return;
- if (!list_empty(&src_cset->mg_preload_node))
+ if (!list_empty(&src_cset->mg_src_preload_node))
return;
src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
@@ -2664,7 +2672,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset,
src_cset->mg_src_cgrp = src_cgrp;
src_cset->mg_dst_cgrp = dst_cgrp;
get_css_set(src_cset);
- list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets);
+ list_add_tail(&src_cset->mg_src_preload_node, &mgctx->preloaded_src_csets);
}
/**
@@ -2689,7 +2697,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
/* look up the dst cset for each src cset and link it to src */
list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets,
- mg_preload_node) {
+ mg_src_preload_node) {
struct css_set *dst_cset;
struct cgroup_subsys *ss;
int ssid;
@@ -2708,7 +2716,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
if (src_cset == dst_cset) {
src_cset->mg_src_cgrp = NULL;
src_cset->mg_dst_cgrp = NULL;
- list_del_init(&src_cset->mg_preload_node);
+ list_del_init(&src_cset->mg_src_preload_node);
put_css_set(src_cset);
put_css_set(dst_cset);
continue;
@@ -2716,8 +2724,8 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
src_cset->mg_dst_cset = dst_cset;
- if (list_empty(&dst_cset->mg_preload_node))
- list_add_tail(&dst_cset->mg_preload_node,
+ if (list_empty(&dst_cset->mg_dst_preload_node))
+ list_add_tail(&dst_cset->mg_dst_preload_node,
&mgctx->preloaded_dst_csets);
else
put_css_set(dst_cset);
@@ -2963,7 +2971,8 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
goto out_finish;
spin_lock_irq(&css_set_lock);
- list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) {
+ list_for_each_entry(src_cset, &mgctx.preloaded_src_csets,
+ mg_src_preload_node) {
struct task_struct *task, *ntask;
/* all tasks in src_csets need to be migrated */
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 24b5c2ab5598..feb59380c896 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -310,6 +310,9 @@ static void cgroup_base_stat_add(struct cgroup_base_stat *dst_bstat,
dst_bstat->cputime.utime += src_bstat->cputime.utime;
dst_bstat->cputime.stime += src_bstat->cputime.stime;
dst_bstat->cputime.sum_exec_runtime += src_bstat->cputime.sum_exec_runtime;
+#ifdef CONFIG_SCHED_CORE
+ dst_bstat->forceidle_sum += src_bstat->forceidle_sum;
+#endif
}
static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
@@ -318,6 +321,9 @@ static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
dst_bstat->cputime.utime -= src_bstat->cputime.utime;
dst_bstat->cputime.stime -= src_bstat->cputime.stime;
dst_bstat->cputime.sum_exec_runtime -= src_bstat->cputime.sum_exec_runtime;
+#ifdef CONFIG_SCHED_CORE
+ dst_bstat->forceidle_sum -= src_bstat->forceidle_sum;
+#endif
}
static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
@@ -398,6 +404,11 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp,
case CPUTIME_SOFTIRQ:
rstatc->bstat.cputime.stime += delta_exec;
break;
+#ifdef CONFIG_SCHED_CORE
+ case CPUTIME_FORCEIDLE:
+ rstatc->bstat.forceidle_sum += delta_exec;
+ break;
+#endif
default:
break;
}
@@ -411,8 +422,9 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp,
* with how it is done by __cgroup_account_cputime_field for each bit of
* cpu time attributed to a cgroup.
*/
-static void root_cgroup_cputime(struct task_cputime *cputime)
+static void root_cgroup_cputime(struct cgroup_base_stat *bstat)
{
+ struct task_cputime *cputime = &bstat->cputime;
int i;
cputime->stime = 0;
@@ -438,6 +450,10 @@ static void root_cgroup_cputime(struct task_cputime *cputime)
cputime->sum_exec_runtime += user;
cputime->sum_exec_runtime += sys;
cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL];
+
+#ifdef CONFIG_SCHED_CORE
+ bstat->forceidle_sum += cpustat[CPUTIME_FORCEIDLE];
+#endif
}
}
@@ -445,27 +461,43 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
{
struct cgroup *cgrp = seq_css(seq)->cgroup;
u64 usage, utime, stime;
- struct task_cputime cputime;
+ struct cgroup_base_stat bstat;
+#ifdef CONFIG_SCHED_CORE
+ u64 forceidle_time;
+#endif
if (cgroup_parent(cgrp)) {
cgroup_rstat_flush_hold(cgrp);
usage = cgrp->bstat.cputime.sum_exec_runtime;
cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
&utime, &stime);
+#ifdef CONFIG_SCHED_CORE
+ forceidle_time = cgrp->bstat.forceidle_sum;
+#endif
cgroup_rstat_flush_release();
} else {
- root_cgroup_cputime(&cputime);
- usage = cputime.sum_exec_runtime;
- utime = cputime.utime;
- stime = cputime.stime;
+ root_cgroup_cputime(&bstat);
+ usage = bstat.cputime.sum_exec_runtime;
+ utime = bstat.cputime.utime;
+ stime = bstat.cputime.stime;
+#ifdef CONFIG_SCHED_CORE
+ forceidle_time = bstat.forceidle_sum;
+#endif
}
do_div(usage, NSEC_PER_USEC);
do_div(utime, NSEC_PER_USEC);
do_div(stime, NSEC_PER_USEC);
+#ifdef CONFIG_SCHED_CORE
+ do_div(forceidle_time, NSEC_PER_USEC);
+#endif
seq_printf(seq, "usage_usec %llu\n"
"user_usec %llu\n"
"system_usec %llu\n",
usage, utime, stime);
+
+#ifdef CONFIG_SCHED_CORE
+ seq_printf(seq, "core_sched.force_idle_usec %llu\n", forceidle_time);
+#endif
}
diff --git a/kernel/configs/x86_debug.config b/kernel/configs/x86_debug.config
index dcd86f32f4ed..6fac5b405334 100644
--- a/kernel/configs/x86_debug.config
+++ b/kernel/configs/x86_debug.config
@@ -7,12 +7,11 @@ CONFIG_DEBUG_SLAB=y
CONFIG_DEBUG_KMEMLEAK=y
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_SLUB_DEBUG_ON=y
-CONFIG_KMEMCHECK=y
CONFIG_DEBUG_OBJECTS=y
CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1
CONFIG_GCOV_KERNEL=y
CONFIG_LOCKDEP=y
CONFIG_PROVE_LOCKING=y
CONFIG_SCHEDSTATS=y
-CONFIG_VMLINUX_VALIDATION=y
+CONFIG_NOINSTR_VALIDATION=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 80782cddb1da..c9d32d4d2e20 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1819,6 +1819,9 @@ static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
if (event->attr.read_format & PERF_FORMAT_ID)
entry += sizeof(u64);
+ if (event->attr.read_format & PERF_FORMAT_LOST)
+ entry += sizeof(u64);
+
if (event->attr.read_format & PERF_FORMAT_GROUP) {
nr += nr_siblings;
size += sizeof(u64);
@@ -5260,11 +5263,15 @@ static int __perf_read_group_add(struct perf_event *leader,
values[n++] += perf_event_count(leader);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(leader);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&leader->lost_samples);
for_each_sibling_event(sub, leader) {
values[n++] += perf_event_count(sub);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(sub);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&sub->lost_samples);
}
raw_spin_unlock_irqrestore(&ctx->lock, flags);
@@ -5321,7 +5328,7 @@ static int perf_read_one(struct perf_event *event,
u64 read_format, char __user *buf)
{
u64 enabled, running;
- u64 values[4];
+ u64 values[5];
int n = 0;
values[n++] = __perf_event_read_value(event, &enabled, &running);
@@ -5331,6 +5338,8 @@ static int perf_read_one(struct perf_event *event,
values[n++] = running;
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(event);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&event->lost_samples);
if (copy_to_user(buf, values, n * sizeof(u64)))
return -EFAULT;
@@ -6253,10 +6262,10 @@ again:
if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
/*
- * Raced against perf_mmap_close() through
- * perf_event_set_output(). Try again, hope for better
- * luck.
+ * Raced against perf_mmap_close(); remove the
+ * event and try again.
*/
+ ring_buffer_attach(event, NULL);
mutex_unlock(&event->mmap_mutex);
goto again;
}
@@ -6858,7 +6867,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
u64 enabled, u64 running)
{
u64 read_format = event->attr.read_format;
- u64 values[4];
+ u64 values[5];
int n = 0;
values[n++] = perf_event_count(event);
@@ -6872,6 +6881,8 @@ static void perf_output_read_one(struct perf_output_handle *handle,
}
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(event);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&event->lost_samples);
__output_copy(handle, values, n * sizeof(u64));
}
@@ -6882,7 +6893,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
{
struct perf_event *leader = event->group_leader, *sub;
u64 read_format = event->attr.read_format;
- u64 values[5];
+ u64 values[6];
int n = 0;
values[n++] = 1 + leader->nr_siblings;
@@ -6900,6 +6911,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
values[n++] = perf_event_count(leader);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(leader);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&leader->lost_samples);
__output_copy(handle, values, n * sizeof(u64));
@@ -6913,6 +6926,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
values[n++] = perf_event_count(sub);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(sub);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&sub->lost_samples);
__output_copy(handle, values, n * sizeof(u64));
}
@@ -11825,14 +11840,25 @@ err_size:
goto out;
}
+static void mutex_lock_double(struct mutex *a, struct mutex *b)
+{
+ if (b < a)
+ swap(a, b);
+
+ mutex_lock(a);
+ mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
+}
+
static int
perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
{
struct perf_buffer *rb = NULL;
int ret = -EINVAL;
- if (!output_event)
+ if (!output_event) {
+ mutex_lock(&event->mmap_mutex);
goto set;
+ }
/* don't allow circular references */
if (event == output_event)
@@ -11870,8 +11896,15 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
event->pmu != output_event->pmu)
goto out;
+ /*
+ * Hold both mmap_mutex to serialize against perf_mmap_close(). Since
+ * output_event is already on rb->event_list, and the list iteration
+ * restarts after every removal, it is guaranteed this new event is
+ * observed *OR* if output_event is already removed, it's guaranteed we
+ * observe !rb->mmap_count.
+ */
+ mutex_lock_double(&event->mmap_mutex, &output_event->mmap_mutex);
set:
- mutex_lock(&event->mmap_mutex);
/* Can't redirect output if we've got an active mmap() */
if (atomic_read(&event->mmap_count))
goto unlock;
@@ -11881,6 +11914,12 @@ set:
rb = ring_buffer_get(output_event);
if (!rb)
goto unlock;
+
+ /* did we race against perf_mmap_close() */
+ if (!atomic_read(&rb->mmap_count)) {
+ ring_buffer_put(rb);
+ goto unlock;
+ }
}
ring_buffer_attach(event, rb);
@@ -11888,20 +11927,13 @@ set:
ret = 0;
unlock:
mutex_unlock(&event->mmap_mutex);
+ if (output_event)
+ mutex_unlock(&output_event->mmap_mutex);
out:
return ret;
}
-static void mutex_lock_double(struct mutex *a, struct mutex *b)
-{
- if (b < a)
- swap(a, b);
-
- mutex_lock(a);
- mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
-}
-
static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
{
bool nmi_safe = false;
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index fb35b926024c..726132039c38 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -172,8 +172,10 @@ __perf_output_begin(struct perf_output_handle *handle,
goto out;
if (unlikely(rb->paused)) {
- if (rb->nr_pages)
+ if (rb->nr_pages) {
local_inc(&rb->lost);
+ atomic64_inc(&event->lost_samples);
+ }
goto out;
}
@@ -254,6 +256,7 @@ __perf_output_begin(struct perf_output_handle *handle,
fail:
local_inc(&rb->lost);
+ atomic64_inc(&event->lost_samples);
perf_output_put_handle(handle);
out:
rcu_read_unlock();
diff --git a/kernel/exit.c b/kernel/exit.c
index f072959fcab7..64c938ce36fe 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -766,7 +766,7 @@ void __noreturn do_exit(long code)
#ifdef CONFIG_POSIX_TIMERS
hrtimer_cancel(&tsk->signal->real_timer);
- exit_itimers(tsk->signal);
+ exit_itimers(tsk);
#endif
if (tsk->mm)
setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 10929eda9825..db3d174c53d4 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -24,6 +24,7 @@ config GENERIC_IRQ_SHOW_LEVEL
# Supports effective affinity mask
config GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ depends on SMP
bool
# Support for delayed migration from interrupt context
@@ -82,6 +83,7 @@ config IRQ_FASTEOI_HIERARCHY_HANDLERS
# Generic IRQ IPI support
config GENERIC_IRQ_IPI
bool
+ depends on SMP
select IRQ_DOMAIN_HIERARCHY
# Generic MSI interrupt support
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 886789dcee43..8ac37e8e738a 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -188,7 +188,8 @@ enum {
#ifdef CONFIG_SMP
static int
-__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force)
+__irq_startup_managed(struct irq_desc *desc, const struct cpumask *aff,
+ bool force)
{
struct irq_data *d = irq_desc_get_irq_data(desc);
@@ -224,7 +225,8 @@ __irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force)
}
#else
static __always_inline int
-__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force)
+__irq_startup_managed(struct irq_desc *desc, const struct cpumask *aff,
+ bool force)
{
return IRQ_STARTUP_NORMAL;
}
@@ -252,7 +254,7 @@ static int __irq_startup(struct irq_desc *desc)
int irq_startup(struct irq_desc *desc, bool resend, bool force)
{
struct irq_data *d = irq_desc_get_irq_data(desc);
- struct cpumask *aff = irq_data_get_affinity_mask(d);
+ const struct cpumask *aff = irq_data_get_affinity_mask(d);
int ret = 0;
desc->depth = 0;
@@ -1516,7 +1518,8 @@ int irq_chip_request_resources_parent(struct irq_data *data)
if (data->chip->irq_request_resources)
return data->chip->irq_request_resources(data);
- return -ENOSYS;
+ /* no error on missing optional irq_chip::irq_request_resources */
+ return 0;
}
EXPORT_SYMBOL_GPL(irq_chip_request_resources_parent);
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index bc8e40cf2b65..bbcaac64038e 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -30,7 +30,7 @@ static void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state,
static void irq_debug_show_masks(struct seq_file *m, struct irq_desc *desc)
{
struct irq_data *data = irq_desc_get_irq_data(desc);
- struct cpumask *msk;
+ const struct cpumask *msk;
msk = irq_data_get_affinity_mask(data);
seq_printf(m, "affinity: %*pbl\n", cpumask_pr_args(msk));
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index f0862eb6b506..c653cd31548d 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -431,7 +431,7 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq,
return 0;
}
-static void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq)
+void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq)
{
struct irq_data *data = irq_domain_get_irq_data(d, virq);
struct irq_domain_chip_generic *dgc = d->gc;
diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c
index 08ce7da3b57c..bbd945bacef0 100644
--- a/kernel/irq/ipi.c
+++ b/kernel/irq/ipi.c
@@ -115,11 +115,11 @@ free_descs:
int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest)
{
struct irq_data *data = irq_get_irq_data(irq);
- struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL;
+ const struct cpumask *ipimask;
struct irq_domain *domain;
unsigned int nr_irqs;
- if (!irq || !data || !ipimask)
+ if (!irq || !data)
return -EINVAL;
domain = data->domain;
@@ -131,7 +131,8 @@ int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest)
return -EINVAL;
}
- if (WARN_ON(!cpumask_subset(dest, ipimask)))
+ ipimask = irq_data_get_affinity_mask(data);
+ if (!ipimask || WARN_ON(!cpumask_subset(dest, ipimask)))
/*
* Must be destroying a subset of CPUs to which this IPI
* was set up to target
@@ -162,12 +163,13 @@ int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest)
irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu)
{
struct irq_data *data = irq_get_irq_data(irq);
- struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL;
+ const struct cpumask *ipimask;
- if (!data || !ipimask || cpu >= nr_cpu_ids)
+ if (!data || cpu >= nr_cpu_ids)
return INVALID_HWIRQ;
- if (!cpumask_test_cpu(cpu, ipimask))
+ ipimask = irq_data_get_affinity_mask(data);
+ if (!ipimask || !cpumask_test_cpu(cpu, ipimask))
return INVALID_HWIRQ;
/*
@@ -186,7 +188,7 @@ EXPORT_SYMBOL_GPL(ipi_get_hwirq);
static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data,
const struct cpumask *dest, unsigned int cpu)
{
- struct cpumask *ipimask = irq_data_get_affinity_mask(data);
+ const struct cpumask *ipimask = irq_data_get_affinity_mask(data);
if (!chip || !ipimask)
return -EINVAL;
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index d323b180b0f3..5db0230aa6b5 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -251,7 +251,7 @@ static ssize_t actions_show(struct kobject *kobj,
char *p = "";
raw_spin_lock_irq(&desc->lock);
- for (action = desc->action; action != NULL; action = action->next) {
+ for_each_action_of_desc(desc, action) {
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
p, action->name);
p = ",";
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index d5ce96510549..8fe1da9614ee 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -147,7 +147,8 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int s
static atomic_t unknown_domains;
if (WARN_ON((size && direct_max) ||
- (!IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && direct_max)))
+ (!IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && direct_max) ||
+ (direct_max && (direct_max != hwirq_max))))
return NULL;
domain = kzalloc_node(struct_size(domain, revmap, size),
@@ -219,7 +220,6 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int s
domain->hwirq_max = hwirq_max;
if (direct_max) {
- size = direct_max;
domain->flags |= IRQ_DOMAIN_FLAG_NO_MAP;
}
@@ -650,9 +650,9 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
pr_debug("create_direct virq allocation failed\n");
return 0;
}
- if (virq >= domain->revmap_size) {
- pr_err("ERROR: no free irqs available below %i maximum\n",
- domain->revmap_size);
+ if (virq >= domain->hwirq_max) {
+ pr_err("ERROR: no free irqs available below %lu maximum\n",
+ domain->hwirq_max);
irq_free_desc(virq);
return 0;
}
@@ -906,10 +906,12 @@ struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain,
return desc;
if (irq_domain_is_nomap(domain)) {
- if (hwirq < domain->revmap_size) {
+ if (hwirq < domain->hwirq_max) {
data = irq_domain_get_irq_data(domain, hwirq);
if (data && data->hwirq == hwirq)
desc = irq_data_to_desc(data);
+ if (irq && desc)
+ *irq = hwirq;
}
return desc;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 8c396319d5ac..40fe7806cc8c 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -205,16 +205,8 @@ static void irq_validate_effective_affinity(struct irq_data *data)
pr_warn_once("irq_chip %s did not update eff. affinity mask of irq %u\n",
chip->name, data->irq);
}
-
-static inline void irq_init_effective_affinity(struct irq_data *data,
- const struct cpumask *mask)
-{
- cpumask_copy(irq_data_get_effective_affinity_mask(data), mask);
-}
#else
static inline void irq_validate_effective_affinity(struct irq_data *data) { }
-static inline void irq_init_effective_affinity(struct irq_data *data,
- const struct cpumask *mask) { }
#endif
int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
@@ -347,7 +339,7 @@ static bool irq_set_affinity_deactivated(struct irq_data *data,
return false;
cpumask_copy(desc->irq_common_data.affinity, mask);
- irq_init_effective_affinity(data, mask);
+ irq_data_update_effective_affinity(data, mask);
irqd_set(data, IRQD_AFFINITY_SET);
return true;
}
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index ca71123a6130..c556bc49d213 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -147,7 +147,6 @@ void suspend_device_irqs(void)
synchronize_irq(irq);
}
}
-EXPORT_SYMBOL_GPL(suspend_device_irqs);
static void resume_irq(struct irq_desc *desc)
{
@@ -259,4 +258,3 @@ void resume_device_irqs(void)
{
resume_irqs(false);
}
-EXPORT_SYMBOL_GPL(resume_device_irqs);
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index b156e152d6b4..714ac4c3b556 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -332,17 +332,13 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start,
return 0;
}
-/*
- * Update code which is definitely not currently executing.
- * Architectures which need heavyweight synchronization to modify
- * running code can override this to make the non-live update case
- * cheaper.
- */
-void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry *entry,
- enum jump_label_type type)
+#ifndef arch_jump_label_transform_static
+static void arch_jump_label_transform_static(struct jump_entry *entry,
+ enum jump_label_type type)
{
- arch_jump_label_transform(entry, type);
+ /* nothing to do on most architectures */
}
+#endif
static inline struct jump_entry *static_key_entries(struct static_key *key)
{
@@ -508,7 +504,7 @@ void __init jump_label_init(void)
#ifdef CONFIG_MODULES
-static enum jump_label_type jump_label_init_type(struct jump_entry *entry)
+enum jump_label_type jump_label_init_type(struct jump_entry *entry)
{
struct static_key *key = jump_entry_key(entry);
bool type = static_key_type(key);
@@ -596,31 +592,6 @@ static void __jump_label_mod_update(struct static_key *key)
}
}
-/***
- * apply_jump_label_nops - patch module jump labels with arch_get_jump_label_nop()
- * @mod: module to patch
- *
- * Allow for run-time selection of the optimal nops. Before the module
- * loads patch these with arch_get_jump_label_nop(), which is specified by
- * the arch specific jump label code.
- */
-void jump_label_apply_nops(struct module *mod)
-{
- struct jump_entry *iter_start = mod->jump_entries;
- struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
- struct jump_entry *iter;
-
- /* if the module doesn't have jump label entries, just return */
- if (iter_start == iter_stop)
- return;
-
- for (iter = iter_start; iter < iter_stop; iter++) {
- /* Only write NOPs for arch_branch_static(). */
- if (jump_label_init_type(iter) == JUMP_LABEL_NOP)
- arch_jump_label_transform_static(iter, JUMP_LABEL_NOP);
- }
-}
-
static int jump_label_add_module(struct module *mod)
{
struct jump_entry *iter_start = mod->jump_entries;
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 145321a5e798..f9261c07b048 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -29,6 +29,15 @@
#include <linux/vmalloc.h>
#include "kexec_internal.h"
+#ifdef CONFIG_KEXEC_SIG
+static bool sig_enforce = IS_ENABLED(CONFIG_KEXEC_SIG_FORCE);
+
+void set_kexec_sig_enforced(void)
+{
+ sig_enforce = true;
+}
+#endif
+
static int kexec_calculate_store_digests(struct kimage *image);
/*
@@ -159,7 +168,7 @@ kimage_validate_signature(struct kimage *image)
image->kernel_buf_len);
if (ret) {
- if (IS_ENABLED(CONFIG_KEXEC_SIG_FORCE)) {
+ if (sig_enforce) {
pr_notice("Enforced kernel signature verification failed (%d).\n", ret);
return ret;
}
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index f06b91ca6482..e2f179491b08 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -5238,9 +5238,10 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
return 0;
}
- lockdep_init_map_waits(lock, name, key, 0,
- lock->wait_type_inner,
- lock->wait_type_outer);
+ lockdep_init_map_type(lock, name, key, 0,
+ lock->wait_type_inner,
+ lock->wait_type_outer,
+ lock->lock_type);
class = register_lock_class(lock, subclass, 0);
hlock->class_idx = class - lock_classes;
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 9d1db4a54d34..65f0262f635e 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -335,8 +335,6 @@ struct rwsem_waiter {
struct task_struct *task;
enum rwsem_waiter_type type;
unsigned long timeout;
-
- /* Writer only, not initialized in reader */
bool handoff_set;
};
#define rwsem_first_waiter(sem) \
@@ -459,10 +457,12 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
* to give up the lock), request a HANDOFF to
* force the issue.
*/
- if (!(oldcount & RWSEM_FLAG_HANDOFF) &&
- time_after(jiffies, waiter->timeout)) {
- adjustment -= RWSEM_FLAG_HANDOFF;
- lockevent_inc(rwsem_rlock_handoff);
+ if (time_after(jiffies, waiter->timeout)) {
+ if (!(oldcount & RWSEM_FLAG_HANDOFF)) {
+ adjustment -= RWSEM_FLAG_HANDOFF;
+ lockevent_inc(rwsem_rlock_handoff);
+ }
+ waiter->handoff_set = true;
}
atomic_long_add(-adjustment, &sem->count);
@@ -599,7 +599,7 @@ rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
struct rwsem_waiter *waiter)
{
- bool first = rwsem_first_waiter(sem) == waiter;
+ struct rwsem_waiter *first = rwsem_first_waiter(sem);
long count, new;
lockdep_assert_held(&sem->wait_lock);
@@ -609,11 +609,20 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
if (has_handoff) {
- if (!first)
+ /*
+ * Honor handoff bit and yield only when the first
+ * waiter is the one that set it. Otherwise, we
+ * still try to acquire the rwsem.
+ */
+ if (first->handoff_set && (waiter != first))
return false;
- /* First waiter inherits a previously set handoff bit */
- waiter->handoff_set = true;
+ /*
+ * First waiter can inherit a previously set handoff
+ * bit and spin on rwsem if lock acquisition fails.
+ */
+ if (waiter == first)
+ waiter->handoff_set = true;
}
new = count;
@@ -1027,6 +1036,7 @@ queue:
waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_READ;
waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
+ waiter.handoff_set = false;
raw_spin_lock_irq(&sem->wait_lock);
if (list_empty(&sem->wait_list)) {
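
The rwsem changes above let readers set handoff_set as well, and make a waiter yield only to the first waiter once that waiter has actually claimed the handoff bit. A compact userspace model of the reworked decision (plain structs and a hypothetical helper name, not the kernel locking code):

#include <stdbool.h>
#include <stdio.h>

struct waiter {
	bool handoff_set;
};

/*
 * Model of the rewritten check in rwsem_try_write_lock():
 * returns true when the caller must give up this trylock attempt.
 */
static bool must_yield(bool count_has_handoff, struct waiter *first,
		       struct waiter *self)
{
	if (!count_has_handoff)
		return false;
	/* Yield only to the first waiter, and only if it set the bit. */
	if (first->handoff_set && self != first)
		return true;
	/* The first waiter inherits a previously set handoff bit. */
	if (self == first)
		self->handoff_set = true;
	return false;
}

int main(void)
{
	struct waiter a = { .handoff_set = false }, b = { .handoff_set = false };

	printf("%d\n", must_yield(true, &a, &b)); /* 0: bit set, but first waiter has not claimed it */
	printf("%d\n", must_yield(true, &a, &a)); /* 0: first waiter inherits the bit and keeps trying */
	printf("%d\n", must_yield(true, &a, &b)); /* 1: a owns the bit now, so b backs off */
	return 0;
}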
diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index bc5507ab8450..ec104c2950c3 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -11,6 +11,7 @@
#include <linux/mutex.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
+#include <linux/mm.h>
#ifndef ARCH_SHF_SMALL
#define ARCH_SHF_SMALL 0
@@ -30,11 +31,13 @@
* to ensure complete separation of code and data, but
* only when CONFIG_STRICT_MODULE_RWX=y
*/
-#ifdef CONFIG_STRICT_MODULE_RWX
-# define strict_align(X) PAGE_ALIGN(X)
-#else
-# define strict_align(X) (X)
-#endif
+static inline unsigned int strict_align(unsigned int size)
+{
+ if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
+ return PAGE_ALIGN(size);
+ else
+ return size;
+}
extern struct mutex module_mutex;
extern struct list_head modules;
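
The strict_align() change above swaps a conditional macro for a static inline so both branches are always type-checked, whatever the config. A small userspace sketch under the assumption of a 4 KiB page size and a local PAGE_ALIGN() stand-in:

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096u
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

/* Stand-in for IS_ENABLED(CONFIG_STRICT_MODULE_RWX). */
static const bool strict_module_rwx = true;

static inline unsigned int strict_align(unsigned int size)
{
	if (strict_module_rwx)
		return PAGE_ALIGN(size);
	else
		return size;
}

int main(void)
{
	printf("%u\n", strict_align(1));    /* 4096 when strict, 1 otherwise */
	printf("%u\n", strict_align(4096)); /* 4096 either way */
	return 0;
}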
diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c
index 3e11523bc6f6..77e75bead569 100644
--- a/kernel/module/kallsyms.c
+++ b/kernel/module/kallsyms.c
@@ -137,6 +137,7 @@ void layout_symtab(struct module *mod, struct load_info *info)
info->symoffs = ALIGN(mod->data_layout.size, symsect->sh_addralign ?: 1);
info->stroffs = mod->data_layout.size = info->symoffs + ndst * sizeof(Elf_Sym);
mod->data_layout.size += strtab_size;
+ /* Note add_kallsyms() computes strtab_size as core_typeoffs - stroffs */
info->core_typeoffs = mod->data_layout.size;
mod->data_layout.size += ndst * sizeof(char);
mod->data_layout.size = strict_align(mod->data_layout.size);
@@ -169,19 +170,20 @@ void add_kallsyms(struct module *mod, const struct load_info *info)
Elf_Sym *dst;
char *s;
Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
+ unsigned long strtab_size;
/* Set up to point into init section. */
mod->kallsyms = (void __rcu *)mod->init_layout.base +
info->mod_kallsyms_init_off;
- preempt_disable();
+ rcu_read_lock();
/* The following is safe since this pointer cannot change */
- rcu_dereference_sched(mod->kallsyms)->symtab = (void *)symsec->sh_addr;
- rcu_dereference_sched(mod->kallsyms)->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
+ rcu_dereference(mod->kallsyms)->symtab = (void *)symsec->sh_addr;
+ rcu_dereference(mod->kallsyms)->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
/* Make sure we get permanent strtab: don't use info->strtab. */
- rcu_dereference_sched(mod->kallsyms)->strtab =
+ rcu_dereference(mod->kallsyms)->strtab =
(void *)info->sechdrs[info->index.str].sh_addr;
- rcu_dereference_sched(mod->kallsyms)->typetab = mod->init_layout.base + info->init_typeoffs;
+ rcu_dereference(mod->kallsyms)->typetab = mod->init_layout.base + info->init_typeoffs;
/*
* Now populate the cut down core kallsyms for after init
@@ -190,22 +192,29 @@ void add_kallsyms(struct module *mod, const struct load_info *info)
mod->core_kallsyms.symtab = dst = mod->data_layout.base + info->symoffs;
mod->core_kallsyms.strtab = s = mod->data_layout.base + info->stroffs;
mod->core_kallsyms.typetab = mod->data_layout.base + info->core_typeoffs;
- src = rcu_dereference_sched(mod->kallsyms)->symtab;
- for (ndst = i = 0; i < rcu_dereference_sched(mod->kallsyms)->num_symtab; i++) {
- rcu_dereference_sched(mod->kallsyms)->typetab[i] = elf_type(src + i, info);
+ strtab_size = info->core_typeoffs - info->stroffs;
+ src = rcu_dereference(mod->kallsyms)->symtab;
+ for (ndst = i = 0; i < rcu_dereference(mod->kallsyms)->num_symtab; i++) {
+ rcu_dereference(mod->kallsyms)->typetab[i] = elf_type(src + i, info);
if (i == 0 || is_livepatch_module(mod) ||
is_core_symbol(src + i, info->sechdrs, info->hdr->e_shnum,
info->index.pcpu)) {
+ ssize_t ret;
+
mod->core_kallsyms.typetab[ndst] =
- rcu_dereference_sched(mod->kallsyms)->typetab[i];
+ rcu_dereference(mod->kallsyms)->typetab[i];
dst[ndst] = src[i];
dst[ndst++].st_name = s - mod->core_kallsyms.strtab;
- s += strscpy(s,
- &rcu_dereference_sched(mod->kallsyms)->strtab[src[i].st_name],
- KSYM_NAME_LEN) + 1;
+ ret = strscpy(s,
+ &rcu_dereference(mod->kallsyms)->strtab[src[i].st_name],
+ strtab_size);
+ if (ret < 0)
+ break;
+ s += ret + 1;
+ strtab_size -= ret + 1;
}
}
- preempt_enable();
+ rcu_read_unlock();
mod->core_kallsyms.num_symtab = ndst;
}
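
The add_kallsyms() hunk above bounds every strscpy() by the space left in the core strtab (computed as core_typeoffs - stroffs) instead of KSYM_NAME_LEN, and stops copying on truncation. A rough userspace sketch of that bounded-copy loop; bounded_copy() is a simplified stand-in for strscpy(), not the kernel helper:

#include <stdio.h>
#include <string.h>

/* Simplified strscpy(): copy up to size-1 bytes, NUL-terminate,
 * return the copied length or -1 when src does not fit. */
static long bounded_copy(char *dst, const char *src, size_t size)
{
	size_t len = strlen(src);

	if (size == 0 || len >= size)
		return -1;
	memcpy(dst, src, len + 1);
	return (long)len;
}

int main(void)
{
	const char *names[] = { "alpha", "beta", "gamma_very_long_symbol" };
	char strtab[16];
	char *s = strtab;
	size_t remaining = sizeof(strtab);

	for (size_t i = 0; i < 3; i++) {
		long ret = bounded_copy(s, names[i], remaining);

		if (ret < 0) {	/* out of strtab space: stop, as add_kallsyms() now does */
			printf("stopped at %zu\n", i);
			break;
		}
		s += ret + 1;
		remaining -= ret + 1;
	}
	return 0;
}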
diff --git a/kernel/module/main.c b/kernel/module/main.c
index fed58d30725d..0548151dd933 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -2939,24 +2939,25 @@ static void cfi_init(struct module *mod)
{
#ifdef CONFIG_CFI_CLANG
initcall_t *init;
+#ifdef CONFIG_MODULE_UNLOAD
exitcall_t *exit;
+#endif
rcu_read_lock_sched();
mod->cfi_check = (cfi_check_fn)
find_kallsyms_symbol_value(mod, "__cfi_check");
init = (initcall_t *)
find_kallsyms_symbol_value(mod, "__cfi_jt_init_module");
- exit = (exitcall_t *)
- find_kallsyms_symbol_value(mod, "__cfi_jt_cleanup_module");
- rcu_read_unlock_sched();
-
/* Fix init/exit functions to point to the CFI jump table */
if (init)
mod->init = *init;
#ifdef CONFIG_MODULE_UNLOAD
+ exit = (exitcall_t *)
+ find_kallsyms_symbol_value(mod, "__cfi_jt_cleanup_module");
if (exit)
mod->exit = *exit;
#endif
+ rcu_read_unlock_sched();
cfi_module_add(mod, mod_tree.addr_min);
#endif
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index b49c6ff6dca0..a1a81fd9889b 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3380,6 +3380,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre
diff = 0;
console_lock();
+
for_each_console(c) {
if (con && con != c)
continue;
@@ -3389,11 +3390,19 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre
if (printk_seq < seq)
diff += seq - printk_seq;
}
- console_unlock();
- if (diff != last_diff && reset_on_progress)
+ /*
+ * If consoles are suspended, it cannot be expected that they
+ * make forward progress, so timeout immediately. @diff is
+ * still used to return a valid flush status.
+ */
+ if (console_suspended)
+ remaining = 0;
+ else if (diff != last_diff && reset_on_progress)
remaining = timeout_ms;
+ console_unlock();
+
if (diff == 0 || remaining == 0)
break;
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 50ba70f019de..1c304fec89c0 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -511,10 +511,52 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
return sum;
}
-#define SRCU_INTERVAL 1 // Base delay if no expedited GPs pending.
-#define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow readers.
-#define SRCU_MAX_NODELAY_PHASE 1 // Maximum per-GP-phase consecutive no-delay instances.
-#define SRCU_MAX_NODELAY 100 // Maximum consecutive no-delay instances.
+/*
+ * We use an adaptive strategy for synchronize_srcu() and especially for
+ * synchronize_srcu_expedited(). We spin for a fixed time period
+ * (defined below, boot time configurable) to allow SRCU readers to exit
+ * their read-side critical sections. If there are still some readers
+ * after one jiffy, we repeatedly block for one-jiffy periods.
+ * The blocking time is increased as the grace-period age increases,
+ * with max blocking time capped at 10 jiffies.
+ */
+#define SRCU_DEFAULT_RETRY_CHECK_DELAY 5
+
+static ulong srcu_retry_check_delay = SRCU_DEFAULT_RETRY_CHECK_DELAY;
+module_param(srcu_retry_check_delay, ulong, 0444);
+
+#define SRCU_INTERVAL 1 // Base delay if no expedited GPs pending.
+#define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow readers.
+
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE_LO 3UL // Lowmark on default per-GP-phase
+ // no-delay instances.
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE_HI 1000UL // Highmark on default per-GP-phase
+ // no-delay instances.
+
+#define SRCU_UL_CLAMP_LO(val, low) ((val) > (low) ? (val) : (low))
+#define SRCU_UL_CLAMP_HI(val, high) ((val) < (high) ? (val) : (high))
+#define SRCU_UL_CLAMP(val, low, high) SRCU_UL_CLAMP_HI(SRCU_UL_CLAMP_LO((val), (low)), (high))
+// Per-GP-phase no-delay instances adjusted to allow a non-sleeping poll of up
+// to one jiffy. The factor of 2 accounts for the srcu_get_delay() call made
+// from process_srcu().
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED \
+ (2UL * USEC_PER_SEC / HZ / SRCU_DEFAULT_RETRY_CHECK_DELAY)
+
+// Maximum per-GP-phase consecutive no-delay instances.
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE \
+ SRCU_UL_CLAMP(SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED, \
+ SRCU_DEFAULT_MAX_NODELAY_PHASE_LO, \
+ SRCU_DEFAULT_MAX_NODELAY_PHASE_HI)
+
+static ulong srcu_max_nodelay_phase = SRCU_DEFAULT_MAX_NODELAY_PHASE;
+module_param(srcu_max_nodelay_phase, ulong, 0444);
+
+// Maximum consecutive no-delay instances.
+#define SRCU_DEFAULT_MAX_NODELAY (SRCU_DEFAULT_MAX_NODELAY_PHASE > 100 ? \
+ SRCU_DEFAULT_MAX_NODELAY_PHASE : 100)
+
+static ulong srcu_max_nodelay = SRCU_DEFAULT_MAX_NODELAY;
+module_param(srcu_max_nodelay, ulong, 0444);
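
As a worked example of the new defaults (assuming HZ=1000, i.e. 1000 us per jiffy): SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED = 2 * 1000000 / 1000 / 5 = 400, which already lies inside the [3, 1000] clamp, so srcu_max_nodelay_phase defaults to 400 and srcu_max_nodelay to max(400, 100) = 400. With HZ=100 the adjusted value would be 4000 and the clamp brings it down to 1000.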
/*
* Return grace-period delay, zero if there are expedited grace
@@ -522,16 +564,22 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
*/
static unsigned long srcu_get_delay(struct srcu_struct *ssp)
{
+ unsigned long gpstart;
+ unsigned long j;
unsigned long jbase = SRCU_INTERVAL;
if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
jbase = 0;
- if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)))
- jbase += jiffies - READ_ONCE(ssp->srcu_gp_start);
- if (!jbase) {
- WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
- if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
- jbase = 1;
+ if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) {
+ j = jiffies - 1;
+ gpstart = READ_ONCE(ssp->srcu_gp_start);
+ if (time_after(j, gpstart))
+ jbase += j - gpstart;
+ if (!jbase) {
+ WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
+ if (READ_ONCE(ssp->srcu_n_exp_nodelay) > srcu_max_nodelay_phase)
+ jbase = 1;
+ }
}
return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
}
@@ -607,15 +655,6 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
/*
- * We use an adaptive strategy for synchronize_srcu() and especially for
- * synchronize_srcu_expedited(). We spin for a fixed time period
- * (defined below) to allow SRCU readers to exit their read-side critical
- * sections. If there are still some readers after a few microseconds,
- * we repeatedly block for 1-millisecond time periods.
- */
-#define SRCU_RETRY_CHECK_DELAY 5
-
-/*
* Start an SRCU grace period.
*/
static void srcu_gp_start(struct srcu_struct *ssp)
@@ -700,7 +739,7 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *ssp, struct srcu_node *snp
*/
static void srcu_gp_end(struct srcu_struct *ssp)
{
- unsigned long cbdelay;
+ unsigned long cbdelay = 1;
bool cbs;
bool last_lvl;
int cpu;
@@ -720,7 +759,9 @@ static void srcu_gp_end(struct srcu_struct *ssp)
spin_lock_irq_rcu_node(ssp);
idx = rcu_seq_state(ssp->srcu_gp_seq);
WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
- cbdelay = !!srcu_get_delay(ssp);
+ if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
+ cbdelay = 0;
+
WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
rcu_seq_end(&ssp->srcu_gp_seq);
gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
@@ -921,12 +962,16 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
*/
static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount)
{
+ unsigned long curdelay;
+
+ curdelay = !srcu_get_delay(ssp);
+
for (;;) {
if (srcu_readers_active_idx_check(ssp, idx))
return true;
- if (--trycount + !srcu_get_delay(ssp) <= 0)
+ if ((--trycount + curdelay) <= 0)
return false;
- udelay(SRCU_RETRY_CHECK_DELAY);
+ udelay(srcu_retry_check_delay);
}
}
@@ -1582,7 +1627,7 @@ static void process_srcu(struct work_struct *work)
j = jiffies;
if (READ_ONCE(ssp->reschedule_jiffies) == j) {
WRITE_ONCE(ssp->reschedule_count, READ_ONCE(ssp->reschedule_count) + 1);
- if (READ_ONCE(ssp->reschedule_count) > SRCU_MAX_NODELAY)
+ if (READ_ONCE(ssp->reschedule_count) > srcu_max_nodelay)
curdelay = 1;
} else {
WRITE_ONCE(ssp->reschedule_count, 1);
@@ -1674,6 +1719,11 @@ static int __init srcu_bootup_announce(void)
pr_info("Hierarchical SRCU implementation.\n");
if (exp_holdoff != DEFAULT_SRCU_EXP_HOLDOFF)
pr_info("\tNon-default auto-expedite holdoff of %lu ns.\n", exp_holdoff);
+ if (srcu_retry_check_delay != SRCU_DEFAULT_RETRY_CHECK_DELAY)
+ pr_info("\tNon-default retry check delay of %lu us.\n", srcu_retry_check_delay);
+ if (srcu_max_nodelay != SRCU_DEFAULT_MAX_NODELAY)
+ pr_info("\tNon-default max no-delay of %lu.\n", srcu_max_nodelay);
+ pr_info("\tMax phase no-delay instances is %lu.\n", srcu_max_nodelay_phase);
return 0;
}
early_initcall(srcu_bootup_announce);
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 97ac20b4f738..bda8175f8f99 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -18,8 +18,9 @@
#define CREATE_TRACE_POINTS
#include <trace/events/rseq.h>
-#define RSEQ_CS_PREEMPT_MIGRATE_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE | \
- RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT)
+#define RSEQ_CS_NO_RESTART_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT | \
+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \
+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE)
/*
*
@@ -175,23 +176,15 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
u32 flags, event_mask;
int ret;
+ if (WARN_ON_ONCE(cs_flags & RSEQ_CS_NO_RESTART_FLAGS) || cs_flags)
+ return -EINVAL;
+
/* Get thread flags. */
ret = get_user(flags, &t->rseq->flags);
if (ret)
return ret;
- /* Take critical section flags into account. */
- flags |= cs_flags;
-
- /*
- * Restart on signal can only be inhibited when restart on
- * preempt and restart on migrate are inhibited too. Otherwise,
- * a preempted signal handler could fail to restart the prior
- * execution context on sigreturn.
- */
- if (unlikely((flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL) &&
- (flags & RSEQ_CS_PREEMPT_MIGRATE_FLAGS) !=
- RSEQ_CS_PREEMPT_MIGRATE_FLAGS))
+ if (WARN_ON_ONCE(flags & RSEQ_CS_NO_RESTART_FLAGS) || flags)
return -EINVAL;
/*
@@ -203,7 +196,7 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
t->rseq_event_mask = 0;
preempt_enable();
- return !!(event_mask & ~flags);
+ return !!event_mask;
}
static int clear_rseq_cs(struct task_struct *t)
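
The rseq change above rejects any non-zero flags value and warns only when one of the three now-deprecated NO_RESTART flags is present. A tiny userspace sketch of that WARN_ON_ONCE(x & MASK) || x idiom, with made-up flag values and a warn helper that (unlike the kernel's once-only macro) warns every time:

#include <stdio.h>

#define NO_RESTART_FLAGS 0x7u	/* stand-in for the three deprecated flags */

/* Models WARN_ON_ONCE(): report and return the condition. */
static int warn_once(int cond)
{
	if (cond)
		fprintf(stderr, "WARN: deprecated rseq flags used\n");
	return cond;
}

static int check_flags(unsigned int flags)
{
	if (warn_once(flags & NO_RESTART_FLAGS) || flags)
		return -22;	/* -EINVAL */
	return 0;
}

int main(void)
{
	printf("%d\n", check_flags(0x0));	/*   0: no flags, accepted        */
	printf("%d\n", check_flags(0x2));	/* -22: deprecated flag, warns    */
	printf("%d\n", check_flags(0x8));	/* -22: unknown flag, no warning  */
	return 0;
}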
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index da0bf6fe9ecd..5555e49c4e12 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -873,15 +873,11 @@ static inline void hrtick_rq_init(struct rq *rq)
({ \
typeof(ptr) _ptr = (ptr); \
typeof(mask) _mask = (mask); \
- typeof(*_ptr) _old, _val = *_ptr; \
+ typeof(*_ptr) _val = *_ptr; \
\
- for (;;) { \
- _old = cmpxchg(_ptr, _val, _val | _mask); \
- if (_old == _val) \
- break; \
- _val = _old; \
- } \
- _old; \
+ do { \
+ } while (!try_cmpxchg(_ptr, &_val, _val | _mask)); \
+ _val; \
})
#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
@@ -890,7 +886,7 @@ static inline void hrtick_rq_init(struct rq *rq)
* this avoids any races wrt polling state changes and thereby avoids
* spurious IPIs.
*/
-static bool set_nr_and_not_polling(struct task_struct *p)
+static inline bool set_nr_and_not_polling(struct task_struct *p)
{
struct thread_info *ti = task_thread_info(p);
return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG);
@@ -905,30 +901,28 @@ static bool set_nr_and_not_polling(struct task_struct *p)
static bool set_nr_if_polling(struct task_struct *p)
{
struct thread_info *ti = task_thread_info(p);
- typeof(ti->flags) old, val = READ_ONCE(ti->flags);
+ typeof(ti->flags) val = READ_ONCE(ti->flags);
for (;;) {
if (!(val & _TIF_POLLING_NRFLAG))
return false;
if (val & _TIF_NEED_RESCHED)
return true;
- old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED);
- if (old == val)
+ if (try_cmpxchg(&ti->flags, &val, val | _TIF_NEED_RESCHED))
break;
- val = old;
}
return true;
}
#else
-static bool set_nr_and_not_polling(struct task_struct *p)
+static inline bool set_nr_and_not_polling(struct task_struct *p)
{
set_tsk_need_resched(p);
return true;
}
#ifdef CONFIG_SMP
-static bool set_nr_if_polling(struct task_struct *p)
+static inline bool set_nr_if_polling(struct task_struct *p)
{
return false;
}
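
The fetch_or()/set_nr_if_polling() rework above leans on try_cmpxchg() rewriting the expected value on failure, which removes the manual old/val shuffling. A userspace sketch of the same shape using C11 atomics (atomic_compare_exchange_weak() likewise updates the expected value when it fails); real userspace code would simply call atomic_fetch_or(), the loop is spelled out only to mirror the kernel macro:

#include <stdatomic.h>
#include <stdio.h>

/* Atomically OR in mask and return the previous value, as fetch_or() does. */
static unsigned long fetch_or_mask(_Atomic unsigned long *ptr, unsigned long mask)
{
	unsigned long val = atomic_load(ptr);

	/* On failure the current value is written back into val,
	 * so no explicit old/val juggling is needed. */
	while (!atomic_compare_exchange_weak(ptr, &val, val | mask))
		;
	return val;
}

int main(void)
{
	_Atomic unsigned long flags = 0x1;
	unsigned long prev = fetch_or_mask(&flags, 0x4);

	printf("prev=%#lx now=%#lx\n", prev, atomic_load(&flags)); /* prev=0x1 now=0x5 */
	return 0;
}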
@@ -3808,7 +3802,7 @@ bool cpus_share_cache(int this_cpu, int that_cpu)
return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
}
-static inline bool ttwu_queue_cond(int cpu, int wake_flags)
+static inline bool ttwu_queue_cond(int cpu)
{
/*
* Do not complicate things with the async wake_list while the CPU is
@@ -3824,13 +3818,21 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
if (!cpus_share_cache(smp_processor_id(), cpu))
return true;
+ if (cpu == smp_processor_id())
+ return false;
+
/*
- * If the task is descheduling and the only running task on the
- * CPU then use the wakelist to offload the task activation to
- * the soon-to-be-idle CPU as the current CPU is likely busy.
- * nr_running is checked to avoid unnecessary task stacking.
+ * If the wakee cpu is idle, or the task is descheduling and is the
+ * only running task on the CPU, then use the wakelist to offload
+ * the task activation to the idle (or soon-to-be-idle) CPU as
+ * the current CPU is likely busy. nr_running is checked to
+ * avoid unnecessary task stacking.
+ *
+ * Note that we can only get here with (wakee) p->on_rq=0,
+ * p->on_cpu can be whatever, we've done the dequeue, so
+ * the wakee has been accounted out of ->nr_running.
*/
- if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1)
+ if (!cpu_rq(cpu)->nr_running)
return true;
return false;
@@ -3838,10 +3840,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
{
- if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) {
- if (WARN_ON_ONCE(cpu == smp_processor_id()))
- return false;
-
+ if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu)) {
sched_clock_cpu(cpu); /* Sync clocks across CPUs */
__ttwu_queue_wakelist(p, cpu, wake_flags);
return true;
@@ -4163,7 +4162,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
* scheduling.
*/
if (smp_load_acquire(&p->on_cpu) &&
- ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU))
+ ttwu_queue_wakelist(p, task_cpu(p), wake_flags))
goto unlock;
/*
@@ -4753,7 +4752,8 @@ static inline void prepare_task(struct task_struct *next)
* Claim the task as running, we do this before switching to it
* such that any running task will have this set.
*
- * See the ttwu() WF_ON_CPU case and its ordering comment.
+ * See the smp_load_acquire(&p->on_cpu) case in ttwu() and
+ * its ordering comment.
*/
WRITE_ONCE(next->on_cpu, 1);
#endif
@@ -6500,8 +6500,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
io_wq_worker_sleeping(tsk);
}
- if (tsk_is_pi_blocked(tsk))
- return;
+ /*
+ * spinlock and rwlock must not flush block requests. This will
+ * deadlock if the callback attempts to acquire a lock which is
+ * already acquired.
+ */
+ SCHED_WARN_ON(current->__state & TASK_RTLOCK_WAIT);
/*
* If we are going to sleep and we have plugged IO queued,
@@ -6998,17 +7002,29 @@ out_unlock:
EXPORT_SYMBOL(set_user_nice);
/*
- * can_nice - check if a task can reduce its nice value
+ * is_nice_reduction - check if nice value is an actual reduction
+ *
+ * Similar to can_nice() but does not perform a capability check.
+ *
* @p: task
* @nice: nice value
*/
-int can_nice(const struct task_struct *p, const int nice)
+static bool is_nice_reduction(const struct task_struct *p, const int nice)
{
/* Convert nice value [19,-20] to rlimit style value [1,40]: */
int nice_rlim = nice_to_rlimit(nice);
- return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
- capable(CAP_SYS_NICE));
+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE));
+}
+
+/*
+ * can_nice - check if a task can reduce its nice value
+ * @p: task
+ * @nice: nice value
+ */
+int can_nice(const struct task_struct *p, const int nice)
+{
+ return is_nice_reduction(p, nice) || capable(CAP_SYS_NICE);
}
#ifdef __ARCH_WANT_SYS_NICE
@@ -7137,12 +7153,14 @@ struct task_struct *idle_task(int cpu)
* required to meet deadlines.
*/
unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
- unsigned long max, enum cpu_util_type type,
+ enum cpu_util_type type,
struct task_struct *p)
{
- unsigned long dl_util, util, irq;
+ unsigned long dl_util, util, irq, max;
struct rq *rq = cpu_rq(cpu);
+ max = arch_scale_cpu_capacity(cpu);
+
if (!uclamp_is_used() &&
type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt)) {
return max;
@@ -7222,10 +7240,9 @@ unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
return min(max, util);
}
-unsigned long sched_cpu_util(int cpu, unsigned long max)
+unsigned long sched_cpu_util(int cpu)
{
- return effective_cpu_util(cpu, cpu_util_cfs(cpu), max,
- ENERGY_UTIL, NULL);
+ return effective_cpu_util(cpu, cpu_util_cfs(cpu), ENERGY_UTIL, NULL);
}
#endif /* CONFIG_SMP */
@@ -7287,6 +7304,69 @@ static bool check_same_owner(struct task_struct *p)
return match;
}
+/*
+ * Allow unprivileged RT tasks to decrease priority.
+ * Only issue a capable test if needed and only once to avoid an audit
+ * event on permitted non-privileged operations:
+ */
+static int user_check_sched_setscheduler(struct task_struct *p,
+ const struct sched_attr *attr,
+ int policy, int reset_on_fork)
+{
+ if (fair_policy(policy)) {
+ if (attr->sched_nice < task_nice(p) &&
+ !is_nice_reduction(p, attr->sched_nice))
+ goto req_priv;
+ }
+
+ if (rt_policy(policy)) {
+ unsigned long rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO);
+
+ /* Can't set/change the rt policy: */
+ if (policy != p->policy && !rlim_rtprio)
+ goto req_priv;
+
+ /* Can't increase priority: */
+ if (attr->sched_priority > p->rt_priority &&
+ attr->sched_priority > rlim_rtprio)
+ goto req_priv;
+ }
+
+ /*
+ * Can't set/change SCHED_DEADLINE policy at all for now
+ * (safest behavior); in the future we would like to allow
+ * unprivileged DL tasks to increase their relative deadline
+ * or reduce their runtime (both ways reducing utilization)
+ */
+ if (dl_policy(policy))
+ goto req_priv;
+
+ /*
+ * Treat SCHED_IDLE as nice 20. Only allow a switch to
+ * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
+ */
+ if (task_has_idle_policy(p) && !idle_policy(policy)) {
+ if (!is_nice_reduction(p, task_nice(p)))
+ goto req_priv;
+ }
+
+ /* Can't change other user's priorities: */
+ if (!check_same_owner(p))
+ goto req_priv;
+
+ /* Normal users shall not reset the sched_reset_on_fork flag: */
+ if (p->sched_reset_on_fork && !reset_on_fork)
+ goto req_priv;
+
+ return 0;
+
+req_priv:
+ if (!capable(CAP_SYS_NICE))
+ return -EPERM;
+
+ return 0;
+}
+
static int __sched_setscheduler(struct task_struct *p,
const struct sched_attr *attr,
bool user, bool pi)
@@ -7328,58 +7408,11 @@ recheck:
(rt_policy(policy) != (attr->sched_priority != 0)))
return -EINVAL;
- /*
- * Allow unprivileged RT tasks to decrease priority:
- */
- if (user && !capable(CAP_SYS_NICE)) {
- if (fair_policy(policy)) {
- if (attr->sched_nice < task_nice(p) &&
- !can_nice(p, attr->sched_nice))
- return -EPERM;
- }
-
- if (rt_policy(policy)) {
- unsigned long rlim_rtprio =
- task_rlimit(p, RLIMIT_RTPRIO);
-
- /* Can't set/change the rt policy: */
- if (policy != p->policy && !rlim_rtprio)
- return -EPERM;
-
- /* Can't increase priority: */
- if (attr->sched_priority > p->rt_priority &&
- attr->sched_priority > rlim_rtprio)
- return -EPERM;
- }
-
- /*
- * Can't set/change SCHED_DEADLINE policy at all for now
- * (safest behavior); in the future we would like to allow
- * unprivileged DL tasks to increase their relative deadline
- * or reduce their runtime (both ways reducing utilization)
- */
- if (dl_policy(policy))
- return -EPERM;
-
- /*
- * Treat SCHED_IDLE as nice 20. Only allow a switch to
- * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
- */
- if (task_has_idle_policy(p) && !idle_policy(policy)) {
- if (!can_nice(p, task_nice(p)))
- return -EPERM;
- }
-
- /* Can't change other user's priorities: */
- if (!check_same_owner(p))
- return -EPERM;
-
- /* Normal users shall not reset the sched_reset_on_fork flag: */
- if (p->sched_reset_on_fork && !reset_on_fork)
- return -EPERM;
- }
-
if (user) {
+ retval = user_check_sched_setscheduler(p, attr, policy, reset_on_fork);
+ if (retval)
+ return retval;
+
if (attr->sched_flags & SCHED_FLAG_SUGOV)
return -EINVAL;
@@ -9531,7 +9564,7 @@ static struct kmem_cache *task_group_cache __read_mostly;
#endif
DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
-DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
+DECLARE_PER_CPU(cpumask_var_t, select_rq_mask);
void __init sched_init(void)
{
@@ -9580,7 +9613,7 @@ void __init sched_init(void)
for_each_possible_cpu(i) {
per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node(
cpumask_size(), GFP_KERNEL, cpu_to_node(i));
- per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node(
+ per_cpu(select_rq_mask, i) = (cpumask_var_t)kzalloc_node(
cpumask_size(), GFP_KERNEL, cpu_to_node(i));
}
#endif /* CONFIG_CPUMASK_OFFSTACK */
diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c
index 38a2cec21014..93878cb2a46d 100644
--- a/kernel/sched/core_sched.c
+++ b/kernel/sched/core_sched.c
@@ -56,7 +56,6 @@ static unsigned long sched_core_update_cookie(struct task_struct *p,
unsigned long old_cookie;
struct rq_flags rf;
struct rq *rq;
- bool enqueued;
rq = task_rq_lock(p, &rf);
@@ -68,14 +67,16 @@ static unsigned long sched_core_update_cookie(struct task_struct *p,
*/
SCHED_WARN_ON((p->core_cookie || cookie) && !sched_core_enabled(rq));
- enqueued = sched_core_enqueued(p);
- if (enqueued)
+ if (sched_core_enqueued(p))
sched_core_dequeue(rq, p, DEQUEUE_SAVE);
old_cookie = p->core_cookie;
p->core_cookie = cookie;
- if (enqueued)
+ /*
+ * Consider the cases: !prev_cookie and !cookie.
+ */
+ if (cookie && task_on_rq_queued(p))
sched_core_enqueue(rq, p);
/*
@@ -277,7 +278,11 @@ void __sched_core_account_forceidle(struct rq *rq)
if (p == rq_i->idle)
continue;
- __schedstat_add(p->stats.core_forceidle_sum, delta);
+ /*
+ * Note: this will account forceidle to the current cpu, even
+ * if it comes from our SMT sibling.
+ */
+ __account_forceidle_time(p, delta);
}
}
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 3dbf351d12d5..1207c78f85c1 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -157,11 +157,10 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
static void sugov_get_util(struct sugov_cpu *sg_cpu)
{
struct rq *rq = cpu_rq(sg_cpu->cpu);
- unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu);
- sg_cpu->max = max;
+ sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu);
sg_cpu->bw_dl = cpu_bw_dl(rq);
- sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), max,
+ sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu),
FREQUENCY_UTIL, NULL);
}
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 78a233d43757..95fc77853743 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -226,6 +226,21 @@ void account_idle_time(u64 cputime)
cpustat[CPUTIME_IDLE] += cputime;
}
+
+#ifdef CONFIG_SCHED_CORE
+/*
+ * Account for forceidle time due to core scheduling.
+ *
+ * REQUIRES: schedstat is enabled.
+ */
+void __account_forceidle_time(struct task_struct *p, u64 delta)
+{
+ __schedstat_add(p->stats.core_forceidle_sum, delta);
+
+ task_group_account_field(p, CPUTIME_FORCEIDLE, delta);
+}
+#endif
+
/*
* When a guest is interrupted for a longer amount of time, missed clock
* ticks are not redelivered later. Due to that, this function may on
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index b5152961b743..0ab79d819a0d 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -30,14 +30,16 @@ static struct ctl_table sched_dl_sysctls[] = {
.data = &sysctl_sched_dl_period_max,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = (void *)&sysctl_sched_dl_period_min,
},
{
.procname = "sched_deadline_period_min_us",
.data = &sysctl_sched_dl_period_min,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_douintvec_minmax,
+ .extra2 = (void *)&sysctl_sched_dl_period_max,
},
{}
};
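
With proc_douintvec_minmax() and the cross-linked extra1/extra2 bounds above, a write that would push sched_deadline_period_max_us below the current minimum, or the minimum above the current maximum, is rejected with -EINVAL instead of being accepted silently.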
@@ -1701,7 +1703,10 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
* the throttle.
*/
p->dl.dl_throttled = 0;
- BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH);
+ if (!(flags & ENQUEUE_REPLENISH))
+ printk_deferred_once("sched: DL de-boosted task PID %d: REPLENISH flag missing\n",
+ task_pid_nr(p));
+
return;
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 77b2048a9326..914096c5b1ae 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -612,11 +612,8 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
}
/* ensure we never gain time by being placed backwards. */
- cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
-#ifndef CONFIG_64BIT
- smp_wmb();
- cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
-#endif
+ u64_u32_store(cfs_rq->min_vruntime,
+ max_vruntime(cfs_rq->min_vruntime, vruntime));
}
static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
@@ -1055,6 +1052,33 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
* Scheduling class queueing methods:
*/
+#ifdef CONFIG_NUMA
+#define NUMA_IMBALANCE_MIN 2
+
+static inline long
+adjust_numa_imbalance(int imbalance, int dst_running, int imb_numa_nr)
+{
+ /*
+ * Allow a NUMA imbalance if the number of busy CPUs is less than the maximum
+ * threshold. Above this threshold, individual tasks may be contending
+ * for both memory bandwidth and any shared HT resources. This is an
+ * approximation as the number of running tasks may not be related to
+ * the number of busy CPUs due to sched_setaffinity.
+ */
+ if (dst_running > imb_numa_nr)
+ return imbalance;
+
+ /*
+ * Allow a small imbalance based on a simple pair of communicating
+ * tasks that remain local when the destination is lightly loaded.
+ */
+ if (imbalance <= NUMA_IMBALANCE_MIN)
+ return 0;
+
+ return imbalance;
+}
+#endif /* CONFIG_NUMA */
+
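
Two worked cases for the relocated adjust_numa_imbalance() helper, with illustrative numbers: given imb_numa_nr = 4, an imbalance of 2 while only 3 destination CPUs are busy returns 0 (the small imbalance is tolerated so a pair of communicating tasks can stay local), while the same imbalance of 2 with 6 busy CPUs is returned unchanged because the destination is already past the threshold.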
#ifdef CONFIG_NUMA_BALANCING
/*
* Approximate time to scan a full NUMA task in ms. The task scan period is
@@ -1548,8 +1572,6 @@ struct task_numa_env {
static unsigned long cpu_load(struct rq *rq);
static unsigned long cpu_runnable(struct rq *rq);
-static inline long adjust_numa_imbalance(int imbalance,
- int dst_running, int imb_numa_nr);
static inline enum
numa_type numa_classify(unsigned int imbalance_pct,
@@ -1790,6 +1812,15 @@ static bool task_numa_compare(struct task_numa_env *env,
*/
cur_ng = rcu_dereference(cur->numa_group);
if (cur_ng == p_ng) {
+ /*
+ * Do not swap within a group or between tasks that have
+ * no group if there is spare capacity. Swapping does
+ * not address the load imbalance and helps one task at
+ * the cost of punishing another.
+ */
+ if (env->dst_stats.node_type == node_has_spare)
+ goto unlock;
+
imp = taskimp + task_weight(cur, env->src_nid, dist) -
task_weight(cur, env->dst_nid, dist);
/*
@@ -2885,6 +2916,7 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
p->node_stamp = 0;
p->numa_scan_seq = mm ? mm->numa_scan_seq : 0;
p->numa_scan_period = sysctl_numa_balancing_scan_delay;
+ p->numa_migrate_retry = 0;
/* Protect against double add, see task_tick_numa and task_numa_work */
p->numa_work.next = &p->numa_work;
p->numa_faults = NULL;
@@ -3144,6 +3176,8 @@ void reweight_task(struct task_struct *p, int prio)
load->inv_weight = sched_prio_to_wmult[prio];
}
+static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
+
#ifdef CONFIG_FAIR_GROUP_SCHED
#ifdef CONFIG_SMP
/*
@@ -3254,8 +3288,6 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
}
#endif /* CONFIG_SMP */
-static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
-
/*
* Recomputes the group entity based on the current state of its group
* runqueue.
@@ -3313,6 +3345,34 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags)
}
#ifdef CONFIG_SMP
+static inline bool load_avg_is_decayed(struct sched_avg *sa)
+{
+ if (sa->load_sum)
+ return false;
+
+ if (sa->util_sum)
+ return false;
+
+ if (sa->runnable_sum)
+ return false;
+
+ /*
+ * _avg must be null when _sum are null because _avg = _sum / divider
+ * Make sure that rounding and/or propagation of PELT values never
+ * break this.
+ */
+ SCHED_WARN_ON(sa->load_avg ||
+ sa->util_avg ||
+ sa->runnable_avg);
+
+ return true;
+}
+
+static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
+{
+ return u64_u32_load_copy(cfs_rq->avg.last_update_time,
+ cfs_rq->last_update_time_copy);
+}
#ifdef CONFIG_FAIR_GROUP_SCHED
/*
* Because list_add_leaf_cfs_rq always places a child cfs_rq on the list
@@ -3345,27 +3405,12 @@ static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
if (cfs_rq->load.weight)
return false;
- if (cfs_rq->avg.load_sum)
- return false;
-
- if (cfs_rq->avg.util_sum)
- return false;
-
- if (cfs_rq->avg.runnable_sum)
+ if (!load_avg_is_decayed(&cfs_rq->avg))
return false;
if (child_cfs_rq_on_list(cfs_rq))
return false;
- /*
- * _avg must be null when _sum are null because _avg = _sum / divider
- * Make sure that rounding and/or propagation of PELT values never
- * break this.
- */
- SCHED_WARN_ON(cfs_rq->avg.load_avg ||
- cfs_rq->avg.util_avg ||
- cfs_rq->avg.runnable_avg);
-
return true;
}
@@ -3423,27 +3468,9 @@ void set_task_rq_fair(struct sched_entity *se,
if (!(se->avg.last_update_time && prev))
return;
-#ifndef CONFIG_64BIT
- {
- u64 p_last_update_time_copy;
- u64 n_last_update_time_copy;
-
- do {
- p_last_update_time_copy = prev->load_last_update_time_copy;
- n_last_update_time_copy = next->load_last_update_time_copy;
-
- smp_rmb();
-
- p_last_update_time = prev->avg.last_update_time;
- n_last_update_time = next->avg.last_update_time;
+ p_last_update_time = cfs_rq_last_update_time(prev);
+ n_last_update_time = cfs_rq_last_update_time(next);
- } while (p_last_update_time != p_last_update_time_copy ||
- n_last_update_time != n_last_update_time_copy);
- }
-#else
- p_last_update_time = prev->avg.last_update_time;
- n_last_update_time = next->avg.last_update_time;
-#endif
__update_load_avg_blocked_se(p_last_update_time, se);
se->avg.last_update_time = n_last_update_time;
}
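
The open-coded copy/retry loops removed here are replaced elsewhere in this series by u64_u32_load_copy()/u64_u32_store_copy() helpers. A hedged userspace sketch of the underlying idiom, which keeps a 64-bit value readable without tearing on 32-bit targets by pairing it with a copy and two barriers (the helper names below are illustrative, not the kernel API):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t value;		/* may tear on 32-bit loads/stores */
static uint64_t value_copy;

static void write_val(uint64_t v)
{
	value = v;
	atomic_thread_fence(memory_order_release);	/* like smp_wmb() */
	value_copy = v;
}

static uint64_t read_val(void)
{
	uint64_t copy, v;

	do {
		copy = value_copy;
		atomic_thread_fence(memory_order_acquire);	/* like smp_rmb() */
		v = value;
	} while (v != copy);	/* retry if a concurrent writer raced us */
	return v;
}

int main(void)
{
	write_val(0x1122334455667788ULL);
	printf("%#llx\n", (unsigned long long)read_val());
	return 0;
}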
@@ -3722,6 +3749,89 @@ static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum
#endif /* CONFIG_FAIR_GROUP_SCHED */
+#ifdef CONFIG_NO_HZ_COMMON
+static inline void migrate_se_pelt_lag(struct sched_entity *se)
+{
+ u64 throttled = 0, now, lut;
+ struct cfs_rq *cfs_rq;
+ struct rq *rq;
+ bool is_idle;
+
+ if (load_avg_is_decayed(&se->avg))
+ return;
+
+ cfs_rq = cfs_rq_of(se);
+ rq = rq_of(cfs_rq);
+
+ rcu_read_lock();
+ is_idle = is_idle_task(rcu_dereference(rq->curr));
+ rcu_read_unlock();
+
+ /*
+ * The lag estimation comes with a cost we don't want to pay all the
+ * time. Hence, limiting to the case where the source CPU is idle and
+ * we know we are at the greatest risk to have an outdated clock.
+ */
+ if (!is_idle)
+ return;
+
+ /*
+ * Estimated "now" is: last_update_time + cfs_idle_lag + rq_idle_lag, where:
+ *
+ * last_update_time (the cfs_rq's last_update_time)
+ * = cfs_rq_clock_pelt()@cfs_rq_idle
+ * = rq_clock_pelt()@cfs_rq_idle
+ * - cfs->throttled_clock_pelt_time@cfs_rq_idle
+ *
+ * cfs_idle_lag (delta between rq's update and cfs_rq's update)
+ * = rq_clock_pelt()@rq_idle - rq_clock_pelt()@cfs_rq_idle
+ *
+ * rq_idle_lag (delta between now and rq's update)
+ * = sched_clock_cpu() - rq_clock()@rq_idle
+ *
+ * We can then write:
+ *
+ * now = rq_clock_pelt()@rq_idle - cfs->throttled_clock_pelt_time +
+ * sched_clock_cpu() - rq_clock()@rq_idle
+ * Where:
+ * rq_clock_pelt()@rq_idle is rq->clock_pelt_idle
+ * rq_clock()@rq_idle is rq->clock_idle
+ * cfs->throttled_clock_pelt_time@cfs_rq_idle
+ * is cfs_rq->throttled_pelt_idle
+ */
+
+#ifdef CONFIG_CFS_BANDWIDTH
+ throttled = u64_u32_load(cfs_rq->throttled_pelt_idle);
+ /* The clock has been stopped for throttling */
+ if (throttled == U64_MAX)
+ return;
+#endif
+ now = u64_u32_load(rq->clock_pelt_idle);
+ /*
+ * Paired with _update_idle_rq_clock_pelt(). It ensures that, in the worst
+ * case, the old clock_pelt_idle value is observed together with the new
+ * clock_idle, which leads to an underestimation. The opposite would lead to an
+ * overestimation.
+ */
+ smp_rmb();
+ lut = cfs_rq_last_update_time(cfs_rq);
+
+ now -= throttled;
+ if (now < lut)
+ /*
+ * cfs_rq->avg.last_update_time is more recent than our
+ * estimation, let's use it.
+ */
+ now = lut;
+ else
+ now += sched_clock_cpu(cpu_of(rq)) - u64_u32_load(rq->clock_idle);
+
+ __update_load_avg_blocked_se(now, se);
+}
+#else
+static void migrate_se_pelt_lag(struct sched_entity *se) {}
+#endif
+
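
A numeric walk-through of the estimate above, with illustrative values: if rq->clock_pelt_idle = 1000, cfs_rq->throttled_pelt_idle = 0, the cfs_rq last_update_time = 800, rq->clock_idle = 5000 and sched_clock_cpu() now returns 5300, then now = 1000 - 0, which is newer than 800, so the clock fed to __update_load_avg_blocked_se() becomes 1000 + (5300 - 5000) = 1300 and the blocked entity is decayed up to 1300 rather than only up to its stale last_update_time of 800.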
/**
* update_cfs_rq_load_avg - update the cfs_rq's load/util averages
* @now: current time, as per cfs_rq_clock_pelt()
@@ -3796,12 +3906,9 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
}
decayed |= __update_load_avg_cfs_rq(now, cfs_rq);
-
-#ifndef CONFIG_64BIT
- smp_wmb();
- cfs_rq->load_last_update_time_copy = sa->last_update_time;
-#endif
-
+ u64_u32_store_copy(sa->last_update_time,
+ cfs_rq->last_update_time_copy,
+ sa->last_update_time);
return decayed;
}
@@ -3933,27 +4040,6 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
}
}
-#ifndef CONFIG_64BIT
-static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
-{
- u64 last_update_time_copy;
- u64 last_update_time;
-
- do {
- last_update_time_copy = cfs_rq->load_last_update_time_copy;
- smp_rmb();
- last_update_time = cfs_rq->avg.last_update_time;
- } while (last_update_time != last_update_time_copy);
-
- return last_update_time;
-}
-#else
-static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
-{
- return cfs_rq->avg.last_update_time;
-}
-#endif
-
/*
* Synchronize entity load avg of dequeued entity without locking
* the previous rq.
@@ -4368,16 +4454,11 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
__enqueue_entity(cfs_rq, se);
se->on_rq = 1;
- /*
- * When bandwidth control is enabled, cfs might have been removed
- * because of a parent been throttled but cfs->nr_running > 1. Try to
- * add it unconditionally.
- */
- if (cfs_rq->nr_running == 1 || cfs_bandwidth_used())
- list_add_leaf_cfs_rq(cfs_rq);
-
- if (cfs_rq->nr_running == 1)
+ if (cfs_rq->nr_running == 1) {
check_enqueue_throttle(cfs_rq);
+ if (!throttled_hierarchy(cfs_rq))
+ list_add_leaf_cfs_rq(cfs_rq);
+ }
}
static void __clear_buddies_last(struct sched_entity *se)
@@ -4477,6 +4558,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*/
if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE)
update_min_vruntime(cfs_rq);
+
+ if (cfs_rq->nr_running == 0)
+ update_idle_cfs_rq_clock_pelt(cfs_rq);
}
/*
@@ -4992,11 +5076,18 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
/* update hierarchical throttle state */
walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
- /* Nothing to run but something to decay (on_list)? Complete the branch */
if (!cfs_rq->load.weight) {
- if (cfs_rq->on_list)
- goto unthrottle_throttle;
- return;
+ if (!cfs_rq->on_list)
+ return;
+ /*
+ * Nothing to run but something to decay (on_list)?
+ * Complete the branch.
+ */
+ for_each_sched_entity(se) {
+ if (list_add_leaf_cfs_rq(cfs_rq_of(se)))
+ break;
+ }
+ goto unthrottle_throttle;
}
task_delta = cfs_rq->h_nr_running;
@@ -5034,31 +5125,12 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(qcfs_rq))
goto unthrottle_throttle;
-
- /*
- * One parent has been throttled and cfs_rq removed from the
- * list. Add it back to not break the leaf list.
- */
- if (throttled_hierarchy(qcfs_rq))
- list_add_leaf_cfs_rq(qcfs_rq);
}
/* At this point se is NULL and we are at root level*/
add_nr_running(rq, task_delta);
unthrottle_throttle:
- /*
- * The cfs_rq_throttled() breaks in the above iteration can result in
- * incomplete leaf list maintenance, resulting in triggering the
- * assertion below.
- */
- for_each_sched_entity(se) {
- struct cfs_rq *qcfs_rq = cfs_rq_of(se);
-
- if (list_add_leaf_cfs_rq(qcfs_rq))
- break;
- }
-
assert_list_leaf_cfs_rq(rq);
/* Determine whether we need to wake up potentially idle CPU: */
@@ -5713,13 +5785,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq))
goto enqueue_throttle;
-
- /*
- * One parent has been throttled and cfs_rq removed from the
- * list. Add it back to not break the leaf list.
- */
- if (throttled_hierarchy(cfs_rq))
- list_add_leaf_cfs_rq(cfs_rq);
}
/* At this point se is NULL and we are at root level*/
@@ -5743,21 +5808,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
update_overutilized_status(rq);
enqueue_throttle:
- if (cfs_bandwidth_used()) {
- /*
- * When bandwidth control is enabled; the cfs_rq_throttled()
- * breaks in the above iteration can result in incomplete
- * leaf list maintenance, resulting in triggering the assertion
- * below.
- */
- for_each_sched_entity(se) {
- cfs_rq = cfs_rq_of(se);
-
- if (list_add_leaf_cfs_rq(cfs_rq))
- break;
- }
- }
-
assert_list_leaf_cfs_rq(rq);
hrtick_update(rq);
@@ -5844,7 +5894,7 @@ dequeue_throttle:
/* Working cpumask for: load_balance, load_balance_newidle. */
DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
-DEFINE_PER_CPU(cpumask_var_t, select_idle_mask);
+DEFINE_PER_CPU(cpumask_var_t, select_rq_mask);
#ifdef CONFIG_NO_HZ_COMMON
@@ -6334,8 +6384,9 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
*/
static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool has_idle_core, int target)
{
- struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+ struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask);
int i, cpu, idle_cpu = -1, nr = INT_MAX;
+ struct sched_domain_shared *sd_share;
struct rq *this_rq = this_rq();
int this = smp_processor_id();
struct sched_domain *this_sd;
@@ -6375,6 +6426,17 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
time = cpu_clock(this);
}
+ if (sched_feat(SIS_UTIL)) {
+ sd_share = rcu_dereference(per_cpu(sd_llc_shared, target));
+ if (sd_share) {
+ /* because !--nr is the condition to stop scan */
+ nr = READ_ONCE(sd_share->nr_idle_scan) + 1;
+ /* overloaded LLC is unlikely to have idle cpu/core */
+ if (nr == 1)
+ return -1;
+ }
+ }
+
for_each_cpu_wrap(cpu, cpus, target + 1) {
if (has_idle_core) {
i = select_idle_core(p, cpu, cpus, &idle_cpu);
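
Worked example for the SIS_UTIL block above: the scan loop stops on !--nr, hence the off-by-one adjustment; with sd_share->nr_idle_scan = 4 the code sets nr = 5 and inspects at most four CPUs, while nr_idle_scan = 0 gives nr = 1 and the function returns -1 without scanning at all, on the assumption that an overloaded LLC is unlikely to have an idle CPU to find.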
@@ -6420,7 +6482,7 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
int cpu, best_cpu = -1;
struct cpumask *cpus;
- cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+ cpus = this_cpu_cpumask_var_ptr(select_rq_mask);
cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
task_util = uclamp_task_util(p);
@@ -6470,7 +6532,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
}
/*
- * per-cpu select_idle_mask usage
+ * per-cpu select_rq_mask usage
*/
lockdep_assert_irqs_disabled();
@@ -6640,62 +6702,96 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
}
/*
- * compute_energy(): Estimates the energy that @pd would consume if @p was
- * migrated to @dst_cpu. compute_energy() predicts what will be the utilization
- * landscape of @pd's CPUs after the task migration, and uses the Energy Model
- * to compute what would be the energy if we decided to actually migrate that
- * task.
+ * energy_env - Utilization landscape for energy estimation.
+ * @task_busy_time: Utilization contribution by the task for which we test the
+ * placement. Given by eenv_task_busy_time().
+ * @pd_busy_time: Utilization of the whole perf domain without the task
+ * contribution. Given by eenv_pd_busy_time().
+ * @cpu_cap: Maximum CPU capacity for the perf domain.
+ * @pd_cap: Entire perf domain capacity. (pd->nr_cpus * cpu_cap).
+ */
+struct energy_env {
+ unsigned long task_busy_time;
+ unsigned long pd_busy_time;
+ unsigned long cpu_cap;
+ unsigned long pd_cap;
+};
+
+/*
+ * Compute the task busy time for compute_energy(). This time cannot be
+ * injected directly into effective_cpu_util() because of the IRQ scaling.
+ * The latter only makes sense on the CPU where the task has most recently
+ * run.
+ */
+static inline void eenv_task_busy_time(struct energy_env *eenv,
+ struct task_struct *p, int prev_cpu)
+{
+ unsigned long busy_time, max_cap = arch_scale_cpu_capacity(prev_cpu);
+ unsigned long irq = cpu_util_irq(cpu_rq(prev_cpu));
+
+ if (unlikely(irq >= max_cap))
+ busy_time = max_cap;
+ else
+ busy_time = scale_irq_capacity(task_util_est(p), irq, max_cap);
+
+ eenv->task_busy_time = busy_time;
+}
+
+/*
+ * Compute the perf_domain (PD) busy time for compute_energy(). Based on the
+ * utilization for each @pd_cpus, it however doesn't take into account
+ * clamping since the ratio (utilization / cpu_capacity) is already enough to
+ * scale the EM reported power consumption at the (eventually clamped)
+ * cpu_capacity.
+ *
+ * The contribution of the task @p for which we want to estimate the
+ * energy cost is removed (by cpu_util_next()) and must be calculated
+ * separately (see eenv_task_busy_time). This ensures:
+ *
+ * - A stable PD utilization, no matter which CPU of that PD we want to place
+ * the task on.
+ *
+ * - A fair comparison between CPUs as the task contribution (task_util())
+ * will always be the same no matter which CPU utilization we rely on
+ * (util_avg or util_est).
+ *
+ * Set @eenv busy time for the PD that spans @pd_cpus. This busy time can't
+ * exceed @eenv->pd_cap.
*/
-static long
-compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
+static inline void eenv_pd_busy_time(struct energy_env *eenv,
+ struct cpumask *pd_cpus,
+ struct task_struct *p)
{
- struct cpumask *pd_mask = perf_domain_span(pd);
- unsigned long cpu_cap = arch_scale_cpu_capacity(cpumask_first(pd_mask));
- unsigned long max_util = 0, sum_util = 0;
- unsigned long _cpu_cap = cpu_cap;
+ unsigned long busy_time = 0;
int cpu;
- _cpu_cap -= arch_scale_thermal_pressure(cpumask_first(pd_mask));
+ for_each_cpu(cpu, pd_cpus) {
+ unsigned long util = cpu_util_next(cpu, p, -1);
- /*
- * The capacity state of CPUs of the current rd can be driven by CPUs
- * of another rd if they belong to the same pd. So, account for the
- * utilization of these CPUs too by masking pd with cpu_online_mask
- * instead of the rd span.
- *
- * If an entire pd is outside of the current rd, it will not appear in
- * its pd list and will not be accounted by compute_energy().
- */
- for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {
- unsigned long util_freq = cpu_util_next(cpu, p, dst_cpu);
- unsigned long cpu_util, util_running = util_freq;
- struct task_struct *tsk = NULL;
+ busy_time += effective_cpu_util(cpu, util, ENERGY_UTIL, NULL);
+ }
- /*
- * When @p is placed on @cpu:
- *
- * util_running = max(cpu_util, cpu_util_est) +
- * max(task_util, _task_util_est)
- *
- * while cpu_util_next is: max(cpu_util + task_util,
- * cpu_util_est + _task_util_est)
- */
- if (cpu == dst_cpu) {
- tsk = p;
- util_running =
- cpu_util_next(cpu, p, -1) + task_util_est(p);
- }
+ eenv->pd_busy_time = min(eenv->pd_cap, busy_time);
+}
- /*
- * Busy time computation: utilization clamping is not
- * required since the ratio (sum_util / cpu_capacity)
- * is already enough to scale the EM reported power
- * consumption at the (eventually clamped) cpu_capacity.
- */
- cpu_util = effective_cpu_util(cpu, util_running, cpu_cap,
- ENERGY_UTIL, NULL);
+/*
+ * Compute the maximum utilization for compute_energy() when the task @p
+ * is placed on the cpu @dst_cpu.
+ *
+ * Returns the maximum utilization among @eenv->cpus. This utilization can't
+ * exceed @eenv->cpu_cap.
+ */
+static inline unsigned long
+eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
+ struct task_struct *p, int dst_cpu)
+{
+ unsigned long max_util = 0;
+ int cpu;
- sum_util += min(cpu_util, _cpu_cap);
+ for_each_cpu(cpu, pd_cpus) {
+ struct task_struct *tsk = (cpu == dst_cpu) ? p : NULL;
+ unsigned long util = cpu_util_next(cpu, p, dst_cpu);
+ unsigned long cpu_util;
/*
* Performance domain frequency: utilization clamping
@@ -6704,12 +6800,29 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
* NOTE: in case RT tasks are running, by default the
* FREQUENCY_UTIL's utilization can be max OPP.
*/
- cpu_util = effective_cpu_util(cpu, util_freq, cpu_cap,
- FREQUENCY_UTIL, tsk);
- max_util = max(max_util, min(cpu_util, _cpu_cap));
+ cpu_util = effective_cpu_util(cpu, util, FREQUENCY_UTIL, tsk);
+ max_util = max(max_util, cpu_util);
}
- return em_cpu_energy(pd->em_pd, max_util, sum_util, _cpu_cap);
+ return min(max_util, eenv->cpu_cap);
+}
+
+/*
+ * compute_energy(): Use the Energy Model to estimate the energy that @pd would
+ * consume for a given utilization landscape @eenv. When @dst_cpu < 0, the task
+ * contribution is ignored.
+ */
+static inline unsigned long
+compute_energy(struct energy_env *eenv, struct perf_domain *pd,
+ struct cpumask *pd_cpus, struct task_struct *p, int dst_cpu)
+{
+ unsigned long max_util = eenv_pd_max_util(eenv, pd_cpus, p, dst_cpu);
+ unsigned long busy_time = eenv->pd_busy_time;
+
+ if (dst_cpu >= 0)
+ busy_time = min(eenv->pd_cap, busy_time + eenv->task_busy_time);
+
+ return em_cpu_energy(pd->em_pd, max_util, busy_time, eenv->cpu_cap);
}
/*
@@ -6753,12 +6866,13 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
*/
static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
{
+ struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask);
unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
- struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
- int cpu, best_energy_cpu = prev_cpu, target = -1;
- unsigned long cpu_cap, util, base_energy = 0;
+ struct root_domain *rd = this_rq()->rd;
+ int cpu, best_energy_cpu, target = -1;
struct sched_domain *sd;
struct perf_domain *pd;
+ struct energy_env eenv;
rcu_read_lock();
pd = rcu_dereference(rd->pd);
@@ -6781,20 +6895,39 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
if (!task_util_est(p))
goto unlock;
+ eenv_task_busy_time(&eenv, p, prev_cpu);
+
for (; pd; pd = pd->next) {
- unsigned long cur_delta, spare_cap, max_spare_cap = 0;
+ unsigned long cpu_cap, cpu_thermal_cap, util;
+ unsigned long cur_delta, max_spare_cap = 0;
bool compute_prev_delta = false;
- unsigned long base_energy_pd;
int max_spare_cap_cpu = -1;
+ unsigned long base_energy;
+
+ cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask);
+
+ if (cpumask_empty(cpus))
+ continue;
+
+ /* Account thermal pressure for the energy estimation */
+ cpu = cpumask_first(cpus);
+ cpu_thermal_cap = arch_scale_cpu_capacity(cpu);
+ cpu_thermal_cap -= arch_scale_thermal_pressure(cpu);
+
+ eenv.cpu_cap = cpu_thermal_cap;
+ eenv.pd_cap = 0;
+
+ for_each_cpu(cpu, cpus) {
+ eenv.pd_cap += cpu_thermal_cap;
+
+ if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
+ continue;
- for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) {
if (!cpumask_test_cpu(cpu, p->cpus_ptr))
continue;
util = cpu_util_next(cpu, p, cpu);
cpu_cap = capacity_of(cpu);
- spare_cap = cpu_cap;
- lsub_positive(&spare_cap, util);
/*
* Skip CPUs that cannot satisfy the capacity request.
@@ -6807,15 +6940,17 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
if (!fits_capacity(util, cpu_cap))
continue;
+ lsub_positive(&cpu_cap, util);
+
if (cpu == prev_cpu) {
/* Always use prev_cpu as a candidate. */
compute_prev_delta = true;
- } else if (spare_cap > max_spare_cap) {
+ } else if (cpu_cap > max_spare_cap) {
/*
* Find the CPU with the maximum spare capacity
* in the performance domain.
*/
- max_spare_cap = spare_cap;
+ max_spare_cap = cpu_cap;
max_spare_cap_cpu = cpu;
}
}
@@ -6823,25 +6958,29 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
if (max_spare_cap_cpu < 0 && !compute_prev_delta)
continue;
+ eenv_pd_busy_time(&eenv, cpus, p);
/* Compute the 'base' energy of the pd, without @p */
- base_energy_pd = compute_energy(p, -1, pd);
- base_energy += base_energy_pd;
+ base_energy = compute_energy(&eenv, pd, cpus, p, -1);
/* Evaluate the energy impact of using prev_cpu. */
if (compute_prev_delta) {
- prev_delta = compute_energy(p, prev_cpu, pd);
- if (prev_delta < base_energy_pd)
+ prev_delta = compute_energy(&eenv, pd, cpus, p,
+ prev_cpu);
+ /* CPU utilization has changed */
+ if (prev_delta < base_energy)
goto unlock;
- prev_delta -= base_energy_pd;
+ prev_delta -= base_energy;
best_delta = min(best_delta, prev_delta);
}
/* Evaluate the energy impact of using max_spare_cap_cpu. */
if (max_spare_cap_cpu >= 0) {
- cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
- if (cur_delta < base_energy_pd)
+ cur_delta = compute_energy(&eenv, pd, cpus, p,
+ max_spare_cap_cpu);
+ /* CPU utilization has changed */
+ if (cur_delta < base_energy)
goto unlock;
- cur_delta -= base_energy_pd;
+ cur_delta -= base_energy;
if (cur_delta < best_delta) {
best_delta = cur_delta;
best_energy_cpu = max_spare_cap_cpu;
@@ -6850,12 +6989,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
}
rcu_read_unlock();
- /*
- * Pick the best CPU if prev_cpu cannot be used, or if it saves at
- * least 6% of the energy used by prev_cpu.
- */
- if ((prev_delta == ULONG_MAX) ||
- (prev_delta - best_delta) > ((prev_delta + base_energy) >> 4))
+ if (best_delta < prev_delta)
target = best_energy_cpu;
return target;
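
Putting the new energy path together with illustrative numbers (not measured data): suppose a perf domain has pd_cap = 2048, a pd_busy_time of 600 without the task and a task_busy_time of 100. The base estimate passes busy_time = 600 to em_cpu_energy() (dst_cpu < 0 ignores the task), while each candidate placement uses min(2048, 600 + 100) = 700 together with that candidate's max_util, and the per-candidate deltas against the base are compared directly. The old requirement that the best candidate save roughly 6% of prev_cpu's energy is gone; any best_delta strictly below prev_delta now selects the new CPU.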
@@ -6951,6 +7085,8 @@ static void detach_entity_cfs_rq(struct sched_entity *se);
*/
static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
{
+ struct sched_entity *se = &p->se;
+
/*
* As blocked tasks retain absolute vruntime the migration needs to
* deal with this by subtracting the old and adding the new
@@ -6958,23 +7094,9 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
* the task on the new runqueue.
*/
if (READ_ONCE(p->__state) == TASK_WAKING) {
- struct sched_entity *se = &p->se;
struct cfs_rq *cfs_rq = cfs_rq_of(se);
- u64 min_vruntime;
-
-#ifndef CONFIG_64BIT
- u64 min_vruntime_copy;
-
- do {
- min_vruntime_copy = cfs_rq->min_vruntime_copy;
- smp_rmb();
- min_vruntime = cfs_rq->min_vruntime;
- } while (min_vruntime != min_vruntime_copy);
-#else
- min_vruntime = cfs_rq->min_vruntime;
-#endif
- se->vruntime -= min_vruntime;
+ se->vruntime -= u64_u32_load(cfs_rq->min_vruntime);
}
if (p->on_rq == TASK_ON_RQ_MIGRATING) {
@@ -6983,25 +7105,29 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
* rq->lock and can modify state directly.
*/
lockdep_assert_rq_held(task_rq(p));
- detach_entity_cfs_rq(&p->se);
+ detach_entity_cfs_rq(se);
} else {
+ remove_entity_load_avg(se);
+
/*
- * We are supposed to update the task to "current" time, then
- * its up to date and ready to go to new CPU/cfs_rq. But we
- * have difficulty in getting what current time is, so simply
- * throw away the out-of-date time. This will result in the
- * wakee task is less decayed, but giving the wakee more load
- * sounds not bad.
+ * Here, the task's PELT values have been updated according to
+ * the current rq's clock. But if that clock hasn't been
+ * updated in a while, a substantial idle time will be missed,
+ * leading to an inflation after wake-up on the new rq.
+ *
+ * Estimate the missing time from the cfs_rq last_update_time
+ * and update sched_avg to improve the PELT continuity after
+ * migration.
*/
- remove_entity_load_avg(&p->se);
+ migrate_se_pelt_lag(se);
}
/* Tell new CPU we are migrated */
- p->se.avg.last_update_time = 0;
+ se->avg.last_update_time = 0;
/* We have migrated, no longer consider this task hot */
- p->se.exec_start = 0;
+ se->exec_start = 0;
update_scan_period(p, new_cpu);
}
@@ -7585,8 +7711,8 @@ enum group_type {
*/
group_fully_busy,
/*
- * SD_ASYM_CPUCAPACITY only: One task doesn't fit with CPU's capacity
- * and must be migrated to a more powerful CPU.
+ * One task doesn't fit with CPU's capacity and must be migrated to a
+ * more powerful CPU.
*/
group_misfit_task,
/*
@@ -8167,6 +8293,9 @@ static bool __update_blocked_fair(struct rq *rq, bool *done)
if (update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq)) {
update_tg_load_avg(cfs_rq);
+ if (cfs_rq->nr_running == 0)
+ update_idle_cfs_rq_clock_pelt(cfs_rq);
+
if (cfs_rq == &rq->cfs)
decayed = true;
}
@@ -8500,7 +8629,7 @@ static inline int sg_imbalanced(struct sched_group *group)
/*
* group_has_capacity returns true if the group has spare capacity that could
* be used by some tasks.
- * We consider that a group has spare capacity if the * number of task is
+ * We consider that a group has spare capacity if the number of tasks is
* smaller than the number of CPUs or if the utilization is lower than the
* available capacity for CFS tasks.
* For the latter, we use a threshold to stabilize the state, to take into
@@ -8669,6 +8798,19 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs
return sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu);
}
+static inline bool
+sched_reduced_capacity(struct rq *rq, struct sched_domain *sd)
+{
+ /*
+ * When there is more than 1 task, the group_overloaded case already
+ * takes care of cpu with reduced capacity
+ */
+ if (rq->cfs.h_nr_running != 1)
+ return false;
+
+ return check_cpu_capacity(rq, sd);
+}
+
/**
* update_sg_lb_stats - Update sched_group's statistics for load balancing.
* @env: The load balancing environment.
@@ -8691,8 +8833,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
for_each_cpu_and(i, sched_group_span(group), env->cpus) {
struct rq *rq = cpu_rq(i);
+ unsigned long load = cpu_load(rq);
- sgs->group_load += cpu_load(rq);
+ sgs->group_load += load;
sgs->group_util += cpu_util_cfs(i);
sgs->group_runnable += cpu_runnable(rq);
sgs->sum_h_nr_running += rq->cfs.h_nr_running;
@@ -8722,11 +8865,17 @@ static inline void update_sg_lb_stats(struct lb_env *env,
if (local_group)
continue;
- /* Check for a misfit task on the cpu */
- if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
- sgs->group_misfit_task_load < rq->misfit_task_load) {
- sgs->group_misfit_task_load = rq->misfit_task_load;
- *sg_status |= SG_OVERLOAD;
+ if (env->sd->flags & SD_ASYM_CPUCAPACITY) {
+ /* Check for a misfit task on the cpu */
+ if (sgs->group_misfit_task_load < rq->misfit_task_load) {
+ sgs->group_misfit_task_load = rq->misfit_task_load;
+ *sg_status |= SG_OVERLOAD;
+ }
+ } else if ((env->idle != CPU_NOT_IDLE) &&
+ sched_reduced_capacity(rq, env->sd)) {
+ /* Check for a task running on a CPU with reduced capacity */
+ if (sgs->group_misfit_task_load < load)
+ sgs->group_misfit_task_load = load;
}
}
@@ -8779,7 +8928,8 @@ static bool update_sd_pick_busiest(struct lb_env *env,
* CPUs in the group should either be possible to resolve
* internally or be covered by avg_load imbalance (eventually).
*/
- if (sgs->group_type == group_misfit_task &&
+ if ((env->sd->flags & SD_ASYM_CPUCAPACITY) &&
+ (sgs->group_type == group_misfit_task) &&
(!capacity_greater(capacity_of(env->dst_cpu), sg->sgc->max_capacity) ||
sds->local_stat.group_type != group_has_spare))
return false;
@@ -9058,16 +9208,6 @@ static bool update_pick_idlest(struct sched_group *idlest,
}
/*
- * Allow a NUMA imbalance if busy CPUs is less than 25% of the domain.
- * This is an approximation as the number of running tasks may not be
- * related to the number of busy CPUs due to sched_setaffinity.
- */
-static inline bool allow_numa_imbalance(int running, int imb_numa_nr)
-{
- return running <= imb_numa_nr;
-}
-
-/*
* find_idlest_group() finds and returns the least busy CPU group within the
* domain.
*
@@ -9183,7 +9323,9 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
break;
case group_has_spare:
+#ifdef CONFIG_NUMA
if (sd->flags & SD_NUMA) {
+ int imb_numa_nr = sd->imb_numa_nr;
#ifdef CONFIG_NUMA_BALANCING
int idlest_cpu;
/*
@@ -9196,17 +9338,31 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
idlest_cpu = cpumask_first(sched_group_span(idlest));
if (cpu_to_node(idlest_cpu) == p->numa_preferred_nid)
return idlest;
-#endif
+#endif /* CONFIG_NUMA_BALANCING */
/*
* Otherwise, keep the task close to the wakeup source
* and improve locality if the number of running tasks
* would remain below threshold where an imbalance is
- * allowed. If there is a real need of migration,
- * periodic load balance will take care of it.
+ * allowed while accounting for the possibility the
+ * task is pinned to a subset of CPUs. If there is a
+ * real need of migration, periodic load balance will
+ * take care of it.
*/
- if (allow_numa_imbalance(local_sgs.sum_nr_running + 1, sd->imb_numa_nr))
+ if (p->nr_cpus_allowed != NR_CPUS) {
+ struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask);
+
+ cpumask_and(cpus, sched_group_span(local), p->cpus_ptr);
+ imb_numa_nr = min(cpumask_weight(cpus), sd->imb_numa_nr);
+ }
+
+ imbalance = abs(local_sgs.idle_cpus - idlest_sgs.idle_cpus);
+ if (!adjust_numa_imbalance(imbalance,
+ local_sgs.sum_nr_running + 1,
+ imb_numa_nr)) {
return NULL;
+ }
}
+#endif /* CONFIG_NUMA */
/*
* Select group with highest number of idle CPUs. We could also
@@ -9222,6 +9378,77 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
return idlest;
}
+static void update_idle_cpu_scan(struct lb_env *env,
+ unsigned long sum_util)
+{
+ struct sched_domain_shared *sd_share;
+ int llc_weight, pct;
+ u64 x, y, tmp;
+ /*
+ * Update the number of CPUs to scan in LLC domain, which could
+ * be used as a hint in select_idle_cpu(). The update of sd_share
+ * could be expensive because it is within a shared cache line.
+ * So the write of this hint only occurs during periodic load
+ * balancing, rather than CPU_NEWLY_IDLE, because the latter
+ * can fire way more frequently than the former.
+ */
+ if (!sched_feat(SIS_UTIL) || env->idle == CPU_NEWLY_IDLE)
+ return;
+
+ llc_weight = per_cpu(sd_llc_size, env->dst_cpu);
+ if (env->sd->span_weight != llc_weight)
+ return;
+
+ sd_share = rcu_dereference(per_cpu(sd_llc_shared, env->dst_cpu));
+ if (!sd_share)
+ return;
+
+ /*
+ * The number of CPUs to search drops as sum_util increases, when
+ * sum_util hits 85% or above, the scan stops.
+ * The reason to choose 85% as the threshold is because this is the
+ * imbalance_pct(117) when a LLC sched group is overloaded.
+ *
+ * let y = SCHED_CAPACITY_SCALE - p * x^2 [1]
+ * and y'= y / SCHED_CAPACITY_SCALE
+ *
+ * x is the ratio of sum_util compared to the CPU capacity:
+ * x = sum_util / (llc_weight * SCHED_CAPACITY_SCALE)
+ * y' is the ratio of CPUs to be scanned in the LLC domain,
+ * and the number of CPUs to scan is calculated by:
+ *
+ * nr_scan = llc_weight * y' [2]
+ *
+ * When x hits the threshold of overloaded, AKA, when
+ * x = 100 / pct, y drops to 0. According to [1],
+ * p should be SCHED_CAPACITY_SCALE * pct^2 / 10000
+ *
+ * Scale x by SCHED_CAPACITY_SCALE:
+ * x' = sum_util / llc_weight; [3]
+ *
+ * and finally [1] becomes:
+ * y = SCHED_CAPACITY_SCALE -
+ * x'^2 * pct^2 / (10000 * SCHED_CAPACITY_SCALE) [4]
+ *
+ */
+ /* equation [3] */
+ x = sum_util;
+ do_div(x, llc_weight);
+
+ /* equation [4] */
+ pct = env->sd->imbalance_pct;
+ tmp = x * x * pct * pct;
+ do_div(tmp, 10000 * SCHED_CAPACITY_SCALE);
+ tmp = min_t(long, tmp, SCHED_CAPACITY_SCALE);
+ y = SCHED_CAPACITY_SCALE - tmp;
+
+ /* equation [2] */
+ y *= llc_weight;
+ do_div(y, SCHED_CAPACITY_SCALE);
+ if ((int)y != sd_share->nr_idle_scan)
+ WRITE_ONCE(sd_share->nr_idle_scan, (int)y);
+}
+
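
The comment above derives nr_scan from equations [1]-[4]. A minimal standalone sketch of that arithmetic, assuming SCHED_CAPACITY_SCALE is 1024 and using an illustrative llc_weight of 16 together with the imbalance_pct of 117 cited in the comment (this is not kernel code, just the integer math):

/* Standalone sketch of the nr_scan arithmetic above (not kernel code). */
#include <stdio.h>

#define SCHED_CAPACITY_SCALE 1024ULL

static unsigned int nr_scan(unsigned long long sum_util,
                            unsigned int llc_weight, unsigned int pct)
{
    unsigned long long x, tmp, y;

    x = sum_util / llc_weight;                                 /* equation [3] */
    tmp = x * x * pct * pct / (10000 * SCHED_CAPACITY_SCALE);  /* equation [4] */
    if (tmp > SCHED_CAPACITY_SCALE)
        tmp = SCHED_CAPACITY_SCALE;
    y = SCHED_CAPACITY_SCALE - tmp;

    return (unsigned int)(y * llc_weight / SCHED_CAPACITY_SCALE); /* equation [2] */
}

int main(void)
{
    unsigned int llc = 16, pct = 117;   /* illustrative values */
    unsigned long long util;

    /* Scan width shrinks as the LLC fills; near 85% utilization it hits 0. */
    for (util = 0; util <= (unsigned long long)llc * SCHED_CAPACITY_SCALE;
         util += 2 * SCHED_CAPACITY_SCALE)
        printf("sum_util=%llu nr_scan=%u\n", util, nr_scan(util, llc, pct));
    return 0;
}

With these inputs the result reaches zero at roughly 85% LLC utilization, matching the threshold described in the comment.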
/**
* update_sd_lb_stats - Update sched_domain's statistics for load balancing.
* @env: The load balancing environment.
@@ -9234,6 +9461,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
struct sched_group *sg = env->sd->groups;
struct sg_lb_stats *local = &sds->local_stat;
struct sg_lb_stats tmp_sgs;
+ unsigned long sum_util = 0;
int sg_status = 0;
do {
@@ -9266,6 +9494,7 @@ next_group:
sds->total_load += sgs->group_load;
sds->total_capacity += sgs->group_capacity;
+ sum_util += sgs->group_util;
sg = sg->next;
} while (sg != env->sd->groups);
@@ -9291,24 +9520,8 @@ next_group:
WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED);
trace_sched_overutilized_tp(rd, SG_OVERUTILIZED);
}
-}
-
-#define NUMA_IMBALANCE_MIN 2
-
-static inline long adjust_numa_imbalance(int imbalance,
- int dst_running, int imb_numa_nr)
-{
- if (!allow_numa_imbalance(dst_running, imb_numa_nr))
- return imbalance;
- /*
- * Allow a small imbalance based on a simple pair of communicating
- * tasks that remain local when the destination is lightly loaded.
- */
- if (imbalance <= NUMA_IMBALANCE_MIN)
- return 0;
-
- return imbalance;
+ update_idle_cpu_scan(env, sum_util);
}
/**
@@ -9325,9 +9538,18 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
busiest = &sds->busiest_stat;
if (busiest->group_type == group_misfit_task) {
- /* Set imbalance to allow misfit tasks to be balanced. */
- env->migration_type = migrate_misfit;
- env->imbalance = 1;
+ if (env->sd->flags & SD_ASYM_CPUCAPACITY) {
+ /* Set imbalance to allow misfit tasks to be balanced. */
+ env->migration_type = migrate_misfit;
+ env->imbalance = 1;
+ } else {
+ /*
+ * Set load imbalance to allow moving task from cpu
+ * with reduced capacity.
+ */
+ env->migration_type = migrate_load;
+ env->imbalance = busiest->group_misfit_task_load;
+ }
return;
}
@@ -9395,7 +9617,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
*/
env->migration_type = migrate_task;
lsub_positive(&nr_diff, local->sum_nr_running);
- env->imbalance = nr_diff >> 1;
+ env->imbalance = nr_diff;
} else {
/*
@@ -9403,15 +9625,21 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
* idle cpus.
*/
env->migration_type = migrate_task;
- env->imbalance = max_t(long, 0, (local->idle_cpus -
- busiest->idle_cpus) >> 1);
+ env->imbalance = max_t(long, 0,
+ (local->idle_cpus - busiest->idle_cpus));
}
+#ifdef CONFIG_NUMA
/* Consider allowing a small imbalance between NUMA groups */
if (env->sd->flags & SD_NUMA) {
env->imbalance = adjust_numa_imbalance(env->imbalance,
- local->sum_nr_running + 1, env->sd->imb_numa_nr);
+ local->sum_nr_running + 1,
+ env->sd->imb_numa_nr);
}
+#endif
+
+ /* Number of tasks to move to restore balance */
+ env->imbalance >>= 1;
return;
}
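
A small worked example of the reordering above, where the raw idle-CPU gap is handed to the NUMA adjustment first and only halved afterwards (illustrative numbers; adjust_numa_imbalance() is assumed to leave the value unchanged here):

/* Illustrative arithmetic only; not kernel code. */
#include <stdio.h>

int main(void)
{
    int local_idle = 6, busiest_idle = 2;
    int imbalance = local_idle - busiest_idle;   /* 4: full gap, not pre-halved */

    /* adjust_numa_imbalance() would see the full gap here and may shrink it
     * for small imbalances; assume it returns the value unchanged. */

    imbalance >>= 1;                             /* 2 tasks to migrate */
    printf("tasks to move: %d\n", imbalance);
    return 0;
}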
@@ -9834,9 +10062,15 @@ static int should_we_balance(struct lb_env *env)
/*
* In the newly idle case, we will allow all the CPUs
* to do the newly idle load balance.
+ *
+ * However, we bail out if we already have tasks or a wakeup pending,
+ * to optimize wakeup latency.
*/
- if (env->idle == CPU_NEWLY_IDLE)
+ if (env->idle == CPU_NEWLY_IDLE) {
+ if (env->dst_rq->nr_running > 0 || env->dst_rq->ttwu_pending)
+ return 0;
return 1;
+ }
/* Try to find first idle CPU */
for_each_cpu_and(cpu, group_balance_mask(sg), env->cpus) {
@@ -11287,9 +11521,13 @@ static inline bool vruntime_normalized(struct task_struct *p)
*/
static void propagate_entity_cfs_rq(struct sched_entity *se)
{
- struct cfs_rq *cfs_rq;
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
- list_add_leaf_cfs_rq(cfs_rq_of(se));
+ if (cfs_rq_throttled(cfs_rq))
+ return;
+
+ if (!throttled_hierarchy(cfs_rq))
+ list_add_leaf_cfs_rq(cfs_rq);
/* Start to propagate at parent */
se = se->parent;
@@ -11297,14 +11535,13 @@ static void propagate_entity_cfs_rq(struct sched_entity *se)
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
- if (!cfs_rq_throttled(cfs_rq)){
- update_load_avg(cfs_rq, se, UPDATE_TG);
- list_add_leaf_cfs_rq(cfs_rq);
- continue;
- }
+ update_load_avg(cfs_rq, se, UPDATE_TG);
- if (list_add_leaf_cfs_rq(cfs_rq))
+ if (cfs_rq_throttled(cfs_rq))
break;
+
+ if (!throttled_hierarchy(cfs_rq))
+ list_add_leaf_cfs_rq(cfs_rq);
}
}
#else
@@ -11422,10 +11659,7 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
void init_cfs_rq(struct cfs_rq *cfs_rq)
{
cfs_rq->tasks_timeline = RB_ROOT_CACHED;
- cfs_rq->min_vruntime = (u64)(-(1LL << 20));
-#ifndef CONFIG_64BIT
- cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
-#endif
+ u64_u32_store(cfs_rq->min_vruntime, (u64)(-(1LL << 20)));
#ifdef CONFIG_SMP
raw_spin_lock_init(&cfs_rq->removed.lock);
#endif
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 1cf435bbcd9c..ee7f23c76bd3 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -60,7 +60,8 @@ SCHED_FEAT(TTWU_QUEUE, true)
/*
* When doing wakeups, attempt to limit superfluous scans of the LLC domain.
*/
-SCHED_FEAT(SIS_PROP, true)
+SCHED_FEAT(SIS_PROP, false)
+SCHED_FEAT(SIS_UTIL, true)
/*
* Issue a WARN when we do multiple update_rq_clock() calls
diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
index 4ff2ed4f8fa1..3a0e0dc28721 100644
--- a/kernel/sched/pelt.h
+++ b/kernel/sched/pelt.h
@@ -61,6 +61,25 @@ static inline void cfs_se_util_change(struct sched_avg *avg)
WRITE_ONCE(avg->util_est.enqueued, enqueued);
}
+static inline u64 rq_clock_pelt(struct rq *rq)
+{
+ lockdep_assert_rq_held(rq);
+ assert_clock_updated(rq);
+
+ return rq->clock_pelt - rq->lost_idle_time;
+}
+
+/* The rq is idle, we can sync to clock_task */
+static inline void _update_idle_rq_clock_pelt(struct rq *rq)
+{
+ rq->clock_pelt = rq_clock_task(rq);
+
+ u64_u32_store(rq->clock_idle, rq_clock(rq));
+ /* Paired with smp_rmb in migrate_se_pelt_lag() */
+ smp_wmb();
+ u64_u32_store(rq->clock_pelt_idle, rq_clock_pelt(rq));
+}
+
/*
* The clock_pelt scales the time to reflect the effective amount of
* computation done during the running delta time but then sync back to
@@ -76,8 +95,7 @@ static inline void cfs_se_util_change(struct sched_avg *avg)
static inline void update_rq_clock_pelt(struct rq *rq, s64 delta)
{
if (unlikely(is_idle_task(rq->curr))) {
- /* The rq is idle, we can sync to clock_task */
- rq->clock_pelt = rq_clock_task(rq);
+ _update_idle_rq_clock_pelt(rq);
return;
}
@@ -130,17 +148,23 @@ static inline void update_idle_rq_clock_pelt(struct rq *rq)
*/
if (util_sum >= divider)
rq->lost_idle_time += rq_clock_task(rq) - rq->clock_pelt;
+
+ _update_idle_rq_clock_pelt(rq);
}
-static inline u64 rq_clock_pelt(struct rq *rq)
+#ifdef CONFIG_CFS_BANDWIDTH
+static inline void update_idle_cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
{
- lockdep_assert_rq_held(rq);
- assert_clock_updated(rq);
+ u64 throttled;
- return rq->clock_pelt - rq->lost_idle_time;
+ if (unlikely(cfs_rq->throttle_count))
+ throttled = U64_MAX;
+ else
+ throttled = cfs_rq->throttled_clock_pelt_time;
+
+ u64_u32_store(cfs_rq->throttled_pelt_idle, throttled);
}
-#ifdef CONFIG_CFS_BANDWIDTH
/* rq->task_clock normalized against any time this cfs_rq has spent throttled */
static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
{
@@ -150,6 +174,7 @@ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_pelt_time;
}
#else
+static inline void update_idle_cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) { }
static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
{
return rq_clock_pelt(rq_of(cfs_rq));
@@ -204,6 +229,7 @@ update_rq_clock_pelt(struct rq *rq, s64 delta) { }
static inline void
update_idle_rq_clock_pelt(struct rq *rq) { }
+static inline void update_idle_cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) { }
#endif
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 8c9ed9664840..55f39c8f4203 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -480,7 +480,7 @@ static inline void rt_queue_push_tasks(struct rq *rq)
#endif /* CONFIG_SMP */
static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
-static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
+static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count);
static inline int on_rt_rq(struct sched_rt_entity *rt_se)
{
@@ -601,7 +601,7 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
rt_se = rt_rq->tg->rt_se[cpu];
if (!rt_se) {
- dequeue_top_rt_rq(rt_rq);
+ dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
}
@@ -687,7 +687,7 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
- dequeue_top_rt_rq(rt_rq);
+ dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
}
static inline int rt_rq_throttled(struct rt_rq *rt_rq)
@@ -1089,7 +1089,7 @@ static void update_curr_rt(struct rq *rq)
}
static void
-dequeue_top_rt_rq(struct rt_rq *rt_rq)
+dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count)
{
struct rq *rq = rq_of_rt_rq(rt_rq);
@@ -1100,7 +1100,7 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)
BUG_ON(!rq->nr_running);
- sub_nr_running(rq, rt_rq->rt_nr_running);
+ sub_nr_running(rq, count);
rt_rq->rt_queued = 0;
}
@@ -1486,18 +1486,21 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct sched_rt_entity *back = NULL;
+ unsigned int rt_nr_running;
for_each_sched_rt_entity(rt_se) {
rt_se->back = back;
back = rt_se;
}
- dequeue_top_rt_rq(rt_rq_of_se(back));
+ rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
for (rt_se = back; rt_se; rt_se = rt_se->back) {
if (on_rt_rq(rt_se))
__dequeue_rt_entity(rt_se, flags);
}
+
+ dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
}
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 47b89a0fc6e5..aad7f5ee9666 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -520,6 +520,45 @@ struct cfs_bandwidth { };
#endif /* CONFIG_CGROUP_SCHED */
+/*
+ * u64_u32_load/u64_u32_store
+ *
+ * Use a copy of a u64 value to protect against data race. This is only
+ * applicable for 32-bits architectures.
+ */
+#ifdef CONFIG_64BIT
+# define u64_u32_load_copy(var, copy) var
+# define u64_u32_store_copy(var, copy, val) (var = val)
+#else
+# define u64_u32_load_copy(var, copy) \
+({ \
+ u64 __val, __val_copy; \
+ do { \
+ __val_copy = copy; \
+ /* \
+ * paired with u64_u32_store_copy(), ordering access \
+ * to var and copy. \
+ */ \
+ smp_rmb(); \
+ __val = var; \
+ } while (__val != __val_copy); \
+ __val; \
+})
+# define u64_u32_store_copy(var, copy, val) \
+do { \
+ typeof(val) __val = (val); \
+ var = __val; \
+ /* \
+ * paired with u64_u32_load_copy(), ordering access to var and \
+ * copy. \
+ */ \
+ smp_wmb(); \
+ copy = __val; \
+} while (0)
+#endif
+# define u64_u32_load(var) u64_u32_load_copy(var, var##_copy)
+# define u64_u32_store(var, val) u64_u32_store_copy(var, var##_copy, val)
+
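
A userspace analogue of the 32-bit load/store pairing defined above, using C11 fences in place of smp_rmb()/smp_wmb() (a sketch only; a strictly race-free C11 version would also make var and var_copy relaxed atomics):

/* Userspace analogue of the u64_u32 pairing above (illustrative only). */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t var, var_copy;

static void writer(uint64_t val)
{
    var = val;
    atomic_thread_fence(memory_order_release); /* pairs with the reader fence */
    var_copy = val;
}

static uint64_t reader(void)
{
    uint64_t v, c;

    do {
        c = var_copy;
        atomic_thread_fence(memory_order_acquire); /* order copy vs. var */
        v = var;
    } while (v != c); /* retry if a concurrent writer was mid-update */
    return v;
}

int main(void)
{
    writer(0x0123456789abcdefULL);
    printf("%llx\n", (unsigned long long)reader());
    return 0;
}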
/* CFS-related fields in a runqueue */
struct cfs_rq {
struct load_weight load;
@@ -560,7 +599,7 @@ struct cfs_rq {
*/
struct sched_avg avg;
#ifndef CONFIG_64BIT
- u64 load_last_update_time_copy;
+ u64 last_update_time_copy;
#endif
struct {
raw_spinlock_t lock ____cacheline_aligned;
@@ -609,6 +648,10 @@ struct cfs_rq {
int runtime_enabled;
s64 runtime_remaining;
+ u64 throttled_pelt_idle;
+#ifndef CONFIG_64BIT
+ u64 throttled_pelt_idle_copy;
+#endif
u64 throttled_clock;
u64 throttled_clock_pelt;
u64 throttled_clock_pelt_time;
@@ -981,6 +1024,12 @@ struct rq {
u64 clock_task ____cacheline_aligned;
u64 clock_pelt;
unsigned long lost_idle_time;
+ u64 clock_pelt_idle;
+ u64 clock_idle;
+#ifndef CONFIG_64BIT
+ u64 clock_pelt_idle_copy;
+ u64 clock_idle_copy;
+#endif
atomic_t nr_iowait;
@@ -1815,15 +1864,6 @@ static inline struct cpumask *group_balance_mask(struct sched_group *sg)
return to_cpumask(sg->sgc->cpumask);
}
-/**
- * group_first_cpu - Returns the first CPU in the cpumask of a sched_group.
- * @group: The group whose first CPU is to be returned.
- */
-static inline unsigned int group_first_cpu(struct sched_group *group)
-{
- return cpumask_first(sched_group_span(group));
-}
-
extern int group_balance_cpu(struct sched_group *sg);
#ifdef CONFIG_SCHED_DEBUG
@@ -2044,7 +2084,6 @@ static inline int task_on_rq_migrating(struct task_struct *p)
#define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */
#define WF_MIGRATED 0x20 /* Internal use, task got migrated */
-#define WF_ON_CPU 0x40 /* Wakee is on_cpu */
#ifdef CONFIG_SMP
static_assert(WF_EXEC == SD_BALANCE_EXEC);
@@ -2852,7 +2891,7 @@ enum cpu_util_type {
};
unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
- unsigned long max, enum cpu_util_type type,
+ enum cpu_util_type type,
struct task_struct *p);
static inline unsigned long cpu_bw_dl(struct rq *rq)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 05b6c2ad90b9..8739c2a5a54e 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2316,23 +2316,30 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
/*
* For a single LLC per node, allow an
- * imbalance up to 25% of the node. This is an
- * arbitrary cutoff based on SMT-2 to balance
- * between memory bandwidth and avoiding
- * premature sharing of HT resources and SMT-4
- * or SMT-8 *may* benefit from a different
- * cutoff.
+ * imbalance up to 12.5% of the node. This is
+ * an arbitrary cutoff based on two factors -- SMT and
+ * memory channels. For SMT-2, the intent is to
+ * avoid premature sharing of HT resources but
+ * SMT-4 or SMT-8 *may* benefit from a different
+ * cutoff. For memory channels, this is a very
+ * rough estimate of how many channels may be
+ * active and is based on recent CPUs with
+ * many cores.
*
* For multiple LLCs, allow an imbalance
* until multiple tasks would share an LLC
* on one node while LLCs on another node
- * remain idle.
+ * remain idle. This assumes that there are
+ * enough logical CPUs per LLC to avoid SMT
+ * factors and that there is a correlation
+ * between LLCs and memory channels.
*/
nr_llcs = sd->span_weight / child->span_weight;
if (nr_llcs == 1)
- imb = sd->span_weight >> 2;
+ imb = sd->span_weight >> 3;
else
imb = nr_llcs;
+ imb = max(1U, imb);
sd->imb_numa_nr = imb;
/* Set span based on the first NUMA domain. */
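
A standalone sketch of the imb_numa_nr calculation above with illustrative topologies (plain C, not kernel code):

/* Worked example of the imb_numa_nr computation above. */
#include <stdio.h>

static unsigned int imb_numa_nr(unsigned int node_weight, unsigned int llc_weight)
{
    unsigned int nr_llcs = node_weight / llc_weight;
    unsigned int imb;

    if (nr_llcs == 1)
        imb = node_weight >> 3;   /* single LLC: up to 12.5% of the node */
    else
        imb = nr_llcs;            /* multiple LLCs: one task per LLC */

    return imb > 1 ? imb : 1;     /* max(1U, imb) */
}

int main(void)
{
    printf("128 CPUs, 1 LLC  -> %u\n", imb_numa_nr(128, 128)); /* 16 */
    printf("64 CPUs, 8 LLCs  -> %u\n", imb_numa_nr(64, 8));    /* 8  */
    printf("4 CPUs, 1 LLC    -> %u\n", imb_numa_nr(4, 4));     /* 1  */
    return 0;
}

A 128-CPU node with a single LLC allows an imbalance of 16 tasks (12.5%), a node split into 8 LLCs allows one task per LLC, and the max(1U, imb) clamp keeps very small nodes from computing zero.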
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e52b6e372c60..35d034219513 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -446,14 +446,14 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
if (*negp) {
if (*lvalp > (unsigned long) INT_MAX + 1)
return -EINVAL;
- *valp = -*lvalp;
+ WRITE_ONCE(*valp, -*lvalp);
} else {
if (*lvalp > (unsigned long) INT_MAX)
return -EINVAL;
- *valp = *lvalp;
+ WRITE_ONCE(*valp, *lvalp);
}
} else {
- int val = *valp;
+ int val = READ_ONCE(*valp);
if (val < 0) {
*negp = true;
*lvalp = -(unsigned long)val;
@@ -472,9 +472,9 @@ static int do_proc_douintvec_conv(unsigned long *lvalp,
if (write) {
if (*lvalp > UINT_MAX)
return -EINVAL;
- *valp = *lvalp;
+ WRITE_ONCE(*valp, *lvalp);
} else {
- unsigned int val = *valp;
+ unsigned int val = READ_ONCE(*valp);
*lvalp = (unsigned long)val;
}
return 0;
@@ -857,7 +857,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
if ((param->min && *param->min > tmp) ||
(param->max && *param->max < tmp))
return -EINVAL;
- *valp = tmp;
+ WRITE_ONCE(*valp, tmp);
}
return 0;
@@ -923,7 +923,7 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
(param->max && *param->max < tmp))
return -ERANGE;
- *valp = tmp;
+ WRITE_ONCE(*valp, tmp);
}
return 0;
@@ -1007,13 +1007,13 @@ int proc_dou8vec_minmax(struct ctl_table *table, int write,
tmp.maxlen = sizeof(val);
tmp.data = &val;
- val = *data;
+ val = READ_ONCE(*data);
res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
do_proc_douintvec_minmax_conv, &param);
if (res)
return res;
if (write)
- *data = val;
+ WRITE_ONCE(*data, val);
return 0;
}
EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
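
The hunks in this file convert plain loads and stores of sysctl values to READ_ONCE()/WRITE_ONCE(), which helps avoid torn or refetched accesses when the value is read without a lock elsewhere. A userspace analogue using C11 relaxed atomics (a sketch; the kernel macros are different primitives):

/* Userspace analogue of the READ_ONCE()/WRITE_ONCE() conversions above. */
#include <stdatomic.h>
#include <stdio.h>

static _Atomic int sysctl_knob = 100;

static void handler_write(int new_val)
{
    /* WRITE_ONCE(*valp, new_val) analogue: one untorn store */
    atomic_store_explicit(&sysctl_knob, new_val, memory_order_relaxed);
}

static int fast_path_read(void)
{
    /* READ_ONCE(*valp) analogue: one untorn load, not refetched */
    return atomic_load_explicit(&sysctl_knob, memory_order_relaxed);
}

int main(void)
{
    handler_write(250);
    printf("%d\n", fast_path_read());
    return 0;
}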
@@ -1090,9 +1090,9 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
err = -EINVAL;
break;
}
- *i = val;
+ WRITE_ONCE(*i, val);
} else {
- val = convdiv * (*i) / convmul;
+ val = convdiv * READ_ONCE(*i) / convmul;
if (!first)
proc_put_char(&buffer, &left, '\t');
proc_put_long(&buffer, &left, val, false);
@@ -1173,9 +1173,12 @@ static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
if (write) {
if (*lvalp > INT_MAX / HZ)
return 1;
- *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
+ if (*negp)
+ WRITE_ONCE(*valp, -*lvalp * HZ);
+ else
+ WRITE_ONCE(*valp, *lvalp * HZ);
} else {
- int val = *valp;
+ int val = READ_ONCE(*valp);
unsigned long lval;
if (val < 0) {
*negp = true;
@@ -1221,9 +1224,9 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
if (jif > INT_MAX)
return 1;
- *valp = (int)jif;
+ WRITE_ONCE(*valp, (int)jif);
} else {
- int val = *valp;
+ int val = READ_ONCE(*valp);
unsigned long lval;
if (val < 0) {
*negp = true;
@@ -1291,8 +1294,8 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
* @ppos: the current position in the file
*
* Reads/writes up to table->maxlen/sizeof(unsigned int) integer
- * values from/to the user buffer, treated as an ASCII string.
- * The values read are assumed to be in 1/1000 seconds, and
+ * values from/to the user buffer, treated as an ASCII string.
+ * The values read are assumed to be in 1/1000 seconds, and
* are converted into jiffies.
*
* Returns 0 on success.
@@ -2091,6 +2094,17 @@ static struct ctl_table vm_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO_HUNDRED,
},
+#ifdef CONFIG_NUMA
+ {
+ .procname = "numa_stat",
+ .data = &sysctl_vm_numa_stat,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = sysctl_vm_numa_stat_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif
#ifdef CONFIG_HUGETLB_PAGE
{
.procname = "nr_hugepages",
@@ -2107,15 +2121,6 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = &hugetlb_mempolicy_sysctl_handler,
},
- {
- .procname = "numa_stat",
- .data = &sysctl_vm_numa_stat,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = sysctl_vm_numa_stat_handler,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
#endif
{
.procname = "hugetlb_shm_group",
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 1cd10b102c51..5dead89308b7 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1051,15 +1051,24 @@ retry_delete:
}
/*
- * This is called by do_exit or de_thread, only when there are no more
- * references to the shared signal_struct.
+ * This is called by do_exit or de_thread, only when nobody else can
+ * modify the signal->posix_timers list. Yet we need sighand->siglock
+ * to prevent the race with /proc/pid/timers.
*/
-void exit_itimers(struct signal_struct *sig)
+void exit_itimers(struct task_struct *tsk)
{
+ struct list_head timers;
struct k_itimer *tmr;
- while (!list_empty(&sig->posix_timers)) {
- tmr = list_entry(sig->posix_timers.next, struct k_itimer, list);
+ if (list_empty(&tsk->signal->posix_timers))
+ return;
+
+ spin_lock_irq(&tsk->sighand->siglock);
+ list_replace_init(&tsk->signal->posix_timers, &timers);
+ spin_unlock_irq(&tsk->sighand->siglock);
+
+ while (!list_empty(&timers)) {
+ tmr = list_first_entry(&timers, struct k_itimer, list);
itimer_delete(tmr);
}
}
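
A standalone sketch of the detach-then-drain pattern used above: take the lock only long enough to steal the whole list, then delete entries with the lock dropped (illustrative userspace code; the kernel uses list_replace_init() under sighand->siglock):

/* Detach-then-drain sketch with stub types; not the kernel implementation. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct timer_stub {
    int id;
    struct timer_stub *next;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct timer_stub *timer_list;

static void exit_timers_stub(void)
{
    struct timer_stub *stolen, *tmr;

    pthread_mutex_lock(&list_lock);
    stolen = timer_list;          /* detach the whole list in O(1) */
    timer_list = NULL;
    pthread_mutex_unlock(&list_lock);

    while (stolen) {              /* drain without holding the lock */
        tmr = stolen;
        stolen = stolen->next;
        printf("deleting timer %d\n", tmr->id);
        free(tmr);
    }
}

int main(void)
{
    for (int i = 0; i < 3; i++) {
        struct timer_stub *t = malloc(sizeof(*t));
        t->id = i;
        t->next = timer_list;
        timer_list = t;
    }
    exit_timers_stub();
    return 0;
}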
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index debbbb083286..ccd6a5ade3e9 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -194,7 +194,8 @@ config FUNCTION_TRACER
sequence is then dynamically patched into a tracer call when
tracing is enabled by the administrator. If it's runtime disabled
(the bootup default), then the overhead of the instructions is very
- small and not measurable even in micro-benchmarks.
+ small and not measurable even in micro-benchmarks (at least on
+ x86, but may have impact on other architectures).
config FUNCTION_GRAPH_TRACER
bool "Kernel Function Graph Tracer"
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a8cfac0611bc..b8dd54627075 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -9864,6 +9864,12 @@ void trace_init_global_iter(struct trace_iterator *iter)
/* Output in nanoseconds only if we are using a clock in nanoseconds. */
if (trace_clocks[iter->tr->clock_id].in_ns)
iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
+
+ /* Can not use kmalloc for iter.temp and iter.fmt */
+ iter->temp = static_temp_buf;
+ iter->temp_size = STATIC_TEMP_BUF_SIZE;
+ iter->fmt = static_fmt_buf;
+ iter->fmt_size = STATIC_FMT_BUF_SIZE;
}
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
@@ -9896,11 +9902,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
/* Simulate the iterator */
trace_init_global_iter(&iter);
- /* Can not use kmalloc for iter.temp and iter.fmt */
- iter.temp = static_temp_buf;
- iter.temp_size = STATIC_TEMP_BUF_SIZE;
- iter.fmt = static_fmt_buf;
- iter.fmt_size = STATIC_FMT_BUF_SIZE;
for_each_tracing_cpu(cpu) {
atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 48e82e141d54..e87a46794079 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -4430,6 +4430,8 @@ static int parse_var_defs(struct hist_trigger_data *hist_data)
s = kstrdup(field_str, GFP_KERNEL);
if (!s) {
+ kfree(hist_data->attrs->var_defs.name[n_vars]);
+ hist_data->attrs->var_defs.name[n_vars] = NULL;
ret = -ENOMEM;
goto free;
}
diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index 230038d4f908..59ddb00d6944 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -34,6 +34,27 @@ MODULE_LICENSE("GPL");
#define WATCH_QUEUE_NOTE_SIZE 128
#define WATCH_QUEUE_NOTES_PER_PAGE (PAGE_SIZE / WATCH_QUEUE_NOTE_SIZE)
+/*
+ * This must be called under the RCU read-lock, which makes
+ * sure that the wqueue still exists. It can then take the lock,
+ * and check that the wqueue hasn't been destroyed, which in
+ * turn makes sure that the notification pipe still exists.
+ */
+static inline bool lock_wqueue(struct watch_queue *wqueue)
+{
+ spin_lock_bh(&wqueue->lock);
+ if (unlikely(wqueue->defunct)) {
+ spin_unlock_bh(&wqueue->lock);
+ return false;
+ }
+ return true;
+}
+
+static inline void unlock_wqueue(struct watch_queue *wqueue)
+{
+ spin_unlock_bh(&wqueue->lock);
+}
+
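
A minimal standalone sketch of the lock-and-check pattern introduced above, with a pthread mutex standing in for spin_lock_bh() and a stub struct in place of struct watch_queue (not the kernel API):

/* Lock-and-check sketch with stub types; illustrative only. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct wqueue_stub {
    pthread_mutex_t lock;
    bool defunct;          /* set once the owning pipe is torn down */
};

static bool lock_wqueue_stub(struct wqueue_stub *wq)
{
    pthread_mutex_lock(&wq->lock);
    if (wq->defunct) {              /* queue already dead: drop the lock */
        pthread_mutex_unlock(&wq->lock);
        return false;
    }
    return true;                    /* caller holds the lock on success */
}

static void unlock_wqueue_stub(struct wqueue_stub *wq)
{
    pthread_mutex_unlock(&wq->lock);
}

int main(void)
{
    struct wqueue_stub wq = { PTHREAD_MUTEX_INITIALIZER, false };

    if (lock_wqueue_stub(&wq)) {
        printf("queue is live, safe to post\n");
        unlock_wqueue_stub(&wq);
    }
    return 0;
}

Callers that get true back hold the lock and know the queue has not been marked defunct, so the notification pipe behind it is still valid; callers that get false simply skip posting.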
static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
@@ -69,6 +90,10 @@ static const struct pipe_buf_operations watch_queue_pipe_buf_ops = {
/*
* Post a notification to a watch queue.
+ *
+ * Must be called with the RCU lock for reading, and the
+ * watch_queue lock held, which guarantees that the pipe
+ * hasn't been released.
*/
static bool post_one_notification(struct watch_queue *wqueue,
struct watch_notification *n)
@@ -85,9 +110,6 @@ static bool post_one_notification(struct watch_queue *wqueue,
spin_lock_irq(&pipe->rd_wait.lock);
- if (wqueue->defunct)
- goto out;
-
mask = pipe->ring_size - 1;
head = pipe->head;
tail = pipe->tail;
@@ -203,7 +225,10 @@ void __post_watch_notification(struct watch_list *wlist,
if (security_post_notification(watch->cred, cred, n) < 0)
continue;
- post_one_notification(wqueue, n);
+ if (lock_wqueue(wqueue)) {
+ post_one_notification(wqueue, n);
+ unlock_wqueue(wqueue);
+ }
}
rcu_read_unlock();
@@ -429,6 +454,33 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue)
rcu_assign_pointer(watch->queue, wqueue);
}
+static int add_one_watch(struct watch *watch, struct watch_list *wlist, struct watch_queue *wqueue)
+{
+ const struct cred *cred;
+ struct watch *w;
+
+ hlist_for_each_entry(w, &wlist->watchers, list_node) {
+ struct watch_queue *wq = rcu_access_pointer(w->queue);
+ if (wqueue == wq && watch->id == w->id)
+ return -EBUSY;
+ }
+
+ cred = current_cred();
+ if (atomic_inc_return(&cred->user->nr_watches) > task_rlimit(current, RLIMIT_NOFILE)) {
+ atomic_dec(&cred->user->nr_watches);
+ return -EAGAIN;
+ }
+
+ watch->cred = get_cred(cred);
+ rcu_assign_pointer(watch->watch_list, wlist);
+
+ kref_get(&wqueue->usage);
+ kref_get(&watch->usage);
+ hlist_add_head(&watch->queue_node, &wqueue->watches);
+ hlist_add_head_rcu(&watch->list_node, &wlist->watchers);
+ return 0;
+}
+
/**
* add_watch_to_object - Add a watch on an object to a watch list
* @watch: The watch to add
@@ -443,33 +495,21 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue)
*/
int add_watch_to_object(struct watch *watch, struct watch_list *wlist)
{
- struct watch_queue *wqueue = rcu_access_pointer(watch->queue);
- struct watch *w;
-
- hlist_for_each_entry(w, &wlist->watchers, list_node) {
- struct watch_queue *wq = rcu_access_pointer(w->queue);
- if (wqueue == wq && watch->id == w->id)
- return -EBUSY;
- }
+ struct watch_queue *wqueue;
+ int ret = -ENOENT;
- watch->cred = get_current_cred();
- rcu_assign_pointer(watch->watch_list, wlist);
+ rcu_read_lock();
- if (atomic_inc_return(&watch->cred->user->nr_watches) >
- task_rlimit(current, RLIMIT_NOFILE)) {
- atomic_dec(&watch->cred->user->nr_watches);
- put_cred(watch->cred);
- return -EAGAIN;
+ wqueue = rcu_access_pointer(watch->queue);
+ if (lock_wqueue(wqueue)) {
+ spin_lock(&wlist->lock);
+ ret = add_one_watch(watch, wlist, wqueue);
+ spin_unlock(&wlist->lock);
+ unlock_wqueue(wqueue);
}
- spin_lock_bh(&wqueue->lock);
- kref_get(&wqueue->usage);
- kref_get(&watch->usage);
- hlist_add_head(&watch->queue_node, &wqueue->watches);
- spin_unlock_bh(&wqueue->lock);
-
- hlist_add_head(&watch->list_node, &wlist->watchers);
- return 0;
+ rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL(add_watch_to_object);
@@ -520,20 +560,15 @@ found:
wqueue = rcu_dereference(watch->queue);
- /* We don't need the watch list lock for the next bit as RCU is
- * protecting *wqueue from deallocation.
- */
- if (wqueue) {
+ if (lock_wqueue(wqueue)) {
post_one_notification(wqueue, &n.watch);
- spin_lock_bh(&wqueue->lock);
-
if (!hlist_unhashed(&watch->queue_node)) {
hlist_del_init_rcu(&watch->queue_node);
put_watch(watch);
}
- spin_unlock_bh(&wqueue->lock);
+ unlock_wqueue(wqueue);
}
if (wlist->release_watch) {
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 04200f341b5a..aeea9731ef80 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -5010,7 +5010,10 @@ static void unbind_workers(int cpu)
for_each_pool_worker(worker, pool) {
kthread_set_per_cpu(worker->task, -1);
- WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, wq_unbound_cpumask) < 0);
+ if (cpumask_intersects(wq_unbound_cpumask, cpu_active_mask))
+ WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, wq_unbound_cpumask) < 0);
+ else
+ WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0);
}
mutex_unlock(&wq_pool_attach_mutex);
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index a9f7eb047768..fd15230a703b 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -84,6 +84,9 @@ config UBSAN_SHIFT
config UBSAN_DIV_ZERO
bool "Perform checking for integer divide-by-zero"
depends on $(cc-option,-fsanitize=integer-divide-by-zero)
+ # https://github.com/ClangBuiltLinux/linux/issues/1657
+ # https://github.com/llvm/llvm-project/issues/56289
+ depends on !CC_IS_CLANG
help
This option enables -fsanitize=integer-divide-by-zero which checks
for integer division by zero. This is effectively redundant with the
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 59e1653799f8..3c7b9d6dca95 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -336,8 +336,7 @@ static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
if (pte_young(entry)) {
referenced = true;
entry = pte_mkold(entry);
- huge_ptep_set_access_flags(vma, addr, pte, entry,
- vma->vm_flags & VM_WRITE);
+ set_huge_pte_at(mm, addr, pte, entry);
}
#ifdef CONFIG_MMU_NOTIFIER
diff --git a/mm/gup.c b/mm/gup.c
index 551264407624..e2a39e30756d 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -87,7 +87,8 @@ retry:
* belongs to this folio.
*/
if (unlikely(page_folio(page) != folio)) {
- folio_put_refs(folio, refs);
+ if (!put_devmap_managed_page_refs(&folio->page, refs))
+ folio_put_refs(folio, refs);
goto retry;
}
@@ -176,7 +177,8 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
refs *= GUP_PIN_COUNTING_BIAS;
}
- folio_put_refs(folio, refs);
+ if (!put_devmap_managed_page_refs(&folio->page, refs))
+ folio_put_refs(folio, refs);
}
/**
diff --git a/mm/hmm.c b/mm/hmm.c
index 3fd3242c5e50..f2aa63b94d9b 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -212,14 +212,6 @@ int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
unsigned long end, unsigned long hmm_pfns[], pmd_t pmd);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-static inline bool hmm_is_device_private_entry(struct hmm_range *range,
- swp_entry_t entry)
-{
- return is_device_private_entry(entry) &&
- pfn_swap_entry_to_page(entry)->pgmap->owner ==
- range->dev_private_owner;
-}
-
static inline unsigned long pte_to_hmm_pfn_flags(struct hmm_range *range,
pte_t pte)
{
@@ -252,10 +244,12 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
swp_entry_t entry = pte_to_swp_entry(pte);
/*
- * Never fault in device private pages, but just report
- * the PFN even if not present.
+ * Don't fault in device private pages owned by the caller,
+ * just report the PFN.
*/
- if (hmm_is_device_private_entry(range, entry)) {
+ if (is_device_private_entry(entry) &&
+ pfn_swap_entry_to_page(entry)->pgmap->owner ==
+ range->dev_private_owner) {
cpu_flags = HMM_PFN_VALID;
if (is_writable_device_private_entry(entry))
cpu_flags |= HMM_PFN_WRITE;
@@ -273,6 +267,9 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
if (!non_swap_entry(entry))
goto fault;
+ if (is_device_private_entry(entry))
+ goto fault;
+
if (is_device_exclusive_entry(entry))
goto fault;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a57e1be41401..a18c071c294e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4788,8 +4788,13 @@ again:
* sharing with another vma.
*/
;
- } else if (unlikely(is_hugetlb_entry_migration(entry) ||
- is_hugetlb_entry_hwpoisoned(entry))) {
+ } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) {
+ bool uffd_wp = huge_pte_uffd_wp(entry);
+
+ if (!userfaultfd_wp(dst_vma) && uffd_wp)
+ entry = huge_pte_clear_uffd_wp(entry);
+ set_huge_pte_at(dst, addr, dst_pte, entry);
+ } else if (unlikely(is_hugetlb_entry_migration(entry))) {
swp_entry_t swp_entry = pte_to_swp_entry(entry);
bool uffd_wp = huge_pte_uffd_wp(entry);
@@ -5947,6 +5952,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
page = alloc_huge_page(dst_vma, dst_addr, 0);
if (IS_ERR(page)) {
+ put_page(*pagep);
ret = -ENOMEM;
*pagep = NULL;
goto out;
diff --git a/mm/ioremap.c b/mm/ioremap.c
index 5fe598ecd9b7..8652426282cc 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -11,29 +11,35 @@
#include <linux/io.h>
#include <linux/export.h>
-void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot)
+void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
+ unsigned long prot)
{
unsigned long offset, vaddr;
phys_addr_t last_addr;
struct vm_struct *area;
/* Disallow wrap-around or zero size */
- last_addr = addr + size - 1;
- if (!size || last_addr < addr)
+ last_addr = phys_addr + size - 1;
+ if (!size || last_addr < phys_addr)
return NULL;
/* Page-align mappings */
- offset = addr & (~PAGE_MASK);
- addr -= offset;
+ offset = phys_addr & (~PAGE_MASK);
+ phys_addr -= offset;
size = PAGE_ALIGN(size + offset);
+ if (!ioremap_allowed(phys_addr, size, prot))
+ return NULL;
+
area = get_vm_area_caller(size, VM_IOREMAP,
__builtin_return_address(0));
if (!area)
return NULL;
vaddr = (unsigned long)area->addr;
+ area->phys_addr = phys_addr;
- if (ioremap_page_range(vaddr, vaddr + size, addr, __pgprot(prot))) {
+ if (ioremap_page_range(vaddr, vaddr + size, phys_addr,
+ __pgprot(prot))) {
free_vm_area(area);
return NULL;
}
@@ -44,6 +50,12 @@ EXPORT_SYMBOL(ioremap_prot);
void iounmap(volatile void __iomem *addr)
{
- vunmap((void *)((unsigned long)addr & PAGE_MASK));
+ void *vaddr = (void *)((unsigned long)addr & PAGE_MASK);
+
+ if (!iounmap_allowed(vaddr))
+ return;
+
+ if (is_vmalloc_addr(vaddr))
+ vunmap(vaddr);
}
EXPORT_SYMBOL(iounmap);
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index c40c0e7b3b5f..78be2beb7453 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -108,9 +108,10 @@ void __kasan_unpoison_pages(struct page *page, unsigned int order, bool init)
return;
tag = kasan_random_tag();
+ kasan_unpoison(set_tag(page_address(page), tag),
+ PAGE_SIZE << order, init);
for (i = 0; i < (1 << order); i++)
page_kasan_tag_set(page + i, tag);
- kasan_unpoison(page_address(page), PAGE_SIZE << order, init);
}
void __kasan_poison_pages(struct page *page, unsigned int order, bool init)
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 4b5e5a3d3a63..6aff49f6b79e 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -603,14 +603,6 @@ static unsigned long kfence_init_pool(void)
addr += 2 * PAGE_SIZE;
}
- /*
- * The pool is live and will never be deallocated from this point on.
- * Remove the pool object from the kmemleak object tree, as it would
- * otherwise overlap with allocations returned by kfence_alloc(), which
- * are registered with kmemleak through the slab post-alloc hook.
- */
- kmemleak_free(__kfence_pool);
-
return 0;
}
@@ -623,8 +615,16 @@ static bool __init kfence_init_pool_early(void)
addr = kfence_init_pool();
- if (!addr)
+ if (!addr) {
+ /*
+ * The pool is live and will never be deallocated from this point on.
+ * Ignore the pool object from the kmemleak phys object tree, as it would
+ * otherwise overlap with allocations returned by kfence_alloc(), which
+ * are registered with kmemleak through the slab post-alloc hook.
+ */
+ kmemleak_ignore_phys(__pa(__kfence_pool));
return true;
+ }
/*
* Only release unprotected pages, and do not try to go back and change
diff --git a/mm/memory.c b/mm/memory.c
index 7a089145cad4..1c6027adc542 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3043,7 +3043,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
pte_t entry;
VM_BUG_ON(!(vmf->flags & FAULT_FLAG_WRITE));
- VM_BUG_ON(PageAnon(page) && !PageAnonExclusive(page));
+ VM_BUG_ON(page && PageAnon(page) && !PageAnonExclusive(page));
/*
* Clear the pages cpupid information as the existing
@@ -4369,9 +4369,12 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
return VM_FAULT_OOM;
}
- /* See comment in handle_pte_fault() */
+ /*
+ * See comment in handle_pte_fault() for how this scenario happens, we
+ * need to return NOPAGE so that we drop this page.
+ */
if (pmd_devmap_trans_unstable(vmf->pmd))
- return 0;
+ return VM_FAULT_NOPAGE;
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
vmf->address, &vmf->ptl);
@@ -4802,6 +4805,19 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf)
defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
/* No support for anonymous transparent PUD pages yet */
if (vma_is_anonymous(vmf->vma))
+ return VM_FAULT_FALLBACK;
+ if (vmf->vma->vm_ops->huge_fault)
+ return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+ return VM_FAULT_FALLBACK;
+}
+
+static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
+{
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+ /* No support for anonymous transparent PUD pages yet */
+ if (vma_is_anonymous(vmf->vma))
goto split;
if (vmf->vma->vm_ops->huge_fault) {
vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
@@ -4812,19 +4828,7 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf)
split:
/* COW or write-notify not handled on PUD level: split pud.*/
__split_huge_pud(vmf->vma, vmf->pud, vmf->address);
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
- return VM_FAULT_FALLBACK;
-}
-
-static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
-{
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- /* No support for anonymous transparent PUD pages yet */
- if (vma_is_anonymous(vmf->vma))
- return VM_FAULT_FALLBACK;
- if (vmf->vma->vm_ops->huge_fault)
- return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
return VM_FAULT_FALLBACK;
}
diff --git a/mm/memremap.c b/mm/memremap.c
index b870a659eee6..745eea0f99c3 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -499,7 +499,7 @@ void free_zone_device_page(struct page *page)
}
#ifdef CONFIG_FS_DAX
-bool __put_devmap_managed_page(struct page *page)
+bool __put_devmap_managed_page_refs(struct page *page, int refs)
{
if (page->pgmap->type != MEMORY_DEVICE_FS_DAX)
return false;
@@ -509,9 +509,9 @@ bool __put_devmap_managed_page(struct page *page)
* refcount is 1, then the page is free and the refcount is
* stable because nobody holds a reference on the page.
*/
- if (page_ref_dec_return(page) == 1)
+ if (page_ref_sub_return(page, refs) == 1)
wake_up_var(&page->_refcount);
return true;
}
-EXPORT_SYMBOL(__put_devmap_managed_page);
+EXPORT_SYMBOL(__put_devmap_managed_page_refs);
#endif /* CONFIG_FS_DAX */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e008a3df0485..b0bcab50f0a3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2361,7 +2361,7 @@ static inline bool check_new_pcp(struct page *page, unsigned int order)
}
#endif /* CONFIG_DEBUG_VM */
-static inline bool should_skip_kasan_unpoison(gfp_t flags, bool init_tags)
+static inline bool should_skip_kasan_unpoison(gfp_t flags)
{
/* Don't skip if a software KASAN mode is enabled. */
if (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
@@ -2373,12 +2373,10 @@ static inline bool should_skip_kasan_unpoison(gfp_t flags, bool init_tags)
return true;
/*
- * With hardware tag-based KASAN enabled, skip if either:
- *
- * 1. Memory tags have already been cleared via tag_clear_highpage().
- * 2. Skipping has been requested via __GFP_SKIP_KASAN_UNPOISON.
+ * With hardware tag-based KASAN enabled, skip if this has been
+ * requested via __GFP_SKIP_KASAN_UNPOISON.
*/
- return init_tags || (flags & __GFP_SKIP_KASAN_UNPOISON);
+ return flags & __GFP_SKIP_KASAN_UNPOISON;
}
static inline bool should_skip_init(gfp_t flags)
@@ -2397,6 +2395,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags) &&
!should_skip_init(gfp_flags);
bool init_tags = init && (gfp_flags & __GFP_ZEROTAGS);
+ int i;
set_page_private(page, 0);
set_page_refcounted(page);
@@ -2422,8 +2421,6 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
* should be initialized as well).
*/
if (init_tags) {
- int i;
-
/* Initialize both memory and tags. */
for (i = 0; i != 1 << order; ++i)
tag_clear_highpage(page + i);
@@ -2431,13 +2428,17 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
/* Note that memory is already initialized by the loop above. */
init = false;
}
- if (!should_skip_kasan_unpoison(gfp_flags, init_tags)) {
+ if (!should_skip_kasan_unpoison(gfp_flags)) {
/* Unpoison shadow memory or set memory tags. */
kasan_unpoison_pages(page, order, init);
/* Note that memory is already initialized by KASAN. */
if (kasan_has_integrated_init())
init = false;
+ } else {
+ /* Ensure page_address() dereferencing does not fault. */
+ for (i = 0; i != 1 << order; ++i)
+ page_kasan_tag_reset(page + i);
}
/* If memory is still not initialized, do it now. */
if (init)
@@ -3968,11 +3969,15 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
* need to be calculated.
*/
if (!order) {
- long fast_free;
+ long usable_free;
+ long reserved;
+
+ usable_free = free_pages;
+ reserved = __zone_watermark_unusable_free(z, 0, alloc_flags);
- fast_free = free_pages;
- fast_free -= __zone_watermark_unusable_free(z, 0, alloc_flags);
- if (fast_free > mark + z->lowmem_reserve[highest_zoneidx])
+ /* reserved may over estimate high-atomic reserves. */
+ usable_free -= min(usable_free, reserved);
+ if (usable_free > mark + z->lowmem_reserve[highest_zoneidx])
return true;
}
diff --git a/mm/rmap.c b/mm/rmap.c
index 5bcb334cd6f2..746c05acad27 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1899,8 +1899,23 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
/* Unexpected PMD-mapped THP? */
VM_BUG_ON_FOLIO(!pvmw.pte, folio);
- subpage = folio_page(folio,
- pte_pfn(*pvmw.pte) - folio_pfn(folio));
+ if (folio_is_zone_device(folio)) {
+ /*
+ * Our PTE is a non-present device exclusive entry and
+ * calculating the subpage as for the common case would
+ * result in an invalid pointer.
+ *
+ * Since only PAGE_SIZE pages can currently be
+ * migrated, just set it to page. This will need to be
+ * changed when hugepage migrations to device private
+ * memory are supported.
+ */
+ VM_BUG_ON_FOLIO(folio_nr_pages(folio) > 1, folio);
+ subpage = &folio->page;
+ } else {
+ subpage = folio_page(folio,
+ pte_pfn(*pvmw.pte) - folio_pfn(folio));
+ }
address = pvmw.address;
anon_exclusive = folio_test_anon(folio) &&
PageAnonExclusive(subpage);
@@ -1993,15 +2008,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
/*
* No need to invalidate here it will synchronize on
* against the special swap migration pte.
- *
- * The assignment to subpage above was computed from a
- * swap PTE which results in an invalid pointer.
- * Since only PAGE_SIZE pages can currently be
- * migrated, just set it to page. This will need to be
- * changed when hugepage migrations to device private
- * memory are supported.
*/
- subpage = &folio->page;
} else if (PageHWPoison(subpage)) {
pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
if (folio_test_hugetlb(folio)) {
diff --git a/mm/secretmem.c b/mm/secretmem.c
index 206ed6b40c1d..f06279d6190a 100644
--- a/mm/secretmem.c
+++ b/mm/secretmem.c
@@ -55,22 +55,28 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf)
gfp_t gfp = vmf->gfp_mask;
unsigned long addr;
struct page *page;
+ vm_fault_t ret;
int err;
if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
return vmf_error(-EINVAL);
+ filemap_invalidate_lock_shared(mapping);
+
retry:
page = find_lock_page(mapping, offset);
if (!page) {
page = alloc_page(gfp | __GFP_ZERO);
- if (!page)
- return VM_FAULT_OOM;
+ if (!page) {
+ ret = VM_FAULT_OOM;
+ goto out;
+ }
err = set_direct_map_invalid_noflush(page);
if (err) {
put_page(page);
- return vmf_error(err);
+ ret = vmf_error(err);
+ goto out;
}
__SetPageUptodate(page);
@@ -86,7 +92,8 @@ retry:
if (err == -EEXIST)
goto retry;
- return vmf_error(err);
+ ret = vmf_error(err);
+ goto out;
}
addr = (unsigned long)page_address(page);
@@ -94,7 +101,11 @@ retry:
}
vmf->page = page;
- return VM_FAULT_LOCKED;
+ ret = VM_FAULT_LOCKED;
+
+out:
+ filemap_invalidate_unlock_shared(mapping);
+ return ret;
}
static const struct vm_operations_struct secretmem_vm_ops = {
@@ -162,12 +173,20 @@ static int secretmem_setattr(struct user_namespace *mnt_userns,
struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
+ struct address_space *mapping = inode->i_mapping;
unsigned int ia_valid = iattr->ia_valid;
+ int ret;
+
+ filemap_invalidate_lock(mapping);
if ((ia_valid & ATTR_SIZE) && inode->i_size)
- return -EINVAL;
+ ret = -EINVAL;
+ else
+ ret = simple_setattr(mnt_userns, dentry, iattr);
- return simple_setattr(mnt_userns, dentry, iattr);
+ filemap_invalidate_unlock(mapping);
+
+ return ret;
}
static const struct inode_operations secretmem_iops = {
diff --git a/mm/shmem.c b/mm/shmem.c
index a6f565308133..b7f2d4a56867 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3392,7 +3392,7 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
break;
case Opt_nr_blocks:
ctx->blocks = memparse(param->string, &rest);
- if (*rest)
+ if (*rest || ctx->blocks > S64_MAX)
goto bad_value;
ctx->seen |= SHMEM_SEEN_BLOCKS;
break;
@@ -3514,10 +3514,7 @@ static int shmem_reconfigure(struct fs_context *fc)
raw_spin_lock(&sbinfo->stat_lock);
inodes = sbinfo->max_inodes - sbinfo->free_inodes;
- if (ctx->blocks > S64_MAX) {
- err = "Number of blocks too large";
- goto out;
- }
+
if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
if (!sbinfo->max_blocks) {
err = "Cannot retroactively limit size";
diff --git a/mm/slab.c b/mm/slab.c
index f8cd00f4ba13..5e73e2d80222 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3230,7 +3230,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, size_t orig_
}
/* ___cache_alloc_node can fall back to other nodes */
ptr = ____cache_alloc_node(cachep, flags, nodeid);
- out:
+out:
local_irq_restore(save_flags);
ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
init = slab_want_init_on_alloc(flags, cachep);
@@ -3259,7 +3259,7 @@ __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
if (!objp)
objp = ____cache_alloc_node(cache, flags, numa_mem_id());
- out:
+out:
return objp;
}
#else
@@ -3406,9 +3406,10 @@ static __always_inline void __cache_free(struct kmem_cache *cachep, void *objp,
{
bool init;
+ memcg_slab_free_hook(cachep, virt_to_slab(objp), &objp, 1);
+
if (is_kfence_address(objp)) {
kmemleak_free_recursive(objp, cachep->flags);
- memcg_slab_free_hook(cachep, &objp, 1);
__kfence_free(objp);
return;
}
@@ -3441,7 +3442,6 @@ void ___cache_free(struct kmem_cache *cachep, void *objp,
check_irq_off();
kmemleak_free_recursive(objp, cachep->flags);
objp = cache_free_debugcheck(cachep, objp, caller);
- memcg_slab_free_hook(cachep, &objp, 1);
/*
* Skip calling cache_free_alien() when the platform is not numa.
@@ -3478,7 +3478,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
{
void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);
- trace_kmem_cache_alloc(_RET_IP_, ret,
+ trace_kmem_cache_alloc(_RET_IP_, ret, cachep,
cachep->object_size, cachep->size, flags);
return ret;
@@ -3553,7 +3553,7 @@ error:
local_irq_enable();
cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_);
slab_post_alloc_hook(s, objcg, flags, i, p, false);
- __kmem_cache_free_bulk(s, i, p);
+ kmem_cache_free_bulk(s, i, p);
return 0;
}
EXPORT_SYMBOL(kmem_cache_alloc_bulk);
@@ -3567,7 +3567,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_);
ret = kasan_kmalloc(cachep, ret, size, flags);
- trace_kmalloc(_RET_IP_, ret,
+ trace_kmalloc(_RET_IP_, ret, cachep,
size, cachep->size, flags);
return ret;
}
@@ -3592,7 +3592,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);
- trace_kmem_cache_alloc_node(_RET_IP_, ret,
+ trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep,
cachep->object_size, cachep->size,
flags, nodeid);
@@ -3611,7 +3611,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);
ret = kasan_kmalloc(cachep, ret, size, flags);
- trace_kmalloc_node(_RET_IP_, ret,
+ trace_kmalloc_node(_RET_IP_, ret, cachep,
size, cachep->size,
flags, nodeid);
return ret;
@@ -3694,7 +3694,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
ret = slab_alloc(cachep, NULL, flags, size, caller);
ret = kasan_kmalloc(cachep, ret, size, flags);
- trace_kmalloc(caller, ret,
+ trace_kmalloc(caller, ret, cachep,
size, cachep->size, flags);
return ret;
diff --git a/mm/slab.h b/mm/slab.h
index db9fb5c8dae7..4ec82bec15ec 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -380,15 +380,6 @@ void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s);
ssize_t slabinfo_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos);
-/*
- * Generic implementation of bulk operations
- * These are useful for situations in which the allocator cannot
- * perform optimizations. In that case segments of the object listed
- * may be allocated or freed using these operations.
- */
-void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
-int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
-
static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
{
return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
@@ -547,36 +538,22 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
obj_cgroup_put(objcg);
}
-static inline void memcg_slab_free_hook(struct kmem_cache *s_orig,
+static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
void **p, int objects)
{
- struct kmem_cache *s;
struct obj_cgroup **objcgs;
- struct obj_cgroup *objcg;
- struct slab *slab;
- unsigned int off;
int i;
if (!memcg_kmem_enabled())
return;
- for (i = 0; i < objects; i++) {
- if (unlikely(!p[i]))
- continue;
-
- slab = virt_to_slab(p[i]);
- /* we could be given a kmalloc_large() object, skip those */
- if (!slab)
- continue;
-
- objcgs = slab_objcgs(slab);
- if (!objcgs)
- continue;
+ objcgs = slab_objcgs(slab);
+ if (!objcgs)
+ return;
- if (!s_orig)
- s = slab->slab_cache;
- else
- s = s_orig;
+ for (i = 0; i < objects; i++) {
+ struct obj_cgroup *objcg;
+ unsigned int off;
off = obj_to_index(s, slab, p[i]);
objcg = objcgs[off];
@@ -628,7 +605,7 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
{
}
-static inline void memcg_slab_free_hook(struct kmem_cache *s,
+static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
void **p, int objects)
{
}
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 77c3adf40e50..17996649cfe3 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -26,13 +26,12 @@
#include <linux/memcontrol.h>
#include <linux/stackdepot.h>
-#define CREATE_TRACE_POINTS
-#include <trace/events/kmem.h>
-
#include "internal.h"
-
#include "slab.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/kmem.h>
+
enum slab_state slab_state;
LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
@@ -105,33 +104,6 @@ static inline int kmem_cache_sanity_check(const char *name, unsigned int size)
}
#endif
-void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
-{
- size_t i;
-
- for (i = 0; i < nr; i++) {
- if (s)
- kmem_cache_free(s, p[i]);
- else
- kfree(p[i]);
- }
-}
-
-int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
- void **p)
-{
- size_t i;
-
- for (i = 0; i < nr; i++) {
- void *x = p[i] = kmem_cache_alloc(s, flags);
- if (!x) {
- __kmem_cache_free_bulk(s, i, p);
- return 0;
- }
- }
- return i;
-}
-
/*
* Figure out what the alignment of the objects will be given a set of
* flags, a user specified alignment and the size of the objects.
@@ -959,7 +931,7 @@ EXPORT_SYMBOL(kmalloc_order);
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
void *ret = kmalloc_order(size, flags, order);
- trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
+ trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags);
return ret;
}
EXPORT_SYMBOL(kmalloc_order_trace);
diff --git a/mm/slob.c b/mm/slob.c
index f47811f09aca..2bd4f476c340 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -507,7 +507,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
*m = size;
ret = (void *)m + minalign;
- trace_kmalloc_node(caller, ret,
+ trace_kmalloc_node(caller, ret, NULL,
size, size + minalign, gfp, node);
} else {
unsigned int order = get_order(size);
@@ -516,7 +516,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
gfp |= __GFP_COMP;
ret = slob_new_pages(gfp, order, node);
- trace_kmalloc_node(caller, ret,
+ trace_kmalloc_node(caller, ret, NULL,
size, PAGE_SIZE << order, gfp, node);
}
@@ -616,12 +616,12 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
if (c->size < PAGE_SIZE) {
b = slob_alloc(c->size, flags, c->align, node, 0);
- trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+ trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
SLOB_UNITS(c->size) * SLOB_UNIT,
flags, node);
} else {
b = slob_new_pages(flags, get_order(c->size), node);
- trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+ trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
PAGE_SIZE << get_order(c->size),
flags, node);
}
@@ -692,16 +692,33 @@ void kmem_cache_free(struct kmem_cache *c, void *b)
}
EXPORT_SYMBOL(kmem_cache_free);
-void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
+void kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
{
- __kmem_cache_free_bulk(s, size, p);
+ size_t i;
+
+ for (i = 0; i < nr; i++) {
+ if (s)
+ kmem_cache_free(s, p[i]);
+ else
+ kfree(p[i]);
+ }
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
-int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
void **p)
{
- return __kmem_cache_alloc_bulk(s, flags, size, p);
+ size_t i;
+
+ for (i = 0; i < nr; i++) {
+ void *x = p[i] = kmem_cache_alloc(s, flags);
+
+ if (!x) {
+ kmem_cache_free_bulk(s, i, p);
+ return 0;
+ }
+ }
+ return i;
}
EXPORT_SYMBOL(kmem_cache_alloc_bulk);
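
The loops above are the old slab_common.c fallbacks promoted to be SLOB's own kmem_cache_alloc_bulk()/kmem_cache_free_bulk(), so all three allocators now export the same contract: the alloc side returns the number of objects placed in p[] (or 0 after freeing any partial progress), and the free side tolerates a NULL cache by falling back to kfree(). A minimal caller sketch under that contract; the cache pointer and array are assumed to be set up elsewhere, and interrupts must be enabled:

#include <linux/errno.h>
#include <linux/slab.h>

static int alloc_batch(struct kmem_cache *cache, void **objs, size_t nr)
{
	/* Returns nr on success; on failure the partial batch is already freed. */
	if (!kmem_cache_alloc_bulk(cache, GFP_KERNEL, nr, objs))
		return -ENOMEM;
	return 0;
}

static void free_batch(struct kmem_cache *cache, void **objs, size_t nr)
{
	/* One call frees the whole array; a NULL cache means kfree() per object. */
	kmem_cache_free_bulk(cache, nr, objs);
}
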
diff --git a/mm/slub.c b/mm/slub.c
index b1281b8654bd..862dbd9af4f5 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3257,7 +3257,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
{
void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
- trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
+ trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size,
s->size, gfpflags);
return ret;
@@ -3280,7 +3280,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_lru);
void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size);
- trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
+ trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags);
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
@@ -3292,7 +3292,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
- trace_kmem_cache_alloc_node(_RET_IP_, ret,
+ trace_kmem_cache_alloc_node(_RET_IP_, ret, s,
s->object_size, s->size, gfpflags, node);
return ret;
@@ -3306,7 +3306,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
- trace_kmalloc_node(_RET_IP_, ret,
+ trace_kmalloc_node(_RET_IP_, ret, s,
size, s->size, gfpflags, node);
ret = kasan_kmalloc(s, ret, size, gfpflags);
@@ -3464,9 +3464,6 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
struct kmem_cache_cpu *c;
unsigned long tid;
- /* memcg_slab_free_hook() is already called for bulk free. */
- if (!tail)
- memcg_slab_free_hook(s, &head, 1);
redo:
/*
* Determine the currently cpus per cpu slab.
@@ -3526,9 +3523,10 @@ redo:
}
static __always_inline void slab_free(struct kmem_cache *s, struct slab *slab,
- void *head, void *tail, int cnt,
+ void *head, void *tail, void **p, int cnt,
unsigned long addr)
{
+ memcg_slab_free_hook(s, slab, p, cnt);
/*
* With KASAN enabled slab_free_freelist_hook modifies the freelist
* to remove objects, whose reuse must be delayed.
@@ -3550,7 +3548,7 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
if (!s)
return;
trace_kmem_cache_free(_RET_IP_, x, s->name);
- slab_free(s, virt_to_slab(x), x, NULL, 1, _RET_IP_);
+ slab_free(s, virt_to_slab(x), x, NULL, &x, 1, _RET_IP_);
}
EXPORT_SYMBOL(kmem_cache_free);
@@ -3591,88 +3589,67 @@ static inline
int build_detached_freelist(struct kmem_cache *s, size_t size,
void **p, struct detached_freelist *df)
{
- size_t first_skipped_index = 0;
int lookahead = 3;
void *object;
struct folio *folio;
- struct slab *slab;
-
- /* Always re-init detached_freelist */
- df->slab = NULL;
-
- do {
- object = p[--size];
- /* Do we need !ZERO_OR_NULL_PTR(object) here? (for kfree) */
- } while (!object && size);
-
- if (!object)
- return 0;
+ size_t same;
+ object = p[--size];
folio = virt_to_folio(object);
if (!s) {
/* Handle kalloc'ed objects */
if (unlikely(!folio_test_slab(folio))) {
free_large_kmalloc(folio, object);
- p[size] = NULL; /* mark object processed */
+ df->slab = NULL;
return size;
}
/* Derive kmem_cache from object */
- slab = folio_slab(folio);
- df->s = slab->slab_cache;
+ df->slab = folio_slab(folio);
+ df->s = df->slab->slab_cache;
} else {
- slab = folio_slab(folio);
+ df->slab = folio_slab(folio);
df->s = cache_from_obj(s, object); /* Support for memcg */
}
- if (is_kfence_address(object)) {
- slab_free_hook(df->s, object, false);
- __kfence_free(object);
- p[size] = NULL; /* mark object processed */
- return size;
- }
-
/* Start new detached freelist */
- df->slab = slab;
- set_freepointer(df->s, object, NULL);
df->tail = object;
df->freelist = object;
- p[size] = NULL; /* mark object processed */
df->cnt = 1;
+ if (is_kfence_address(object))
+ return size;
+
+ set_freepointer(df->s, object, NULL);
+
+ same = size;
while (size) {
object = p[--size];
- if (!object)
- continue; /* Skip processed objects */
-
/* df->slab is always set at this point */
if (df->slab == virt_to_slab(object)) {
/* Opportunity build freelist */
set_freepointer(df->s, object, df->freelist);
df->freelist = object;
df->cnt++;
- p[size] = NULL; /* mark object processed */
-
+ same--;
+ if (size != same)
+ swap(p[size], p[same]);
continue;
}
/* Limit look ahead search */
if (!--lookahead)
break;
-
- if (!first_skipped_index)
- first_skipped_index = size + 1;
}
- return first_skipped_index;
+ return same;
}
/* Note that interrupts must be enabled when calling this function. */
void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
{
- if (WARN_ON(!size))
+ if (!size)
return;
- memcg_slab_free_hook(s, p, size);
do {
struct detached_freelist df;
@@ -3680,7 +3657,8 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
if (!df.slab)
continue;
- slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt, _RET_IP_);
+ slab_free(df.s, df.slab, df.freelist, df.tail, &p[size], df.cnt,
+ _RET_IP_);
} while (likely(size));
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
@@ -3760,7 +3738,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
error:
slub_put_cpu_ptr(s->cpu_slab);
slab_post_alloc_hook(s, objcg, flags, i, p, false);
- __kmem_cache_free_bulk(s, i, p);
+ kmem_cache_free_bulk(s, i, p);
return 0;
}
EXPORT_SYMBOL(kmem_cache_alloc_bulk);
@@ -4441,7 +4419,7 @@ void *__kmalloc(size_t size, gfp_t flags)
ret = slab_alloc(s, NULL, flags, _RET_IP_, size);
- trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
+ trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags);
ret = kasan_kmalloc(s, ret, size, flags);
@@ -4475,7 +4453,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = kmalloc_large_node(size, flags, node);
- trace_kmalloc_node(_RET_IP_, ret,
+ trace_kmalloc_node(_RET_IP_, ret, NULL,
size, PAGE_SIZE << get_order(size),
flags, node);
@@ -4489,7 +4467,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size);
- trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
+ trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, flags, node);
ret = kasan_kmalloc(s, ret, size, flags);
@@ -4581,7 +4559,7 @@ void kfree(const void *x)
return;
}
slab = folio_slab(folio);
- slab_free(slab->slab_cache, slab, object, NULL, 1, _RET_IP_);
+ slab_free(slab->slab_cache, slab, object, NULL, &object, 1, _RET_IP_);
}
EXPORT_SYMBOL(kfree);
@@ -4890,6 +4868,9 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
s = find_mergeable(size, align, flags, name, ctor);
if (s) {
+ if (sysfs_slab_alias(s, name))
+ return NULL;
+
s->refcount++;
/*
@@ -4898,11 +4879,6 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
*/
s->object_size = max(s->object_size, size);
s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
-
- if (sysfs_slab_alias(s, name)) {
- s->refcount--;
- s = NULL;
- }
}
return s;
@@ -4948,7 +4924,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
ret = slab_alloc(s, NULL, gfpflags, caller, size);
/* Honor the call site pointer we received. */
- trace_kmalloc(caller, ret, size, s->size, gfpflags);
+ trace_kmalloc(caller, ret, s, size, s->size, gfpflags);
return ret;
}
@@ -4964,7 +4940,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = kmalloc_large_node(size, gfpflags, node);
- trace_kmalloc_node(caller, ret,
+ trace_kmalloc_node(caller, ret, NULL,
size, PAGE_SIZE << get_order(size),
gfpflags, node);
@@ -4979,7 +4955,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size);
/* Honor the call site pointer we received. */
- trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
+ trace_kmalloc_node(caller, ret, s, size, s->size, gfpflags, node);
return ret;
}
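
The build_detached_freelist() rewrite above drops the NULL-marking of processed entries: objects that belong to the current slab are swapped to the tail of p[] and the shrunken working size ("same") is returned, so kmem_cache_free_bulk() keeps calling it with a smaller size while the grouped pointers stay intact at &p[size] for memcg_slab_free_hook(). A user-space sketch of that swap-to-tail grouping, with integer keys standing in for slab pointers and without the lookahead limit:

#include <stddef.h>
#include <stdio.h>

/* Move every entry equal to 'key' to the tail of p[0..size) and return the
 * number of entries left in front (the new working size). */
static size_t group_key_at_tail(long *p, size_t size, long key)
{
	size_t same = size;

	while (size) {
		size--;
		if (p[size] != key)
			continue;
		same--;
		if (size != same) {
			long tmp = p[size];

			p[size] = p[same];
			p[same] = tmp;
		}
	}
	return same;
}

int main(void)
{
	long v[] = { 1, 2, 1, 3, 1 };
	size_t n = group_key_at_tail(v, 5, 1);

	/* Prints 2: the three 1s are now grouped at v[2..4]. */
	printf("%zu\n", n);
	return 0;
}
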
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index f4fa61dbbee3..dbbd1a7e65f3 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -78,6 +78,14 @@ static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
spin_lock(&init_mm.page_table_lock);
if (likely(pmd_leaf(*pmd))) {
+ /*
+ * Higher order allocations from the buddy allocator must be able
+ * to be treated as independent small pages (as they can be freed
+ * individually).
+ */
+ if (!PageReserved(page))
+ split_page(page, get_order(PMD_SIZE));
+
/* Make pte visible before pmd. See comment in pmd_install(). */
smp_wmb();
pmd_populate_kernel(&init_mm, pmd, pgtable);
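
The hunk above splits the huge-page backing before the PMD is remapped with PTEs: a high-order block handed out by the buddy allocator is still one allocation unit, so split_page() must turn it into order-0 pages before any of them can be freed individually, while reserved (e.g. bootmem) pages are skipped because they never came from the buddy allocator. A minimal sketch of the split_page() contract outside the vmemmap code; the order and helpers are illustrative only:

#include <linux/gfp.h>
#include <linux/mm.h>

/* Allocate a contiguous order-2 block and make its four pages
 * independently freeable. */
static struct page *alloc_splittable_block(void)
{
	struct page *page = alloc_pages(GFP_KERNEL, 2);

	if (!page)
		return NULL;

	split_page(page, 2);	/* now four order-0 pages */
	return page;
}

static void free_one_tail_page(struct page *head)
{
	/* Only legal because split_page() was called on the block. */
	__free_page(head + 1);
}
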
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 2a65a89b5b4d..10b94d64cc25 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -307,7 +307,7 @@ swp_entry_t folio_alloc_swap(struct folio *folio)
entry.val = 0;
if (folio_test_large(folio)) {
- if (IS_ENABLED(CONFIG_THP_SWAP))
+ if (IS_ENABLED(CONFIG_THP_SWAP) && arch_thp_swp_supported())
get_swap_pages(1, &entry, folio_nr_pages(folio));
goto out;
}
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 4f4892a5f767..07d3befc80e4 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -246,7 +246,10 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
struct page *page;
int ret;
- ret = shmem_getpage(inode, pgoff, &page, SGP_READ);
+ ret = shmem_getpage(inode, pgoff, &page, SGP_NOALLOC);
+ /* Our caller expects us to return -EFAULT if we failed to find the page. */
+ if (ret == -ENOENT)
+ ret = -EFAULT;
if (ret)
goto out;
if (!page) {
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 53b1955b027f..214532173536 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -182,10 +182,14 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
else if (dev->mtu > max_mtu)
return -EINVAL;
+ /* Note: If this initial vlan_changelink() fails, we need
+ * to call vlan_dev_free_egress_priority() to free memory.
+ */
err = vlan_changelink(dev, tb, data, extack);
- if (err)
- return err;
- err = register_vlan_dev(dev, extack);
+
+ if (!err)
+ err = register_vlan_dev(dev, extack);
+
if (err)
vlan_dev_free_egress_priority(dev);
return err;
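
The reshuffle above exists so that a failure in either step frees the egress priority mappings that vlan_changelink() may already have allocated, instead of returning early and leaking them. Reduced to a sketch with hypothetical helpers standing in for vlan_changelink(), register_vlan_dev() and vlan_dev_free_egress_priority():

#include <linux/netdevice.h>

int configure_device(struct net_device *dev);		/* hypothetical */
int register_device(struct net_device *dev);		/* hypothetical */
void release_device_state(struct net_device *dev);	/* hypothetical */

/* Two setup steps sharing one cleanup path on failure. */
static int setup_with_shared_cleanup(struct net_device *dev)
{
	int err;

	err = configure_device(dev);		/* may allocate state */
	if (!err)
		err = register_device(dev);	/* second failure point */

	if (err)
		release_device_state(dev);	/* undo partial setup */

	return err;
}
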
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 1739e8cb3291..c17021642234 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -4973,6 +4973,9 @@ int hci_suspend_sync(struct hci_dev *hdev)
return err;
}
+ /* Update event mask so only the allowed event can wakeup the host */
+ hci_set_event_mask_sync(hdev);
+
/* Only configure accept list if disconnect succeeded and wake
* isn't being prevented.
*/
@@ -4984,9 +4987,6 @@ int hci_suspend_sync(struct hci_dev *hdev)
/* Unpause to take care of updating scanning params */
hdev->scanning_paused = false;
- /* Update event mask so only the allowed event can wakeup the host */
- hci_set_event_mask_sync(hdev);
-
/* Enable event filter for paired devices */
hci_update_event_filter_sync(hdev);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index ae78490ecd3d..52668662ae8d 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -111,7 +111,8 @@ static struct l2cap_chan *__l2cap_get_chan_by_scid(struct l2cap_conn *conn,
}
/* Find channel with given SCID.
- * Returns locked channel. */
+ * Returns a locked channel with a reference held.
+ */
static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
u16 cid)
{
@@ -119,15 +120,19 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_scid(conn, cid);
- if (c)
- l2cap_chan_lock(c);
+ if (c) {
+ /* Only lock if chan reference is not 0 */
+ c = l2cap_chan_hold_unless_zero(c);
+ if (c)
+ l2cap_chan_lock(c);
+ }
mutex_unlock(&conn->chan_lock);
return c;
}
/* Find channel with given DCID.
- * Returns locked channel.
+ * Returns a locked channel with a reference held.
*/
static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
u16 cid)
@@ -136,8 +141,12 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_dcid(conn, cid);
- if (c)
- l2cap_chan_lock(c);
+ if (c) {
+ /* Only lock if chan reference is not 0 */
+ c = l2cap_chan_hold_unless_zero(c);
+ if (c)
+ l2cap_chan_lock(c);
+ }
mutex_unlock(&conn->chan_lock);
return c;
@@ -162,8 +171,12 @@ static struct l2cap_chan *l2cap_get_chan_by_ident(struct l2cap_conn *conn,
mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_ident(conn, ident);
- if (c)
- l2cap_chan_lock(c);
+ if (c) {
+ /* Only lock if chan reference is not 0 */
+ c = l2cap_chan_hold_unless_zero(c);
+ if (c)
+ l2cap_chan_lock(c);
+ }
mutex_unlock(&conn->chan_lock);
return c;
@@ -497,6 +510,16 @@ void l2cap_chan_hold(struct l2cap_chan *c)
kref_get(&c->kref);
}
+struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c)
+{
+ BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
+
+ if (!kref_get_unless_zero(&c->kref))
+ return NULL;
+
+ return c;
+}
+
void l2cap_chan_put(struct l2cap_chan *c)
{
BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
@@ -1968,7 +1991,10 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
src_match = !bacmp(&c->src, src);
dst_match = !bacmp(&c->dst, dst);
if (src_match && dst_match) {
- l2cap_chan_hold(c);
+ c = l2cap_chan_hold_unless_zero(c);
+ if (!c)
+ continue;
+
read_unlock(&chan_list_lock);
return c;
}
@@ -1983,7 +2009,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
}
if (c1)
- l2cap_chan_hold(c1);
+ c1 = l2cap_chan_hold_unless_zero(c1);
read_unlock(&chan_list_lock);
@@ -4463,6 +4489,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
unlock:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return err;
}
@@ -4577,6 +4604,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn,
done:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return err;
}
@@ -5304,6 +5332,7 @@ send_move_response:
l2cap_send_move_chan_rsp(chan, result);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -5396,6 +5425,7 @@ static void l2cap_move_continue(struct l2cap_conn *conn, u16 icid, u16 result)
}
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
}
static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
@@ -5425,6 +5455,7 @@ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
}
static int l2cap_move_channel_rsp(struct l2cap_conn *conn,
@@ -5488,6 +5519,7 @@ static int l2cap_move_channel_confirm(struct l2cap_conn *conn,
l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -5523,6 +5555,7 @@ static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn,
}
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -5895,12 +5928,11 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
if (credits > max_credits) {
BT_ERR("LE credits overflow");
l2cap_send_disconn_req(chan, ECONNRESET);
- l2cap_chan_unlock(chan);
/* Return 0 so that we don't trigger an unnecessary
* command reject packet.
*/
- return 0;
+ goto unlock;
}
chan->tx_credits += credits;
@@ -5911,7 +5943,9 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
if (chan->tx_credits)
chan->ops->resume(chan);
+unlock:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -7597,6 +7631,7 @@ drop:
done:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
}
static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
@@ -8085,7 +8120,7 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c,
if (src_type != c->src_type)
continue;
- l2cap_chan_hold(c);
+ c = l2cap_chan_hold_unless_zero(c);
read_unlock(&chan_list_lock);
return c;
}
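
The common thread of the l2cap changes above: every lookup now takes its own reference with l2cap_chan_hold_unless_zero() before locking, so a channel racing with its final put is skipped rather than resurrected, and each path that previously only did l2cap_chan_unlock() now pairs it with l2cap_chan_put(). The same pattern in generic kref terms, as a minimal sketch; everything except the kref helpers is illustrative:

#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct chan {
	struct kref kref;
	struct mutex lock;
};

static void chan_release(struct kref *kref)
{
	kfree(container_of(kref, struct chan, kref));
}

/* Take a reference only if the object is not already on its way out. */
static struct chan *chan_get_unless_zero(struct chan *c)
{
	if (!c || !kref_get_unless_zero(&c->kref))
		return NULL;
	return c;
}

static void chan_put(struct chan *c)
{
	kref_put(&c->kref, chan_release);
}

static void use_chan(struct chan *c)
{
	c = chan_get_unless_zero(c);
	if (!c)
		return;		/* lost the race against the final put */

	mutex_lock(&c->lock);
	/* ... work on the channel ... */
	mutex_unlock(&c->lock);
	chan_put(c);		/* drop the reference taken above */
}
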
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index ae758ab1b558..2f91a8c2b678 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -4723,7 +4723,6 @@ static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev,
else
status = MGMT_STATUS_FAILED;
- mgmt_pending_remove(cmd);
goto unlock;
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index bb01776d2d88..c96509c442a5 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -589,9 +589,13 @@ static int br_fill_ifinfo(struct sk_buff *skb,
}
done:
+ if (af) {
+ if (nlmsg_get_pos(skb) - (void *)af > nla_attr_size(0))
+ nla_nest_end(skb, af);
+ else
+ nla_nest_cancel(skb, af);
+ }
- if (af)
- nla_nest_end(skb, af);
nlmsg_end(skb, nlh);
return 0;
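
br_fill_ifinfo() now keeps the IFLA_AF_SPEC nest only if something was actually written after its header; an empty nested attribute is cancelled so dumps do not carry a zero-length container. A sketch of that keep-or-cancel pattern, assuming a hypothetical attribute type for the inner put:

#include <linux/errno.h>
#include <linux/if_link.h>
#include <net/netlink.h>

/* Emit an optional nested attribute, dropping the nest if it stays empty. */
static int fill_optional_nest(struct sk_buff *skb, bool have_data, u32 value)
{
	struct nlattr *nest = nla_nest_start(skb, IFLA_AF_SPEC);

	if (!nest)
		return -EMSGSIZE;

	if (have_data && nla_put_u32(skb, 1 /* hypothetical type */, value)) {
		nla_nest_cancel(skb, nest);
		return -EMSGSIZE;
	}

	/* Anything beyond the bare nest header? Keep it; otherwise drop it. */
	if (nlmsg_get_pos(skb) - (void *)nest > nla_attr_size(0))
		nla_nest_end(skb, nest);
	else
		nla_nest_cancel(skb, nest);

	return 0;
}
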
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 251e666ba9a2..748be7253248 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -47,7 +47,7 @@ enum caif_states {
struct caifsock {
struct sock sk; /* must be first member */
struct cflayer layer;
- u32 flow_state;
+ unsigned long flow_state;
struct caif_connect_request conn_req;
struct mutex readlock;
struct dentry *debugfs_socket_dir;
@@ -56,38 +56,32 @@ struct caifsock {
static int rx_flow_is_on(struct caifsock *cf_sk)
{
- return test_bit(RX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ return test_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static int tx_flow_is_on(struct caifsock *cf_sk)
{
- return test_bit(TX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ return test_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void set_rx_flow_off(struct caifsock *cf_sk)
{
- clear_bit(RX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ clear_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void set_rx_flow_on(struct caifsock *cf_sk)
{
- set_bit(RX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ set_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void set_tx_flow_off(struct caifsock *cf_sk)
{
- clear_bit(TX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ clear_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void set_tx_flow_on(struct caifsock *cf_sk)
{
- set_bit(TX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ set_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void caif_read_lock(struct sock *sk)
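
The caif change is a type fix: set_bit()/test_bit() and friends operate on unsigned long words, so casting a u32 field to (void *) let them touch bytes beyond the field on 64-bit and pick the wrong bit on big-endian. With flow_state declared as unsigned long the helpers can take its address directly. A minimal sketch of the corrected form; the struct and bit indices are illustrative:

#include <linux/bitops.h>
#include <linux/types.h>

#define RX_ON_BIT	0	/* illustrative bit numbers */
#define TX_ON_BIT	1

struct flow_flags {
	unsigned long state;	/* bitops always want unsigned long */
};

static void flow_rx_on(struct flow_flags *f)
{
	set_bit(RX_ON_BIT, &f->state);
}

static bool flow_rx_is_on(const struct flow_flags *f)
{
	return test_bit(RX_ON_BIT, &f->state);
}
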
diff --git a/net/core/dev.c b/net/core/dev.c
index 8e6f22961206..30a1603a7225 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4863,7 +4863,10 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
}
/* When doing generic XDP we have to bypass the qdisc layer and the
- * network taps in order to match in-driver-XDP behavior.
+ * network taps in order to match in-driver-XDP behavior. This also means
+ * that XDP packets are able to starve other packets going through a qdisc,
+ * and DDoS attacks will be more effective. In-driver XDP uses dedicated TX
+ * queues, so it does not have this starvation issue.
*/
void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
{
@@ -4875,7 +4878,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
txq = netdev_core_pick_tx(dev, skb, NULL);
cpu = smp_processor_id();
HARD_TX_LOCK(dev, txq, cpu);
- if (!netif_xmit_stopped(txq)) {
+ if (!netif_xmit_frozen_or_drv_stopped(txq)) {
rc = netdev_start_xmit(skb, dev, txq, 0);
if (dev_xmit_complete(rc))
free_skb = false;
@@ -4883,6 +4886,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
HARD_TX_UNLOCK(dev, txq);
if (free_skb) {
trace_xdp_exception(dev, xdp_prog, XDP_TX);
+ dev_core_stats_tx_dropped_inc(dev);
kfree_skb(skb);
}
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 5d16d66727fc..7950f7520765 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6158,7 +6158,6 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len
if (err)
return err;
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
return seg6_lookup_nexthop(skb, NULL, 0);
@@ -7042,7 +7041,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
return -EINVAL;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
return -EINVAL;
if (!th->ack || th->rst || th->syn)
@@ -7117,7 +7116,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
return -EINVAL;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
return -ENOENT;
if (!th->syn || th->ack || th->fin || th->rst)
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 5f85e01d4093..b0ff6153be62 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -64,7 +64,7 @@ u32 secure_tcpv6_ts_off(const struct net *net,
.daddr = *(struct in6_addr *)daddr,
};
- if (net->ipv4.sysctl_tcp_timestamps != 1)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
return 0;
ts_secret_init();
@@ -120,7 +120,7 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#ifdef CONFIG_INET
u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr)
{
- if (net->ipv4.sysctl_tcp_timestamps != 1)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
return 0;
ts_secret_init();
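
The secure_seq and filter changes above, and most of the net/ipv4 hunks that follow, are the same annotation: sysctls updated locklessly from their proc handlers are now sampled with READ_ONCE() at the lockless read sites, paired with WRITE_ONCE() on the store side (see the sysctl_ip_prot_sock handler further down), so the accesses cannot tear and KCSAN stays quiet. The pattern in isolation, as a sketch with an illustrative variable:

#include <linux/compiler.h>
#include <linux/types.h>

static int example_limit = 100;		/* written from a sysctl handler */

/* Writer side: publish the new value in one untorn store. */
static void example_limit_update(int new_limit)
{
	WRITE_ONCE(example_limit, new_limit);
}

/* Reader side: sample the value once, untorn, each time it is used. */
static bool example_over_limit(int count)
{
	return count > READ_ONCE(example_limit);
}
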
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 3f00a28fe762..5daa1fa54249 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -387,7 +387,7 @@ void reuseport_stop_listen_sock(struct sock *sk)
prog = rcu_dereference_protected(reuse->prog,
lockdep_is_held(&reuseport_lock));
- if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req ||
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req) ||
(prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) {
/* Migration capable, move sk from the listening section
* to the closed section.
@@ -545,7 +545,7 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
hash = migrating_sk->sk_hash;
prog = rcu_dereference(reuse->prog);
if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) {
- if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req))
goto select_by_hash;
goto failure;
}
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index dc92a67baea3..7d542eb46172 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -480,8 +480,8 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf
sk->sk_family = PF_DECnet;
sk->sk_protocol = 0;
sk->sk_allocation = gfp;
- sk->sk_sndbuf = sysctl_decnet_wmem[1];
- sk->sk_rcvbuf = sysctl_decnet_rmem[1];
+ sk->sk_sndbuf = READ_ONCE(sysctl_decnet_wmem[1]);
+ sk->sk_rcvbuf = READ_ONCE(sysctl_decnet_rmem[1]);
/* Initialization of DECnet Session Control Port */
scp = DN_SK(sk);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 3738f2d40a0b..2dd76eb1621c 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -248,6 +248,7 @@ static void dsa_port_reset_vlan_filtering(struct dsa_port *dp,
struct netlink_ext_ack extack = {0};
bool change_vlan_filtering = false;
struct dsa_switch *ds = dp->ds;
+ struct dsa_port *other_dp;
bool vlan_filtering;
int err;
@@ -270,8 +271,8 @@ static void dsa_port_reset_vlan_filtering(struct dsa_port *dp,
* VLAN-aware bridge.
*/
if (change_vlan_filtering && ds->vlan_filtering_is_global) {
- dsa_switch_for_each_port(dp, ds) {
- struct net_device *br = dsa_port_bridge_dev_get(dp);
+ dsa_switch_for_each_port(other_dp, ds) {
+ struct net_device *br = dsa_port_bridge_dev_get(other_dp);
if (br && br_vlan_enabled(br)) {
change_vlan_filtering = false;
@@ -799,7 +800,7 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
ds->vlan_filtering = vlan_filtering;
dsa_switch_for_each_user_port(other_dp, ds) {
- struct net_device *slave = dp->slave;
+ struct net_device *slave = other_dp->slave;
/* We might be called in the unbind path, so not
* all slave devices might still be registered.
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 2b56218fc57c..4dfd68cf61c5 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -344,6 +344,7 @@ static int dsa_switch_do_lag_fdb_add(struct dsa_switch *ds, struct dsa_lag *lag,
ether_addr_copy(a->addr, addr);
a->vid = vid;
+ a->db = db;
refcount_set(&a->refcount, 1);
list_add_tail(&a->list, &lag->fdbs);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 93da9f783bec..252c8bceaba4 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -217,7 +217,7 @@ int inet_listen(struct socket *sock, int backlog)
* because the socket was in TCP_LISTEN state previously but
* was shutdown() rather than close().
*/
- tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
(tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
@@ -335,7 +335,7 @@ lookup_protocol:
inet->hdrincl = 1;
}
- if (net->ipv4.sysctl_ip_no_pmtu_disc)
+ if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
@@ -1246,7 +1246,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
if (new_saddr == old_saddr)
return 0;
- if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) {
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) {
pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n",
__func__, &old_saddr, &new_saddr);
}
@@ -1301,7 +1301,7 @@ int inet_sk_rebuild_header(struct sock *sk)
* Other protocols have to map its equivalent state to TCP_SYN_SENT.
* DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
*/
- if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr ||
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) ||
sk->sk_state != TCP_SYN_SENT ||
(sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
(err = inet_sk_reselect_saddr(sk)) != 0)
@@ -1710,24 +1710,14 @@ static const struct net_protocol igmp_protocol = {
};
#endif
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol tcp_protocol = {
- .early_demux = tcp_v4_early_demux,
- .early_demux_handler = tcp_v4_early_demux,
+static const struct net_protocol tcp_protocol = {
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
.no_policy = 1,
.icmp_strict_tag_validation = 1,
};
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol udp_protocol = {
- .early_demux = udp_v4_early_demux,
- .early_demux_handler = udp_v4_early_demux,
+static const struct net_protocol udp_protocol = {
.handler = udp_rcv,
.err_handler = udp_err,
.no_policy = 1,
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 6eea1e9e998d..f8ad04470d3a 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -507,7 +507,7 @@ static int ah_init_state(struct xfrm_state *x)
if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
crypto_ahash_digestsize(ahash)) {
- pr_info("%s: %s digestsize %u != %hu\n",
+ pr_info("%s: %s digestsize %u != %u\n",
__func__, x->aalg->alg_name,
crypto_ahash_digestsize(ahash),
aalg_desc->uinfo.auth.icv_fullbits / 8);
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 62d5f99760aa..6cd3b6c559f0 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -239,7 +239,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
struct cipso_v4_map_cache_entry *prev_entry = NULL;
u32 hash;
- if (!cipso_v4_cache_enabled)
+ if (!READ_ONCE(cipso_v4_cache_enabled))
return -ENOENT;
hash = cipso_v4_map_cache_hash(key, key_len);
@@ -296,13 +296,14 @@ static int cipso_v4_cache_check(const unsigned char *key,
int cipso_v4_cache_add(const unsigned char *cipso_ptr,
const struct netlbl_lsm_secattr *secattr)
{
+ int bkt_size = READ_ONCE(cipso_v4_cache_bucketsize);
int ret_val = -EPERM;
u32 bkt;
struct cipso_v4_map_cache_entry *entry = NULL;
struct cipso_v4_map_cache_entry *old_entry = NULL;
u32 cipso_ptr_len;
- if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0)
+ if (!READ_ONCE(cipso_v4_cache_enabled) || bkt_size <= 0)
return 0;
cipso_ptr_len = cipso_ptr[1];
@@ -322,7 +323,7 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr,
bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1);
spin_lock_bh(&cipso_v4_cache[bkt].lock);
- if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
+ if (cipso_v4_cache[bkt].size < bkt_size) {
list_add(&entry->list, &cipso_v4_cache[bkt].list);
cipso_v4_cache[bkt].size += 1;
} else {
@@ -1199,7 +1200,8 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
/* This will send packets using the "optimized" format when
* possible as specified in section 3.4.2.6 of the
* CIPSO draft. */
- if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10)
+ if (READ_ONCE(cipso_v4_rbm_optfmt) && ret_val > 0 &&
+ ret_val <= 10)
tag_len = 14;
else
tag_len = 4 + ret_val;
@@ -1603,7 +1605,7 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
* all the CIPSO validations here but it doesn't
* really specify _exactly_ what we need to validate
* ... so, just make it a sysctl tunable. */
- if (cipso_v4_rbm_strictvalid) {
+ if (READ_ONCE(cipso_v4_rbm_strictvalid)) {
if (cipso_v4_map_lvl_valid(doi_def,
tag[3]) < 0) {
err_offset = opt_iter + 3;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b21238df3301..b694f352ce7a 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1108,7 +1108,7 @@ static int esp_init_authenc(struct xfrm_state *x)
err = -EINVAL;
if (aalg_desc->uinfo.auth.icv_fullbits / 8 !=
crypto_aead_authsize(aead)) {
- pr_info("ESP: %s digestsize %u != %hu\n",
+ pr_info("ESP: %s digestsize %u != %u\n",
x->aalg->alg_name,
crypto_aead_authsize(aead),
aalg_desc->uinfo.auth.icv_fullbits / 8);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index a57ba23571c9..db7b2503f068 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1230,7 +1230,7 @@ static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh,
nh->fib_nh_dev = in_dev->dev;
dev_hold_track(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC);
- nh->fib_nh_scope = RT_SCOPE_HOST;
+ nh->fib_nh_scope = RT_SCOPE_LINK;
if (!netif_carrier_ok(nh->fib_nh_dev))
nh->fib_nh_flags |= RTNH_F_LINKDOWN;
err = 0;
@@ -1811,7 +1811,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
goto nla_put_failure;
if (nexthop_is_blackhole(fi->nh))
rtm->rtm_type = RTN_BLACKHOLE;
- if (!fi->fib_net->ipv4.sysctl_nexthop_compat_mode)
+ if (!READ_ONCE(fi->fib_net->ipv4.sysctl_nexthop_compat_mode))
goto offload;
}
@@ -2216,7 +2216,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
}
change_nexthops(fi) {
- if (net->ipv4.sysctl_fib_multipath_use_neigh) {
+ if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) {
if (!fib_good_nh(nexthop_nh))
continue;
if (!first) {
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 2734c3af7e24..452ff177e4da 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -498,7 +498,7 @@ static void tnode_free(struct key_vector *tn)
tn = container_of(head, struct tnode, rcu)->kv;
}
- if (tnode_free_size >= sysctl_fib_sync_mem) {
+ if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) {
tnode_free_size = 0;
synchronize_rcu();
}
@@ -1042,6 +1042,7 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
{
+ u8 fib_notify_on_flag_change;
struct fib_alias *fa_match;
struct sk_buff *skb;
int err;
@@ -1063,14 +1064,16 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
WRITE_ONCE(fa_match->offload, fri->offload);
WRITE_ONCE(fa_match->trap, fri->trap);
+ fib_notify_on_flag_change = READ_ONCE(net->ipv4.sysctl_fib_notify_on_flag_change);
+
/* 2 means send notifications only if offload_failed was changed. */
- if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 &&
+ if (fib_notify_on_flag_change == 2 &&
READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
goto out;
WRITE_ONCE(fa_match->offload_failed, fri->offload_failed);
- if (!net->ipv4.sysctl_fib_notify_on_flag_change)
+ if (!fib_notify_on_flag_change)
goto out;
skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index efea0e796f06..d5d745c3e345 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -253,11 +253,12 @@ bool icmp_global_allow(void)
spin_lock(&icmp_global.lock);
delta = min_t(u32, now - icmp_global.stamp, HZ);
if (delta >= HZ / 50) {
- incr = sysctl_icmp_msgs_per_sec * delta / HZ ;
+ incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
if (incr)
WRITE_ONCE(icmp_global.stamp, now);
}
- credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst);
+ credit = min_t(u32, icmp_global.credit + incr,
+ READ_ONCE(sysctl_icmp_msgs_burst));
if (credit) {
/* We want to use a credit of one in average, but need to randomize
* it for security reasons.
@@ -281,7 +282,7 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code)
return true;
/* Limit if icmp type is enabled in ratemask. */
- if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask))
+ if (!((1 << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask)))
return true;
return false;
@@ -319,7 +320,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
vif = l3mdev_master_ifindex(dst->dev);
peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
- rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit);
+ rc = inet_peer_xrlim_allow(peer,
+ READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
if (peer)
inet_putpeer(peer);
out:
@@ -692,7 +694,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
rcu_read_lock();
if (rt_is_input_route(rt) &&
- net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
+ READ_ONCE(net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr))
dev = dev_get_by_index_rcu(net, inet_iif(skb_in));
if (dev)
@@ -879,7 +881,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb)
* values please see
* Documentation/networking/ip-sysctl.rst
*/
- switch (net->ipv4.sysctl_ip_no_pmtu_disc) {
+ switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) {
default:
net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n",
&iph->daddr);
@@ -932,7 +934,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb)
* get the other vendor to fix their kit.
*/
- if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses &&
+ if (!READ_ONCE(net->ipv4.sysctl_icmp_ignore_bogus_error_responses) &&
inet_addr_type_dev_table(net, skb->dev, iph->daddr) == RTN_BROADCAST) {
net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n",
&ip_hdr(skb)->saddr,
@@ -992,7 +994,7 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb)
net = dev_net(skb_dst(skb)->dev);
/* should there be an ICMP stat for ignored echos? */
- if (net->ipv4.sysctl_icmp_echo_ignore_all)
+ if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all))
return SKB_NOT_DROPPED_YET;
icmp_param.data.icmph = *icmp_hdr(skb);
@@ -1027,7 +1029,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
u16 ident_len;
u8 status;
- if (!net->ipv4.sysctl_icmp_echo_enable_probe)
+ if (!READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
return false;
/* We currently only support probing interfaces on the proxy node
@@ -1248,7 +1250,7 @@ int icmp_rcv(struct sk_buff *skb)
*/
if ((icmph->type == ICMP_ECHO ||
icmph->type == ICMP_TIMESTAMP) &&
- net->ipv4.sysctl_icmp_echo_ignore_broadcasts) {
+ READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_broadcasts)) {
reason = SKB_DROP_REASON_INVALID_PROTO;
goto error;
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index b65d074d9620..e3ab0cb61624 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -467,7 +467,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
if (pmc->multiaddr == IGMP_ALL_HOSTS)
return skb;
- if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(pmc->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return skb;
mtu = READ_ONCE(dev->mtu);
@@ -593,7 +594,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
if (pmc->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(pmc->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
spin_lock_bh(&pmc->lock);
if (pmc->sfcount[MCAST_EXCLUDE])
@@ -736,7 +737,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
return igmpv3_send_report(in_dev, pmc);
- if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(group) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return 0;
if (type == IGMP_HOST_LEAVE_MESSAGE)
@@ -825,7 +827,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
struct net *net = dev_net(in_dev->dev);
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
return;
- WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv);
+ WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv));
igmp_ifc_start_timer(in_dev, 1);
}
@@ -920,7 +922,8 @@ static bool igmp_heard_report(struct in_device *in_dev, __be32 group)
if (group == IGMP_ALL_HOSTS)
return false;
- if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(group) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return false;
rcu_read_lock();
@@ -1006,7 +1009,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
* received value was zero, use the default or statically
* configured value.
*/
- in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
+ in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
/* RFC3376, 8.3. Query Response Interval:
@@ -1045,7 +1048,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
if (im->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(im->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
spin_lock_bh(&im->lock);
if (im->tm_running)
@@ -1186,7 +1189,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im,
pmc->interface = im->interface;
in_dev_hold(in_dev);
pmc->multiaddr = im->multiaddr;
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
pmc->sfmode = im->sfmode;
if (pmc->sfmode == MCAST_INCLUDE) {
struct ip_sf_list *psf;
@@ -1237,9 +1240,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
swap(im->tomb, pmc->tomb);
swap(im->sources, pmc->sources);
for (psf = im->sources; psf; psf = psf->sf_next)
- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ psf->sf_crcount = in_dev->mr_qrv ?:
+ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
} else {
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ im->crcount = in_dev->mr_qrv ?:
+ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
}
in_dev_put(pmc->interface);
kfree_pmc(pmc);
@@ -1296,7 +1301,8 @@ static void __igmp_group_dropped(struct ip_mc_list *im, gfp_t gfp)
#ifdef CONFIG_IP_MULTICAST
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
- if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(im->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return;
reporter = im->reporter;
@@ -1338,13 +1344,14 @@ static void igmp_group_added(struct ip_mc_list *im)
#ifdef CONFIG_IP_MULTICAST
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
- if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(im->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return;
if (in_dev->dead)
return;
- im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
+ im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
spin_lock_bh(&im->lock);
igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
@@ -1358,7 +1365,7 @@ static void igmp_group_added(struct ip_mc_list *im)
* IN() to IN(A).
*/
if (im->sfmode == MCAST_EXCLUDE)
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
igmp_ifc_event(in_dev);
#endif
@@ -1642,7 +1649,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev)
if (im->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(im->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
/* a failover is happening and switches
@@ -1749,7 +1756,7 @@ static void ip_mc_reset(struct in_device *in_dev)
in_dev->mr_qi = IGMP_QUERY_INTERVAL;
in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
+ in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
}
#else
static void ip_mc_reset(struct in_device *in_dev)
@@ -1883,7 +1890,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
#ifdef CONFIG_IP_MULTICAST
if (psf->sf_oldin &&
!IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
psf->sf_next = pmc->tomb;
pmc->tomb = psf;
rv = 1;
@@ -1947,7 +1954,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
/* filter mode change */
pmc->sfmode = MCAST_INCLUDE;
#ifdef CONFIG_IP_MULTICAST
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
@@ -2126,7 +2133,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
#ifdef CONFIG_IP_MULTICAST
/* else no filters; keep old mode for reports */
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
@@ -2192,7 +2199,7 @@ static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr,
count++;
}
err = -ENOBUFS;
- if (count >= net->ipv4.sysctl_igmp_max_memberships)
+ if (count >= READ_ONCE(net->ipv4.sysctl_igmp_max_memberships))
goto done;
iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL);
if (!iml)
@@ -2379,7 +2386,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
}
/* else, add a new source to the filter */
- if (psl && psl->sl_count >= net->ipv4.sysctl_igmp_max_msf) {
+ if (psl && psl->sl_count >= READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) {
err = -ENOBUFS;
goto done;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 53f5f956d948..eb31c7158b39 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -263,7 +263,7 @@ next_port:
goto other_half_scan;
}
- if (net->ipv4.sysctl_ip_autobind_reuse && !relax) {
+ if (READ_ONCE(net->ipv4.sysctl_ip_autobind_reuse) && !relax) {
/* We still have a chance to connect to different destinations */
relax = true;
goto ports_exhausted;
@@ -833,7 +833,8 @@ static void reqsk_timer_handler(struct timer_list *t)
icsk = inet_csk(sk_listener);
net = sock_net(sk_listener);
- max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
+ max_syn_ack_retries = icsk->icsk_syn_retries ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_synack_retries);
/* Normally all the openreqs are young and become mature
* (i.e. converted to established socket) for first timeout.
* If synack was not acknowledged for 1 second, it means
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 0ec501845cb3..47ccc343c9fb 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -156,7 +156,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
{
struct inet_timewait_sock *tw;
- if (refcount_read(&dr->tw_refcount) - 1 >= dr->sysctl_max_tw_buckets)
+ if (refcount_read(&dr->tw_refcount) - 1 >=
+ READ_ONCE(dr->sysctl_max_tw_buckets))
return NULL;
tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index da21dfce24d7..e9fed83e9b3c 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -141,16 +141,20 @@ static void inet_peer_gc(struct inet_peer_base *base,
struct inet_peer *gc_stack[],
unsigned int gc_cnt)
{
+ int peer_threshold, peer_maxttl, peer_minttl;
struct inet_peer *p;
__u32 delta, ttl;
int i;
- if (base->total >= inet_peer_threshold)
+ peer_threshold = READ_ONCE(inet_peer_threshold);
+ peer_maxttl = READ_ONCE(inet_peer_maxttl);
+ peer_minttl = READ_ONCE(inet_peer_minttl);
+
+ if (base->total >= peer_threshold)
ttl = 0; /* be aggressive */
else
- ttl = inet_peer_maxttl
- - (inet_peer_maxttl - inet_peer_minttl) / HZ *
- base->total / inet_peer_threshold * HZ;
+ ttl = peer_maxttl - (peer_maxttl - peer_minttl) / HZ *
+ base->total / peer_threshold * HZ;
for (i = 0; i < gc_cnt; i++) {
p = gc_stack[i];
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index e3aa436a1bdf..e18931a6d153 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -157,7 +157,7 @@ int ip_forward(struct sk_buff *skb)
!skb_sec_path(skb))
ip_rt_send_redirect(skb);
- if (net->ipv4.sysctl_ip_fwd_update_priority)
+ if (READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority))
skb->priority = rt_tos2priority(iph->tos);
return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index b1165f717cd1..1b512390b3cf 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -312,14 +312,13 @@ static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph,
ip_hdr(hint)->tos == iph->tos;
}
-INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
+int tcp_v4_early_demux(struct sk_buff *skb);
+int udp_v4_early_demux(struct sk_buff *skb);
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb, struct net_device *dev,
const struct sk_buff *hint)
{
const struct iphdr *iph = ip_hdr(skb);
- int (*edemux)(struct sk_buff *skb);
int err, drop_reason;
struct rtable *rt;
@@ -332,21 +331,29 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
goto drop_error;
}
- if (net->ipv4.sysctl_ip_early_demux &&
+ if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
!skb_dst(skb) &&
!skb->sk &&
!ip_is_fragment(iph)) {
- const struct net_protocol *ipprot;
- int protocol = iph->protocol;
-
- ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
- err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
- udp_v4_early_demux, skb);
- if (unlikely(err))
- goto drop_error;
- /* must reload iph, skb->head might have changed */
- iph = ip_hdr(skb);
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) {
+ tcp_v4_early_demux(skb);
+
+ /* must reload iph, skb->head might have changed */
+ iph = ip_hdr(skb);
+ }
+ break;
+ case IPPROTO_UDP:
+ if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
+ err = udp_v4_early_demux(skb);
+ if (unlikely(err))
+ goto drop_error;
+
+ /* must reload iph, skb->head might have changed */
+ iph = ip_hdr(skb);
+ }
+ break;
}
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 445a9ecaefa1..a8a323ecbb54 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -782,7 +782,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
/* numsrc >= (4G-140)/128 overflow in 32 bits */
err = -ENOBUFS;
if (gsf->gf_numsrc >= 0x1ffffff ||
- gsf->gf_numsrc > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
+ gsf->gf_numsrc > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
goto out_free_gsf;
err = -EINVAL;
@@ -832,7 +832,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
/* numsrc >= (4G-140)/128 overflow in 32 bits */
err = -ENOBUFS;
- if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
+ if (n > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
goto out_free_gsf;
err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
&gf32->gf_group, gf32->gf_slist_flex);
@@ -1244,7 +1244,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
}
/* numsrc >= (1G-4) overflow in 32 bits */
if (msf->imsf_numsrc >= 0x3ffffffcU ||
- msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
+ msf->imsf_numsrc > READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) {
kfree(msf);
err = -ENOBUFS;
break;
@@ -1606,7 +1606,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
{
struct net *net = sock_net(sk);
val = (inet->uc_ttl == -1 ?
- net->ipv4.sysctl_ip_default_ttl :
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl) :
inet->uc_ttl);
break;
}
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 918c61fda0f3..d640adcaf1b1 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -62,7 +62,7 @@ struct sk_buff *nf_reject_skb_v4_tcp_reset(struct net *net,
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
- net->ipv4.sysctl_ip_default_ttl);
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
@@ -117,7 +117,7 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP,
- net->ipv4.sysctl_ip_default_ttl);
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
skb_reset_transport_header(nskb);
icmph = skb_put_zero(nskb, sizeof(struct icmphdr));
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index e459a391e607..853a75a8fbaf 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1858,7 +1858,7 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
/* __ip6_del_rt does a release, so do a hold here */
fib6_info_hold(f6i);
ipv6_stub->ip6_del_rt(net, f6i,
- !net->ipv4.sysctl_nexthop_compat_mode);
+ !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode));
}
}
@@ -2361,7 +2361,8 @@ out:
if (!rc) {
nh_base_seq_inc(net);
nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
- if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode)
+ if (replace_notify &&
+ READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode))
nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 28836071f0a6..0088a4c64d77 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -387,7 +387,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)
seq_printf(seq, "\nIp: %d %d",
IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
- net->ipv4.sysctl_ip_default_ttl);
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 356f535f3443..4702c61207a8 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1398,7 +1398,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
struct fib_info *fi = res->fi;
u32 mtu = 0;
- if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
+ if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) ||
fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
mtu = fi->fib_mtu;
@@ -1929,7 +1929,7 @@ static u32 fib_multipath_custom_hash_outer(const struct net *net,
const struct sk_buff *skb,
bool *p_has_inner)
{
- u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
struct flow_keys keys, hash_keys;
if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
@@ -1958,7 +1958,7 @@ static u32 fib_multipath_custom_hash_inner(const struct net *net,
const struct sk_buff *skb,
bool has_inner)
{
- u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
struct flow_keys keys, hash_keys;
/* We assume the packet carries an encapsulation, but if none was
@@ -2018,7 +2018,7 @@ static u32 fib_multipath_custom_hash_skb(const struct net *net,
static u32 fib_multipath_custom_hash_fl4(const struct net *net,
const struct flowi4 *fl4)
{
- u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
struct flow_keys hash_keys;
if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
@@ -2048,7 +2048,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
struct flow_keys hash_keys;
u32 mhash = 0;
- switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
+ switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
case 0:
memset(&hash_keys, 0, sizeof(hash_keys));
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index f33c31dd7366..942d2dfa1115 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -247,12 +247,12 @@ bool cookie_timestamp_decode(const struct net *net,
return true;
}
- if (!net->ipv4.sysctl_tcp_timestamps)
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps))
return false;
tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0;
- if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack)
+ if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack))
return false;
if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK)
@@ -261,7 +261,7 @@ bool cookie_timestamp_decode(const struct net *net,
tcp_opt->wscale_ok = 1;
tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK;
- return net->ipv4.sysctl_tcp_window_scaling != 0;
+ return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0;
}
EXPORT_SYMBOL(cookie_timestamp_decode);
@@ -273,7 +273,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
if (!ecn_ok)
return false;
- if (net->ipv4.sysctl_tcp_ecn)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_ecn))
return true;
return dst_feature(dst, RTAX_FEATURE_ECN);
@@ -340,7 +340,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
struct flowi4 fl4;
u32 tsoff = 0;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
+ !th->ack || th->rst)
goto out;
if (tcp_synq_no_recent_overflow(sk))
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index cd448cdd3b38..5490c285668b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -84,7 +84,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
* port limit.
*/
if ((range[1] < range[0]) ||
- (range[0] < net->ipv4.sysctl_ip_prot_sock))
+ (range[0] < READ_ONCE(net->ipv4.sysctl_ip_prot_sock)))
ret = -EINVAL;
else
set_local_port_range(net, range);
@@ -110,7 +110,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write,
.extra2 = &ip_privileged_port_max,
};
- pports = net->ipv4.sysctl_ip_prot_sock;
+ pports = READ_ONCE(net->ipv4.sysctl_ip_prot_sock);
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
@@ -122,7 +122,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write,
if (range[0] < pports)
ret = -EINVAL;
else
- net->ipv4.sysctl_ip_prot_sock = pports;
+ WRITE_ONCE(net->ipv4.sysctl_ip_prot_sock, pports);
}
return ret;
@@ -350,61 +350,6 @@ bad_key:
return ret;
}
-static void proc_configure_early_demux(int enabled, int protocol)
-{
- struct net_protocol *ipprot;
-#if IS_ENABLED(CONFIG_IPV6)
- struct inet6_protocol *ip6prot;
-#endif
-
- rcu_read_lock();
-
- ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot)
- ipprot->early_demux = enabled ? ipprot->early_demux_handler :
- NULL;
-
-#if IS_ENABLED(CONFIG_IPV6)
- ip6prot = rcu_dereference(inet6_protos[protocol]);
- if (ip6prot)
- ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
- NULL;
-#endif
- rcu_read_unlock();
-}
-
-static int proc_tcp_early_demux(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- int ret = 0;
-
- ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
-
- if (write && !ret) {
- int enabled = init_net.ipv4.sysctl_tcp_early_demux;
-
- proc_configure_early_demux(enabled, IPPROTO_TCP);
- }
-
- return ret;
-}
-
-static int proc_udp_early_demux(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- int ret = 0;
-
- ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
-
- if (write && !ret) {
- int enabled = init_net.ipv4.sysctl_udp_early_demux;
-
- proc_configure_early_demux(enabled, IPPROTO_UDP);
- }
-
- return ret;
-}
-
static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
int write, void *buffer,
size_t *lenp, loff_t *ppos)
@@ -599,6 +544,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
},
{
.procname = "icmp_echo_enable_probe",
@@ -615,6 +562,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
},
{
.procname = "icmp_ignore_bogus_error_responses",
@@ -622,6 +571,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
},
{
.procname = "icmp_errors_use_inbound_ifaddr",
@@ -629,6 +580,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
},
{
.procname = "icmp_ratelimit",
@@ -668,6 +621,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "tcp_ecn_fallback",
@@ -675,6 +630,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "ip_dynaddr",
@@ -695,14 +652,14 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_udp_early_demux,
.maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_udp_early_demux
+ .proc_handler = proc_dou8vec_minmax,
},
{
.procname = "tcp_early_demux",
.data = &init_net.ipv4.sysctl_tcp_early_demux,
.maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_tcp_early_demux
+ .proc_handler = proc_dou8vec_minmax,
},
{
.procname = "nexthop_compat_mode",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 028513d3e2a2..766881775abb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -441,7 +441,7 @@ void tcp_init_sock(struct sock *sk)
tp->snd_cwnd_clamp = ~0;
tp->mss_cache = TCP_MSS_DEFAULT;
- tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
+ tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering);
tcp_assign_congestion_control(sk);
tp->tsoffset = 0;
@@ -452,8 +452,8 @@ void tcp_init_sock(struct sock *sk)
icsk->icsk_sync_mss = tcp_sync_mss;
- WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
- WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
+ WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
+ WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));
sk_sockets_allocated_inc(sk);
}
@@ -686,7 +686,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
int size_goal)
{
return skb->len < size_goal &&
- sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
!tcp_rtx_queue_empty(sk) &&
refcount_read(&sk->sk_wmem_alloc) > skb->truesize &&
tcp_skb_can_collapse_to(skb);
@@ -1150,7 +1150,8 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
struct sockaddr *uaddr = msg->msg_name;
int err, flags;
- if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+ if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) &
+ TFO_CLIENT_ENABLE) ||
(uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
uaddr->sa_family == AF_UNSPEC))
return -EOPNOTSUPP;
@@ -1723,7 +1724,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
cap = sk->sk_rcvbuf >> 1;
else
- cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
+ cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
val = min(val, cap);
WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
@@ -2715,7 +2716,8 @@ static void tcp_orphan_update(struct timer_list *unused)
static bool tcp_too_many_orphans(int shift)
{
- return READ_ONCE(tcp_orphan_cache) << shift > sysctl_tcp_max_orphans;
+ return READ_ONCE(tcp_orphan_cache) << shift >
+ READ_ONCE(sysctl_tcp_max_orphans);
}
bool tcp_check_oom(struct sock *sk, int shift)
@@ -3616,7 +3618,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
case TCP_FASTOPEN_CONNECT:
if (val > 1 || val < 0) {
err = -EINVAL;
- } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
+ } else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) &
+ TFO_CLIENT_ENABLE) {
if (sk->sk_state == TCP_CLOSE)
tp->fastopen_connect = val;
else
@@ -3966,12 +3969,13 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = keepalive_probes(tp);
break;
case TCP_SYNCNT:
- val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
+ val = icsk->icsk_syn_retries ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
break;
case TCP_LINGER2:
val = tp->linger2;
if (val >= 0)
- val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ;
+ val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
break;
case TCP_DEFER_ACCEPT:
val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
@@ -4455,9 +4459,18 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
return SKB_DROP_REASON_TCP_MD5UNEXPECTED;
}
- /* check the signature */
- genhash = tp->af_specific->calc_md5_hash(newhash, hash_expected,
- NULL, skb);
+ /* Check the signature.
+ * To support dual stack listeners, we need to handle
+ * IPv4-mapped case.
+ */
+ if (family == AF_INET)
+ genhash = tcp_v4_md5_hash_skb(newhash,
+ hash_expected,
+ NULL, skb);
+ else
+ genhash = tp->af_specific->calc_md5_hash(newhash,
+ hash_expected,
+ NULL, skb);
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index fdbcf2a6d08e..825b216d11f5 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -332,7 +332,7 @@ static bool tcp_fastopen_no_cookie(const struct sock *sk,
const struct dst_entry *dst,
int flag)
{
- return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) ||
+ return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) ||
tcp_sk(sk)->fastopen_no_cookie ||
(dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE));
}
@@ -347,7 +347,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
const struct dst_entry *dst)
{
bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
- int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
struct tcp_fastopen_cookie valid_foc = { .len = -1 };
struct sock *child;
int ret = 0;
@@ -489,7 +489,7 @@ void tcp_fastopen_active_disable(struct sock *sk)
{
struct net *net = sock_net(sk);
- if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout))
return;
/* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */
@@ -510,7 +510,8 @@ void tcp_fastopen_active_disable(struct sock *sk)
*/
bool tcp_fastopen_active_should_disable(struct sock *sk)
{
- unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
+ unsigned int tfo_bh_timeout =
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout);
unsigned long timeout;
int tfo_da_times;
int multiplier;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2e2a9ece9af2..b1637990d570 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -426,7 +426,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
if (sk->sk_sndbuf < sndmem)
WRITE_ONCE(sk->sk_sndbuf,
- min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
+ min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2])));
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -461,7 +461,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
- int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
+ int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1;
while (tp->rcv_ssthresh <= window) {
if (truesize <= skb->len)
@@ -534,7 +534,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
*/
static void tcp_init_buffer_space(struct sock *sk)
{
- int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
+ int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
struct tcp_sock *tp = tcp_sk(sk);
int maxwin;
@@ -574,16 +574,17 @@ static void tcp_clamp_window(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
struct net *net = sock_net(sk);
+ int rmem2;
icsk->icsk_ack.quick = 0;
+ rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
- if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
+ if (sk->sk_rcvbuf < rmem2 &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_under_memory_pressure(sk) &&
sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
WRITE_ONCE(sk->sk_rcvbuf,
- min(atomic_read(&sk->sk_rmem_alloc),
- net->ipv4.sysctl_tcp_rmem[2]));
+ min(atomic_read(&sk->sk_rmem_alloc), rmem2));
}
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@ -724,7 +725,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
* <prev RTT . ><current RTT .. ><next RTT .... >
*/
- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
int rcvmem, rcvbuf;
u64 rcvwin, grow;
@@ -745,7 +746,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
do_div(rcvwin, tp->advmss);
rcvbuf = min_t(u64, rcvwin * rcvmem,
- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
@@ -910,9 +911,9 @@ static void tcp_update_pacing_rate(struct sock *sk)
* end of slow start and should slow down.
*/
if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2)
- rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
+ rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio);
else
- rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
+ rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio);
rate *= max(tcp_snd_cwnd(tp), tp->packets_out);
@@ -1051,7 +1052,7 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
tp->undo_marker ? tp->undo_retrans : 0);
#endif
tp->reordering = min_t(u32, (metric + mss - 1) / mss,
- sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
}
/* This exciting event is worth to be remembered. 8) */
@@ -2030,7 +2031,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
return;
tp->reordering = min_t(u32, tp->packets_out + addend,
- sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
tp->reord_seen++;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
}
@@ -2095,7 +2096,8 @@ static inline void tcp_init_undo(struct tcp_sock *tp)
static bool tcp_is_rack(const struct sock *sk)
{
- return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
+ return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_LOSS_DETECTION;
}
/* If we detect SACK reneging, forget all SACK information
@@ -2139,6 +2141,7 @@ void tcp_enter_loss(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
+ u8 reordering;
tcp_timeout_mark_lost(sk);
@@ -2159,10 +2162,12 @@ void tcp_enter_loss(struct sock *sk)
/* Timeout in disordered state after receiving substantial DUPACKs
* suggests that the degree of reordering is over-estimated.
*/
+ reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering);
if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
- tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
+ tp->sacked_out >= reordering)
tp->reordering = min_t(unsigned int, tp->reordering,
- net->ipv4.sysctl_tcp_reordering);
+ reordering);
+
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
tcp_ecn_queue_cwr(tp);
@@ -2171,7 +2176,7 @@ void tcp_enter_loss(struct sock *sk)
* loss recovery is underway except recurring timeout(s) on
* the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
*/
- tp->frto = net->ipv4.sysctl_tcp_frto &&
+ tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
(new_recovery || icsk->icsk_retransmits) &&
!inet_csk(sk)->icsk_mtup.probe_size;
}
@@ -3054,7 +3059,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
{
- u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
+ u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
struct tcp_sock *tp = tcp_sk(sk);
if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
@@ -3464,7 +3469,8 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
* new SACK or ECE mark may first advance cwnd here and later reduce
* cwnd in tcp_fastretrans_alert() based on more states.
*/
- if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
+ if (tcp_sk(sk)->reordering >
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering))
return flag & FLAG_FORWARD_PROGRESS;
return flag & FLAG_DATA_ACKED;
@@ -3576,7 +3582,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
if (*last_oow_ack_time) {
s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
- if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
+ if (0 <= elapsed &&
+ elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
NET_INC_STATS(net, mib_idx);
return true; /* rate-limited: don't send yet! */
}
@@ -3624,7 +3631,7 @@ static void tcp_send_challenge_ack(struct sock *sk)
/* Then check host-wide RFC 5961 rate limit. */
now = jiffies / HZ;
if (now != challenge_timestamp) {
- u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
+ u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit);
u32 half = (ack_limit + 1) >> 1;
challenge_timestamp = now;
@@ -4056,7 +4063,7 @@ void tcp_parse_options(const struct net *net,
break;
case TCPOPT_WINDOW:
if (opsize == TCPOLEN_WINDOW && th->syn &&
- !estab && net->ipv4.sysctl_tcp_window_scaling) {
+ !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) {
__u8 snd_wscale = *(__u8 *)ptr;
opt_rx->wscale_ok = 1;
if (snd_wscale > TCP_MAX_WSCALE) {
@@ -4072,7 +4079,7 @@ void tcp_parse_options(const struct net *net,
case TCPOPT_TIMESTAMP:
if ((opsize == TCPOLEN_TIMESTAMP) &&
((estab && opt_rx->tstamp_ok) ||
- (!estab && net->ipv4.sysctl_tcp_timestamps))) {
+ (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) {
opt_rx->saw_tstamp = 1;
opt_rx->rcv_tsval = get_unaligned_be32(ptr);
opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
@@ -4080,7 +4087,7 @@ void tcp_parse_options(const struct net *net,
break;
case TCPOPT_SACK_PERM:
if (opsize == TCPOLEN_SACK_PERM && th->syn &&
- !estab && net->ipv4.sysctl_tcp_sack) {
+ !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) {
opt_rx->sack_ok = TCP_SACK_SEEN;
tcp_sack_reset(opt_rx);
}
@@ -4421,7 +4428,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
int mib_idx;
if (before(seq, tp->rcv_nxt))
@@ -4468,7 +4475,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
tcp_rcv_spurious_retrans(sk, skb);
@@ -5514,7 +5521,7 @@ send_now:
}
if (!tcp_is_sack(tp) ||
- tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
+ tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
goto send_now;
if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
@@ -5535,11 +5542,12 @@ send_now:
if (tp->srtt_us && tp->srtt_us < rtt)
rtt = tp->srtt_us;
- delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
+ delay = min_t(unsigned long,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
rtt * (NSEC_PER_USEC >> 3)/20);
sock_hold(sk);
hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
- sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
HRTIMER_MODE_REL_PINNED_SOFT);
}
@@ -5567,7 +5575,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
struct tcp_sock *tp = tcp_sk(sk);
u32 ptr = ntohs(th->urg_ptr);
- if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
+ if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg))
ptr--;
ptr += ntohl(th->seq);
@@ -6729,7 +6737,7 @@ static void tcp_ecn_create_request(struct request_sock *req,
ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
- ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
+ ecn_ok = READ_ONCE(net->ipv4.sysctl_tcp_ecn) || ecn_ok_dst;
if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
(ecn_ok_dst & DST_FEATURE_ECN_CA) ||
@@ -6797,11 +6805,14 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
{
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
const char *msg = "Dropping request";
- bool want_cookie = false;
struct net *net = sock_net(sk);
+ bool want_cookie = false;
+ u8 syncookies;
+
+ syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
#ifdef CONFIG_SYN_COOKIES
- if (net->ipv4.sysctl_tcp_syncookies) {
+ if (syncookies) {
msg = "Sending cookies";
want_cookie = true;
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
@@ -6809,8 +6820,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
#endif
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
- if (!queue->synflood_warned &&
- net->ipv4.sysctl_tcp_syncookies != 2 &&
+ if (!queue->synflood_warned && syncookies != 2 &&
xchg(&queue->synflood_warned, 1) == 0)
net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
proto, sk->sk_num, msg);
@@ -6859,7 +6869,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
struct tcp_sock *tp = tcp_sk(sk);
u16 mss;
- if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) != 2 &&
!inet_csk_reqsk_queue_is_full(sk))
return 0;
@@ -6893,13 +6903,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
bool want_cookie = false;
struct dst_entry *dst;
struct flowi fl;
+ u8 syncookies;
+
+ syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
/* TW buckets are converted to open requests without
* limitations, they conserve resources and peer is
* evidently real one.
*/
- if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
- inet_csk_reqsk_queue_is_full(sk)) && !isn) {
+ if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
if (!want_cookie)
goto drop;
@@ -6948,10 +6960,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
if (!want_cookie && !isn) {
+ int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);
+
/* Kill the following clause, if you dislike this way. */
- if (!net->ipv4.sysctl_tcp_syncookies &&
- (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
- (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
+ if (!syncookies &&
+ (max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+ (max_syn_backlog >> 2)) &&
!tcp_peer_is_proven(req, dst)) {
/* Without syncookies last quarter of
* backlog is filled with destinations,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index da5a3c44c4fb..586c102ce152 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -108,10 +108,10 @@ static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
+ int reuse = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse);
const struct inet_timewait_sock *tw = inet_twsk(sktw);
const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
struct tcp_sock *tp = tcp_sk(sk);
- int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;
if (reuse == 2) {
/* Still does not detect *everything* that goes through
@@ -1006,7 +1006,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
if (skb) {
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
- tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
+ tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
(inet_sk(sk)->tos & INET_ECN_MASK) :
inet_sk(sk)->tos;
@@ -1526,7 +1526,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
/* Set ToS of the new socket based upon the value of incoming SYN.
* ECT bits are set later in tcp_init_transfer().
*/
- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
if (!dst) {
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 7029b0e98edb..d58e672be31c 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -329,7 +329,7 @@ void tcp_update_metrics(struct sock *sk)
int m;
sk_dst_confirm(sk);
- if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst)
return;
rcu_read_lock();
@@ -385,7 +385,7 @@ void tcp_update_metrics(struct sock *sk)
if (tcp_in_initial_slowstart(tp)) {
/* Slow start still did not finish. */
- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val && (tcp_snd_cwnd(tp) >> 1) > val)
@@ -401,7 +401,7 @@ void tcp_update_metrics(struct sock *sk)
} else if (!tcp_in_slow_start(tp) &&
icsk->icsk_ca_state == TCP_CA_Open) {
/* Cong. avoidance phase, cwnd is reliable. */
- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh));
@@ -418,7 +418,7 @@ void tcp_update_metrics(struct sock *sk)
tcp_metric_set(tm, TCP_METRIC_CWND,
(val + tp->snd_ssthresh) >> 1);
}
- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val && tp->snd_ssthresh > val)
@@ -428,7 +428,8 @@ void tcp_update_metrics(struct sock *sk)
if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) {
val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
if (val < tp->reordering &&
- tp->reordering != net->ipv4.sysctl_tcp_reordering)
+ tp->reordering !=
+ READ_ONCE(net->ipv4.sysctl_tcp_reordering))
tcp_metric_set(tm, TCP_METRIC_REORDERING,
tp->reordering);
}
@@ -462,7 +463,7 @@ void tcp_init_metrics(struct sock *sk)
if (tcp_metric_locked(tm, TCP_METRIC_CWND))
tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);
- val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ?
+ val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ?
0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val) {
tp->snd_ssthresh = val;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6854bb1fb32b..cb95d88497ae 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -173,7 +173,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
* Oh well... nobody has a sufficient solution to this
* protocol bug yet.
*/
- if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
+ if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) {
kill:
inet_twsk_deschedule_put(tw);
return TCP_TW_SUCCESS;
@@ -781,7 +781,7 @@ listen_overflow:
if (sk != req->rsk_listener)
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
- if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow)) {
inet_rsk(req)->acked = 1;
return NULL;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1c054431e358..4c376b6d8764 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -167,16 +167,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(sk, CA_EVENT_TX_START);
- /* If this is the first data packet sent in response to the
- * previous received data,
- * and it is a reply for ato after last received packet,
- * increase pingpong count.
- */
- if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
- (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
- inet_csk_inc_pingpong_cnt(sk);
-
tp->lsndtime = now;
+
+ /* If it is a reply for ato after last received
+ * packet, enter pingpong mode.
+ */
+ if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+ inet_csk_enter_pingpong_mode(sk);
}
/* Account for an ACK we sent. */
@@ -230,7 +227,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
* which we interpret as a sign the remote TCP is not
* misinterpreting the window field as a signed quantity.
*/
- if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows))
(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
else
(*rcv_wnd) = min_t(u32, space, U16_MAX);
@@ -241,7 +238,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
*rcv_wscale = 0;
if (wscale_ok) {
/* Set window scaling on max possible window */
- space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+ space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
space = max_t(u32, space, sysctl_rmem_max);
space = min_t(u32, space, *window_clamp);
*rcv_wscale = clamp_t(int, ilog2(space) - 15,
@@ -285,7 +282,7 @@ static u16 tcp_select_window(struct sock *sk)
* scaled window.
*/
if (!tp->rx_opt.rcv_wscale &&
- sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows))
new_win = min(new_win, MAX_TCP_WINDOW);
else
new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
@@ -324,7 +321,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
- bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
+ bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
if (!use_ecn) {
@@ -346,7 +343,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
{
- if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback))
/* tp->ecn_flags are cleared at a later point in time when
* SYN ACK is ultimatively being received.
*/
@@ -791,18 +788,18 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
opts->mss = tcp_advertise_mss(sk);
remaining -= TCPOLEN_MSS_ALIGNED;
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) {
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) {
opts->options |= OPTION_TS;
opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
opts->tsecr = tp->rx_opt.ts_recent;
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling))) {
opts->ws = tp->rx_opt.rcv_wscale;
opts->options |= OPTION_WSCALE;
remaining -= TCPOLEN_WSCALE_ALIGNED;
}
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) {
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_sack))) {
opts->options |= OPTION_SACK_ADVERTISE;
if (unlikely(!(OPTION_TS & opts->options)))
remaining -= TCPOLEN_SACKPERM_ALIGNED;
@@ -1719,7 +1716,8 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
mss_now -= icsk->icsk_ext_hdr_len;
/* Then reserve room for full set of TCP options and 8 bytes of data */
- mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
+ mss_now = max(mss_now,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss));
return mss_now;
}
@@ -1762,10 +1760,10 @@ void tcp_mtup_init(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct net *net = sock_net(sk);
- icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
+ icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1;
icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
icsk->icsk_af_ops->net_header_len;
- icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
+ icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, READ_ONCE(net->ipv4.sysctl_tcp_base_mss));
icsk->icsk_mtup.probe_size = 0;
if (icsk->icsk_mtup.enabled)
icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
@@ -1897,7 +1895,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
if (tp->packets_out > tp->snd_cwnd_used)
tp->snd_cwnd_used = tp->packets_out;
- if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) &&
(s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
!ca_ops->cong_control)
tcp_cwnd_application_limited(sk);
@@ -1975,7 +1973,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift);
- r = tcp_min_rtt(tcp_sk(sk)) >> sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log;
+ r = tcp_min_rtt(tcp_sk(sk)) >> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log);
if (r < BITS_PER_TYPE(sk->sk_gso_max_size))
bytes += sk->sk_gso_max_size >> r;
@@ -1994,7 +1992,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
min_tso = ca_ops->min_tso_segs ?
ca_ops->min_tso_segs(sk) :
- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
@@ -2282,7 +2280,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
u32 interval;
s32 delta;
- interval = net->ipv4.sysctl_tcp_probe_interval;
+ interval = READ_ONCE(net->ipv4.sysctl_tcp_probe_interval);
delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp;
if (unlikely(delta >= interval * HZ)) {
int mss = tcp_current_mss(sk);
@@ -2366,7 +2364,7 @@ static int tcp_mtu_probe(struct sock *sk)
* probing process by not resetting search range to its orignal.
*/
if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
- interval < net->ipv4.sysctl_tcp_probe_threshold) {
+ interval < READ_ONCE(net->ipv4.sysctl_tcp_probe_threshold)) {
/* Check whether enough time has elaplased for
* another round of probing.
*/
@@ -2506,7 +2504,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
if (sk->sk_pacing_status == SK_PACING_NONE)
limit = min_t(unsigned long, limit,
- sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes));
limit <<= factor;
if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
@@ -2740,7 +2738,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
if (rcu_access_pointer(tp->fastopen_rsk))
return false;
- early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
+ early_retrans = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_early_retrans);
/* Schedule a loss probe in 2*RTT for SACK capable connections
* not in loss recovery, that are either limited by cwnd or application.
*/
@@ -3104,7 +3102,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
struct sk_buff *skb = to, *tmp;
bool first = true;
- if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse))
return;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
return;
@@ -3646,7 +3644,7 @@ static void tcp_connect_init(struct sock *sk)
* See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
*/
tp->tcp_header_len = sizeof(struct tcphdr);
- if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps))
tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
@@ -3682,7 +3680,7 @@ static void tcp_connect_init(struct sock *sk)
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd,
&tp->window_clamp,
- sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling),
&rcv_wscale,
rcv_wnd);
@@ -4089,7 +4087,7 @@ void tcp_send_probe0(struct sock *sk)
icsk->icsk_probes_out++;
if (err <= 0) {
- if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
+ if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2))
icsk->icsk_backoff++;
timeout = tcp_probe0_when(sk, TCP_RTO_MAX);
} else {
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 48f30e7209f2..50abaa941387 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -14,7 +14,8 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk)
return 0;
if (tp->sacked_out >= tp->reordering &&
- !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH))
+ !(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_NO_DUPTHRESH))
return 0;
}
@@ -187,7 +188,8 @@ void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND ||
+ if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_STATIC_REO_WND) ||
!rs->prior_delivered)
return;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 20cf4a98c69d..50bba370486e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -143,7 +143,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
*/
static int tcp_orphan_retries(struct sock *sk, bool alive)
{
- int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */
+ int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */
/* We know from an ICMP that something is wrong. */
if (sk->sk_err_soft && !alive)
@@ -163,7 +163,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
int mss;
/* Black hole detection */
- if (!net->ipv4.sysctl_tcp_mtu_probing)
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing))
return;
if (!icsk->icsk_mtup.enabled) {
@@ -171,9 +171,9 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
} else {
mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
- mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
- mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);
- mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
+ mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss);
+ mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor));
+ mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss));
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
}
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
@@ -239,17 +239,18 @@ static int tcp_write_timeout(struct sock *sk)
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits)
__dst_negative_advice(sk);
- retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
+ retry_until = icsk->icsk_syn_retries ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
expired = icsk->icsk_retransmits >= retry_until;
} else {
- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) {
+ if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) {
/* Black hole detection */
tcp_mtu_probing(icsk, sk);
__dst_negative_advice(sk);
}
- retry_until = net->ipv4.sysctl_tcp_retries2;
+ retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
@@ -380,7 +381,7 @@ static void tcp_probe_timer(struct sock *sk)
msecs_to_jiffies(icsk->icsk_user_timeout))
goto abort;
- max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
+ max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
@@ -406,12 +407,15 @@ abort: tcp_write_err(sk);
static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- int max_retries = icsk->icsk_syn_retries ? :
- sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
struct tcp_sock *tp = tcp_sk(sk);
+ int max_retries;
req->rsk_ops->syn_ack_timeout(req);
+ /* add one more retry for fastopen */
+ max_retries = icsk->icsk_syn_retries ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1;
+
if (req->num_timeout >= max_retries) {
tcp_write_err(sk);
return;
@@ -574,7 +578,7 @@ out_reset_timer:
* linear-timeout retransmissions into a black hole
*/
if (sk->sk_state == TCP_ESTABLISHED &&
- (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
+ (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) &&
tcp_stream_is_thin(tp) &&
icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
icsk->icsk_backoff = 0;
@@ -585,7 +589,7 @@ out_reset_timer:
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX);
- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0))
+ if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0))
__sk_dst_reset(sk);
out:;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 70564ddccc46..6f354f8be2c5 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -226,7 +226,7 @@ lookup_protocol:
RCU_INIT_POINTER(inet->mc_list, NULL);
inet->rcv_tos = 0;
- if (net->ipv4.sysctl_ip_no_pmtu_disc)
+ if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 61770220774e..9d92d51c4757 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -925,7 +925,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
break;
case ICMPV6_EXT_ECHO_REQUEST:
if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
- net->ipv4.sysctl_icmp_echo_enable_probe)
+ READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
icmpv6_echo_reply(skb);
break;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 0322cc86b84e..e1ebf5e42ebe 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -45,20 +45,23 @@
#include <net/inet_ecn.h>
#include <net/dst_metadata.h>
-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *));
static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
- void (*edemux)(struct sk_buff *skb);
-
- if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
- const struct inet6_protocol *ipprot;
-
- ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
- if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
- INDIRECT_CALL_2(edemux, tcp_v6_early_demux,
- udp_v6_early_demux, skb);
+ if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
+ !skb_dst(skb) && !skb->sk) {
+ switch (ipv6_hdr(skb)->nexthdr) {
+ case IPPROTO_TCP:
+ if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux))
+ tcp_v6_early_demux(skb);
+ break;
+ case IPPROTO_UDP:
+ if (READ_ONCE(net->ipv4.sysctl_udp_early_demux))
+ udp_v6_early_demux(skb);
+ break;
+ }
}
+
if (!skb_valid_dst(skb))
ip6_route_input(skb);
}
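This hunk pairs with the removal of proc_tcp_early_demux()/proc_udp_early_demux() in net/ipv4/sysctl_net_ipv4.c above: instead of a sysctl handler patching an early_demux function pointer in a shared protocol table, the receive path now consults the per-protocol sysctls itself and calls the demux functions by name, which is also what later allows the inet6_protocol structures to become const. A rough userspace sketch of that shape, with invented names standing in for the kernel symbols:

#include <stdbool.h>
#include <stdio.h>

static bool ip_early_demux  = true;	/* net->ipv4.sysctl_ip_early_demux */
static bool tcp_early_demux = true;	/* net->ipv4.sysctl_tcp_early_demux */
static bool udp_early_demux = true;	/* net->ipv4.sysctl_udp_early_demux */

static void tcp_demux(void) { puts("tcp early demux"); }
static void udp_demux(void) { puts("udp early demux"); }

/* per-packet dispatch: the sysctls are read here, so flipping them takes
 * effect immediately and no function pointer has to be rewritten
 */
static void rcv_finish(int nexthdr)
{
	if (!ip_early_demux)
		return;

	switch (nexthdr) {
	case 6:			/* IPPROTO_TCP */
		if (tcp_early_demux)
			tcp_demux();
		break;
	case 17:		/* IPPROTO_UDP */
		if (udp_early_demux)
			udp_demux();
		break;
	}
}

int main(void)
{
	rcv_finish(6);
	return 0;
}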
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 7f695c39d9a8..87c699d57b36 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1522,7 +1522,6 @@ static void mld_query_work(struct work_struct *work)
if (++cnt >= MLD_MAX_QUEUE) {
rework = true;
- schedule_delayed_work(&idev->mc_query_work, 0);
break;
}
}
@@ -1533,8 +1532,10 @@ static void mld_query_work(struct work_struct *work)
__mld_query_work(skb);
mutex_unlock(&idev->mc_lock);
- if (!rework)
- in6_dev_put(idev);
+ if (rework && queue_delayed_work(mld_wq, &idev->mc_query_work, 0))
+ return;
+
+ in6_dev_put(idev);
}
/* called with rcu_read_lock() */
@@ -1624,7 +1625,6 @@ static void mld_report_work(struct work_struct *work)
if (++cnt >= MLD_MAX_QUEUE) {
rework = true;
- schedule_delayed_work(&idev->mc_report_work, 0);
break;
}
}
@@ -1635,8 +1635,10 @@ static void mld_report_work(struct work_struct *work)
__mld_report_work(skb);
mutex_unlock(&idev->mc_lock);
- if (!rework)
- in6_dev_put(idev);
+ if (rework && queue_delayed_work(mld_wq, &idev->mc_report_work, 0))
+ return;
+
+ in6_dev_put(idev);
}
static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index ecf3a553a0dc..8c6c2d82c1cd 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -22,6 +22,11 @@
#include <linux/proc_fs.h>
#include <net/ping.h>
+static void ping_v6_destroy(struct sock *sk)
+{
+ inet6_destroy_sock(sk);
+}
+
/* Compatibility glue so we can support IPv6 when it's compiled as a module */
static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
int *addr_len)
@@ -181,6 +186,7 @@ struct proto pingv6_prot = {
.owner = THIS_MODULE,
.init = ping_init_sock,
.close = ping_close,
+ .destroy = ping_v6_destroy,
.connect = ip6_datagram_connect_v6_only,
.disconnect = __udp_disconnect,
.setsockopt = ipv6_setsockopt,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 828355710c57..916417944ec8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5741,7 +5741,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
if (nexthop_is_blackhole(rt->nh))
rtm->rtm_type = RTN_BLACKHOLE;
- if (net->ipv4.sysctl_nexthop_compat_mode &&
+ if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) &&
rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
goto nla_put_failure;
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index d64855010948..e756ba705fd9 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -189,6 +189,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
}
#endif
+ hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
skb_postpush_rcsum(skb, hdr, tot_len);
return 0;
@@ -241,6 +243,8 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
}
#endif
+ hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);
return 0;
@@ -302,7 +306,6 @@ static int seg6_do_srh(struct sk_buff *skb)
break;
}
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
nf_reset_ct(skb);
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 98a34287439c..2cd4a8d3b30a 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -826,7 +826,6 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
if (err)
goto drop;
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
seg6_lookup_nexthop(skb, NULL, 0);
@@ -858,7 +857,6 @@ static int input_action_end_b6_encap(struct sk_buff *skb,
if (err)
goto drop;
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
seg6_lookup_nexthop(skb, NULL, 0);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 9cc123f000fb..5014aa663452 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -141,7 +141,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
__u8 rcv_wscale;
u32 tsoff = 0;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
+ !th->ack || th->rst)
goto out;
if (tcp_synq_no_recent_overflow(sk))
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f37dd4aa91c6..be09941fe6d9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -546,7 +546,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
if (np->repflow && ireq->pktopts)
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
- tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
+ tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
(np->tclass & INET_ECN_MASK) :
np->tclass;
@@ -1314,7 +1314,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
/* Set ToS of the new socket based upon the value of incoming SYN.
* ECT bits are set later in tcp_init_transfer().
*/
- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
/* Clone native IPv6 options from listening socket (if any)
@@ -1822,7 +1822,7 @@ do_time_wait:
goto discard_it;
}
-INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
+void tcp_v6_early_demux(struct sk_buff *skb)
{
const struct ipv6hdr *hdr;
const struct tcphdr *th;
@@ -2176,12 +2176,7 @@ struct proto tcpv6_prot = {
};
EXPORT_SYMBOL_GPL(tcpv6_prot);
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct inet6_protocol tcpv6_protocol = {
- .early_demux = tcp_v6_early_demux,
- .early_demux_handler = tcp_v6_early_demux,
+static const struct inet6_protocol tcpv6_protocol = {
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 55afd7f39c04..e2f2e087a753 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1052,7 +1052,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
return NULL;
}
-INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb)
+void udp_v6_early_demux(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
const struct udphdr *uh;
@@ -1660,12 +1660,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname,
return ipv6_getsockopt(sk, level, optname, optval, optlen);
}
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct inet6_protocol udpv6_protocol = {
- .early_demux = udp_v6_early_demux,
- .early_demux_handler = udp_v6_early_demux,
+static const struct inet6_protocol udpv6_protocol = {
.handler = udpv6_rcv,
.err_handler = udpv6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index f7896f257e1b..4ddf297f40f2 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -4468,14 +4468,14 @@ EXPORT_SYMBOL_GPL(ieee80211_color_change_finish);
void
ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
- u64 color_bitmap)
+ u64 color_bitmap, gfp_t gfp)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
if (sdata->vif.color_change_active || sdata->vif.csa_active)
return;
- cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap);
+ cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap, gfp);
}
EXPORT_SYMBOL_GPL(ieeee80211_obss_color_collision_notify);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 41531478437c..1a9ada411879 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -378,6 +378,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
struct cfg80211_nan_func *func;
clear_bit(SDATA_STATE_RUNNING, &sdata->state);
+ synchronize_rcu(); /* flush _ieee80211_wake_txqs() */
cancel_scan = rcu_access_pointer(local->scan_sdata) == sdata;
if (cancel_scan)
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 3c08ae04ddbc..1675f8cb87f1 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3217,7 +3217,8 @@ ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx)
IEEE80211_HE_OPERATION_BSS_COLOR_MASK);
if (color == bss_conf->he_bss_color.color)
ieeee80211_obss_color_collision_notify(&rx->sdata->vif,
- BIT_ULL(color));
+ BIT_ULL(color),
+ GFP_ATOMIC);
}
}
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 0e4efc08c762..c425f4fb7c2e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2818,19 +2818,10 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
/*
* If the skb is shared we need to obtain our own copy.
*/
- if (skb_shared(skb)) {
- struct sk_buff *tmp_skb = skb;
-
- /* can't happen -- skb is a clone if info_id != 0 */
- WARN_ON(info_id);
-
- skb = skb_clone(skb, GFP_ATOMIC);
- kfree_skb(tmp_skb);
-
- if (!skb) {
- ret = -ENOMEM;
- goto free;
- }
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(!skb)) {
+ ret = -ENOMEM;
+ goto free;
}
hdr.frame_control = fc;
@@ -3539,15 +3530,9 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
/* after this point (skb is modified) we cannot return false */
- if (skb_shared(skb)) {
- struct sk_buff *tmp_skb = skb;
-
- skb = skb_clone(skb, GFP_ATOMIC);
- kfree_skb(tmp_skb);
-
- if (!skb)
- return true;
- }
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(!skb))
+ return true;
if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) &&
ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb))
@@ -4437,7 +4422,7 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
struct net_device *dev, struct sta_info *sta,
struct ieee80211_key *key, struct sk_buff *skb)
{
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct ieee80211_tx_info *info;
struct ieee80211_local *local = sdata->local;
struct tid_ampdu_tx *tid_tx;
u8 tid;
@@ -4452,6 +4437,11 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))
goto out_free;
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(!skb))
+ return;
+
+ info = IEEE80211_SKB_CB(skb);
memset(info, 0, sizeof(*info));
ieee80211_aggr_check(sdata, sta, skb);
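The tx.c hunks replace open-coded "clone if shared" sequences with skb_share_check(), which returns the skb unchanged when it has a single owner and otherwise hands back a private clone while dropping the reference on the shared one (returning NULL if the clone fails). A plain-C sketch of that contract, with a made-up buffer type standing in for sk_buff:

#include <stdlib.h>
#include <string.h>

struct buf {
	int users;			/* reference count */
	unsigned char data[256];
};

static struct buf *buf_clone(const struct buf *b)
{
	struct buf *n = malloc(sizeof(*n));

	if (n) {
		memcpy(n->data, b->data, sizeof(n->data));
		n->users = 1;
	}
	return n;
}

static void buf_put(struct buf *b)
{
	if (--b->users == 0)
		free(b);
}

/* analogue of skb_share_check(): the caller gets back a buffer it owns
 * exclusively, or NULL if the needed clone could not be allocated
 */
static struct buf *buf_share_check(struct buf *b)
{
	if (b->users > 1) {
		struct buf *clone = buf_clone(b);

		buf_put(b);		/* drop our reference on the shared copy */
		return clone;		/* may be NULL, like the kernel helper */
	}
	return b;
}

int main(void)
{
	struct buf *orig = calloc(1, sizeof(*orig));
	struct buf *priv;

	if (!orig)
		return 1;
	orig->users = 2;		/* pretend a second owner holds a reference */
	priv = buf_share_check(orig);	/* returns a private clone */
	if (priv)
		buf_put(priv);
	buf_put(orig);			/* the second owner's eventual put */
	return 0;
}

Note that the 802.3 xmit hunk also refetches info = IEEE80211_SKB_CB(skb) only after the check, since a clone carries its own control-buffer area.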
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 1e26b5235add..dad42d42aa84 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -301,6 +301,9 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
local_bh_disable();
spin_lock(&fq->lock);
+ if (!test_bit(SDATA_STATE_RUNNING, &sdata->state))
+ goto out;
+
if (sdata->vif.type == NL80211_IFTYPE_AP)
ps = &sdata->bss->ps;
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 62c6733e0792..d50480b31750 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -147,8 +147,8 @@ u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
bool qos;
/* all mesh/ocb stations are required to support WME */
- if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
- sdata->vif.type == NL80211_IFTYPE_OCB)
+ if (sta && (sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
+ sdata->vif.type == NL80211_IFTYPE_OCB))
qos = true;
else if (sta)
qos = sta->sta.wme;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index bd8f0f425be4..30d289044e71 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1271,7 +1271,7 @@ raise_win:
if (unlikely(th->syn))
new_win = min(new_win, 65535U) << tp->rx_opt.rcv_wscale;
if (!tp->rx_opt.rcv_wscale &&
- sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows)
+ READ_ONCE(sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows))
new_win = min(new_win, MAX_TCP_WINDOW);
else
new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index cc21fafd9726..7e1518bb6115 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1908,7 +1908,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
if (msk->rcvq_space.copied <= msk->rcvq_space.space)
goto new_measure;
- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
int rcvmem, rcvbuf;
u64 rcvwin, grow;
@@ -1926,7 +1926,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
do_div(rcvwin, advmss);
rcvbuf = min_t(u64, rcvwin * rcvmem,
- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
u32 window_clamp;
@@ -2669,8 +2669,8 @@ static int mptcp_init_sock(struct sock *sk)
mptcp_ca_reset(sk);
sk_sockets_allocated_inc(sk);
- sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
- sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
+ sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
+ sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
return 0;
}
@@ -2919,12 +2919,12 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
static int mptcp_disconnect(struct sock *sk, int flags)
{
- struct mptcp_subflow_context *subflow;
+ struct mptcp_subflow_context *subflow, *tmp;
struct mptcp_sock *msk = mptcp_sk(sk);
inet_sk_state_store(sk, TCP_CLOSE);
- mptcp_for_each_subflow(msk, subflow) {
+ list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
__mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_FASTCLOSE);
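
mptcp_disconnect() switches to the _safe list iterator because __mptcp_close_ssk() unlinks the subflow from msk->conn_list while the loop is still walking it. A small userspace model of why the next pointer must be cached before the loop body runs (illustration only):

    /* Userspace model, illustrative only: the loop body frees the current node,
     * so its ->next must be saved first -- exactly what list_for_each_entry_safe()
     * does for the kernel list above.
     */
    #include <stdio.h>
    #include <stdlib.h>

    struct node {
    	int id;
    	struct node *next;
    };

    int main(void)
    {
    	struct node *head = NULL, *n, *tmp;

    	for (int i = 0; i < 3; i++) {
    		n = malloc(sizeof(*n));
    		n->id = i;
    		n->next = head;
    		head = n;
    	}

    	/* safe traversal: remember ->next before the current node is freed */
    	for (n = head; n; n = tmp) {
    		tmp = n->next;
    		printf("closing subflow %d\n", n->id);
    		free(n);
    	}
    	return 0;
    }
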
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 63e8892ec807..af28f3b60389 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1533,7 +1533,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf));
WRITE_ONCE(msk->allow_infinite_fallback, false);
- return err;
+ return 0;
failed_unlink:
list_del(&subflow->node);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 082a2fd8d85b..369aeabb94fe 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -729,6 +729,9 @@ static void nf_ct_gc_expired(struct nf_conn *ct)
if (!refcount_inc_not_zero(&ct->ct_general.use))
return;
+ /* load ->status after refcount increase */
+ smp_acquire__after_ctrl_dep();
+
if (nf_ct_should_gc(ct))
nf_ct_kill(ct);
@@ -795,6 +798,9 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
*/
ct = nf_ct_tuplehash_to_ctrack(h);
if (likely(refcount_inc_not_zero(&ct->ct_general.use))) {
+ /* re-check key after refcount */
+ smp_acquire__after_ctrl_dep();
+
if (likely(nf_ct_key_equal(h, tuple, zone, net)))
goto found;
@@ -1387,6 +1393,9 @@ static unsigned int early_drop_list(struct net *net,
if (!refcount_inc_not_zero(&tmp->ct_general.use))
continue;
+ /* load ->ct_net and ->status after refcount increase */
+ smp_acquire__after_ctrl_dep();
+
/* kill only if still in same netns -- might have moved due to
* SLAB_TYPESAFE_BY_RCU rules.
*
@@ -1536,6 +1545,9 @@ static void gc_worker(struct work_struct *work)
if (!refcount_inc_not_zero(&tmp->ct_general.use))
continue;
+ /* load ->status after refcount increase */
+ smp_acquire__after_ctrl_dep();
+
if (gc_worker_skip_ct(tmp)) {
nf_ct_put(tmp);
continue;
@@ -1775,6 +1787,16 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
if (!exp)
__nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
+ /* Other CPU might have obtained a pointer to this object before it was
+ * released. Because refcount is 0, refcount_inc_not_zero() will fail.
+ *
+ * After refcount_set(1) it will succeed; ensure that zeroing of
+ * ct->status and the correct ct->net pointer are visible; else other
+ * core might observe CONFIRMED bit which means the entry is valid and
+ * in the hash table, but its not (anymore).
+ */
+ smp_wmb();
+
/* Now it is going to be associated with an sk_buff, set refcount to 1. */
refcount_set(&ct->ct_general.use, 1);
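
The added barriers form a publish/observe pair: smp_wmb() before refcount_set(..., 1) makes the zeroed ->status and the new ->net visible before the entry becomes takeable, while refcount_inc_not_zero() followed by smp_acquire__after_ctrl_dep() on the lookup/GC side orders the subsequent ->status and key reads after the successful refcount test. A compact userspace model of the same idea (assumption: illustrative only, C11 atomics as rough stand-ins, not kernel code):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct obj {
    	atomic_uint refs;	/* 0 == not live, like ct->ct_general.use */
    	unsigned int status;
    };

    static void publish(struct obj *o)
    {
    	o->status = 0xc0ffee;				/* init of the payload */
    	atomic_thread_fence(memory_order_release);	/* ~ smp_wmb() */
    	atomic_store_explicit(&o->refs, 1, memory_order_relaxed);
    }

    static bool get_ref(struct obj *o)
    {
    	unsigned int old = atomic_load_explicit(&o->refs, memory_order_relaxed);

    	while (old) {					/* ~ refcount_inc_not_zero() */
    		if (atomic_compare_exchange_weak_explicit(&o->refs, &old, old + 1,
    							  memory_order_relaxed,
    							  memory_order_relaxed))
    			break;
    	}
    	if (!old)
    		return false;

    	atomic_thread_fence(memory_order_acquire);	/* ~ smp_acquire__after_ctrl_dep() */
    	return true;
    }

    int main(void)
    {
    	struct obj o = { .refs = 0 };

    	publish(&o);
    	if (get_ref(&o))
    		printf("status=%#x refs=%u\n", o.status, atomic_load(&o.refs));
    	return 0;
    }
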
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 722af5e309ba..f5905b5201a7 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1203,6 +1203,7 @@ restart:
hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
if (nf_ct_is_expired(ct)) {
+ /* need to defer nf_ct_kill() until lock is released */
if (i < ARRAY_SIZE(nf_ct_evict) &&
refcount_inc_not_zero(&ct->ct_general.use))
nf_ct_evict[i++] = ct;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 6ad7bbc90d38..05895878610c 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -306,6 +306,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
if (unlikely(!refcount_inc_not_zero(&ct->ct_general.use)))
return 0;
+ /* load ->status after refcount increase */
+ smp_acquire__after_ctrl_dep();
+
if (nf_ct_should_gc(ct)) {
nf_ct_kill(ct);
goto release;
diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
index 77bcb10fc586..cb894f0d63e9 100644
--- a/net/netfilter/nf_log_syslog.c
+++ b/net/netfilter/nf_log_syslog.c
@@ -67,7 +67,7 @@ dump_arp_packet(struct nf_log_buf *m,
unsigned int logflags;
struct arphdr _arph;
- ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
+ ah = skb_header_pointer(skb, nhoff, sizeof(_arph), &_arph);
if (!ah) {
nf_log_buf_add(m, "TRUNCATED");
return;
@@ -96,7 +96,7 @@ dump_arp_packet(struct nf_log_buf *m,
ah->ar_pln != sizeof(__be32))
return;
- ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp);
+ ap = skb_header_pointer(skb, nhoff + sizeof(_arph), sizeof(_arpp), &_arpp);
if (!ap) {
nf_log_buf_add(m, " INCOMPLETE [%zu bytes]",
skb->len - sizeof(_arph));
@@ -149,7 +149,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf,
nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
prefix);
- dump_arp_packet(m, loginfo, skb, 0);
+ dump_arp_packet(m, loginfo, skb, skb_network_offset(skb));
nf_log_buf_close(m);
}
@@ -850,7 +850,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
if (in)
dump_mac_header(m, loginfo, skb);
- dump_ipv4_packet(net, m, loginfo, skb, 0);
+ dump_ipv4_packet(net, m, loginfo, skb, skb_network_offset(skb));
nf_log_buf_close(m);
}
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index e479dd0561c5..16915f8eef2b 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -405,7 +405,7 @@ synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr,
iph->tos = 0;
iph->id = 0;
iph->frag_off = htons(IP_DF);
- iph->ttl = net->ipv4.sysctl_ip_default_ttl;
+ iph->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
iph->protocol = IPPROTO_TCP;
iph->check = 0;
iph->saddr = saddr;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d6b59beab3a9..9f976b11d896 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3340,6 +3340,8 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
if (err < 0)
return err;
}
+
+ cond_resched();
}
return 0;
@@ -5833,8 +5835,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL))
return -EINVAL;
- if (flags != 0)
- nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
+ if (flags != 0) {
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
+ if (err < 0)
+ return err;
+ }
if (set->flags & NFT_SET_MAP) {
if (nla[NFTA_SET_ELEM_DATA] == NULL &&
@@ -5943,7 +5948,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
goto err_set_elem_expr;
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+ if (err < 0)
+ goto err_parse_key;
}
if (nla[NFTA_SET_ELEM_KEY_END]) {
@@ -5952,22 +5959,31 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
goto err_parse_key;
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen);
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen);
+ if (err < 0)
+ goto err_parse_key_end;
}
if (timeout > 0) {
- nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
- if (timeout != set->timeout)
- nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
+ if (err < 0)
+ goto err_parse_key_end;
+
+ if (timeout != set->timeout) {
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
+ if (err < 0)
+ goto err_parse_key_end;
+ }
}
if (num_exprs) {
for (i = 0; i < num_exprs; i++)
size += expr_array[i]->ops->size;
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS,
- sizeof(struct nft_set_elem_expr) +
- size);
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS,
+ sizeof(struct nft_set_elem_expr) + size);
+ if (err < 0)
+ goto err_parse_key_end;
}
if (nla[NFTA_SET_ELEM_OBJREF] != NULL) {
@@ -5982,7 +5998,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
err = PTR_ERR(obj);
goto err_parse_key_end;
}
- nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
+ if (err < 0)
+ goto err_parse_key_end;
}
if (nla[NFTA_SET_ELEM_DATA] != NULL) {
@@ -6016,7 +6034,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
NFT_VALIDATE_NEED);
}
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len);
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len);
+ if (err < 0)
+ goto err_parse_data;
}
/* The full maximum length of userdata can exceed the maximum
@@ -6026,9 +6046,12 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
ulen = 0;
if (nla[NFTA_SET_ELEM_USERDATA] != NULL) {
ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]);
- if (ulen > 0)
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA,
- ulen);
+ if (ulen > 0) {
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA,
+ ulen);
+ if (err < 0)
+ goto err_parse_data;
+ }
}
err = -ENOMEM;
@@ -6256,8 +6279,11 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
nft_set_ext_prepare(&tmpl);
- if (flags != 0)
- nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
+ if (flags != 0) {
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
+ if (err < 0)
+ return err;
+ }
if (nla[NFTA_SET_ELEM_KEY]) {
err = nft_setelem_parse_key(ctx, set, &elem.key.val,
@@ -6265,16 +6291,20 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
return err;
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+ if (err < 0)
+ goto fail_elem;
}
if (nla[NFTA_SET_ELEM_KEY_END]) {
err = nft_setelem_parse_key(ctx, set, &elem.key_end.val,
nla[NFTA_SET_ELEM_KEY_END]);
if (err < 0)
- return err;
+ goto fail_elem;
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen);
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen);
+ if (err < 0)
+ goto fail_elem_key_end;
}
err = -ENOMEM;
@@ -6282,7 +6312,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
elem.key_end.val.data, NULL, 0, 0,
GFP_KERNEL_ACCOUNT);
if (elem.priv == NULL)
- goto fail_elem;
+ goto fail_elem_key_end;
ext = nft_set_elem_ext(set, elem.priv);
if (flags)
@@ -6306,6 +6336,8 @@ fail_ops:
kfree(trans);
fail_trans:
kfree(elem.priv);
+fail_elem_key_end:
+ nft_data_release(&elem.key_end.val, NFT_DATA_VALUE);
fail_elem:
nft_data_release(&elem.key.val, NFT_DATA_VALUE);
return err;
@@ -9337,9 +9369,13 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
break;
}
}
+
+ cond_resched();
}
list_for_each_entry(set, &ctx->table->sets, list) {
+ cond_resched();
+
if (!nft_is_active_next(ctx->net, set))
continue;
if (!(set->flags & NFT_SET_MAP) ||
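
In the hunks above, nft_set_ext_add() and nft_set_ext_add_length() can now fail (the extension template runs out of room), so every caller checks the return value and unwinds through the fail_elem/fail_elem_key_end labels. A generic, self-contained sketch of that goto-unwind pattern (parse_key(), parse_key_end(), alloc_elem() and the release helpers are hypothetical stand-ins, not functions from the patch):

    #include <stdio.h>

    static int parse_key(void)		{ puts("parse key");	  return 0; }
    static int parse_key_end(void)	{ puts("parse key_end");  return 0; }
    static int alloc_elem(void)		{ puts("alloc elem");	  return -1; } /* fails */
    static void release_key_end(void)	{ puts("release key_end"); }
    static void release_key(void)	{ puts("release key"); }

    static int add_set_elem(void)
    {
    	int err;

    	err = parse_key();
    	if (err < 0)
    		return err;

    	err = parse_key_end();
    	if (err < 0)
    		goto fail_elem;

    	err = alloc_elem();
    	if (err < 0)
    		goto fail_elem_key_end;

    	return 0;

    	/* later failures release exactly what was set up before them */
    fail_elem_key_end:
    	release_key_end();
    fail_elem:
    	release_key();
    	return err;
    }

    int main(void)
    {
    	printf("add_set_elem() = %d\n", add_set_elem());
    	return 0;
    }
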
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index a364f8e5e698..87a9009d5234 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -843,11 +843,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
}
static int
-nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
+nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff)
{
struct sk_buff *nskb;
if (diff < 0) {
+ unsigned int min_len = skb_transport_offset(e->skb);
+
+ if (data_len < min_len)
+ return -EINVAL;
+
if (pskb_trim(e->skb, data_len))
return -ENOMEM;
} else if (diff > 0) {
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index 15e4b7640dc0..da29e92c03e2 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -68,6 +68,31 @@ static void nft_queue_sreg_eval(const struct nft_expr *expr,
regs->verdict.code = ret;
}
+static int nft_queue_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ static const unsigned int supported_hooks = ((1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING));
+
+ switch (ctx->family) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ case NFPROTO_BRIDGE:
+ break;
+ case NFPROTO_NETDEV: /* lacks okfn */
+ fallthrough;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, supported_hooks);
+}
+
static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = {
[NFTA_QUEUE_NUM] = { .type = NLA_U16 },
[NFTA_QUEUE_TOTAL] = { .type = NLA_U16 },
@@ -164,6 +189,7 @@ static const struct nft_expr_ops nft_queue_ops = {
.eval = nft_queue_eval,
.init = nft_queue_init,
.dump = nft_queue_dump,
+ .validate = nft_queue_validate,
.reduce = NFT_REDUCE_READONLY,
};
@@ -173,6 +199,7 @@ static const struct nft_expr_ops nft_queue_sreg_ops = {
.eval = nft_queue_sreg_eval,
.init = nft_queue_sreg_init,
.dump = nft_queue_sreg_dump,
+ .validate = nft_queue_validate,
.reduce = NFT_REDUCE_READONLY,
};
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 9bb4d3dcc994..ac366c99086f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3533,7 +3533,7 @@ int tc_setup_action(struct flow_action *flow_action,
struct tc_action *actions[],
struct netlink_ext_ack *extack)
{
- int i, j, index, err = 0;
+ int i, j, k, index, err = 0;
struct tc_action *act;
BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY);
@@ -3553,14 +3553,18 @@ int tc_setup_action(struct flow_action *flow_action,
if (err)
goto err_out_locked;
- entry->hw_stats = tc_act_hw_stats(act->hw_stats);
- entry->hw_index = act->tcfa_index;
index = 0;
err = tc_setup_offload_act(act, entry, &index, extack);
- if (!err)
- j += index;
- else
+ if (err)
goto err_out_locked;
+
+ for (k = 0; k < index; k++) {
+ entry[k].hw_stats = tc_act_hw_stats(act->hw_stats);
+ entry[k].hw_index = act->tcfa_index;
+ }
+
+ j += index;
+
spin_unlock_bh(&act->tcfa_lock);
}
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index be29da09cc7a..3460abceba44 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -229,9 +229,8 @@ static struct sctp_association *sctp_association_init(
if (!sctp_ulpq_init(&asoc->ulpq, asoc))
goto fail_init;
- if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams,
- 0, gfp))
- goto fail_init;
+ if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp))
+ goto stream_free;
/* Initialize default path MTU. */
asoc->pathmtu = sp->pathmtu;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 35928fefae33..1a094b087d88 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -358,7 +358,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) &&
ret != RTN_LOCAL &&
!sp->inet.freebind &&
- !net->ipv4.sysctl_ip_nonlocal_bind)
+ !READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind))
return 0;
if (ipv6_only_sock(sctp_opt2sk(sp)))
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 6dc95dcc0ff4..ef9fceadef8d 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
ret = sctp_stream_alloc_out(stream, outcnt, gfp);
if (ret)
- goto out_err;
+ return ret;
for (i = 0; i < stream->outcnt; i++)
SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
@@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
handle_in:
sctp_stream_interleave_init(stream);
if (!incnt)
- goto out;
-
- ret = sctp_stream_alloc_in(stream, incnt, gfp);
- if (ret)
- goto in_err;
-
- goto out;
+ return 0;
-in_err:
- sched->free(stream);
- genradix_free(&stream->in);
-out_err:
- genradix_free(&stream->out);
- stream->outcnt = 0;
-out:
- return ret;
+ return sctp_stream_alloc_in(stream, incnt, gfp);
}
int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index 518b1b9bf89d..1ad565ed5627 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -160,7 +160,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc,
if (!SCTP_SO(&asoc->stream, i)->ext)
continue;
- ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
+ ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC);
if (ret)
goto err;
}
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index c4d057b2941d..0bde36b56472 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -2122,7 +2122,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
init_waitqueue_head(&lgr->llc_flow_waiter);
init_waitqueue_head(&lgr->llc_msg_waiter);
mutex_init(&lgr->llc_conf_mutex);
- lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time;
+ lgr->llc_testlink_time = READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
}
/* called after lgr was removed from lgr_list */
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 43509c7e90fc..f1c3b8eb4b3d 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -517,7 +517,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
sk->sk_shutdown = 0;
sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
- sk->sk_rcvbuf = sysctl_tipc_rmem[1];
+ sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]);
sk->sk_data_ready = tipc_data_ready;
sk->sk_write_space = tipc_write_space;
sk->sk_destruct = tipc_sock_destruct;
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index ec6f4b699a2b..9975df34d9c2 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -97,13 +97,16 @@ static void tls_device_queue_ctx_destruction(struct tls_context *ctx)
unsigned long flags;
spin_lock_irqsave(&tls_device_lock, flags);
+ if (unlikely(!refcount_dec_and_test(&ctx->refcount)))
+ goto unlock;
+
list_move_tail(&ctx->list, &tls_device_gc_list);
/* schedule_work inside the spinlock
* to make sure tls_device_down waits for that work.
*/
schedule_work(&tls_device_gc_work);
-
+unlock:
spin_unlock_irqrestore(&tls_device_lock, flags);
}
@@ -194,8 +197,7 @@ void tls_device_sk_destruct(struct sock *sk)
clean_acked_data_disable(inet_csk(sk));
}
- if (refcount_dec_and_test(&tls_ctx->refcount))
- tls_device_queue_ctx_destruction(tls_ctx);
+ tls_device_queue_ctx_destruction(tls_ctx);
}
EXPORT_SYMBOL_GPL(tls_device_sk_destruct);
@@ -1374,8 +1376,13 @@ static int tls_device_down(struct net_device *netdev)
* by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW.
* Now release the ref taken above.
*/
- if (refcount_dec_and_test(&ctx->refcount))
+ if (refcount_dec_and_test(&ctx->refcount)) {
+ /* sk_destruct ran after tls_device_down took a ref, and
+ * it returned early. Complete the destruction here.
+ */
+ list_del(&ctx->list);
tls_device_free_ctx(ctx);
+ }
}
up_write(&device_offload_lock);
@@ -1419,9 +1426,9 @@ static struct notifier_block tls_dev_notifier = {
.notifier_call = tls_dev_event,
};
-void __init tls_device_init(void)
+int __init tls_device_init(void)
{
- register_netdevice_notifier(&tls_dev_notifier);
+ return register_netdevice_notifier(&tls_dev_notifier);
}
void __exit tls_device_cleanup(void)
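
After this change the refcount_dec_and_test() sits inside tls_device_queue_ctx_destruction(), under tls_device_lock, so only the path that drops the final reference queues the context for destruction, and tls_device_down() completes the teardown itself when it happens to drop that last reference. A tiny userspace model of the "last reference destroys" rule (illustration only):

    #include <stdatomic.h>
    #include <stdio.h>

    struct ctx {
    	atomic_int refs;
    };

    static void put_ctx(struct ctx *c, const char *who)
    {
    	/* whoever drops the count to zero, and only that path, destroys */
    	if (atomic_fetch_sub(&c->refs, 1) == 1)
    		printf("%s dropped the last ref -> destroy ctx\n", who);
    }

    int main(void)
    {
    	struct ctx c = { .refs = 2 };	/* e.g. the socket and tls_device_down() */

    	put_ctx(&c, "sk_destruct");
    	put_ctx(&c, "tls_device_down");
    	return 0;
    }
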
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 2ffede463e4a..d80ab3d1764e 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -1048,7 +1048,12 @@ static int __init tls_register(void)
if (err)
return err;
- tls_device_init();
+ err = tls_device_init();
+ if (err) {
+ unregister_pernet_subsys(&tls_proc_ops);
+ return err;
+ }
+
tcp_register_ulp(&tcp_tls_ulp_ops);
return 0;
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index ff4d48fcbfb2..607a68911047 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -1031,7 +1031,8 @@ void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid)
{
ASSERT_WDEV_LOCK(wdev);
- if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
+ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
+ wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
return;
if (WARN_ON(!wdev->current_bss) ||
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f1876ea61fdc..f1a0bab920a5 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2678,8 +2678,10 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
*num_xfrms = 0;
return 0;
}
- if (IS_ERR(pols[0]))
+ if (IS_ERR(pols[0])) {
+ *num_pols = 0;
return PTR_ERR(pols[0]);
+ }
*num_xfrms = pols[0]->xfrm_nr;
@@ -2694,6 +2696,7 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
if (pols[1]) {
if (IS_ERR(pols[1])) {
xfrm_pols_put(pols, *num_pols);
+ *num_pols = 0;
return PTR_ERR(pols[1]);
}
(*num_pols)++;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 08564e0eef20..ccfb172eb5b8 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2620,7 +2620,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
int err;
if (family == AF_INET &&
- xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)
+ READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc))
x->props.flags |= XFRM_STATE_NOPMTUDISC;
err = -EPROTONOSUPPORT;
diff --git a/samples/fprobe/fprobe_example.c b/samples/fprobe/fprobe_example.c
index 18b1e5c4b431..e22da8573116 100644
--- a/samples/fprobe/fprobe_example.c
+++ b/samples/fprobe/fprobe_example.c
@@ -20,7 +20,7 @@
#define BACKTRACE_DEPTH 16
#define MAX_SYMBOL_LEN 4096
-struct fprobe sample_probe;
+static struct fprobe sample_probe;
static unsigned long nhit;
static char symbol[MAX_SYMBOL_LEN] = "kernel_clone";
diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c
index f991a66b5b02..fd346f58ddba 100644
--- a/samples/kprobes/kprobe_example.c
+++ b/samples/kprobes/kprobe_example.c
@@ -16,9 +16,8 @@
#include <linux/module.h>
#include <linux/kprobes.h>
-#define MAX_SYMBOL_LEN 64
-static char symbol[MAX_SYMBOL_LEN] = "kernel_clone";
-module_param_string(symbol, symbol, sizeof(symbol), 0644);
+static char symbol[KSYM_NAME_LEN] = "kernel_clone";
+module_param_string(symbol, symbol, KSYM_NAME_LEN, 0644);
/* For each probe you need to allocate a kprobe structure */
static struct kprobe kp = {
diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c
index 228321ecb161..cbf16542d84e 100644
--- a/samples/kprobes/kretprobe_example.c
+++ b/samples/kprobes/kretprobe_example.c
@@ -23,11 +23,10 @@
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/ktime.h>
-#include <linux/limits.h>
#include <linux/sched.h>
-static char func_name[NAME_MAX] = "kernel_clone";
-module_param_string(func, func_name, NAME_MAX, S_IRUGO);
+static char func_name[KSYM_NAME_LEN] = "kernel_clone";
+module_param_string(func, func_name, KSYM_NAME_LEN, 0644);
MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the"
" function's execution time");
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index d1425778664b..3fb6a99e78c4 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -236,6 +236,7 @@ objtool_args = \
$(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) \
$(if $(CONFIG_UNWINDER_ORC), --orc) \
$(if $(CONFIG_RETPOLINE), --retpoline) \
+ $(if $(CONFIG_RETHUNK), --rethunk) \
$(if $(CONFIG_SLS), --sls) \
$(if $(CONFIG_STACK_VALIDATION), --stackval) \
$(if $(CONFIG_HAVE_STATIC_CALL_INLINE), --static-call) \
diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o
index 3c97a1564947..84019814f33f 100644
--- a/scripts/Makefile.vmlinux_o
+++ b/scripts/Makefile.vmlinux_o
@@ -44,7 +44,7 @@ objtool-enabled := $(or $(delay-objtool),$(CONFIG_NOINSTR_VALIDATION))
objtool_args := \
$(if $(delay-objtool),$(objtool_args)) \
- $(if $(CONFIG_NOINSTR_VALIDATION), --noinstr) \
+ $(if $(CONFIG_NOINSTR_VALIDATION), --noinstr $(if $(CONFIG_CPU_UNRET_ENTRY), --unret)) \
$(if $(CONFIG_GCOV_KERNEL), --no-unreachable) \
--link
diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py
index 46f7542db08c..dc07b6d12e30 100644
--- a/scripts/gdb/linux/symbols.py
+++ b/scripts/gdb/linux/symbols.py
@@ -180,7 +180,7 @@ lx-symbols command."""
self.breakpoint.delete()
self.breakpoint = None
self.breakpoint = LoadModuleBreakpoint(
- "kernel/module.c:do_init_module", self)
+ "kernel/module/main.c:do_init_module", self)
else:
gdb.write("Note: symbol update on module loading not supported "
"with this gdb version\n")
diff --git a/scripts/remove-stale-files b/scripts/remove-stale-files
index 7adab4618035..379e86c71bed 100755
--- a/scripts/remove-stale-files
+++ b/scripts/remove-stale-files
@@ -41,3 +41,5 @@ if [ -n "${building_out_of_srctree}" ]; then
fi
rm -f scripts/extract-cert
+
+rm -f arch/x86/purgatory/kexec-purgatory.c
diff --git a/security/Kconfig b/security/Kconfig
index f29e4c656983..e6db09a779b7 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -54,17 +54,6 @@ config SECURITY_NETWORK
implement socket and networking access controls.
If you are unsure how to answer this question, answer N.
-config PAGE_TABLE_ISOLATION
- bool "Remove the kernel mapping in user mode"
- default y
- depends on (X86_64 || X86_PAE) && !UML
- help
- This feature reduces the number of hardware side channels by
- ensuring that the majority of kernel addresses are not mapped
- into userspace.
-
- See Documentation/x86/pti.rst for more details.
-
config SECURITY_INFINIBAND
bool "Infiniband Security Hooks"
depends on SECURITY && INFINIBAND
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index a733aff02006..708de9656bbd 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -75,7 +75,7 @@ static struct shash_desc *init_desc(char type, uint8_t hash_algo)
{
long rc;
const char *algo;
- struct crypto_shash **tfm, *tmp_tfm = NULL;
+ struct crypto_shash **tfm, *tmp_tfm;
struct shash_desc *desc;
if (type == EVM_XATTR_HMAC) {
@@ -120,16 +120,13 @@ unlock:
alloc:
desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(*tfm),
GFP_KERNEL);
- if (!desc) {
- crypto_free_shash(tmp_tfm);
+ if (!desc)
return ERR_PTR(-ENOMEM);
- }
desc->tfm = *tfm;
rc = crypto_shash_init(desc);
if (rc) {
- crypto_free_shash(tmp_tfm);
kfree(desc);
return ERR_PTR(rc);
}
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index cc88f02c7562..93e8bc047a73 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -755,13 +755,14 @@ void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name)
evm_update_evmxattr(dentry, xattr_name, NULL, 0);
}
-static int evm_attr_change(struct dentry *dentry, struct iattr *attr)
+static int evm_attr_change(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_backing_inode(dentry);
unsigned int ia_valid = attr->ia_valid;
- if ((!(ia_valid & ATTR_UID) || uid_eq(attr->ia_uid, inode->i_uid)) &&
- (!(ia_valid & ATTR_GID) || gid_eq(attr->ia_gid, inode->i_gid)) &&
+ if (!i_uid_needs_update(mnt_userns, attr, inode) &&
+ !i_gid_needs_update(mnt_userns, attr, inode) &&
(!(ia_valid & ATTR_MODE) || attr->ia_mode == inode->i_mode))
return 0;
@@ -775,7 +776,8 @@ static int evm_attr_change(struct dentry *dentry, struct iattr *attr)
* Permit update of file attributes when files have a valid EVM signature,
* except in the case of them having an immutable portable signature.
*/
-int evm_inode_setattr(struct dentry *dentry, struct iattr *attr)
+int evm_inode_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct iattr *attr)
{
unsigned int ia_valid = attr->ia_valid;
enum integrity_status evm_status;
@@ -801,7 +803,7 @@ int evm_inode_setattr(struct dentry *dentry, struct iattr *attr)
return 0;
if (evm_status == INTEGRITY_PASS_IMMUTABLE &&
- !evm_attr_change(dentry, attr))
+ !evm_attr_change(mnt_userns, dentry, attr))
return 0;
integrity_audit_msg(AUDIT_INTEGRITY_METADATA, d_backing_inode(dentry),
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index cdb84dccd24e..bde74fcecee3 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -514,7 +514,8 @@ int ima_appraise_measurement(enum ima_hooks func,
goto out;
}
- status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, rc, iint);
+ status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value,
+ rc < 0 ? 0 : rc, iint);
switch (status) {
case INTEGRITY_PASS:
case INTEGRITY_PASS_IMMUTABLE:
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index a7206cc1d7d1..64499056648a 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -205,6 +205,7 @@ out_array:
crypto_free_shash(ima_algo_array[i].tfm);
}
+ kfree(ima_algo_array);
out:
crypto_free_shash(ima_shash_tfm);
return rc;
diff --git a/security/integrity/ima/ima_efi.c b/security/integrity/ima/ima_efi.c
index 71786d01946f..9db66fe310d4 100644
--- a/security/integrity/ima/ima_efi.c
+++ b/security/integrity/ima/ima_efi.c
@@ -67,6 +67,8 @@ const char * const *arch_get_ima_policy(void)
if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) {
if (IS_ENABLED(CONFIG_MODULE_SIG))
set_module_sig_enforced();
+ if (IS_ENABLED(CONFIG_KEXEC_SIG))
+ set_kexec_sig_enforced();
return sb_arch_rules;
}
return NULL;
diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c
index 13753136f03f..419dc405c831 100644
--- a/security/integrity/ima/ima_kexec.c
+++ b/security/integrity/ima/ima_kexec.c
@@ -137,7 +137,7 @@ void ima_add_kexec_buffer(struct kimage *image)
/*
* Restore the measurement list from the previous kernel.
*/
-void ima_load_kexec_buffer(void)
+void __init ima_load_kexec_buffer(void)
{
void *kexec_buffer = NULL;
size_t kexec_buffer_size = 0;
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 73917413365b..a8802b8da946 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -2247,6 +2247,10 @@ bool ima_appraise_signature(enum kernel_read_file_id id)
if (id >= READING_MAX_ID)
return false;
+ if (id == READING_KEXEC_IMAGE && !(ima_appraise & IMA_APPRAISE_ENFORCE)
+ && security_locked_down(LOCKDOWN_KEXEC))
+ return false;
+
func = read_idmap[id] ?: FILE_CHECK;
rcu_read_lock();
diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
index c877f01a5471..7bf9b1507220 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -323,10 +323,10 @@ static int ima_eventdigest_init_common(const u8 *digest, u32 digestsize,
else
/*
* If digest is NULL, the event being recorded is a violation.
- * Make room for the digest by increasing the offset of
- * IMA_DIGEST_SIZE.
+ * Make room for the digest by increasing the offset by the
+ * hash algorithm digest size.
*/
- offset += IMA_DIGEST_SIZE;
+ offset += hash_digest_size[hash_algo];
return ima_write_template_field_data(buffer, offset + digestsize,
fmt, field_data);
diff --git a/security/security.c b/security/security.c
index 188b8f782220..f85afb02ea1c 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1324,7 +1324,8 @@ int security_inode_permission(struct inode *inode, int mask)
return call_int_hook(inode_permission, 0, inode, mask);
}
-int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
+int security_inode_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *attr)
{
int ret;
@@ -1333,7 +1334,7 @@ int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
ret = call_int_hook(inode_setattr, 0, dentry, attr);
if (ret)
return ret;
- return evm_inode_setattr(dentry, attr);
+ return evm_inode_setattr(mnt_userns, dentry, attr);
}
EXPORT_SYMBOL_GPL(security_inode_setattr);
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 3e541a4c0423..83ae21a01bbf 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -944,6 +944,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
SND_PCI_QUIRK(0x103c, 0x828c, "HP EliteBook 840 G4", CXT_FIXUP_HP_DOCK),
SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x103c, 0x82b4, "HP ProDesk 600 G3", CXT_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x836e, "HP ProBook 455 G5", CXT_FIXUP_MUTE_LED_GPIO),
SND_PCI_QUIRK(0x103c, 0x837f, "HP ProBook 470 G5", CXT_FIXUP_MUTE_LED_GPIO),
SND_PCI_QUIRK(0x103c, 0x83b2, "HP EliteBook 840 G5", CXT_FIXUP_HP_DOCK),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 007dd8b5e1f2..2f55bc43bfa9 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6901,6 +6901,7 @@ enum {
ALC298_FIXUP_LENOVO_SPK_VOLUME,
ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER,
ALC269_FIXUP_ATIV_BOOK_8,
+ ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE,
ALC221_FIXUP_HP_MIC_NO_PRESENCE,
ALC256_FIXUP_ASUS_HEADSET_MODE,
ALC256_FIXUP_ASUS_MIC,
@@ -7837,6 +7838,16 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC269_FIXUP_NO_SHUTUP
},
+ [ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */
+ { 0x1a, 0x01813030 }, /* use as headphone mic, without its own jack detect */
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HEADSET_MODE
+ },
[ALC221_FIXUP_HP_MIC_NO_PRESENCE] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
@@ -8886,6 +8897,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x129c, "Acer SWIFT SF314-55", ALC256_FIXUP_ACER_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1025, 0x129d, "Acer SWIFT SF313-51", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1025, 0x1300, "Acer SWIFT SF314-56", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC),
@@ -8895,6 +8907,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1025, 0x1466, "Acer Aspire A515-56", ALC255_FIXUP_ACER_HEADPHONE_AND_MIC),
SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
+ SND_PCI_QUIRK(0x1028, 0x053c, "Dell Latitude E5430", ALC292_FIXUP_DELL_E7X),
SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS),
SND_PCI_QUIRK(0x1028, 0x05bd, "Dell Latitude E6440", ALC292_FIXUP_DELL_E7X),
SND_PCI_QUIRK(0x1028, 0x05be, "Dell Latitude E6540", ALC292_FIXUP_DELL_E7X),
@@ -9010,6 +9023,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+ SND_PCI_QUIRK(0x103c, 0x2b5e, "HP 288 Pro G2 MT", ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x802e, "HP Z240 SFF", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x802f, "HP Z240", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x8077, "HP", ALC256_FIXUP_HP_HEADSET_MIC),
@@ -9096,6 +9110,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x89c6, "Zbook Fury 17 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x89ca, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8a78, "HP Dev One", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
+ SND_PCI_QUIRK(0x103c, 0x8aa0, "HP ProBook 440 G9 (MB 8A9E)", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
@@ -9355,6 +9373,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC),
SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC),
SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED),
@@ -11217,6 +11236,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
SND_PCI_QUIRK(0x103c, 0x8719, "HP", ALC897_FIXUP_HP_HSMIC_VERB),
SND_PCI_QUIRK(0x103c, 0x873e, "HP", ALC671_FIXUP_HP_HEADSET_MIC2),
+ SND_PCI_QUIRK(0x103c, 0x877e, "HP 288 Pro G6", ALC671_FIXUP_HP_HEADSET_MIC2),
SND_PCI_QUIRK(0x103c, 0x885f, "HP 288 Pro G8", ALC671_FIXUP_HP_HEADSET_MIC2),
SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE),
SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50),
diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c
index e32871b3f68a..7434aeeda292 100644
--- a/sound/soc/codecs/arizona.c
+++ b/sound/soc/codecs/arizona.c
@@ -1760,8 +1760,8 @@ static bool arizona_aif_cfg_changed(struct snd_soc_component *component,
if (bclk != (val & ARIZONA_AIF1_BCLK_FREQ_MASK))
return true;
- val = snd_soc_component_read(component, base + ARIZONA_AIF_TX_BCLK_RATE);
- if (lrclk != (val & ARIZONA_AIF1TX_BCPF_MASK))
+ val = snd_soc_component_read(component, base + ARIZONA_AIF_RX_BCLK_RATE);
+ if (lrclk != (val & ARIZONA_AIF1RX_BCPF_MASK))
return true;
val = snd_soc_component_read(component, base + ARIZONA_AIF_FRAME_CTRL_1);
diff --git a/sound/soc/codecs/cs47l92.c b/sound/soc/codecs/cs47l92.c
index a1b8dcdb9f7b..444026b7d54b 100644
--- a/sound/soc/codecs/cs47l92.c
+++ b/sound/soc/codecs/cs47l92.c
@@ -119,7 +119,13 @@ static int cs47l92_put_demux(struct snd_kcontrol *kcontrol,
end:
snd_soc_dapm_mutex_unlock(dapm);
- return snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
+ ret = snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
+ if (ret < 0) {
+ dev_err(madera->dev, "Failed to update demux power state: %d\n", ret);
+ return ret;
+ }
+
+ return change;
}
static SOC_ENUM_SINGLE_DECL(cs47l92_outdemux_enum,
diff --git a/sound/soc/codecs/max98396.c b/sound/soc/codecs/max98396.c
index 56eb62bb041f..34db38812807 100644
--- a/sound/soc/codecs/max98396.c
+++ b/sound/soc/codecs/max98396.c
@@ -342,12 +342,15 @@ static int max98396_dai_set_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt)
{
struct snd_soc_component *component = codec_dai->component;
struct max98396_priv *max98396 = snd_soc_component_get_drvdata(component);
- unsigned int format = 0;
+ unsigned int format_mask, format = 0;
unsigned int bclk_pol = 0;
int ret, status;
int reg;
bool update = false;
+ format_mask = MAX98396_PCM_MODE_CFG_FORMAT_MASK |
+ MAX98396_PCM_MODE_CFG_LRCLKEDGE;
+
dev_dbg(component->dev, "%s: fmt 0x%08X\n", __func__, fmt);
switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
@@ -395,7 +398,7 @@ static int max98396_dai_set_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt)
ret = regmap_read(max98396->regmap, MAX98396_R2041_PCM_MODE_CFG, &reg);
if (ret < 0)
return -EINVAL;
- if (format != (reg & MAX98396_PCM_BCLKEDGE_BSEL_MASK)) {
+ if (format != (reg & format_mask)) {
update = true;
} else {
ret = regmap_read(max98396->regmap,
@@ -412,8 +415,7 @@ static int max98396_dai_set_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt)
regmap_update_bits(max98396->regmap,
MAX98396_R2041_PCM_MODE_CFG,
- MAX98396_PCM_BCLKEDGE_BSEL_MASK,
- format);
+ format_mask, format);
regmap_update_bits(max98396->regmap,
MAX98396_R2042_PCM_CLK_SETUP,
diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c
index 69c80d80ed9d..18b3da9211e3 100644
--- a/sound/soc/codecs/rt5640.c
+++ b/sound/soc/codecs/rt5640.c
@@ -1984,7 +1984,12 @@ static int rt5640_set_bias_level(struct snd_soc_component *component,
snd_soc_component_write(component, RT5640_PWR_DIG2, 0x0000);
snd_soc_component_write(component, RT5640_PWR_VOL, 0x0000);
snd_soc_component_write(component, RT5640_PWR_MIXER, 0x0000);
- snd_soc_component_write(component, RT5640_PWR_ANLG1, 0x0000);
+ if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER)
+ snd_soc_component_write(component, RT5640_PWR_ANLG1,
+ 0x0018);
+ else
+ snd_soc_component_write(component, RT5640_PWR_ANLG1,
+ 0x0000);
snd_soc_component_write(component, RT5640_PWR_ANLG2, 0x0000);
break;
@@ -2393,9 +2398,15 @@ static void rt5640_jack_work(struct work_struct *work)
static irqreturn_t rt5640_irq(int irq, void *data)
{
struct rt5640_priv *rt5640 = data;
+ int delay = 0;
+
+ if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER) {
+ cancel_delayed_work_sync(&rt5640->jack_work);
+ delay = 100;
+ }
if (rt5640->jack)
- queue_delayed_work(system_long_wq, &rt5640->jack_work, 0);
+ queue_delayed_work(system_long_wq, &rt5640->jack_work, delay);
return IRQ_HANDLED;
}
@@ -2580,6 +2591,12 @@ static void rt5640_enable_hda_jack_detect(
snd_soc_component_update_bits(component, RT5640_DUMMY1, 0x400, 0x0);
+ snd_soc_component_update_bits(component, RT5640_PWR_ANLG1,
+ RT5640_PWR_VREF2, RT5640_PWR_VREF2);
+ usleep_range(10000, 15000);
+ snd_soc_component_update_bits(component, RT5640_PWR_ANLG1,
+ RT5640_PWR_FV2, RT5640_PWR_FV2);
+
rt5640->jack = jack;
ret = request_irq(rt5640->irq, rt5640_irq,
@@ -2696,16 +2713,13 @@ static int rt5640_probe(struct snd_soc_component *component)
if (device_property_read_u32(component->dev,
"realtek,jack-detect-source", &val) == 0) {
- if (val <= RT5640_JD_SRC_GPIO4) {
+ if (val <= RT5640_JD_SRC_GPIO4)
rt5640->jd_src = val << RT5640_JD_SFT;
- } else if (val == RT5640_JD_SRC_HDA_HEADER) {
+ else if (val == RT5640_JD_SRC_HDA_HEADER)
rt5640->jd_src = RT5640_JD_SRC_HDA_HEADER;
- snd_soc_component_update_bits(component, RT5640_DUMMY1,
- 0x0300, 0x0);
- } else {
+ else
dev_warn(component->dev, "Warning: Invalid jack-detect-source value: %d, leaving jack-detect disabled\n",
val);
- }
}
if (!device_property_read_bool(component->dev, "realtek,jack-detect-not-inverted"))
diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c
index 2aa48aef6a97..3363d1696ad7 100644
--- a/sound/soc/codecs/sgtl5000.c
+++ b/sound/soc/codecs/sgtl5000.c
@@ -1795,6 +1795,9 @@ static int sgtl5000_i2c_remove(struct i2c_client *client)
{
struct sgtl5000_priv *sgtl5000 = i2c_get_clientdata(client);
+ regmap_write(sgtl5000->regmap, SGTL5000_CHIP_DIG_POWER, SGTL5000_DIG_POWER_DEFAULT);
+ regmap_write(sgtl5000->regmap, SGTL5000_CHIP_ANA_POWER, SGTL5000_ANA_POWER_DEFAULT);
+
clk_disable_unprepare(sgtl5000->mclk);
regulator_bulk_disable(sgtl5000->num_supplies, sgtl5000->supplies);
regulator_bulk_free(sgtl5000->num_supplies, sgtl5000->supplies);
@@ -1802,6 +1805,11 @@ static int sgtl5000_i2c_remove(struct i2c_client *client)
return 0;
}
+static void sgtl5000_i2c_shutdown(struct i2c_client *client)
+{
+ sgtl5000_i2c_remove(client);
+}
+
static const struct i2c_device_id sgtl5000_id[] = {
{"sgtl5000", 0},
{},
@@ -1822,6 +1830,7 @@ static struct i2c_driver sgtl5000_i2c_driver = {
},
.probe_new = sgtl5000_i2c_probe,
.remove = sgtl5000_i2c_remove,
+ .shutdown = sgtl5000_i2c_shutdown,
.id_table = sgtl5000_id,
};
diff --git a/sound/soc/codecs/sgtl5000.h b/sound/soc/codecs/sgtl5000.h
index 56ec5863f250..3a808c762299 100644
--- a/sound/soc/codecs/sgtl5000.h
+++ b/sound/soc/codecs/sgtl5000.h
@@ -80,6 +80,7 @@
/*
* SGTL5000_CHIP_DIG_POWER
*/
+#define SGTL5000_DIG_POWER_DEFAULT 0x0000
#define SGTL5000_ADC_EN 0x0040
#define SGTL5000_DAC_EN 0x0020
#define SGTL5000_DAP_POWERUP 0x0010
diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c
index d395feffb30b..4cb788f3e5f7 100644
--- a/sound/soc/codecs/tas2764.c
+++ b/sound/soc/codecs/tas2764.c
@@ -42,10 +42,12 @@ static void tas2764_reset(struct tas2764_priv *tas2764)
gpiod_set_value_cansleep(tas2764->reset_gpio, 0);
msleep(20);
gpiod_set_value_cansleep(tas2764->reset_gpio, 1);
+ usleep_range(1000, 2000);
}
snd_soc_component_write(tas2764->component, TAS2764_SW_RST,
TAS2764_RST);
+ usleep_range(1000, 2000);
}
static int tas2764_set_bias_level(struct snd_soc_component *component,
@@ -107,8 +109,10 @@ static int tas2764_codec_resume(struct snd_soc_component *component)
struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component);
int ret;
- if (tas2764->sdz_gpio)
+ if (tas2764->sdz_gpio) {
gpiod_set_value_cansleep(tas2764->sdz_gpio, 1);
+ usleep_range(1000, 2000);
+ }
ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL,
TAS2764_PWR_CTRL_MASK,
@@ -131,7 +135,8 @@ static const char * const tas2764_ASI1_src[] = {
};
static SOC_ENUM_SINGLE_DECL(
- tas2764_ASI1_src_enum, TAS2764_TDM_CFG2, 4, tas2764_ASI1_src);
+ tas2764_ASI1_src_enum, TAS2764_TDM_CFG2, TAS2764_TDM_CFG2_SCFG_SHIFT,
+ tas2764_ASI1_src);
static const struct snd_kcontrol_new tas2764_asi1_mux =
SOC_DAPM_ENUM("ASI1 Source", tas2764_ASI1_src_enum);
@@ -329,20 +334,22 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
{
struct snd_soc_component *component = dai->component;
struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component);
- u8 tdm_rx_start_slot = 0, asi_cfg_1 = 0;
- int iface;
+ u8 tdm_rx_start_slot = 0, asi_cfg_0 = 0, asi_cfg_1 = 0;
int ret;
switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+ case SND_SOC_DAIFMT_NB_IF:
+ asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START;
+ fallthrough;
case SND_SOC_DAIFMT_NB_NF:
asi_cfg_1 = TAS2764_TDM_CFG1_RX_RISING;
break;
+ case SND_SOC_DAIFMT_IB_IF:
+ asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START;
+ fallthrough;
case SND_SOC_DAIFMT_IB_NF:
asi_cfg_1 = TAS2764_TDM_CFG1_RX_FALLING;
break;
- default:
- dev_err(tas2764->dev, "ASI format Inverse is not found\n");
- return -EINVAL;
}
ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1,
@@ -353,13 +360,13 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
case SND_SOC_DAIFMT_I2S:
+ asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START;
+ fallthrough;
case SND_SOC_DAIFMT_DSP_A:
- iface = TAS2764_TDM_CFG2_SCFG_I2S;
tdm_rx_start_slot = 1;
break;
case SND_SOC_DAIFMT_DSP_B:
case SND_SOC_DAIFMT_LEFT_J:
- iface = TAS2764_TDM_CFG2_SCFG_LEFT_J;
tdm_rx_start_slot = 0;
break;
default:
@@ -368,14 +375,15 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
return -EINVAL;
}
- ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1,
- TAS2764_TDM_CFG1_MASK,
- (tdm_rx_start_slot << TAS2764_TDM_CFG1_51_SHIFT));
+ ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG0,
+ TAS2764_TDM_CFG0_FRAME_START,
+ asi_cfg_0);
if (ret < 0)
return ret;
- ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG2,
- TAS2764_TDM_CFG2_SCFG_MASK, iface);
+ ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1,
+ TAS2764_TDM_CFG1_MASK,
+ (tdm_rx_start_slot << TAS2764_TDM_CFG1_51_SHIFT));
if (ret < 0)
return ret;
@@ -501,8 +509,10 @@ static int tas2764_codec_probe(struct snd_soc_component *component)
tas2764->component = component;
- if (tas2764->sdz_gpio)
+ if (tas2764->sdz_gpio) {
gpiod_set_value_cansleep(tas2764->sdz_gpio, 1);
+ usleep_range(1000, 2000);
+ }
tas2764_reset(tas2764);
@@ -526,12 +536,12 @@ static int tas2764_codec_probe(struct snd_soc_component *component)
}
static DECLARE_TLV_DB_SCALE(tas2764_digital_tlv, 1100, 50, 0);
-static DECLARE_TLV_DB_SCALE(tas2764_playback_volume, -10000, 50, 0);
+static DECLARE_TLV_DB_SCALE(tas2764_playback_volume, -10050, 50, 1);
static const struct snd_kcontrol_new tas2764_snd_controls[] = {
SOC_SINGLE_TLV("Speaker Volume", TAS2764_DVC, 0,
TAS2764_DVC_MAX, 1, tas2764_playback_volume),
- SOC_SINGLE_TLV("Amp Gain Volume", TAS2764_CHNL_0, 0, 0x14, 0,
+ SOC_SINGLE_TLV("Amp Gain Volume", TAS2764_CHNL_0, 1, 0x14, 0,
tas2764_digital_tlv),
};
@@ -556,7 +566,7 @@ static const struct reg_default tas2764_reg_defaults[] = {
{ TAS2764_SW_RST, 0x00 },
{ TAS2764_PWR_CTRL, 0x1a },
{ TAS2764_DVC, 0x00 },
- { TAS2764_CHNL_0, 0x00 },
+ { TAS2764_CHNL_0, 0x28 },
{ TAS2764_TDM_CFG0, 0x09 },
{ TAS2764_TDM_CFG1, 0x02 },
{ TAS2764_TDM_CFG2, 0x0a },
diff --git a/sound/soc/codecs/tas2764.h b/sound/soc/codecs/tas2764.h
index 67d6fd903c42..f015f22a083b 100644
--- a/sound/soc/codecs/tas2764.h
+++ b/sound/soc/codecs/tas2764.h
@@ -47,6 +47,7 @@
#define TAS2764_TDM_CFG0_MASK GENMASK(3, 1)
#define TAS2764_TDM_CFG0_44_1_48KHZ BIT(3)
#define TAS2764_TDM_CFG0_88_2_96KHZ (BIT(3) | BIT(1))
+#define TAS2764_TDM_CFG0_FRAME_START BIT(0)
/* TDM Configuration Reg1 */
#define TAS2764_TDM_CFG1 TAS2764_REG(0X0, 0x09)
@@ -66,10 +67,7 @@
#define TAS2764_TDM_CFG2_RXS_16BITS 0x0
#define TAS2764_TDM_CFG2_RXS_24BITS BIT(0)
#define TAS2764_TDM_CFG2_RXS_32BITS BIT(1)
-#define TAS2764_TDM_CFG2_SCFG_MASK GENMASK(5, 4)
-#define TAS2764_TDM_CFG2_SCFG_I2S 0x0
-#define TAS2764_TDM_CFG2_SCFG_LEFT_J BIT(4)
-#define TAS2764_TDM_CFG2_SCFG_RIGHT_J BIT(5)
+#define TAS2764_TDM_CFG2_SCFG_SHIFT 4
/* TDM Configuration Reg3 */
#define TAS2764_TDM_CFG3 TAS2764_REG(0X0, 0x0c)
diff --git a/sound/soc/codecs/tlv320adcx140.c b/sound/soc/codecs/tlv320adcx140.c
index b55f0b836932..0b729658fde8 100644
--- a/sound/soc/codecs/tlv320adcx140.c
+++ b/sound/soc/codecs/tlv320adcx140.c
@@ -33,7 +33,6 @@ struct adcx140_priv {
bool micbias_vg;
unsigned int dai_fmt;
- unsigned int tdm_delay;
unsigned int slot_width;
};
@@ -792,12 +791,13 @@ static int adcx140_set_dai_tdm_slot(struct snd_soc_dai *codec_dai,
{
struct snd_soc_component *component = codec_dai->component;
struct adcx140_priv *adcx140 = snd_soc_component_get_drvdata(component);
- unsigned int lsb;
- /* TDM based on DSP mode requires slots to be adjacent */
- lsb = __ffs(tx_mask);
- if ((lsb + 1) != __fls(tx_mask)) {
- dev_err(component->dev, "Invalid mask, slots must be adjacent\n");
+ /*
+ * The chip itself supports arbitrary masks, but the driver currently
+ * only supports adjacent slots beginning at the first slot.
+ */
+ if (tx_mask != GENMASK(__fls(tx_mask), 0)) {
+ dev_err(component->dev, "Only lower adjacent slots are supported\n");
return -EINVAL;
}
@@ -812,7 +812,6 @@ static int adcx140_set_dai_tdm_slot(struct snd_soc_dai *codec_dai,
return -EINVAL;
}
- adcx140->tdm_delay = lsb;
adcx140->slot_width = slot_width;
return 0;
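
The new check accepts a tx_mask only if it is a contiguous run of slots starting at slot 0, i.e. tx_mask == GENMASK(__fls(tx_mask), 0). A small userspace demo of that test (illustration only; GENMASK() and fls32() below are simplified 32-bit stand-ins for the kernel helpers):

    #include <stdio.h>

    #define GENMASK(h, l)	(((~0U) << (l)) & (~0U >> (31 - (h))))

    static unsigned int fls32(unsigned int x)
    {
    	return 32 - __builtin_clz(x);	/* highest set bit, 1-based; x != 0 */
    }

    static int tx_mask_ok(unsigned int tx_mask)
    {
    	/* contiguous run of slots starting at slot 0 */
    	return tx_mask && tx_mask == GENMASK(fls32(tx_mask) - 1, 0);
    }

    int main(void)
    {
    	printf("0x03 -> %d\n", tx_mask_ok(0x03));	/* slots 0-1: accepted */
    	printf("0x06 -> %d\n", tx_mask_ok(0x06));	/* slots 1-2: rejected */
    	printf("0x0f -> %d\n", tx_mask_ok(0x0f));	/* slots 0-3: accepted */
    	return 0;
    }
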
diff --git a/sound/soc/codecs/wcd9335.c b/sound/soc/codecs/wcd9335.c
index d9f135200688..3cb7a3eab8c7 100644
--- a/sound/soc/codecs/wcd9335.c
+++ b/sound/soc/codecs/wcd9335.c
@@ -342,7 +342,7 @@ struct wcd9335_codec {
struct regulator_bulk_data supplies[WCD9335_MAX_SUPPLY];
unsigned int rx_port_value[WCD9335_RX_MAX];
- unsigned int tx_port_value;
+ unsigned int tx_port_value[WCD9335_TX_MAX];
int hph_l_gain;
int hph_r_gain;
u32 rx_bias_count;
@@ -1334,8 +1334,13 @@ static int slim_tx_mixer_get(struct snd_kcontrol *kc,
struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kc);
struct wcd9335_codec *wcd = dev_get_drvdata(dapm->dev);
+ struct snd_soc_dapm_widget *widget = snd_soc_dapm_kcontrol_widget(kc);
+ struct soc_mixer_control *mixer =
+ (struct soc_mixer_control *)kc->private_value;
+ int dai_id = widget->shift;
+ int port_id = mixer->shift;
- ucontrol->value.integer.value[0] = wcd->tx_port_value;
+ ucontrol->value.integer.value[0] = wcd->tx_port_value[port_id] == dai_id;
return 0;
}
@@ -1358,12 +1363,12 @@ static int slim_tx_mixer_put(struct snd_kcontrol *kc,
case AIF2_CAP:
case AIF3_CAP:
/* only add to the list if value not set */
- if (enable && !(wcd->tx_port_value & BIT(port_id))) {
- wcd->tx_port_value |= BIT(port_id);
+ if (enable && wcd->tx_port_value[port_id] != dai_id) {
+ wcd->tx_port_value[port_id] = dai_id;
list_add_tail(&wcd->tx_chs[port_id].list,
&wcd->dai[dai_id].slim_ch_list);
- } else if (!enable && (wcd->tx_port_value & BIT(port_id))) {
- wcd->tx_port_value &= ~BIT(port_id);
+ } else if (!enable && wcd->tx_port_value[port_id] == dai_id) {
+ wcd->tx_port_value[port_id] = -1;
list_del_init(&wcd->tx_chs[port_id].list);
}
break;
diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c
index da2f8998df87..b034df47a5ef 100644
--- a/sound/soc/codecs/wm5102.c
+++ b/sound/soc/codecs/wm5102.c
@@ -680,12 +680,17 @@ static int wm5102_out_comp_coeff_put(struct snd_kcontrol *kcontrol,
{
struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
struct arizona *arizona = dev_get_drvdata(component->dev->parent);
+ uint16_t dac_comp_coeff = get_unaligned_be16(ucontrol->value.bytes.data);
+ int ret = 0;
mutex_lock(&arizona->dac_comp_lock);
- arizona->dac_comp_coeff = get_unaligned_be16(ucontrol->value.bytes.data);
+ if (arizona->dac_comp_coeff != dac_comp_coeff) {
+ arizona->dac_comp_coeff = dac_comp_coeff;
+ ret = 1;
+ }
mutex_unlock(&arizona->dac_comp_lock);
- return 0;
+ return ret;
}
static int wm5102_out_comp_switch_get(struct snd_kcontrol *kcontrol,
@@ -706,12 +711,20 @@ static int wm5102_out_comp_switch_put(struct snd_kcontrol *kcontrol,
{
struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
struct arizona *arizona = dev_get_drvdata(component->dev->parent);
+ struct soc_mixer_control *mc = (struct soc_mixer_control *)kcontrol->private_value;
+ int ret = 0;
+
+ if (ucontrol->value.integer.value[0] > mc->max)
+ return -EINVAL;
mutex_lock(&arizona->dac_comp_lock);
- arizona->dac_comp_enabled = ucontrol->value.integer.value[0];
+ if (arizona->dac_comp_enabled != ucontrol->value.integer.value[0]) {
+ arizona->dac_comp_enabled = ucontrol->value.integer.value[0];
+ ret = 1;
+ }
mutex_unlock(&arizona->dac_comp_lock);
- return 0;
+ return ret;
}
static const char * const wm5102_osr_text[] = {
diff --git a/sound/soc/codecs/wm8998.c b/sound/soc/codecs/wm8998.c
index 00b59fc9b1fe..ab5481187c71 100644
--- a/sound/soc/codecs/wm8998.c
+++ b/sound/soc/codecs/wm8998.c
@@ -108,6 +108,7 @@ static int wm8998_inmux_put(struct snd_kcontrol *kcontrol,
struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
unsigned int mode_reg, mode_index;
unsigned int mux, inmode, src_val, mode_val;
+ int change, ret;
mux = ucontrol->value.enumerated.item[0];
if (mux > 1)
@@ -137,14 +138,20 @@ static int wm8998_inmux_put(struct snd_kcontrol *kcontrol,
snd_soc_component_update_bits(component, mode_reg,
ARIZONA_IN1_MODE_MASK, mode_val);
- snd_soc_component_update_bits(component, e->reg,
- ARIZONA_IN1L_SRC_MASK |
- ARIZONA_IN1L_SRC_SE_MASK,
- src_val);
+ change = snd_soc_component_update_bits(component, e->reg,
+ ARIZONA_IN1L_SRC_MASK |
+ ARIZONA_IN1L_SRC_SE_MASK,
+ src_val);
- return snd_soc_dapm_mux_update_power(dapm, kcontrol,
- ucontrol->value.enumerated.item[0],
- e, NULL);
+ ret = snd_soc_dapm_mux_update_power(dapm, kcontrol,
+ ucontrol->value.enumerated.item[0],
+ e, NULL);
+ if (ret < 0) {
+ dev_err(arizona->dev, "Failed to update demux power state: %d\n", ret);
+ return ret;
+ }
+
+ return change;
}
static const char * const wm8998_inmux_texts[] = {
diff --git a/sound/soc/generic/audio-graph-card2.c b/sound/soc/generic/audio-graph-card2.c
index 77ac4051b827..d34b29a49268 100644
--- a/sound/soc/generic/audio-graph-card2.c
+++ b/sound/soc/generic/audio-graph-card2.c
@@ -90,12 +90,12 @@ links indicates connection part of CPU side (= A).
ports@0 {
(X) (A) mcpu: port@0 { mcpu0_ep: endpoint { remote-endpoint = <&mcodec0_ep>; }; };
(y) port@1 { mcpu1_ep: endpoint { remote-endpoint = <&cpu1_ep>; }; };
-(y) port@1 { mcpu2_ep: endpoint { remote-endpoint = <&cpu2_ep>; }; };
+(y) port@2 { mcpu2_ep: endpoint { remote-endpoint = <&cpu2_ep>; }; };
};
ports@1 {
(X) port@0 { mcodec0_ep: endpoint { remote-endpoint = <&mcpu0_ep>; }; };
-(y) port@0 { mcodec1_ep: endpoint { remote-endpoint = <&codec1_ep>; }; };
-(y) port@1 { mcodec2_ep: endpoint { remote-endpoint = <&codec2_ep>; }; };
+(y) port@1 { mcodec1_ep: endpoint { remote-endpoint = <&codec1_ep>; }; };
+(y) port@2 { mcodec2_ep: endpoint { remote-endpoint = <&codec2_ep>; }; };
};
};
};
diff --git a/sound/soc/intel/boards/sof_rt5682.c b/sound/soc/intel/boards/sof_rt5682.c
index 5d67a2c87a1d..4a90a0a5d831 100644
--- a/sound/soc/intel/boards/sof_rt5682.c
+++ b/sound/soc/intel/boards/sof_rt5682.c
@@ -69,11 +69,10 @@ static unsigned long sof_rt5682_quirk = SOF_RT5682_MCLK_EN |
static int is_legacy_cpu;
-static struct snd_soc_jack sof_hdmi[3];
-
struct sof_hdmi_pcm {
struct list_head head;
struct snd_soc_dai *codec_dai;
+ struct snd_soc_jack hdmi_jack;
int device;
};
@@ -434,7 +433,6 @@ static int sof_card_late_probe(struct snd_soc_card *card)
char jack_name[NAME_SIZE];
struct sof_hdmi_pcm *pcm;
int err;
- int i = 0;
/* HDMI is not supported by SOF on Baytrail/CherryTrail */
if (is_legacy_cpu || !ctx->idisp_codec)
@@ -455,17 +453,15 @@ static int sof_card_late_probe(struct snd_soc_card *card)
snprintf(jack_name, sizeof(jack_name),
"HDMI/DP, pcm=%d Jack", pcm->device);
err = snd_soc_card_jack_new(card, jack_name,
- SND_JACK_AVOUT, &sof_hdmi[i]);
+ SND_JACK_AVOUT, &pcm->hdmi_jack);
if (err)
return err;
err = hdac_hdmi_jack_init(pcm->codec_dai, pcm->device,
- &sof_hdmi[i]);
+ &pcm->hdmi_jack);
if (err < 0)
return err;
-
- i++;
}
if (sof_rt5682_quirk & SOF_MAX98373_SPEAKER_AMP_PRESENT) {
diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c
index 2439a574ac2f..deb7b820325e 100644
--- a/sound/soc/intel/skylake/skl-nhlt.c
+++ b/sound/soc/intel/skylake/skl-nhlt.c
@@ -99,7 +99,6 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks,
struct nhlt_fmt_cfg *fmt_cfg;
struct wav_fmt_ext *wav_fmt;
unsigned long rate;
- bool present = false;
int rate_index = 0;
u16 channels, bps;
u8 clk_src;
@@ -112,9 +111,12 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks,
if (fmt->fmt_count == 0)
return;
+ fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config;
for (i = 0; i < fmt->fmt_count; i++) {
- fmt_cfg = &fmt->fmt_config[i];
- wav_fmt = &fmt_cfg->fmt_ext;
+ struct nhlt_fmt_cfg *saved_fmt_cfg = fmt_cfg;
+ bool present = false;
+
+ wav_fmt = &saved_fmt_cfg->fmt_ext;
channels = wav_fmt->fmt.channels;
bps = wav_fmt->fmt.bits_per_sample;
@@ -132,12 +134,18 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks,
* derive the rate.
*/
for (j = i; j < fmt->fmt_count; j++) {
- fmt_cfg = &fmt->fmt_config[j];
- wav_fmt = &fmt_cfg->fmt_ext;
+ struct nhlt_fmt_cfg *tmp_fmt_cfg = fmt_cfg;
+
+ wav_fmt = &tmp_fmt_cfg->fmt_ext;
if ((fs == wav_fmt->fmt.samples_per_sec) &&
- (bps == wav_fmt->fmt.bits_per_sample))
+ (bps == wav_fmt->fmt.bits_per_sample)) {
channels = max_t(u16, channels,
wav_fmt->fmt.channels);
+ saved_fmt_cfg = tmp_fmt_cfg;
+ }
+ /* Move to the next nhlt_fmt_cfg */
+ tmp_fmt_cfg = (struct nhlt_fmt_cfg *)(tmp_fmt_cfg->config.caps +
+ tmp_fmt_cfg->config.size);
}
rate = channels * bps * fs;
@@ -153,8 +161,11 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks,
/* Fill rate and parent for sclk/sclkfs */
if (!present) {
+ struct nhlt_fmt_cfg *first_fmt_cfg;
+
+ first_fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config;
i2s_config_ext = (struct skl_i2s_config_blob_ext *)
- fmt->fmt_config[0].config.caps;
+ first_fmt_cfg->config.caps;
/* MCLK Divider Source Select */
if (is_legacy_blob(i2s_config_ext->hdr.sig)) {
@@ -168,6 +179,9 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks,
parent = skl_get_parent_clk(clk_src);
+ /* Move to the next nhlt_fmt_cfg */
+ fmt_cfg = (struct nhlt_fmt_cfg *)(fmt_cfg->config.caps +
+ fmt_cfg->config.size);
/*
* Do not copy the config data if there is no parent
* clock available for this clock source select
@@ -176,9 +190,9 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks,
continue;
sclk[id].rate_cfg[rate_index].rate = rate;
- sclk[id].rate_cfg[rate_index].config = fmt_cfg;
+ sclk[id].rate_cfg[rate_index].config = saved_fmt_cfg;
sclkfs[id].rate_cfg[rate_index].rate = rate;
- sclkfs[id].rate_cfg[rate_index].config = fmt_cfg;
+ sclkfs[id].rate_cfg[rate_index].config = saved_fmt_cfg;
sclk[id].parent_name = parent->name;
sclkfs[id].parent_name = parent->name;
@@ -192,13 +206,13 @@ static void skl_get_mclk(struct skl_dev *skl, struct skl_ssp_clk *mclk,
{
struct skl_i2s_config_blob_ext *i2s_config_ext;
struct skl_i2s_config_blob_legacy *i2s_config;
- struct nhlt_specific_cfg *fmt_cfg;
+ struct nhlt_fmt_cfg *fmt_cfg;
struct skl_clk_parent_src *parent;
u32 clkdiv, div_ratio;
u8 clk_src;
- fmt_cfg = &fmt->fmt_config[0].config;
- i2s_config_ext = (struct skl_i2s_config_blob_ext *)fmt_cfg->caps;
+ fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config;
+ i2s_config_ext = (struct skl_i2s_config_blob_ext *)fmt_cfg->config.caps;
/* MCLK Divider Source Select and divider */
if (is_legacy_blob(i2s_config_ext->hdr.sig)) {
@@ -227,7 +241,7 @@ static void skl_get_mclk(struct skl_dev *skl, struct skl_ssp_clk *mclk,
return;
mclk[id].rate_cfg[0].rate = parent->rate/div_ratio;
- mclk[id].rate_cfg[0].config = &fmt->fmt_config[0];
+ mclk[id].rate_cfg[0].config = fmt_cfg;
mclk[id].parent_name = parent->name;
}
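[Editor's note] The skl-nhlt.c hunks above stop indexing fmt->fmt_config[] as a fixed-size array because each nhlt_fmt_cfg entry carries a variable-length capabilities blob; the next entry begins immediately after the current entry's caps data. A hedged sketch of that walk (hypothetical example_* helper, struct layout as used in the patch):

#include <sound/intel-nhlt.h>

/* Hypothetical walker showing the pointer arithmetic used above. */
static void example_walk_fmt_configs(struct nhlt_fmt *fmt)
{
	struct nhlt_fmt_cfg *cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config;
	u8 i;

	for (i = 0; i < fmt->fmt_count; i++) {
		/* inspect cfg->fmt_ext and cfg->config here */

		/* advance past this entry's variable-length caps blob */
		cfg = (struct nhlt_fmt_cfg *)(cfg->config.caps +
					      cfg->config.size);
	}
}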
diff --git a/sound/soc/qcom/qdsp6/q6apm.c b/sound/soc/qcom/qdsp6/q6apm.c
index f424d7aa389a..794019286c70 100644
--- a/sound/soc/qcom/qdsp6/q6apm.c
+++ b/sound/soc/qcom/qdsp6/q6apm.c
@@ -75,6 +75,7 @@ static struct audioreach_graph *q6apm_get_audioreach_graph(struct q6apm *apm, ui
id = idr_alloc(&apm->graph_idr, graph, graph_id, graph_id + 1, GFP_KERNEL);
if (id < 0) {
dev_err(apm->dev, "Unable to allocate graph id (%d)\n", graph_id);
+ kfree(graph->graph);
kfree(graph);
mutex_unlock(&apm->lock);
return ERR_PTR(id);
diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c
index 99a128a666fb..4ce5d2579387 100644
--- a/sound/soc/rockchip/rockchip_i2s.c
+++ b/sound/soc/rockchip/rockchip_i2s.c
@@ -13,7 +13,6 @@
#include <linux/of_gpio.h>
#include <linux/of_device.h>
#include <linux/clk.h>
-#include <linux/pinctrl/consumer.h>
#include <linux/pm_runtime.h>
#include <linux/regmap.h>
#include <linux/spinlock.h>
@@ -55,40 +54,8 @@ struct rk_i2s_dev {
const struct rk_i2s_pins *pins;
unsigned int bclk_ratio;
spinlock_t lock; /* tx/rx lock */
- struct pinctrl *pinctrl;
- struct pinctrl_state *bclk_on;
- struct pinctrl_state *bclk_off;
};
-static int i2s_pinctrl_select_bclk_on(struct rk_i2s_dev *i2s)
-{
- int ret = 0;
-
- if (!IS_ERR(i2s->pinctrl) && !IS_ERR_OR_NULL(i2s->bclk_on))
- ret = pinctrl_select_state(i2s->pinctrl,
- i2s->bclk_on);
-
- if (ret)
- dev_err(i2s->dev, "bclk enable failed %d\n", ret);
-
- return ret;
-}
-
-static int i2s_pinctrl_select_bclk_off(struct rk_i2s_dev *i2s)
-{
-
- int ret = 0;
-
- if (!IS_ERR(i2s->pinctrl) && !IS_ERR_OR_NULL(i2s->bclk_off))
- ret = pinctrl_select_state(i2s->pinctrl,
- i2s->bclk_off);
-
- if (ret)
- dev_err(i2s->dev, "bclk disable failed %d\n", ret);
-
- return ret;
-}
-
static int i2s_runtime_suspend(struct device *dev)
{
struct rk_i2s_dev *i2s = dev_get_drvdata(dev);
@@ -125,49 +92,38 @@ static inline struct rk_i2s_dev *to_info(struct snd_soc_dai *dai)
return snd_soc_dai_get_drvdata(dai);
}
-static int rockchip_snd_txctrl(struct rk_i2s_dev *i2s, int on)
+static void rockchip_snd_txctrl(struct rk_i2s_dev *i2s, int on)
{
unsigned int val = 0;
int retry = 10;
- int ret = 0;
spin_lock(&i2s->lock);
if (on) {
- ret = regmap_update_bits(i2s->regmap, I2S_DMACR,
- I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_ENABLE);
- if (ret < 0)
- goto end;
+ regmap_update_bits(i2s->regmap, I2S_DMACR,
+ I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_ENABLE);
- ret = regmap_update_bits(i2s->regmap, I2S_XFER,
- I2S_XFER_TXS_START | I2S_XFER_RXS_START,
- I2S_XFER_TXS_START | I2S_XFER_RXS_START);
- if (ret < 0)
- goto end;
+ regmap_update_bits(i2s->regmap, I2S_XFER,
+ I2S_XFER_TXS_START | I2S_XFER_RXS_START,
+ I2S_XFER_TXS_START | I2S_XFER_RXS_START);
i2s->tx_start = true;
} else {
i2s->tx_start = false;
- ret = regmap_update_bits(i2s->regmap, I2S_DMACR,
- I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_DISABLE);
- if (ret < 0)
- goto end;
+ regmap_update_bits(i2s->regmap, I2S_DMACR,
+ I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_DISABLE);
if (!i2s->rx_start) {
- ret = regmap_update_bits(i2s->regmap, I2S_XFER,
- I2S_XFER_TXS_START |
- I2S_XFER_RXS_START,
- I2S_XFER_TXS_STOP |
- I2S_XFER_RXS_STOP);
- if (ret < 0)
- goto end;
+ regmap_update_bits(i2s->regmap, I2S_XFER,
+ I2S_XFER_TXS_START |
+ I2S_XFER_RXS_START,
+ I2S_XFER_TXS_STOP |
+ I2S_XFER_RXS_STOP);
udelay(150);
- ret = regmap_update_bits(i2s->regmap, I2S_CLR,
- I2S_CLR_TXC | I2S_CLR_RXC,
- I2S_CLR_TXC | I2S_CLR_RXC);
- if (ret < 0)
- goto end;
+ regmap_update_bits(i2s->regmap, I2S_CLR,
+ I2S_CLR_TXC | I2S_CLR_RXC,
+ I2S_CLR_TXC | I2S_CLR_RXC);
regmap_read(i2s->regmap, I2S_CLR, &val);
@@ -182,57 +138,44 @@ static int rockchip_snd_txctrl(struct rk_i2s_dev *i2s, int on)
}
}
}
-end:
spin_unlock(&i2s->lock);
- if (ret < 0)
- dev_err(i2s->dev, "lrclk update failed\n");
-
- return ret;
}
-static int rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on)
+static void rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on)
{
unsigned int val = 0;
int retry = 10;
- int ret = 0;
spin_lock(&i2s->lock);
if (on) {
- ret = regmap_update_bits(i2s->regmap, I2S_DMACR,
+ regmap_update_bits(i2s->regmap, I2S_DMACR,
I2S_DMACR_RDE_ENABLE, I2S_DMACR_RDE_ENABLE);
- if (ret < 0)
- goto end;
- ret = regmap_update_bits(i2s->regmap, I2S_XFER,
+ regmap_update_bits(i2s->regmap, I2S_XFER,
I2S_XFER_TXS_START | I2S_XFER_RXS_START,
I2S_XFER_TXS_START | I2S_XFER_RXS_START);
- if (ret < 0)
- goto end;
i2s->rx_start = true;
} else {
i2s->rx_start = false;
- ret = regmap_update_bits(i2s->regmap, I2S_DMACR,
+ regmap_update_bits(i2s->regmap, I2S_DMACR,
I2S_DMACR_RDE_ENABLE, I2S_DMACR_RDE_DISABLE);
- if (ret < 0)
- goto end;
if (!i2s->tx_start) {
- ret = regmap_update_bits(i2s->regmap, I2S_XFER,
+ regmap_update_bits(i2s->regmap, I2S_XFER,
I2S_XFER_TXS_START |
I2S_XFER_RXS_START,
I2S_XFER_TXS_STOP |
I2S_XFER_RXS_STOP);
- if (ret < 0)
- goto end;
+
udelay(150);
- ret = regmap_update_bits(i2s->regmap, I2S_CLR,
+ regmap_update_bits(i2s->regmap, I2S_CLR,
I2S_CLR_TXC | I2S_CLR_RXC,
I2S_CLR_TXC | I2S_CLR_RXC);
- if (ret < 0)
- goto end;
+
regmap_read(i2s->regmap, I2S_CLR, &val);
+
/* Should wait for clear operation to finish */
while (val) {
regmap_read(i2s->regmap, I2S_CLR, &val);
@@ -244,12 +187,7 @@ static int rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on)
}
}
}
-end:
spin_unlock(&i2s->lock);
- if (ret < 0)
- dev_err(i2s->dev, "lrclk update failed\n");
-
- return ret;
}
static int rockchip_i2s_set_fmt(struct snd_soc_dai *cpu_dai,
@@ -487,26 +425,17 @@ static int rockchip_i2s_trigger(struct snd_pcm_substream *substream,
case SNDRV_PCM_TRIGGER_RESUME:
case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
- ret = rockchip_snd_rxctrl(i2s, 1);
+ rockchip_snd_rxctrl(i2s, 1);
else
- ret = rockchip_snd_txctrl(i2s, 1);
- /* Do not turn on bclk if lrclk open fails. */
- if (ret < 0)
- return ret;
- i2s_pinctrl_select_bclk_on(i2s);
+ rockchip_snd_txctrl(i2s, 1);
break;
case SNDRV_PCM_TRIGGER_SUSPEND:
case SNDRV_PCM_TRIGGER_STOP:
case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
- if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) {
- if (!i2s->tx_start)
- i2s_pinctrl_select_bclk_off(i2s);
- ret = rockchip_snd_rxctrl(i2s, 0);
- } else {
- if (!i2s->rx_start)
- i2s_pinctrl_select_bclk_off(i2s);
- ret = rockchip_snd_txctrl(i2s, 0);
- }
+ if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
+ rockchip_snd_rxctrl(i2s, 0);
+ else
+ rockchip_snd_txctrl(i2s, 0);
break;
default:
ret = -EINVAL;
@@ -807,33 +736,6 @@ static int rockchip_i2s_probe(struct platform_device *pdev)
}
i2s->bclk_ratio = 64;
- i2s->pinctrl = devm_pinctrl_get(&pdev->dev);
- if (IS_ERR(i2s->pinctrl))
- dev_err(&pdev->dev, "failed to find i2s pinctrl\n");
-
- i2s->bclk_on = pinctrl_lookup_state(i2s->pinctrl,
- "bclk_on");
- if (IS_ERR_OR_NULL(i2s->bclk_on))
- dev_err(&pdev->dev, "failed to find i2s default state\n");
- else
- dev_dbg(&pdev->dev, "find i2s bclk state\n");
-
- i2s->bclk_off = pinctrl_lookup_state(i2s->pinctrl,
- "bclk_off");
- if (IS_ERR_OR_NULL(i2s->bclk_off))
- dev_err(&pdev->dev, "failed to find i2s gpio state\n");
- else
- dev_dbg(&pdev->dev, "find i2s bclk_off state\n");
-
- i2s_pinctrl_select_bclk_off(i2s);
-
- i2s->playback_dma_data.addr = res->start + I2S_TXDR;
- i2s->playback_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
- i2s->playback_dma_data.maxburst = 4;
-
- i2s->capture_dma_data.addr = res->start + I2S_RXDR;
- i2s->capture_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
- i2s->capture_dma_data.maxburst = 4;
dev_set_drvdata(&pdev->dev, i2s);
diff --git a/sound/soc/ti/omap-mcbsp-priv.h b/sound/soc/ti/omap-mcbsp-priv.h
index 7865cda4bf0a..da519ea1f303 100644
--- a/sound/soc/ti/omap-mcbsp-priv.h
+++ b/sound/soc/ti/omap-mcbsp-priv.h
@@ -316,8 +316,6 @@ static inline int omap_mcbsp_read(struct omap_mcbsp *mcbsp, u16 reg,
/* Sidetone specific API */
int omap_mcbsp_st_init(struct platform_device *pdev);
-void omap_mcbsp_st_cleanup(struct platform_device *pdev);
-
int omap_mcbsp_st_start(struct omap_mcbsp *mcbsp);
int omap_mcbsp_st_stop(struct omap_mcbsp *mcbsp);
diff --git a/sound/soc/ti/omap-mcbsp-st.c b/sound/soc/ti/omap-mcbsp-st.c
index 0bc7d26c660a..7e8179cae92e 100644
--- a/sound/soc/ti/omap-mcbsp-st.c
+++ b/sound/soc/ti/omap-mcbsp-st.c
@@ -347,7 +347,7 @@ int omap_mcbsp_st_init(struct platform_device *pdev)
if (!st_data)
return -ENOMEM;
- st_data->mcbsp_iclk = clk_get(mcbsp->dev, "ick");
+ st_data->mcbsp_iclk = devm_clk_get(mcbsp->dev, "ick");
if (IS_ERR(st_data->mcbsp_iclk)) {
dev_warn(mcbsp->dev,
"Failed to get ick, sidetone might be broken\n");
@@ -359,7 +359,7 @@ int omap_mcbsp_st_init(struct platform_device *pdev)
if (!st_data->io_base_st)
return -ENOMEM;
- ret = sysfs_create_group(&mcbsp->dev->kobj, &sidetone_attr_group);
+ ret = devm_device_add_group(mcbsp->dev, &sidetone_attr_group);
if (ret)
return ret;
@@ -368,16 +368,6 @@ int omap_mcbsp_st_init(struct platform_device *pdev)
return 0;
}
-void omap_mcbsp_st_cleanup(struct platform_device *pdev)
-{
- struct omap_mcbsp *mcbsp = platform_get_drvdata(pdev);
-
- if (mcbsp->st_data) {
- sysfs_remove_group(&mcbsp->dev->kobj, &sidetone_attr_group);
- clk_put(mcbsp->st_data->mcbsp_iclk);
- }
-}
-
static int omap_mcbsp_st_info_volsw(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_info *uinfo)
{
diff --git a/sound/soc/ti/omap-mcbsp.c b/sound/soc/ti/omap-mcbsp.c
index 4479d74f0a45..9933b33c80ca 100644
--- a/sound/soc/ti/omap-mcbsp.c
+++ b/sound/soc/ti/omap-mcbsp.c
@@ -702,8 +702,7 @@ static int omap_mcbsp_init(struct platform_device *pdev)
mcbsp->max_tx_thres = max_thres(mcbsp) - 0x10;
mcbsp->max_rx_thres = max_thres(mcbsp) - 0x10;
- ret = sysfs_create_group(&mcbsp->dev->kobj,
- &additional_attr_group);
+ ret = devm_device_add_group(mcbsp->dev, &additional_attr_group);
if (ret) {
dev_err(mcbsp->dev,
"Unable to create additional controls\n");
@@ -711,16 +710,7 @@ static int omap_mcbsp_init(struct platform_device *pdev)
}
}
- ret = omap_mcbsp_st_init(pdev);
- if (ret)
- goto err_st;
-
- return 0;
-
-err_st:
- if (mcbsp->pdata->buffer_size)
- sysfs_remove_group(&mcbsp->dev->kobj, &additional_attr_group);
- return ret;
+ return omap_mcbsp_st_init(pdev);
}
/*
@@ -1431,11 +1421,6 @@ static int asoc_mcbsp_remove(struct platform_device *pdev)
if (cpu_latency_qos_request_active(&mcbsp->pm_qos_req))
cpu_latency_qos_remove_request(&mcbsp->pm_qos_req);
- if (mcbsp->pdata->buffer_size)
- sysfs_remove_group(&mcbsp->dev->kobj, &additional_attr_group);
-
- omap_mcbsp_st_cleanup(pdev);
-
return 0;
}
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 03acc823838a..a77b915d36a8 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -203,8 +203,8 @@
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
+#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
@@ -296,6 +296,13 @@
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
+#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */
+#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
+#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
+#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
+#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
@@ -316,6 +323,7 @@
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
+#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
@@ -447,5 +455,6 @@
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
+#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 36369e76cc63..33d2cd04d254 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -50,6 +50,25 @@
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
+#ifdef CONFIG_RETPOLINE
+# define DISABLE_RETPOLINE 0
+#else
+# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
+ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
+#endif
+
+#ifdef CONFIG_RETHUNK
+# define DISABLE_RETHUNK 0
+#else
+# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
+#endif
+
+#ifdef CONFIG_CPU_UNRET_ENTRY
+# define DISABLE_UNRET 0
+#else
+# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
+#endif
+
#ifdef CONFIG_INTEL_IOMMU_SVM
# define DISABLE_ENQCMD 0
#else
@@ -82,7 +101,7 @@
#define DISABLED_MASK8 (DISABLE_TDX_GUEST)
#define DISABLED_MASK9 (DISABLE_SGX)
#define DISABLED_MASK10 0
-#define DISABLED_MASK11 0
+#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
#define DISABLED_MASK12 0
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index d27e0581b777..cc615be27a54 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -51,6 +51,8 @@
#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
+#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
+#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
@@ -93,6 +95,7 @@
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
+#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */
#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
#define ARCH_CAP_SSB_NO BIT(4) /*
* Not susceptible to Speculative Store Bypass
@@ -140,6 +143,13 @@
* bit available to control VERW
* behavior.
*/
+#define ARCH_CAP_RRSBA BIT(19) /*
+ * Indicates RET may use predictors
+ * other than the RSB. With eIBRS
+ * enabled predictions in kernel mode
+ * are restricted to targets in
+ * kernel.
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
@@ -567,6 +577,9 @@
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
+#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
+#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
+
/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h
index 15b940ec1eac..10bc88cc3bf6 100644
--- a/tools/include/linux/objtool.h
+++ b/tools/include/linux/objtool.h
@@ -32,11 +32,16 @@ struct unwind_hint {
*
* UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
* Useful for code which doesn't have an ELF function annotation.
+ *
+ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc.
*/
#define UNWIND_HINT_TYPE_CALL 0
#define UNWIND_HINT_TYPE_REGS 1
#define UNWIND_HINT_TYPE_REGS_PARTIAL 2
#define UNWIND_HINT_TYPE_FUNC 3
+#define UNWIND_HINT_TYPE_ENTRY 4
+#define UNWIND_HINT_TYPE_SAVE 5
+#define UNWIND_HINT_TYPE_RESTORE 6
#ifdef CONFIG_OBJTOOL
@@ -124,7 +129,7 @@ struct unwind_hint {
* the debuginfo as necessary. It will also warn if it sees any
* inconsistencies.
*/
-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
.Lunwind_hint_ip_\@:
.pushsection .discard.unwind_hints
/* struct unwind_hint */
@@ -177,7 +182,7 @@ struct unwind_hint {
#define ASM_REACHABLE
#else
#define ANNOTATE_INTRA_FUNCTION_CALL
-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
.endm
.macro STACK_FRAME_NON_STANDARD func:req
.endm
diff --git a/tools/include/uapi/asm-generic/fcntl.h b/tools/include/uapi/asm-generic/fcntl.h
index 0197042b7dfb..1ecdb911add8 100644
--- a/tools/include/uapi/asm-generic/fcntl.h
+++ b/tools/include/uapi/asm-generic/fcntl.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_GENERIC_FCNTL_H
#define _ASM_GENERIC_FCNTL_H
@@ -90,7 +91,7 @@
/* a horrid kludge trying to make sure that this will fail on old kernels */
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
-#define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT)
+#define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT)
#ifndef O_NDELAY
#define O_NDELAY O_NONBLOCK
@@ -115,11 +116,13 @@
#define F_GETSIG 11 /* for sockets. */
#endif
+#if __BITS_PER_LONG == 32 || defined(__KERNEL__)
#ifndef F_GETLK64
#define F_GETLK64 12 /* using 'struct flock64' */
#define F_SETLK64 13
#define F_SETLKW64 14
#endif
+#endif /* __BITS_PER_LONG == 32 || defined(__KERNEL__) */
#ifndef F_SETOWN_EX
#define F_SETOWN_EX 15
@@ -178,6 +181,10 @@ struct f_owner_ex {
blocking */
#define LOCK_UN 8 /* remove lock */
+/*
+ * LOCK_MAND support has been removed from the kernel. We leave the symbols
+ * here to not break legacy builds, but these should not be used in new code.
+ */
#define LOCK_MAND 32 /* This is a mandatory flock ... */
#define LOCK_READ 64 /* which allows concurrent read operations */
#define LOCK_WRITE 128 /* which allows concurrent write operations */
@@ -185,6 +192,7 @@ struct f_owner_ex {
#define F_LINUX_SPECIFIC_BASE 1024
+#ifndef HAVE_ARCH_STRUCT_FLOCK
struct flock {
short l_type;
short l_whence;
@@ -209,5 +217,6 @@ struct flock64 {
__ARCH_FLOCK64_PAD
#endif
};
+#endif /* HAVE_ARCH_STRUCT_FLOCK */
#endif /* _ASM_GENERIC_FCNTL_H */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f4009dbdf62d..ef78e0e1a754 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5222,22 +5222,25 @@ union bpf_attr {
* Return
* Nothing. Always succeeds.
*
- * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset)
+ * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags)
* Description
* Read *len* bytes from *src* into *dst*, starting from *offset*
* into *src*.
+ * *flags* is currently unused.
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
- * of *src*'s data, -EINVAL if *src* is an invalid dynptr.
+ * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
+ * *flags* is not 0.
*
- * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len)
+ * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
+ * *flags* is currently unused.
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
- * is a read-only dynptr.
+ * is a read-only dynptr or if *flags* is not 0.
*
* void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len)
* Description
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 5088bd9f1922..860f867c50c0 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -2083,7 +2083,8 @@ struct kvm_stats_header {
#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT)
-#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES
+#define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN
#define KVM_STATS_BASE_SHIFT 8
#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT)
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 8b990a52aada..c260006106be 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -787,3 +787,8 @@ bool arch_is_retpoline(struct symbol *sym)
{
return !strncmp(sym->name, "__x86_indirect_", 15);
}
+
+bool arch_is_rethunk(struct symbol *sym)
+{
+ return !strcmp(sym->name, "__x86_return_thunk");
+}
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index f4c3a5091737..24fbe803a0d3 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -68,6 +68,8 @@ const struct option check_options[] = {
OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"),
OPT_BOOLEAN('o', "orc", &opts.orc, "generate ORC metadata"),
OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"),
+ OPT_BOOLEAN(0, "rethunk", &opts.rethunk, "validate and annotate rethunk usage"),
+ OPT_BOOLEAN(0, "unret", &opts.unret, "validate entry unret placement"),
OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"),
OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"),
OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"),
@@ -123,6 +125,7 @@ static bool opts_valid(void)
opts.noinstr ||
opts.orc ||
opts.retpoline ||
+ opts.rethunk ||
opts.sls ||
opts.stackval ||
opts.static_call ||
@@ -135,6 +138,11 @@ static bool opts_valid(void)
return true;
}
+ if (opts.unret && !opts.rethunk) {
+ ERROR("--unret requires --rethunk");
+ return false;
+ }
+
if (opts.dump_orc)
return true;
@@ -163,6 +171,11 @@ static bool link_opts_valid(struct objtool_file *file)
return false;
}
+ if (opts.unret) {
+ ERROR("--unret requires --link");
+ return false;
+ }
+
return true;
}
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 57153e00349c..b341f8a8c7c5 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -376,7 +376,8 @@ static int decode_instructions(struct objtool_file *file)
sec->text = true;
if (!strcmp(sec->name, ".noinstr.text") ||
- !strcmp(sec->name, ".entry.text"))
+ !strcmp(sec->name, ".entry.text") ||
+ !strncmp(sec->name, ".text.__x86.", 12))
sec->noinstr = true;
for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) {
@@ -749,6 +750,52 @@ static int create_retpoline_sites_sections(struct objtool_file *file)
return 0;
}
+static int create_return_sites_sections(struct objtool_file *file)
+{
+ struct instruction *insn;
+ struct section *sec;
+ int idx;
+
+ sec = find_section_by_name(file->elf, ".return_sites");
+ if (sec) {
+ WARN("file already has .return_sites, skipping");
+ return 0;
+ }
+
+ idx = 0;
+ list_for_each_entry(insn, &file->return_thunk_list, call_node)
+ idx++;
+
+ if (!idx)
+ return 0;
+
+ sec = elf_create_section(file->elf, ".return_sites", 0,
+ sizeof(int), idx);
+ if (!sec) {
+ WARN("elf_create_section: .return_sites");
+ return -1;
+ }
+
+ idx = 0;
+ list_for_each_entry(insn, &file->return_thunk_list, call_node) {
+
+ int *site = (int *)sec->data->d_buf + idx;
+ *site = 0;
+
+ if (elf_add_reloc_to_insn(file->elf, sec,
+ idx * sizeof(int),
+ R_X86_64_PC32,
+ insn->sec, insn->offset)) {
+ WARN("elf_add_reloc_to_insn: .return_sites");
+ return -1;
+ }
+
+ idx++;
+ }
+
+ return 0;
+}
+
static int create_ibt_endbr_seal_sections(struct objtool_file *file)
{
struct instruction *insn;
@@ -1083,6 +1130,11 @@ __weak bool arch_is_retpoline(struct symbol *sym)
return false;
}
+__weak bool arch_is_rethunk(struct symbol *sym)
+{
+ return false;
+}
+
#define NEGATIVE_RELOC ((void *)-1L)
static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
@@ -1250,6 +1302,19 @@ static void add_retpoline_call(struct objtool_file *file, struct instruction *in
annotate_call_site(file, insn, false);
}
+static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add)
+{
+ /*
+ * Return thunk tail calls are really just returns in disguise,
+ * so convert them accordingly.
+ */
+ insn->type = INSN_RETURN;
+ insn->retpoline_safe = true;
+
+ if (add)
+ list_add_tail(&insn->call_node, &file->return_thunk_list);
+}
+
static bool same_function(struct instruction *insn1, struct instruction *insn2)
{
return insn1->func->pfunc == insn2->func->pfunc;
@@ -1302,6 +1367,9 @@ static int add_jump_destinations(struct objtool_file *file)
} else if (reloc->sym->retpoline_thunk) {
add_retpoline_call(file, insn);
continue;
+ } else if (reloc->sym->return_thunk) {
+ add_return_call(file, insn, true);
+ continue;
} else if (insn->func) {
/*
* External sibling call or internal sibling call with
@@ -1320,6 +1388,21 @@ static int add_jump_destinations(struct objtool_file *file)
jump_dest = find_insn(file, dest_sec, dest_off);
if (!jump_dest) {
+ struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
+
+ /*
+ * This is a special case for zen_untrain_ret().
+ * It jumps to __x86_return_thunk(), but objtool
+ * can't find the thunk's starting RET
+ * instruction, because the RET is also in the
+ * middle of another instruction. Objtool only
+ * knows about the outer instruction.
+ */
+ if (sym && sym->return_thunk) {
+ add_return_call(file, insn, false);
+ continue;
+ }
+
WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
insn->sec, insn->offset, dest_sec->name,
dest_off);
@@ -1949,16 +2032,35 @@ static int read_unwind_hints(struct objtool_file *file)
insn->hint = true;
- if (opts.ibt && hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
+ if (hint->type == UNWIND_HINT_TYPE_SAVE) {
+ insn->hint = false;
+ insn->save = true;
+ continue;
+ }
+
+ if (hint->type == UNWIND_HINT_TYPE_RESTORE) {
+ insn->restore = true;
+ continue;
+ }
+
+ if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset);
- if (sym && sym->bind == STB_GLOBAL &&
- insn->type != INSN_ENDBR && !insn->noendbr) {
- WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR",
- insn->sec, insn->offset);
+ if (sym && sym->bind == STB_GLOBAL) {
+ if (opts.ibt && insn->type != INSN_ENDBR && !insn->noendbr) {
+ WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR",
+ insn->sec, insn->offset);
+ }
+
+ insn->entry = 1;
}
}
+ if (hint->type == UNWIND_HINT_TYPE_ENTRY) {
+ hint->type = UNWIND_HINT_TYPE_CALL;
+ insn->entry = 1;
+ }
+
if (hint->type == UNWIND_HINT_TYPE_FUNC) {
insn->cfi = &func_cfi;
continue;
@@ -2032,8 +2134,10 @@ static int read_retpoline_hints(struct objtool_file *file)
}
if (insn->type != INSN_JUMP_DYNAMIC &&
- insn->type != INSN_CALL_DYNAMIC) {
- WARN_FUNC("retpoline_safe hint not an indirect jump/call",
+ insn->type != INSN_CALL_DYNAMIC &&
+ insn->type != INSN_RETURN &&
+ insn->type != INSN_NOP) {
+ WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret/nop",
insn->sec, insn->offset);
return -1;
}
@@ -2184,6 +2288,9 @@ static int classify_symbols(struct objtool_file *file)
if (arch_is_retpoline(func))
func->retpoline_thunk = true;
+ if (arch_is_rethunk(func))
+ func->return_thunk = true;
+
if (!strcmp(func->name, "__fentry__"))
func->fentry = true;
@@ -3218,8 +3325,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
return 1;
}
- visited = 1 << state.uaccess;
- if (insn->visited) {
+ visited = VISITED_BRANCH << state.uaccess;
+ if (insn->visited & VISITED_BRANCH_MASK) {
if (!insn->hint && !insn_cfi_match(insn, &state.cfi))
return 1;
@@ -3233,6 +3340,35 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
state.instr += insn->instr;
if (insn->hint) {
+ if (insn->restore) {
+ struct instruction *save_insn, *i;
+
+ i = insn;
+ save_insn = NULL;
+
+ sym_for_each_insn_continue_reverse(file, func, i) {
+ if (i->save) {
+ save_insn = i;
+ break;
+ }
+ }
+
+ if (!save_insn) {
+ WARN_FUNC("no corresponding CFI save for CFI restore",
+ sec, insn->offset);
+ return 1;
+ }
+
+ if (!save_insn->visited) {
+ WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo",
+ sec, insn->offset);
+ return 1;
+ }
+
+ insn->cfi = save_insn->cfi;
+ nr_cfi_reused++;
+ }
+
state.cfi = *insn->cfi;
} else {
/* XXX track if we actually changed state.cfi */
@@ -3433,6 +3569,145 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
return warnings;
}
+/*
+ * Validate rethunk entry constraint: must untrain RET before the first RET.
+ *
+ * Follow every branch (intra-function) and ensure ANNOTATE_UNRET_END comes
+ * before an actual RET instruction.
+ */
+static int validate_entry(struct objtool_file *file, struct instruction *insn)
+{
+ struct instruction *next, *dest;
+ int ret, warnings = 0;
+
+ for (;;) {
+ next = next_insn_to_validate(file, insn);
+
+ if (insn->visited & VISITED_ENTRY)
+ return 0;
+
+ insn->visited |= VISITED_ENTRY;
+
+ if (!insn->ignore_alts && !list_empty(&insn->alts)) {
+ struct alternative *alt;
+ bool skip_orig = false;
+
+ list_for_each_entry(alt, &insn->alts, list) {
+ if (alt->skip_orig)
+ skip_orig = true;
+
+ ret = validate_entry(file, alt->insn);
+ if (ret) {
+ if (opts.backtrace)
+ BT_FUNC("(alt)", insn);
+ return ret;
+ }
+ }
+
+ if (skip_orig)
+ return 0;
+ }
+
+ switch (insn->type) {
+
+ case INSN_CALL_DYNAMIC:
+ case INSN_JUMP_DYNAMIC:
+ case INSN_JUMP_DYNAMIC_CONDITIONAL:
+ WARN_FUNC("early indirect call", insn->sec, insn->offset);
+ return 1;
+
+ case INSN_JUMP_UNCONDITIONAL:
+ case INSN_JUMP_CONDITIONAL:
+ if (!is_sibling_call(insn)) {
+ if (!insn->jump_dest) {
+ WARN_FUNC("unresolved jump target after linking?!?",
+ insn->sec, insn->offset);
+ return -1;
+ }
+ ret = validate_entry(file, insn->jump_dest);
+ if (ret) {
+ if (opts.backtrace) {
+ BT_FUNC("(branch%s)", insn,
+ insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : "");
+ }
+ return ret;
+ }
+
+ if (insn->type == INSN_JUMP_UNCONDITIONAL)
+ return 0;
+
+ break;
+ }
+
+ /* fallthrough */
+ case INSN_CALL:
+ dest = find_insn(file, insn->call_dest->sec,
+ insn->call_dest->offset);
+ if (!dest) {
+ WARN("Unresolved function after linking!?: %s",
+ insn->call_dest->name);
+ return -1;
+ }
+
+ ret = validate_entry(file, dest);
+ if (ret) {
+ if (opts.backtrace)
+ BT_FUNC("(call)", insn);
+ return ret;
+ }
+ /*
+ * If a call returns without error, it must have seen UNTRAIN_RET.
+ * Therefore any non-error return is a success.
+ */
+ return 0;
+
+ case INSN_RETURN:
+ WARN_FUNC("RET before UNTRAIN", insn->sec, insn->offset);
+ return 1;
+
+ case INSN_NOP:
+ if (insn->retpoline_safe)
+ return 0;
+ break;
+
+ default:
+ break;
+ }
+
+ if (!next) {
+ WARN_FUNC("teh end!", insn->sec, insn->offset);
+ return -1;
+ }
+ insn = next;
+ }
+
+ return warnings;
+}
+
+/*
+ * Validate that all branches starting at 'insn->entry' encounter UNRET_END
+ * before RET.
+ */
+static int validate_unret(struct objtool_file *file)
+{
+ struct instruction *insn;
+ int ret, warnings = 0;
+
+ for_each_insn(file, insn) {
+ if (!insn->entry)
+ continue;
+
+ ret = validate_entry(file, insn);
+ if (ret < 0) {
+ WARN_FUNC("Failed UNRET validation", insn->sec, insn->offset);
+ return ret;
+ }
+ warnings += ret;
+ }
+
+ return warnings;
+}
+
static int validate_retpoline(struct objtool_file *file)
{
struct instruction *insn;
@@ -3440,7 +3715,8 @@ static int validate_retpoline(struct objtool_file *file)
for_each_insn(file, insn) {
if (insn->type != INSN_JUMP_DYNAMIC &&
- insn->type != INSN_CALL_DYNAMIC)
+ insn->type != INSN_CALL_DYNAMIC &&
+ insn->type != INSN_RETURN)
continue;
if (insn->retpoline_safe)
@@ -3455,9 +3731,17 @@ static int validate_retpoline(struct objtool_file *file)
if (!strcmp(insn->sec->name, ".init.text") && !opts.module)
continue;
- WARN_FUNC("indirect %s found in RETPOLINE build",
- insn->sec, insn->offset,
- insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
+ if (insn->type == INSN_RETURN) {
+ if (opts.rethunk) {
+ WARN_FUNC("'naked' return found in RETHUNK build",
+ insn->sec, insn->offset);
+ } else
+ continue;
+ } else {
+ WARN_FUNC("indirect %s found in RETPOLINE build",
+ insn->sec, insn->offset,
+ insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
+ }
warnings++;
}
@@ -3945,6 +4229,17 @@ int check(struct objtool_file *file)
warnings += ret;
}
+ if (opts.unret) {
+ /*
+ * Must be after validate_branch() and friends, it plays
+ * further games with insn->visited.
+ */
+ ret = validate_unret(file);
+ if (ret < 0)
+ return ret;
+ warnings += ret;
+ }
+
if (opts.ibt) {
ret = validate_ibt(file);
if (ret < 0)
@@ -3973,6 +4268,13 @@ int check(struct objtool_file *file)
warnings += ret;
}
+ if (opts.rethunk) {
+ ret = create_return_sites_sections(file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+ }
+
if (opts.mcount) {
ret = create_mcount_loc_sections(file);
if (ret < 0)
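[Editor's note] The .return_sites section created in the check.c hunks above holds one 32-bit entry per return-thunk site; objtool writes a zero placeholder plus an R_X86_64_PC32 relocation, so after linking each entry contains the site address relative to the entry itself. A hedged sketch of how a consumer could turn such a self-relative entry back into an absolute address (hypothetical helper, not part of this patch):

#include <linux/types.h>

/* Hypothetical decode of one self-relative .return_sites entry. */
static inline void *example_resolve_site(s32 *entry)
{
	/* the entry holds (target - entry location); add it back to recover the target */
	return (void *)entry + *entry;
}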
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 9b19cc304195..beb2f3aa94ff 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -89,6 +89,7 @@ const char *arch_ret_insn(int len);
int arch_decode_hint_reg(u8 sp_reg, int *base);
bool arch_is_retpoline(struct symbol *sym);
+bool arch_is_rethunk(struct symbol *sym);
int arch_rewrite_retpolines(struct objtool_file *file);
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 280ea18b7f2b..42a52f1a0add 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -19,6 +19,8 @@ struct opts {
bool noinstr;
bool orc;
bool retpoline;
+ bool rethunk;
+ bool unret;
bool sls;
bool stackval;
bool static_call;
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index f10d7374f388..036129cebeee 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -46,16 +46,19 @@ struct instruction {
enum insn_type type;
unsigned long immediate;
- u8 dead_end : 1,
- ignore : 1,
- ignore_alts : 1,
- hint : 1,
- retpoline_safe : 1,
- noendbr : 1;
- /* 2 bit hole */
+ u16 dead_end : 1,
+ ignore : 1,
+ ignore_alts : 1,
+ hint : 1,
+ save : 1,
+ restore : 1,
+ retpoline_safe : 1,
+ noendbr : 1,
+ entry : 1;
+ /* 7 bit hole */
+
s8 instr;
u8 visited;
- /* u8 hole */
struct alt_group *alt_group;
struct symbol *call_dest;
@@ -69,6 +72,11 @@ struct instruction {
struct cfi_state *cfi;
};
+#define VISITED_BRANCH 0x01
+#define VISITED_BRANCH_UACCESS 0x02
+#define VISITED_BRANCH_MASK 0x03
+#define VISITED_ENTRY 0x04
+
static inline bool is_static_jump(struct instruction *insn)
{
return insn->type == INSN_JUMP_CONDITIONAL ||
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index adebfbc2b518..16f4067b82ae 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -57,6 +57,7 @@ struct symbol {
u8 uaccess_safe : 1;
u8 static_call_tramp : 1;
u8 retpoline_thunk : 1;
+ u8 return_thunk : 1;
u8 fentry : 1;
u8 profiling_func : 1;
struct list_head pv_target;
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index a6e72d916807..7f2d1b095333 100644
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -24,6 +24,7 @@ struct objtool_file {
struct list_head insn_list;
DECLARE_HASHTABLE(insn_hash, 20);
struct list_head retpoline_call_list;
+ struct list_head return_thunk_list;
struct list_head static_call_list;
struct list_head mcount_loc_list;
struct list_head endbr_list;
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 512669ce064c..a7ecc32e3512 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -102,6 +102,7 @@ struct objtool_file *objtool_open_read(const char *_objname)
INIT_LIST_HEAD(&file.insn_list);
hash_init(file.insn_hash);
INIT_LIST_HEAD(&file.retpoline_call_list);
+ INIT_LIST_HEAD(&file.return_thunk_list);
INIT_LIST_HEAD(&file.static_call_list);
INIT_LIST_HEAD(&file.mcount_loc_list);
INIT_LIST_HEAD(&file.endbr_list);
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 897fc504918b..f075cf37a65e 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -4280,6 +4280,7 @@ static int trace__replay(struct trace *trace)
goto out;
evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_enter");
+ trace->syscalls.events.sys_enter = evsel;
/* older kernels have syscalls tp versus raw_syscalls */
if (evsel == NULL)
evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_enter");
@@ -4292,6 +4293,7 @@ static int trace__replay(struct trace *trace)
}
evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_exit");
+ trace->syscalls.events.sys_exit = evsel;
if (evsel == NULL)
evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_exit");
if (evsel &&
diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py
index 5f57d9829956..4339692a8d0b 100755
--- a/tools/perf/scripts/python/arm-cs-trace-disasm.py
+++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py
@@ -61,7 +61,7 @@ def get_optional(perf_dict, field):
def get_offset(perf_dict, field):
if field in perf_dict:
- return f"+0x{perf_dict[field]:x}"
+ return "+%#x" % perf_dict[field]
return ""
def get_dso_file_path(dso_name, dso_build_id):
@@ -76,7 +76,7 @@ def get_dso_file_path(dso_name, dso_build_id):
else:
append = "/elf"
- dso_path = f"{os.environ['PERF_BUILDID_DIR']}/{dso_name}/{dso_build_id}{append}"
+ dso_path = os.environ['PERF_BUILDID_DIR'] + "/" + dso_name + "/" + dso_build_id + append;
# Replace duplicate slash chars to single slash char
dso_path = dso_path.replace('//', '/', 1)
return dso_path
@@ -94,8 +94,8 @@ def read_disam(dso_fname, dso_start, start_addr, stop_addr):
start_addr = start_addr - dso_start;
stop_addr = stop_addr - dso_start;
disasm = [ options.objdump_name, "-d", "-z",
- f"--start-address=0x{start_addr:x}",
- f"--stop-address=0x{stop_addr:x}" ]
+ "--start-address="+format(start_addr,"#x"),
+ "--stop-address="+format(stop_addr,"#x") ]
disasm += [ dso_fname ]
disasm_output = check_output(disasm).decode('utf-8').split('\n')
disasm_cache[addr_range] = disasm_output
@@ -109,12 +109,14 @@ def print_disam(dso_fname, dso_start, start_addr, stop_addr):
m = disasm_re.search(line)
if m is None:
continue
- print(f"\t{line}")
+ print("\t" + line)
def print_sample(sample):
- print(f"Sample = {{ cpu: {sample['cpu']:04} addr: 0x{sample['addr']:016x} " \
- f"phys_addr: 0x{sample['phys_addr']:016x} ip: 0x{sample['ip']:016x} " \
- f"pid: {sample['pid']} tid: {sample['tid']} period: {sample['period']} time: {sample['time']} }}")
+ print("Sample = { cpu: %04d addr: 0x%016x phys_addr: 0x%016x ip: 0x%016x " \
+ "pid: %d tid: %d period: %d time: %d }" % \
+ (sample['cpu'], sample['addr'], sample['phys_addr'], \
+ sample['ip'], sample['pid'], sample['tid'], \
+ sample['period'], sample['time']))
def trace_begin():
print('ARM CoreSight Trace Data Assembler Dump')
@@ -131,7 +133,7 @@ def common_start_str(comm, sample):
cpu = sample["cpu"]
pid = sample["pid"]
tid = sample["tid"]
- return f"{comm:>16} {pid:>5}/{tid:<5} [{cpu:04}] {sec:9}.{ns:09} "
+ return "%16s %5u/%-5u [%04u] %9u.%09u " % (comm, pid, tid, cpu, sec, ns)
# This code is copied from intel-pt-events.py for printing source code
# line and symbols.
@@ -171,7 +173,7 @@ def print_srccode(comm, param_dict, sample, symbol, dso):
glb_line_number = line_number
glb_source_file_name = source_file_name
- print(f"{start_str}{src_str}")
+ print(start_str, src_str)
def process_event(param_dict):
global cache_size
@@ -188,7 +190,7 @@ def process_event(param_dict):
symbol = get_optional(param_dict, "symbol")
if (options.verbose == True):
- print(f"Event type: {name}")
+ print("Event type: %s" % name)
print_sample(sample)
# If cannot find dso so cannot dump assembler, bail out
@@ -197,7 +199,7 @@ def process_event(param_dict):
# Validate dso start and end addresses
if ((dso_start == '[unknown]') or (dso_end == '[unknown]')):
- print(f"Failed to find valid dso map for dso {dso}")
+ print("Failed to find valid dso map for dso %s" % dso)
return
if (name[0:12] == "instructions"):
@@ -244,15 +246,15 @@ def process_event(param_dict):
# Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4
if (start_addr == 0 and stop_addr == 4):
- print(f"CPU{cpu}: CS_ETM_TRACE_ON packet is inserted")
+ print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu)
return
if (start_addr < int(dso_start) or start_addr > int(dso_end)):
- print(f"Start address 0x{start_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}")
+ print("Start address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (start_addr, int(dso_start), int(dso_end), dso))
return
if (stop_addr < int(dso_start) or stop_addr > int(dso_end)):
- print(f"Stop address 0x{stop_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}")
+ print("Stop address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (stop_addr, int(dso_start), int(dso_end), dso))
return
if (options.objdump_name != None):
@@ -267,6 +269,6 @@ def process_event(param_dict):
if path.exists(dso_fname):
print_disam(dso_fname, dso_vm_start, start_addr, stop_addr)
else:
- print(f"Failed to find dso {dso} for address range [ 0x{start_addr:x} .. 0x{stop_addr:x} ]")
+ print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr, stop_addr))
print_srccode(comm, param_dict, sample, symbol, dso)
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index 4ad0dfbc8b21..7c7d20fc503a 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -20,8 +20,6 @@
#include "tsc.h"
#include "mmap.h"
#include "tests.h"
-#include "pmu.h"
-#include "pmu-hybrid.h"
/*
* Except x86_64/i386 and Arm64, other archs don't support TSC in perf. Just
@@ -106,28 +104,21 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
evlist__config(evlist, &opts, NULL);
- evsel = evlist__first(evlist);
-
- evsel->core.attr.comm = 1;
- evsel->core.attr.disabled = 1;
- evsel->core.attr.enable_on_exec = 0;
-
- /*
- * For hybrid "cycles:u", it creates two events.
- * Init the second evsel here.
- */
- if (perf_pmu__has_hybrid() && perf_pmu__hybrid_mounted("cpu_atom")) {
- evsel = evsel__next(evsel);
+ /* For hybrid "cycles:u", it creates two events */
+ evlist__for_each_entry(evlist, evsel) {
evsel->core.attr.comm = 1;
evsel->core.attr.disabled = 1;
evsel->core.attr.enable_on_exec = 0;
}
- if (evlist__open(evlist) == -ENOENT) {
- err = TEST_SKIP;
+ ret = evlist__open(evlist);
+ if (ret < 0) {
+ if (ret == -ENOENT)
+ err = TEST_SKIP;
+ else
+ pr_debug("evlist__open() failed\n");
goto out_err;
}
- CHECK__(evlist__open(evlist));
CHECK__(evlist__mmap(evlist, UINT_MAX));
@@ -167,10 +158,12 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
goto next_event;
if (strcmp(event->comm.comm, comm1) == 0) {
+ CHECK_NOT_NULL__(evsel = evlist__event2evsel(evlist, event));
CHECK__(evsel__parse_sample(evsel, event, &sample));
comm1_time = sample.time;
}
if (strcmp(event->comm.comm, comm2) == 0) {
+ CHECK_NOT_NULL__(evsel = evlist__event2evsel(evlist, event));
CHECK__(evsel__parse_sample(evsel, event, &sample));
comm2_time = sample.time;
}
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index f8ad581ea247..cdd6463a5b68 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -63,20 +63,16 @@ static struct hashmap *bpf_map_hash;
static struct bpf_perf_object *
bpf_perf_object__next(struct bpf_perf_object *prev)
{
- struct bpf_perf_object *next;
-
- if (!prev)
- next = list_first_entry(&bpf_objects_list,
- struct bpf_perf_object,
- list);
- else
- next = list_next_entry(prev, list);
+ if (!prev) {
+ if (list_empty(&bpf_objects_list))
+ return NULL;
- /* Empty list is noticed here so don't need checking on entry. */
- if (&next->list == &bpf_objects_list)
+ return list_first_entry(&bpf_objects_list, struct bpf_perf_object, list);
+ }
+ if (list_is_last(&prev->list, &bpf_objects_list))
return NULL;
- return next;
+ return list_next_entry(prev, list);
}
#define bpf_perf_object__for_each(perf_obj, tmp) \
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index ecd377938eea..b3be5b1d9dbb 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -233,6 +233,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
return NULL;
}
+static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr)
+{
+ size_t i, phdrnum;
+ u64 sz;
+
+ if (elf_getphdrnum(elf, &phdrnum))
+ return -1;
+
+ for (i = 0; i < phdrnum; i++) {
+ if (gelf_getphdr(elf, i, phdr) == NULL)
+ return -1;
+
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ sz = max(phdr->p_memsz, phdr->p_filesz);
+ if (!sz)
+ continue;
+
+ if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz))
+ return 0;
+ }
+
+ /* Not found any valid program header */
+ return -1;
+}
+
static bool want_demangle(bool is_kernel_sym)
{
return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
@@ -1209,6 +1236,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
sym.st_value);
used_opd = true;
}
+
/*
* When loading symbols in a data mapping, ABS symbols (which
* has a value of SHN_ABS in its st_shndx) failed at
@@ -1227,6 +1255,17 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
gelf_getshdr(sec, &shdr);
+ /*
+ * If the attribute bit SHF_ALLOC is not set, the section
+ * doesn't occupy memory during process execution.
+ * E.g. ".gnu.warning.*" section is used by linker to generate
+ * warnings when calling deprecated functions, the symbols in
+ * the section aren't loaded to memory during process execution,
+ * so skip them.
+ */
+ if (!(shdr.sh_flags & SHF_ALLOC))
+ continue;
+
secstrs = secstrs_sym;
/*
@@ -1262,11 +1301,20 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
goto out_elf_end;
} else if ((used_opd && runtime_ss->adjust_symbols) ||
(!used_opd && syms_ss->adjust_symbols)) {
+ GElf_Phdr phdr;
+
+ if (elf_read_program_header(syms_ss->elf,
+ (u64)sym.st_value, &phdr)) {
+ pr_warning("%s: failed to find program header for "
+ "symbol: %s st_value: %#" PRIx64 "\n",
+ __func__, elf_name, (u64)sym.st_value);
+ continue;
+ }
pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
- "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
- (u64)sym.st_value, (u64)shdr.sh_addr,
- (u64)shdr.sh_offset);
- sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+ "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n",
+ __func__, (u64)sym.st_value, (u64)phdr.p_vaddr,
+ (u64)phdr.p_offset);
+ sym.st_value -= phdr.p_vaddr - phdr.p_offset;
}
demangled = demangle_sym(dso, kmodule, elf_name);
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index d811cff73597..0a26c243e6e9 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -140,12 +140,12 @@ int use_after_invalid(void *ctx)
bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(read_data), 0, &ptr);
- bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);
+ bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
bpf_ringbuf_submit_dynptr(&ptr, 0);
/* this should fail */
- bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);
+ bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
return 0;
}
@@ -338,7 +338,7 @@ int invalid_helper2(void *ctx)
get_map_val_dynptr(&ptr);
/* this should fail */
- bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0);
+ bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0, 0);
return 0;
}
@@ -377,7 +377,7 @@ int invalid_write2(void *ctx)
memcpy((void *)&ptr + 8, &x, sizeof(x));
/* this should fail */
- bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);
+ bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
bpf_ringbuf_submit_dynptr(&ptr, 0);
@@ -473,7 +473,7 @@ int invalid_read2(void *ctx)
get_map_val_dynptr(&ptr);
/* this should fail */
- bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0);
+ bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0, 0);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index d67be48df4b2..a3a6103c8569 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -43,10 +43,10 @@ int test_read_write(void *ctx)
bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr);
/* Write data into the dynptr */
- err = err ?: bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data));
+ err = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
/* Read the data that was written into the dynptr */
- err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);
+ err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
/* Ensure the data we read matches the data we wrote */
for (i = 0; i < sizeof(read_data); i++) {
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index 71b306602368..616ed4019655 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -3,6 +3,6 @@
TEST_PROGS := gpio-mockup.sh gpio-sim.sh
TEST_FILES := gpio-mockup-sysfs.sh
TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev gpio-chip-info gpio-line-name
-CFLAGS += -O2 -g -Wall -I../../../../usr/include/
+CFLAGS += -O2 -g -Wall -I../../../../usr/include/ $(KHDR_INCLUDES)
include ../lib.mk
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index 4158da0da2bb..2237d1aac801 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -82,8 +82,9 @@ static int next_cpu(int cpu)
return cpu;
}
-static void *migration_worker(void *ign)
+static void *migration_worker(void *__rseq_tid)
{
+ pid_t rseq_tid = (pid_t)(unsigned long)__rseq_tid;
cpu_set_t allowed_mask;
int r, i, cpu;
@@ -106,7 +107,7 @@ static void *migration_worker(void *ign)
* stable, i.e. while changing affinity is in-progress.
*/
smp_wmb();
- r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask);
+ r = sched_setaffinity(rseq_tid, sizeof(allowed_mask), &allowed_mask);
TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
errno, strerror(errno));
smp_wmb();
@@ -231,7 +232,8 @@ int main(int argc, char *argv[])
vm = vm_create_default(VCPU_ID, 0, guest_code);
ucall_init(vm, NULL);
- pthread_create(&migration_thread, NULL, migration_worker, 0);
+ pthread_create(&migration_thread, NULL, migration_worker,
+ (void *)(unsigned long)gettid());
for (i = 0; !done; i++) {
vcpu_run(vm, VCPU_ID);
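The rseq_test change above makes the migration worker move the main (vCPU) thread rather than itself: the main thread passes its own tid into the worker, which then calls sched_setaffinity() on that tid instead of 0 (self). A self-contained sketch of the same pattern outside the selftest; all names here are illustrative:

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

static void *migrator(void *arg)
{
	pid_t target_tid = (pid_t)(unsigned long)arg;
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(0, &mask);	/* pin the *target* thread, not this worker, to CPU 0 */
	if (sched_setaffinity(target_tid, sizeof(mask), &mask))
		perror("sched_setaffinity");
	return NULL;
}

int main(void)
{
	pthread_t t;

	/* gettid() needs glibc >= 2.30; older libcs can use syscall(SYS_gettid) */
	pthread_create(&t, NULL, migrator, (void *)(unsigned long)gettid());
	pthread_join(t, NULL);
	return 0;
}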
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index a29f79618934..ffc35a22e914 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -36,4 +36,5 @@ test_unix_oob
gro
ioam6_parser
toeplitz
+tun
cmsg_sender
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index ddad703ace34..db05b3764b77 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -11,7 +11,7 @@ TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh
-TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh
+TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh fib_nexthop_nongw.sh
TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh
TEST_PROGS += route_localnet.sh
TEST_PROGS += reuseaddr_ports_exhausted.sh
diff --git a/tools/testing/selftests/net/fib_nexthop_nongw.sh b/tools/testing/selftests/net/fib_nexthop_nongw.sh
new file mode 100755
index 000000000000..b7b928b38ce4
--- /dev/null
+++ b/tools/testing/selftests/net/fib_nexthop_nongw.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# ns: h1                        | ns: h2
+#     192.168.0.1/24            |
+#     eth0                      |
+#                               |   192.168.1.1/32
+#           veth0 <---|---> veth1
+# Validate source address selection for route without gateway
+
+PAUSE_ON_FAIL=no
+VERBOSE=0
+ret=0
+
+################################################################################
+# helpers
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ fi
+
+ out=$(eval $cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo "$out"
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+################################################################################
+# config
+setup()
+{
+ ip netns add h1
+ ip -n h1 link set lo up
+ ip netns add h2
+ ip -n h2 link set lo up
+
+ # Add a fake eth0 to support an ip address
+ ip -n h1 link add name eth0 type dummy
+ ip -n h1 link set eth0 up
+ ip -n h1 address add 192.168.0.1/24 dev eth0
+
+ # Configure veths (same @mac, arp off)
+ ip -n h1 link add name veth0 type veth peer name veth1 netns h2
+ ip -n h1 link set veth0 up
+
+ ip -n h2 link set veth1 up
+
+ # Configure @IP in the peer netns
+ ip -n h2 address add 192.168.1.1/32 dev veth1
+ ip -n h2 route add default dev veth1
+
+ # Add a nexthop without @gw and use it in a route
+ ip -n h1 nexthop add id 1 dev veth0
+ ip -n h1 route add 192.168.1.1 nhid 1
+}
+
+cleanup()
+{
+ ip netns del h1 2>/dev/null
+ ip netns del h2 2>/dev/null
+}
+
+trap cleanup EXIT
+
+################################################################################
+# main
+
+while getopts :pv o
+do
+ case $o in
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=1;;
+ esac
+done
+
+cleanup
+setup
+
+run_cmd ip -netns h1 route get 192.168.1.1
+log_test $? 0 "nexthop: get route with nexthop without gw"
+run_cmd ip netns exec h1 ping -c1 192.168.1.1
+log_test $? 0 "nexthop: ping through nexthop without gw"
+
+exit $ret
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 8f481218a492..57b84e0c879e 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -37,6 +37,7 @@ TEST_PROGS = bridge_igmp.sh \
ipip_hier_gre_key.sh \
ipip_hier_gre_keys.sh \
ipip_hier_gre.sh \
+ local_termination.sh \
loopback.sh \
mirror_gre_bound.sh \
mirror_gre_bridge_1d.sh \
@@ -52,6 +53,7 @@ TEST_PROGS = bridge_igmp.sh \
mirror_gre_vlan_bridge_1q.sh \
mirror_gre_vlan.sh \
mirror_vlan.sh \
+ no_forwarding.sh \
pedit_dsfield.sh \
pedit_ip.sh \
pedit_l4port.sh \
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index f905d5358e68..48a99e1453e1 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -6,7 +6,7 @@ KSFT_KHDR_INSTALL := 1
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
- simult_flows.sh mptcp_sockopt.sh
+ simult_flows.sh mptcp_sockopt.sh userspace_pm.sh
TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq
diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h
index b86642f90d7f..3a391c9bf468 100644
--- a/tools/testing/selftests/rseq/rseq-riscv.h
+++ b/tools/testing/selftests/rseq/rseq-riscv.h
@@ -86,7 +86,7 @@ do { \
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
RSEQ_INJECT_ASM(1) \
- "la "RSEQ_ASM_TMP_REG_1 ", " __rseq_str(cs_label) "\n" \
+ "la " RSEQ_ASM_TMP_REG_1 ", " __rseq_str(cs_label) "\n" \
REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(rseq_cs) "]\n" \
__rseq_str(label) ":\n"
@@ -103,17 +103,17 @@ do { \
#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \
REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
- "bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
+ "bne " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
__rseq_str(label) "\n"
#define RSEQ_ASM_OP_CMPEQ32(var, expect, label) \
- "lw "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
- "bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
+ "lw " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
+ "bne " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
__rseq_str(label) "\n"
#define RSEQ_ASM_OP_CMPNE(var, expect, label) \
REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
- "beq "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
+ "beq " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
__rseq_str(label) "\n"
#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
@@ -127,12 +127,12 @@ do { \
REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n"
#define RSEQ_ASM_OP_R_LOAD_OFF(offset) \
- "add "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(offset) "], " \
+ "add " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(offset) "], " \
RSEQ_ASM_TMP_REG_1 "\n" \
REG_L RSEQ_ASM_TMP_REG_1 ", (" RSEQ_ASM_TMP_REG_1 ")\n"
#define RSEQ_ASM_OP_R_ADD(count) \
- "add "RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \
+ "add " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \
", %[" __rseq_str(count) "]\n"
#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \
@@ -194,8 +194,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "m" (*v),
[expect] "r" (expect),
[newv] "r" (newv)
@@ -251,8 +251,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "m" (*v),
[expectnot] "r" (expectnot),
[load] "m" (*load),
@@ -301,8 +301,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "m" (*v),
[count] "r" (count)
RSEQ_INJECT_INPUT
@@ -352,8 +352,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[expect] "r" (expect),
[v] "m" (*v),
[newv] "r" (newv),
@@ -411,8 +411,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[expect] "r" (expect),
[v] "m" (*v),
[newv] "r" (newv),
@@ -472,8 +472,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "m" (*v),
[expect] "r" (expect),
[v2] "m" (*v2),
@@ -532,8 +532,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[expect] "r" (expect),
[v] "m" (*v),
[newv] "r" (newv),
@@ -593,8 +593,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[expect] "r" (expect),
[v] "m" (*v),
[newv] "r" (newv),
@@ -651,8 +651,8 @@ int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[ptr] "r" (ptr),
[off] "er" (off),
[inc] "er" (inc)
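The whitespace-only hunks in rseq-riscv.h insert a space between each string literal and the RSEQ_ASM_TMP_REG_1 macro. The likely motivation (an assumption, not stated in the patch): when the literal and the identifier are glued together, C++11-style tokenization can treat the identifier as a user-defined-literal suffix, so the macro is never expanded and tools applying those rules warn or mis-parse the asm template. A tiny illustration:

#include <stdio.h>

#define REG "t1"

int main(void)
{
	/* Two tokens: REG expands and the literals concatenate to "li t1, 1\n". */
	const char *spaced = "li " REG ", 1\n";
	/*
	 * Glued form: a C++11-aware lexer may take REG as a user-defined-literal
	 * suffix on the string rather than a macro reference; plain C accepts it.
	 */
	const char *glued = "li "REG ", 1\n";

	printf("%s%s", spaced, glued);
	return 0;
}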
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index 986b9458efb2..4177f9507bbe 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -111,7 +111,8 @@ void rseq_init(void)
libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
- if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p) {
+ if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
+ *libc_rseq_size_p != 0) {
/* rseq registration owned by glibc */
rseq_offset = *libc_rseq_offset_p;
rseq_size = *libc_rseq_size_p;
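The rseq.c hunk tightens the "glibc owns rseq" detection: the exported __rseq_* symbols can be present even when glibc skipped registration, so the selftests now also require *__rseq_size to be non-zero before deferring to glibc. A minimal standalone sketch of that check; the symbol types follow the glibc documentation and should be treated as an assumption here:

#define _GNU_SOURCE
#include <dlfcn.h>
#include <stddef.h>
#include <stdio.h>

int main(void)
{
	ptrdiff_t *offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
	unsigned int *size_p = dlsym(RTLD_NEXT, "__rseq_size");
	unsigned int *flags_p = dlsym(RTLD_NEXT, "__rseq_flags");

	/* Only trust glibc when it actually registered rseq (size != 0). */
	if (offset_p && size_p && flags_p && *size_p != 0)
		printf("rseq owned by glibc (offset %td, size %u)\n",
		       *offset_p, *size_p);
	else
		printf("no glibc rseq registration; register rseq manually\n");
	return 0;
}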
diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c
index 9b68658b6bb8..5b98f3ee58a5 100644
--- a/tools/vm/slabinfo.c
+++ b/tools/vm/slabinfo.c
@@ -233,6 +233,24 @@ static unsigned long read_slab_obj(struct slabinfo *s, const char *name)
return l;
}
+static unsigned long read_debug_slab_obj(struct slabinfo *s, const char *name)
+{
+ char x[128];
+ FILE *f;
+ size_t l;
+
+ snprintf(x, 128, "/sys/kernel/debug/slab/%s/%s", s->name, name);
+ f = fopen(x, "r");
+ if (!f) {
+ buffer[0] = 0;
+ l = 0;
+ } else {
+ l = fread(buffer, 1, sizeof(buffer), f);
+ buffer[l] = 0;
+ fclose(f);
+ }
+ return l;
+}
/*
* Put a size string together
@@ -409,14 +427,18 @@ static void show_tracking(struct slabinfo *s)
{
printf("\n%s: Kernel object allocation\n", s->name);
printf("-----------------------------------------------------------------------\n");
- if (read_slab_obj(s, "alloc_calls"))
+ if (read_debug_slab_obj(s, "alloc_traces"))
+ printf("%s", buffer);
+ else if (read_slab_obj(s, "alloc_calls"))
printf("%s", buffer);
else
printf("No Data\n");
printf("\n%s: Kernel object freeing\n", s->name);
printf("------------------------------------------------------------------------\n");
- if (read_slab_obj(s, "free_calls"))
+ if (read_debug_slab_obj(s, "free_traces"))
+ printf("%s", buffer);
+ else if (read_slab_obj(s, "free_calls"))
printf("%s", buffer);
else
printf("No Data\n");
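The slabinfo change prefers the debugfs alloc_traces/free_traces files and falls back to the legacy alloc_calls/free_calls attributes only when the debugfs read yields nothing. A compact standalone sketch of the same read-with-fallback; the /sys/kernel/slab path for the legacy attribute is an assumption based on where slabinfo normally reads per-cache files:

#include <stdio.h>

static char buffer[4096];

static size_t read_file(const char *path)
{
	FILE *f = fopen(path, "r");
	size_t len = 0;

	if (f) {
		len = fread(buffer, 1, sizeof(buffer) - 1, f);
		fclose(f);
	}
	buffer[len] = 0;	/* always leave a terminated (possibly empty) buffer */
	return len;
}

static void show_alloc_tracking(const char *slab)
{
	char path[256];

	/* Prefer the debugfs trace file, then fall back to the legacy sysfs name. */
	snprintf(path, sizeof(path), "/sys/kernel/debug/slab/%s/alloc_traces", slab);
	if (!read_file(path)) {
		snprintf(path, sizeof(path), "/sys/kernel/slab/%s/alloc_calls", slab);
		read_file(path);
	}
	printf("%s", *buffer ? buffer : "No Data\n");
}

int main(void)
{
	show_alloc_tracking("kmalloc-64");
	return 0;
}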