Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 2
-rw-r--r--  arch/x86/Makefile | 23
-rw-r--r--  arch/x86/crypto/aesni-intel_asm.S | 66
-rw-r--r--  arch/x86/events/core.c | 2
-rw-r--r--  arch/x86/include/asm/atomic.h | 12
-rw-r--r--  arch/x86/include/asm/atomic64_32.h | 8
-rw-r--r--  arch/x86/include/asm/atomic64_64.h | 12
-rw-r--r--  arch/x86/include/asm/irqflags.h | 3
-rw-r--r--  arch/x86/include/asm/kdebug.h | 12
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 22
-rw-r--r--  arch/x86/include/asm/pgtable-3level.h | 7
-rw-r--r--  arch/x86/include/asm/pgtable.h | 2
-rw-r--r--  arch/x86/include/asm/pgtable_64.h | 20
-rw-r--r--  arch/x86/include/asm/processor.h | 4
-rw-r--r--  arch/x86/include/asm/signal.h | 7
-rw-r--r--  arch/x86/include/asm/stacktrace.h | 2
-rw-r--r--  arch/x86/include/asm/tlbflush.h | 40
-rw-r--r--  arch/x86/include/asm/vgtod.h | 2
-rw-r--r--  arch/x86/kernel/alternative.c | 9
-rw-r--r--  arch/x86/kernel/apic/vector.c | 2
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 46
-rw-r--r--  arch/x86/kernel/cpu/common.c | 1
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 3
-rw-r--r--  arch/x86/kernel/cpu/microcode/amd.c | 24
-rw-r--r--  arch/x86/kernel/cpu/microcode/intel.c | 17
-rw-r--r--  arch/x86/kernel/dumpstack.c | 31
-rw-r--r--  arch/x86/kernel/process_32.c | 4
-rw-r--r--  arch/x86/kernel/process_64.c | 12
-rw-r--r--  arch/x86/kernel/tsc.c | 2
-rw-r--r--  arch/x86/kvm/lapic.c | 27
-rw-r--r--  arch/x86/kvm/mmu.c | 26
-rw-r--r--  arch/x86/kvm/svm.c | 19
-rw-r--r--  arch/x86/kvm/vmx.c | 43
-rw-r--r--  arch/x86/kvm/x86.c | 28
-rw-r--r--  arch/x86/kvm/x86.h | 2
-rw-r--r--  arch/x86/lib/usercopy.c | 5
-rw-r--r--  arch/x86/mm/fault.c | 2
-rw-r--r--  arch/x86/mm/pageattr.c | 25
-rw-r--r--  arch/x86/mm/pgtable.c | 8
-rw-r--r--  arch/x86/mm/pti.c | 2
-rw-r--r--  arch/x86/mm/tlb.c | 7
-rw-r--r--  arch/x86/platform/efi/efi_32.c | 8
-rw-r--r--  arch/x86/xen/mmu_pv.c | 9
43 files changed, 400 insertions, 208 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c5ff296bc5d1..1a0be022f91d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2843,7 +2843,7 @@ config X86_SYSFB
This option, if enabled, marks VGA/VBE/EFI framebuffers as generic
framebuffers so the new generic system-framebuffer drivers can be
used on x86. If the framebuffer is not compatible with the generic
- modes, it is adverticed as fallback platform framebuffer so legacy
+ modes, it is advertised as fallback platform framebuffer so legacy
drivers like efifb, vesafb and uvesafb can pick it up.
If this option is not selected, all system framebuffers are always
marked as fallback platform framebuffers as usual.
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 94859241bc3e..8f6e7eb8ae9f 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -175,22 +175,6 @@ ifdef CONFIG_FUNCTION_GRAPH_TRACER
endif
endif
-ifndef CC_HAVE_ASM_GOTO
- $(error Compiler lacks asm-goto support.)
-endif
-
-#
-# Jump labels need '-maccumulate-outgoing-args' for gcc < 4.5.2 to prevent a
-# GCC bug (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226). There's no way
-# to test for this bug at compile-time because the test case needs to execute,
-# which is a no-go for cross compilers. So check the GCC version instead.
-#
-ifdef CONFIG_JUMP_LABEL
- ifneq ($(ACCUMULATE_OUTGOING_ARGS), 1)
- ACCUMULATE_OUTGOING_ARGS = $(call cc-if-fullversion, -lt, 040502, 1)
- endif
-endif
-
ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1)
# This compiler flag is not supported by Clang:
KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,)
@@ -312,6 +296,13 @@ PHONY += vdso_install
vdso_install:
$(Q)$(MAKE) $(build)=arch/x86/entry/vdso $@
+archprepare: checkbin
+checkbin:
+ifndef CC_HAVE_ASM_GOTO
+ @echo Compiler lacks asm-goto support.
+ @exit 1
+endif
+
archclean:
$(Q)rm -rf $(objtree)/arch/i386
$(Q)rm -rf $(objtree)/arch/x86_64
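For context, a minimal sketch (not taken from the tree; the function name is illustrative) of the GCC asm-goto construct that the CC_HAVE_ASM_GOTO/checkbin test probes for: inline assembly that may branch to a C label, which the kernel's jump-label machinery relies on.

	static inline _Bool asm_goto_probe(void)
	{
		/* Unconditional jump from asm straight to a C label. */
		asm goto("jmp %l[yes]" : /* no outputs */ : /* no inputs */
			 : /* no clobbers */ : yes);
		return 0;	/* not reached: the asm always jumps */
	yes:
		return 1;
	}
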
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 9bd139569b41..cb2deb61c5d9 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -223,34 +223,34 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
pcmpeqd TWOONE(%rip), \TMP2
pand POLY(%rip), \TMP2
pxor \TMP2, \TMP3
- movdqa \TMP3, HashKey(%arg2)
+ movdqu \TMP3, HashKey(%arg2)
movdqa \TMP3, \TMP5
pshufd $78, \TMP3, \TMP1
pxor \TMP3, \TMP1
- movdqa \TMP1, HashKey_k(%arg2)
+ movdqu \TMP1, HashKey_k(%arg2)
GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
# TMP5 = HashKey^2<<1 (mod poly)
- movdqa \TMP5, HashKey_2(%arg2)
+ movdqu \TMP5, HashKey_2(%arg2)
# HashKey_2 = HashKey^2<<1 (mod poly)
pshufd $78, \TMP5, \TMP1
pxor \TMP5, \TMP1
- movdqa \TMP1, HashKey_2_k(%arg2)
+ movdqu \TMP1, HashKey_2_k(%arg2)
GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
# TMP5 = HashKey^3<<1 (mod poly)
- movdqa \TMP5, HashKey_3(%arg2)
+ movdqu \TMP5, HashKey_3(%arg2)
pshufd $78, \TMP5, \TMP1
pxor \TMP5, \TMP1
- movdqa \TMP1, HashKey_3_k(%arg2)
+ movdqu \TMP1, HashKey_3_k(%arg2)
GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
# TMP5 = HashKey^3<<1 (mod poly)
- movdqa \TMP5, HashKey_4(%arg2)
+ movdqu \TMP5, HashKey_4(%arg2)
pshufd $78, \TMP5, \TMP1
pxor \TMP5, \TMP1
- movdqa \TMP1, HashKey_4_k(%arg2)
+ movdqu \TMP1, HashKey_4_k(%arg2)
.endm
# GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding.
@@ -271,7 +271,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv
PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
- movdqa HashKey(%arg2), %xmm13
+ movdqu HashKey(%arg2), %xmm13
CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \
%xmm4, %xmm5, %xmm6
@@ -997,7 +997,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pshufd $78, \XMM5, \TMP6
pxor \XMM5, \TMP6
paddd ONE(%rip), \XMM0 # INCR CNT
- movdqa HashKey_4(%arg2), \TMP5
+ movdqu HashKey_4(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
movdqa \XMM0, \XMM1
paddd ONE(%rip), \XMM0 # INCR CNT
@@ -1016,7 +1016,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pxor (%arg1), \XMM2
pxor (%arg1), \XMM3
pxor (%arg1), \XMM4
- movdqa HashKey_4_k(%arg2), \TMP5
+ movdqu HashKey_4_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
movaps 0x10(%arg1), \TMP1
AESENC \TMP1, \XMM1 # Round 1
@@ -1031,7 +1031,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM6, \TMP1
pshufd $78, \XMM6, \TMP2
pxor \XMM6, \TMP2
- movdqa HashKey_3(%arg2), \TMP5
+ movdqu HashKey_3(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
movaps 0x30(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 3
@@ -1044,7 +1044,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
- movdqa HashKey_3_k(%arg2), \TMP5
+ movdqu HashKey_3_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x50(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 5
@@ -1058,7 +1058,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM7, \TMP1
pshufd $78, \XMM7, \TMP2
pxor \XMM7, \TMP2
- movdqa HashKey_2(%arg2), \TMP5
+ movdqu HashKey_2(%arg2), \TMP5
# Multiply TMP5 * HashKey using karatsuba
@@ -1074,7 +1074,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
- movdqa HashKey_2_k(%arg2), \TMP5
+ movdqu HashKey_2_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x80(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 8
@@ -1092,7 +1092,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM8, \TMP1
pshufd $78, \XMM8, \TMP2
pxor \XMM8, \TMP2
- movdqa HashKey(%arg2), \TMP5
+ movdqu HashKey(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
movaps 0x90(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 9
@@ -1121,7 +1121,7 @@ aes_loop_par_enc_done\@:
AESENCLAST \TMP3, \XMM2
AESENCLAST \TMP3, \XMM3
AESENCLAST \TMP3, \XMM4
- movdqa HashKey_k(%arg2), \TMP5
+ movdqu HashKey_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movdqu (%arg4,%r11,1), \TMP3
pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
@@ -1205,7 +1205,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pshufd $78, \XMM5, \TMP6
pxor \XMM5, \TMP6
paddd ONE(%rip), \XMM0 # INCR CNT
- movdqa HashKey_4(%arg2), \TMP5
+ movdqu HashKey_4(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
movdqa \XMM0, \XMM1
paddd ONE(%rip), \XMM0 # INCR CNT
@@ -1224,7 +1224,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pxor (%arg1), \XMM2
pxor (%arg1), \XMM3
pxor (%arg1), \XMM4
- movdqa HashKey_4_k(%arg2), \TMP5
+ movdqu HashKey_4_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
movaps 0x10(%arg1), \TMP1
AESENC \TMP1, \XMM1 # Round 1
@@ -1239,7 +1239,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM6, \TMP1
pshufd $78, \XMM6, \TMP2
pxor \XMM6, \TMP2
- movdqa HashKey_3(%arg2), \TMP5
+ movdqu HashKey_3(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
movaps 0x30(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 3
@@ -1252,7 +1252,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
- movdqa HashKey_3_k(%arg2), \TMP5
+ movdqu HashKey_3_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x50(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 5
@@ -1266,7 +1266,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM7, \TMP1
pshufd $78, \XMM7, \TMP2
pxor \XMM7, \TMP2
- movdqa HashKey_2(%arg2), \TMP5
+ movdqu HashKey_2(%arg2), \TMP5
# Multiply TMP5 * HashKey using karatsuba
@@ -1282,7 +1282,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
- movdqa HashKey_2_k(%arg2), \TMP5
+ movdqu HashKey_2_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x80(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 8
@@ -1300,7 +1300,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM8, \TMP1
pshufd $78, \XMM8, \TMP2
pxor \XMM8, \TMP2
- movdqa HashKey(%arg2), \TMP5
+ movdqu HashKey(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
movaps 0x90(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 9
@@ -1329,7 +1329,7 @@ aes_loop_par_dec_done\@:
AESENCLAST \TMP3, \XMM2
AESENCLAST \TMP3, \XMM3
AESENCLAST \TMP3, \XMM4
- movdqa HashKey_k(%arg2), \TMP5
+ movdqu HashKey_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movdqu (%arg4,%r11,1), \TMP3
pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
@@ -1405,10 +1405,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
movdqa \XMM1, \TMP6
pshufd $78, \XMM1, \TMP2
pxor \XMM1, \TMP2
- movdqa HashKey_4(%arg2), \TMP5
+ movdqu HashKey_4(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0
- movdqa HashKey_4_k(%arg2), \TMP4
+ movdqu HashKey_4_k(%arg2), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movdqa \XMM1, \XMMDst
movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1
@@ -1418,10 +1418,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
movdqa \XMM2, \TMP1
pshufd $78, \XMM2, \TMP2
pxor \XMM2, \TMP2
- movdqa HashKey_3(%arg2), \TMP5
+ movdqu HashKey_3(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0
- movdqa HashKey_3_k(%arg2), \TMP4
+ movdqu HashKey_3_k(%arg2), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
pxor \TMP1, \TMP6
pxor \XMM2, \XMMDst
@@ -1433,10 +1433,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
movdqa \XMM3, \TMP1
pshufd $78, \XMM3, \TMP2
pxor \XMM3, \TMP2
- movdqa HashKey_2(%arg2), \TMP5
+ movdqu HashKey_2(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0
- movdqa HashKey_2_k(%arg2), \TMP4
+ movdqu HashKey_2_k(%arg2), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
pxor \TMP1, \TMP6
pxor \XMM3, \XMMDst
@@ -1446,10 +1446,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
movdqa \XMM4, \TMP1
pshufd $78, \XMM4, \TMP2
pxor \XMM4, \TMP2
- movdqa HashKey(%arg2), \TMP5
+ movdqu HashKey(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0
- movdqa HashKey_k(%arg2), \TMP4
+ movdqu HashKey_k(%arg2), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
pxor \TMP1, \TMP6
pxor \XMM4, \XMMDst
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 5f4829f10129..dfb2f7c0d019 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2465,7 +2465,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
perf_callchain_store(entry, regs->ip);
- if (!current->mm)
+ if (!nmi_uaccess_okay())
return;
if (perf_callchain_user32(regs, entry))
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index b143717b92b3..ce84388e540c 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -80,11 +80,11 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
* true if the result is zero, or false for all
* other cases.
*/
-#define arch_atomic_sub_and_test arch_atomic_sub_and_test
static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e);
}
+#define arch_atomic_sub_and_test arch_atomic_sub_and_test
/**
* arch_atomic_inc - increment atomic variable
@@ -92,12 +92,12 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
*
* Atomically increments @v by 1.
*/
-#define arch_atomic_inc arch_atomic_inc
static __always_inline void arch_atomic_inc(atomic_t *v)
{
asm volatile(LOCK_PREFIX "incl %0"
: "+m" (v->counter));
}
+#define arch_atomic_inc arch_atomic_inc
/**
* arch_atomic_dec - decrement atomic variable
@@ -105,12 +105,12 @@ static __always_inline void arch_atomic_inc(atomic_t *v)
*
* Atomically decrements @v by 1.
*/
-#define arch_atomic_dec arch_atomic_dec
static __always_inline void arch_atomic_dec(atomic_t *v)
{
asm volatile(LOCK_PREFIX "decl %0"
: "+m" (v->counter));
}
+#define arch_atomic_dec arch_atomic_dec
/**
* arch_atomic_dec_and_test - decrement and test
@@ -120,11 +120,11 @@ static __always_inline void arch_atomic_dec(atomic_t *v)
* returns true if the result is 0, or false for all other
* cases.
*/
-#define arch_atomic_dec_and_test arch_atomic_dec_and_test
static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e);
}
+#define arch_atomic_dec_and_test arch_atomic_dec_and_test
/**
* arch_atomic_inc_and_test - increment and test
@@ -134,11 +134,11 @@ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
* and returns true if the result is zero, or false for all
* other cases.
*/
-#define arch_atomic_inc_and_test arch_atomic_inc_and_test
static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e);
}
+#define arch_atomic_inc_and_test arch_atomic_inc_and_test
/**
* arch_atomic_add_negative - add and test if negative
@@ -149,11 +149,11 @@ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
-#define arch_atomic_add_negative arch_atomic_add_negative
static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s);
}
+#define arch_atomic_add_negative arch_atomic_add_negative
/**
* arch_atomic_add_return - add integer and return
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index ef959f02d070..6a5b0ec460da 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -205,12 +205,12 @@ static inline long long arch_atomic64_sub(long long i, atomic64_t *v)
*
* Atomically increments @v by 1.
*/
-#define arch_atomic64_inc arch_atomic64_inc
static inline void arch_atomic64_inc(atomic64_t *v)
{
__alternative_atomic64(inc, inc_return, /* no output */,
"S" (v) : "memory", "eax", "ecx", "edx");
}
+#define arch_atomic64_inc arch_atomic64_inc
/**
* arch_atomic64_dec - decrement atomic64 variable
@@ -218,12 +218,12 @@ static inline void arch_atomic64_inc(atomic64_t *v)
*
* Atomically decrements @v by 1.
*/
-#define arch_atomic64_dec arch_atomic64_dec
static inline void arch_atomic64_dec(atomic64_t *v)
{
__alternative_atomic64(dec, dec_return, /* no output */,
"S" (v) : "memory", "eax", "ecx", "edx");
}
+#define arch_atomic64_dec arch_atomic64_dec
/**
* arch_atomic64_add_unless - add unless the number is a given value
@@ -245,7 +245,6 @@ static inline int arch_atomic64_add_unless(atomic64_t *v, long long a,
return (int)a;
}
-#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
{
int r;
@@ -253,8 +252,8 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
"S" (v) : "ecx", "edx", "memory");
return r;
}
+#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
-#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
{
long long r;
@@ -262,6 +261,7 @@ static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
"S" (v) : "ecx", "memory");
return r;
}
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
#undef alternative_atomic64
#undef __alternative_atomic64
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 4343d9b4f30e..5f851d92eecd 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -71,11 +71,11 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
* true if the result is zero, or false for all
* other cases.
*/
-#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e);
}
+#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
/**
* arch_atomic64_inc - increment atomic64 variable
@@ -83,13 +83,13 @@ static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
*
* Atomically increments @v by 1.
*/
-#define arch_atomic64_inc arch_atomic64_inc
static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "incq %0"
: "=m" (v->counter)
: "m" (v->counter));
}
+#define arch_atomic64_inc arch_atomic64_inc
/**
* arch_atomic64_dec - decrement atomic64 variable
@@ -97,13 +97,13 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
*
* Atomically decrements @v by 1.
*/
-#define arch_atomic64_dec arch_atomic64_dec
static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "decq %0"
: "=m" (v->counter)
: "m" (v->counter));
}
+#define arch_atomic64_dec arch_atomic64_dec
/**
* arch_atomic64_dec_and_test - decrement and test
@@ -113,11 +113,11 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
* returns true if the result is 0, or false for all other
* cases.
*/
-#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e);
}
+#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
/**
* arch_atomic64_inc_and_test - increment and test
@@ -127,11 +127,11 @@ static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
* and returns true if the result is zero, or false for all
* other cases.
*/
-#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e);
}
+#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
/**
* arch_atomic64_add_negative - add and test if negative
@@ -142,11 +142,11 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
-#define arch_atomic64_add_negative arch_atomic64_add_negative
static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s);
}
+#define arch_atomic64_add_negative arch_atomic64_add_negative
/**
* arch_atomic64_add_return - add and return
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index c14f2a74b2be..15450a675031 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -33,7 +33,8 @@ extern inline unsigned long native_save_fl(void)
return flags;
}
-static inline void native_restore_fl(unsigned long flags)
+extern inline void native_restore_fl(unsigned long flags);
+extern inline void native_restore_fl(unsigned long flags)
{
asm volatile("push %0 ; popf"
: /* no output */
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 395c9631e000..75f1e35e7c15 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -22,10 +22,20 @@ enum die_val {
DIE_NMIUNKNOWN,
};
+enum show_regs_mode {
+ SHOW_REGS_SHORT,
+ /*
+ * For when userspace crashed, but we don't think it's our fault, and
+ * therefore don't print kernel registers.
+ */
+ SHOW_REGS_USER,
+ SHOW_REGS_ALL
+};
+
extern void die(const char *, struct pt_regs *,long);
extern int __must_check __die(const char *, struct pt_regs *, long);
extern void show_stack_regs(struct pt_regs *regs);
-extern void __show_regs(struct pt_regs *regs, int all);
+extern void __show_regs(struct pt_regs *regs, enum show_regs_mode);
extern void show_iret_regs(struct pt_regs *regs);
extern unsigned long oops_begin(void);
extern void oops_end(unsigned long, struct pt_regs *, int signr);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 00ddb0c9e612..8e90488c3d56 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1237,19 +1237,12 @@ enum emulation_result {
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
#define EMULTYPE_SKIP (1 << 2)
-#define EMULTYPE_RETRY (1 << 3)
-#define EMULTYPE_NO_REEXECUTE (1 << 4)
-#define EMULTYPE_NO_UD_ON_FAIL (1 << 5)
-#define EMULTYPE_VMWARE (1 << 6)
-int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
- int emulation_type, void *insn, int insn_len);
-
-static inline int emulate_instruction(struct kvm_vcpu *vcpu,
- int emulation_type)
-{
- return x86_emulate_instruction(vcpu, 0,
- emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0);
-}
+#define EMULTYPE_ALLOW_RETRY (1 << 3)
+#define EMULTYPE_NO_UD_ON_FAIL (1 << 4)
+#define EMULTYPE_VMWARE (1 << 5)
+int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
+int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
+ void *insn, int insn_len);
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
@@ -1450,7 +1443,6 @@ asmlinkage void kvm_spurious_fault(void);
____kvm_handle_fault_on_reboot(insn, "")
#define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
@@ -1463,7 +1455,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
- unsigned long ipi_bitmap_high, int min,
+ unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit);
u64 kvm_get_arch_capabilities(void);
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index a564084c6141..f8b1ad2c3828 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_PGTABLE_3LEVEL_H
#define _ASM_X86_PGTABLE_3LEVEL_H
+#include <asm/atomic64_32.h>
+
/*
* Intel Physical Address Extension (PAE) Mode - three-level page
* tables on PPro+ CPUs.
@@ -150,10 +152,7 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
{
pte_t res;
- /* xchg acts as a barrier before the setting of the high bits */
- res.pte_low = xchg(&ptep->pte_low, 0);
- res.pte_high = ptep->pte_high;
- ptep->pte_high = 0;
+ res.pte = (pteval_t)arch_atomic64_xchg((atomic64_t *)ptep, 0);
return res;
}
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index e4ffa565a69f..690c0307afed 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1195,7 +1195,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
return xchg(pmdp, pmd);
} else {
pmd_t old = *pmdp;
- *pmdp = pmd;
+ WRITE_ONCE(*pmdp, pmd);
return old;
}
}
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index f773d5e6c8cc..ce2b59047cb8 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -55,15 +55,15 @@ struct mm_struct;
void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte);
void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
-static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
+static inline void native_set_pte(pte_t *ptep, pte_t pte)
{
- *ptep = native_make_pte(0);
+ WRITE_ONCE(*ptep, pte);
}
-static inline void native_set_pte(pte_t *ptep, pte_t pte)
+static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
{
- *ptep = pte;
+ native_set_pte(ptep, native_make_pte(0));
}
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
@@ -73,7 +73,7 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
- *pmdp = pmd;
+ WRITE_ONCE(*pmdp, pmd);
}
static inline void native_pmd_clear(pmd_t *pmd)
@@ -109,7 +109,7 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
static inline void native_set_pud(pud_t *pudp, pud_t pud)
{
- *pudp = pud;
+ WRITE_ONCE(*pudp, pud);
}
static inline void native_pud_clear(pud_t *pud)
@@ -137,13 +137,13 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
pgd_t pgd;
if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
- *p4dp = p4d;
+ WRITE_ONCE(*p4dp, p4d);
return;
}
pgd = native_make_pgd(native_p4d_val(p4d));
pgd = pti_set_user_pgtbl((pgd_t *)p4dp, pgd);
- *p4dp = native_make_p4d(native_pgd_val(pgd));
+ WRITE_ONCE(*p4dp, native_make_p4d(native_pgd_val(pgd)));
}
static inline void native_p4d_clear(p4d_t *p4d)
@@ -153,7 +153,7 @@ static inline void native_p4d_clear(p4d_t *p4d)
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
- *pgdp = pti_set_user_pgtbl(pgdp, pgd);
+ WRITE_ONCE(*pgdp, pti_set_user_pgtbl(pgdp, pgd));
}
static inline void native_pgd_clear(pgd_t *pgd)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c24297268ebc..d53c54b842da 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -132,6 +132,8 @@ struct cpuinfo_x86 {
/* Index into per_cpu list: */
u16 cpu_index;
u32 microcode;
+ /* Address space bits used by the cache internally */
+ u8 x86_cache_bits;
unsigned initialized : 1;
} __randomize_layout;
@@ -183,7 +185,7 @@ extern void cpu_detect(struct cpuinfo_x86 *c);
static inline unsigned long long l1tf_pfn_limit(void)
{
- return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT);
+ return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT);
}
extern void early_cpu_init(void);
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 5f9012ff52ed..33d3c88a7225 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -39,6 +39,7 @@ extern void do_signal(struct pt_regs *regs);
#define __ARCH_HAS_SA_RESTORER
+#include <asm/asm.h>
#include <uapi/asm/sigcontext.h>
#ifdef __i386__
@@ -86,9 +87,9 @@ static inline int __const_sigismember(sigset_t *set, int _sig)
static inline int __gen_sigismember(sigset_t *set, int _sig)
{
- unsigned char ret;
- asm("btl %2,%1\n\tsetc %0"
- : "=qm"(ret) : "m"(*set), "Ir"(_sig-1) : "cc");
+ bool ret;
+ asm("btl %2,%1" CC_SET(c)
+ : CC_OUT(c) (ret) : "m"(*set), "Ir"(_sig-1));
return ret;
}
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index b6dc698f992a..f335aad404a4 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -111,6 +111,6 @@ static inline unsigned long caller_frame_pointer(void)
return (unsigned long)frame;
}
-void show_opcodes(u8 *rip, const char *loglvl);
+void show_opcodes(struct pt_regs *regs, const char *loglvl);
void show_ip(struct pt_regs *regs, const char *loglvl);
#endif /* _ASM_X86_STACKTRACE_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 29c9da6c62fc..58ce5288878e 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -175,8 +175,16 @@ struct tlb_state {
* are on. This means that it may not match current->active_mm,
* which will contain the previous user mm when we're in lazy TLB
* mode even if we've already switched back to swapper_pg_dir.
+ *
+ * During switch_mm_irqs_off(), loaded_mm will be set to
+ * LOADED_MM_SWITCHING during the brief interrupts-off window
+ * when CR3 and loaded_mm would otherwise be inconsistent. This
+ * is for nmi_uaccess_okay()'s benefit.
*/
struct mm_struct *loaded_mm;
+
+#define LOADED_MM_SWITCHING ((struct mm_struct *)1)
+
u16 loaded_mm_asid;
u16 next_asid;
/* last user mm's ctx id */
@@ -246,6 +254,38 @@ struct tlb_state {
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
+/*
+ * Blindly accessing user memory from NMI context can be dangerous
+ * if we're in the middle of switching the current user task or
+ * switching the loaded mm. It can also be dangerous if we
+ * interrupted some kernel code that was temporarily using a
+ * different mm.
+ */
+static inline bool nmi_uaccess_okay(void)
+{
+ struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+ struct mm_struct *current_mm = current->mm;
+
+ VM_WARN_ON_ONCE(!loaded_mm);
+
+ /*
+ * The condition we want to check is
+ * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though,
+ * if we're running in a VM with shadow paging, and nmi_uaccess_okay()
+ * is supposed to be reasonably fast.
+ *
+ * Instead, we check the almost equivalent but somewhat conservative
+ * condition below, and we rely on the fact that switch_mm_irqs_off()
+ * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3.
+ */
+ if (loaded_mm != current_mm)
+ return false;
+
+ VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa()));
+
+ return true;
+}
+
/* Initialize cr4 shadow for this CPU. */
static inline void cr4_init_shadow(void)
{
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index fb856c9f0449..53748541c487 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -93,7 +93,7 @@ static inline unsigned int __getcpu(void)
*
* If RDPID is available, use it.
*/
- alternative_io ("lsl %[p],%[seg]",
+ alternative_io ("lsl %[seg],%[p]",
".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
X86_FEATURE_RDPID,
[p] "=a" (p), [seg] "r" (__PER_CPU_SEG));
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 014f214da581..b9d5e7c9ef43 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -684,8 +684,6 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode,
* It means the size must be writable atomically and the address must be aligned
* in a way that permits an atomic write. It also makes sure we fit on a single
* page.
- *
- * Note: Must be called under text_mutex.
*/
void *text_poke(void *addr, const void *opcode, size_t len)
{
@@ -700,6 +698,8 @@ void *text_poke(void *addr, const void *opcode, size_t len)
*/
BUG_ON(!after_bootmem);
+ lockdep_assert_held(&text_mutex);
+
if (!core_kernel_text((unsigned long)addr)) {
pages[0] = vmalloc_to_page(addr);
pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
@@ -782,8 +782,6 @@ int poke_int3_handler(struct pt_regs *regs)
* - replace the first byte (int3) by the first byte of
* replacing opcode
* - sync cores
- *
- * Note: must be called under text_mutex.
*/
void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
{
@@ -792,6 +790,9 @@ void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
bp_int3_handler = handler;
bp_int3_addr = (u8 *)addr + sizeof(int3);
bp_patching_in_progress = true;
+
+ lockdep_assert_held(&text_mutex);
+
/*
* Corresponding read barrier in int3 notifier for making sure the
* in_progress and handler are correctly ordered wrt. patching.
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 9f148e3d45b4..7654febd5102 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -413,7 +413,7 @@ static int activate_managed(struct irq_data *irqd)
if (WARN_ON_ONCE(cpumask_empty(vector_searchmask))) {
/* Something in the core code broke! Survive gracefully */
pr_err("Managed startup for irq %u, but no CPU\n", irqd->irq);
- return EINVAL;
+ return -EINVAL;
}
ret = assign_managed_vector(irqd, vector_searchmask);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 4c2313d0b9ca..40bdaea97fe7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -668,6 +668,45 @@ EXPORT_SYMBOL_GPL(l1tf_mitigation);
enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation);
+/*
+ * These CPUs all support 44bits physical address space internally in the
+ * cache but CPUID can report a smaller number of physical address bits.
+ *
+ * The L1TF mitigation uses the top most address bit for the inversion of
+ * non present PTEs. When the installed memory reaches into the top most
+ * address bit due to memory holes, which has been observed on machines
+ * which report 36bits physical address bits and have 32G RAM installed,
+ * then the mitigation range check in l1tf_select_mitigation() triggers.
+ * This is a false positive because the mitigation is still possible due to
+ * the fact that the cache uses 44bit internally. Use the cache bits
+ * instead of the reported physical bits and adjust them on the affected
+ * machines to 44bit if the reported bits are less than 44.
+ */
+static void override_cache_bits(struct cpuinfo_x86 *c)
+{
+ if (c->x86 != 6)
+ return;
+
+ switch (c->x86_model) {
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_SANDYBRIDGE:
+ case INTEL_FAM6_IVYBRIDGE:
+ case INTEL_FAM6_HASWELL_CORE:
+ case INTEL_FAM6_HASWELL_ULT:
+ case INTEL_FAM6_HASWELL_GT3E:
+ case INTEL_FAM6_BROADWELL_CORE:
+ case INTEL_FAM6_BROADWELL_GT3E:
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
+ if (c->x86_cache_bits < 44)
+ c->x86_cache_bits = 44;
+ break;
+ }
+}
+
static void __init l1tf_select_mitigation(void)
{
u64 half_pa;
@@ -675,6 +714,8 @@ static void __init l1tf_select_mitigation(void)
if (!boot_cpu_has_bug(X86_BUG_L1TF))
return;
+ override_cache_bits(&boot_cpu_data);
+
switch (l1tf_mitigation) {
case L1TF_MITIGATION_OFF:
case L1TF_MITIGATION_FLUSH_NOWARN:
@@ -694,11 +735,6 @@ static void __init l1tf_select_mitigation(void)
return;
#endif
- /*
- * This is extremely unlikely to happen because almost all
- * systems have far more MAX_PA/2 than RAM can be fit into
- * DIMM slots.
- */
half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) {
pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
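As a quick check of the arithmetic behind this (assuming 4 KiB pages, i.e. PAGE_SHIFT == 12): with the reported 36 physical address bits, l1tf_pfn_limit() is BIT_ULL(36 - 1 - 12) = 2^23 PFNs, so half_pa works out to 2^23 << 12 = 32 GiB, and a 32 GiB machine whose memory map has holes reaching past that mark trips the e820 check; with x86_cache_bits raised to 44 the limit becomes BIT_ULL(44 - 1 - 12) = 2^31 PFNs, a half_pa of 8 TiB, and the false positive disappears.
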
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 84dee5ab745a..44c4ef3d989b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -919,6 +919,7 @@ void get_cpu_address_sizes(struct cpuinfo_x86 *c)
else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
c->x86_phys_bits = 36;
#endif
+ c->x86_cache_bits = c->x86_phys_bits;
}
static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 401e8c133108..fc3c07fe7df5 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -150,6 +150,9 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_HYPERVISOR))
return false;
+ if (c->x86 != 6)
+ return false;
+
for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
if (c->x86_model == spectre_bad_microcodes[i].model &&
c->x86_stepping == spectre_bad_microcodes[i].stepping)
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 0624957aa068..07b5fc00b188 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -504,6 +504,7 @@ static enum ucode_state apply_microcode_amd(int cpu)
struct microcode_amd *mc_amd;
struct ucode_cpu_info *uci;
struct ucode_patch *p;
+ enum ucode_state ret;
u32 rev, dummy;
BUG_ON(raw_smp_processor_id() != cpu);
@@ -521,9 +522,8 @@ static enum ucode_state apply_microcode_amd(int cpu)
/* need to apply patch? */
if (rev >= mc_amd->hdr.patch_id) {
- c->microcode = rev;
- uci->cpu_sig.rev = rev;
- return UCODE_OK;
+ ret = UCODE_OK;
+ goto out;
}
if (__apply_microcode_amd(mc_amd)) {
@@ -531,13 +531,21 @@ static enum ucode_state apply_microcode_amd(int cpu)
cpu, mc_amd->hdr.patch_id);
return UCODE_ERROR;
}
- pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
- mc_amd->hdr.patch_id);
- uci->cpu_sig.rev = mc_amd->hdr.patch_id;
- c->microcode = mc_amd->hdr.patch_id;
+ rev = mc_amd->hdr.patch_id;
+ ret = UCODE_UPDATED;
+
+ pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
- return UCODE_UPDATED;
+out:
+ uci->cpu_sig.rev = rev;
+ c->microcode = rev;
+
+ /* Update boot_cpu_data's revision too, if we're on the BSP: */
+ if (c->cpu_index == boot_cpu_data.cpu_index)
+ boot_cpu_data.microcode = rev;
+
+ return ret;
}
static int install_equiv_cpu_table(const u8 *buf)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 97ccf4c3b45b..16936a24795c 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -795,6 +795,7 @@ static enum ucode_state apply_microcode_intel(int cpu)
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_intel *mc;
+ enum ucode_state ret;
static int prev_rev;
u32 rev;
@@ -817,9 +818,8 @@ static enum ucode_state apply_microcode_intel(int cpu)
*/
rev = intel_get_microcode_revision();
if (rev >= mc->hdr.rev) {
- uci->cpu_sig.rev = rev;
- c->microcode = rev;
- return UCODE_OK;
+ ret = UCODE_OK;
+ goto out;
}
/*
@@ -848,10 +848,17 @@ static enum ucode_state apply_microcode_intel(int cpu)
prev_rev = rev;
}
+ ret = UCODE_UPDATED;
+
+out:
uci->cpu_sig.rev = rev;
- c->microcode = rev;
+ c->microcode = rev;
+
+ /* Update boot_cpu_data's revision too, if we're on the BSP: */
+ if (c->cpu_index == boot_cpu_data.cpu_index)
+ boot_cpu_data.microcode = rev;
- return UCODE_UPDATED;
+ return ret;
}
static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 9c8652974f8e..2b5886401e5f 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -17,6 +17,7 @@
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>
+#include <linux/kasan.h>
#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
@@ -89,14 +90,24 @@ static void printk_stack_address(unsigned long address, int reliable,
* Thus, the 2/3rds prologue and 64 byte OPCODE_BUFSIZE is just a random
* guesstimate in attempt to achieve all of the above.
*/
-void show_opcodes(u8 *rip, const char *loglvl)
+void show_opcodes(struct pt_regs *regs, const char *loglvl)
{
#define PROLOGUE_SIZE 42
#define EPILOGUE_SIZE 21
#define OPCODE_BUFSIZE (PROLOGUE_SIZE + 1 + EPILOGUE_SIZE)
u8 opcodes[OPCODE_BUFSIZE];
+ unsigned long prologue = regs->ip - PROLOGUE_SIZE;
+ bool bad_ip;
- if (probe_kernel_read(opcodes, rip - PROLOGUE_SIZE, OPCODE_BUFSIZE)) {
+ /*
+ * Make sure userspace isn't trying to trick us into dumping kernel
+ * memory by pointing the userspace instruction pointer at it.
+ */
+ bad_ip = user_mode(regs) &&
+ __chk_range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX);
+
+ if (bad_ip || probe_kernel_read(opcodes, (u8 *)prologue,
+ OPCODE_BUFSIZE)) {
printk("%sCode: Bad RIP value.\n", loglvl);
} else {
printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %"
@@ -112,7 +123,7 @@ void show_ip(struct pt_regs *regs, const char *loglvl)
#else
printk("%sRIP: %04x:%pS\n", loglvl, (int)regs->cs, (void *)regs->ip);
#endif
- show_opcodes((u8 *)regs->ip, loglvl);
+ show_opcodes(regs, loglvl);
}
void show_iret_regs(struct pt_regs *regs)
@@ -135,7 +146,7 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
* they can be printed in the right context.
*/
if (!partial && on_stack(info, regs, sizeof(*regs))) {
- __show_regs(regs, 0);
+ __show_regs(regs, SHOW_REGS_SHORT);
} else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
IRET_FRAME_SIZE)) {
@@ -333,7 +344,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
oops_exit();
/* Executive summary in case the oops scrolled away */
- __show_regs(&exec_summary_regs, true);
+ __show_regs(&exec_summary_regs, SHOW_REGS_ALL);
if (!signr)
return;
@@ -346,7 +357,10 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
* We're not going to return, but we might be on an IST stack or
* have very little stack space left. Rewind the stack and kill
* the task.
+ * Before we rewind the stack, we have to tell KASAN that we're going to
+ * reuse the task stack and that existing poisons are invalid.
*/
+ kasan_unpoison_task_stack(current);
rewind_stack_do_exit(signr);
}
NOKPROBE_SYMBOL(oops_end);
@@ -393,14 +407,9 @@ void die(const char *str, struct pt_regs *regs, long err)
void show_regs(struct pt_regs *regs)
{
- bool all = true;
-
show_regs_print_info(KERN_DEFAULT);
- if (IS_ENABLED(CONFIG_X86_32))
- all = !user_mode(regs);
-
- __show_regs(regs, all);
+ __show_regs(regs, user_mode(regs) ? SHOW_REGS_USER : SHOW_REGS_ALL);
/*
* When in-kernel, we also print out the stack at the time of the fault..
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 2924fd447e61..5046a3c9dec2 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -59,7 +59,7 @@
#include <asm/intel_rdt_sched.h>
#include <asm/proto.h>
-void __show_regs(struct pt_regs *regs, int all)
+void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
unsigned long d0, d1, d2, d3, d6, d7;
@@ -85,7 +85,7 @@ void __show_regs(struct pt_regs *regs, int all)
printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n",
(u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags);
- if (!all)
+ if (mode != SHOW_REGS_ALL)
return;
cr0 = read_cr0();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index a451bc374b9b..ea5ea850348d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -62,7 +62,7 @@
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
/* Prints also some state that isn't saved in the pt_regs */
-void __show_regs(struct pt_regs *regs, int all)
+void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
unsigned long d0, d1, d2, d3, d6, d7;
@@ -87,9 +87,17 @@ void __show_regs(struct pt_regs *regs, int all)
printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
regs->r13, regs->r14, regs->r15);
- if (!all)
+ if (mode == SHOW_REGS_SHORT)
return;
+ if (mode == SHOW_REGS_USER) {
+ rdmsrl(MSR_FS_BASE, fs);
+ rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
+ printk(KERN_DEFAULT "FS: %016lx GS: %016lx\n",
+ fs, shadowgs);
+ return;
+ }
+
asm("movl %%ds,%0" : "=r" (ds));
asm("movl %%cs,%0" : "=r" (cs));
asm("movl %%es,%0" : "=r" (es));
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 1463468ba9a0..6490f618e096 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1415,7 +1415,7 @@ static bool __init determine_cpu_tsc_frequencies(bool early)
static unsigned long __init get_loops_per_jiffy(void)
{
- unsigned long lpj = tsc_khz * KHZ;
+ u64 lpj = (u64)tsc_khz * KHZ;
do_div(lpj, HZ);
return lpj;
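The widening matters because tsc_khz is a 32-bit unsigned int, so the old tsc_khz * KHZ product was evaluated in 32-bit arithmetic before do_div() ran. As a hypothetical example, a 4.5 GHz TSC gives 4,500,000 * 1000 = 4,500,000,000, which wraps to 205,032,704 in 32 bits; casting to u64 first preserves the full product, which do_div() then divides by HZ.
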
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0cefba28c864..17c0472c5b34 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -548,7 +548,7 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
}
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
- unsigned long ipi_bitmap_high, int min,
+ unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit)
{
int i;
@@ -571,18 +571,31 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
rcu_read_lock();
map = rcu_dereference(kvm->arch.apic_map);
+ if (min > map->max_apic_id)
+ goto out;
/* Bits above cluster_size are masked in the caller. */
- for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) {
- vcpu = map->phys_map[min + i]->vcpu;
- count += kvm_apic_set_irq(vcpu, &irq, NULL);
+ for_each_set_bit(i, &ipi_bitmap_low,
+ min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
+ if (map->phys_map[min + i]) {
+ vcpu = map->phys_map[min + i]->vcpu;
+ count += kvm_apic_set_irq(vcpu, &irq, NULL);
+ }
}
min += cluster_size;
- for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) {
- vcpu = map->phys_map[min + i]->vcpu;
- count += kvm_apic_set_irq(vcpu, &irq, NULL);
+
+ if (min > map->max_apic_id)
+ goto out;
+
+ for_each_set_bit(i, &ipi_bitmap_high,
+ min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
+ if (map->phys_map[min + i]) {
+ vcpu = map->phys_map[min + i]->vcpu;
+ count += kvm_apic_set_irq(vcpu, &irq, NULL);
+ }
}
+out:
rcu_read_unlock();
return count;
}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a282321329b5..e24ea7067373 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1853,11 +1853,6 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
}
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
-{
- return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
-}
-
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
{
return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
@@ -5217,7 +5212,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
void *insn, int insn_len)
{
- int r, emulation_type = EMULTYPE_RETRY;
+ int r, emulation_type = 0;
enum emulation_result er;
bool direct = vcpu->arch.mmu.direct_map;
@@ -5230,10 +5225,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
r = RET_PF_INVALID;
if (unlikely(error_code & PFERR_RSVD_MASK)) {
r = handle_mmio_page_fault(vcpu, cr2, direct);
- if (r == RET_PF_EMULATE) {
- emulation_type = 0;
+ if (r == RET_PF_EMULATE)
goto emulate;
- }
}
if (r == RET_PF_INVALID) {
@@ -5260,8 +5253,19 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
return 1;
}
- if (mmio_info_in_cache(vcpu, cr2, direct))
- emulation_type = 0;
+ /*
+ * vcpu->arch.mmu.page_fault returned RET_PF_EMULATE, but we can still
+ * optimistically try to just unprotect the page and let the processor
+ * re-execute the instruction that caused the page fault. Do not allow
+ * retrying MMIO emulation, as it's not only pointless but could also
+ * cause us to enter an infinite loop because the processor will keep
+ * faulting on the non-existent MMIO address. Retrying an instruction
+ * from a nested guest is also pointless and dangerous as we are only
+ * explicitly shadowing L1's page tables, i.e. unprotecting something
+ * for L1 isn't going to magically fix whatever issue cause L2 to fail.
+ */
+ if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu))
+ emulation_type = EMULTYPE_ALLOW_RETRY;
emulate:
/*
* On AMD platforms, under certain conditions insn_len may be zero on #NPF.
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6276140044d0..89c4c5aa15f1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -776,7 +776,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
}
if (!svm->next_rip) {
- if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
+ if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) !=
EMULATE_DONE)
printk(KERN_DEBUG "%s: NOP\n", __func__);
return;
@@ -2715,7 +2715,7 @@ static int gp_interception(struct vcpu_svm *svm)
WARN_ON_ONCE(!enable_vmware_backdoor);
- er = emulate_instruction(vcpu,
+ er = kvm_emulate_instruction(vcpu,
EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
if (er == EMULATE_USER_EXIT)
return 0;
@@ -2819,7 +2819,7 @@ static int io_interception(struct vcpu_svm *svm)
string = (io_info & SVM_IOIO_STR_MASK) != 0;
in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
if (string)
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
port = io_info >> 16;
size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
@@ -3861,7 +3861,7 @@ static int iret_interception(struct vcpu_svm *svm)
static int invlpg_interception(struct vcpu_svm *svm)
{
if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
- return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
return kvm_skip_emulated_instruction(&svm->vcpu);
@@ -3869,13 +3869,13 @@ static int invlpg_interception(struct vcpu_svm *svm)
static int emulate_on_interception(struct vcpu_svm *svm)
{
- return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
}
static int rsm_interception(struct vcpu_svm *svm)
{
- return x86_emulate_instruction(&svm->vcpu, 0, 0,
- rsm_ins_bytes, 2) == EMULATE_DONE;
+ return kvm_emulate_instruction_from_buffer(&svm->vcpu,
+ rsm_ins_bytes, 2) == EMULATE_DONE;
}
static int rdpmc_interception(struct vcpu_svm *svm)
@@ -4700,7 +4700,7 @@ static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
ret = avic_unaccel_trap_write(svm);
} else {
/* Handling Fault */
- ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
+ ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
}
return ret;
@@ -6747,7 +6747,7 @@ e_free:
static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
{
unsigned long vaddr, vaddr_end, next_vaddr;
- unsigned long dst_vaddr, dst_vaddr_end;
+ unsigned long dst_vaddr;
struct page **src_p, **dst_p;
struct kvm_sev_dbg debug;
unsigned long n;
@@ -6763,7 +6763,6 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
size = debug.len;
vaddr_end = vaddr + size;
dst_vaddr = debug.dst_uaddr;
- dst_vaddr_end = dst_vaddr + size;
for (; vaddr < vaddr_end; vaddr = next_vaddr) {
int len, s_off, d_off;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1d26f3c4985b..533a327372c8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6983,7 +6983,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
* Cause the #SS fault with 0 error code in VM86 mode.
*/
if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
- if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
+ if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) {
if (vcpu->arch.halt_request) {
vcpu->arch.halt_request = 0;
return kvm_vcpu_halt(vcpu);
@@ -7054,7 +7054,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
WARN_ON_ONCE(!enable_vmware_backdoor);
- er = emulate_instruction(vcpu,
+ er = kvm_emulate_instruction(vcpu,
EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
if (er == EMULATE_USER_EXIT)
return 0;
@@ -7157,7 +7157,7 @@ static int handle_io(struct kvm_vcpu *vcpu)
++vcpu->stat.io_exits;
if (string)
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
port = exit_qualification >> 16;
size = (exit_qualification & 7) + 1;
@@ -7231,7 +7231,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
static int handle_desc(struct kvm_vcpu *vcpu)
{
WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
}
static int handle_cr(struct kvm_vcpu *vcpu)
@@ -7480,7 +7480,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu)
static int handle_invd(struct kvm_vcpu *vcpu)
{
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
}
static int handle_invlpg(struct kvm_vcpu *vcpu)
@@ -7547,7 +7547,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
}
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
}
static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
@@ -7704,8 +7704,8 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
return kvm_skip_emulated_instruction(vcpu);
else
- return x86_emulate_instruction(vcpu, gpa, EMULTYPE_SKIP,
- NULL, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) ==
+ EMULATE_DONE;
}
return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
@@ -7748,7 +7748,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
if (kvm_test_request(KVM_REQ_EVENT, vcpu))
return 1;
- err = emulate_instruction(vcpu, 0);
+ err = kvm_emulate_instruction(vcpu, 0);
if (err == EMULATE_USER_EXIT) {
++vcpu->stat.mmio_exits;
@@ -12537,8 +12537,11 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
bool from_vmentry = !!exit_qual;
u32 dummy_exit_qual;
+ u32 vmcs01_cpu_exec_ctrl;
int r = 0;
+ vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+
enter_guest_mode(vcpu);
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
@@ -12575,6 +12578,25 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
}
/*
+ * If L1 had a pending IRQ/NMI until it executed
+ * VMLAUNCH/VMRESUME which wasn't delivered because it was
+ * disallowed (e.g. interrupts disabled), L0 needs to
+ * evaluate if this pending event should cause an exit from L2
+ * to L1 or delivered directly to L2 (e.g. In case L1 don't
+ * intercept EXTERNAL_INTERRUPT).
+ *
+ * Usually this would be handled by L0 requesting a
+ * IRQ/NMI window by setting VMCS accordingly. However,
+ * this setting was done on VMCS01 and now VMCS02 is active
+ * instead. Thus, we force L0 to perform pending event
+ * evaluation by requesting a KVM_REQ_EVENT.
+ */
+ if (vmcs01_cpu_exec_ctrl &
+ (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) {
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ }
+
+ /*
* Note no nested_vmx_succeed or nested_vmx_fail here. At this point
* we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
* returned as far as L1 is concerned. It will only return (and set
@@ -13988,9 +14010,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
return -EINVAL;
- if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING)
- vmx->nested.nested_run_pending = 1;
-
vmx->nested.dirty_vmcs12 = true;
ret = enter_vmx_non_root_mode(vcpu, NULL);
if (ret)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 506bd2b4b8bb..542f6315444d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4987,7 +4987,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
emul_type = 0;
}
- er = emulate_instruction(vcpu, emul_type);
+ er = kvm_emulate_instruction(vcpu, emul_type);
if (er == EMULATE_USER_EXIT)
return 0;
if (er != EMULATE_DONE)
@@ -5870,7 +5870,10 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
gpa_t gpa = cr2;
kvm_pfn_t pfn;
- if (emulation_type & EMULTYPE_NO_REEXECUTE)
+ if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
+ return false;
+
+ if (WARN_ON_ONCE(is_guest_mode(vcpu)))
return false;
if (!vcpu->arch.mmu.direct_map) {
@@ -5958,7 +5961,10 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
*/
vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
- if (!(emulation_type & EMULTYPE_RETRY))
+ if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
+ return false;
+
+ if (WARN_ON_ONCE(is_guest_mode(vcpu)))
return false;
if (x86_page_table_writing_insn(ctxt))
@@ -6276,7 +6282,19 @@ restart:
return r;
}
-EXPORT_SYMBOL_GPL(x86_emulate_instruction);
+
+int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type)
+{
+ return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_instruction);
+
+int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
+ void *insn, int insn_len)
+{
+ return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
unsigned short port)
@@ -7734,7 +7752,7 @@ static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
{
int r;
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+ r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (r != EMULATE_DONE)
return 0;
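For reference, a hedged example of how a converted call site reads with the new wrapper (flag and result names as used in the hunks above; the surrounding logic is illustrative, not taken from the patch):

	int er = kvm_emulate_instruction(vcpu, EMULTYPE_ALLOW_RETRY);

	if (er == EMULATE_USER_EXIT)
		return 0;			/* hand the exit to userspace */
	if (er != EMULATE_DONE)
		kvm_queue_exception(vcpu, UD_VECTOR);
	return 1;

kvm_emulate_instruction_from_buffer() covers the callers that already hold the instruction bytes and therefore pass insn/insn_len instead of letting the emulator fetch them.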
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 257f27620bc2..67b9568613f3 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -274,6 +274,8 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
int page_num);
bool kvm_vector_hashing_enabled(void);
+int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
+ int emulation_type, void *insn, int insn_len);
#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index c8c6ad0d58b8..3f435d7fca5e 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -7,6 +7,8 @@
#include <linux/uaccess.h>
#include <linux/export.h>
+#include <asm/tlbflush.h>
+
/*
* We rely on the nested NMI work to allow atomic faults from the NMI path; the
* nested NMI paths are careful to preserve CR2.
@@ -19,6 +21,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
if (__range_not_ok(from, n, TASK_SIZE))
return n;
+ if (!nmi_uaccess_okay())
+ return n;
+
/*
* Even though this function is typically called from NMI/IRQ context
* disable pagefaults so that its behaviour is consistent even when
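The early return relies on nmi_uaccess_okay(), which this series adds to asm/tlbflush.h (hence the new include above). A rough, approximate sketch of the check it performs:

	/*
	 * Approximate sketch only; the real helper lives in asm/tlbflush.h
	 * and may differ in detail.
	 */
	static inline bool nmi_uaccess_okay(void)
	{
		struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);

		/* CR3 is being switched (see the tlb.c hunk below); stay away. */
		if (loaded_mm == LOADED_MM_SWITCHING)
			return false;

		/* A stale or foreign mm means CR3 does not match current->mm. */
		return loaded_mm == current->mm;
	}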
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b9123c497e0a..47bebfe6efa7 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -837,7 +837,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
printk(KERN_CONT "\n");
- show_opcodes((u8 *)regs->ip, loglvl);
+ show_opcodes(regs, loglvl);
}
static void
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 8d6c34fe49be..51a5a69ecac9 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1420,6 +1420,29 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
return 0;
}
+/*
+ * Machine check recovery code needs to change cache mode of poisoned
+ * pages to UC to avoid speculative access logging another error. But
+ * passing the address of the 1:1 mapping to set_memory_uc() is a fine
+ * way to encourage a speculative access. So we cheat and flip the top
+ * bit of the address. This works fine for the code that updates the
+ * page tables. But at the end of the process we need to flush the cache
+ * and the non-canonical address causes a #GP fault when used by the
+ * CLFLUSH instruction.
+ *
+ * But in the common case we already have a canonical address. This code
+ * will fix the top bit if needed and is a no-op otherwise.
+ */
+static inline unsigned long make_addr_canonical_again(unsigned long addr)
+{
+#ifdef CONFIG_X86_64
+ return (long)(addr << 1) >> 1;
+#else
+ return addr;
+#endif
+}
+
+
static int change_page_attr_set_clr(unsigned long *addr, int numpages,
pgprot_t mask_set, pgprot_t mask_clr,
int force_split, int in_flag,
@@ -1465,7 +1488,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
* Save address for cache flush. *addr is modified in the call
* to __change_page_attr_set_clr() below.
*/
- baddr = *addr;
+ baddr = make_addr_canonical_again(*addr);
}
/* Must avoid aliasing mappings in the highmem code */
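A worked example of the shift trick in make_addr_canonical_again(), with hypothetical addresses:

	/*
	 * Worked example (hypothetical values):
	 *
	 *   real 1:1-map address : 0xffff888012345678   bits 63 and 62 set
	 *   decoy (bit 63 clear) : 0x7fff888012345678   non-canonical
	 *
	 *   decoy << 1           = 0xffff11002468acf0
	 *   (long)(...) >> 1     = 0xffff888012345678   bit 62 copied into bit 63
	 *
	 * For an address that is already canonical, bit 63 equals bit 62, so
	 * the shift pair is a no-op, as the comment in the hunk says.
	 */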
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index e848a4811785..ae394552fb94 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -269,7 +269,7 @@ static void mop_up_one_pmd(struct mm_struct *mm, pgd_t *pgdp)
if (pgd_val(pgd) != 0) {
pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
- *pgdp = native_make_pgd(0);
+ pgd_clear(pgdp);
paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
pmd_free(mm, pmd);
@@ -494,7 +494,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
int changed = !pte_same(*ptep, entry);
if (changed && dirty)
- *ptep = entry;
+ set_pte(ptep, entry);
return changed;
}
@@ -509,7 +509,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
if (changed && dirty) {
- *pmdp = entry;
+ set_pmd(pmdp, entry);
/*
* We had a write-protection fault here and changed the pmd
* to be more permissive. No need to flush the TLB for that,
@@ -529,7 +529,7 @@ int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
VM_BUG_ON(address & ~HPAGE_PUD_MASK);
if (changed && dirty) {
- *pudp = entry;
+ set_pud(pudp, entry);
/*
* We had a write-protection fault here and changed the pud
* to be more permissive. No need to flush the TLB for that,
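The pgtable.c hunks above share one theme: raw stores to page-table entries are replaced with the pgd_clear()/set_pte()/set_pmd()/set_pud() accessors. One effect of using the accessors, sketched in simplified form (the pv_mmu_ops plumbing is more involved than shown):

	/*
	 * Simplified contrast:
	 *
	 *   *ptep = entry;           // plain memory write; bypasses any
	 *                            // paravirt hook entirely
	 *   set_pte(ptep, entry);    // with CONFIG_PARAVIRT, dispatches via
	 *                            // pv_mmu_ops.set_pte (native_set_pte on
	 *                            // bare metal, the hypervisor-aware
	 *                            // helper under e.g. Xen PV)
	 */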
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 31341ae7309f..c1fc1ae6b429 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -248,7 +248,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
*
* Returns a pointer to a PTE on success, or NULL on failure.
*/
-static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
+static pte_t *pti_user_pagetable_walk_pte(unsigned long address)
{
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
pmd_t *pmd;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 9517d1b2a281..e96b99eb800c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -305,6 +305,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+ /* Let nmi_uaccess_okay() know that we're changing CR3. */
+ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
+ barrier();
+
if (need_flush) {
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
@@ -335,6 +339,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (next != &init_mm)
this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
+ /* Make sure we write CR3 before loaded_mm. */
+ barrier();
+
this_cpu_write(cpu_tlbstate.loaded_mm, next);
this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
}
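Taken together with the usercopy.c hunk above, the two barriers implement a small same-CPU protocol; a hedged sketch (LOADED_MM_SWITCHING is assumed to be a sentinel pointer value defined in asm/tlbflush.h):

	/*
	 * writer (switch_mm_irqs_off)            reader (nmi_uaccess_okay)
	 * ---------------------------            -------------------------
	 * loaded_mm = LOADED_MM_SWITCHING;       mm = loaded_mm;
	 * barrier();                             if (mm == LOADED_MM_SWITCHING ||
	 * write_cr3(...);                            mm != current->mm)
	 * barrier();                                     return false;   // bail out
	 * loaded_mm = next;
	 *
	 * An NMI that lands anywhere inside the CR3 switch sees either the
	 * sentinel or a not-yet-updated mm and refuses the user access.
	 * Since the only racing reader is an NMI on the same CPU, compiler
	 * barriers appear to be sufficient here; no cross-CPU ordering is
	 * needed.
	 */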
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 324b93328b37..05ca14222463 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -85,14 +85,10 @@ pgd_t * __init efi_call_phys_prolog(void)
void __init efi_call_phys_epilog(pgd_t *save_pgd)
{
- struct desc_ptr gdt_descr;
-
- gdt_descr.address = (unsigned long)get_cpu_gdt_rw(0);
- gdt_descr.size = GDT_SIZE - 1;
- load_gdt(&gdt_descr);
-
load_cr3(save_pgd);
__flush_tlb_all();
+
+ load_fixmap_gdt(0);
}
void __init efi_runtime_update_mappings(void)
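For context, a hedged sketch of what load_fixmap_gdt(0) does in place of the removed open-coded descriptor load (the helper lives in cpu/common.c; details are approximate):

	void load_fixmap_gdt(int cpu)
	{
		struct desc_ptr gdt_descr;

		/* The fixmap alias is the read-only mapping of this CPU's GDT. */
		gdt_descr.address = (unsigned long)get_cpu_gdt_ro(cpu);
		gdt_descr.size    = GDT_SIZE - 1;
		load_gdt(&gdt_descr);
	}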
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 45b700ac5fe7..2fe5c9b1816b 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -435,14 +435,13 @@ static void xen_set_pud(pud_t *ptr, pud_t val)
static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
{
trace_xen_mmu_set_pte_atomic(ptep, pte);
- set_64bit((u64 *)ptep, native_pte_val(pte));
+ __xen_set_pte(ptep, pte);
}
static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
trace_xen_mmu_pte_clear(mm, addr, ptep);
- if (!xen_batched_set_pte(ptep, native_make_pte(0)))
- native_pte_clear(mm, addr, ptep);
+ __xen_set_pte(ptep, native_make_pte(0));
}
static void xen_pmd_clear(pmd_t *pmdp)
@@ -1570,7 +1569,7 @@ static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
pte_val_ma(pte));
#endif
- native_set_pte(ptep, pte);
+ __xen_set_pte(ptep, pte);
}
/* Early in boot, while setting up the initial pagetable, assume
@@ -2061,7 +2060,6 @@ void __init xen_relocate_p2m(void)
pud_t *pud;
pgd_t *pgd;
unsigned long *new_p2m;
- int save_pud;
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
n_pte = roundup(size, PAGE_SIZE) >> PAGE_SHIFT;
@@ -2091,7 +2089,6 @@ void __init xen_relocate_p2m(void)
pgd = __va(read_cr3_pa());
new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
- save_pud = n_pud;
for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
pud = early_memremap(pud_phys, PAGE_SIZE);
clear_page(pud);