summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-05-23 06:12:25 -1000
committerLinus Torvalds <torvalds@linux-foundation.org>2021-05-23 06:12:25 -1000
commit7de7ac8d60697d844489b6a68649fa9873174eec (patch)
tree5fe925a59eaadfd0e22d15a458c2058f6fe62d7b
parent28ceac6959e1db015729c52ec74e0a4ff496c2b8 (diff)
parent4954f5b8ef0baf70fe978d1a99a5f70e4dd5c877 (diff)
Merge tag 'x86_urgent_for_v5.13_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Borislav Petkov: - Fix how SEV handles MMIO accesses by forwarding potential page faults instead of killing the machine and by using the accessors with the exact functionality needed when accessing memory. - Fix a confusion with Clang LTO compiler switches passed to the it - Handle the case gracefully when VMGEXIT has been executed in userspace * tag 'x86_urgent_for_v5.13_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/sev-es: Use __put_user()/__get_user() for data accesses x86/sev-es: Forward page-faults which happen during emulation x86/sev-es: Don't return NULL from sev_es_get_ghcb() x86/build: Fix location of '-plugin-opt=' flags x86/sev-es: Invalidate the GHCB after completing VMGEXIT x86/sev-es: Move sev_es_put_ghcb() in prep for follow on patch
-rw-r--r--arch/x86/Makefile12
-rw-r--r--arch/x86/kernel/sev-shared.c1
-rw-r--r--arch/x86/kernel/sev.c136
3 files changed, 92 insertions, 57 deletions
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index c77c5d8a7b3e..307529417021 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -178,11 +178,6 @@ ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1)
KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,)
endif
-ifdef CONFIG_LTO_CLANG
-KBUILD_LDFLAGS += -plugin-opt=-code-model=kernel \
- -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
-endif
-
# Workaround for a gcc prelease that unfortunately was shipped in a suse release
KBUILD_CFLAGS += -Wno-sign-compare
#
@@ -202,7 +197,12 @@ ifdef CONFIG_RETPOLINE
endif
endif
-KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
+KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
+
+ifdef CONFIG_LTO_CLANG
+KBUILD_LDFLAGS += -plugin-opt=-code-model=kernel \
+ -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
+endif
ifdef CONFIG_X86_NEED_RELOCS
LDFLAGS_vmlinux := --emit-relocs --discard-none
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index 6ec8b3bfd76e..9f90f460a28c 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -63,6 +63,7 @@ static bool sev_es_negotiate_protocol(void)
static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
{
+ ghcb->save.sw_exit_code = 0;
memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 9578c82832aa..651b81cd648e 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -203,8 +203,18 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
if (unlikely(data->ghcb_active)) {
/* GHCB is already in use - save its contents */
- if (unlikely(data->backup_ghcb_active))
- return NULL;
+ if (unlikely(data->backup_ghcb_active)) {
+ /*
+ * Backup-GHCB is also already in use. There is no way
+ * to continue here so just kill the machine. To make
+ * panic() work, mark GHCBs inactive so that messages
+ * can be printed out.
+ */
+ data->ghcb_active = false;
+ data->backup_ghcb_active = false;
+
+ panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
+ }
/* Mark backup_ghcb active before writing to it */
data->backup_ghcb_active = true;
@@ -221,24 +231,6 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
return ghcb;
}
-static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
-{
- struct sev_es_runtime_data *data;
- struct ghcb *ghcb;
-
- data = this_cpu_read(runtime_data);
- ghcb = &data->ghcb_page;
-
- if (state->ghcb) {
- /* Restore GHCB from Backup */
- *ghcb = *state->ghcb;
- data->backup_ghcb_active = false;
- state->ghcb = NULL;
- } else {
- data->ghcb_active = false;
- }
-}
-
/* Needed in vc_early_forward_exception */
void do_early_exception(struct pt_regs *regs, int trapnr);
@@ -323,31 +315,44 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
u16 d2;
u8 d1;
- /* If instruction ran in kernel mode and the I/O buffer is in kernel space */
- if (!user_mode(ctxt->regs) && !access_ok(target, size)) {
- memcpy(dst, buf, size);
- return ES_OK;
- }
-
+ /*
+ * This function uses __put_user() independent of whether kernel or user
+ * memory is accessed. This works fine because __put_user() does no
+ * sanity checks of the pointer being accessed. All that it does is
+ * to report when the access failed.
+ *
+ * Also, this function runs in atomic context, so __put_user() is not
+ * allowed to sleep. The page-fault handler detects that it is running
+ * in atomic context and will not try to take mmap_sem and handle the
+ * fault, so additional pagefault_enable()/disable() calls are not
+ * needed.
+ *
+ * The access can't be done via copy_to_user() here because
+ * vc_write_mem() must not use string instructions to access unsafe
+ * memory. The reason is that MOVS is emulated by the #VC handler by
+ * splitting the move up into a read and a write and taking a nested #VC
+ * exception on whatever of them is the MMIO access. Using string
+ * instructions here would cause infinite nesting.
+ */
switch (size) {
case 1:
memcpy(&d1, buf, 1);
- if (put_user(d1, target))
+ if (__put_user(d1, target))
goto fault;
break;
case 2:
memcpy(&d2, buf, 2);
- if (put_user(d2, target))
+ if (__put_user(d2, target))
goto fault;
break;
case 4:
memcpy(&d4, buf, 4);
- if (put_user(d4, target))
+ if (__put_user(d4, target))
goto fault;
break;
case 8:
memcpy(&d8, buf, 8);
- if (put_user(d8, target))
+ if (__put_user(d8, target))
goto fault;
break;
default:
@@ -378,30 +383,43 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
u16 d2;
u8 d1;
- /* If instruction ran in kernel mode and the I/O buffer is in kernel space */
- if (!user_mode(ctxt->regs) && !access_ok(s, size)) {
- memcpy(buf, src, size);
- return ES_OK;
- }
-
+ /*
+ * This function uses __get_user() independent of whether kernel or user
+ * memory is accessed. This works fine because __get_user() does no
+ * sanity checks of the pointer being accessed. All that it does is
+ * to report when the access failed.
+ *
+ * Also, this function runs in atomic context, so __get_user() is not
+ * allowed to sleep. The page-fault handler detects that it is running
+ * in atomic context and will not try to take mmap_sem and handle the
+ * fault, so additional pagefault_enable()/disable() calls are not
+ * needed.
+ *
+ * The access can't be done via copy_from_user() here because
+ * vc_read_mem() must not use string instructions to access unsafe
+ * memory. The reason is that MOVS is emulated by the #VC handler by
+ * splitting the move up into a read and a write and taking a nested #VC
+ * exception on whatever of them is the MMIO access. Using string
+ * instructions here would cause infinite nesting.
+ */
switch (size) {
case 1:
- if (get_user(d1, s))
+ if (__get_user(d1, s))
goto fault;
memcpy(buf, &d1, 1);
break;
case 2:
- if (get_user(d2, s))
+ if (__get_user(d2, s))
goto fault;
memcpy(buf, &d2, 2);
break;
case 4:
- if (get_user(d4, s))
+ if (__get_user(d4, s))
goto fault;
memcpy(buf, &d4, 4);
break;
case 8:
- if (get_user(d8, s))
+ if (__get_user(d8, s))
goto fault;
memcpy(buf, &d8, 8);
break;
@@ -461,6 +479,29 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt
/* Include code shared with pre-decompression boot stage */
#include "sev-shared.c"
+static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
+{
+ struct sev_es_runtime_data *data;
+ struct ghcb *ghcb;
+
+ data = this_cpu_read(runtime_data);
+ ghcb = &data->ghcb_page;
+
+ if (state->ghcb) {
+ /* Restore GHCB from Backup */
+ *ghcb = *state->ghcb;
+ data->backup_ghcb_active = false;
+ state->ghcb = NULL;
+ } else {
+ /*
+ * Invalidate the GHCB so a VMGEXIT instruction issued
+ * from userspace won't appear to be valid.
+ */
+ vc_ghcb_invalidate(ghcb);
+ data->ghcb_active = false;
+ }
+}
+
void noinstr __sev_es_nmi_complete(void)
{
struct ghcb_state state;
@@ -1255,6 +1296,10 @@ static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
case X86_TRAP_UD:
exc_invalid_op(ctxt->regs);
break;
+ case X86_TRAP_PF:
+ write_cr2(ctxt->fi.cr2);
+ exc_page_fault(ctxt->regs, error_code);
+ break;
case X86_TRAP_AC:
exc_alignment_check(ctxt->regs, error_code);
break;
@@ -1284,7 +1329,6 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
*/
DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
{
- struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
irqentry_state_t irq_state;
struct ghcb_state state;
struct es_em_ctxt ctxt;
@@ -1310,16 +1354,6 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
*/
ghcb = sev_es_get_ghcb(&state);
- if (!ghcb) {
- /*
- * Mark GHCBs inactive so that panic() is able to print the
- * message.
- */
- data->ghcb_active = false;
- data->backup_ghcb_active = false;
-
- panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
- }
vc_ghcb_invalidate(ghcb);
result = vc_init_em_ctxt(&ctxt, regs, error_code);