summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Makefile6
-rw-r--r--arch/x86/boot/compressed/eboot.c2
-rw-r--r--arch/x86/entry/common.c2
-rw-r--r--arch/x86/include/asm/barrier.h2
-rw-r--r--arch/x86/include/asm/vmx.h3
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c60
-rw-r--r--arch/x86/kernel/cpu/bugs.c4
-rw-r--r--arch/x86/kernel/cpu/cacheinfo.c2
-rw-r--r--arch/x86/kernel/cpu/common.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c5
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c44
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c5
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/quirks.c11
-rw-r--r--arch/x86/kernel/signal.c2
-rw-r--r--arch/x86/kernel/traps.c14
-rw-r--r--arch/x86/kernel/uprobes.c2
-rw-r--r--arch/x86/kvm/vmx.c67
-rw-r--r--arch/x86/kvm/x86.h9
-rw-r--r--arch/x86/mm/init_64.c20
-rw-r--r--arch/x86/xen/enlighten.c7
-rw-r--r--arch/x86/xen/enlighten_pv.c1
-rw-r--r--arch/x86/xen/enlighten_pvh.c1
-rw-r--r--arch/x86/xen/smp_pv.c5
24 files changed, 234 insertions, 45 deletions
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index f0a6ea22429d..a08e82856563 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -258,11 +258,6 @@ archscripts: scripts_basic
archheaders:
$(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all
-archprepare:
-ifeq ($(CONFIG_KEXEC_FILE),y)
- $(Q)$(MAKE) $(build)=arch/x86/purgatory arch/x86/purgatory/kexec-purgatory.c
-endif
-
###
# Kernel objects
@@ -327,7 +322,6 @@ archclean:
$(Q)rm -rf $(objtree)/arch/x86_64
$(Q)$(MAKE) $(clean)=$(boot)
$(Q)$(MAKE) $(clean)=arch/x86/tools
- $(Q)$(MAKE) $(clean)=arch/x86/purgatory
define archhelp
echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index a8a8642d2b0b..e57665b4ba1c 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -118,7 +118,7 @@ __setup_efi_pci(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
void *romimage;
status = efi_call_proto(efi_pci_io_protocol, attributes, pci,
- EfiPciIoAttributeOperationGet, 0, 0,
+ EfiPciIoAttributeOperationGet, 0ULL,
&attributes);
if (status != EFI_SUCCESS)
return status;
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 92190879b228..3b2490b81918 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -164,7 +164,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
if (cached_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
- rseq_handle_notify_resume(regs);
+ rseq_handle_notify_resume(NULL, regs);
}
if (cached_flags & _TIF_USER_RETURN_NOTIFY)
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 042b5e892ed1..14de0432d288 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -38,7 +38,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
{
unsigned long mask;
- asm ("cmp %1,%2; sbb %0,%0;"
+ asm volatile ("cmp %1,%2; sbb %0,%0;"
:"=r" (mask)
:"g"(size),"r" (index)
:"cc");
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 425e6b8b9547..6aa8499e1f62 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -114,6 +114,7 @@
#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
#define VMX_MISC_ACTIVITY_HLT 0x00000040
+#define VMX_MISC_ZERO_LEN_INS 0x40000000
/* VMFUNC functions */
#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001
@@ -351,11 +352,13 @@ enum vmcs_field {
#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK
#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
+#define INTR_TYPE_RESERVED (1 << 8) /* reserved */
#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */
#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */
#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
#define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */
#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */
+#define INTR_TYPE_OTHER_EVENT (7 << 8) /* other event */
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define GUEST_INTR_STATE_STI 0x00000001
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index efaf2d4f9c3c..d492752f79e1 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -26,6 +26,7 @@
#include <linux/delay.h>
#include <linux/crash_dump.h>
#include <linux/reboot.h>
+#include <linux/memory.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
@@ -392,6 +393,51 @@ extern int uv_hub_info_version(void)
}
EXPORT_SYMBOL(uv_hub_info_version);
+/* Default UV memory block size is 2GB */
+static unsigned long mem_block_size = (2UL << 30);
+
+/* Kernel parameter to specify UV mem block size */
+static int parse_mem_block_size(char *ptr)
+{
+ unsigned long size = memparse(ptr, NULL);
+
+ /* Size will be rounded down by set_block_size() below */
+ mem_block_size = size;
+ return 0;
+}
+early_param("uv_memblksize", parse_mem_block_size);
+
+static __init int adj_blksize(u32 lgre)
+{
+ unsigned long base = (unsigned long)lgre << UV_GAM_RANGE_SHFT;
+ unsigned long size;
+
+ for (size = mem_block_size; size > MIN_MEMORY_BLOCK_SIZE; size >>= 1)
+ if (IS_ALIGNED(base, size))
+ break;
+
+ if (size >= mem_block_size)
+ return 0;
+
+ mem_block_size = size;
+ return 1;
+}
+
+static __init void set_block_size(void)
+{
+ unsigned int order = ffs(mem_block_size);
+
+ if (order) {
+ /* adjust for ffs return of 1..64 */
+ set_memory_block_size_order(order - 1);
+ pr_info("UV: mem_block_size set to 0x%lx\n", mem_block_size);
+ } else {
+ /* bad or zero value, default to 1UL << 31 (2GB) */
+ pr_err("UV: mem_block_size error with 0x%lx\n", mem_block_size);
+ set_memory_block_size_order(31);
+ }
+}
+
/* Build GAM range lookup table: */
static __init void build_uv_gr_table(void)
{
@@ -1180,23 +1226,30 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
<< UV_GAM_RANGE_SHFT);
int order = 0;
char suffix[] = " KMGTPE";
+ int flag = ' ';
while (size > 9999 && order < sizeof(suffix)) {
size /= 1024;
order++;
}
+ /* adjust max block size to current range start */
+ if (gre->type == 1 || gre->type == 2)
+ if (adj_blksize(lgre))
+ flag = '*';
+
if (!index) {
pr_info("UV: GAM Range Table...\n");
- pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
+ pr_info("UV: # %20s %14s %6s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
}
- pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d %04x %02x %02x\n",
+ pr_info("UV: %2d: 0x%014lx-0x%014lx%c %5lu%c %3d %04x %02x %02x\n",
index++,
(unsigned long)lgre << UV_GAM_RANGE_SHFT,
(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
- size, suffix[order],
+ flag, size, suffix[order],
gre->type, gre->nasid, gre->sockid, gre->pnode);
+ /* update to next range start */
lgre = gre->limit;
if (sock_min > gre->sockid)
sock_min = gre->sockid;
@@ -1427,6 +1480,7 @@ static void __init uv_system_init_hub(void)
build_socket_tables();
build_uv_gr_table();
+ set_block_size();
uv_init_hub_info(&hub_info);
uv_possible_blades = num_possible_nodes();
if (!_node_to_pnode)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index cd0fda1fff6d..404df26b7de8 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -27,6 +27,7 @@
#include <asm/pgtable.h>
#include <asm/set_memory.h>
#include <asm/intel-family.h>
+#include <asm/hypervisor.h>
static void __init spectre_v2_select_mitigation(void);
static void __init ssb_select_mitigation(void);
@@ -664,6 +665,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
if (boot_cpu_has(X86_FEATURE_PTI))
return sprintf(buf, "Mitigation: PTI\n");
+ if (hypervisor_is_type(X86_HYPER_XEN_PV))
+ return sprintf(buf, "Unknown (XEN PV detected, hypervisor mitigation required)\n");
+
break;
case X86_BUG_SPECTRE_V1:
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 38354c66df81..0c5fcbd998cf 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -671,7 +671,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
if (num_sharing_cache) {
- int bits = get_count_order(num_sharing_cache) - 1;
+ int bits = get_count_order(num_sharing_cache);
per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0df7151cfef4..eb4cb3efd20e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1,3 +1,6 @@
+/* cpu_feature_enabled() cannot be used this early */
+#define USE_EARLY_PGTABLE_L5
+
#include <linux/bootmem.h>
#include <linux/linkage.h>
#include <linux/bitops.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 5bbd06f38ff6..f34d89c01edc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -160,6 +160,11 @@ static struct severity {
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
USER
),
+ MCESEV(
+ PANIC, "Data load in unrecoverable area of kernel",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+ KERNEL
+ ),
#endif
MCESEV(
PANIC, "Action required: unknown MCACOD",
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index e4cf6ff1c2e1..c102ad51025e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -772,23 +772,25 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
struct pt_regs *regs)
{
- int i, ret = 0;
char *tmp;
+ int i;
for (i = 0; i < mca_cfg.banks; i++) {
m->status = mce_rdmsrl(msr_ops.status(i));
- if (m->status & MCI_STATUS_VAL) {
- __set_bit(i, validp);
- if (quirk_no_way_out)
- quirk_no_way_out(i, m, regs);
- }
+ if (!(m->status & MCI_STATUS_VAL))
+ continue;
+
+ __set_bit(i, validp);
+ if (quirk_no_way_out)
+ quirk_no_way_out(i, m, regs);
if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+ mce_read_aux(m, i);
*msg = tmp;
- ret = 1;
+ return 1;
}
}
- return ret;
+ return 0;
}
/*
@@ -1205,13 +1207,18 @@ void do_machine_check(struct pt_regs *regs, long error_code)
lmce = m.mcgstatus & MCG_STATUS_LMCES;
/*
+ * Local machine check may already know that we have to panic.
+ * Broadcast machine check begins rendezvous in mce_start()
* Go through all banks in exclusion of the other CPUs. This way we
* don't report duplicated events on shared banks because the first one
- * to see it will clear it. If this is a Local MCE, then no need to
- * perform rendezvous.
+ * to see it will clear it.
*/
- if (!lmce)
+ if (lmce) {
+ if (no_way_out)
+ mce_panic("Fatal local machine check", &m, msg);
+ } else {
order = mce_start(&no_way_out);
+ }
for (i = 0; i < cfg->banks; i++) {
__clear_bit(i, toclear);
@@ -1287,12 +1294,17 @@ void do_machine_check(struct pt_regs *regs, long error_code)
no_way_out = worst >= MCE_PANIC_SEVERITY;
} else {
/*
- * Local MCE skipped calling mce_reign()
- * If we found a fatal error, we need to panic here.
+ * If there was a fatal machine check we should have
+ * already called mce_panic earlier in this function.
+ * Since we re-read the banks, we might have found
+ * something new. Check again to see if we found a
+ * fatal error. We call "mce_severity()" again to
+ * make sure we have the right "msg".
*/
- if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
- mce_panic("Machine check from unknown source",
- NULL, NULL);
+ if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
+ mce_severity(&m, cfg->tolerant, &msg, true);
+ mce_panic("Local fatal machine check!", &m, msg);
+ }
}
/*
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 1c2cfa0644aa..97ccf4c3b45b 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -190,8 +190,11 @@ static void save_microcode_patch(void *data, unsigned int size)
p = memdup_patch(data, size);
if (!p)
pr_err("Error allocating buffer %p\n", data);
- else
+ else {
list_replace(&iter->plist, &p->plist);
+ kfree(iter->data);
+ kfree(iter);
+ }
}
}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index a21d6ace648e..8047379e575a 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -44,7 +44,7 @@ static unsigned int __initdata next_early_pgt;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
#ifdef CONFIG_X86_5LEVEL
-unsigned int __pgtable_l5_enabled __initdata;
+unsigned int __pgtable_l5_enabled __ro_after_init;
unsigned int pgdir_shift __ro_after_init = 39;
EXPORT_SYMBOL(pgdir_shift);
unsigned int ptrs_per_p4d __ro_after_init = 1;
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 697a4ce04308..736348ead421 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -645,12 +645,19 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
/* Skylake */
static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
{
- u32 capid0;
+ u32 capid0, capid5;
pci_read_config_dword(pdev, 0x84, &capid0);
+ pci_read_config_dword(pdev, 0x98, &capid5);
- if ((capid0 & 0xc0) == 0xc0)
+ /*
+ * CAPID0{7:6} indicate whether this is an advanced RAS SKU
+ * CAPID5{8:5} indicate that various NVDIMM usage modes are
+ * enabled, so memory machine check recovery is also enabled.
+ */
+ if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0))
static_branch_inc(&mcsafe_key);
+
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 445ca11ff863..92a3b312a53c 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -692,7 +692,7 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
* Increment event counter and perform fixup for the pre-signal
* frame.
*/
- rseq_signal_deliver(regs);
+ rseq_signal_deliver(ksig, regs);
/* Set up the stack frame */
if (is_ia32_frame(ksig)) {
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a535dd64de63..e6db475164ed 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -835,16 +835,18 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" :
"simd exception";
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP)
- return;
cond_local_irq_enable(regs);
if (!user_mode(regs)) {
- if (!fixup_exception(regs, trapnr)) {
- task->thread.error_code = error_code;
- task->thread.trap_nr = trapnr;
+ if (fixup_exception(regs, trapnr))
+ return;
+
+ task->thread.error_code = error_code;
+ task->thread.trap_nr = trapnr;
+
+ if (notify_die(DIE_TRAP, str, regs, error_code,
+ trapnr, SIGFPE) != NOTIFY_STOP)
die(str, regs, error_code);
- }
return;
}
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 58d8d800875d..deb576b23b7c 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -293,7 +293,7 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool
insn_init(insn, auprobe->insn, sizeof(auprobe->insn), x86_64);
/* has the side-effect of processing the entire instruction */
insn_get_length(insn);
- if (WARN_ON_ONCE(!insn_complete(insn)))
+ if (!insn_complete(insn))
return -ENOEXEC;
if (is_prefix_bad(insn))
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 559a12b6184d..1689f433f3a0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1705,6 +1705,17 @@ static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu)
MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS;
}
+static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu)
+{
+ return to_vmx(vcpu)->nested.msrs.misc_low & VMX_MISC_ZERO_LEN_INS;
+}
+
+static inline bool nested_cpu_supports_monitor_trap_flag(struct kvm_vcpu *vcpu)
+{
+ return to_vmx(vcpu)->nested.msrs.procbased_ctls_high &
+ CPU_BASED_MONITOR_TRAP_FLAG;
+}
+
static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
{
return vmcs12->cpu_based_vm_exec_control & bit;
@@ -11620,6 +11631,62 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
!nested_cr3_valid(vcpu, vmcs12->host_cr3))
return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
+ /*
+ * From the Intel SDM, volume 3:
+ * Fields relevant to VM-entry event injection must be set properly.
+ * These fields are the VM-entry interruption-information field, the
+ * VM-entry exception error code, and the VM-entry instruction length.
+ */
+ if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
+ u32 intr_info = vmcs12->vm_entry_intr_info_field;
+ u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
+ u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
+ bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
+ bool should_have_error_code;
+ bool urg = nested_cpu_has2(vmcs12,
+ SECONDARY_EXEC_UNRESTRICTED_GUEST);
+ bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
+
+ /* VM-entry interruption-info field: interruption type */
+ if (intr_type == INTR_TYPE_RESERVED ||
+ (intr_type == INTR_TYPE_OTHER_EVENT &&
+ !nested_cpu_supports_monitor_trap_flag(vcpu)))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+ /* VM-entry interruption-info field: vector */
+ if ((intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
+ (intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
+ (intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+ /* VM-entry interruption-info field: deliver error code */
+ should_have_error_code =
+ intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
+ x86_exception_has_error_code(vector);
+ if (has_error_code != should_have_error_code)
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+ /* VM-entry exception error code */
+ if (has_error_code &&
+ vmcs12->vm_entry_exception_error_code & GENMASK(31, 15))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+ /* VM-entry interruption-info field: reserved bits */
+ if (intr_info & INTR_INFO_RESVD_BITS_MASK)
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+ /* VM-entry instruction length */
+ switch (intr_type) {
+ case INTR_TYPE_SOFT_EXCEPTION:
+ case INTR_TYPE_SOFT_INTR:
+ case INTR_TYPE_PRIV_SW_EXCEPTION:
+ if ((vmcs12->vm_entry_instruction_len > 15) ||
+ (vmcs12->vm_entry_instruction_len == 0 &&
+ !nested_cpu_has_zero_length_injection(vcpu)))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ }
+ }
+
return 0;
}
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 331993c49dae..257f27620bc2 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -110,6 +110,15 @@ static inline bool is_la57_mode(struct kvm_vcpu *vcpu)
#endif
}
+static inline bool x86_exception_has_error_code(unsigned int vector)
+{
+ static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) |
+ BIT(NP_VECTOR) | BIT(SS_VECTOR) | BIT(GP_VECTOR) |
+ BIT(PF_VECTOR) | BIT(AC_VECTOR);
+
+ return (1U << vector) & exception_has_error_code;
+}
+
static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
{
return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 045f492d5f68..a688617c727e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1350,16 +1350,28 @@ int kern_addr_valid(unsigned long addr)
/* Amount of ram needed to start using large blocks */
#define MEM_SIZE_FOR_LARGE_BLOCK (64UL << 30)
+/* Adjustable memory block size */
+static unsigned long set_memory_block_size;
+int __init set_memory_block_size_order(unsigned int order)
+{
+ unsigned long size = 1UL << order;
+
+ if (size > MEM_SIZE_FOR_LARGE_BLOCK || size < MIN_MEMORY_BLOCK_SIZE)
+ return -EINVAL;
+
+ set_memory_block_size = size;
+ return 0;
+}
+
static unsigned long probe_memory_block_size(void)
{
unsigned long boot_mem_end = max_pfn << PAGE_SHIFT;
unsigned long bz;
- /* If this is UV system, always set 2G block size */
- if (is_uv_system()) {
- bz = MAX_BLOCK_SIZE;
+ /* If memory block size has been set, then use it */
+ bz = set_memory_block_size;
+ if (bz)
goto done;
- }
/* Use regular block if RAM is smaller than MEM_SIZE_FOR_LARGE_BLOCK */
if (boot_mem_end < MEM_SIZE_FOR_LARGE_BLOCK) {
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c9081c6671f0..3b5318505c69 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -65,6 +65,13 @@ __read_mostly int xen_have_vector_callback;
EXPORT_SYMBOL_GPL(xen_have_vector_callback);
/*
+ * NB: needs to live in .data because it's used by xen_prepare_pvh which runs
+ * before clearing the bss.
+ */
+uint32_t xen_start_flags __attribute__((section(".data"))) = 0;
+EXPORT_SYMBOL(xen_start_flags);
+
+/*
* Point at some empty memory to start with. We map the real shared_info
* page as soon as fixmap is up and running.
*/
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 357969a3697c..8d4e2e1ae60b 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1203,6 +1203,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
return;
xen_domain_type = XEN_PV_DOMAIN;
+ xen_start_flags = xen_start_info->flags;
xen_setup_features();
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index aa1c6a6831a9..c85d1a88f476 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -97,6 +97,7 @@ void __init xen_prepare_pvh(void)
}
xen_pvh = 1;
+ xen_start_flags = pvh_start_info.flags;
msr = cpuid_ebx(xen_cpuid_base() + 2);
pfn = __pa(hypercall_page);
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 2e20ae2fa2d6..e3b18ad49889 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -32,6 +32,7 @@
#include <xen/interface/vcpu.h>
#include <xen/interface/xenpmu.h>
+#include <asm/spec-ctrl.h>
#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>
@@ -70,6 +71,8 @@ static void cpu_bringup(void)
cpu_data(cpu).x86_max_cores = 1;
set_cpu_sibling_map(cpu);
+ speculative_store_bypass_ht_init();
+
xen_setup_cpu_clockevents();
notify_cpu_starting(cpu);
@@ -250,6 +253,8 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
}
set_cpu_sibling_map(0);
+ speculative_store_bypass_ht_init();
+
xen_pmu_init(0);
if (xen_smp_intr_init(0) || xen_smp_intr_init_pv(0))