From b558bc0a25c82ef2a9d2683b0beb3e4b87cea20b Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 9 Oct 2008 16:01:52 +0800 Subject: x86: Rename mtrr_state struct and macro names Prepare for exporting them. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kernel/cpu/mtrr/generic.c | 8 ++++---- arch/x86/kernel/cpu/mtrr/main.c | 4 ++-- arch/x86/kernel/cpu/mtrr/mtrr.h | 7 ++++--- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 4e8d77f01eeb..90db91e15931 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -14,9 +14,9 @@ #include #include "mtrr.h" -struct mtrr_state { - struct mtrr_var_range var_ranges[MAX_VAR_RANGES]; - mtrr_type fixed_ranges[NUM_FIXED_RANGES]; +struct mtrr_state_type { + struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; + mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; unsigned char enabled; unsigned char have_fixed; mtrr_type def_type; @@ -35,7 +35,7 @@ static struct fixed_range_block fixed_range_blocks[] = { }; static unsigned long smp_changes_mask; -static struct mtrr_state mtrr_state = {}; +static struct mtrr_state_type mtrr_state = {}; static int mtrr_state_set; u64 mtrr_tom2; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 1159e269e596..d6ec7ec30274 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -49,7 +49,7 @@ u32 num_var_ranges = 0; -unsigned int mtrr_usage_table[MAX_VAR_RANGES]; +unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; static DEFINE_MUTEX(mtrr_mutex); u64 size_or_mask, size_and_mask; @@ -574,7 +574,7 @@ struct mtrr_value { unsigned long lsize; }; -static struct mtrr_value mtrr_state[MAX_VAR_RANGES]; +static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES]; static int mtrr_save(struct sys_device * sysdev, pm_message_t state) { diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 2dc4ec656b23..988538202ddc 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -11,8 +11,9 @@ #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) -#define NUM_FIXED_RANGES 88 -#define MAX_VAR_RANGES 256 +#define MTRR_NUM_FIXED_RANGES 88 +#define MTRR_MAX_VAR_RANGES 256 + #define MTRRfix64K_00000_MSR 0x250 #define MTRRfix16K_80000_MSR 0x258 #define MTRRfix16K_A0000_MSR 0x259 @@ -33,7 +34,7 @@ an 8 bit field: */ typedef u8 mtrr_type; -extern unsigned int mtrr_usage_table[MAX_VAR_RANGES]; +extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; struct mtrr_ops { u32 vendor; -- cgit v1.2.3 From 932d27a7913fc6b3c64c6e6082628b0a1561dec9 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 9 Oct 2008 16:01:53 +0800 Subject: x86: Export some definition of MTRR For KVM can reuse the type define, and need them to support shadow MTRR. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/include/asm/mtrr.h | 25 +++++++++++++++++++++++++ arch/x86/kernel/cpu/mtrr/generic.c | 12 +++--------- arch/x86/kernel/cpu/mtrr/mtrr.h | 17 ----------------- 3 files changed, 28 insertions(+), 26 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index 7c1e4258b31e..cb988aab716d 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -57,6 +57,31 @@ struct mtrr_gentry { }; #endif /* !__i386__ */ +struct mtrr_var_range { + u32 base_lo; + u32 base_hi; + u32 mask_lo; + u32 mask_hi; +}; + +/* In the Intel processor's MTRR interface, the MTRR type is always held in + an 8 bit field: */ +typedef u8 mtrr_type; + +#define MTRR_NUM_FIXED_RANGES 88 +#define MTRR_MAX_VAR_RANGES 256 + +struct mtrr_state_type { + struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; + mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; + unsigned char enabled; + unsigned char have_fixed; + mtrr_type def_type; +}; + +#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) +#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) + /* These are the various ioctls */ #define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry) #define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 90db91e15931..b59ddcc88cd8 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -14,14 +14,6 @@ #include #include "mtrr.h" -struct mtrr_state_type { - struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; - mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; - unsigned char enabled; - unsigned char have_fixed; - mtrr_type def_type; -}; - struct fixed_range_block { int base_msr; /* start address of an MTRR block */ int ranges; /* number of MTRRs in this block */ @@ -35,10 +27,12 @@ static struct fixed_range_block fixed_range_blocks[] = { }; static unsigned long smp_changes_mask; -static struct mtrr_state_type mtrr_state = {}; static int mtrr_state_set; u64 mtrr_tom2; +struct mtrr_state_type mtrr_state = {}; +EXPORT_SYMBOL_GPL(mtrr_state); + #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "mtrr." diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 988538202ddc..ffd60409cc6d 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -8,12 +8,6 @@ #define MTRRcap_MSR 0x0fe #define MTRRdefType_MSR 0x2ff -#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) -#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) - -#define MTRR_NUM_FIXED_RANGES 88 -#define MTRR_MAX_VAR_RANGES 256 - #define MTRRfix64K_00000_MSR 0x250 #define MTRRfix16K_80000_MSR 0x258 #define MTRRfix16K_A0000_MSR 0x259 @@ -30,10 +24,6 @@ #define MTRR_CHANGE_MASK_VARIABLE 0x02 #define MTRR_CHANGE_MASK_DEFTYPE 0x04 -/* In the Intel processor's MTRR interface, the MTRR type is always held in - an 8 bit field: */ -typedef u8 mtrr_type; - extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; struct mtrr_ops { @@ -71,13 +61,6 @@ struct set_mtrr_context { u32 ccr3; }; -struct mtrr_var_range { - u32 base_lo; - u32 base_hi; - u32 mask_lo; - u32 mask_hi; -}; - void set_mtrr_done(struct set_mtrr_context *ctxt); void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); -- cgit v1.2.3 From 2340b62f77c782c305e6ae7748675a638436d1ef Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 17 Nov 2008 19:03:23 -0200 Subject: kdump: forcibly disable VMX and SVM on machine_crash_shutdown() We need to disable virtualization extensions on all CPUs before booting the kdump kernel, otherwise the kdump kernel booting will fail, and rebooting after the kdump kernel did its task may also fail. We do it using cpu_emergency_vmxoff() and cpu_emergency_svm_disable(), that should always work, because those functions check if the CPUs support SVM or VMX before doing their tasks. Signed-off-by: Eduardo Habkost Signed-off-by: Avi Kivity --- arch/x86/kernel/crash.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index d84a852e4cd7..c689d19e35ab 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -49,6 +50,15 @@ static void kdump_nmi_callback(int cpu, struct die_args *args) #endif crash_save_cpu(regs, cpu); + /* Disable VMX or SVM if needed. + * + * We need to disable virtualization on all CPUs. + * Having VMX or SVM enabled on any CPU may break rebooting + * after the kdump kernel has finished its task. + */ + cpu_emergency_vmxoff(); + cpu_emergency_svm_disable(); + disable_local_APIC(); } @@ -80,6 +90,14 @@ void native_machine_crash_shutdown(struct pt_regs *regs) local_irq_disable(); kdump_nmi_shootdown_cpus(); + + /* Booting kdump kernel with VMX or SVM enabled won't work, + * because (among other limitations) we can't disable paging + * with the virt flags. + */ + cpu_emergency_vmxoff(); + cpu_emergency_svm_disable(); + lapic_shutdown(); #if defined(CONFIG_X86_IO_APIC) disable_IO_APIC(); -- cgit v1.2.3 From d176720d34c72f7a8474a12204add93e54fe3ef1 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 17 Nov 2008 19:03:24 -0200 Subject: x86: disable VMX on all CPUs on reboot On emergency_restart, we may need to use an NMI to disable virtualization on all CPUs. We do that using nmi_shootdown_cpus() if VMX is enabled. Note: With this patch, we will run the NMI stuff only when the CPU where emergency_restart() was called has VMX enabled. This should work on most cases because KVM enables VMX on all CPUs, but we may miss the small window where KVM is doing that. Also, I don't know if all code using VMX out there always enable VMX on all CPUs like KVM does. We have two other alternatives for that: a) Have an API that all code that enables VMX on any CPU should use to tell the kernel core that it is going to enable VMX on the CPUs. b) Always call nmi_shootdown_cpus() if the CPU supports VMX. This is a bit intrusive and more risky, as it would run nmi_shootdown_cpus() on emergency_reboot() even on systems where virtualization is never enabled. Finding a proper point to hook the nmi_shootdown_cpus() call isn't trivial, as the non-emergency machine_restart() (that doesn't need the NMI tricks) uses machine_emergency_restart() directly. The solution to make this work without adding a new function or argument to machine_ops was setting a 'reboot_emergency' flag that tells if native_machine_emergency_restart() needs to do the virt cleanup or not. Signed-off-by: Eduardo Habkost Signed-off-by: Avi Kivity --- arch/x86/kernel/reboot.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 61f718df6eec..72e0e4e712d6 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef CONFIG_X86_32 # include @@ -39,6 +40,12 @@ int reboot_force; static int reboot_cpu = -1; #endif +/* This is set if we need to go through the 'emergency' path. + * When machine_emergency_restart() is called, we may be on + * an inconsistent state and won't be able to do a clean cleanup + */ +static int reboot_emergency; + /* This is set by the PCI code if either type 1 or type 2 PCI is detected */ bool port_cf9_safe = false; @@ -368,6 +375,48 @@ static inline void kb_wait(void) } } +static void vmxoff_nmi(int cpu, struct die_args *args) +{ + cpu_emergency_vmxoff(); +} + +/* Use NMIs as IPIs to tell all CPUs to disable virtualization + */ +static void emergency_vmx_disable_all(void) +{ + /* Just make sure we won't change CPUs while doing this */ + local_irq_disable(); + + /* We need to disable VMX on all CPUs before rebooting, otherwise + * we risk hanging up the machine, because the CPU ignore INIT + * signals when VMX is enabled. + * + * We can't take any locks and we may be on an inconsistent + * state, so we use NMIs as IPIs to tell the other CPUs to disable + * VMX and halt. + * + * For safety, we will avoid running the nmi_shootdown_cpus() + * stuff unnecessarily, but we don't have a way to check + * if other CPUs have VMX enabled. So we will call it only if the + * CPU we are running on has VMX enabled. + * + * We will miss cases where VMX is not enabled on all CPUs. This + * shouldn't do much harm because KVM always enable VMX on all + * CPUs anyway. But we can miss it on the small window where KVM + * is still enabling VMX. + */ + if (cpu_has_vmx() && cpu_vmx_enabled()) { + /* Disable VMX on this CPU. + */ + cpu_vmxoff(); + + /* Halt and disable VMX on the other CPUs */ + nmi_shootdown_cpus(vmxoff_nmi); + + } +} + + void __attribute__((weak)) mach_reboot_fixups(void) { } @@ -376,6 +425,9 @@ static void native_machine_emergency_restart(void) { int i; + if (reboot_emergency) + emergency_vmx_disable_all(); + /* Tell the BIOS if we want cold or warm reboot */ *((unsigned short *)__va(0x472)) = reboot_mode; @@ -482,13 +534,19 @@ void native_machine_shutdown(void) #endif } +static void __machine_emergency_restart(int emergency) +{ + reboot_emergency = emergency; + machine_ops.emergency_restart(); +} + static void native_machine_restart(char *__unused) { printk("machine restart\n"); if (!reboot_force) machine_shutdown(); - machine_emergency_restart(); + __machine_emergency_restart(0); } static void native_machine_halt(void) @@ -532,7 +590,7 @@ void machine_shutdown(void) void machine_emergency_restart(void) { - machine_ops.emergency_restart(); + __machine_emergency_restart(1); } void machine_restart(char *cmd) -- cgit v1.2.3 From 423cd25a5ade17b8a5cc85e6f0a0f37028d2c4a2 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 24 Nov 2008 15:45:23 -0200 Subject: x86: KVM guest: sign kvmclock as paravirt Currently, we only set the KVM paravirt signature in case of CONFIG_KVM_GUEST. However, it is possible to have it turned off, while CONFIG_KVM_CLOCK is turned on. This is also a paravirt case, and should be shown accordingly. Signed-off-by: Glauber Costa Signed-off-by: Avi Kivity --- arch/x86/kernel/kvmclock.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index e169ae9b6a62..b38e801014e3 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -194,5 +194,7 @@ void __init kvmclock_init(void) #endif kvm_get_preset_lpj(); clocksource_register(&kvm_clock); + pv_info.paravirt_enabled = 1; + pv_info.name = "KVM"; } } -- cgit v1.2.3 From e93353c93a3ba4215633ce930784f40a4e94e3f9 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Fri, 5 Dec 2008 18:36:45 -0200 Subject: x86: KVM guest: kvm_get_tsc_khz: return khz, not lpj kvm_get_tsc_khz() currently returns the previously-calculated preset_lpj value, but it is in loops-per-jiffy, not kHz. The current code works correctly only when HZ=1000. Signed-off-by: Eduardo Habkost Signed-off-by: Avi Kivity --- arch/x86/kernel/kvmclock.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index b38e801014e3..652fce6d2cce 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void) */ static unsigned long kvm_get_tsc_khz(void) { - return preset_lpj; + struct pvclock_vcpu_time_info *src; + src = &per_cpu(hv_clock, 0); + return pvclock_tsc_khz(src); } static void kvm_get_preset_lpj(void) { - struct pvclock_vcpu_time_info *src; unsigned long khz; u64 lpj; - src = &per_cpu(hv_clock, 0); - khz = pvclock_tsc_khz(src); + khz = kvm_get_tsc_khz(); lpj = ((u64)khz * 1000); do_div(lpj, HZ); -- cgit v1.2.3