summaryrefslogtreecommitdiff
path: root/arch/x86/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/lapic.c27
-rw-r--r--arch/x86/kvm/mmu.c26
-rw-r--r--arch/x86/kvm/svm.c19
-rw-r--r--arch/x86/kvm/vmx.c43
-rw-r--r--arch/x86/kvm/x86.c28
-rw-r--r--arch/x86/kvm/x86.h2
6 files changed, 100 insertions, 45 deletions
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0cefba28c864..17c0472c5b34 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -548,7 +548,7 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
}
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
- unsigned long ipi_bitmap_high, int min,
+ unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit)
{
int i;
@@ -571,18 +571,31 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
rcu_read_lock();
map = rcu_dereference(kvm->arch.apic_map);
+ if (min > map->max_apic_id)
+ goto out;
/* Bits above cluster_size are masked in the caller. */
- for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) {
- vcpu = map->phys_map[min + i]->vcpu;
- count += kvm_apic_set_irq(vcpu, &irq, NULL);
+ for_each_set_bit(i, &ipi_bitmap_low,
+ min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
+ if (map->phys_map[min + i]) {
+ vcpu = map->phys_map[min + i]->vcpu;
+ count += kvm_apic_set_irq(vcpu, &irq, NULL);
+ }
}
min += cluster_size;
- for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) {
- vcpu = map->phys_map[min + i]->vcpu;
- count += kvm_apic_set_irq(vcpu, &irq, NULL);
+
+ if (min > map->max_apic_id)
+ goto out;
+
+ for_each_set_bit(i, &ipi_bitmap_high,
+ min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
+ if (map->phys_map[min + i]) {
+ vcpu = map->phys_map[min + i]->vcpu;
+ count += kvm_apic_set_irq(vcpu, &irq, NULL);
+ }
}
+out:
rcu_read_unlock();
return count;
}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a282321329b5..e24ea7067373 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1853,11 +1853,6 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
}
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
-{
- return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
-}
-
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
{
return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
@@ -5217,7 +5212,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
void *insn, int insn_len)
{
- int r, emulation_type = EMULTYPE_RETRY;
+ int r, emulation_type = 0;
enum emulation_result er;
bool direct = vcpu->arch.mmu.direct_map;
@@ -5230,10 +5225,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
r = RET_PF_INVALID;
if (unlikely(error_code & PFERR_RSVD_MASK)) {
r = handle_mmio_page_fault(vcpu, cr2, direct);
- if (r == RET_PF_EMULATE) {
- emulation_type = 0;
+ if (r == RET_PF_EMULATE)
goto emulate;
- }
}
if (r == RET_PF_INVALID) {
@@ -5260,8 +5253,19 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
return 1;
}
- if (mmio_info_in_cache(vcpu, cr2, direct))
- emulation_type = 0;
+ /*
+ * vcpu->arch.mmu.page_fault returned RET_PF_EMULATE, but we can still
+ * optimistically try to just unprotect the page and let the processor
+ * re-execute the instruction that caused the page fault. Do not allow
+ * retrying MMIO emulation, as it's not only pointless but could also
+ * cause us to enter an infinite loop because the processor will keep
+ * faulting on the non-existent MMIO address. Retrying an instruction
+ * from a nested guest is also pointless and dangerous as we are only
+ * explicitly shadowing L1's page tables, i.e. unprotecting something
+ * for L1 isn't going to magically fix whatever issue cause L2 to fail.
+ */
+ if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu))
+ emulation_type = EMULTYPE_ALLOW_RETRY;
emulate:
/*
* On AMD platforms, under certain conditions insn_len may be zero on #NPF.
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6276140044d0..89c4c5aa15f1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -776,7 +776,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
}
if (!svm->next_rip) {
- if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
+ if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) !=
EMULATE_DONE)
printk(KERN_DEBUG "%s: NOP\n", __func__);
return;
@@ -2715,7 +2715,7 @@ static int gp_interception(struct vcpu_svm *svm)
WARN_ON_ONCE(!enable_vmware_backdoor);
- er = emulate_instruction(vcpu,
+ er = kvm_emulate_instruction(vcpu,
EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
if (er == EMULATE_USER_EXIT)
return 0;
@@ -2819,7 +2819,7 @@ static int io_interception(struct vcpu_svm *svm)
string = (io_info & SVM_IOIO_STR_MASK) != 0;
in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
if (string)
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
port = io_info >> 16;
size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
@@ -3861,7 +3861,7 @@ static int iret_interception(struct vcpu_svm *svm)
static int invlpg_interception(struct vcpu_svm *svm)
{
if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
- return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
return kvm_skip_emulated_instruction(&svm->vcpu);
@@ -3869,13 +3869,13 @@ static int invlpg_interception(struct vcpu_svm *svm)
static int emulate_on_interception(struct vcpu_svm *svm)
{
- return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
}
static int rsm_interception(struct vcpu_svm *svm)
{
- return x86_emulate_instruction(&svm->vcpu, 0, 0,
- rsm_ins_bytes, 2) == EMULATE_DONE;
+ return kvm_emulate_instruction_from_buffer(&svm->vcpu,
+ rsm_ins_bytes, 2) == EMULATE_DONE;
}
static int rdpmc_interception(struct vcpu_svm *svm)
@@ -4700,7 +4700,7 @@ static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
ret = avic_unaccel_trap_write(svm);
} else {
/* Handling Fault */
- ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
+ ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
}
return ret;
@@ -6747,7 +6747,7 @@ e_free:
static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
{
unsigned long vaddr, vaddr_end, next_vaddr;
- unsigned long dst_vaddr, dst_vaddr_end;
+ unsigned long dst_vaddr;
struct page **src_p, **dst_p;
struct kvm_sev_dbg debug;
unsigned long n;
@@ -6763,7 +6763,6 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
size = debug.len;
vaddr_end = vaddr + size;
dst_vaddr = debug.dst_uaddr;
- dst_vaddr_end = dst_vaddr + size;
for (; vaddr < vaddr_end; vaddr = next_vaddr) {
int len, s_off, d_off;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1d26f3c4985b..533a327372c8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6983,7 +6983,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
* Cause the #SS fault with 0 error code in VM86 mode.
*/
if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
- if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
+ if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) {
if (vcpu->arch.halt_request) {
vcpu->arch.halt_request = 0;
return kvm_vcpu_halt(vcpu);
@@ -7054,7 +7054,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
WARN_ON_ONCE(!enable_vmware_backdoor);
- er = emulate_instruction(vcpu,
+ er = kvm_emulate_instruction(vcpu,
EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
if (er == EMULATE_USER_EXIT)
return 0;
@@ -7157,7 +7157,7 @@ static int handle_io(struct kvm_vcpu *vcpu)
++vcpu->stat.io_exits;
if (string)
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
port = exit_qualification >> 16;
size = (exit_qualification & 7) + 1;
@@ -7231,7 +7231,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
static int handle_desc(struct kvm_vcpu *vcpu)
{
WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
}
static int handle_cr(struct kvm_vcpu *vcpu)
@@ -7480,7 +7480,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu)
static int handle_invd(struct kvm_vcpu *vcpu)
{
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
}
static int handle_invlpg(struct kvm_vcpu *vcpu)
@@ -7547,7 +7547,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
}
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
}
static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
@@ -7704,8 +7704,8 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
return kvm_skip_emulated_instruction(vcpu);
else
- return x86_emulate_instruction(vcpu, gpa, EMULTYPE_SKIP,
- NULL, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) ==
+ EMULATE_DONE;
}
return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
@@ -7748,7 +7748,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
if (kvm_test_request(KVM_REQ_EVENT, vcpu))
return 1;
- err = emulate_instruction(vcpu, 0);
+ err = kvm_emulate_instruction(vcpu, 0);
if (err == EMULATE_USER_EXIT) {
++vcpu->stat.mmio_exits;
@@ -12537,8 +12537,11 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
bool from_vmentry = !!exit_qual;
u32 dummy_exit_qual;
+ u32 vmcs01_cpu_exec_ctrl;
int r = 0;
+ vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+
enter_guest_mode(vcpu);
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
@@ -12575,6 +12578,25 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
}
/*
+ * If L1 had a pending IRQ/NMI until it executed
+ * VMLAUNCH/VMRESUME which wasn't delivered because it was
+ * disallowed (e.g. interrupts disabled), L0 needs to
+ * evaluate if this pending event should cause an exit from L2
+ * to L1 or delivered directly to L2 (e.g. In case L1 don't
+ * intercept EXTERNAL_INTERRUPT).
+ *
+ * Usually this would be handled by L0 requesting a
+ * IRQ/NMI window by setting VMCS accordingly. However,
+ * this setting was done on VMCS01 and now VMCS02 is active
+ * instead. Thus, we force L0 to perform pending event
+ * evaluation by requesting a KVM_REQ_EVENT.
+ */
+ if (vmcs01_cpu_exec_ctrl &
+ (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) {
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ }
+
+ /*
* Note no nested_vmx_succeed or nested_vmx_fail here. At this point
* we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
* returned as far as L1 is concerned. It will only return (and set
@@ -13988,9 +14010,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
return -EINVAL;
- if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING)
- vmx->nested.nested_run_pending = 1;
-
vmx->nested.dirty_vmcs12 = true;
ret = enter_vmx_non_root_mode(vcpu, NULL);
if (ret)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 506bd2b4b8bb..542f6315444d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4987,7 +4987,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
emul_type = 0;
}
- er = emulate_instruction(vcpu, emul_type);
+ er = kvm_emulate_instruction(vcpu, emul_type);
if (er == EMULATE_USER_EXIT)
return 0;
if (er != EMULATE_DONE)
@@ -5870,7 +5870,10 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
gpa_t gpa = cr2;
kvm_pfn_t pfn;
- if (emulation_type & EMULTYPE_NO_REEXECUTE)
+ if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
+ return false;
+
+ if (WARN_ON_ONCE(is_guest_mode(vcpu)))
return false;
if (!vcpu->arch.mmu.direct_map) {
@@ -5958,7 +5961,10 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
*/
vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
- if (!(emulation_type & EMULTYPE_RETRY))
+ if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
+ return false;
+
+ if (WARN_ON_ONCE(is_guest_mode(vcpu)))
return false;
if (x86_page_table_writing_insn(ctxt))
@@ -6276,7 +6282,19 @@ restart:
return r;
}
-EXPORT_SYMBOL_GPL(x86_emulate_instruction);
+
+int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type)
+{
+ return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_instruction);
+
+int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
+ void *insn, int insn_len)
+{
+ return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
unsigned short port)
@@ -7734,7 +7752,7 @@ static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
{
int r;
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+ r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (r != EMULATE_DONE)
return 0;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 257f27620bc2..67b9568613f3 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -274,6 +274,8 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
int page_num);
bool kvm_vector_hashing_enabled(void);
+int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
+ int emulation_type, void *insn, int insn_len);
#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \