Skip to content

Commit

Permalink
Merge tag 'kvm-4.15-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Browse files Browse the repository at this point in the history
Pull KVM updates from Radim Krčmář:
 "Trimmed second batch of KVM changes for Linux 4.15:

   - GICv4 Support for KVM/ARM

   - re-introduce support for CPUs without virtual NMI (cc stable) and
     allow testing of KVM without virtual NMI on available CPUs

   - fix long-standing performance issues with assigned devices on AMD
     (cc stable)"

* tag 'kvm-4.15-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (30 commits)
  kvm: vmx: Allow disabling virtual NMI support
  kvm: vmx: Reinstate support for CPUs without virtual NMI
  KVM: SVM: obey guest PAT
  KVM: arm/arm64: Don't queue VLPIs on INV/INVALL
  KVM: arm/arm64: Fix GICv4 ITS initialization issues
  KVM: arm/arm64: GICv4: Theory of operations
  KVM: arm/arm64: GICv4: Enable VLPI support
  KVM: arm/arm64: GICv4: Prevent userspace from changing doorbell affinity
  KVM: arm/arm64: GICv4: Prevent a VM using GICv4 from being saved
  KVM: arm/arm64: GICv4: Enable virtual cpuif if VLPIs can be delivered
  KVM: arm/arm64: GICv4: Hook vPE scheduling into vgic flush/sync
  KVM: arm/arm64: GICv4: Use the doorbell interrupt as an unblocking source
  KVM: arm/arm64: GICv4: Add doorbell interrupt handling
  KVM: arm/arm64: GICv4: Use pending_last as a scheduling hint
  KVM: arm/arm64: GICv4: Handle INVALL applied to a vPE
  KVM: arm/arm64: GICv4: Propagate property updates to VLPIs
  KVM: arm/arm64: GICv4: Handle MOVALL applied to a vPE
  KVM: arm/arm64: GICv4: Handle CLEAR applied to a VLPI
  KVM: arm/arm64: GICv4: Propagate affinity changes to the physical ITS
  KVM: arm/arm64: GICv4: Unmap VLPI when freeing an LPI
  ...
  • Loading branch information
torvalds committed Nov 25, 2017
2 parents 83ada03 + d02fcf5 commit 7753ea0
Show file tree
Hide file tree
Showing 19 changed files with 819 additions and 158 deletions.
4 changes: 4 additions & 0 deletions Documentation/admin-guide/kernel-parameters.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1890,6 +1890,10 @@
[KVM,ARM] Trap guest accesses to GICv3 common
system registers

kvm-arm.vgic_v4_enable=
[KVM,ARM] Allow use of GICv4 for direct injection of
LPIs.

kvm-intel.ept= [KVM,Intel] Disable extended page tables
(virtualized MMU) support on capable Intel chips.
Default is 1 (enabled)
Expand Down
2 changes: 2 additions & 0 deletions Documentation/virtual/kvm/devices/arm-vgic-its.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ Groups:
-EINVAL: Inconsistent restored data
-EFAULT: Invalid guest ram access
-EBUSY: One or more VCPUS are running
-EACCES: The virtual ITS is backed by a physical GICv4 ITS, and the
state is not available

KVM_DEV_ARM_VGIC_GRP_ITS_REGS
Attributes:
Expand Down
5 changes: 5 additions & 0 deletions arch/arm/kvm/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#

source "virt/kvm/Kconfig"
source "virt/lib/Kconfig"

menuconfig VIRTUALIZATION
bool "Virtualization"
Expand All @@ -23,6 +24,8 @@ config KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
select ARM_GIC
select ARM_GIC_V3
select ARM_GIC_V3_ITS
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_MMIO
Expand All @@ -36,6 +39,8 @@ config KVM
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_MSI
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
---help---
Support hosting virtualized guest machines.
Expand Down
1 change: 1 addition & 0 deletions arch/arm/kvm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ obj-y += $(KVM)/arm/vgic/vgic-init.o
obj-y += $(KVM)/arm/vgic/vgic-irqfd.o
obj-y += $(KVM)/arm/vgic/vgic-v2.o
obj-y += $(KVM)/arm/vgic/vgic-v3.o
obj-y += $(KVM)/arm/vgic/vgic-v4.o
obj-y += $(KVM)/arm/vgic/vgic-mmio.o
obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o
obj-y += $(KVM)/arm/vgic/vgic-mmio-v3.o
Expand Down
3 changes: 3 additions & 0 deletions arch/arm64/kvm/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#

source "virt/kvm/Kconfig"
source "virt/lib/Kconfig"

menuconfig VIRTUALIZATION
bool "Virtualization"
Expand Down Expand Up @@ -36,6 +37,8 @@ config KVM
select HAVE_KVM_MSI
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
---help---
Support hosting virtualized guest machines.
We don't support KVM with 16K page tables yet, due to the multiple
Expand Down
1 change: 1 addition & 0 deletions arch/arm64/kvm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v4.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
Expand Down
7 changes: 7 additions & 0 deletions arch/x86/kvm/svm.c
Original file line number Diff line number Diff line change
Expand Up @@ -3671,6 +3671,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
u32 ecx = msr->index;
u64 data = msr->data;
switch (ecx) {
case MSR_IA32_CR_PAT:
if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
return 1;
vcpu->arch.pat = data;
svm->vmcb->save.g_pat = data;
mark_dirty(svm->vmcb, VMCB_NPT);
break;
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr);
break;
Expand Down
161 changes: 117 additions & 44 deletions arch/x86/kvm/vmx.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
static bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);

static bool __read_mostly enable_vnmi = 1;
module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);

static bool __read_mostly flexpriority_enabled = 1;
module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);

Expand Down Expand Up @@ -202,6 +205,10 @@ struct loaded_vmcs {
bool nmi_known_unmasked;
unsigned long vmcs_host_cr3; /* May not match real cr3 */
unsigned long vmcs_host_cr4; /* May not match real cr4 */
/* Support for vnmi-less CPUs */
int soft_vnmi_blocked;
ktime_t entry_time;
s64 vnmi_blocked_time;
struct list_head loaded_vmcss_on_cpu_link;
};

Expand Down Expand Up @@ -1291,6 +1298,11 @@ static inline bool cpu_has_vmx_invpcid(void)
SECONDARY_EXEC_ENABLE_INVPCID;
}

static inline bool cpu_has_virtual_nmis(void)
{
return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
}

static inline bool cpu_has_vmx_wbinvd_exit(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
Expand Down Expand Up @@ -1348,11 +1360,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
(vmcs12->secondary_vm_exec_control & bit);
}

static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
{
return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
}

static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
{
return vmcs12->pin_based_vm_exec_control &
Expand Down Expand Up @@ -3712,9 +3719,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
&_vmexit_control) < 0)
return -EIO;

min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
PIN_BASED_VIRTUAL_NMIS;
opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
PIN_BASED_VMX_PREEMPTION_TIMER;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
&_pin_based_exec_control) < 0)
return -EIO;
Expand Down Expand Up @@ -5232,6 +5239,10 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)

if (!kvm_vcpu_apicv_active(&vmx->vcpu))
pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;

if (!enable_vnmi)
pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;

/* Enable the preemption timer dynamically */
pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
return pin_based_exec_ctrl;
Expand Down Expand Up @@ -5666,7 +5677,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)

static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
if (!enable_vnmi ||
vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
enable_irq_window(vcpu);
return;
}
Expand Down Expand Up @@ -5706,6 +5718,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);

if (!enable_vnmi) {
/*
* Tracking the NMI-blocked state in software is built upon
* finding the next open IRQ window. This, in turn, depends on
* well-behaving guests: They have to keep IRQs disabled at
* least as long as the NMI handler runs. Otherwise we may
* cause NMI nesting, maybe breaking the guest. But as this is
* highly unlikely, we can live with the residual risk.
*/
vmx->loaded_vmcs->soft_vnmi_blocked = 1;
vmx->loaded_vmcs->vnmi_blocked_time = 0;
}

++vcpu->stat.nmi_injections;
vmx->loaded_vmcs->nmi_known_unmasked = false;

Expand All @@ -5724,6 +5749,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool masked;

if (!enable_vnmi)
return vmx->loaded_vmcs->soft_vnmi_blocked;
if (vmx->loaded_vmcs->nmi_known_unmasked)
return false;
masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
Expand All @@ -5735,20 +5762,31 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);

vmx->loaded_vmcs->nmi_known_unmasked = !masked;
if (masked)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
else
vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
if (!enable_vnmi) {
if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
vmx->loaded_vmcs->soft_vnmi_blocked = masked;
vmx->loaded_vmcs->vnmi_blocked_time = 0;
}
} else {
vmx->loaded_vmcs->nmi_known_unmasked = !masked;
if (masked)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
else
vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
}
}

static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
{
if (to_vmx(vcpu)->nested.nested_run_pending)
return 0;

if (!enable_vnmi &&
to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
return 0;

return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
| GUEST_INTR_STATE_NMI));
Expand Down Expand Up @@ -6476,6 +6514,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
* AAK134, BY25.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
enable_vnmi &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);

Expand Down Expand Up @@ -6535,6 +6574,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)

static int handle_nmi_window(struct kvm_vcpu *vcpu)
{
WARN_ON_ONCE(!enable_vnmi);
vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_VIRTUAL_NMI_PENDING);
++vcpu->stat.nmi_window_exits;
Expand Down Expand Up @@ -6758,6 +6798,9 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_flexpriority())
flexpriority_enabled = 0;

if (!cpu_has_virtual_nmis())
enable_vnmi = 0;

/*
* set_apic_access_page_addr() is used to reload apic access
* page upon invalidation. No need to do anything if not
Expand Down Expand Up @@ -6962,7 +7005,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
}

/* Create a new VMCS */
item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
if (!item)
return NULL;
item->vmcs02.vmcs = alloc_vmcs();
Expand Down Expand Up @@ -7979,6 +8022,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
* "blocked by NMI" bit has to be set before next VM entry.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
enable_vnmi &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
Expand Down Expand Up @@ -8823,6 +8867,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
return 0;
}

if (unlikely(!enable_vnmi &&
vmx->loaded_vmcs->soft_vnmi_blocked)) {
if (vmx_interrupt_allowed(vcpu)) {
vmx->loaded_vmcs->soft_vnmi_blocked = 0;
} else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
vcpu->arch.nmi_pending) {
/*
* This CPU don't support us in finding the end of an
* NMI-blocked window if the guest runs with IRQs
* disabled. So we pull the trigger after 1 s of
* futile waiting, but inform the user about this.
*/
printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
"state on VCPU %d after 1 s timeout\n",
__func__, vcpu->vcpu_id);
vmx->loaded_vmcs->soft_vnmi_blocked = 0;
}
}

if (exit_reason < kvm_vmx_max_exit_handlers
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu);
Expand Down Expand Up @@ -9105,33 +9168,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)

idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;

if (vmx->loaded_vmcs->nmi_known_unmasked)
return;
/*
* Can't use vmx->exit_intr_info since we're not sure what
* the exit reason is.
*/
exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
/*
* SDM 3: 27.7.1.2 (September 2008)
* Re-set bit "block by NMI" before VM entry if vmexit caused by
* a guest IRET fault.
* SDM 3: 23.2.2 (September 2008)
* Bit 12 is undefined in any of the following cases:
* If the VM exit sets the valid bit in the IDT-vectoring
* information field.
* If the VM exit is due to a double fault.
*/
if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
vector != DF_VECTOR && !idtv_info_valid)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
else
vmx->loaded_vmcs->nmi_known_unmasked =
!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
& GUEST_INTR_STATE_NMI);
if (enable_vnmi) {
if (vmx->loaded_vmcs->nmi_known_unmasked)
return;
/*
* Can't use vmx->exit_intr_info since we're not sure what
* the exit reason is.
*/
exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
/*
* SDM 3: 27.7.1.2 (September 2008)
* Re-set bit "block by NMI" before VM entry if vmexit caused by
* a guest IRET fault.
* SDM 3: 23.2.2 (September 2008)
* Bit 12 is undefined in any of the following cases:
* If the VM exit sets the valid bit in the IDT-vectoring
* information field.
* If the VM exit is due to a double fault.
*/
if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
vector != DF_VECTOR && !idtv_info_valid)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
else
vmx->loaded_vmcs->nmi_known_unmasked =
!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
& GUEST_INTR_STATE_NMI);
} else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
vmx->loaded_vmcs->vnmi_blocked_time +=
ktime_to_ns(ktime_sub(ktime_get(),
vmx->loaded_vmcs->entry_time));
}

static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
Expand Down Expand Up @@ -9248,6 +9316,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long debugctlmsr, cr3, cr4;

/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!enable_vnmi &&
vmx->loaded_vmcs->soft_vnmi_blocked))
vmx->loaded_vmcs->entry_time = ktime_get();

/* Don't enter VMX if guest state is invalid, let the exit handler
start emulation until we arrive back to a valid state */
if (vmx->emulation_required)
Expand Down
Loading

0 comments on commit 7753ea0

Please sign in to comment.