Skip to content

Commit

Permalink
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Browse files Browse the repository at this point in the history
Pull KVM fixes from Paolo Bonzini:

 - Doc fixes

 - selftests fixes

 - Add runstate information to the new Xen support

 - Allow compiling out the Xen interface

 - 32-bit PAE without EPT bugfix

 - NULL pointer dereference bugfix

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: SVM: Clear the CR4 register on reset
  KVM: x86/xen: Add support for vCPU runstate information
  KVM: x86/xen: Fix return code when clearing vcpu_info and vcpu_time_info
  selftests: kvm: Mmap the entire vcpu mmap area
  KVM: Documentation: Fix index for KVM_CAP_PPC_DAWR1
  KVM: x86: allow compiling out the Xen hypercall interface
  KVM: xen: flush deferred static key before checking it
  KVM: x86/mmu: Set SPTE_AD_WRPROT_ONLY_MASK if and only if PML is enabled
  KVM: x86: hyper-v: Fix Hyper-V context null-ptr-deref
  KVM: x86: remove misplaced comment on active_mmu_pages
  KVM: Documentation: rectify rst markup in kvm_run->flags
  Documentation: kvm: fix messy conversion from .txt to .rst
  • Loading branch information
torvalds committed Mar 4, 2021
2 parents c5a58f8 + 9e46f6c commit cee407c
Show file tree
Hide file tree
Showing 13 changed files with 633 additions and 76 deletions.
115 changes: 60 additions & 55 deletions Documentation/virt/kvm/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3856,49 +3856,20 @@ base 2 of the page size in the bottom 6 bits.
-EFAULT if struct kvm_ppc_resize_hpt cannot be read,
-EINVAL if the supplied shift or flags are invalid,
-ENOMEM if unable to allocate the new HPT,
-ENOSPC if there was a hash collision

::

struct kvm_ppc_rmmu_info {
struct kvm_ppc_radix_geom {
__u8 page_shift;
__u8 level_bits[4];
__u8 pad[3];
} geometries[8];
__u32 ap_encodings[8];
};

The geometries[] field gives up to 8 supported geometries for the
radix page table, in terms of the log base 2 of the smallest page
size, and the number of bits indexed at each level of the tree, from
the PTE level up to the PGD level in that order. Any unused entries
will have 0 in the page_shift field.

The ap_encodings gives the supported page sizes and their AP field
encodings, encoded with the AP value in the top 3 bits and the log
base 2 of the page size in the bottom 6 bits.

4.102 KVM_PPC_RESIZE_HPT_PREPARE
--------------------------------

:Capability: KVM_CAP_SPAPR_RESIZE_HPT
:Architectures: powerpc
:Type: vm ioctl
:Parameters: struct kvm_ppc_resize_hpt (in)
:Returns: 0 on successful completion,
>0 if a new HPT is being prepared, the value is an estimated
number of milliseconds until preparation is complete,
-EFAULT if struct kvm_ppc_resize_hpt cannot be read,
-EINVAL if the supplied shift or flags are invalid,when moving existing
HPT entries to the new HPT,
-EIO on other error conditions

Used to implement the PAPR extension for runtime resizing of a guest's
Hashed Page Table (HPT). Specifically this starts, stops or monitors
the preparation of a new potential HPT for the guest, essentially
implementing the H_RESIZE_HPT_PREPARE hypercall.

::

struct kvm_ppc_resize_hpt {
__u64 flags;
__u32 shift;
__u32 pad;
};

If called with shift > 0 when there is no pending HPT for the guest,
this begins preparation of a new pending HPT of size 2^(shift) bytes.
It then returns a positive integer with the estimated number of
Expand Down Expand Up @@ -3926,14 +3897,6 @@ Normally this will be called repeatedly with the same parameters until
it returns <= 0. The first call will initiate preparation, subsequent
ones will monitor preparation until it completes or fails.

::

struct kvm_ppc_resize_hpt {
__u64 flags;
__u32 shift;
__u32 pad;
};

4.103 KVM_PPC_RESIZE_HPT_COMMIT
-------------------------------

Expand All @@ -3956,6 +3919,14 @@ Hashed Page Table (HPT). Specifically this requests that the guest be
transferred to working with the new HPT, essentially implementing the
H_RESIZE_HPT_COMMIT hypercall.

::

struct kvm_ppc_resize_hpt {
__u64 flags;
__u32 shift;
__u32 pad;
};

This should only be called after KVM_PPC_RESIZE_HPT_PREPARE has
returned 0 with the same parameters. In other cases
KVM_PPC_RESIZE_HPT_COMMIT will return an error (usually -ENXIO or
Expand All @@ -3971,14 +3942,6 @@ HPT and the previous HPT will be discarded.

On failure, the guest will still be operating on its previous HPT.

::

struct kvm_ppc_resize_hpt {
__u64 flags;
__u32 shift;
__u32 pad;
};

4.104 KVM_X86_GET_MCE_CAP_SUPPORTED
-----------------------------------

Expand Down Expand Up @@ -4915,6 +4878,14 @@ see KVM_XEN_HVM_SET_ATTR above.
union {
__u64 gpa;
__u64 pad[4];
struct {
__u64 state;
__u64 state_entry_time;
__u64 time_running;
__u64 time_runnable;
__u64 time_blocked;
__u64 time_offline;
} runstate;
} u;
};

Expand All @@ -4927,6 +4898,31 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
Sets the guest physical address of an additional pvclock structure
for a given vCPU. This is typically used for guest vsyscall support.

KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR
Sets the guest physical address of the vcpu_runstate_info for a given
vCPU. This is how a Xen guest tracks CPU state such as steal time.

KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT
Sets the runstate (RUNSTATE_running/_runnable/_blocked/_offline) of
the given vCPU from the .u.runstate.state member of the structure.
KVM automatically accounts running and runnable time but blocked
and offline states are only entered explicitly.

KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA
Sets all fields of the vCPU runstate data from the .u.runstate member
of the structure, including the current runstate. The state_entry_time
must equal the sum of the other four times.

KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST
This *adds* the contents of the .u.runstate members of the structure
to the corresponding members of the given vCPU's runstate data, thus
permitting atomic adjustments to the runstate times. The adjustment
to the state_entry_time must equal the sum of the adjustments to the
other four times. The state field must be set to -1, or to a valid
runstate value (RUNSTATE_running, RUNSTATE_runnable, RUNSTATE_blocked
or RUNSTATE_offline) to set the current accounted state as of the
adjusted state_entry_time.

4.130 KVM_XEN_VCPU_GET_ATTR
---------------------------

Expand All @@ -4939,6 +4935,9 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
Allows Xen vCPU attributes to be read. For the structure and types,
see KVM_XEN_VCPU_SET_ATTR above.

The KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST type may not be used
with the KVM_XEN_VCPU_GET_ATTR ioctl.

5. The kvm_run structure
========================

Expand Down Expand Up @@ -5000,7 +4999,8 @@ local APIC is not used.
__u16 flags;

More architecture-specific flags detailing state of the VCPU that may
affect the device's behavior. Current defined flags:
affect the device's behavior. Currently defined flags::

/* x86, set if the VCPU is in system management mode */
#define KVM_RUN_X86_SMM (1 << 0)
/* x86, set if bus lock detected in VM */
Expand Down Expand Up @@ -6217,7 +6217,7 @@ the bus lock vm exit can be preempted by a higher priority VM exit, the exit
notifications to userspace can be KVM_EXIT_BUS_LOCK or other reasons.
KVM_RUN_BUS_LOCK flag is used to distinguish between them.

7.22 KVM_CAP_PPC_DAWR1
7.23 KVM_CAP_PPC_DAWR1
----------------------

:Architectures: ppc
Expand Down Expand Up @@ -6702,6 +6702,7 @@ PVHVM guests. Valid flags are::
#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)

The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
ioctl is available, for the guest to set its hypercall page.
Expand All @@ -6716,3 +6717,7 @@ KVM_XEN_HVM_SET_ATTR, KVM_XEN_HVM_GET_ATTR, KVM_XEN_VCPU_SET_ATTR and
KVM_XEN_VCPU_GET_ATTR ioctls, as well as the delivery of exception vectors
for event channel upcalls when the evtchn_upcall_pending field of a vcpu's
vcpu_info is set.

The KVM_XEN_HVM_CONFIG_RUNSTATE flag indicates that the runstate-related
features KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR/_CURRENT/_DATA/_ADJUST are
supported by the KVM_XEN_VCPU_SET_ATTR/KVM_XEN_VCPU_GET_ATTR ioctls.
9 changes: 6 additions & 3 deletions arch/x86/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -535,10 +535,16 @@ struct kvm_vcpu_hv {
/* Xen HVM per vcpu emulation context */
struct kvm_vcpu_xen {
u64 hypercall_rip;
/*
 * NOTE(review): presumably one of the Xen RUNSTATE_running/_runnable/
 * _blocked/_offline values -- confirm against xen.c.
 */
u32 current_runstate;
/*
 * NOTE(review): each *_set flag presumably records whether the
 * matching *_cache below has been initialised from userspace -- confirm.
 */
bool vcpu_info_set;
bool vcpu_time_info_set;
bool runstate_set;
struct gfn_to_hva_cache vcpu_info_cache;
struct gfn_to_hva_cache vcpu_time_info_cache;
/*
 * NOTE(review): presumably caches the guest's vcpu_runstate_info
 * location given via KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR -- confirm.
 */
struct gfn_to_hva_cache runstate_cache;
/* NOTE(review): looks like a steal-time snapshot; units/source not visible here. */
u64 last_steal;
/* NOTE(review): presumably backs the state_entry_time attribute field -- confirm. */
u64 runstate_entry_time;
/*
 * Four accumulators. NOTE(review): presumably one per runstate
 * (running, runnable, blocked, offline), indexed by runstate -- confirm.
 */
u64 runstate_times[4];
};

struct kvm_vcpu_arch {
Expand Down Expand Up @@ -939,9 +945,6 @@ struct kvm_arch {
unsigned int indirect_shadow_pages;
u8 mmu_valid_gen;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/*
* Hash table of struct kvm_mmu_page.
*/
struct list_head active_mmu_pages;
struct list_head zapped_obsolete_pages;
struct list_head lpage_disallowed_mmu_pages;
Expand Down
9 changes: 9 additions & 0 deletions arch/x86/kvm/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,15 @@ config KVM_AMD_SEV
Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
with Encrypted State (SEV-ES) on AMD processors.

config KVM_XEN
bool "Support for Xen hypercall interface"
depends on KVM
help
Provides KVM support for hosting Xen HVM guests and
passing Xen hypercalls to userspace.

If in doubt, say "N".

config KVM_MMU_AUDIT
bool "Audit KVM MMU"
depends on KVM && TRACEPOINTS
Expand Down
3 changes: 2 additions & 1 deletion arch/x86/kvm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@ kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
$(KVM)/dirty_ring.o
kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o

kvm-y += x86.o emulate.o i8259.o irq.o lapic.o xen.o \
kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
mmu/spte.o
kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
kvm-$(CONFIG_KVM_XEN) += xen.o

kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/kvm/hyperv.c
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
struct kvm_vcpu_hv_synic *synic;

vcpu = get_vcpu_by_vpidx(kvm, vpidx);
if (!vcpu)
if (!vcpu || !to_hv_vcpu(vcpu))
return NULL;
synic = to_hv_synic(vcpu);
return (synic->active) ? synic : NULL;
Expand Down
16 changes: 8 additions & 8 deletions arch/x86/kvm/mmu/mmu_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,15 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
{
/*
* When using the EPT page-modification log, the GPAs in the log
* would come from L2 rather than L1. Therefore, we need to rely
* on write protection to record dirty pages. This also bypasses
* PML, since writes now result in a vmexit. Note, this helper will
* tag SPTEs as needing write-protection even if PML is disabled or
* unsupported, but that's ok because the tag is consumed if and only
* if PML is enabled. Omit the PML check to save a few uops.
* When using the EPT page-modification log, the GPAs in the CPU dirty
* log would come from L2 rather than L1. Therefore, we need to rely
* on write protection to record dirty pages, which bypasses PML, since
* writes now result in a vmexit. Note, the check on CPU dirty logging
* being enabled is mandatory as the bits used to denote WP-only SPTEs
* are reserved for NPT w/ PAE (32-bit KVM).
*/
return vcpu->arch.mmu == &vcpu->arch.guest_mmu;
return vcpu->arch.mmu == &vcpu->arch.guest_mmu &&
kvm_x86_ops.cpu_dirty_log_size;
}

bool is_nx_huge_page_enabled(void);
Expand Down
1 change: 1 addition & 0 deletions arch/x86/kvm/svm/svm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1200,6 +1200,7 @@ static void init_vmcb(struct vcpu_svm *svm)
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);

svm_set_cr4(&svm->vcpu, 0);
svm_set_efer(&svm->vcpu, 0);
save->dr6 = 0xffff0ff0;
kvm_set_rflags(&svm->vcpu, X86_EFLAGS_FIXED);
Expand Down
22 changes: 21 additions & 1 deletion arch/x86/kvm/x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -2957,6 +2957,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
struct kvm_host_map map;
struct kvm_steal_time *st;

if (kvm_xen_msr_enabled(vcpu->kvm)) {
kvm_xen_runstate_set_running(vcpu);
return;
}

if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;

Expand Down Expand Up @@ -3756,11 +3761,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
r = 1;
break;
#ifdef CONFIG_KVM_XEN
case KVM_CAP_XEN_HVM:
r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
KVM_XEN_HVM_CONFIG_SHARED_INFO;
if (sched_info_on())
r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
break;
#endif
case KVM_CAP_SYNC_REGS:
r = KVM_SYNC_X86_VALID_FIELDS;
break;
Expand Down Expand Up @@ -4038,7 +4047,11 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
if (vcpu->preempted && !vcpu->arch.guest_state_protected)
vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);

kvm_steal_time_set_preempted(vcpu);
if (kvm_xen_msr_enabled(vcpu->kvm))
kvm_xen_runstate_set_preempted(vcpu);
else
kvm_steal_time_set_preempted(vcpu);

static_call(kvm_x86_vcpu_put)(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
/*
Expand Down Expand Up @@ -5013,6 +5026,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
case KVM_GET_SUPPORTED_HV_CPUID:
r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp);
break;
#ifdef CONFIG_KVM_XEN
case KVM_XEN_VCPU_GET_ATTR: {
struct kvm_xen_vcpu_attr xva;

Expand All @@ -5033,6 +5047,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_xen_vcpu_set_attr(vcpu, &xva);
break;
}
#endif
default:
r = -EINVAL;
}
Expand Down Expand Up @@ -5654,6 +5669,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
kvm->arch.bsp_vcpu_id = arg;
mutex_unlock(&kvm->lock);
break;
#ifdef CONFIG_KVM_XEN
case KVM_XEN_HVM_CONFIG: {
struct kvm_xen_hvm_config xhc;
r = -EFAULT;
Expand Down Expand Up @@ -5682,6 +5698,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_xen_hvm_set_attr(kvm, &xha);
break;
}
#endif
case KVM_SET_CLOCK: {
struct kvm_clock_data user_ns;
u64 now_ns;
Expand Down Expand Up @@ -8040,7 +8057,10 @@ void kvm_arch_exit(void)
kvm_mmu_module_exit();
free_percpu(user_return_msrs);
kmem_cache_destroy(x86_fpu_cache);
#ifdef CONFIG_KVM_XEN
static_key_deferred_flush(&kvm_xen_enabled);
WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
#endif
}

static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
Expand Down
Loading

0 comments on commit cee407c

Please sign in to comment.