
Commit

Merge branch 'kvm-ppc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc

KVM/PPC update for 4.14

There are various minor fixes and cleanups.  The only new feature is
that we now export information about storage key support to userspace,
so it can advertise it to the guest.

I have pulled in Michael Ellerman's topic/ppc-kvm branch from the
powerpc tree to get a couple of fixes that touch both KVM PPC code and
other PPC code.  That's why there is some arch/powerpc stuff in the
diffstat that isn't arch/powerpc/kvm.
rkrcmar committed Sep 7, 2017
2 parents 082d390 + 43f6b0c commit 78809a6
Showing 21 changed files with 183 additions and 75 deletions.
1 change: 1 addition & 0 deletions arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -104,6 +104,7 @@
#define HPTE_R_C ASM_CONST(0x0000000000000080)
#define HPTE_R_R ASM_CONST(0x0000000000000100)
#define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00)
#define HPTE_R_KEY (HPTE_R_KEY_LO | HPTE_R_KEY_HI)

#define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000)
#define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000)
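The new HPTE_R_KEY mask ORs together the two halves of the storage key field in the second HPTE doubleword. As a hedged sketch of what the mask describes (assuming HPTE_R_KEY_HI = 0x3000000000000000, defined just above the context shown, alongside the HPTE_R_KEY_LO = 0xe00 visible in the hunk), the 5-bit key number could be recovered like this:

/*
 * Sketch, not part of the patch: extract the 5-bit storage key from an
 * HPTE's second doubleword. Bits 60-61 (HPTE_R_KEY_HI) hold key bits 3-4;
 * bits 9-11 (HPTE_R_KEY_LO) hold key bits 0-2.
 */
static inline unsigned long hpte_storage_key(unsigned long hpte_r)
{
	return ((hpte_r & HPTE_R_KEY_HI) >> 57) |
	       ((hpte_r & HPTE_R_KEY_LO) >> 9);
}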
4 changes: 4 additions & 0 deletions arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -104,6 +104,10 @@ struct kvmppc_host_state {
u8 napping;

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
* hwthread_req/hwthread_state pair is used to pull sibling threads
* out of guest on pre-ISAv3.0B CPUs where threads share MMU.
*/
u8 hwthread_req;
u8 hwthread_state;
u8 host_ipi;
10 changes: 1 addition & 9 deletions arch/powerpc/include/asm/pgtable.h
@@ -66,16 +66,8 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
#ifndef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_large(pmd) 0
#endif
pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
bool *is_thp, unsigned *shift);
static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
bool *is_thp, unsigned *shift)
{
VM_WARN(!arch_irqs_disabled(),
"%s called with irq enabled\n", __func__);
return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift);
}

/* can we use this in kvm */
unsigned long vmalloc_to_phys(void *vmalloc_addr);

void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
35 changes: 35 additions & 0 deletions arch/powerpc/include/asm/pte-walk.h
@@ -0,0 +1,35 @@
#ifndef _ASM_POWERPC_PTE_WALK_H
#define _ASM_POWERPC_PTE_WALK_H

#include <linux/sched.h>

/* Don't use this directly */
extern pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
bool *is_thp, unsigned *hshift);

static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea,
bool *is_thp, unsigned *hshift)
{
VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__);
return __find_linux_pte(pgdir, ea, is_thp, hshift);
}

static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift)
{
pgd_t *pgdir = init_mm.pgd;
return __find_linux_pte(pgdir, ea, NULL, hshift);
}
/*
* This is what we should always use. Any other lockless page table lookup needs
* careful audit against THP split.
*/
static inline pte_t *find_current_mm_pte(pgd_t *pgdir, unsigned long ea,
bool *is_thp, unsigned *hshift)
{
VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__);
VM_WARN(pgdir != current->mm->pgd,
"%s lockless page table lookup called on wrong mm\n", __func__);
return __find_linux_pte(pgdir, ea, is_thp, hshift);
}

#endif /* _ASM_POWERPC_PTE_WALK_H */
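A usage sketch for the new helpers (hypothetical caller, not part of the patch): the walk is lockless, so interrupts must stay disabled for as long as the returned PTE pointer is used, which is exactly what the VM_WARN checks above enforce.

/* Hedged example: translate an EA in the current mm to a PFN. */
static unsigned long example_ea_to_pfn(unsigned long ea)
{
	unsigned long flags, pfn = 0;
	pte_t *ptep;

	local_irq_save(flags);
	ptep = find_current_mm_pte(current->mm->pgd, ea, NULL, NULL);
	if (ptep && pte_present(*ptep))
		pfn = pte_pfn(*ptep);
	local_irq_restore(flags);
	return pfn;
}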
4 changes: 2 additions & 2 deletions arch/powerpc/kernel/eeh.c
@@ -44,6 +44,7 @@
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/rtas.h>
#include <asm/pte-walk.h>


/** Overview:
@@ -352,8 +353,7 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
* worried about _PAGE_SPLITTING/collapse. Also we will not hit
* page table free, because of init_mm.
*/
ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token,
NULL, &hugepage_shift);
ptep = find_init_mm_pte(token, &hugepage_shift);
if (!ptep)
return token;
WARN_ON(hugepage_shift);
35 changes: 24 additions & 11 deletions arch/powerpc/kernel/idle_book3s.S
@@ -242,13 +242,20 @@ enter_winkle:
/*
* r3 - PSSCR value corresponding to the requested stop state.
*/
power_enter_stop:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/* Tell KVM we're entering idle */
power_enter_stop_kvm_rm:
/*
* This is currently unused because POWER9 KVM does not have to
* gather secondary threads into sibling mode, but the code is
* here in case that function is required.
*
* Tell KVM we're entering idle.
*/
li r4,KVM_HWTHREAD_IN_IDLE
/* DO THIS IN REAL MODE! See comment above. */
stb r4,HSTATE_HWTHREAD_STATE(r13)
#endif
power_enter_stop:
/*
* Check if we are executing the lite variant with ESL=EC=0
*/
@@ -411,6 +418,18 @@ pnv_powersave_wakeup_mce:

b pnv_powersave_wakeup

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
kvm_start_guest_check:
li r0,KVM_HWTHREAD_IN_KERNEL
stb r0,HSTATE_HWTHREAD_STATE(r13)
/* Order setting hwthread_state vs. testing hwthread_req */
sync
lbz r0,HSTATE_HWTHREAD_REQ(r13)
cmpwi r0,0
beqlr
b kvm_start_guest
#endif

/*
* Called from reset vector for powersave wakeups.
* cr3 - set to gt if waking up with partial/complete hypervisor state loss
@@ -435,15 +454,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mr r3,r12

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
li r0,KVM_HWTHREAD_IN_KERNEL
stb r0,HSTATE_HWTHREAD_STATE(r13)
/* Order setting hwthread_state vs. testing hwthread_req */
sync
lbz r0,HSTATE_HWTHREAD_REQ(r13)
cmpwi r0,0
beq 1f
b kvm_start_guest
1:
BEGIN_FTR_SECTION
bl kvm_start_guest_check
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#endif

/* Return SRR1 from power7_nap() */
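The sync in kvm_start_guest_check pairs with a barrier on the KVM side: the idle thread stores hwthread_state and then loads hwthread_req, while kvmppc_grab_hwthread (see the book3s_hv.c hunk below) stores hwthread_req and then polls hwthread_state, so at least one side is guaranteed to observe the other's store. A hedged C rendering of the handshake (the real code is the assembly above; the sync is approximated with smp_mb()):

/* Idle thread side -- mirrors kvm_start_guest_check. */
hstate->hwthread_state = KVM_HWTHREAD_IN_KERNEL;
smp_mb();			/* the sync instruction above */
if (hstate->hwthread_req)
	enter_kvm_start_guest();	/* stands in for "b kvm_start_guest" */

/* KVM side -- mirrors kvmppc_grab_hwthread. */
hstate->hwthread_req = 1;
smp_mb();			/* pairs with the sync on the idle side */
while (hstate->hwthread_state == KVM_HWTHREAD_IN_KERNEL)
	cpu_relax();		/* bounded by a timeout in the real code */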
5 changes: 3 additions & 2 deletions arch/powerpc/kernel/io-workarounds.c
@@ -19,6 +19,8 @@
#include <asm/pgtable.h>
#include <asm/ppc-pci.h>
#include <asm/io-workarounds.h>
#include <asm/pte-walk.h>


#define IOWA_MAX_BUS 8

@@ -75,8 +77,7 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
* We won't find huge pages here (iomem). Also can't hit
* a page table free due to init_mm
*/
ptep = __find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
NULL, &hugepage_shift);
ptep = find_init_mm_pte(vaddr, &hugepage_shift);
if (ptep == NULL)
paddr = 0;
else {
6 changes: 4 additions & 2 deletions arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,6 +37,7 @@
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
#include <asm/pte-walk.h>

#include "trace_hv.h"

@@ -599,8 +600,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
* hugepage split and collapse.
*/
local_irq_save(flags);
ptep = find_linux_pte_or_hugepte(current->mm->pgd,
hva, NULL, NULL);
ptep = find_current_mm_pte(current->mm->pgd,
hva, NULL, NULL);
if (ptep) {
pte = kvmppc_read_update_linux_pte(ptep, 1);
if (__pte_write(pte))
@@ -1940,6 +1941,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
if (ret < 0) {
kfree(ctx);
kvm_put_kvm(kvm);
return ret;
}
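The kfree(ctx) added above fixes a memory leak on the error path: anon_inode_getfd() takes ownership of ctx only when it succeeds, so on failure the caller must free the context itself and drop the kvm reference it took earlier. The shape of the pattern, as a hedged sketch of the surrounding function:

ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);	/* earlier in the function */
if (!ctx)
	return -ENOMEM;
kvm_get_kvm(kvm);				/* reference to be held by the new fd */
ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
if (ret < 0) {
	kfree(ctx);				/* ownership never transferred */
	kvm_put_kvm(kvm);			/* drop the extra reference too */
	return ret;
}
/* success: the fd's release hook now owns ctx and the kvm reference */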
28 changes: 14 additions & 14 deletions arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -17,6 +17,7 @@
#include <asm/mmu.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/pte-walk.h>

/*
* Supported radix tree geometry.
@@ -359,8 +360,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (writing)
pgflags |= _PAGE_DIRTY;
local_irq_save(flags);
ptep = __find_linux_pte_or_hugepte(current->mm->pgd, hva,
NULL, NULL);
ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL);
if (ptep) {
pte = READ_ONCE(*ptep);
if (pte_present(pte) &&
@@ -374,8 +374,12 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
spin_unlock(&kvm->mmu_lock);
return RESUME_GUEST;
}
ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable,
gpa, NULL, &shift);
/*
* We are walking the secondary page table here. We can do this
* without disabling irq.
*/
ptep = __find_linux_pte(kvm->arch.pgtable,
gpa, NULL, &shift);
if (ptep && pte_present(*ptep)) {
kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
gpa, shift);
@@ -427,8 +431,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
pgflags |= _PAGE_WRITE;
} else {
local_irq_save(flags);
ptep = __find_linux_pte_or_hugepte(current->mm->pgd,
hva, NULL, NULL);
ptep = find_current_mm_pte(current->mm->pgd,
hva, NULL, NULL);
if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
pgflags |= _PAGE_WRITE;
local_irq_restore(flags);
@@ -499,8 +503,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned int shift;
unsigned long old;

ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
NULL, &shift);
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep)) {
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
gpa, shift);
@@ -525,8 +528,7 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned int shift;
int ref = 0;

ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
NULL, &shift);
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
gpa, shift);
@@ -545,8 +547,7 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned int shift;
int ref = 0;

ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
NULL, &shift);
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep))
ref = 1;
return ref;
@@ -562,8 +563,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
unsigned int shift;
int ret = 0;

ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
NULL, &shift);
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
ret = 1;
if (shift)
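Two walk disciplines coexist in this file: lookups in the host's current->mm use the new find_current_mm_pte() with interrupts disabled, while lookups in the guest's partition-scoped table (kvm->arch.pgtable) call __find_linux_pte() directly and rely on kvm->mmu_lock for serialization, as the new comment in the page fault path notes. A hedged side-by-side sketch of the two patterns, condensed from the hunks above:

/* Host-mm walk: irqs off guard against THP collapse and table freeing. */
local_irq_save(flags);
ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL);
if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
	pgflags |= _PAGE_WRITE;
local_irq_restore(flags);

/* Partition-scoped walk: serialized by kvm->mmu_lock instead. */
spin_lock(&kvm->mmu_lock);
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep))
	kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift);
spin_unlock(&kvm->mmu_lock);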
12 changes: 11 additions & 1 deletion arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -39,6 +39,7 @@
#include <asm/udbg.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/pte-walk.h>

#ifdef CONFIG_BUG

@@ -353,7 +354,16 @@ static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu,
pte_t *ptep, pte;
unsigned shift = 0;

ptep = __find_linux_pte_or_hugepte(vcpu->arch.pgdir, ua, NULL, &shift);
/*
* Called in real mode with MSR_EE = 0. We are safe here.
* It is ok to do the lookup with arch.pgdir here, because
* we are doing this on secondary cpus and current task there
* is not the hypervisor. Also this is safe against THP in the
* host, because an IPI to primary thread will wait for the secondary
to exit, which will again cause the below page table walk
* to finish.
*/
ptep = __find_linux_pte(vcpu->arch.pgdir, ua, NULL, &shift);
if (!ptep || !pte_present(*ptep))
return -ENXIO;
pte = *ptep;
29 changes: 27 additions & 2 deletions arch/powerpc/kvm/book3s_hv.c
@@ -485,7 +485,13 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,

switch (subfunc) {
case H_VPA_REG_VPA: /* register VPA */
if (len < sizeof(struct lppaca))
/*
* The size of our lppaca is 1kB because of the way we align
* it for the guest to avoid crossing a 4kB boundary. We only
* use 640 bytes of the structure though, so we should accept
* clients that set a size of 640.
*/
if (len < 640)
break;
vpap = &tvcpu->arch.vpa;
err = 0;
@@ -2111,6 +2117,15 @@ static int kvmppc_grab_hwthread(int cpu)
struct paca_struct *tpaca;
long timeout = 10000;

/*
* ISA v3.0 idle routines do not set hwthread_state or test
* hwthread_req, so they can not grab idle threads.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
WARN(1, "KVM: can not control sibling threads\n");
return -EBUSY;
}

tpaca = &paca[cpu];

/* Ensure the thread won't go into the kernel if it wakes */
@@ -2145,10 +2160,12 @@ static void kvmppc_release_hwthread(int cpu)
struct paca_struct *tpaca;

tpaca = &paca[cpu];
tpaca->kvm_hstate.hwthread_req = 0;
tpaca->kvm_hstate.kvm_vcpu = NULL;
tpaca->kvm_hstate.kvm_vcore = NULL;
tpaca->kvm_hstate.kvm_split_mode = NULL;
if (!cpu_has_feature(CPU_FTR_ARCH_300))
tpaca->kvm_hstate.hwthread_req = 0;

}

static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
@@ -3325,6 +3342,14 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
if (radix_enabled())
return -EINVAL;

/*
* POWER7, POWER8 and POWER9 all support 32 storage keys for data.
* POWER7 doesn't support keys for instruction accesses,
* POWER8 and POWER9 do.
*/
info->data_keys = 32;
info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0;

info->flags = KVM_PPC_PAGE_SIZES_REAL;
if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
info->flags |= KVM_PPC_1T_SEGMENTS;
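These two fields are the userspace export mentioned in the merge description: a VMM reads them through the existing KVM_PPC_GET_SMMU_INFO ioctl and can then advertise storage key support to the guest. A hedged userspace sketch (assumes 4.14-era uapi headers in which struct kvm_ppc_smmu_info carries the new data_keys/instr_keys fields, and a hypothetical already-open VM fd):

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdio.h>

/* Sketch: ask KVM how many storage keys the guest can be given. */
static int query_storage_keys(int vmfd)
{
	struct kvm_ppc_smmu_info info;

	if (ioctl(vmfd, KVM_PPC_GET_SMMU_INFO, &info) < 0)
		return -1;
	printf("data keys: %u, instruction keys: %u\n",
	       info.data_keys, info.instr_keys);
	return 0;
}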