KVM: MMU: Concurrent guest walkers
Do not hold the kvm->lock mutex across the entire pagefault path;
acquire it only where it is actually needed, such as for mmu hash
list, active list, rmap and parent pte handling.
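
The shape of the change is classic lock-scope narrowing: the real work moves into a double-underscore helper that expects the lock to already be held, and a thin wrapper takes the lock only around the code that touches shared MMU structures. A minimal userspace sketch of that pattern, using a pthread mutex and invented names rather than the actual kernel code:

    #include <pthread.h>
    #include <stdio.h>

    /* Stand-in for the shared state that kvm->lock protects. */
    struct mmu_state {
        pthread_mutex_t lock;
        long free_pages;
    };

    /* Caller must hold st->lock; only the shared-structure work lives here. */
    static int __map_page(struct mmu_state *st, unsigned long gfn)
    {
        (void)gfn;              /* unused in this sketch */
        if (st->free_pages == 0)
            return -1;
        st->free_pages--;       /* update shared bookkeeping */
        return 0;
    }

    /*
     * Thin wrapper: take the lock only around the shared work,
     * not across the whole fault path.
     */
    static int map_page(struct mmu_state *st, unsigned long gfn)
    {
        int r;

        pthread_mutex_lock(&st->lock);
        r = __map_page(st, gfn);
        pthread_mutex_unlock(&st->lock);
        return r;
    }

    int main(void)
    {
        struct mmu_state st = { PTHREAD_MUTEX_INITIALIZER, 4 };

        printf("map_page: %d\n", map_page(&st, 0x1000));
        return 0;
    }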

Allow concurrent guest walkers by switching walk_addr() to use
mmap_sem in read-mode.
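
Because mmap_sem is a reader-writer semaphore, taking it for read lets any number of vcpus translate guest addresses at the same time, while writers (changes to the address space) still get exclusive access. A rough userspace analogy using a pthread rwlock, again with invented names and a dummy translation rather than the kernel code:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_rwlock_t mmap_lock = PTHREAD_RWLOCK_INITIALIZER;

    /*
     * Many vcpu threads may hold the read side at once, so guest
     * page-table walks can proceed concurrently.
     */
    static unsigned long walk_addr(unsigned long gva)
    {
        unsigned long gpa;

        pthread_rwlock_rdlock(&mmap_lock);  /* shared */
        gpa = gva;                          /* dummy translation */
        pthread_rwlock_unlock(&mmap_lock);
        return gpa;
    }

    /* Address-space updates take the write side and exclude all walkers. */
    static void change_mapping(void)
    {
        pthread_rwlock_wrlock(&mmap_lock);  /* exclusive */
        /* ... update mappings ... */
        pthread_rwlock_unlock(&mmap_lock);
    }

    int main(void)
    {
        printf("gpa = %#lx\n", walk_addr(0x1000));
        change_mapping();
        return 0;
    }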

And get rid of the lockless __gfn_to_page().

[avi: move kvm_mmu_pte_write() locking inside the function]
[avi: add locking for real mode]
[avi: fix cmpxchg locking]

Signed-off-by: Marcelo Tosatti <[email protected]>
Signed-off-by: Avi Kivity <[email protected]>
matosatti authored and avikivity committed Jan 30, 2008
1 parent 774ead3 commit 10589a4
Showing 5 changed files with 117 additions and 71 deletions.
41 changes: 33 additions & 8 deletions arch/x86/kvm/mmu.c
@@ -974,7 +974,7 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
}

static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
{
int level = PT32E_ROOT_LEVEL;
hpa_t table_addr = vcpu->arch.mmu.root_hpa;
@@ -1015,6 +1015,17 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
}
}

static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
{
int r;

mutex_lock(&vcpu->kvm->lock);
r = __nonpaging_map(vcpu, v, write, gfn);
mutex_unlock(&vcpu->kvm->lock);
return r;
}


static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp)
{
@@ -1031,13 +1042,15 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)

if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
mutex_lock(&vcpu->kvm->lock);
#ifdef CONFIG_X86_64
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
hpa_t root = vcpu->arch.mmu.root_hpa;

sp = page_header(root);
--sp->root_count;
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
mutex_unlock(&vcpu->kvm->lock);
return;
}
#endif
@@ -1051,6 +1064,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
}
vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
}
mutex_unlock(&vcpu->kvm->lock);
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
}

@@ -1250,15 +1264,15 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
{
int r;

mutex_lock(&vcpu->kvm->lock);
r = mmu_topup_memory_caches(vcpu);
if (r)
goto out;
mutex_lock(&vcpu->kvm->lock);
mmu_alloc_roots(vcpu);
mutex_unlock(&vcpu->kvm->lock);
kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
kvm_mmu_flush_tlb(vcpu);
out:
mutex_unlock(&vcpu->kvm->lock);
return r;
}
EXPORT_SYMBOL_GPL(kvm_mmu_load);
@@ -1353,6 +1367,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
int npte;

pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
mutex_lock(&vcpu->kvm->lock);
++vcpu->kvm->stat.mmu_pte_write;
kvm_mmu_audit(vcpu, "pre pte write");
if (gfn == vcpu->arch.last_pt_write_gfn
@@ -1421,17 +1436,27 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
}
}
kvm_mmu_audit(vcpu, "post pte write");
mutex_unlock(&vcpu->kvm->lock);
}

int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
{
gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
gpa_t gpa;
int r;

return kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
down_read(&current->mm->mmap_sem);
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
up_read(&current->mm->mmap_sem);

mutex_lock(&vcpu->kvm->lock);
r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
mutex_unlock(&vcpu->kvm->lock);
return r;
}

void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
{
mutex_lock(&vcpu->kvm->lock);
while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES) {
struct kvm_mmu_page *sp;

@@ -1440,14 +1465,14 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
kvm_mmu_zap_page(vcpu->kvm, sp);
++vcpu->kvm->stat.mmu_recycled;
}
mutex_unlock(&vcpu->kvm->lock);
}

int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
{
int r;
enum emulation_result er;

mutex_lock(&vcpu->kvm->lock);
r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);
if (r < 0)
goto out;
@@ -1462,7 +1487,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
goto out;

er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0);
mutex_unlock(&vcpu->kvm->lock);

switch (er) {
case EMULATE_DONE:
@@ -1477,7 +1501,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
BUG();
}
out:
mutex_unlock(&vcpu->kvm->lock);
return r;
}
EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
@@ -1574,8 +1597,10 @@ void kvm_mmu_zap_all(struct kvm *kvm)
{
struct kvm_mmu_page *sp, *node;

mutex_lock(&kvm->lock);
list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link)
kvm_mmu_zap_page(kvm, sp);
mutex_unlock(&kvm->lock);

kvm_flush_remote_tlbs(kvm);
}
8 changes: 7 additions & 1 deletion arch/x86/kvm/paging_tmpl.h
@@ -368,11 +368,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
if (r)
return r;

down_read(&current->mm->mmap_sem);
/*
* Look up the shadow pte for the faulting address.
*/
r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
fetch_fault);
up_read(&current->mm->mmap_sem);

/*
* The page is not mapped by the guest. Let the guest handle it.
@@ -384,6 +386,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
return 0;
}

mutex_lock(&vcpu->kvm->lock);
shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
&write_pt);
pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
@@ -395,11 +398,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
/*
* mmio: emulate if accessible, otherwise its a guest fault.
*/
if (shadow_pte && is_io_pte(*shadow_pte))
if (shadow_pte && is_io_pte(*shadow_pte)) {
mutex_unlock(&vcpu->kvm->lock);
return 1;
}

++vcpu->stat.pf_fixed;
kvm_mmu_audit(vcpu, "post page fault (fixed)");
mutex_unlock(&vcpu->kvm->lock);

return write_pt;
}
25 changes: 17 additions & 8 deletions arch/x86/kvm/vmx.c
@@ -1432,27 +1432,34 @@ static int init_rmode_tss(struct kvm *kvm)
{
gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
u16 data = 0;
int ret = 0;
int r;

down_read(&current->mm->mmap_sem);
r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
if (r < 0)
return 0;
goto out;
data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
r = kvm_write_guest_page(kvm, fn++, &data, 0x66, sizeof(u16));
if (r < 0)
return 0;
goto out;
r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
if (r < 0)
return 0;
goto out;
r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
if (r < 0)
return 0;
goto out;
data = ~0;
r = kvm_write_guest_page(kvm, fn, &data, RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
sizeof(u8));
r = kvm_write_guest_page(kvm, fn, &data,
RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
sizeof(u8));
if (r < 0)
return 0;
return 1;
goto out;

ret = 1;
out:
up_read(&current->mm->mmap_sem);
return ret;
}

static void seg_setup(int seg)
@@ -1471,6 +1478,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
int r = 0;

mutex_lock(&kvm->lock);
down_write(&current->mm->mmap_sem);
if (kvm->arch.apic_access_page)
goto out;
kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
@@ -1482,6 +1490,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
goto out;
kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
out:
up_write(&current->mm->mmap_sem);
mutex_unlock(&kvm->lock);
return r;
}
(diffs for the two remaining changed files are not rendered here)
