mm, thp: Do not make pmd/pud dirty without a reason
Currently we make page table entries dirty all the time, regardless of
access type, and don't even consider whether the mapping is
write-protected. The reasoning is that we don't really need dirty
tracking on THP and that making the entry dirty upfront may save some
time on the first write to the page.

Unfortunately, such an approach may result in a false-positive
can_follow_write_pmd() for the huge zero page or a read-only shmem file.
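
(For reference: the check in question is the FOLL_FORCE/FOLL_COW test,
which takes a dirty PMD as proof that a COW cycle has already happened.
The sketch below is a paraphrase of how it looked around this time in
mm/huge_memory.c, shown only for context and not part of this diff; an
entry made dirty upfront satisfies pmd_dirty() and spuriously passes it.)

	/* Rough sketch, for context only -- not part of this commit.
	 * FOLL_FORCE may write through an unwritable pmd only after a
	 * COW cycle, and a dirty bit is taken as proof of that COW. */
	static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
	{
		return pmd_write(pmd) ||
		       ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
	}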

Let's only make the page dirty if we are about to write to it anyway
(as we do for small pages).

I've restructured the code to make the entry dirty inside
maybe_p[mu]d_mkwrite(), which also takes into account whether the vma is
write-protected.
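
Concretely, call sites that are about to write (write faults, fresh
anonymous THPs, vmf_insert_pfn_p[mu]d() with write set) now pass
dirty=true, while paths that merely re-establish an existing mapping
(khugepaged collapse, migration) pass dirty=false, e.g.:

	/* write path: make the entry writable and dirty in one step */
	entry = maybe_pmd_mkwrite(entry, vma, true);

	/* collapse/migration: no write has happened, keep the entry clean */
	entry = maybe_pmd_mkwrite(entry, vma, false);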

Signed-off-by: Kirill A. Shutemov <[email protected]>
Acked-by: Michal Hocko <[email protected]>
Cc: Hugh Dickins <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
kiryl authored and torvalds committed Nov 27, 2017
1 parent a8f9736 commit 152e93a
Showing 5 changed files with 24 additions and 16 deletions.
31 changes: 19 additions & 12 deletions mm/huge_memory.c
@@ -474,10 +474,13 @@ static int __init setup_transparent_hugepage(char *str)
 }
 __setup("transparent_hugepage=", setup_transparent_hugepage);
 
-pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma, bool dirty)
 {
-	if (likely(vma->vm_flags & VM_WRITE))
+	if (likely(vma->vm_flags & VM_WRITE)) {
 		pmd = pmd_mkwrite(pmd);
+		if (dirty)
+			pmd = pmd_mkdirty(pmd);
+	}
 	return pmd;
 }

@@ -599,7 +602,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 	}
 
 	entry = mk_huge_pmd(page, vma->vm_page_prot);
-	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+	entry = maybe_pmd_mkwrite(entry, vma, true);
 	page_add_new_anon_rmap(page, vma, haddr, true);
 	mem_cgroup_commit_charge(page, memcg, false, true);
 	lru_cache_add_active_or_unevictable(page, vma);
@@ -741,8 +744,8 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 	if (pfn_t_devmap(pfn))
 		entry = pmd_mkdevmap(entry);
 	if (write) {
-		entry = pmd_mkyoung(pmd_mkdirty(entry));
-		entry = maybe_pmd_mkwrite(entry, vma);
+		entry = pmd_mkyoung(entry);
+		entry = maybe_pmd_mkwrite(entry, vma, true);
 	}
 
 	if (pgtable) {
@@ -788,10 +791,14 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
 
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma)
+static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma,
+		bool dirty)
 {
-	if (likely(vma->vm_flags & VM_WRITE))
+	if (likely(vma->vm_flags & VM_WRITE)) {
 		pud = pud_mkwrite(pud);
+		if (dirty)
+			pud = pud_mkdirty(pud);
+	}
 	return pud;
 }

@@ -807,8 +814,8 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
 	if (pfn_t_devmap(pfn))
 		entry = pud_mkdevmap(entry);
 	if (write) {
-		entry = pud_mkyoung(pud_mkdirty(entry));
-		entry = maybe_pud_mkwrite(entry, vma);
+		entry = pud_mkyoung(entry);
+		entry = maybe_pud_mkwrite(entry, vma, true);
 	}
 	set_pud_at(mm, addr, pud, entry);
 	update_mmu_cache_pud(vma, addr, pud);
@@ -1279,7 +1286,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 	if (reuse_swap_page(page, NULL)) {
 		pmd_t entry;
 		entry = pmd_mkyoung(orig_pmd);
-		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+		entry = maybe_pmd_mkwrite(entry, vma, true);
 		if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry, 1))
 			update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
 		ret |= VM_FAULT_WRITE;
@@ -1349,7 +1356,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 	} else {
 		pmd_t entry;
 		entry = mk_huge_pmd(new_page, vma->vm_page_prot);
-		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+		entry = maybe_pmd_mkwrite(entry, vma, true);
 		pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd);
 		page_add_new_anon_rmap(new_page, vma, haddr, true);
 		mem_cgroup_commit_charge(new_page, memcg, false, true);
@@ -2928,7 +2935,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
 	if (pmd_swp_soft_dirty(*pvmw->pmd))
 		pmde = pmd_mksoft_dirty(pmde);
 	if (is_write_migration_entry(entry))
-		pmde = maybe_pmd_mkwrite(pmde, vma);
+		pmde = maybe_pmd_mkwrite(pmde, vma, false);
 
 	flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE);
 	page_add_anon_rmap(new, vma, mmun_start, true);
3 changes: 2 additions & 1 deletion mm/internal.h
@@ -328,7 +328,8 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
 	}
 }
 
-extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
+extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma,
+		bool dirty);
 
 /*
  * At what user virtual address is page expected in @vma?
2 changes: 1 addition & 1 deletion mm/khugepaged.c
@@ -1057,7 +1057,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	pgtable = pmd_pgtable(_pmd);
 
 	_pmd = mk_huge_pmd(new_page, vma->vm_page_prot);
-	_pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
+	_pmd = maybe_pmd_mkwrite(_pmd, vma, false);
 
 	/*
 	 * spin_lock() below is not the equivalent of smp_wmb(), so
2 changes: 1 addition & 1 deletion mm/memory.c
@@ -3335,7 +3335,7 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page)
 
 	entry = mk_huge_pmd(page, vma->vm_page_prot);
 	if (write)
-		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+		entry = maybe_pmd_mkwrite(entry, vma, true);
 
 	add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR);
 	page_add_file_rmap(page, true);
2 changes: 1 addition & 1 deletion mm/migrate.c
@@ -2068,7 +2068,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	}
 
 	entry = mk_huge_pmd(new_page, vma->vm_page_prot);
-	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+	entry = maybe_pmd_mkwrite(entry, vma, false);
 
 	/*
 	 * Clear the old entry under pagetable lock and establish the new PTE.
