mm: memcontrol: convert anon and file-thp to new mem_cgroup_charge() API
With the page->mapping requirement gone from memcg, we can charge anon and
file-thp pages in one single step, right after they're allocated.

This removes two out of three API calls - especially the tricky commit
step that needed to happen at just the right time between when the page is
"set up" and when it's "published" - somewhat vague and fluid concepts
that varied by page type.  All we need is a freshly allocated page and a
memcg context to charge.
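
For illustration, a minimal sketch of the before/after calling convention, using only the memcg functions whose call sites change in the hunks below; set_up_page() and publish_page() are hypothetical placeholders for the page-type-specific preparation and rmap/page-table/LRU steps, not kernel functions:

	/* Old protocol: reserve, set up, commit, publish; cancel on early failure. */
	static int charge_old_way(struct page *page, struct mm_struct *mm, gfp_t gfp)
	{
		struct mem_cgroup *memcg;
		int err;

		if (mem_cgroup_try_charge(page, mm, gfp, &memcg))
			return -ENOMEM;
		err = set_up_page(page);
		if (err) {
			/* Failed before the commit: hand the reservation back. */
			mem_cgroup_cancel_charge(page, memcg);
			return err;
		}
		/* Had to land exactly between "set up" and "published". */
		mem_cgroup_commit_charge(page, memcg, false);
		publish_page(page);
		return 0;
	}

	/* New protocol: one charge right after allocation; plain uncharge undoes it. */
	static int charge_new_way(struct page *page, struct mm_struct *mm, gfp_t gfp)
	{
		int err;

		if (mem_cgroup_charge(page, mm, gfp, false))
			return -ENOMEM;
		err = set_up_page(page);
		if (err) {
			mem_cgroup_uncharge(page);
			return err;
		}
		publish_page(page);
		return 0;
	}

The khugepaged hunks below show the other half of this: on failure the pre-allocated hugepage is explicitly uncharged (when it is not an ERR_PTR), so that a retry with the same page does not end up charged twice, which appears to be the double-charge case the v2 note refers to.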

v2: prevent double charges on pre-allocated hugepages in khugepaged

[[email protected]: Fix crash - *hpage could be ERR_PTR instead of NULL]
  Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Johannes Weiner <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Reviewed-by: Joonsoo Kim <[email protected]>
Cc: Alex Shi <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: "Kirill A. Shutemov" <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Roman Gushchin <[email protected]>
Cc: Shakeel Butt <[email protected]>
Cc: Balbir Singh <[email protected]>
Cc: Qian Cai <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Linus Torvalds <[email protected]>
hnaz authored and torvalds committed Jun 4, 2020
1 parent 468c398 commit 9d82c69
Showing 9 changed files with 31 additions and 82 deletions.
4 changes: 1 addition & 3 deletions include/linux/mm.h
@@ -501,7 +501,6 @@ struct vm_fault {
pte_t orig_pte; /* Value of PTE at the time of fault */

struct page *cow_page; /* Page handler may use for COW fault */
struct mem_cgroup *memcg; /* Cgroup cow_page belongs to */
struct page *page; /* ->fault handlers should return a
* page here, unless VM_FAULT_NOPAGE
* is set (which is also implied by
@@ -946,8 +945,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
return pte;
}

vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
struct page *page);
vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page);
vm_fault_t finish_fault(struct vm_fault *vmf);
vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
#endif
11 changes: 3 additions & 8 deletions kernel/events/uprobes.c
@@ -162,14 +162,13 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
};
int err;
struct mmu_notifier_range range;
struct mem_cgroup *memcg;

mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
addr + PAGE_SIZE);

if (new_page) {
err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL,
&memcg);
err = mem_cgroup_charge(new_page, vma->vm_mm, GFP_KERNEL,
false);
if (err)
return err;
}
@@ -179,16 +178,12 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,

mmu_notifier_invalidate_range_start(&range);
err = -EAGAIN;
if (!page_vma_mapped_walk(&pvmw)) {
if (new_page)
mem_cgroup_cancel_charge(new_page, memcg);
if (!page_vma_mapped_walk(&pvmw))
goto unlock;
}
VM_BUG_ON_PAGE(addr != pvmw.address, old_page);

if (new_page) {
get_page(new_page);
mem_cgroup_commit_charge(new_page, memcg, false);
page_add_new_anon_rmap(new_page, vma, addr, false);
lru_cache_add_active_or_unevictable(new_page, vma);
} else
2 changes: 1 addition & 1 deletion mm/filemap.c
@@ -2633,7 +2633,7 @@ void filemap_map_pages(struct vm_fault *vmf,
if (vmf->pte)
vmf->pte += xas.xa_index - last_pgoff;
last_pgoff = xas.xa_index;
if (alloc_set_pte(vmf, NULL, page))
if (alloc_set_pte(vmf, page))
goto unlock;
unlock_page(page);
goto next;
9 changes: 3 additions & 6 deletions mm/huge_memory.c
@@ -587,19 +587,19 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
struct page *page, gfp_t gfp)
{
struct vm_area_struct *vma = vmf->vma;
struct mem_cgroup *memcg;
pgtable_t pgtable;
unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
vm_fault_t ret = 0;

VM_BUG_ON_PAGE(!PageCompound(page), page);

if (mem_cgroup_try_charge_delay(page, vma->vm_mm, gfp, &memcg)) {
if (mem_cgroup_charge(page, vma->vm_mm, gfp, false)) {
put_page(page);
count_vm_event(THP_FAULT_FALLBACK);
count_vm_event(THP_FAULT_FALLBACK_CHARGE);
return VM_FAULT_FALLBACK;
}
cgroup_throttle_swaprate(page, gfp);

pgtable = pte_alloc_one(vma->vm_mm);
if (unlikely(!pgtable)) {
@@ -630,7 +630,6 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
vm_fault_t ret2;

spin_unlock(vmf->ptl);
mem_cgroup_cancel_charge(page, memcg);
put_page(page);
pte_free(vma->vm_mm, pgtable);
ret2 = handle_userfault(vmf, VM_UFFD_MISSING);
@@ -640,7 +639,6 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,

entry = mk_huge_pmd(page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
mem_cgroup_commit_charge(page, memcg, false);
page_add_new_anon_rmap(page, vma, haddr, true);
lru_cache_add_active_or_unevictable(page, vma);
pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
@@ -649,7 +647,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
mm_inc_nr_ptes(vma->vm_mm);
spin_unlock(vmf->ptl);
count_vm_event(THP_FAULT_ALLOC);
count_memcg_events(memcg, THP_FAULT_ALLOC, 1);
count_memcg_event_mm(vma->vm_mm, THP_FAULT_ALLOC);
}

return 0;
@@ -658,7 +656,6 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
release:
if (pgtable)
pte_free(vma->vm_mm, pgtable);
mem_cgroup_cancel_charge(page, memcg);
put_page(page);
return ret;

35 changes: 10 additions & 25 deletions mm/khugepaged.c
@@ -1037,7 +1037,6 @@ static void collapse_huge_page(struct mm_struct *mm,
struct page *new_page;
spinlock_t *pmd_ptl, *pte_ptl;
int isolated = 0, result = 0;
struct mem_cgroup *memcg;
struct vm_area_struct *vma;
struct mmu_notifier_range range;
gfp_t gfp;
@@ -1060,23 +1059,22 @@ static void collapse_huge_page(struct mm_struct *mm,
goto out_nolock;
}

if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg))) {
if (unlikely(mem_cgroup_charge(new_page, mm, gfp, false))) {
result = SCAN_CGROUP_CHARGE_FAIL;
goto out_nolock;
}
count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC);

down_read(&mm->mmap_sem);
result = hugepage_vma_revalidate(mm, address, &vma);
if (result) {
mem_cgroup_cancel_charge(new_page, memcg);
up_read(&mm->mmap_sem);
goto out_nolock;
}

pmd = mm_find_pmd(mm, address);
if (!pmd) {
result = SCAN_PMD_NULL;
mem_cgroup_cancel_charge(new_page, memcg);
up_read(&mm->mmap_sem);
goto out_nolock;
}
@@ -1088,7 +1086,6 @@ static void collapse_huge_page(struct mm_struct *mm,
*/
if (unmapped && !__collapse_huge_page_swapin(mm, vma, address,
pmd, referenced)) {
mem_cgroup_cancel_charge(new_page, memcg);
up_read(&mm->mmap_sem);
goto out_nolock;
}
@@ -1175,9 +1172,7 @@ static void collapse_huge_page(struct mm_struct *mm,

spin_lock(pmd_ptl);
BUG_ON(!pmd_none(*pmd));
mem_cgroup_commit_charge(new_page, memcg, false);
page_add_new_anon_rmap(new_page, vma, address, true);
count_memcg_events(memcg, THP_COLLAPSE_ALLOC, 1);
lru_cache_add_active_or_unevictable(new_page, vma);
pgtable_trans_huge_deposit(mm, pmd, pgtable);
set_pmd_at(mm, address, pmd, _pmd);
Expand All @@ -1191,10 +1186,11 @@ static void collapse_huge_page(struct mm_struct *mm,
out_up_write:
up_write(&mm->mmap_sem);
out_nolock:
if (!IS_ERR_OR_NULL(*hpage))
mem_cgroup_uncharge(*hpage);
trace_mm_collapse_huge_page(mm, isolated, result);
return;
out:
mem_cgroup_cancel_charge(new_page, memcg);
goto out_up_write;
}

@@ -1618,7 +1614,6 @@ static void collapse_file(struct mm_struct *mm,
struct address_space *mapping = file->f_mapping;
gfp_t gfp;
struct page *new_page;
struct mem_cgroup *memcg;
pgoff_t index, end = start + HPAGE_PMD_NR;
LIST_HEAD(pagelist);
XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
@@ -1637,10 +1632,11 @@ static void collapse_file(struct mm_struct *mm,
goto out;
}

if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg))) {
if (unlikely(mem_cgroup_charge(new_page, mm, gfp, false))) {
result = SCAN_CGROUP_CHARGE_FAIL;
goto out;
}
count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC);

/* This will be less messy when we use multi-index entries */
do {
@@ -1650,7 +1646,6 @@ static void collapse_file(struct mm_struct *mm,
break;
xas_unlock_irq(&xas);
if (!xas_nomem(&xas, GFP_KERNEL)) {
mem_cgroup_cancel_charge(new_page, memcg);
result = SCAN_FAIL;
goto out;
}
@@ -1844,18 +1839,9 @@ static void collapse_file(struct mm_struct *mm,
}

if (nr_none) {
struct lruvec *lruvec;
/*
* XXX: We have started try_charge and pinned the
* memcg, but the page isn't committed yet so we
* cannot use mod_lruvec_page_state(). This hackery
* will be cleaned up when remove the page->mapping
* dependency from memcg and fully charge above.
*/
lruvec = mem_cgroup_lruvec(memcg, page_pgdat(new_page));
__mod_lruvec_state(lruvec, NR_FILE_PAGES, nr_none);
__mod_lruvec_page_state(new_page, NR_FILE_PAGES, nr_none);
if (is_shmem)
__mod_lruvec_state(lruvec, NR_SHMEM, nr_none);
__mod_lruvec_page_state(new_page, NR_SHMEM, nr_none);
}

xa_locked:
@@ -1893,15 +1879,13 @@ static void collapse_file(struct mm_struct *mm,

SetPageUptodate(new_page);
page_ref_add(new_page, HPAGE_PMD_NR - 1);
mem_cgroup_commit_charge(new_page, memcg, false);

if (is_shmem) {
set_page_dirty(new_page);
lru_cache_add_anon(new_page);
} else {
lru_cache_add_file(new_page);
}
count_memcg_events(memcg, THP_COLLAPSE_ALLOC, 1);

/*
* Remove pte page tables, so we can re-fault the page as huge.
@@ -1948,13 +1932,14 @@ static void collapse_file(struct mm_struct *mm,
VM_BUG_ON(nr_none);
xas_unlock_irq(&xas);

mem_cgroup_cancel_charge(new_page, memcg);
new_page->mapping = NULL;
}

unlock_page(new_page);
out:
VM_BUG_ON(!list_empty(&pagelist));
if (!IS_ERR_OR_NULL(*hpage))
mem_cgroup_uncharge(*hpage);
/* TODO: tracepoints */
}
