rmap: add argument to charge compound page
We're going to allow mapping individual 4k pages of a THP compound
page.  This means we can no longer rely on a PageTransHuge() check to
decide whether to map/unmap a small page or the whole THP.

The patch adds a new argument to the rmap functions to indicate whether
we want to operate on the whole compound page or only on a single small
page.
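
As an illustration of the resulting API (a hypothetical caller sketch,
not part of this patch; the variable names are made up), map and unmap
sites now have to agree on the granularity:

	/* map a whole THP via a PMD: charge the compound page */
	page_add_new_anon_rmap(hpage, vma, haddr, true);

	/* map a single 4k subpage via a PTE: charge only that page */
	page_add_new_anon_rmap(page, vma, address, false);

	/* teardown mirrors the choice made at map time */
	page_remove_rmap(hpage, true);
	page_remove_rmap(page, false);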

[[email protected]: fix mapcount mismatch in hugepage migration]
Signed-off-by: Kirill A. Shutemov <[email protected]>
Tested-by: Sasha Levin <[email protected]>
Tested-by: Aneesh Kumar K.V <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Acked-by: Jerome Marchand <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Steve Capper <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: David Rientjes <[email protected]>
Signed-off-by: Naoya Horiguchi <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
kiryl authored and torvalds committed Jan 16, 2016
1 parent afd9883 · commit d281ee6
Showing 10 changed files with 68 additions and 48 deletions.
12 changes: 9 additions & 3 deletions include/linux/rmap.h
@@ -161,16 +161,22 @@ static inline void anon_vma_merge(struct vm_area_struct *vma,

struct anon_vma *page_get_anon_vma(struct page *page);

+/* bitflags for do_page_add_anon_rmap() */
+#define RMAP_EXCLUSIVE 0x01
+#define RMAP_COMPOUND 0x02

/*
* rmap interfaces called when adding or removing pte of page
*/
void page_move_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
-void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
+void page_add_anon_rmap(struct page *, struct vm_area_struct *,
+		unsigned long, bool);
void do_page_add_anon_rmap(struct page *, struct vm_area_struct *,
unsigned long, int);
-void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
+void page_add_new_anon_rmap(struct page *, struct vm_area_struct *,
+		unsigned long, bool);
void page_add_file_rmap(struct page *);
-void page_remove_rmap(struct page *);
+void page_remove_rmap(struct page *, bool);

void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
unsigned long);
4 changes: 2 additions & 2 deletions kernel/events/uprobes.c
@@ -175,7 +175,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
goto unlock;

get_page(kpage);
-page_add_new_anon_rmap(kpage, vma, addr);
+page_add_new_anon_rmap(kpage, vma, addr, false);
mem_cgroup_commit_charge(kpage, memcg, false);
lru_cache_add_active_or_unevictable(kpage, vma);

@@ -188,7 +188,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
ptep_clear_flush_notify(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));

-page_remove_rmap(page);
+page_remove_rmap(page, false);
if (!page_mapped(page))
try_to_free_swap(page);
pte_unmap_unlock(ptep, ptl);
16 changes: 8 additions & 8 deletions mm/huge_memory.c
@@ -797,7 +797,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,

entry = mk_huge_pmd(page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
-page_add_new_anon_rmap(page, vma, haddr);
+page_add_new_anon_rmap(page, vma, haddr, true);
mem_cgroup_commit_charge(page, memcg, false);
lru_cache_add_active_or_unevictable(page, vma);
pgtable_trans_huge_deposit(mm, pmd, pgtable);
@@ -1139,7 +1139,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
memcg = (void *)page_private(pages[i]);
set_page_private(pages[i], 0);
-page_add_new_anon_rmap(pages[i], vma, haddr);
+page_add_new_anon_rmap(pages[i], vma, haddr, false);
mem_cgroup_commit_charge(pages[i], memcg, false);
lru_cache_add_active_or_unevictable(pages[i], vma);
pte = pte_offset_map(&_pmd, haddr);
@@ -1151,7 +1151,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,

smp_wmb(); /* make pte visible before pmd */
pmd_populate(mm, pmd, pgtable);
-page_remove_rmap(page);
+page_remove_rmap(page, true);
spin_unlock(ptl);

mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
@@ -1271,7 +1271,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
entry = mk_huge_pmd(new_page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
pmdp_huge_clear_flush_notify(vma, haddr, pmd);
-page_add_new_anon_rmap(new_page, vma, haddr);
+page_add_new_anon_rmap(new_page, vma, haddr, true);
mem_cgroup_commit_charge(new_page, memcg, false);
lru_cache_add_active_or_unevictable(new_page, vma);
set_pmd_at(mm, haddr, pmd, entry);
@@ -1281,7 +1281,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
put_huge_zero_page();
} else {
VM_BUG_ON_PAGE(!PageHead(page), page);
-page_remove_rmap(page);
+page_remove_rmap(page, true);
put_page(page);
}
ret |= VM_FAULT_WRITE;
@@ -1508,7 +1508,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
put_huge_zero_page();
} else {
struct page *page = pmd_page(orig_pmd);
-page_remove_rmap(page);
+page_remove_rmap(page, true);
VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
VM_BUG_ON_PAGE(!PageHead(page), page);
@@ -2371,7 +2371,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
* superfluous.
*/
pte_clear(vma->vm_mm, address, _pte);
-page_remove_rmap(src_page);
+page_remove_rmap(src_page, false);
spin_unlock(ptl);
free_page_and_swap_cache(src_page);
}
@@ -2682,7 +2682,7 @@ static void collapse_huge_page(struct mm_struct *mm,

spin_lock(pmd_ptl);
BUG_ON(!pmd_none(*pmd));
-page_add_new_anon_rmap(new_page, vma, address);
+page_add_new_anon_rmap(new_page, vma, address, true);
mem_cgroup_commit_charge(new_page, memcg, false);
lru_cache_add_active_or_unevictable(new_page, vma);
pgtable_trans_huge_deposit(mm, pmd, pgtable);
4 changes: 2 additions & 2 deletions mm/hugetlb.c
@@ -3186,7 +3186,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
set_page_dirty(page);

hugetlb_count_sub(pages_per_huge_page(h), mm);
-page_remove_rmap(page);
+page_remove_rmap(page, true);
force_flush = !__tlb_remove_page(tlb, page);
if (force_flush) {
address += sz;
@@ -3415,7 +3415,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
set_huge_pte_at(mm, address, ptep,
make_huge_pte(vma, new_page, 1));
-page_remove_rmap(old_page);
+page_remove_rmap(old_page, true);
hugepage_add_new_anon_rmap(new_page, vma, address);
/* Make the old page be freed below */
new_page = old_page;
4 changes: 2 additions & 2 deletions mm/ksm.c
@@ -956,13 +956,13 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
}

get_page(kpage);
-page_add_anon_rmap(kpage, vma, addr);
+page_add_anon_rmap(kpage, vma, addr, false);

flush_cache_page(vma, addr, pte_pfn(*ptep));
ptep_clear_flush_notify(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));

-page_remove_rmap(page);
+page_remove_rmap(page, false);
if (!page_mapped(page))
try_to_free_swap(page);
put_page(page);
14 changes: 7 additions & 7 deletions mm/memory.c
@@ -1118,7 +1118,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
mark_page_accessed(page);
}
rss[mm_counter(page)]--;
-page_remove_rmap(page);
+page_remove_rmap(page, false);
if (unlikely(page_mapcount(page) < 0))
print_bad_pte(vma, addr, ptent, page);
if (unlikely(!__tlb_remove_page(tlb, page))) {
@@ -2118,7 +2118,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
* thread doing COW.
*/
ptep_clear_flush_notify(vma, address, page_table);
-page_add_new_anon_rmap(new_page, vma, address);
+page_add_new_anon_rmap(new_page, vma, address, false);
mem_cgroup_commit_charge(new_page, memcg, false);
lru_cache_add_active_or_unevictable(new_page, vma);
/*
@@ -2151,7 +2151,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
* mapcount is visible. So transitively, TLBs to
* old page will be flushed before it can be reused.
*/
-page_remove_rmap(old_page);
+page_remove_rmap(old_page, false);
}

/* Free the old page.. */
@@ -2567,7 +2567,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
flags &= ~FAULT_FLAG_WRITE;
ret |= VM_FAULT_WRITE;
-exclusive = 1;
+exclusive = RMAP_EXCLUSIVE;
}
flush_icache_page(vma, page);
if (pte_swp_soft_dirty(orig_pte))
@@ -2577,7 +2577,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
do_page_add_anon_rmap(page, vma, address, exclusive);
mem_cgroup_commit_charge(page, memcg, true);
} else { /* ksm created a completely new copy */
-page_add_new_anon_rmap(page, vma, address);
+page_add_new_anon_rmap(page, vma, address, false);
mem_cgroup_commit_charge(page, memcg, false);
lru_cache_add_active_or_unevictable(page, vma);
}
@@ -2735,7 +2735,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
}

inc_mm_counter_fast(mm, MM_ANONPAGES);
-page_add_new_anon_rmap(page, vma, address);
+page_add_new_anon_rmap(page, vma, address, false);
mem_cgroup_commit_charge(page, memcg, false);
lru_cache_add_active_or_unevictable(page, vma);
setpte:
@@ -2824,7 +2824,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
if (anon) {
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
-page_add_new_anon_rmap(page, vma, address);
+page_add_new_anon_rmap(page, vma, address, false);
} else {
inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
page_add_file_rmap(page);
8 changes: 4 additions & 4 deletions mm/migrate.c
@@ -167,7 +167,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
else
page_dup_rmap(new);
} else if (PageAnon(new))
-page_add_anon_rmap(new, vma, addr);
+page_add_anon_rmap(new, vma, addr, false);
else
page_add_file_rmap(new);

@@ -1815,7 +1815,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
* guarantee the copy is visible before the pagetable update.
*/
flush_cache_range(vma, mmun_start, mmun_end);
-page_add_anon_rmap(new_page, vma, mmun_start);
+page_add_anon_rmap(new_page, vma, mmun_start, true);
pmdp_huge_clear_flush_notify(vma, mmun_start, pmd);
set_pmd_at(mm, mmun_start, pmd, entry);
flush_tlb_range(vma, mmun_start, mmun_end);
@@ -1826,14 +1826,14 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
flush_tlb_range(vma, mmun_start, mmun_end);
mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
update_mmu_cache_pmd(vma, address, &entry);
-page_remove_rmap(new_page);
+page_remove_rmap(new_page, true);
goto fail_putback;
}

mlock_migrate_page(new_page, page);
set_page_memcg(new_page, page_memcg(page));
set_page_memcg(page, NULL);
-page_remove_rmap(page);
+page_remove_rmap(page, true);

spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
48 changes: 31 additions & 17 deletions mm/rmap.c
@@ -1133,16 +1133,17 @@ static void __page_check_anon_rmap(struct page *page,
* @page: the page to add the mapping to
* @vma: the vm area in which the mapping is added
* @address: the user virtual address mapped
+* @compound: charge the page as compound or small page
*
* The caller needs to hold the pte lock, and the page must be locked in
* the anon_vma case: to serialize mapping,index checking after setting,
* and to ensure that PageAnon is not being upgraded racily to PageKsm
* (but PageKsm is never downgraded to PageAnon).
*/
void page_add_anon_rmap(struct page *page,
-struct vm_area_struct *vma, unsigned long address)
+struct vm_area_struct *vma, unsigned long address, bool compound)
{
-do_page_add_anon_rmap(page, vma, address, 0);
+do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0);
}

/*
@@ -1151,29 +1152,33 @@ void page_add_anon_rmap(struct page *page,
* Everybody else should continue to use page_add_anon_rmap above.
*/
void do_page_add_anon_rmap(struct page *page,
-struct vm_area_struct *vma, unsigned long address, int exclusive)
+struct vm_area_struct *vma, unsigned long address, int flags)
{
int first = atomic_inc_and_test(&page->_mapcount);
if (first) {
+bool compound = flags & RMAP_COMPOUND;
+int nr = compound ? hpage_nr_pages(page) : 1;
/*
* We use the irq-unsafe __{inc|mod}_zone_page_stat because
* these counters are not modified in interrupt context, and
* pte lock(a spinlock) is held, which implies preemption
* disabled.
*/
-if (PageTransHuge(page))
+if (compound) {
+VM_BUG_ON_PAGE(!PageTransHuge(page), page);
__inc_zone_page_state(page,
NR_ANON_TRANSPARENT_HUGEPAGES);
-__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-hpage_nr_pages(page));
+}
+__mod_zone_page_state(page_zone(page), NR_ANON_PAGES, nr);
}
if (unlikely(PageKsm(page)))
return;

VM_BUG_ON_PAGE(!PageLocked(page), page);
/* address might be in next vma when migration races vma_adjust */
if (first)
-__page_set_anon_rmap(page, vma, address, exclusive);
+__page_set_anon_rmap(page, vma, address,
+flags & RMAP_EXCLUSIVE);
else
__page_check_anon_rmap(page, vma, address);
}
@@ -1183,21 +1188,25 @@ void do_page_add_anon_rmap(struct page *page,
* @page: the page to add the mapping to
* @vma: the vm area in which the mapping is added
* @address: the user virtual address mapped
+* @compound: charge the page as compound or small page
*
* Same as page_add_anon_rmap but must only be called on *new* pages.
* This means the inc-and-test can be bypassed.
* Page does not have to be locked.
*/
void page_add_new_anon_rmap(struct page *page,
-struct vm_area_struct *vma, unsigned long address)
+struct vm_area_struct *vma, unsigned long address, bool compound)
{
+int nr = compound ? hpage_nr_pages(page) : 1;
+
VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
SetPageSwapBacked(page);
atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
-if (PageTransHuge(page))
+if (compound) {
+VM_BUG_ON_PAGE(!PageTransHuge(page), page);
__inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
-__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-hpage_nr_pages(page));
+}
+__mod_zone_page_state(page_zone(page), NR_ANON_PAGES, nr);
__page_set_anon_rmap(page, vma, address, 1);
}

@@ -1249,13 +1258,17 @@ static void page_remove_file_rmap(struct page *page)

/**
* page_remove_rmap - take down pte mapping from a page
-* @page: page to remove mapping from
+* @page:	page to remove mapping from
+* @compound:	uncharge the page as compound or small page
*
* The caller needs to hold the pte lock.
*/
-void page_remove_rmap(struct page *page)
+void page_remove_rmap(struct page *page, bool compound)
{
+int nr = compound ? hpage_nr_pages(page) : 1;
+
if (!PageAnon(page)) {
+VM_BUG_ON_PAGE(compound && !PageHuge(page), page);
page_remove_file_rmap(page);
return;
}
@@ -1273,11 +1286,12 @@ void page_remove_rmap(struct page *page)
* these counters are not modified in interrupt context, and
* pte lock(a spinlock) is held, which implies preemption disabled.
*/
-if (PageTransHuge(page))
+if (compound) {
+VM_BUG_ON_PAGE(!PageTransHuge(page), page);
__dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
+}

-__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-	-hpage_nr_pages(page));
+__mod_zone_page_state(page_zone(page), NR_ANON_PAGES, -nr);

if (unlikely(PageMlocked(page)))
clear_page_mlock(page);
@@ -1416,7 +1430,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
} else
dec_mm_counter(mm, mm_counter_file(page));

-page_remove_rmap(page);
+page_remove_rmap(page, PageHuge(page));
page_cache_release(page);

out_unmap:
4 changes: 2 additions & 2 deletions mm/swapfile.c
@@ -1160,10 +1160,10 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
set_pte_at(vma->vm_mm, addr, pte,
pte_mkold(mk_pte(page, vma->vm_page_prot)));
if (page == swapcache) {
-page_add_anon_rmap(page, vma, addr);
+page_add_anon_rmap(page, vma, addr, false);
mem_cgroup_commit_charge(page, memcg, true);
} else { /* ksm created a completely new copy */
-page_add_new_anon_rmap(page, vma, addr);
+page_add_new_anon_rmap(page, vma, addr, false);
mem_cgroup_commit_charge(page, memcg, false);
lru_cache_add_active_or_unevictable(page, vma);
}