mm/vmscan: protect the workingset on anonymous LRU
In the current implementation, a newly created or swapped-in anonymous page starts out on the active list.  A growing active list triggers rebalancing of the active/inactive lists, so old pages on the active list are demoted to the inactive list.  Hence, pages on the active list aren't protected at all.

The following is an example of this situation.

Assume there are 50 hot pages on the active list.  The numbers denote the number of pages on the active/inactive lists (active | inactive).

1. 50 hot pages on active list
50(h) | 0

2. workload: 50 newly created (used-once) pages
50(uo) | 50(h)

3. workload: another 50 newly created (used-once) pages
50(uo) | 50(uo), swap-out 50(h)

This patch fixes this issue.  As with the file LRU, newly created or swapped-in anonymous pages are inserted on the inactive list.  They are promoted to the active list if enough references happen.  This simple modification changes the above example as follows.

1. 50 hot pages on active list
50(h) | 0

2. workload: 50 newly created (used-once) pages
50(h) | 50(uo)

3. workload: another 50 newly created (used-once) pages
50(h) | 50(uo), swap-out 50(uo)

As you can see, the hot pages on the active list are now protected.

Note that this implementation has a drawback: a page cannot be promoted and will be swapped out if its re-access interval is greater than the size of the inactive list but less than the size of the total (active + inactive).  To solve this potential issue, a following patch will apply workingset detection similar to the one already applied to the file LRU.
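
For illustration only (this sketch is not part of the patch), the scenario above can be reproduced with a small user-space model: two lists with a fixed combined capacity, reclaim from the inactive tail, and demotion from the active tail once the active list exceeds half of the total.  The capacity, the demotion threshold and the 50/100-page workload are assumptions chosen to mirror the example, not the kernel's real heuristics.

/*
 * Toy user-space model of the changelog scenario -- not kernel code.
 * Assumptions: total LRU capacity of 100 pages, reclaim from the
 * inactive tail, demotion of the active tail whenever the active list
 * grows beyond half of the total (a crude stand-in for the real
 * active/inactive balancing).
 */
#include <stdio.h>
#include <string.h>

#define CAP		100	/* total pages the LRU pair can hold */
#define ACTIVE		0
#define INACTIVE	1

struct lru {
	char pg[2][CAP + 1];	/* index 0 is the list head */
	int nr[2];
	int swapped_hot;	/* hot pages lost to reclaim */
};

static void push_head(struct lru *l, int list, char tag)
{
	memmove(&l->pg[list][1], &l->pg[list][0], l->nr[list]);
	l->pg[list][0] = tag;
	l->nr[list]++;
}

static char pop_tail(struct lru *l, int list)
{
	return l->pg[list][--l->nr[list]];
}

static void balance(struct lru *l)
{
	/* demote the active tail until active is at most half of CAP */
	while (l->nr[ACTIVE] > CAP / 2)
		push_head(l, INACTIVE, pop_tail(l, ACTIVE));
	/* reclaim (swap out) from the inactive tail when over capacity */
	while (l->nr[ACTIVE] + l->nr[INACTIVE] > CAP)
		if (pop_tail(l, INACTIVE) == 'h')
			l->swapped_hot++;
}

static void run(const char *name, int new_pages_start_active)
{
	struct lru l;
	int i;

	memset(&l, 0, sizeof(l));
	for (i = 0; i < 50; i++)	/* premise: 50 hot pages on active */
		push_head(&l, ACTIVE, 'h');
	for (i = 0; i < 100; i++) {	/* workload: 100 used-once pages */
		push_head(&l, new_pages_start_active ? ACTIVE : INACTIVE, 'u');
		balance(&l);
	}
	printf("%-40s hot pages swapped out: %d/50\n", name, l.swapped_hot);
}

int main(void)
{
	run("old: new anon pages start active", 1);
	run("new: new anon pages start inactive", 0);
	return 0;
}

With new pages going to the active head, the model swaps out all 50 hot pages; with new pages going to the inactive head, it swaps out none of them, matching the two walk-throughs above.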

Signed-off-by: Joonsoo Kim <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Acked-by: Johannes Weiner <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Minchan Kim <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Linus Torvalds <[email protected]>
JoonsooKim authored and torvalds committed Aug 12, 2020
1 parent ccc5dc6 commit b518154
Showing 10 changed files with 19 additions and 21 deletions.
2 changes: 1 addition & 1 deletion include/linux/swap.h
@@ -352,7 +352,7 @@ extern void deactivate_page(struct page *page);
 extern void mark_page_lazyfree(struct page *page);
 extern void swap_setup(void);
 
-extern void lru_cache_add_active_or_unevictable(struct page *page,
+extern void lru_cache_add_inactive_or_unevictable(struct page *page,
						struct vm_area_struct *vma);
 
 /* linux/mm/vmscan.c */
2 changes: 1 addition & 1 deletion kernel/events/uprobes.c
@@ -184,7 +184,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	if (new_page) {
 		get_page(new_page);
 		page_add_new_anon_rmap(new_page, vma, addr, false);
-		lru_cache_add_active_or_unevictable(new_page, vma);
+		lru_cache_add_inactive_or_unevictable(new_page, vma);
 	} else
 		/* no new page, just dec_mm_counter for old_page */
 		dec_mm_counter(mm, MM_ANONPAGES);
2 changes: 1 addition & 1 deletion mm/huge_memory.c
@@ -640,7 +640,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 		entry = mk_huge_pmd(page, vma->vm_page_prot);
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 		page_add_new_anon_rmap(page, vma, haddr, true);
-		lru_cache_add_active_or_unevictable(page, vma);
+		lru_cache_add_inactive_or_unevictable(page, vma);
 		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
 		set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
 		add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
2 changes: 1 addition & 1 deletion mm/khugepaged.c
@@ -1173,7 +1173,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	spin_lock(pmd_ptl);
 	BUG_ON(!pmd_none(*pmd));
 	page_add_new_anon_rmap(new_page, vma, address, true);
-	lru_cache_add_active_or_unevictable(new_page, vma);
+	lru_cache_add_inactive_or_unevictable(new_page, vma);
 	pgtable_trans_huge_deposit(mm, pmd, pgtable);
 	set_pmd_at(mm, address, pmd, _pmd);
 	update_mmu_cache_pmd(vma, address, pmd);
9 changes: 4 additions & 5 deletions mm/memory.c
@@ -2715,7 +2715,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 		 */
 		ptep_clear_flush_notify(vma, vmf->address, vmf->pte);
 		page_add_new_anon_rmap(new_page, vma, vmf->address, false);
-		lru_cache_add_active_or_unevictable(new_page, vma);
+		lru_cache_add_inactive_or_unevictable(new_page, vma);
 		/*
 		 * We call the notify macro here because, when using secondary
 		 * mmu page tables (such as kvm shadow page tables), we want the
@@ -3266,10 +3266,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	/* ksm created a completely new copy */
 	if (unlikely(page != swapcache && swapcache)) {
 		page_add_new_anon_rmap(page, vma, vmf->address, false);
-		lru_cache_add_active_or_unevictable(page, vma);
+		lru_cache_add_inactive_or_unevictable(page, vma);
 	} else {
 		do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
-		activate_page(page);
 	}
 
 	swap_free(entry);
@@ -3414,7 +3413,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 
 	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 	page_add_new_anon_rmap(page, vma, vmf->address, false);
-	lru_cache_add_active_or_unevictable(page, vma);
+	lru_cache_add_inactive_or_unevictable(page, vma);
 setpte:
 	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
 
@@ -3672,7 +3671,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page)
 	if (write && !(vma->vm_flags & VM_SHARED)) {
 		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 		page_add_new_anon_rmap(page, vma, vmf->address, false);
-		lru_cache_add_active_or_unevictable(page, vma);
+		lru_cache_add_inactive_or_unevictable(page, vma);
 	} else {
 		inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
 		page_add_file_rmap(page, false);
2 changes: 1 addition & 1 deletion mm/migrate.c
@@ -2830,7 +2830,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 	inc_mm_counter(mm, MM_ANONPAGES);
 	page_add_new_anon_rmap(page, vma, addr, false);
 	if (!is_zone_device_page(page))
-		lru_cache_add_active_or_unevictable(page, vma);
+		lru_cache_add_inactive_or_unevictable(page, vma);
 	get_page(page);
 
 	if (flush) {
13 changes: 7 additions & 6 deletions mm/swap.c
@@ -476,23 +476,24 @@ void lru_cache_add(struct page *page)
 EXPORT_SYMBOL(lru_cache_add);
 
 /**
- * lru_cache_add_active_or_unevictable
+ * lru_cache_add_inactive_or_unevictable
  * @page: the page to be added to LRU
  * @vma: vma in which page is mapped for determining reclaimability
  *
- * Place @page on the active or unevictable LRU list, depending on its
+ * Place @page on the inactive or unevictable LRU list, depending on its
  * evictability. Note that if the page is not evictable, it goes
  * directly back onto it's zone's unevictable list, it does NOT use a
  * per cpu pagevec.
  */
-void lru_cache_add_active_or_unevictable(struct page *page,
+void lru_cache_add_inactive_or_unevictable(struct page *page,
					 struct vm_area_struct *vma)
 {
+	bool unevictable;
+
 	VM_BUG_ON_PAGE(PageLRU(page), page);
 
-	if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED))
-		SetPageActive(page);
-	else if (!TestSetPageMlocked(page)) {
+	unevictable = (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED;
+	if (unlikely(unevictable) && !TestSetPageMlocked(page)) {
 		/*
 		 * We use the irq-unsafe __mod_zone_page_stat because this
 		 * counter is not modified from interrupt context, and the pte
2 changes: 1 addition & 1 deletion mm/swapfile.c
@@ -1915,7 +1915,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 		page_add_anon_rmap(page, vma, addr, false);
 	} else { /* ksm created a completely new copy */
 		page_add_new_anon_rmap(page, vma, addr, false);
-		lru_cache_add_active_or_unevictable(page, vma);
+		lru_cache_add_inactive_or_unevictable(page, vma);
 	}
 	swap_free(entry);
 	/*
2 changes: 1 addition & 1 deletion mm/userfaultfd.c
@@ -123,7 +123,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 
 	inc_mm_counter(dst_mm, MM_ANONPAGES);
 	page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
-	lru_cache_add_active_or_unevictable(page, dst_vma);
+	lru_cache_add_inactive_or_unevictable(page, dst_vma);
 
 	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
 
4 changes: 1 addition & 3 deletions mm/vmscan.c
@@ -998,8 +998,6 @@ static enum page_references page_check_references(struct page *page,
 		return PAGEREF_RECLAIM;
 
 	if (referenced_ptes) {
-		if (PageSwapBacked(page))
-			return PAGEREF_ACTIVATE;
 		/*
 		 * All mapped pages start out with page table
 		 * references from the instantiating fault, so we need
@@ -1022,7 +1020,7 @@ static enum page_references page_check_references(struct page *page,
 		/*
 		 * Activate file-backed executable pages after first usage.
 		 */
-		if (vm_flags & VM_EXEC)
+		if ((vm_flags & VM_EXEC) && !PageSwapBacked(page))
 			return PAGEREF_ACTIVATE;
 
 		return PAGEREF_KEEP;
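
Taken together, the two hunks above mean a referenced anonymous page is no longer activated on its first detected reference; it stays on the inactive list and is only promoted on a second reference, just as the file LRU already behaves.  Roughly, as a condensed paraphrase (the helper name and flattened parameter list are invented for this sketch, and the VM_LOCKED, unreferenced and dirty/writeback paths of page_check_references() are elided -- this is orientation, not the literal kernel source):

/*
 * Sketch of the post-patch behaviour for a page that has page-table
 * references (referenced_ptes != 0); surrounding function elided.
 */
static enum page_references referenced_page_decision(struct page *page,
						     unsigned long vm_flags,
						     int referenced_ptes,
						     int referenced_page)
{
	/* the first reference only marks the page, anon and file alike */
	SetPageReferenced(page);

	/* activate on a second reference, as the file LRU already does */
	if (referenced_page || referenced_ptes > 1)
		return PAGEREF_ACTIVATE;

	/* the executable shortcut is now restricted to file-backed pages */
	if ((vm_flags & VM_EXEC) && !PageSwapBacked(page))
		return PAGEREF_ACTIVATE;

	return PAGEREF_KEEP;
}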
