mm, page_alloc: remove fair zone allocation policy
The fair zone allocation policy interleaves allocation requests between
zones to avoid an age inversion problem whereby new pages are reclaimed
to balance a zone.  Reclaim is now node-based, so this should no longer
be an issue, and the fair zone allocation policy is not free.  This patch
removes it.
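
For readers new to this code, the policy being removed worked roughly as
follows: each zone carried an allocation batch (the NR_ALLOC_BATCH counter)
sized to the gap between its high and low watermarks, the allocator fast
path skipped any zone whose batch was depleted (ZONE_FAIR_DEPLETED), and
the batches were refilled once every zone's share had been consumed, so
allocations were interleaved across zones roughly in proportion to zone
size.  The fragment below is a minimal userspace sketch of that
interleaving idea only; the structure, helper names, and numbers are
illustrative assumptions, not the kernel code being deleted here.

#include <stdio.h>

/* Illustrative stand-in for a zone: only the fields this sketch needs. */
struct fake_zone {
        const char *name;
        long high_wmark;
        long low_wmark;
        long alloc_batch;       /* counterpart of NR_ALLOC_BATCH */
};

/* Refill every batch to high_wmark - low_wmark, in the spirit of the
 * removed reset_alloc_batches()/__setup_per_zone_wmarks() accounting. */
static void reset_batches(struct fake_zone *zones, int nr)
{
        for (int i = 0; i < nr; i++)
                zones[i].alloc_batch = zones[i].high_wmark - zones[i].low_wmark;
}

/* "Fair" pick: take the first zone whose batch is not yet depleted and
 * charge one page against it; once all are empty, refill and retry. */
static struct fake_zone *pick_zone(struct fake_zone *zones, int nr)
{
        for (int i = 0; i < nr; i++) {
                if (zones[i].alloc_batch > 0) {
                        zones[i].alloc_batch--;
                        return &zones[i];
                }
        }
        reset_batches(zones, nr);
        return pick_zone(zones, nr);
}

int main(void)
{
        struct fake_zone zones[] = {
                { "DMA32",  400, 100, 0 },      /* batch will be 300  */
                { "Normal", 4000, 1000, 0 },    /* batch will be 3000 */
        };
        long hits[2] = { 0, 0 };

        reset_batches(zones, 2);
        for (int i = 0; i < 3300; i++)
                hits[pick_zone(zones, 2) - zones]++;

        /* Over one full cycle the split matches the batch sizes, i.e. the
         * zones' relative sizes: DMA32 gets 300 pages, Normal gets 3000. */
        printf("%s: %ld  %s: %ld\n", zones[0].name, hits[0],
               zones[1].name, hits[1]);
        return 0;
}

With reclaim now operating on whole nodes rather than on individual zones,
a page's age no longer depends on which zone it happened to land in, so
the interleaving, and the cost of maintaining the batch counters on every
allocation, can be dropped.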

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Mel Gorman <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Cc: Hillf Danton <[email protected]>
Acked-by: Johannes Weiner <[email protected]>
Cc: Joonsoo Kim <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Minchan Kim <[email protected]>
Cc: Rik van Riel <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
gormanm authored and torvalds committed Jul 28, 2016
1 parent e5146b1 commit e6cbd7f
Showing 4 changed files with 2 additions and 83 deletions.
5 changes: 0 additions & 5 deletions include/linux/mmzone.h
@@ -110,7 +110,6 @@ struct zone_padding {
enum zone_stat_item {
/* First 128 byte cacheline (assuming 64 bit words) */
NR_FREE_PAGES,
NR_ALLOC_BATCH,
NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */
NR_ZONE_LRU_ANON = NR_ZONE_LRU_BASE,
NR_ZONE_LRU_FILE,
@@ -516,10 +515,6 @@ struct zone {
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
} ____cacheline_internodealigned_in_smp;

enum zone_flags {
ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */
};

enum pgdat_flags {
PGDAT_CONGESTED, /* pgdat has many dirty pages backed by
* a congested BDI
1 change: 0 additions & 1 deletion mm/internal.h
@@ -467,7 +467,6 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
#define ALLOC_HIGH 0x20 /* __GFP_HIGH set */
#define ALLOC_CPUSET 0x40 /* check for correct cpuset */
#define ALLOC_CMA 0x80 /* allow allocations from CMA areas */
#define ALLOC_FAIR 0x100 /* fair zone allocation */

enum ttu_flags;
struct tlbflush_unmap_batch;
75 changes: 1 addition & 74 deletions mm/page_alloc.c
@@ -2587,7 +2587,6 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
else
page = list_first_entry(list, struct page, lru);

__dec_zone_state(zone, NR_ALLOC_BATCH);
list_del(&page->lru);
pcp->count--;

@@ -2613,15 +2612,10 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
spin_unlock(&zone->lock);
if (!page)
goto failed;
__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
__mod_zone_freepage_state(zone, -(1 << order),
get_pcppage_migratetype(page));
}

if (atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]) <= 0 &&
!test_bit(ZONE_FAIR_DEPLETED, &zone->flags))
set_bit(ZONE_FAIR_DEPLETED, &zone->flags);

__count_zone_vm_events(PGALLOC, zone, 1 << order);
zone_statistics(preferred_zone, zone, gfp_flags);
local_irq_restore(flags);
@@ -2832,40 +2826,18 @@ bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
}

#ifdef CONFIG_NUMA
static bool zone_local(struct zone *local_zone, struct zone *zone)
{
return local_zone->node == zone->node;
}

static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
{
return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <
RECLAIM_DISTANCE;
}
#else /* CONFIG_NUMA */
static bool zone_local(struct zone *local_zone, struct zone *zone)
{
return true;
}

static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
{
return true;
}
#endif /* CONFIG_NUMA */

static void reset_alloc_batches(struct zone *preferred_zone)
{
struct zone *zone = preferred_zone->zone_pgdat->node_zones;

do {
mod_zone_page_state(zone, NR_ALLOC_BATCH,
high_wmark_pages(zone) - low_wmark_pages(zone) -
atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
clear_bit(ZONE_FAIR_DEPLETED, &zone->flags);
} while (zone++ != preferred_zone);
}

/*
* get_page_from_freelist goes through the zonelist trying to allocate
* a page.
@@ -2876,10 +2848,6 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
{
struct zoneref *z = ac->preferred_zoneref;
struct zone *zone;
bool fair_skipped = false;
bool apply_fair = (alloc_flags & ALLOC_FAIR);

zonelist_scan:
/*
* Scan zonelist, looking for a zone with enough free.
* See also __cpuset_node_allowed() comment in kernel/cpuset.c.
@@ -2893,23 +2861,6 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
(alloc_flags & ALLOC_CPUSET) &&
!__cpuset_zone_allowed(zone, gfp_mask))
continue;
/*
* Distribute pages in proportion to the individual
* zone size to ensure fair page aging. The zone a
* page was allocated in should have no effect on the
* time the page has in memory before being reclaimed.
*/
if (apply_fair) {
if (test_bit(ZONE_FAIR_DEPLETED, &zone->flags)) {
fair_skipped = true;
continue;
}
if (!zone_local(ac->preferred_zoneref->zone, zone)) {
if (fair_skipped)
goto reset_fair;
apply_fair = false;
}
}
/*
* When allocating a page cache page for writing, we
* want to get it from a node that is within its dirty
@@ -2981,23 +2932,6 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
}
}

/*
* The first pass makes sure allocations are spread fairly within the
* local node. However, the local node might have free pages left
* after the fairness batches are exhausted, and remote zones haven't
* even been considered yet. Try once more without fairness, and
* include remote zones now, before entering the slowpath and waking
* kswapd: prefer spilling to a remote zone over swapping locally.
*/
if (fair_skipped) {
reset_fair:
apply_fair = false;
fair_skipped = false;
reset_alloc_batches(ac->preferred_zoneref->zone);
z = ac->preferred_zoneref;
goto zonelist_scan;
}

return NULL;
}

@@ -3746,7 +3680,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
{
struct page *page;
unsigned int cpuset_mems_cookie;
unsigned int alloc_flags = ALLOC_WMARK_LOW|ALLOC_FAIR;
unsigned int alloc_flags = ALLOC_WMARK_LOW;
gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */
struct alloc_context ac = {
.high_zoneidx = gfp_zone(gfp_mask),
@@ -5958,9 +5892,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
zone_seqlock_init(zone);
zone_pcp_init(zone);

/* For bootup, initialized properly in watermark setup */
mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages);

if (!size)
continue;

@@ -6808,10 +6739,6 @@ static void __setup_per_zone_wmarks(void)
zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp;
zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2;

__mod_zone_page_state(zone, NR_ALLOC_BATCH,
high_wmark_pages(zone) - low_wmark_pages(zone) -
atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));

spin_unlock_irqrestore(&zone->lock, flags);
}

4 changes: 1 addition & 3 deletions mm/vmstat.c
@@ -921,7 +921,6 @@ int fragmentation_index(struct zone *zone, unsigned int order)
const char * const vmstat_text[] = {
/* enum zone_stat_item countes */
"nr_free_pages",
"nr_alloc_batch",
"nr_zone_anon_lru",
"nr_zone_file_lru",
"nr_zone_write_pending",
@@ -1632,10 +1631,9 @@ int vmstat_refresh(struct ctl_table *table, int write,
val = atomic_long_read(&vm_zone_stat[i]);
if (val < 0) {
switch (i) {
case NR_ALLOC_BATCH:
case NR_PAGES_SCANNED:
/*
* These are often seen to go negative in
* This is often seen to go negative in
* recent kernels, but not to go permanently
* negative. Whilst it would be nicer not to
* have exceptions, rooting them out would be
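
A side effect of dropping "nr_alloc_batch" from vmstat_text is that the
counter no longer appears in /proc/vmstat, so tooling should not assume
it is present.  The following small, self-contained userspace sketch
(not part of this patch) shows one way to read a counter defensively;
the helper name and usage are illustrative only.

#include <stdio.h>
#include <string.h>

/* Look up a named counter in /proc/vmstat; return 0 on success, -1 if
 * the file cannot be opened or the counter is not exported. */
static int read_vmstat(const char *name, long *out)
{
        char key[64];
        long val;
        FILE *f = fopen("/proc/vmstat", "r");

        if (!f)
                return -1;
        while (fscanf(f, "%63s %ld", key, &val) == 2) {
                if (strcmp(key, name) == 0) {
                        *out = val;
                        fclose(f);
                        return 0;
                }
        }
        fclose(f);
        return -1;      /* counter not present on this kernel */
}

int main(void)
{
        long val;

        if (read_vmstat("nr_free_pages", &val) == 0)
                printf("nr_free_pages %ld\n", val);
        if (read_vmstat("nr_alloc_batch", &val) != 0)
                printf("nr_alloc_batch not exported by this kernel\n");
        return 0;
}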
