6.5: mm patches #2

Merged: 8 commits, Sep 23, 2023
23 changes: 2 additions & 21 deletions lib/scatterlist.c
@@ -150,31 +150,12 @@ EXPORT_SYMBOL(sg_init_one);
*/
static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
{
- if (nents == SG_MAX_SINGLE_ALLOC) {
- /*
- * Kmemleak doesn't track page allocations as they are not
- * commonly used (in a raw form) for kernel data structures.
- * As we chain together a list of pages and then a normal
- * kmalloc (tracked by kmemleak), in order to for that last
- * allocation not to become decoupled (and thus a
- * false-positive) we need to inform kmemleak of all the
- * intermediate allocations.
- */
- void *ptr = (void *) __get_free_page(gfp_mask);
- kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
- return ptr;
- } else
- return kmalloc_array(nents, sizeof(struct scatterlist),
- gfp_mask);
+ return kmalloc_array(nents, sizeof(struct scatterlist), gfp_mask);
}

static void sg_kfree(struct scatterlist *sg, unsigned int nents)
{
- if (nents == SG_MAX_SINGLE_ALLOC) {
- kmemleak_free(sg);
- free_page((unsigned long) sg);
- } else
- kfree(sg);
+ kfree(sg);
}

/**
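With the SG_MAX_SINGLE_ALLOC page path removed, sg_kmalloc() always goes through kmalloc_array(), i.e. an overflow-checked multiply followed by a single allocation that kmemleak already tracks, which is why the kmemleak bookkeeping above can go away. A minimal userspace sketch of that overflow-checked pattern (array_alloc is a hypothetical name, plain malloc() stands in for kmalloc, and the GCC/Clang __builtin_mul_overflow builtin is assumed):

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for kmalloc_array(): refuse allocations whose
 * total size would overflow, then fall through to one ordinary malloc()
 * that a leak tracker can see. */
static void *array_alloc(size_t n, size_t size)
{
	size_t bytes;

	if (__builtin_mul_overflow(n, size, &bytes))
		return NULL;	/* n * size would wrap around */
	return malloc(bytes);
}

int main(void)
{
	double *v = array_alloc(128, sizeof(*v));

	if (!v)
		return 1;
	printf("allocated %zu bytes in one tracked allocation\n",
	       128 * sizeof(*v));
	free(v);
	return 0;
}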
2 changes: 1 addition & 1 deletion mm/compaction.c
@@ -1780,7 +1780,7 @@ static int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNE
* aggressively the kernel should compact memory in the
* background. It takes values in the range [0, 100].
*/
- static unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
+ static unsigned int __read_mostly sysctl_compaction_proactiveness;
static int sysctl_extfrag_threshold = 500;
static int __read_mostly sysctl_compact_memory;

1 change: 1 addition & 0 deletions mm/internal.h
@@ -421,6 +421,7 @@ extern void prep_compound_page(struct page *page, unsigned int order);
extern void post_alloc_hook(struct page *page, unsigned int order,
gfp_t gfp_flags);
extern int user_min_free_kbytes;
+ extern atomic_long_t kswapd_waiters;

extern void free_unref_page(struct page *page, unsigned int order);
extern void free_unref_page_list(struct list_head *list);
4 changes: 4 additions & 0 deletions mm/list_lru.c
@@ -178,6 +178,7 @@ EXPORT_SYMBOL_GPL(list_lru_isolate_move);
unsigned long list_lru_count_one(struct list_lru *lru,
int nid, struct mem_cgroup *memcg)
{
+ #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
struct list_lru_one *l;
long count;

@@ -190,6 +191,9 @@ unsigned long list_lru_count_one(struct list_lru *lru,
count = 0;

return count;
+ #else
+ return READ_ONCE(lru->node[nid].lru.nr_items);
+ #endif
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

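In the non-memcg configuration the new branch returns lru->node[nid].lru.nr_items through READ_ONCE(), taking a cheap snapshot instead of resolving the per-memcg list. A small userspace sketch of that general shape, a counter that writers maintain under a lock while readers take an approximate lock-free snapshot (all names hypothetical; C11 atomics stand in for the kernel's READ_ONCE/WRITE_ONCE):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

/* Writers update nr_items while holding the lock; readers take an
 * approximate snapshot with a single relaxed load instead of locking. */
struct lru_node {
	pthread_mutex_t lock;
	atomic_long nr_items;
};

static void lru_add(struct lru_node *n)
{
	pthread_mutex_lock(&n->lock);
	atomic_fetch_add_explicit(&n->nr_items, 1, memory_order_relaxed);
	pthread_mutex_unlock(&n->lock);
}

static long lru_count(struct lru_node *n)
{
	/* No lock: the value may be slightly stale, which is acceptable
	 * for shrinker-style heuristics. */
	return atomic_load_explicit(&n->nr_items, memory_order_relaxed);
}

int main(void)
{
	struct lru_node n = { PTHREAD_MUTEX_INITIALIZER, 0 };

	lru_add(&n);
	lru_add(&n);
	printf("approximate count: %ld\n", lru_count(&n));
	return 0;
}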
42 changes: 33 additions & 9 deletions mm/page_alloc.c
Expand Up @@ -204,6 +204,8 @@ EXPORT_SYMBOL(node_states);

gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;

+ atomic_long_t kswapd_waiters = ATOMIC_LONG_INIT(0);

/*
* A cached value of the page's pageblock's migratetype, used when the page is
* put on a pcplist. Used to avoid the pageblock migratetype lookup when
@@ -297,7 +299,7 @@ static compound_page_dtor * const compound_page_dtors[NR_COMPOUND_DTORS] = {

int min_free_kbytes = 1024;
int user_min_free_kbytes = -1;
- static int watermark_boost_factor __read_mostly = 15000;
+ static int watermark_boost_factor __read_mostly;
static int watermark_scale_factor = 10;

/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
@@ -2152,16 +2154,17 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
}

/*
- * Obtain a specified number of elements from the buddy allocator, all under
- * a single hold of the lock, for efficiency. Add them to the supplied list.
- * Returns the number of new pages which were placed at *list.
+ * Obtain a specified number of elements from the buddy allocator, and relax the
+ * zone lock when needed. Add them to the supplied list. Returns the number of
+ * new pages which were placed at *list.
*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
int migratetype, unsigned int alloc_flags)
{
+ const bool can_resched = !preempt_count() && !irqs_disabled();
unsigned long flags;
int i;
int i, last_mod = 0;

spin_lock_irqsave(&zone->lock, flags);
for (i = 0; i < count; ++i) {
@@ -2170,6 +2173,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
if (unlikely(page == NULL))
break;

+ /* Reschedule and ease the contention on the lock if needed */
+ if (i + 1 < count && ((can_resched && need_resched()) ||
+ spin_needbreak(&zone->lock))) {
+ __mod_zone_page_state(zone, NR_FREE_PAGES,
+ -((i + 1 - last_mod) << order));
+ last_mod = i + 1;
+ spin_unlock_irqrestore(&zone->lock, flags);
+ if (can_resched)
+ cond_resched();
+ spin_lock_irqsave(&zone->lock, flags);
+ }

/*
* Split buddy pages returned by expand() are received here in
* physical page order. The page is added to the tail of
@@ -2186,7 +2201,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
-(1 << order));
}

- __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
+ __mod_zone_page_state(zone, NR_FREE_PAGES, -((i - last_mod) << order));
spin_unlock_irqrestore(&zone->lock, flags);

return i;
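The comment and code above describe the new rmqueue_bulk() behaviour: the batch is still taken nominally under one zone->lock hold, but the lock is dropped and re-taken whenever rescheduling is due or another CPU is spinning on it, and the NR_FREE_PAGES delta accumulated since last_mod is flushed before each release. A compressed userspace sketch of that drop-and-reacquire batching pattern (grab_bulk, shared_free and BATCH are hypothetical; a pthread mutex stands in for the zone spinlock and sched_yield() for cond_resched()):

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static long shared_free = 1000;	/* stand-in for the NR_FREE_PAGES counter */

/* Take up to 'count' items in one nominal batch, but every BATCH items
 * publish what we took so far and release the lock briefly so other
 * threads (and the scheduler) can get in. */
static int grab_bulk(int count)
{
	enum { BATCH = 16 };
	int i, last_mod = 0;

	pthread_mutex_lock(&lock);
	for (i = 0; i < count; i++) {
		if (shared_free - (i - last_mod) <= 0)
			break;		/* nothing left to take */

		if (i + 1 < count && (i + 1) % BATCH == 0) {
			/* Flush the delta taken since the last release,
			 * then drop the lock, yield, and re-acquire. */
			shared_free -= (i + 1 - last_mod);
			last_mod = i + 1;
			pthread_mutex_unlock(&lock);
			sched_yield();
			pthread_mutex_lock(&lock);
		}
	}
	shared_free -= (i - last_mod);	/* publish the remainder */
	pthread_mutex_unlock(&lock);
	return i;			/* items actually taken */
}

int main(void)
{
	printf("took %d items, %ld left\n", grab_bulk(100), shared_free);
	return 0;
}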
@@ -3962,6 +3977,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
unsigned int cpuset_mems_cookie;
unsigned int zonelist_iter_cookie;
int reserve_flags;
+ bool woke_kswapd = false;

restart:
compaction_retries = 0;
@@ -4001,8 +4017,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
goto nopage;
}

- if (alloc_flags & ALLOC_KSWAPD)
+ if (alloc_flags & ALLOC_KSWAPD) {
+ if (!woke_kswapd) {
+ atomic_long_inc(&kswapd_waiters);
+ woke_kswapd = true;
+ }
wake_all_kswapds(order, gfp_mask, ac);
+ }

/*
* The adjusted alloc_flags might result in immediate success, so try
@@ -4217,9 +4238,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
goto retry;
}
fail:
- warn_alloc(gfp_mask, ac->nodemask,
- "page allocation failure: order:%u", order);
got_pg:
+ if (woke_kswapd)
+ atomic_long_dec(&kswapd_waiters);
+ if (!page)
+ warn_alloc(gfp_mask, ac->nodemask,
+ "page allocation failure: order:%u", order);
return page;
}

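The page_alloc.c half of the change counts every task stuck in the slowpath with ALLOC_KSWAPD as a kswapd waiter (incremented once per slowpath entry, decremented on the way out), and the vmscan.c half below lets kswapd stop balancing once no waiters remain. A self-contained sketch of that waiter-counting handshake, with an ordinary worker thread standing in for kswapd (all names hypothetical):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_long waiters;	/* analogue of the kswapd_waiters counter */
static atomic_int stop;

/* Background "kswapd": only does reclaim passes while someone is waiting. */
static void *background_reclaim(void *arg)
{
	long passes = 0;
	(void)arg;

	while (!atomic_load(&stop)) {
		if (atomic_load(&waiters) > 0)
			passes++;	/* pretend to do one reclaim pass */
		usleep(1000);		/* otherwise stay idle */
	}
	printf("reclaim passes while waiters existed: %ld\n", passes);
	return NULL;
}

/* Foreground allocator slowpath: register as a waiter once, do the slow
 * work, and always unregister on the way out. */
static void alloc_slowpath(void)
{
	atomic_fetch_add(&waiters, 1);
	usleep(5000);			/* pretend to wait for free memory */
	atomic_fetch_sub(&waiters, 1);
}

int main(void)
{
	pthread_t kd;

	pthread_create(&kd, NULL, background_reclaim, NULL);
	alloc_slowpath();
	atomic_store(&stop, 1);
	pthread_join(kd, NULL);
	printf("waiters at exit: %ld\n", (long)atomic_load(&waiters));
	return 0;
}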
20 changes: 14 additions & 6 deletions mm/vmscan.c
@@ -6901,7 +6901,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
return 0;
}

- static bool allow_direct_reclaim(pg_data_t *pgdat)
+ static bool allow_direct_reclaim(pg_data_t *pgdat, bool using_kswapd)
{
struct zone *zone;
unsigned long pfmemalloc_reserve = 0;
@@ -6930,6 +6930,10 @@ static bool allow_direct_reclaim(pg_data_t *pgdat)

wmark_ok = free_pages > pfmemalloc_reserve / 2;

+ /* The throttled direct reclaimer is now a kswapd waiter */
+ if (unlikely(!using_kswapd && !wmark_ok))
+ atomic_long_inc(&kswapd_waiters);

/* kswapd must be awake if processes are being throttled */
if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) {
if (READ_ONCE(pgdat->kswapd_highest_zoneidx) > ZONE_NORMAL)
@@ -6995,7 +6999,7 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,

/* Throttle based on the first usable node */
pgdat = zone->zone_pgdat;
- if (allow_direct_reclaim(pgdat))
+ if (allow_direct_reclaim(pgdat, gfp_mask & __GFP_KSWAPD_RECLAIM))
goto out;
break;
}
@@ -7017,11 +7021,14 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
*/
if (!(gfp_mask & __GFP_FS))
wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
- allow_direct_reclaim(pgdat), HZ);
+ allow_direct_reclaim(pgdat, true), HZ);
else
/* Throttle until kswapd wakes the process */
wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
- allow_direct_reclaim(pgdat));
+ allow_direct_reclaim(pgdat, true));

+ if (unlikely(!(gfp_mask & __GFP_KSWAPD_RECLAIM)))
+ atomic_long_dec(&kswapd_waiters);

if (fatal_signal_pending(current))
return true;
@@ -7519,14 +7526,15 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
* able to safely make forward progress. Wake them
*/
if (waitqueue_active(&pgdat->pfmemalloc_wait) &&
- allow_direct_reclaim(pgdat))
+ allow_direct_reclaim(pgdat, true))
wake_up_all(&pgdat->pfmemalloc_wait);

/* Check if kswapd should be suspending */
__fs_reclaim_release(_THIS_IP_);
ret = try_to_freeze();
__fs_reclaim_acquire(_THIS_IP_);
- if (ret || kthread_should_stop())
+ if (ret || kthread_should_stop() ||
+ !atomic_long_read(&kswapd_waiters))
break;

/*
Expand Down