Skip to content

Commit

Permalink
mm: fix race between kmem_cache destroy, create and deactivate
Browse files Browse the repository at this point in the history
The memcg kmem cache creation and deactivation (SLUB only) are
asynchronous.  If a root kmem cache is destroyed while one of its memcg
caches is in the process of creation or deactivation, the kernel may crash.

Example of one such crash:
	general protection fault: 0000 [#1] SMP PTI
	CPU: 1 PID: 1721 Comm: kworker/14:1 Not tainted 4.17.0-smp
	...
	Workqueue: memcg_kmem_cache kmemcg_deactivate_workfn
	RIP: 0010:has_cpu_slab
	...
	Call Trace:
	? on_each_cpu_cond
	__kmem_cache_shrink
	kmemcg_cache_deact_after_rcu
	kmemcg_deactivate_workfn
	process_one_work
	worker_thread
	kthread
	ret_from_fork+0x35/0x40

To fix this race, on root kmem cache destruction, mark the cache as
dying and flush the workqueue used for memcg kmem cache creation and
deactivation.  SLUB's memcg kmem cache deactivation also involves an RCU
callback, so make sure all previously registered RCU callbacks have
completed as well.

[[email protected]: handle the RCU callbacks for SLUB deactivation]
  Link: http://lkml.kernel.org/r/[email protected]
[[email protected]: add more documentation, rename fields for readability]
  Link: http://lkml.kernel.org/r/[email protected]
[[email protected]: fix build, per Shakeel]
[[email protected]: v3.  Instead of refcount, flush the workqueue]
  Link: http://lkml.kernel.org/r/[email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Shakeel Butt <[email protected]>
Acked-by: Vladimir Davydov <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Greg Thelen <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: Pekka Enberg <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Joonsoo Kim <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Tejun Heo <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
shakeelb authored and torvalds committed Jun 14, 2018
1 parent 2bdce74 commit 92ee383
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 1 deletion.
1 change: 1 addition & 0 deletions include/linux/slab.h
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,7 @@ struct memcg_cache_params {
struct memcg_cache_array __rcu *memcg_caches;
struct list_head __root_caches_node;
struct list_head children;
bool dying;
};
struct {
struct mem_cgroup *memcg;
Expand Down
33 changes: 32 additions & 1 deletion mm/slab_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ void slab_init_memcg_params(struct kmem_cache *s)
s->memcg_params.root_cache = NULL;
RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
INIT_LIST_HEAD(&s->memcg_params.children);
s->memcg_params.dying = false;
}

static int init_memcg_params(struct kmem_cache *s,
Expand Down Expand Up @@ -608,7 +609,7 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
* The memory cgroup could have been offlined while the cache
* creation work was pending.
*/
if (memcg->kmem_state != KMEM_ONLINE)
if (memcg->kmem_state != KMEM_ONLINE || root_cache->memcg_params.dying)
goto out_unlock;

idx = memcg_cache_id(memcg);
Expand Down Expand Up @@ -712,6 +713,9 @@ void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
WARN_ON_ONCE(s->memcg_params.deact_fn))
return;

if (s->memcg_params.root_cache->memcg_params.dying)
return;

/* pin memcg so that @s doesn't get destroyed in the middle */
css_get(&s->memcg_params.memcg->css);

Expand Down Expand Up @@ -823,11 +827,36 @@ static int shutdown_memcg_caches(struct kmem_cache *s)
return -EBUSY;
return 0;
}

/*
 * flush_memcg_workqueue - quiesce asynchronous memcg cache work for @s.
 * @s: the kmem_cache that is about to be destroyed.
 *
 * Sets s->memcg_params.dying while holding slab_mutex, then waits for
 * already-registered RCU-sched callbacks (SLUB builds only) and finally
 * flushes memcg_kmem_cache_wq. Called from kmem_cache_destroy() before the
 * cache is torn down.
 *
 * The dying flag is what memcg_create_kmem_cache() and
 * slab_deactivate_memcg_cache_rcu_sched() test in order to bail out early.
 *
 * NOTE(review): the RCU barrier is issued before the workqueue flush,
 * presumably because the SLUB RCU callback itself queues work on
 * memcg_kmem_cache_wq -- confirm before reordering these steps.
 */
static void flush_memcg_workqueue(struct kmem_cache *s)
{
mutex_lock(&slab_mutex);
s->memcg_params.dying = true;
mutex_unlock(&slab_mutex);

/*
* SLUB deactivates the kmem_caches through call_rcu_sched. Make
* sure all registered rcu callbacks have been invoked.
*/
if (IS_ENABLED(CONFIG_SLUB))
rcu_barrier_sched();

/*
* SLAB and SLUB create memcg kmem_caches through workqueue and SLUB
* deactivates the memcg kmem_caches through workqueue. Make sure all
* previous workitems on workqueue are processed.
*/
flush_workqueue(memcg_kmem_cache_wq);
}
#else
/*
 * Stub used when the CONFIG_MEMCG && !CONFIG_SLOB condition does not hold:
 * it does nothing with @s and always returns 0.
 */
static inline int shutdown_memcg_caches(struct kmem_cache *s)
{
return 0;
}

/*
 * Stub used when the CONFIG_MEMCG && !CONFIG_SLOB condition does not hold:
 * intentionally empty, so the call in kmem_cache_destroy() is a no-op.
 */
static inline void flush_memcg_workqueue(struct kmem_cache *s)
{
}
#endif /* CONFIG_MEMCG && !CONFIG_SLOB */

void slab_kmem_cache_release(struct kmem_cache *s)
Expand All @@ -845,6 +874,8 @@ void kmem_cache_destroy(struct kmem_cache *s)
if (unlikely(!s))
return;

flush_memcg_workqueue(s);

get_online_cpus();
get_online_mems();

Expand Down

0 comments on commit 92ee383

Please sign in to comment.