Skip to content

Commit

Permalink
cgroup: Reorganize css_set_lock and kernfs path processing
Browse files Browse the repository at this point in the history
Commit 74e4b95 incorrectly wrapped kernfs_walk_and_get() (which may
sleep) under css_set_lock (a spinlock). css_set_lock is needed by
__cset_cgroup_from_root() to ensure stable cset->cgrp_links, but not for
kernfs_walk_and_get().

We only need to make sure that the returned root_cgrp won't be freed
under us. This is guaranteed for the global root because it is static
(cgrp_dfl_root.cgrp). When the root_cgrp is lower in the hierarchy, it
is pinned by cgroup_ns->root_cset (and the `current` task cannot switch
namespaces asynchronously, so ns_proxy pins cgroup_ns).

Note this reasoning won't hold for root cgroups in v1 hierarchies,
therefore create a special-cased helper function just for the default
hierarchy.

Fixes: 74e4b95 ("cgroup: Honor caller's cgroup NS when resolving path")
Reported-by: Dan Carpenter <[email protected]>
Signed-off-by: Michal Koutný <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
  • Loading branch information
Werkov authored and htejun committed Oct 10, 2022
1 parent 4de65c5 commit 46307fd
Showing 1 changed file with 27 additions and 13 deletions.
40 changes: 27 additions & 13 deletions kernel/cgroup/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -1392,6 +1392,9 @@ static void cgroup_destroy_root(struct cgroup_root *root)
cgroup_free_root(root);
}

/*
* Returned cgroup is without refcount but it's valid as long as cset pins it.
*/
static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
struct cgroup_root *root)
{
Expand All @@ -1403,6 +1406,7 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
res_cgroup = cset->dfl_cgrp;
} else {
struct cgrp_cset_link *link;
lockdep_assert_held(&css_set_lock);

list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
struct cgroup *c = link->cgrp;
Expand All @@ -1414,6 +1418,7 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
}
}

BUG_ON(!res_cgroup);
return res_cgroup;
}

Expand All @@ -1436,23 +1441,36 @@ current_cgns_cgroup_from_root(struct cgroup_root *root)

rcu_read_unlock();

BUG_ON(!res);
return res;
}

/*
 * Look up the cgroup associated with the current task's cgroup namespace on
 * the default hierarchy.
 *
 * Unlike current_cgns_cgroup_from_root(), this doesn't need locks:
 * - Internal rcu_read_lock is unnecessary because we don't dereference any rcu
 *   pointers.
 * - css_set_lock is not needed because we just read cset->dfl_cgrp.
 * - As a bonus, the returned cgrp is pinned by the current task because it
 *   cannot switch cgroup_ns asynchronously.
 */
static struct cgroup *current_cgns_cgroup_dfl(void)
{
struct css_set *cset;

cset = current->nsproxy->cgroup_ns->root_cset;
return __cset_cgroup_from_root(cset, &cgrp_dfl_root);
}

/* look up cgroup associated with given css_set on the specified hierarchy */
static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
struct cgroup_root *root)
{
struct cgroup *res = NULL;

lockdep_assert_held(&cgroup_mutex);
lockdep_assert_held(&css_set_lock);

res = __cset_cgroup_from_root(cset, root);

BUG_ON(!res);
return res;
return __cset_cgroup_from_root(cset, root);
}

/*
Expand Down Expand Up @@ -6105,9 +6123,7 @@ struct cgroup *cgroup_get_from_id(u64 id)
if (!cgrp)
return ERR_PTR(-ENOENT);

spin_lock_irq(&css_set_lock);
root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root);
spin_unlock_irq(&css_set_lock);
root_cgrp = current_cgns_cgroup_dfl();
if (!cgroup_is_descendant(cgrp, root_cgrp)) {
cgroup_put(cgrp);
return ERR_PTR(-ENOENT);
Expand Down Expand Up @@ -6686,10 +6702,8 @@ struct cgroup *cgroup_get_from_path(const char *path)
struct cgroup *cgrp = ERR_PTR(-ENOENT);
struct cgroup *root_cgrp;

spin_lock_irq(&css_set_lock);
root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root);
root_cgrp = current_cgns_cgroup_dfl();
kn = kernfs_walk_and_get(root_cgrp->kn, path);
spin_unlock_irq(&css_set_lock);
if (!kn)
goto out;

Expand Down

0 comments on commit 46307fd

Please sign in to comment.