Skip to content

Commit

Permalink
cgroups: fix race between userspace and kernelspace
Browse files Browse the repository at this point in the history
Notify userspace about cgroup removing only after rmdir of cgroup
directory to avoid race between userspace and kernelspace.

eventfd are used to notify about two types of event:
 - control file-specific, like crossing memory threshold;
 - cgroup removing.

To understand what really happen, userspace can check if the cgroup still
exists.  To avoid race beetween userspace and kernelspace we have to
notify userspace about cgroup removing only after rmdir of cgroup
directory.

Signed-off-by: Kirill A. Shutemov <[email protected]>
Reviewed-by: KAMEZAWA Hiroyuki <[email protected]>
Cc: Paul Menage <[email protected]>
Acked-by: Li Zefan <[email protected]>
Cc: Balbir Singh <[email protected]>
Cc: Pavel Emelyanov <[email protected]>
Cc: Dan Malek <[email protected]>
Cc: Daisuke Nishimura <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
kiryl authored and torvalds committed Mar 12, 2010
1 parent daaf1e6 commit 4ab7868
Showing 1 changed file with 17 additions and 15 deletions.
32 changes: 17 additions & 15 deletions kernel/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -795,28 +795,15 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
static int cgroup_call_pre_destroy(struct cgroup *cgrp)
{
struct cgroup_subsys *ss;
struct cgroup_event *event, *tmp;
int ret = 0;

for_each_subsys(cgrp->root, ss)
if (ss->pre_destroy) {
ret = ss->pre_destroy(ss, cgrp);
if (ret)
goto out;
break;
}

/*
* Unregister events and notify userspace.
*/
spin_lock(&cgrp->event_list_lock);
list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
list_del(&event->list);
eventfd_signal(event->eventfd, 1);
schedule_work(&event->remove);
}
spin_unlock(&cgrp->event_list_lock);

out:
return ret;
}

Expand Down Expand Up @@ -3006,7 +2993,6 @@ static void cgroup_event_remove(struct work_struct *work)
event->cft->unregister_event(cgrp, event->cft, event->eventfd);

eventfd_ctx_put(event->eventfd);
remove_wait_queue(event->wqh, &event->wait);
kfree(event);
}

Expand All @@ -3024,6 +3010,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
unsigned long flags = (unsigned long)key;

if (flags & POLLHUP) {
remove_wait_queue_locked(event->wqh, &event->wait);
spin_lock(&cgrp->event_list_lock);
list_del(&event->list);
spin_unlock(&cgrp->event_list_lock);
Expand Down Expand Up @@ -3472,6 +3459,7 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
struct dentry *d;
struct cgroup *parent;
DEFINE_WAIT(wait);
struct cgroup_event *event, *tmp;
int ret;

/* the vfs holds both inode->i_mutex already */
Expand Down Expand Up @@ -3555,6 +3543,20 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
set_bit(CGRP_RELEASABLE, &parent->flags);
check_for_release(parent);

/*
* Unregister events and notify userspace.
* Notify userspace about cgroup removing only after rmdir of cgroup
* directory to avoid race between userspace and kernelspace
*/
spin_lock(&cgrp->event_list_lock);
list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
list_del(&event->list);
remove_wait_queue(event->wqh, &event->wait);
eventfd_signal(event->eventfd, 1);
schedule_work(&event->remove);
}
spin_unlock(&cgrp->event_list_lock);

mutex_unlock(&cgroup_mutex);
return 0;
}
Expand Down

0 comments on commit 4ab7868

Please sign in to comment.