Skip to content

Commit

Permalink
rtmutex: Turn the plist into an rb-tree
Browse files Browse the repository at this point in the history
Turn the pi-chains from plist to rb-tree, in the rt_mutex code,
and provide a proper comparison function for -deadline and
-priority tasks.

This is done mainly because:
 - the classical prio field of the plist is just an int, which might
   not be enough for representing a deadline;
 - manipulating such a list would become O(nr_deadline_tasks),
   which might be too much, as the number of -deadline tasks increases.

Therefore, an rb-tree is used, and tasks are queued in it according
to the following logic:
 - among two -priority (i.e., SCHED_BATCH/OTHER/RR/FIFO) tasks, the
   one with the higher priority (i.e., the numerically lower prio value) wins;
 - among a -priority and a -deadline task, the latter always wins;
 - among two -deadline tasks, the one with the earliest deadline
   wins.

Queueing and dequeueing functions are changed accordingly, for both
the list of a task's pi-waiters and the list of tasks blocked on
a pi-lock.

Signed-off-by: Peter Zijlstra <[email protected]>
Signed-off-by: Dario Faggioli <[email protected]>
Signed-off-by: Juri Lelli <[email protected]>
Signed-off-again-by: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
  • Loading branch information
Peter Zijlstra authored and Ingo Molnar committed Jan 13, 2014
1 parent af6ace7 commit fb00aca
Show file tree
Hide file tree
Showing 9 changed files with 157 additions and 65 deletions.
10 changes: 10 additions & 0 deletions include/linux/init_task.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <linux/user_namespace.h>
#include <linux/securebits.h>
#include <linux/seqlock.h>
#include <linux/rbtree.h>
#include <net/net_namespace.h>
#include <linux/sched/rt.h>

Expand Down Expand Up @@ -154,6 +155,14 @@ extern struct task_group root_task_group;

#define INIT_TASK_COMM "swapper"

#ifdef CONFIG_RT_MUTEXES
# define INIT_RT_MUTEXES(tsk) \
.pi_waiters = RB_ROOT, \
.pi_waiters_leftmost = NULL,
#else
# define INIT_RT_MUTEXES(tsk)
#endif

/*
* INIT_TASK is used to set up the first task table, touch at
* your own risk!. Base=0, limit=0x1fffff (=2MB)
Expand Down Expand Up @@ -221,6 +230,7 @@ extern struct task_group root_task_group;
INIT_TRACE_RECURSION \
INIT_TASK_RCU_PREEMPT(tsk) \
INIT_CPUSET_SEQ(tsk) \
INIT_RT_MUTEXES(tsk) \
INIT_VTIME(tsk) \
}

Expand Down
18 changes: 6 additions & 12 deletions include/linux/rtmutex.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#define __LINUX_RT_MUTEX_H

#include <linux/linkage.h>
#include <linux/plist.h>
#include <linux/rbtree.h>
#include <linux/spinlock_types.h>

extern int max_lock_depth; /* for sysctl */
Expand All @@ -22,12 +22,14 @@ extern int max_lock_depth; /* for sysctl */
* The rt_mutex structure
*
* @wait_lock: spinlock to protect the structure
* @wait_list: pilist head to enqueue waiters in priority order
* @waiters: rbtree root to enqueue waiters in priority order
* @waiters_leftmost: top waiter
* @owner: the mutex owner
*/
struct rt_mutex {
raw_spinlock_t wait_lock;
struct plist_head wait_list;
struct rb_root waiters;
struct rb_node *waiters_leftmost;
struct task_struct *owner;
#ifdef CONFIG_DEBUG_RT_MUTEXES
int save_state;
Expand Down Expand Up @@ -66,7 +68,7 @@ struct hrtimer_sleeper;

#define __RT_MUTEX_INITIALIZER(mutexname) \
{ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
, .wait_list = PLIST_HEAD_INIT(mutexname.wait_list) \
, .waiters = RB_ROOT \
, .owner = NULL \
__DEBUG_RT_MUTEX_INITIALIZER(mutexname)}

Expand Down Expand Up @@ -98,12 +100,4 @@ extern int rt_mutex_trylock(struct rt_mutex *lock);

extern void rt_mutex_unlock(struct rt_mutex *lock);

#ifdef CONFIG_RT_MUTEXES
# define INIT_RT_MUTEXES(tsk) \
.pi_waiters = PLIST_HEAD_INIT(tsk.pi_waiters), \
INIT_RT_MUTEX_DEBUG(tsk)
#else
# define INIT_RT_MUTEXES(tsk)
#endif

#endif
4 changes: 3 additions & 1 deletion include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ struct sched_param {
#include <linux/types.h>
#include <linux/timex.h>
#include <linux/jiffies.h>
#include <linux/plist.h>
#include <linux/rbtree.h>
#include <linux/thread_info.h>
#include <linux/cpumask.h>
Expand Down Expand Up @@ -1354,7 +1355,8 @@ struct task_struct {

#ifdef CONFIG_RT_MUTEXES
/* PI waiters blocked on a rt_mutex held by this task */
struct plist_head pi_waiters;
struct rb_root pi_waiters;
struct rb_node *pi_waiters_leftmost;
/* Deadlock detection and priority inheritance handling */
struct rt_mutex_waiter *pi_blocked_on;
#endif
Expand Down
3 changes: 2 additions & 1 deletion kernel/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -1087,7 +1087,8 @@ static void rt_mutex_init_task(struct task_struct *p)
{
raw_spin_lock_init(&p->pi_lock);
#ifdef CONFIG_RT_MUTEXES
plist_head_init(&p->pi_waiters);
p->pi_waiters = RB_ROOT;
p->pi_waiters_leftmost = NULL;
p->pi_blocked_on = NULL;
#endif
}
Expand Down
2 changes: 2 additions & 0 deletions kernel/futex.c
Original file line number Diff line number Diff line change
Expand Up @@ -2316,6 +2316,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
* code while we sleep on uaddr.
*/
debug_rt_mutex_init_waiter(&rt_waiter);
RB_CLEAR_NODE(&rt_waiter.pi_tree_entry);
RB_CLEAR_NODE(&rt_waiter.tree_entry);
rt_waiter.task = NULL;

ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
Expand Down
8 changes: 2 additions & 6 deletions kernel/locking/rtmutex-debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include <linux/kallsyms.h>
#include <linux/syscalls.h>
#include <linux/interrupt.h>
#include <linux/plist.h>
#include <linux/rbtree.h>
#include <linux/fs.h>
#include <linux/debug_locks.h>

Expand Down Expand Up @@ -57,7 +57,7 @@ static void printk_lock(struct rt_mutex *lock, int print_owner)

void rt_mutex_debug_task_free(struct task_struct *task)
{
DEBUG_LOCKS_WARN_ON(!plist_head_empty(&task->pi_waiters));
DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters));
DEBUG_LOCKS_WARN_ON(task->pi_blocked_on);
}

Expand Down Expand Up @@ -154,16 +154,12 @@ void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock)
void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
{
memset(waiter, 0x11, sizeof(*waiter));
plist_node_init(&waiter->list_entry, MAX_PRIO);
plist_node_init(&waiter->pi_list_entry, MAX_PRIO);
waiter->deadlock_task_pid = NULL;
}

void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
{
put_pid(waiter->deadlock_task_pid);
DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->list_entry));
DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
memset(waiter, 0x22, sizeof(*waiter));
}

Expand Down
Loading

0 comments on commit fb00aca

Please sign in to comment.