Commit 86c6a2f
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:
 "The main changes in this cycle are:

   - 'Nested Sleep Debugging', activated when CONFIG_DEBUG_ATOMIC_SLEEP=y.

     This instruments might_sleep() checks to catch places that nest
     blocking primitives - such as mutex usage in a wait loop, as in the
     sketch following the quoted message.  Such bugs can result in
     hard-to-debug races/hangs.

     Another category of invalid nesting that this facility will detect
     is the calling of blocking functions from within schedule() ->
     sched_submit_work() -> blk_schedule_flush_plug().

     There's some potential for false positives (if secondary blocking
     primitives themselves are not ready yet for this facility), but the
     kernel will warn once about such bugs per bootup, so the warning
     isn't much of a nuisance.

     This feature comes with a number of fixes for problems uncovered
     with it, so no warnings are expected normally.

   - Another round of sched/numa optimizations and refinements, for
     CONFIG_NUMA_BALANCING=y.

   - Another round of sched/dl fixes and refinements.

  Plus various smaller fixes and cleanups"
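To make the nested-sleep class of bug concrete, here is a minimal hypothetical wait loop of the kind the new debugging code flags. The names (event_pending(), consume_partial(), dev) are illustrative only and not from this merge:

	/* Classic open-coded wait loop with an invalid nested blocking call. */
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (event_pending(dev))		/* illustrative condition */
			break;
		/*
		 * BUG: mutex_lock() may sleep, and it leaves the task in
		 * TASK_RUNNING afterwards, so the schedule() below may not
		 * block at all and the loop can race with its wakeup.
		 */
		mutex_lock(&dev->lock);
		consume_partial(dev);
		mutex_unlock(&dev->lock);
		schedule();
	}
	__set_current_state(TASK_RUNNING);

With CONFIG_DEBUG_ATOMIC_SLEEP=y, the might_sleep() check inside mutex_lock() now warns (once per bootup) that a blocking operation was called with current->state != TASK_RUNNING.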

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (54 commits)
  sched: Add missing rcu protection to wake_up_all_idle_cpus
  sched/deadline: Introduce start_hrtick_dl() for !CONFIG_SCHED_HRTICK
  sched/numa: Init numa balancing fields of init_task
  sched/deadline: Remove unnecessary definitions in cpudeadline.h
  sched/cpupri: Remove unnecessary definitions in cpupri.h
  sched/deadline: Fix rq->dl.pushable_tasks bug in push_dl_task()
  sched/fair: Fix stale overloaded status in the busiest group finding logic
  sched: Move p->nr_cpus_allowed check to select_task_rq()
  sched/completion: Document when to use wait_for_completion_io_*()
  sched: Update comments about CLONE_NEWUTS and CLONE_NEWIPC
  sched/fair: Kill task_struct::numa_entry and numa_group::task_list
  sched: Refactor task_struct to use numa_faults instead of numa_* pointers
  sched/deadline: Don't check CONFIG_SMP in switched_from_dl()
  sched/deadline: Reschedule from switched_from_dl() after a successful pull
  sched/deadline: Push task away if the deadline is equal to curr during wakeup
  sched/deadline: Add deadline rq status print
  sched/deadline: Fix artificial overrun introduced by yield_task_dl()
  sched/rt: Clean up check_preempt_equal_prio()
  sched/core: Use dl_bw_of() under rcu_read_lock_sched()
  sched: Check if we got a shallowest_idle_cpu before searching for least_loaded_cpu
  ...
torvalds committed Dec 10, 2014
2 parents bee2782 + fd7de1e commit 86c6a2f
Showing 31 changed files with 915 additions and 335 deletions.
3 changes: 0 additions & 3 deletions arch/x86/include/asm/preempt.h
@@ -30,9 +30,6 @@ static __always_inline void preempt_count_set(int pc)
 /*
  * must be macros to avoid header recursion hell
  */
-#define task_preempt_count(p) \
-        (task_thread_info(p)->saved_preempt_count & ~PREEMPT_NEED_RESCHED)
-
 #define init_task_preempt_count(p) do { \
         task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \
 } while (0)
17 changes: 5 additions & 12 deletions drivers/tty/n_tty.c
@@ -2123,7 +2123,7 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
 {
         struct n_tty_data *ldata = tty->disc_data;
         unsigned char __user *b = buf;
-        DECLARE_WAITQUEUE(wait, current);
+        DEFINE_WAIT_FUNC(wait, woken_wake_function);
         int c;
         int minimum, time;
         ssize_t retval = 0;
@@ -2186,10 +2186,6 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
                         nr--;
                         break;
                 }
-                /* This statement must be first before checking for input
-                   so that any interrupt will set the state back to
-                   TASK_RUNNING. */
-                set_current_state(TASK_INTERRUPTIBLE);

                 if (((minimum - (b - buf)) < ldata->minimum_to_wake) &&
                     ((minimum - (b - buf)) >= 1))
@@ -2220,13 +2216,13 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
                         n_tty_set_room(tty);
                         up_read(&tty->termios_rwsem);

-                        timeout = schedule_timeout(timeout);
+                        timeout = wait_woken(&wait, TASK_INTERRUPTIBLE,
+                                             timeout);

                         down_read(&tty->termios_rwsem);
                         continue;
                 }
         }
-        __set_current_state(TASK_RUNNING);

         /* Deal with packet mode. */
         if (packet && b == buf) {
@@ -2273,7 +2269,6 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,

         mutex_unlock(&ldata->atomic_read_lock);

-        __set_current_state(TASK_RUNNING);
         if (b - buf)
                 retval = b - buf;

@@ -2306,7 +2301,7 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file,
                            const unsigned char *buf, size_t nr)
 {
         const unsigned char *b = buf;
-        DECLARE_WAITQUEUE(wait, current);
+        DEFINE_WAIT_FUNC(wait, woken_wake_function);
         int c;
         ssize_t retval = 0;

@@ -2324,7 +2319,6 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file,

         add_wait_queue(&tty->write_wait, &wait);
         while (1) {
-                set_current_state(TASK_INTERRUPTIBLE);
                 if (signal_pending(current)) {
                         retval = -ERESTARTSYS;
                         break;
@@ -2378,12 +2372,11 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file,
                 }
                 up_read(&tty->termios_rwsem);

-                schedule();
+                wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);

                 down_read(&tty->termios_rwsem);
         }
 break_out:
-        __set_current_state(TASK_RUNNING);
         remove_wait_queue(&tty->write_wait, &wait);
         if (b - buf != nr && tty->fasync)
                 set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
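The two n_tty conversions above follow one pattern, which recurs in the other files of this series. Roughly (wq and condition are placeholders, not code from this merge):

	/* Before: open-coded wait loop; any blocking call in the loop body
	 * clobbers the TASK_INTERRUPTIBLE state set by set_current_state(). */
	DECLARE_WAITQUEUE(wait, current);
	add_wait_queue(&wq, &wait);
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (condition)
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&wq, &wait);

	/* After: wait_woken() manages the task state internally and tracks
	 * wakeups via the wait entry's WQ_FLAG_WOKEN flag, so code in the
	 * loop body may block without corrupting the wait state. */
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	add_wait_queue(&wq, &wait);
	while (1) {
		if (condition)
			break;
		wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
	}
	remove_wait_queue(&wq, &wait);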
9 changes: 4 additions & 5 deletions fs/notify/inotify/inotify_user.c
@@ -227,14 +227,13 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
         struct fsnotify_event *kevent;
         char __user *start;
         int ret;
-        DEFINE_WAIT(wait);
+        DEFINE_WAIT_FUNC(wait, woken_wake_function);

         start = buf;
         group = file->private_data;

+        add_wait_queue(&group->notification_waitq, &wait);
         while (1) {
-                prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
-
                 mutex_lock(&group->notification_mutex);
                 kevent = get_one_event(group, count);
                 mutex_unlock(&group->notification_mutex);
@@ -264,10 +263,10 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
                 if (start != buf)
                         break;

-                schedule();
+                wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
         }
+        remove_wait_queue(&group->notification_waitq, &wait);

-        finish_wait(&group->notification_waitq, &wait);
         if (start != buf && ret != -EFAULT)
                 ret = buf - start;
         return ret;
3 changes: 0 additions & 3 deletions include/asm-generic/preempt.h
@@ -23,9 +23,6 @@ static __always_inline void preempt_count_set(int pc)
 /*
  * must be macros to avoid header recursion hell
  */
-#define task_preempt_count(p) \
-        (task_thread_info(p)->preempt_count & ~PREEMPT_NEED_RESCHED)
-
 #define init_task_preempt_count(p) do { \
         task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \
 } while (0)
50 changes: 0 additions & 50 deletions include/linux/freezer.h
@@ -246,15 +246,6 @@ static inline int freezable_schedule_hrtimeout_range(ktime_t *expires,
  * defined in <linux/wait.h>
  */

-#define wait_event_freezekillable(wq, condition)                \
-({                                                              \
-        int __retval;                                           \
-        freezer_do_not_count();                                 \
-        __retval = wait_event_killable(wq, (condition));        \
-        freezer_count();                                        \
-        __retval;                                               \
-})
-
 /* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
 #define wait_event_freezekillable_unsafe(wq, condition)         \
 ({                                                              \
@@ -265,35 +256,6 @@ static inline int freezable_schedule_hrtimeout_range(ktime_t *expires,
         __retval;                                               \
 })

-#define wait_event_freezable(wq, condition)                     \
-({                                                              \
-        int __retval;                                           \
-        freezer_do_not_count();                                 \
-        __retval = wait_event_interruptible(wq, (condition));   \
-        freezer_count();                                        \
-        __retval;                                               \
-})
-
-#define wait_event_freezable_timeout(wq, condition, timeout)    \
-({                                                              \
-        long __retval = timeout;                                \
-        freezer_do_not_count();                                 \
-        __retval = wait_event_interruptible_timeout(wq, (condition), \
-                                __retval);                      \
-        freezer_count();                                        \
-        __retval;                                               \
-})
-
-#define wait_event_freezable_exclusive(wq, condition)           \
-({                                                              \
-        int __retval;                                           \
-        freezer_do_not_count();                                 \
-        __retval = wait_event_interruptible_exclusive(wq, condition); \
-        freezer_count();                                        \
-        __retval;                                               \
-})
-
-
 #else /* !CONFIG_FREEZER */
 static inline bool frozen(struct task_struct *p) { return false; }
 static inline bool freezing(struct task_struct *p) { return false; }
@@ -331,18 +293,6 @@ static inline void set_freezable(void) {}
 #define freezable_schedule_hrtimeout_range(expires, delta, mode) \
         schedule_hrtimeout_range(expires, delta, mode)

-#define wait_event_freezable(wq, condition)                     \
-        wait_event_interruptible(wq, condition)
-
-#define wait_event_freezable_timeout(wq, condition, timeout)    \
-        wait_event_interruptible_timeout(wq, condition, timeout)
-
-#define wait_event_freezable_exclusive(wq, condition)           \
-        wait_event_interruptible_exclusive(wq, condition)
-
-#define wait_event_freezekillable(wq, condition)                \
-        wait_event_killable(wq, condition)
-
 #define wait_event_freezekillable_unsafe(wq, condition)         \
         wait_event_killable(wq, condition)

10 changes: 10 additions & 0 deletions include/linux/init_task.h
@@ -166,6 +166,15 @@ extern struct task_group root_task_group;
 # define INIT_RT_MUTEXES(tsk)
 #endif

+#ifdef CONFIG_NUMA_BALANCING
+# define INIT_NUMA_BALANCING(tsk)                               \
+        .numa_preferred_nid = -1,                               \
+        .numa_group = NULL,                                     \
+        .numa_faults = NULL,
+#else
+# define INIT_NUMA_BALANCING(tsk)
+#endif
+
 /*
  * INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -237,6 +246,7 @@ extern struct task_group root_task_group;
         INIT_CPUSET_SEQ(tsk)                                    \
         INIT_RT_MUTEXES(tsk)                                    \
         INIT_VTIME(tsk)                                         \
+        INIT_NUMA_BALANCING(tsk)                                \
 }

5 changes: 5 additions & 0 deletions include/linux/kernel.h
@@ -162,6 +162,7 @@ extern int _cond_resched(void);
 #endif

 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+void ___might_sleep(const char *file, int line, int preempt_offset);
 void __might_sleep(const char *file, int line, int preempt_offset);
 /**
  * might_sleep - annotation for functions that can sleep
@@ -175,10 +176,14 @@ extern int _cond_resched(void);
  */
 # define might_sleep() \
         do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
+# define sched_annotate_sleep() __set_current_state(TASK_RUNNING)
 #else
+  static inline void ___might_sleep(const char *file, int line,
+                                    int preempt_offset) { }
   static inline void __might_sleep(const char *file, int line,
                                     int preempt_offset) { }
 # define might_sleep() do { might_resched(); } while (0)
+# define sched_annotate_sleep() do { } while (0)
 #endif

 #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)
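sched_annotate_sleep() gives known-safe nesting sites a way to silence the new warning. A hypothetical caller (the names below are made up for illustration) might look like:

	/*
	 * We are inside a wait loop but know the blocking call below is
	 * safe, because the loop re-tests its wait condition afterwards.
	 * Annotate the sleep so CONFIG_DEBUG_ATOMIC_SLEEP does not warn;
	 * the annotation just sets the state back to TASK_RUNNING, which
	 * is what the blocking call would do anyway.
	 */
	sched_annotate_sleep();
	mutex_lock(&dev->lock);	/* would otherwise trip the nested-sleep check */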
87 changes: 64 additions & 23 deletions include/linux/sched.h
@@ -243,6 +243,43 @@ extern char ___assert_task_state[1 - 2*!!(
                 ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
                  (task->flags & PF_FROZEN) == 0)

+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+
+#define __set_task_state(tsk, state_value)                      \
+        do {                                                    \
+                (tsk)->task_state_change = _THIS_IP_;           \
+                (tsk)->state = (state_value);                   \
+        } while (0)
+#define set_task_state(tsk, state_value)                        \
+        do {                                                    \
+                (tsk)->task_state_change = _THIS_IP_;           \
+                set_mb((tsk)->state, (state_value));            \
+        } while (0)
+
+/*
+ * set_current_state() includes a barrier so that the write of current->state
+ * is correctly serialised wrt the caller's subsequent test of whether to
+ * actually sleep:
+ *
+ *      set_current_state(TASK_UNINTERRUPTIBLE);
+ *      if (do_i_need_to_sleep())
+ *              schedule();
+ *
+ * If the caller does not need such serialisation then use __set_current_state()
+ */
+#define __set_current_state(state_value)                        \
+        do {                                                    \
+                current->task_state_change = _THIS_IP_;         \
+                current->state = (state_value);                 \
+        } while (0)
+#define set_current_state(state_value)                          \
+        do {                                                    \
+                current->task_state_change = _THIS_IP_;         \
+                set_mb(current->state, (state_value));          \
+        } while (0)
+
+#else
+
 #define __set_task_state(tsk, state_value)              \
         do { (tsk)->state = (state_value); } while (0)
 #define set_task_state(tsk, state_value)                \
@@ -259,11 +296,13 @@ extern char ___assert_task_state[1 - 2*!!(
  *
  * If the caller does not need such serialisation then use __set_current_state()
  */
-#define __set_current_state(state_value)                \
+#define __set_current_state(state_value)        \
         do { current->state = (state_value); } while (0)
-#define set_current_state(state_value)                  \
+#define set_current_state(state_value)          \
         set_mb(current->state, (state_value))

+#endif
+
 /* Task command name length */
 #define TASK_COMM_LEN 16

@@ -1558,27 +1597,22 @@ struct task_struct {
         struct numa_group *numa_group;

         /*
-         * Exponential decaying average of faults on a per-node basis.
-         * Scheduling placement decisions are made based on the these counts.
-         * The values remain static for the duration of a PTE scan
+         * numa_faults is an array split into four regions:
+         * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
+         * in this precise order.
+         *
+         * faults_memory: Exponential decaying average of faults on a per-node
+         * basis. Scheduling placement decisions are made based on these
+         * counts. The values remain static for the duration of a PTE scan.
+         * faults_cpu: Track the nodes the process was running on when a NUMA
+         * hinting fault was incurred.
+         * faults_memory_buffer and faults_cpu_buffer: Record faults per node
+         * during the current scan window. When the scan completes, the counts
+         * in faults_memory and faults_cpu decay and these values are copied.
          */
-        unsigned long *numa_faults_memory;
+        unsigned long *numa_faults;
         unsigned long total_numa_faults;

-        /*
-         * numa_faults_buffer records faults per node during the current
-         * scan window. When the scan completes, the counts in
-         * numa_faults_memory decay and these values are copied.
-         */
-        unsigned long *numa_faults_buffer_memory;
-
-        /*
-         * Track the nodes the process was running on when a NUMA hinting
-         * fault was incurred.
-         */
-        unsigned long *numa_faults_cpu;
-        unsigned long *numa_faults_buffer_cpu;
-
         /*
          * numa_faults_locality tracks if faults recorded during the last
          * scan window were remote/local. The task scan period is adapted
@@ -1661,6 +1695,9 @@ struct task_struct {
         unsigned int sequential_io;
         unsigned int sequential_io_avg;
 #endif
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+        unsigned long task_state_change;
+#endif
 };

 /* Future-safe accessor for struct task_struct's cpus_allowed. */
@@ -2052,6 +2089,10 @@ static inline void tsk_restore_flags(struct task_struct *task,
         task->flags |= orig_flags & flags;
 }

+extern int cpuset_cpumask_can_shrink(const struct cpumask *cur,
+                                     const struct cpumask *trial);
+extern int task_can_attach(struct task_struct *p,
+                           const struct cpumask *cs_cpus_allowed);
 #ifdef CONFIG_SMP
 extern void do_set_cpus_allowed(struct task_struct *p,
                                 const struct cpumask *new_mask);
@@ -2760,7 +2801,7 @@ static inline int signal_pending_state(long state, struct task_struct *p)
 extern int _cond_resched(void);

 #define cond_resched() ({                       \
-        __might_sleep(__FILE__, __LINE__, 0);   \
+        ___might_sleep(__FILE__, __LINE__, 0);  \
         _cond_resched();                        \
 })

@@ -2773,14 +2814,14 @@ extern int __cond_resched_lock(spinlock_t *lock);
 #endif

 #define cond_resched_lock(lock) ({                              \
-        __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
+        ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
         __cond_resched_lock(lock);                              \
 })

 extern int __cond_resched_softirq(void);

 #define cond_resched_softirq() ({                               \
-        __might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
+        ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
         __cond_resched_softirq();                               \
 })

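Note the cond_resched*() macros now call ___might_sleep() (three underscores): they may legitimately run inside a wait loop where current->state is not TASK_RUNNING, so they skip the nested-sleep check. The kernel/sched/core.c side of the split is not loaded above, but the intent can be sketched roughly as follows - a simplified reading, not the verbatim implementation:

	/*
	 * ___might_sleep(): the original atomic-context check only.
	 * __might_sleep(): additionally performs the nested-sleep check,
	 * using task_state_change to report where the state was last set.
	 */
	void __might_sleep(const char *file, int line, int preempt_offset)
	{
		/*
		 * Blocking primitives will set (and therefore destroy)
		 * current->state, so the caller must enter with TASK_RUNNING
		 * or be warned (once per bootup).
		 */
		if (WARN_ONCE(current->state != TASK_RUNNING,
			      "do not call blocking ops when !TASK_RUNNING; "
			      "state=%lx set at [<%p>] %pS\n",
			      current->state,
			      (void *)current->task_state_change,
			      (void *)current->task_state_change))
			__set_current_state(TASK_RUNNING);

		___might_sleep(file, line, preempt_offset);
	}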
(Diffs for the remaining 23 of the 31 changed files are not shown.)