Skip to content

Commit

Permalink
nohz: prevent tick stop outside of the idle loop
Browse files Browse the repository at this point in the history
Jack Ren and Eric Miao tracked down the following long standing
problem in the NOHZ code:

	scheduler switch to idle task
	enable interrupts

Window starts here

	----> interrupt happens (does not set NEED_RESCHED)
	      	irq_exit() stops the tick

	----> interrupt happens (does set NEED_RESCHED)

	return from schedule()
	
	cpu_idle(): preempt_disable();

Window ends here

The interrupts can happen at any point inside the race window. The
first interrupt stops the tick, the second one causes the scheduler to
rerun and switch away from idle again and we end up with the tick
disabled.

The fact that it needs two interrupts where the first one does not set
NEED_RESCHED and the second one does made the bug obscure and extremely
hard to reproduce and analyse. Kudos to Jack and Eric.

Solution: Limit the NOHZ functionality to the idle loop to make sure
that we cannot run into such a situation ever again.

cpu_idle()
{
	preempt_disable();

	while(1) {
		 tick_nohz_stop_sched_tick(1); <- tell NOHZ code that we
		 			          are in the idle loop

		 while (!need_resched())
		       halt();

		 tick_nohz_restart_sched_tick(); <- disables NOHZ mode
		 preempt_enable_no_resched();
		 schedule();
		 preempt_disable();
	}
}

In hindsight we should have done it this way from the start, but ...

/me grabs a large brown paperbag.

Debugged-by: Jack Ren <[email protected]>
Debugged-by: eric miao <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
  • Loading branch information
KAGA-KOKO committed Jul 18, 2008
1 parent 857f3fd commit b8f8c3c
Show file tree
Hide file tree
Showing 14 changed files with 26 additions and 17 deletions.
2 changes: 1 addition & 1 deletion arch/arm/kernel/process.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ void cpu_idle(void)
if (!idle)
idle = default_idle;
leds_event(led_idle_start);
tick_nohz_stop_sched_tick();
tick_nohz_stop_sched_tick(1);
while (!need_resched())
idle();
leds_event(led_idle_end);
Expand Down
2 changes: 1 addition & 1 deletion arch/avr32/kernel/process.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ void cpu_idle(void)
{
/* endless idle loop with no priority at all */
while (1) {
tick_nohz_stop_sched_tick();
tick_nohz_stop_sched_tick(1);
while (!need_resched())
cpu_idle_sleep();
tick_nohz_restart_sched_tick();
Expand Down
2 changes: 1 addition & 1 deletion arch/blackfin/kernel/process.c
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ void cpu_idle(void)
#endif
if (!idle)
idle = default_idle;
tick_nohz_stop_sched_tick();
tick_nohz_stop_sched_tick(1);
while (!need_resched())
idle();
tick_nohz_restart_sched_tick();
Expand Down
2 changes: 1 addition & 1 deletion arch/mips/kernel/process.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ void __noreturn cpu_idle(void)
{
/* endless idle loop with no priority at all */
while (1) {
tick_nohz_stop_sched_tick();
tick_nohz_stop_sched_tick(1);
while (!need_resched()) {
#ifdef CONFIG_SMTC_IDLE_HOOK_DEBUG
extern void smtc_idle_loop_hook(void);
Expand Down
2 changes: 1 addition & 1 deletion arch/powerpc/kernel/idle.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ void cpu_idle(void)

set_thread_flag(TIF_POLLING_NRFLAG);
while (1) {
tick_nohz_stop_sched_tick();
tick_nohz_stop_sched_tick(1);
while (!need_resched() && !cpu_should_die()) {
ppc64_runlatch_off();

Expand Down
4 changes: 2 additions & 2 deletions arch/powerpc/platforms/iseries/setup.c
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,7 @@ static void yield_shared_processor(void)
static void iseries_shared_idle(void)
{
while (1) {
tick_nohz_stop_sched_tick();
tick_nohz_stop_sched_tick(1);
while (!need_resched() && !hvlpevent_is_pending()) {
local_irq_disable();
ppc64_runlatch_off();
Expand Down Expand Up @@ -591,7 +591,7 @@ static void iseries_dedicated_idle(void)
set_thread_flag(TIF_POLLING_NRFLAG);

while (1) {
tick_nohz_stop_sched_tick();
tick_nohz_stop_sched_tick(1);
if (!need_resched()) {
while (!need_resched()) {
ppc64_runlatch_off();
Expand Down
2 changes: 1 addition & 1 deletion arch/sh/kernel/process_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ void cpu_idle(void)
if (!idle)
idle = default_idle;

tick_nohz_stop_sched_tick();
tick_nohz_stop_sched_tick(1);
while (!need_resched())
idle();
tick_nohz_restart_sched_tick();
Expand Down
2 changes: 1 addition & 1 deletion arch/sparc64/kernel/process.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ void cpu_idle(void)
set_thread_flag(TIF_POLLING_NRFLAG);

while(1) {
tick_nohz_stop_sched_tick();
tick_nohz_stop_sched_tick(1);

while (!need_resched() && !cpu_is_offline(cpu))
sparc64_yield(cpu);
Expand Down
2 changes: 1 addition & 1 deletion arch/um/kernel/process.c
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ void default_idle(void)
if (need_resched())
schedule();

tick_nohz_stop_sched_tick();
tick_nohz_stop_sched_tick(1);
nsecs = disable_timer();
idle_sleep(nsecs);
tick_nohz_restart_sched_tick();
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/kernel/process_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ void cpu_idle(void)

/* endless idle loop with no priority at all */
while (1) {
tick_nohz_stop_sched_tick();
tick_nohz_stop_sched_tick(1);
while (!need_resched()) {
void (*idle)(void);

Expand Down
2 changes: 1 addition & 1 deletion arch/x86/kernel/process_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ void cpu_idle(void)
current_thread_info()->status |= TS_POLLING;
/* endless idle loop with no priority at all */
while (1) {
tick_nohz_stop_sched_tick();
tick_nohz_stop_sched_tick(1);
while (!need_resched()) {
void (*idle)(void);

Expand Down
5 changes: 3 additions & 2 deletions include/linux/tick.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ struct tick_sched {
unsigned long check_clocks;
enum tick_nohz_mode nohz_mode;
ktime_t idle_tick;
int inidle;
int tick_stopped;
unsigned long idle_jiffies;
unsigned long idle_calls;
Expand Down Expand Up @@ -105,14 +106,14 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
#endif /* !CONFIG_GENERIC_CLOCKEVENTS */

# ifdef CONFIG_NO_HZ
extern void tick_nohz_stop_sched_tick(void);
extern void tick_nohz_stop_sched_tick(int inidle);
extern void tick_nohz_restart_sched_tick(void);
extern void tick_nohz_update_jiffies(void);
extern ktime_t tick_nohz_get_sleep_length(void);
extern void tick_nohz_stop_idle(int cpu);
extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
# else
static inline void tick_nohz_stop_sched_tick(void) { }
static inline void tick_nohz_stop_sched_tick(int inidle) { }
static inline void tick_nohz_restart_sched_tick(void) { }
static inline void tick_nohz_update_jiffies(void) { }
static inline ktime_t tick_nohz_get_sleep_length(void)
Expand Down
2 changes: 1 addition & 1 deletion kernel/softirq.c
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ void irq_exit(void)
#ifdef CONFIG_NO_HZ
/* Make sure that timer wheel updates are propagated */
if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
tick_nohz_stop_sched_tick();
tick_nohz_stop_sched_tick(0);
rcu_irq_exit();
#endif
preempt_enable_no_resched();
Expand Down
12 changes: 10 additions & 2 deletions kernel/time/tick-sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
* Called either from the idle loop or from irq_exit() when an idle period was
* just interrupted by an interrupt which did not cause a reschedule.
*/
void tick_nohz_stop_sched_tick(void)
void tick_nohz_stop_sched_tick(int inidle)
{
unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
struct tick_sched *ts;
Expand Down Expand Up @@ -224,6 +224,11 @@ void tick_nohz_stop_sched_tick(void)
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
goto end;

if (!inidle && !ts->inidle)
goto end;

ts->inidle = 1;

if (need_resched())
goto end;

Expand Down Expand Up @@ -372,11 +377,14 @@ void tick_nohz_restart_sched_tick(void)
local_irq_disable();
tick_nohz_stop_idle(cpu);

if (!ts->tick_stopped) {
if (!ts->inidle || !ts->tick_stopped) {
ts->inidle = 0;
local_irq_enable();
return;
}

ts->inidle = 0;

rcu_exit_nohz();

/* Update jiffies first */
Expand Down

0 comments on commit b8f8c3c

Please sign in to comment.