Skip to content

Commit

Permalink
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/li…
Browse files Browse the repository at this point in the history
…nux/kernel/git/tip/tip

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (39 commits)
  perf tools: Fix compile error on x86_64 Ubuntu
  perf report: Fix --stdio output alignment when --showcpuutilization used
  perf annotate: Get rid of field_sep check
  perf annotate: Fix usage string
  perf kmem: Fix a memory leak
  perf kmem: Add missing closedir() calls
  perf top: Add error message for EMFILE
  perf test: Change type of '-v' option to INCR
  perf script: Add missing closedir() calls
  tracing: Fix compile error when static ftrace is enabled
  recordmcount: Fix handling of elf64 big-endian objects.
  perf tools: Add const.h to MANIFEST to make perf-tar-src-pkg work again
  perf tools: Add support for guest/host-only profiling
  perf kvm: Do guest-only counting by default
  perf top: Don't update total_period on process_sample
  perf hists: Stop using 'self' for struct hist_entry
  perf hists: Rename total_session to total_period
  x86: Add counter when debug stack is used with interrupts enabled
  x86: Allow NMIs to hit breakpoints in i386
  x86: Keep current stack in NMI breakpoints
  ...
  • Loading branch information
torvalds committed Jan 15, 2012
2 parents f0ed5b9 + 172d1b0 commit 83c2f91
Show file tree
Hide file tree
Showing 29 changed files with 1,262 additions and 468 deletions.
8 changes: 8 additions & 0 deletions Documentation/kernel-parameters.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2475,6 +2475,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
stacktrace [FTRACE]
Enabled the stack tracer on boot up.

stacktrace_filter=[function-list]
[FTRACE] Limit the functions that the stack tracer
will trace at boot up. function-list is a comma separated
list of functions. This list can be changed at run
time by the stack_trace_filter file in the debugfs
tracing directory. Note, this enables stack tracing
and the stacktrace above is not needed.

sti= [PARISC,HW]
Format: <num>
Set the STI (builtin display/keyboard on the HP-PARISC
Expand Down
22 changes: 22 additions & 0 deletions arch/x86/include/asm/debugreg.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,28 @@ extern void aout_dump_debugregs(struct user *dump);

extern void hw_breakpoint_restore(void);

#ifdef CONFIG_X86_64
DECLARE_PER_CPU(int, debug_stack_usage);
static inline void debug_stack_usage_inc(void)
{
__get_cpu_var(debug_stack_usage)++;
}
static inline void debug_stack_usage_dec(void)
{
__get_cpu_var(debug_stack_usage)--;
}
int is_debug_stack(unsigned long addr);
void debug_stack_set_zero(void);
void debug_stack_reset(void);
#else /* !X86_64 */
static inline int is_debug_stack(unsigned long addr) { return 0; }
static inline void debug_stack_set_zero(void) { }
static inline void debug_stack_reset(void) { }
static inline void debug_stack_usage_inc(void) { }
static inline void debug_stack_usage_dec(void) { }
#endif /* X86_64 */


#endif /* __KERNEL__ */

#endif /* _ASM_X86_DEBUGREG_H */
12 changes: 12 additions & 0 deletions arch/x86/include/asm/desc.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in

extern struct desc_ptr idt_descr;
extern gate_desc idt_table[];
extern struct desc_ptr nmi_idt_descr;
extern gate_desc nmi_idt_table[];

struct gdt_page {
struct desc_struct gdt[GDT_ENTRIES];
Expand Down Expand Up @@ -307,6 +309,16 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
desc->limit = (limit >> 16) & 0xf;
}

#ifdef CONFIG_X86_64
static inline void set_nmi_gate(int gate, void *addr)
{
gate_desc s;

pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
write_idt_entry(nmi_idt_table, gate, &s);
}
#endif

static inline void _set_gate(int gate, unsigned type, void *addr,
unsigned dpl, unsigned ist, unsigned seg)
{
Expand Down
24 changes: 24 additions & 0 deletions arch/x86/kernel/cpu/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -1021,6 +1021,8 @@ __setup("clearcpuid=", setup_disablecpuid);

#ifdef CONFIG_X86_64
struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
(unsigned long) nmi_idt_table };

DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE);
Expand Down Expand Up @@ -1085,6 +1087,26 @@ unsigned long kernel_eflags;
*/
DEFINE_PER_CPU(struct orig_ist, orig_ist);

static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
DEFINE_PER_CPU(int, debug_stack_usage);

int is_debug_stack(unsigned long addr)
{
return __get_cpu_var(debug_stack_usage) ||
(addr <= __get_cpu_var(debug_stack_addr) &&
addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
}

void debug_stack_set_zero(void)
{
load_idt((const struct desc_ptr *)&nmi_idt_descr);
}

void debug_stack_reset(void)
{
load_idt((const struct desc_ptr *)&idt_descr);
}

#else /* CONFIG_X86_64 */

DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
Expand Down Expand Up @@ -1212,6 +1234,8 @@ void __cpuinit cpu_init(void)
estacks += exception_stack_sizes[v];
oist->ist[v] = t->x86_tss.ist[v] =
(unsigned long)estacks;
if (v == DEBUG_STACK-1)
per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
}
}

Expand Down
218 changes: 185 additions & 33 deletions arch/x86/kernel/entry_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -1480,62 +1480,214 @@ ENTRY(error_exit)
CFI_ENDPROC
END(error_exit)

/*
* Test if a given stack is an NMI stack or not.
*/
.macro test_in_nmi reg stack nmi_ret normal_ret
cmpq %\reg, \stack
ja \normal_ret
subq $EXCEPTION_STKSZ, %\reg
cmpq %\reg, \stack
jb \normal_ret
jmp \nmi_ret
.endm

/* runs on exception stack */
ENTRY(nmi)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq_cfi $-1
/*
* We allow breakpoints in NMIs. If a breakpoint occurs, then
* the iretq it performs will take us out of NMI context.
* This means that we can have nested NMIs where the next
* NMI is using the top of the stack of the previous NMI. We
* can't let it execute because the nested NMI will corrupt the
* stack of the previous NMI. NMI handlers are not re-entrant
* anyway.
*
* To handle this case we do the following:
* Check the a special location on the stack that contains
* a variable that is set when NMIs are executing.
* The interrupted task's stack is also checked to see if it
* is an NMI stack.
* If the variable is not set and the stack is not the NMI
* stack then:
* o Set the special variable on the stack
* o Copy the interrupt frame into a "saved" location on the stack
* o Copy the interrupt frame into a "copy" location on the stack
* o Continue processing the NMI
* If the variable is set or the previous stack is the NMI stack:
* o Modify the "copy" location to jump to the repeate_nmi
* o return back to the first NMI
*
* Now on exit of the first NMI, we first clear the stack variable
* The NMI stack will tell any nested NMIs at that point that it is
* nested. Then we pop the stack normally with iret, and if there was
* a nested NMI that updated the copy interrupt stack frame, a
* jump will be made to the repeat_nmi code that will handle the second
* NMI.
*/

/* Use %rdx as out temp variable throughout */
pushq_cfi %rdx

/*
* Check the special variable on the stack to see if NMIs are
* executing.
*/
cmp $1, -8(%rsp)
je nested_nmi

/*
* Now test if the previous stack was an NMI stack.
* We need the double check. We check the NMI stack to satisfy the
* race when the first NMI clears the variable before returning.
* We check the variable because the first NMI could be in a
* breakpoint routine using a breakpoint stack.
*/
lea 6*8(%rsp), %rdx
test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi

nested_nmi:
/*
* Do nothing if we interrupted the fixup in repeat_nmi.
* It's about to repeat the NMI handler, so we are fine
* with ignoring this one.
*/
movq $repeat_nmi, %rdx
cmpq 8(%rsp), %rdx
ja 1f
movq $end_repeat_nmi, %rdx
cmpq 8(%rsp), %rdx
ja nested_nmi_out

1:
/* Set up the interrupted NMIs stack to jump to repeat_nmi */
leaq -6*8(%rsp), %rdx
movq %rdx, %rsp
CFI_ADJUST_CFA_OFFSET 6*8
pushq_cfi $__KERNEL_DS
pushq_cfi %rdx
pushfq_cfi
pushq_cfi $__KERNEL_CS
pushq_cfi $repeat_nmi

/* Put stack back */
addq $(11*8), %rsp
CFI_ADJUST_CFA_OFFSET -11*8

nested_nmi_out:
popq_cfi %rdx

/* No need to check faults here */
INTERRUPT_RETURN

first_nmi:
/*
* Because nested NMIs will use the pushed location that we
* stored in rdx, we must keep that space available.
* Here's what our stack frame will look like:
* +-------------------------+
* | original SS |
* | original Return RSP |
* | original RFLAGS |
* | original CS |
* | original RIP |
* +-------------------------+
* | temp storage for rdx |
* +-------------------------+
* | NMI executing variable |
* +-------------------------+
* | Saved SS |
* | Saved Return RSP |
* | Saved RFLAGS |
* | Saved CS |
* | Saved RIP |
* +-------------------------+
* | copied SS |
* | copied Return RSP |
* | copied RFLAGS |
* | copied CS |
* | copied RIP |
* +-------------------------+
* | pt_regs |
* +-------------------------+
*
* The saved RIP is used to fix up the copied RIP that a nested
* NMI may zero out. The original stack frame and the temp storage
* is also used by nested NMIs and can not be trusted on exit.
*/
/* Set the NMI executing variable on the stack. */
pushq_cfi $1

/* Copy the stack frame to the Saved frame */
.rept 5
pushq_cfi 6*8(%rsp)
.endr

/* Make another copy, this one may be modified by nested NMIs */
.rept 5
pushq_cfi 4*8(%rsp)
.endr

/* Do not pop rdx, nested NMIs will corrupt it */
movq 11*8(%rsp), %rdx

/*
* Everything below this point can be preempted by a nested
* NMI if the first NMI took an exception. Repeated NMIs
* caused by an exception and nested NMI will start here, and
* can still be preempted by another NMI.
*/
restart_nmi:
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
/*
* Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
* as we should not be calling schedule in NMI context.
* Even with normal interrupts enabled. An NMI should not be
* setting NEED_RESCHED or anything that normal interrupts and
* exceptions might do.
*/
call save_paranoid
DEFAULT_FRAME 0
/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
movq %rsp,%rdi
movq $-1,%rsi
call do_nmi
#ifdef CONFIG_TRACE_IRQFLAGS
/* paranoidexit; without TRACE_IRQS_OFF */
/* ebx: no swapgs flag */
DISABLE_INTERRUPTS(CLBR_NONE)
testl %ebx,%ebx /* swapgs needed? */
jnz nmi_restore
testl $3,CS(%rsp)
jnz nmi_userspace
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
RESTORE_ALL 8
/* Clear the NMI executing stack variable */
movq $0, 10*8(%rsp)
jmp irq_return
nmi_userspace:
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%ebx
andl $_TIF_WORK_MASK,%ebx
jz nmi_swapgs
movq %rsp,%rdi /* &pt_regs */
call sync_regs
movq %rax,%rsp /* switch stack for scheduling */
testl $_TIF_NEED_RESCHED,%ebx
jnz nmi_schedule
movl %ebx,%edx /* arg3: thread flags */
ENABLE_INTERRUPTS(CLBR_NONE)
xorl %esi,%esi /* arg2: oldset */
movq %rsp,%rdi /* arg1: &pt_regs */
call do_notify_resume
DISABLE_INTERRUPTS(CLBR_NONE)
jmp nmi_userspace
nmi_schedule:
ENABLE_INTERRUPTS(CLBR_ANY)
call schedule
DISABLE_INTERRUPTS(CLBR_ANY)
jmp nmi_userspace
CFI_ENDPROC
#else
jmp paranoid_exit
CFI_ENDPROC
#endif
END(nmi)

/*
* If an NMI hit an iret because of an exception or breakpoint,
* it can lose its NMI context, and a nested NMI may come in.
* In that case, the nested NMI will change the preempted NMI's
* stack to jump to here when it does the final iret.
*/
repeat_nmi:
INTR_FRAME
/* Update the stack variable to say we are still in NMI */
movq $1, 5*8(%rsp)

/* copy the saved stack back to copy stack */
.rept 5
pushq_cfi 4*8(%rsp)
.endr

jmp restart_nmi
CFI_ENDPROC
end_repeat_nmi:

ENTRY(ignore_sysret)
CFI_STARTPROC
mov $-ENOSYS,%eax
Expand Down
4 changes: 4 additions & 0 deletions arch/x86/kernel/head_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,10 @@ ENTRY(phys_base)
ENTRY(idt_table)
.skip IDT_ENTRIES * 16

.align L1_CACHE_BYTES
ENTRY(nmi_idt_table)
.skip IDT_ENTRIES * 16

__PAGE_ALIGNED_BSS
.align PAGE_SIZE
ENTRY(empty_zero_page)
Expand Down
Loading

0 comments on commit 83c2f91

Please sign in to comment.