Skip to content

Commit

Permalink
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/…
Browse files Browse the repository at this point in the history
…linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
 "Mostly tooling fixes, but also some kernel side fixes: uncore PMU
  driver fix, user regs sampling fix and an instruction decoder fix that
  unbreaks PEBS precise sampling"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/uncore/hsw-ep: Handle systems with only two SBOXes
  perf/x86_64: Improve user regs sampling
  perf: Move task_pt_regs sampling into arch code
  x86: Fix off-by-one in instruction decoder
  perf hists browser: Fix segfault when showing callchain
  perf callchain: Free callchains when hist entries are deleted
  perf hists: Fix children sort key behavior
  perf diff: Fix to sort by baseline field by default
  perf list: Fix --raw-dump option
  perf probe: Fix crash in dwarf_getcfi_elf
  perf probe: Fix to fall back to find probe point in symbols
  perf callchain: Append callchains only when requested
  perf ui/tui: Print backtrace symbols when segfault occurs
  perf report: Show progress bar for output resorting
  • Loading branch information
torvalds committed Jan 11, 2015
2 parents 1e6c3e8 + 5306c31 commit ddb321a
Show file tree
Hide file tree
Showing 26 changed files with 371 additions and 77 deletions.
8 changes: 8 additions & 0 deletions arch/arm/kernel/perf_regs.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,11 @@ u64 perf_reg_abi(struct task_struct *task)
{
return PERF_SAMPLE_REGS_ABI_32;
}

void perf_get_regs_user(struct perf_regs *regs_user,
struct pt_regs *regs,
struct pt_regs *regs_user_copy)
{
regs_user->regs = task_pt_regs(current);
regs_user->abi = perf_reg_abi(current);
}
8 changes: 8 additions & 0 deletions arch/arm64/kernel/perf_regs.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,11 @@ u64 perf_reg_abi(struct task_struct *task)
else
return PERF_SAMPLE_REGS_ABI_64;
}

void perf_get_regs_user(struct perf_regs *regs_user,
struct pt_regs *regs,
struct pt_regs *regs_user_copy)
{
regs_user->regs = task_pt_regs(current);
regs_user->abi = perf_reg_abi(current);
}
2 changes: 1 addition & 1 deletion arch/x86/kernel/cpu/perf_event_intel_uncore.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff)
#define UNCORE_PCI_DEV_IDX(data) (data & 0xff)
#define UNCORE_EXTRA_PCI_DEV 0xff
#define UNCORE_EXTRA_PCI_DEV_MAX 2
#define UNCORE_EXTRA_PCI_DEV_MAX 3

/* support up to 8 sockets */
#define UNCORE_SOCKET_MAX 8
Expand Down
17 changes: 17 additions & 0 deletions arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
Original file line number Diff line number Diff line change
Expand Up @@ -891,6 +891,7 @@ void snbep_uncore_cpu_init(void)
enum {
SNBEP_PCI_QPI_PORT0_FILTER,
SNBEP_PCI_QPI_PORT1_FILTER,
HSWEP_PCI_PCU_3,
};

static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event)
Expand Down Expand Up @@ -2026,6 +2027,17 @@ void hswep_uncore_cpu_init(void)
{
if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;

/* Detect 6-8 core systems with only two SBOXes */
if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) {
u32 capid4;

pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3],
0x94, &capid4);
if (((capid4 >> 6) & 0x3) == 0)
hswep_uncore_sbox.num_boxes = 2;
}

uncore_msr_uncores = hswep_msr_uncores;
}

Expand Down Expand Up @@ -2287,6 +2299,11 @@ static DEFINE_PCI_DEVICE_TABLE(hswep_uncore_pci_ids) = {
.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
SNBEP_PCI_QPI_PORT1_FILTER),
},
{ /* PCU.3 (for Capability registers) */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0),
.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
HSWEP_PCI_PCU_3),
},
{ /* end: all zeroes */ }
};

Expand Down
90 changes: 90 additions & 0 deletions arch/x86/kernel/perf_regs.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ u64 perf_reg_abi(struct task_struct *task)
{
return PERF_SAMPLE_REGS_ABI_32;
}

void perf_get_regs_user(struct perf_regs *regs_user,
struct pt_regs *regs,
struct pt_regs *regs_user_copy)
{
regs_user->regs = task_pt_regs(current);
regs_user->abi = perf_reg_abi(current);
}
#else /* CONFIG_X86_64 */
#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
(1ULL << PERF_REG_X86_ES) | \
Expand All @@ -102,4 +110,86 @@ u64 perf_reg_abi(struct task_struct *task)
else
return PERF_SAMPLE_REGS_ABI_64;
}

void perf_get_regs_user(struct perf_regs *regs_user,
struct pt_regs *regs,
struct pt_regs *regs_user_copy)
{
struct pt_regs *user_regs = task_pt_regs(current);

/*
* If we're in an NMI that interrupted task_pt_regs setup, then
* we can't sample user regs at all. This check isn't really
* sufficient, though, as we could be in an NMI inside an interrupt
* that happened during task_pt_regs setup.
*/
if (regs->sp > (unsigned long)&user_regs->r11 &&
regs->sp <= (unsigned long)(user_regs + 1)) {
regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
regs_user->regs = NULL;
return;
}

/*
* RIP, flags, and the argument registers are usually saved.
* orig_ax is probably okay, too.
*/
regs_user_copy->ip = user_regs->ip;
regs_user_copy->cx = user_regs->cx;
regs_user_copy->dx = user_regs->dx;
regs_user_copy->si = user_regs->si;
regs_user_copy->di = user_regs->di;
regs_user_copy->r8 = user_regs->r8;
regs_user_copy->r9 = user_regs->r9;
regs_user_copy->r10 = user_regs->r10;
regs_user_copy->r11 = user_regs->r11;
regs_user_copy->orig_ax = user_regs->orig_ax;
regs_user_copy->flags = user_regs->flags;

/*
* Don't even try to report the "rest" regs.
*/
regs_user_copy->bx = -1;
regs_user_copy->bp = -1;
regs_user_copy->r12 = -1;
regs_user_copy->r13 = -1;
regs_user_copy->r14 = -1;
regs_user_copy->r15 = -1;

/*
* For this to be at all useful, we need a reasonable guess for
* sp and the ABI. Be careful: we're in NMI context, and we're
* considering current to be the current task, so we should
* be careful not to look at any other percpu variables that might
* change during context switches.
*/
if (IS_ENABLED(CONFIG_IA32_EMULATION) &&
task_thread_info(current)->status & TS_COMPAT) {
/* Easy case: we're in a compat syscall. */
regs_user->abi = PERF_SAMPLE_REGS_ABI_32;
regs_user_copy->sp = user_regs->sp;
regs_user_copy->cs = user_regs->cs;
regs_user_copy->ss = user_regs->ss;
} else if (user_regs->orig_ax != -1) {
/*
* We're probably in a 64-bit syscall.
* Warning: this code is severely racy. At least it's better
* than just blindly copying user_regs.
*/
regs_user->abi = PERF_SAMPLE_REGS_ABI_64;
regs_user_copy->sp = this_cpu_read(old_rsp);
regs_user_copy->cs = __USER_CS;
regs_user_copy->ss = __USER_DS;
regs_user_copy->cx = -1; /* usually contains garbage */
} else {
/* We're probably in an interrupt or exception. */
regs_user->abi = user_64bit_mode(user_regs) ?
PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
regs_user_copy->sp = user_regs->sp;
regs_user_copy->cs = user_regs->cs;
regs_user_copy->ss = user_regs->ss;
}

regs_user->regs = regs_user_copy;
}
#endif /* CONFIG_X86_32 */
2 changes: 1 addition & 1 deletion arch/x86/lib/insn.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

/* Verify next sizeof(t) bytes can be on the same instruction */
#define validate_next(t, insn, n) \
((insn)->next_byte + sizeof(t) + n < (insn)->end_kaddr)
((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)

#define __get_next(t, insn) \
({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
Expand Down
12 changes: 7 additions & 5 deletions include/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,6 @@ struct perf_branch_stack {
struct perf_branch_entry entries[0];
};

struct perf_regs {
__u64 abi;
struct pt_regs *regs;
};

struct task_struct;

/*
Expand Down Expand Up @@ -610,7 +605,14 @@ struct perf_sample_data {
u32 reserved;
} cpu_entry;
struct perf_callchain_entry *callchain;

/*
* regs_user may point to task_pt_regs or to regs_user_copy, depending
* on arch details.
*/
struct perf_regs regs_user;
struct pt_regs regs_user_copy;

struct perf_regs regs_intr;
u64 stack_user_size;
} ____cacheline_aligned;
Expand Down
16 changes: 16 additions & 0 deletions include/linux/perf_regs.h
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
#ifndef _LINUX_PERF_REGS_H
#define _LINUX_PERF_REGS_H

struct perf_regs {
__u64 abi;
struct pt_regs *regs;
};

#ifdef CONFIG_HAVE_PERF_REGS
#include <asm/perf_regs.h>
u64 perf_reg_value(struct pt_regs *regs, int idx);
int perf_reg_validate(u64 mask);
u64 perf_reg_abi(struct task_struct *task);
void perf_get_regs_user(struct perf_regs *regs_user,
struct pt_regs *regs,
struct pt_regs *regs_user_copy);
#else
static inline u64 perf_reg_value(struct pt_regs *regs, int idx)
{
Expand All @@ -21,5 +29,13 @@ static inline u64 perf_reg_abi(struct task_struct *task)
{
return PERF_SAMPLE_REGS_ABI_NONE;
}

static inline void perf_get_regs_user(struct perf_regs *regs_user,
struct pt_regs *regs,
struct pt_regs *regs_user_copy)
{
regs_user->regs = task_pt_regs(current);
regs_user->abi = perf_reg_abi(current);
}
#endif /* CONFIG_HAVE_PERF_REGS */
#endif /* _LINUX_PERF_REGS_H */
19 changes: 8 additions & 11 deletions kernel/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -4461,18 +4461,14 @@ perf_output_sample_regs(struct perf_output_handle *handle,
}

static void perf_sample_regs_user(struct perf_regs *regs_user,
struct pt_regs *regs)
struct pt_regs *regs,
struct pt_regs *regs_user_copy)
{
if (!user_mode(regs)) {
if (current->mm)
regs = task_pt_regs(current);
else
regs = NULL;
}

if (regs) {
regs_user->abi = perf_reg_abi(current);
if (user_mode(regs)) {
regs_user->abi = perf_reg_abi(current);
regs_user->regs = regs;
} else if (current->mm) {
perf_get_regs_user(regs_user, regs, regs_user_copy);
} else {
regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
regs_user->regs = NULL;
Expand Down Expand Up @@ -4951,7 +4947,8 @@ void perf_prepare_sample(struct perf_event_header *header,
}

if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
perf_sample_regs_user(&data->regs_user, regs);
perf_sample_regs_user(&data->regs_user, regs,
&data->regs_user_copy);

if (sample_type & PERF_SAMPLE_REGS_USER) {
/* regs dump ABI info */
Expand Down
2 changes: 1 addition & 1 deletion tools/perf/builtin-annotate.c
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ static int __cmd_annotate(struct perf_annotate *ann)
if (nr_samples > 0) {
total_nr_samples += nr_samples;
hists__collapse_resort(hists, NULL);
hists__output_resort(hists);
hists__output_resort(hists, NULL);

if (symbol_conf.event_group &&
!perf_evsel__is_group_leader(pos))
Expand Down
46 changes: 45 additions & 1 deletion tools/perf/builtin-diff.c
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,42 @@ hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right,
return __hist_entry__cmp_compute(p_left, p_right, c);
}

static int64_t
hist_entry__cmp_nop(struct hist_entry *left __maybe_unused,
struct hist_entry *right __maybe_unused)
{
return 0;
}

static int64_t
hist_entry__cmp_baseline(struct hist_entry *left, struct hist_entry *right)
{
if (sort_compute)
return 0;

if (left->stat.period == right->stat.period)
return 0;
return left->stat.period > right->stat.period ? 1 : -1;
}

static int64_t
hist_entry__cmp_delta(struct hist_entry *left, struct hist_entry *right)
{
return hist_entry__cmp_compute(right, left, COMPUTE_DELTA);
}

static int64_t
hist_entry__cmp_ratio(struct hist_entry *left, struct hist_entry *right)
{
return hist_entry__cmp_compute(right, left, COMPUTE_RATIO);
}

static int64_t
hist_entry__cmp_wdiff(struct hist_entry *left, struct hist_entry *right)
{
return hist_entry__cmp_compute(right, left, COMPUTE_WEIGHTED_DIFF);
}

static void insert_hist_entry_by_compute(struct rb_root *root,
struct hist_entry *he,
int c)
Expand Down Expand Up @@ -605,7 +641,7 @@ static void hists__process(struct hists *hists)
hists__precompute(hists);
hists__compute_resort(hists);
} else {
hists__output_resort(hists);
hists__output_resort(hists, NULL);
}

hists__fprintf(hists, true, 0, 0, 0, stdout);
Expand Down Expand Up @@ -1038,27 +1074,35 @@ static void data__hpp_register(struct data__file *d, int idx)
fmt->header = hpp__header;
fmt->width = hpp__width;
fmt->entry = hpp__entry_global;
fmt->cmp = hist_entry__cmp_nop;
fmt->collapse = hist_entry__cmp_nop;

/* TODO more colors */
switch (idx) {
case PERF_HPP_DIFF__BASELINE:
fmt->color = hpp__color_baseline;
fmt->sort = hist_entry__cmp_baseline;
break;
case PERF_HPP_DIFF__DELTA:
fmt->color = hpp__color_delta;
fmt->sort = hist_entry__cmp_delta;
break;
case PERF_HPP_DIFF__RATIO:
fmt->color = hpp__color_ratio;
fmt->sort = hist_entry__cmp_ratio;
break;
case PERF_HPP_DIFF__WEIGHTED_DIFF:
fmt->color = hpp__color_wdiff;
fmt->sort = hist_entry__cmp_wdiff;
break;
default:
fmt->sort = hist_entry__cmp_nop;
break;
}

init_header(d, dfmt);
perf_hpp__column_register(fmt);
perf_hpp__register_sort_field(fmt);
}

static void ui_init(void)
Expand Down
Loading

0 comments on commit ddb321a

Please sign in to comment.