Skip to content

Commit

Permalink
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/…
Browse files Browse the repository at this point in the history
…linux/kernel/git/tip/tip

Pull perf updates and fixes from Ingo Molnar:
 "It's mostly fixes, but there's also two late items:

   - preliminary GTK GUI support for perf report
   - PMU raw event format descriptors in sysfs, to be parsed by tooling

  The raw event format in sysfs is a new ABI.  For example for the 'CPU'
  PMU we have:

    aldebaran:~> ll /sys/bus/event_source/devices/cpu/format/*
    -r--r--r--. 1 root root 4096 Mar 31 10:29 /sys/bus/event_source/devices/cpu/format/any
    -r--r--r--. 1 root root 4096 Mar 31 10:29 /sys/bus/event_source/devices/cpu/format/cmask
    -r--r--r--. 1 root root 4096 Mar 31 10:29 /sys/bus/event_source/devices/cpu/format/edge
    -r--r--r--. 1 root root 4096 Mar 31 10:29 /sys/bus/event_source/devices/cpu/format/event
    -r--r--r--. 1 root root 4096 Mar 31 10:29 /sys/bus/event_source/devices/cpu/format/inv
    -r--r--r--. 1 root root 4096 Mar 31 10:29 /sys/bus/event_source/devices/cpu/format/offcore_rsp
    -r--r--r--. 1 root root 4096 Mar 31 10:29 /sys/bus/event_source/devices/cpu/format/pc
    -r--r--r--. 1 root root 4096 Mar 31 10:29 /sys/bus/event_source/devices/cpu/format/umask

  those lists of fields contain a specific format:

    aldebaran:~> cat /sys/bus/event_source/devices/cpu/format/offcore_rsp
    config1:0-63

  So, those who wish to specify raw events can now use the following
  event format:

    -e cpu/cmask=1,event=2,umask=3

  Most people will not want to specify any events (let alone raw
  events), they'll just use whatever default event the tools use.

  But for more obscure PMU events that have no cross-architecture
  generic events the above syntax is more usable and a bit more
  structured than specifying hex numbers."

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (41 commits)
  perf tools: Remove auto-generated bison/flex files
  perf annotate: Fix off by one symbol hist size allocation and hit accounting
  perf tools: Add missing ref-cycles event back to event parser
  perf annotate: addr2line wants addresses in same format as objdump
  perf probe: Finder fails to resolve function name to address
  tracing: Fix ent_size in trace output
  perf symbols: Handle NULL dso in dso__name_len
  perf symbols: Do not include libgen.h
  perf tools: Fix bug in raw sample parsing
  perf tools: Fix display of first level of callchains
  perf tools: Switch module.h into export.h
  perf: Move mmap page data_head offset assertion out of header
  perf: Fix mmap_page capabilities and docs
  perf diff: Fix to work with new hists design
  perf tools: Fix modifier to be applied on correct events
  perf tools: Fix various casting issues for 32 bits
  perf tools: Simplify event_read_id exit path
  tracing: Fix ftrace stack trace entries
  tracing: Move the tracing_on/off() declarations into CONFIG_TRACING
  perf report: Add a simple GTK2-based 'perf report' browser
  ...
  • Loading branch information
torvalds committed Mar 31, 2012
2 parents adb3b1f + 8ebfdf2 commit f187e9f
Show file tree
Hide file tree
Showing 52 changed files with 2,531 additions and 584 deletions.
14 changes: 14 additions & 0 deletions Documentation/ABI/testing/sysfs-bus-event_source-devices-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Where: /sys/bus/event_source/devices/<dev>/format
Date: January 2012
Kernel Version: 3.3
Contact: Jiri Olsa <[email protected]>
Description:
Attribute group to describe the magic bits that go into
perf_event_attr::config[012] for a particular pmu.
Each attribute of this group defines the 'hardware' bitmask
we want to export, so that userspace can deal with sane
name/value pairs.

Example: 'config1:1,6-10,44'
Defines contents of attribute that occupies bits 1,6-10,44 of
perf_event_attr::config1.
17 changes: 16 additions & 1 deletion arch/x86/kernel/cpu/perf_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -1313,6 +1313,11 @@ static void __init pmu_check_apic(void)
pr_info("no hardware sampling interrupt available.\n");
}

static struct attribute_group x86_pmu_format_group = {
.name = "format",
.attrs = NULL,
};

static int __init init_hw_perf_events(void)
{
struct x86_pmu_quirk *quirk;
Expand Down Expand Up @@ -1387,6 +1392,7 @@ static int __init init_hw_perf_events(void)
}

x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
x86_pmu_format_group.attrs = x86_pmu.format_attrs;

pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
Expand Down Expand Up @@ -1615,6 +1621,9 @@ static int x86_pmu_event_idx(struct perf_event *event)
{
int idx = event->hw.idx;

if (!x86_pmu.attr_rdpmc)
return 0;

if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) {
idx -= X86_PMC_IDX_FIXED;
idx |= 1 << 30;
Expand Down Expand Up @@ -1667,6 +1676,7 @@ static struct attribute_group x86_pmu_attr_group = {

static const struct attribute_group *x86_pmu_attr_groups[] = {
&x86_pmu_attr_group,
&x86_pmu_format_group,
NULL,
};

Expand Down Expand Up @@ -1698,14 +1708,19 @@ static struct pmu pmu = {
.flush_branch_stack = x86_pmu_flush_branch_stack,
};

void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
{
userpg->cap_usr_time = 0;
userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc;
userpg->pmc_width = x86_pmu.cntval_bits;

if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
return;

if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
return;

userpg->cap_usr_time = 1;
userpg->time_mult = this_cpu_read(cyc2ns);
userpg->time_shift = CYC2NS_SCALE_FACTOR;
userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
Expand Down
1 change: 1 addition & 0 deletions arch/x86/kernel/cpu/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ struct x86_pmu {
* sysfs attrs
*/
int attr_rdpmc;
struct attribute **format_attrs;

/*
* CPU Hotplug hooks
Expand Down
18 changes: 18 additions & 0 deletions arch/x86/kernel/cpu/perf_event_amd.c
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,21 @@ static void amd_pmu_cpu_dead(int cpu)
}
}

PMU_FORMAT_ATTR(event, "config:0-7,32-35");
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
PMU_FORMAT_ATTR(inv, "config:23" );
PMU_FORMAT_ATTR(cmask, "config:24-31" );

static struct attribute *amd_format_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
&format_attr_edge.attr,
&format_attr_inv.attr,
&format_attr_cmask.attr,
NULL,
};

static __initconst const struct x86_pmu amd_pmu = {
.name = "AMD",
.handle_irq = x86_pmu_handle_irq,
Expand All @@ -426,6 +441,8 @@ static __initconst const struct x86_pmu amd_pmu = {
.get_event_constraints = amd_get_event_constraints,
.put_event_constraints = amd_put_event_constraints,

.format_attrs = amd_format_attr,

.cpu_prepare = amd_pmu_cpu_prepare,
.cpu_starting = amd_pmu_cpu_starting,
.cpu_dead = amd_pmu_cpu_dead,
Expand Down Expand Up @@ -596,6 +613,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {
.cpu_dead = amd_pmu_cpu_dead,
#endif
.cpu_starting = amd_pmu_cpu_starting,
.format_attrs = amd_format_attr,
};

__init int amd_pmu_init(void)
Expand Down
36 changes: 36 additions & 0 deletions arch/x86/kernel/cpu/perf_event_intel.c
Original file line number Diff line number Diff line change
Expand Up @@ -1431,6 +1431,24 @@ static void core_pmu_enable_all(int added)
}
}

PMU_FORMAT_ATTR(event, "config:0-7" );
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
PMU_FORMAT_ATTR(pc, "config:19" );
PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */
PMU_FORMAT_ATTR(inv, "config:23" );
PMU_FORMAT_ATTR(cmask, "config:24-31" );

static struct attribute *intel_arch_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
&format_attr_edge.attr,
&format_attr_pc.attr,
&format_attr_inv.attr,
&format_attr_cmask.attr,
NULL,
};

static __initconst const struct x86_pmu core_pmu = {
.name = "core",
.handle_irq = x86_pmu_handle_irq,
Expand All @@ -1455,6 +1473,7 @@ static __initconst const struct x86_pmu core_pmu = {
.put_event_constraints = intel_put_event_constraints,
.event_constraints = intel_core_event_constraints,
.guest_get_msrs = core_guest_get_msrs,
.format_attrs = intel_arch_formats_attr,
};

struct intel_shared_regs *allocate_shared_regs(int cpu)
Expand Down Expand Up @@ -1553,6 +1572,21 @@ static void intel_pmu_flush_branch_stack(void)
intel_pmu_lbr_reset();
}

PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");

static struct attribute *intel_arch3_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
&format_attr_edge.attr,
&format_attr_pc.attr,
&format_attr_any.attr,
&format_attr_inv.attr,
&format_attr_cmask.attr,

&format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
NULL,
};

static __initconst const struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
Expand All @@ -1576,6 +1610,8 @@ static __initconst const struct x86_pmu intel_pmu = {
.get_event_constraints = intel_get_event_constraints,
.put_event_constraints = intel_put_event_constraints,

.format_attrs = intel_arch3_formats_attr,

.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
Expand Down
19 changes: 19 additions & 0 deletions arch/x86/kernel/cpu/perf_event_p6.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,23 @@ static void p6_pmu_enable_event(struct perf_event *event)
(void)checking_wrmsrl(hwc->config_base, val);
}

PMU_FORMAT_ATTR(event, "config:0-7" );
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
PMU_FORMAT_ATTR(pc, "config:19" );
PMU_FORMAT_ATTR(inv, "config:23" );
PMU_FORMAT_ATTR(cmask, "config:24-31" );

static struct attribute *intel_p6_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
&format_attr_edge.attr,
&format_attr_pc.attr,
&format_attr_inv.attr,
&format_attr_cmask.attr,
NULL,
};

static __initconst const struct x86_pmu p6_pmu = {
.name = "p6",
.handle_irq = x86_pmu_handle_irq,
Expand Down Expand Up @@ -115,6 +132,8 @@ static __initconst const struct x86_pmu p6_pmu = {
.cntval_mask = (1ULL << 32) - 1,
.get_event_constraints = x86_get_event_constraints,
.event_constraints = p6_event_constraints,

.format_attrs = intel_p6_formats_attr,
};

__init int p6_pmu_init(void)
Expand Down
2 changes: 2 additions & 0 deletions include/linux/ftrace_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,14 @@ struct event_filter;
enum trace_reg {
TRACE_REG_REGISTER,
TRACE_REG_UNREGISTER,
#ifdef CONFIG_PERF_EVENTS
TRACE_REG_PERF_REGISTER,
TRACE_REG_PERF_UNREGISTER,
TRACE_REG_PERF_OPEN,
TRACE_REG_PERF_CLOSE,
TRACE_REG_PERF_ADD,
TRACE_REG_PERF_DEL,
#endif
};

struct ftrace_event_call;
Expand Down
15 changes: 9 additions & 6 deletions include/linux/kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -430,16 +430,10 @@ extern int __must_check hex2bin(u8 *dst, const char *src, size_t count);
* Most likely, you want to use tracing_on/tracing_off.
*/
#ifdef CONFIG_RING_BUFFER
void tracing_on(void);
void tracing_off(void);
/* trace_off_permanent stops recording with no way to bring it back */
void tracing_off_permanent(void);
int tracing_is_on(void);
#else
static inline void tracing_on(void) { }
static inline void tracing_off(void) { }
static inline void tracing_off_permanent(void) { }
static inline int tracing_is_on(void) { return 0; }
#endif

enum ftrace_dump_mode {
Expand All @@ -449,6 +443,10 @@ enum ftrace_dump_mode {
};

#ifdef CONFIG_TRACING
void tracing_on(void);
void tracing_off(void);
int tracing_is_on(void);

extern void tracing_start(void);
extern void tracing_stop(void);
extern void ftrace_off_permanent(void);
Expand Down Expand Up @@ -533,6 +531,11 @@ static inline void tracing_start(void) { }
static inline void tracing_stop(void) { }
static inline void ftrace_off_permanent(void) { }
static inline void trace_dump_stack(void) { }

static inline void tracing_on(void) { }
static inline void tracing_off(void) { }
static inline int tracing_is_on(void) { return 0; }

static inline int
trace_printk(const char *fmt, ...)
{
Expand Down
90 changes: 80 additions & 10 deletions include/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -299,18 +299,31 @@ struct perf_event_mmap_page {
/*
* Bits needed to read the hw events in user-space.
*
* u32 seq;
* s64 count;
* u32 seq, time_mult, time_shift, idx, width;
* u64 count, enabled, running;
* u64 cyc, time_offset;
* s64 pmc = 0;
*
* do {
* seq = pc->lock;
*
* barrier()
* if (pc->index) {
* count = pmc_read(pc->index - 1);
* count += pc->offset;
* } else
* goto regular_read;
*
* enabled = pc->time_enabled;
* running = pc->time_running;
*
* if (pc->cap_usr_time && enabled != running) {
* cyc = rdtsc();
* time_offset = pc->time_offset;
* time_mult = pc->time_mult;
* time_shift = pc->time_shift;
* }
*
* idx = pc->index;
* count = pc->offset;
* if (pc->cap_usr_rdpmc && idx) {
* width = pc->pmc_width;
* pmc = rdpmc(idx - 1);
* }
*
* barrier();
* } while (pc->lock != seq);
Expand All @@ -323,14 +336,57 @@ struct perf_event_mmap_page {
__s64 offset; /* add to hardware event value */
__u64 time_enabled; /* time event active */
__u64 time_running; /* time event on cpu */
__u32 time_mult, time_shift;
union {
__u64 capabilities;
__u64 cap_usr_time : 1,
cap_usr_rdpmc : 1,
cap_____res : 62;
};

/*
* If cap_usr_rdpmc this field provides the bit-width of the value
* read using the rdpmc() or equivalent instruction. This can be used
* to sign extend the result like:
*
* pmc <<= 64 - width;
* pmc >>= 64 - width; // signed shift right
* count += pmc;
*/
__u16 pmc_width;

/*
* If cap_usr_time the below fields can be used to compute the time
* delta since time_enabled (in ns) using rdtsc or similar.
*
* u64 quot, rem;
* u64 delta;
*
* quot = (cyc >> time_shift);
* rem = cyc & ((1 << time_shift) - 1);
* delta = time_offset + quot * time_mult +
* ((rem * time_mult) >> time_shift);
*
* Where time_offset,time_mult,time_shift and cyc are read in the
* seqcount loop described above. This delta can then be added to
* enabled and possible running (if idx), improving the scaling:
*
* enabled += delta;
* if (idx)
* running += delta;
*
* quot = count / running;
* rem = count % running;
* count = quot * enabled + (rem * enabled) / running;
*/
__u16 time_shift;
__u32 time_mult;
__u64 time_offset;

/*
* Hole for extension of the self monitor capabilities
*/

__u64 __reserved[121]; /* align to 1k */
__u64 __reserved[120]; /* align to 1k */

/*
* Control data for the mmap() data buffer.
Expand Down Expand Up @@ -550,6 +606,7 @@ struct perf_guest_info_callbacks {
#include <linux/irq_work.h>
#include <linux/static_key.h>
#include <linux/atomic.h>
#include <linux/sysfs.h>
#include <asm/local.h>

#define PERF_MAX_STACK_DEPTH 255
Expand Down Expand Up @@ -1291,5 +1348,18 @@ do { \
register_cpu_notifier(&fn##_nb); \
} while (0)


#define PMU_FORMAT_ATTR(_name, _format) \
static ssize_t \
_name##_show(struct device *dev, \
struct device_attribute *attr, \
char *page) \
{ \
BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
return sprintf(page, _format "\n"); \
} \
\
static struct device_attribute format_attr_##_name = __ATTR_RO(_name)

#endif /* __KERNEL__ */
#endif /* _LINUX_PERF_EVENT_H */
Loading

0 comments on commit f187e9f

Please sign in to comment.