Skip to content

Commit

Permalink
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/…
Browse files Browse the repository at this point in the history
…linux/kernel/git/tip/tip

Pull perf fixes from Thomas Gleixner:
 "Another pile of fixes for perf:

   - Plug overflows and races in the core code

   - Sanitize the flow of the perf syscall so we error out before
     handling the more complex and hard to undo setups

   - Improve and fix Broadwell and Skylake hardware support

   - Revert a fix which broke what it tried to fix in perf tools

   - A couple of smaller fixes in various places of perf tools"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf tools: Fix copying of /proc/kcore
  perf intel-pt: Remove no_force_psb from documentation
  perf probe: Use existing routine to look for a kernel module by dso->short_name
  perf/x86: Change test_aperfmperf() and test_intel() to static
  tools lib traceevent: Fix string handling in heterogeneous arch environments
  perf record: Avoid infinite loop at buildid processing with no samples
  perf: Fix races in computing the header sizes
  perf: Fix u16 overflows
  perf: Restructure perf syscall point of no return
  perf/x86/intel: Fix Skylake FRONTEND MSR extrareg mask
  perf/x86/intel/pebs: Add PEBS frontend profiling for Skylake
  perf/x86/intel: Make the CYCLE_ACTIVITY.* constraint on Broadwell more specific
  perf tools: Bool functions shouldn't return -1
  tools build: Add test for presence of __get_cpuid() gcc builtin
  tools build: Add test for presence of numa_num_possible_cpus() in libnuma
  Revert "perf symbols: Fix mismatched declarations for elf_getphdrnum"
  perf stat: Fix per-pkg event reporting bug
  • Loading branch information
torvalds committed Sep 27, 2015
2 parents 73f479b + 2530e39 commit e3be426
Show file tree
Hide file tree
Showing 18 changed files with 214 additions and 96 deletions.
2 changes: 2 additions & 0 deletions arch/x86/include/asm/msr-index.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@
#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)

#define MSR_PEBS_FRONTEND 0x000003f7

#define MSR_IA32_POWER_CTL 0x000001fc

#define MSR_IA32_MC0_CTL 0x00000400
Expand Down
1 change: 1 addition & 0 deletions arch/x86/kernel/cpu/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ enum extra_reg_type {
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
EXTRA_REG_LBR = 2, /* lbr_select */
EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */
EXTRA_REG_FE = 4, /* fe_* */

EXTRA_REG_MAX /* number of entries needed */
};
Expand Down
17 changes: 15 additions & 2 deletions arch/x86/kernel/cpu/perf_event_intel.c
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,11 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
/*
* Note that the low 8 bits (the eventsel code) are not a contiguous field:
* they contain some bits that #GP. Those bits are masked out.
*/
INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
EVENT_EXTRA_END
};

Expand Down Expand Up @@ -250,7 +255,7 @@ struct event_constraint intel_bdw_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
INTEL_EVENT_CONSTRAINT(0xa3, 0x4), /* CYCLE_ACTIVITY.* */
INTEL_UEVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
EVENT_CONSTRAINT_END
};

Expand Down Expand Up @@ -2891,6 +2896,8 @@ PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");

PMU_FORMAT_ATTR(ldlat, "config1:0-15");

PMU_FORMAT_ATTR(frontend, "config1:0-23");

static struct attribute *intel_arch3_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
Expand All @@ -2907,6 +2914,11 @@ static struct attribute *intel_arch3_formats_attr[] = {
NULL,
};

/*
 * Skylake-specific PMU format attributes. Currently only the "frontend"
 * field (config1:0-23). The Skylake branch of intel_pmu_init() combines
 * this NULL-terminated list with intel_arch3_formats_attr via merge_attr().
 */
static struct attribute *skl_format_attr[] = {
&format_attr_frontend.attr,
NULL,
};

static __initconst const struct x86_pmu core_pmu = {
.name = "core",
.handle_irq = x86_pmu_handle_irq,
Expand Down Expand Up @@ -3516,7 +3528,8 @@ __init int intel_pmu_init(void)

x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
x86_pmu.cpu_events = hsw_events_attrs;
x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
skl_format_attr);
WARN_ON(!x86_pmu.format_attrs);
x86_pmu.cpu_events = hsw_events_attrs;
pr_cont("Skylake events, ");
Expand Down
4 changes: 2 additions & 2 deletions arch/x86/kernel/cpu/perf_event_msr.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ enum perf_msr_id {
PERF_MSR_EVENT_MAX,
};

bool test_aperfmperf(int idx)
static bool test_aperfmperf(int idx)
{
return boot_cpu_has(X86_FEATURE_APERFMPERF);
}

bool test_intel(int idx)
static bool test_intel(int idx)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
boot_cpu_data.x86 != 6)
Expand Down
114 changes: 81 additions & 33 deletions kernel/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1243,11 +1243,7 @@ static inline void perf_event__state_init(struct perf_event *event)
PERF_EVENT_STATE_INACTIVE;
}

/*
* Called at perf_event creation and when events are attached/detached from a
* group.
*/
static void perf_event__read_size(struct perf_event *event)
static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
{
int entry = sizeof(u64); /* value */
int size = 0;
Expand All @@ -1263,22 +1259,19 @@ static void perf_event__read_size(struct perf_event *event)
entry += sizeof(u64);

if (event->attr.read_format & PERF_FORMAT_GROUP) {
nr += event->group_leader->nr_siblings;
nr += nr_siblings;
size += sizeof(u64);
}

size += entry * nr;
event->read_size = size;
}

static void perf_event__header_size(struct perf_event *event)
static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
{
struct perf_sample_data *data;
u64 sample_type = event->attr.sample_type;
u16 size = 0;

perf_event__read_size(event);

if (sample_type & PERF_SAMPLE_IP)
size += sizeof(data->ip);

Expand All @@ -1303,6 +1296,17 @@ static void perf_event__header_size(struct perf_event *event)
event->header_size = size;
}

/*
 * Recompute the cached read and sample-header sizes of @event.
 *
 * Invoked when a perf_event is created and whenever events join or leave
 * a group. The read size uses the group leader's current sibling count;
 * the header size uses the event's own sample_type.
 */
static void perf_event__header_size(struct perf_event *event)
{
	int nr_siblings = event->group_leader->nr_siblings;

	__perf_event_read_size(event, nr_siblings);
	__perf_event_header_size(event, event->attr.sample_type);
}

static void perf_event__id_header_size(struct perf_event *event)
{
struct perf_sample_data *data;
Expand Down Expand Up @@ -1330,6 +1334,27 @@ static void perf_event__id_header_size(struct perf_event *event)
event->id_header_size = size;
}

/*
 * Check, before @event is attached, that the fixed-size parts of its
 * records stay below a conservative bound.
 *
 * The read size is computed as if the group already had one more sibling,
 * and the header size with PERF_SAMPLE_READ masked out of the sample type.
 * The values stored in the event by these helpers are provisional: they
 * are over-written when the event is actually attached.
 *
 * Returns true when the event fits, false when it is too big.
 */
static bool perf_event_validate_size(struct perf_event *event)
{
	int nr_siblings = event->group_leader->nr_siblings + 1;

	__perf_event_read_size(event, nr_siblings);
	__perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ);
	perf_event__id_header_size(event);

	/*
	 * Records are limited to 64k. Cap the sum of the fixed parts at 16k
	 * so that callchains and other variable fields still have room.
	 */
	return event->read_size + event->header_size +
	       event->id_header_size + sizeof(struct perf_event_header) < 16*1024;
}

static void perf_group_attach(struct perf_event *event)
{
struct perf_event *group_leader = event->group_leader, *pos;
Expand Down Expand Up @@ -8297,27 +8322,43 @@ SYSCALL_DEFINE5(perf_event_open,

if (move_group) {
gctx = group_leader->ctx;
mutex_lock_double(&gctx->mutex, &ctx->mutex);
} else {
mutex_lock(&ctx->mutex);
}

if (!perf_event_validate_size(event)) {
err = -E2BIG;
goto err_locked;
}

/*
* Must be under the same ctx::mutex as perf_install_in_context(),
* because we need to serialize with concurrent event creation.
*/
if (!exclusive_event_installable(event, ctx)) {
/* exclusive and group stuff are assumed mutually exclusive */
WARN_ON_ONCE(move_group);

err = -EBUSY;
goto err_locked;
}

WARN_ON_ONCE(ctx->parent_ctx);

if (move_group) {
/*
* See perf_event_ctx_lock() for comments on the details
* of swizzling perf_event::ctx.
*/
mutex_lock_double(&gctx->mutex, &ctx->mutex);

perf_remove_from_context(group_leader, false);

list_for_each_entry(sibling, &group_leader->sibling_list,
group_entry) {
perf_remove_from_context(sibling, false);
put_ctx(gctx);
}
} else {
mutex_lock(&ctx->mutex);
}

WARN_ON_ONCE(ctx->parent_ctx);

if (move_group) {
/*
* Wait for everybody to stop referencing the events through
* the old lists, before installing it on new lists.
Expand Down Expand Up @@ -8349,22 +8390,29 @@ SYSCALL_DEFINE5(perf_event_open,
perf_event__state_init(group_leader);
perf_install_in_context(ctx, group_leader, group_leader->cpu);
get_ctx(ctx);
}

if (!exclusive_event_installable(event, ctx)) {
err = -EBUSY;
mutex_unlock(&ctx->mutex);
fput(event_file);
goto err_context;
/*
* Now that all events are installed in @ctx, nothing
* references @gctx anymore, so drop the last reference we have
* on it.
*/
put_ctx(gctx);
}

/*
* Precalculate sample_data sizes; do while holding ctx::mutex such
* that we're serialized against further additions and before
* perf_install_in_context() which is the point the event is active and
* can use these values.
*/
perf_event__header_size(event);
perf_event__id_header_size(event);

perf_install_in_context(ctx, event, event->cpu);
perf_unpin_context(ctx);

if (move_group) {
if (move_group)
mutex_unlock(&gctx->mutex);
put_ctx(gctx);
}
mutex_unlock(&ctx->mutex);

put_online_cpus();
Expand All @@ -8375,12 +8423,6 @@ SYSCALL_DEFINE5(perf_event_open,
list_add_tail(&event->owner_entry, &current->perf_event_list);
mutex_unlock(&current->perf_event_mutex);

/*
* Precalculate sample_data sizes
*/
perf_event__header_size(event);
perf_event__id_header_size(event);

/*
* Drop the reference on the group_event after placing the
* new event on the sibling_list. This ensures destruction
Expand All @@ -8391,6 +8433,12 @@ SYSCALL_DEFINE5(perf_event_open,
fd_install(event_fd, event_file);
return event_fd;

err_locked:
if (move_group)
mutex_unlock(&gctx->mutex);
mutex_unlock(&ctx->mutex);
/* err_file: */
fput(event_file);
err_context:
perf_unpin_context(ctx);
put_ctx(ctx);
Expand Down
8 changes: 6 additions & 2 deletions tools/build/Makefile.feature
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ FEATURE_TESTS ?= \
libelf-getphdrnum \
libelf-mmap \
libnuma \
numa_num_possible_cpus \
libperl \
libpython \
libpython-version \
Expand All @@ -51,7 +52,8 @@ FEATURE_TESTS ?= \
timerfd \
libdw-dwarf-unwind \
zlib \
lzma
lzma \
get_cpuid

FEATURE_DISPLAY ?= \
dwarf \
Expand All @@ -61,13 +63,15 @@ FEATURE_DISPLAY ?= \
libbfd \
libelf \
libnuma \
numa_num_possible_cpus \
libperl \
libpython \
libslang \
libunwind \
libdw-dwarf-unwind \
zlib \
lzma
lzma \
get_cpuid

# Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features.
# If in the future we need per-feature checks/flags for features not
Expand Down
10 changes: 9 additions & 1 deletion tools/build/feature/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ FILES= \
test-libelf-getphdrnum.bin \
test-libelf-mmap.bin \
test-libnuma.bin \
test-numa_num_possible_cpus.bin \
test-libperl.bin \
test-libpython.bin \
test-libpython-version.bin \
Expand All @@ -34,7 +35,8 @@ FILES= \
test-compile-x32.bin \
test-zlib.bin \
test-lzma.bin \
test-bpf.bin
test-bpf.bin \
test-get_cpuid.bin

CC := $(CROSS_COMPILE)gcc -MD
PKG_CONFIG := $(CROSS_COMPILE)pkg-config
Expand Down Expand Up @@ -87,6 +89,9 @@ test-libelf-getphdrnum.bin:
test-libnuma.bin:
$(BUILD) -lnuma

test-numa_num_possible_cpus.bin:
$(BUILD) -lnuma

test-libunwind.bin:
$(BUILD) -lelf

Expand Down Expand Up @@ -162,6 +167,9 @@ test-zlib.bin:
test-lzma.bin:
$(BUILD) -llzma

test-get_cpuid.bin:
$(BUILD)

test-bpf.bin:
$(BUILD)

Expand Down
10 changes: 10 additions & 0 deletions tools/build/feature/test-all.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@
# include "test-libnuma.c"
#undef main

#define main main_test_numa_num_possible_cpus
# include "test-numa_num_possible_cpus.c"
#undef main

#define main main_test_timerfd
# include "test-timerfd.c"
#undef main
Expand Down Expand Up @@ -117,6 +121,10 @@
# include "test-lzma.c"
#undef main

#define main main_test_get_cpuid
# include "test-get_cpuid.c"
#undef main

int main(int argc, char *argv[])
{
main_test_libpython();
Expand All @@ -136,13 +144,15 @@ int main(int argc, char *argv[])
main_test_libbfd();
main_test_backtrace();
main_test_libnuma();
main_test_numa_num_possible_cpus();
main_test_timerfd();
main_test_stackprotector_all();
main_test_libdw_dwarf_unwind();
main_test_sync_compare_and_swap(argc, argv);
main_test_zlib();
main_test_pthread_attr_setaffinity_np();
main_test_lzma();
main_test_get_cpuid();

return 0;
}
Loading

0 comments on commit e3be426

Please sign in to comment.