diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fa38612d70b..72138d75a60 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -501,8 +501,9 @@ struct perf_cpu_context { struct perf_event_context *task_ctx; int active_oncpu; int exclusive; + struct hrtimer hrtimer; + ktime_t hrtimer_interval; struct list_head rotation_list; - int jiffies_interval; struct pmu *unique_pmu; struct perf_cgroup *cgrp; }; diff --git a/init/main.c b/init/main.c index 9484f4ba88d..ec549581d73 100644 --- a/init/main.c +++ b/init/main.c @@ -542,7 +542,6 @@ asmlinkage void __init start_kernel(void) if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n")) local_irq_disable(); idr_init_cache(); - perf_event_init(); rcu_init(); tick_nohz_init(); radix_tree_init(); @@ -555,6 +554,7 @@ asmlinkage void __init start_kernel(void) softirq_init(); timekeeping_init(); time_init(); + perf_event_init(); profile_init(); call_function_init(); WARN(!irqs_disabled(), "Interrupts were enabled early\n"); diff --git a/kernel/events/core.c b/kernel/events/core.c index e0dcced282e..97bfac7e6f4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -170,6 +170,8 @@ int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE; static int max_samples_per_tick __read_mostly = DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ); +static int perf_rotate_context(struct perf_cpu_context *cpuctx); + int perf_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -658,6 +660,98 @@ perf_cgroup_mark_enabled(struct perf_event *event, } #endif +/* + * set default to be dependent on timer tick just + * like original code + */ +#define PERF_CPU_HRTIMER (1000 / HZ) +/* + * function must be called with interrupts disbled + */ +static enum hrtimer_restart perf_cpu_hrtimer_handler(struct hrtimer *hr) +{ + struct perf_cpu_context *cpuctx; + enum hrtimer_restart ret = HRTIMER_NORESTART; + int rotations = 0; + + WARN_ON(!irqs_disabled()); + + cpuctx = container_of(hr, struct perf_cpu_context, hrtimer); + + rotations = perf_rotate_context(cpuctx); + + /* + * arm timer if needed + */ + if (rotations) { + hrtimer_forward_now(hr, cpuctx->hrtimer_interval); + ret = HRTIMER_RESTART; + } + + return ret; +} + +/* CPU is going down */ +void perf_cpu_hrtimer_cancel(int cpu) +{ + struct perf_cpu_context *cpuctx; + struct pmu *pmu; + unsigned long flags; + + if (WARN_ON(cpu != smp_processor_id())) + return; + + local_irq_save(flags); + + rcu_read_lock(); + + list_for_each_entry_rcu(pmu, &pmus, entry) { + cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + + if (pmu->task_ctx_nr == perf_sw_context) + continue; + + hrtimer_cancel(&cpuctx->hrtimer); + } + + rcu_read_unlock(); + + local_irq_restore(flags); +} + +static void __perf_cpu_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu) +{ + struct hrtimer *hr = &cpuctx->hrtimer; + struct pmu *pmu = cpuctx->ctx.pmu; + + /* no multiplexing needed for SW PMU */ + if (pmu->task_ctx_nr == perf_sw_context) + return; + + cpuctx->hrtimer_interval = + ns_to_ktime(NSEC_PER_MSEC * PERF_CPU_HRTIMER); + + hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + hr->function = perf_cpu_hrtimer_handler; +} + +static void perf_cpu_hrtimer_restart(struct perf_cpu_context *cpuctx) +{ + struct hrtimer *hr = &cpuctx->hrtimer; + struct pmu *pmu = cpuctx->ctx.pmu; + + /* not for SW PMU */ + if (pmu->task_ctx_nr == perf_sw_context) + return; + + if (hrtimer_active(hr)) + return; + + if (!hrtimer_callback_running(hr)) + __hrtimer_start_range_ns(hr, cpuctx->hrtimer_interval, + 0, HRTIMER_MODE_REL_PINNED, 0); +} + void perf_pmu_disable(struct pmu *pmu) { int *count = this_cpu_ptr(pmu->pmu_disable_count); @@ -1506,6 +1600,7 @@ group_sched_in(struct perf_event *group_event, if (event_sched_in(group_event, cpuctx, ctx)) { pmu->cancel_txn(pmu); + perf_cpu_hrtimer_restart(cpuctx); return -EAGAIN; } @@ -1552,6 +1647,8 @@ group_sched_in(struct perf_event *group_event, pmu->cancel_txn(pmu); + perf_cpu_hrtimer_restart(cpuctx); + return -EAGAIN; } @@ -1807,8 +1904,10 @@ static int __perf_event_enable(void *info) * If this event can't go on and it's part of a * group, then the whole group has to come off. */ - if (leader != event) + if (leader != event) { group_sched_out(leader, cpuctx, ctx); + perf_cpu_hrtimer_restart(cpuctx); + } if (leader->attr.pinned) { update_group_times(leader); leader->state = PERF_EVENT_STATE_ERROR; @@ -2555,7 +2654,7 @@ static void rotate_ctx(struct perf_event_context *ctx) * because they're strictly cpu affine and rotate_start is called with IRQs * disabled, while rotate_context is called from IRQ context. */ -static void perf_rotate_context(struct perf_cpu_context *cpuctx) +static int perf_rotate_context(struct perf_cpu_context *cpuctx) { struct perf_event_context *ctx = NULL; int rotate = 0, remove = 1; @@ -2594,6 +2693,8 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx) done: if (remove) list_del_init(&cpuctx->rotation_list); + + return rotate; } #ifdef CONFIG_NO_HZ_FULL @@ -2625,10 +2726,6 @@ void perf_event_task_tick(void) ctx = cpuctx->task_ctx; if (ctx) perf_adjust_freq_unthr_context(ctx, throttled); - - if (cpuctx->jiffies_interval == 1 || - !(jiffies % cpuctx->jiffies_interval)) - perf_rotate_context(cpuctx); } } @@ -6001,7 +6098,9 @@ int perf_pmu_register(struct pmu *pmu, char *name, int type) lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock); cpuctx->ctx.type = cpu_context; cpuctx->ctx.pmu = pmu; - cpuctx->jiffies_interval = 1; + + __perf_cpu_hrtimer_init(cpuctx, cpu); + INIT_LIST_HEAD(&cpuctx->rotation_list); cpuctx->unique_pmu = pmu; } @@ -7387,7 +7486,6 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) case CPU_DOWN_PREPARE: perf_event_exit_cpu(cpu); break; - default: break; }