aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/kernel/cpu/perf_event.c59
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c15
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c2
-rw-r--r--arch/x86/oprofile/nmi_int.c22
-rw-r--r--drivers/oprofile/buffer_sync.c27
-rw-r--r--drivers/oprofile/cpu_buffer.c2
-rw-r--r--kernel/perf_event.c32
-rw-r--r--kernel/trace/ftrace.c17
-rw-r--r--kernel/trace/trace_event_perf.c3
-rw-r--r--kernel/trace/trace_kprobe.c43
-rw-r--r--kernel/watchdog.c17
-rw-r--r--tools/perf/util/probe-event.c1
-rw-r--r--tools/perf/util/probe-finder.c42
13 files changed, 194 insertions, 88 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index bcddac61422..de6569c04cd 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1154,7 +1154,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
/*
* event overflow
*/
- handled = 1;
+ handled++;
data.period = event->hw.last_period;
if (!x86_perf_event_set_period(event))
@@ -1200,12 +1200,20 @@ void perf_events_lapic_init(void)
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
+struct pmu_nmi_state {
+ unsigned int marked;
+ int handled;
+};
+
+static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi);
+
static int __kprobes
perf_event_nmi_handler(struct notifier_block *self,
unsigned long cmd, void *__args)
{
struct die_args *args = __args;
- struct pt_regs *regs;
+ unsigned int this_nmi;
+ int handled;
if (!atomic_read(&active_events))
return NOTIFY_DONE;
@@ -1214,22 +1222,47 @@ perf_event_nmi_handler(struct notifier_block *self,
case DIE_NMI:
case DIE_NMI_IPI:
break;
-
+ case DIE_NMIUNKNOWN:
+ this_nmi = percpu_read(irq_stat.__nmi_count);
+ if (this_nmi != __get_cpu_var(pmu_nmi).marked)
+ /* let the kernel handle the unknown nmi */
+ return NOTIFY_DONE;
+ /*
+ * This one is a PMU back-to-back nmi. Two events
+ * trigger 'simultaneously' raising two back-to-back
+ * NMIs. If the first NMI handles both, the latter
+ * will be empty and daze the CPU. So, we drop it to
+ * avoid false-positive 'unknown nmi' messages.
+ */
+ return NOTIFY_STOP;
default:
return NOTIFY_DONE;
}
- regs = args->regs;
-
apic_write(APIC_LVTPC, APIC_DM_NMI);
- /*
- * Can't rely on the handled return value to say it was our NMI, two
- * events could trigger 'simultaneously' raising two back-to-back NMIs.
- *
- * If the first NMI handles both, the latter will be empty and daze
- * the CPU.
- */
- x86_pmu.handle_irq(regs);
+
+ handled = x86_pmu.handle_irq(args->regs);
+ if (!handled)
+ return NOTIFY_DONE;
+
+ this_nmi = percpu_read(irq_stat.__nmi_count);
+ if ((handled > 1) ||
+ /* the next nmi could be a back-to-back nmi */
+ ((__get_cpu_var(pmu_nmi).marked == this_nmi) &&
+ (__get_cpu_var(pmu_nmi).handled > 1))) {
+ /*
+ * We could have two subsequent back-to-back nmis: The
+ * first handles more than one counter, the 2nd
+ * handles only one counter and the 3rd handles no
+ * counter.
+ *
+ * This is the 2nd nmi because the previous was
+ * handling more than one counter. We will mark the
+ * next (3rd) and then drop it if unhandled.
+ */
+ __get_cpu_var(pmu_nmi).marked = this_nmi + 1;
+ __get_cpu_var(pmu_nmi).handled = handled;
+ }
return NOTIFY_STOP;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index d8d86d01400..ee05c90012d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -712,7 +712,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
int bit, loops;
- u64 ack, status;
+ u64 status;
+ int handled = 0;
perf_sample_data_init(&data, 0);
@@ -728,6 +729,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
loops = 0;
again:
+ intel_pmu_ack_status(status);
if (++loops > 100) {
WARN_ONCE(1, "perfevents: irq loop stuck!\n");
perf_event_print_debug();
@@ -736,19 +738,22 @@ again:
}
inc_irq_stat(apic_perf_irqs);
- ack = status;
intel_pmu_lbr_read();
/*
* PEBS overflow sets bit 62 in the global status register
*/
- if (__test_and_clear_bit(62, (unsigned long *)&status))
+ if (__test_and_clear_bit(62, (unsigned long *)&status)) {
+ handled++;
x86_pmu.drain_pebs(regs);
+ }
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[bit];
+ handled++;
+
if (!test_bit(bit, cpuc->active_mask))
continue;
@@ -761,8 +766,6 @@ again:
x86_pmu_stop(event);
}
- intel_pmu_ack_status(ack);
-
/*
* Repeat if there is more work to be done:
*/
@@ -772,7 +775,7 @@ again:
done:
intel_pmu_enable_all(0);
- return 1;
+ return handled;
}
static struct event_constraint *
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 4fe62c7165c..c70c878ee02 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -936,7 +936,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
inc_irq_stat(apic_perf_irqs);
}
- return handled > 0;
+ return handled;
}
/*
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index f6b48f6c595..cfe4faabb0f 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -568,8 +568,13 @@ static int __init init_sysfs(void)
int error;
error = sysdev_class_register(&oprofile_sysclass);
- if (!error)
- error = sysdev_register(&device_oprofile);
+ if (error)
+ return error;
+
+ error = sysdev_register(&device_oprofile);
+ if (error)
+ sysdev_class_unregister(&oprofile_sysclass);
+
return error;
}
@@ -580,8 +585,10 @@ static void exit_sysfs(void)
}
#else
-#define init_sysfs() do { } while (0)
-#define exit_sysfs() do { } while (0)
+
+static inline int init_sysfs(void) { return 0; }
+static inline void exit_sysfs(void) { }
+
#endif /* CONFIG_PM */
static int __init p4_init(char **cpu_type)
@@ -695,6 +702,8 @@ int __init op_nmi_init(struct oprofile_operations *ops)
char *cpu_type = NULL;
int ret = 0;
+ using_nmi = 0;
+
if (!cpu_has_apic)
return -ENODEV;
@@ -774,7 +783,10 @@ int __init op_nmi_init(struct oprofile_operations *ops)
mux_init(ops);
- init_sysfs();
+ ret = init_sysfs();
+ if (ret)
+ return ret;
+
using_nmi = 1;
printk(KERN_INFO "oprofile: using NMI interrupt.\n");
return 0;
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index a9352b2c7ac..b7e755f4178 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -141,16 +141,6 @@ static struct notifier_block module_load_nb = {
.notifier_call = module_load_notify,
};
-
-static void end_sync(void)
-{
- end_cpu_work();
- /* make sure we don't leak task structs */
- process_task_mortuary();
- process_task_mortuary();
-}
-
-
int sync_start(void)
{
int err;
@@ -158,7 +148,7 @@ int sync_start(void)
if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL))
return -ENOMEM;
- start_cpu_work();
+ mutex_lock(&buffer_mutex);
err = task_handoff_register(&task_free_nb);
if (err)
@@ -173,7 +163,10 @@ int sync_start(void)
if (err)
goto out4;
+ start_cpu_work();
+
out:
+ mutex_unlock(&buffer_mutex);
return err;
out4:
profile_event_unregister(PROFILE_MUNMAP, &munmap_nb);
@@ -182,7 +175,6 @@ out3:
out2:
task_handoff_unregister(&task_free_nb);
out1:
- end_sync();
free_cpumask_var(marked_cpus);
goto out;
}
@@ -190,11 +182,20 @@ out1:
void sync_stop(void)
{
+ /* flush buffers */
+ mutex_lock(&buffer_mutex);
+ end_cpu_work();
unregister_module_notifier(&module_load_nb);
profile_event_unregister(PROFILE_MUNMAP, &munmap_nb);
profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb);
task_handoff_unregister(&task_free_nb);
- end_sync();
+ mutex_unlock(&buffer_mutex);
+ flush_scheduled_work();
+
+ /* make sure we don't leak task structs */
+ process_task_mortuary();
+ process_task_mortuary();
+
free_cpumask_var(marked_cpus);
}
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 219f79e2210..f179ac2ea80 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -120,8 +120,6 @@ void end_cpu_work(void)
cancel_delayed_work(&b->work);
}
-
- flush_scheduled_work();
}
/*
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 0d38f27ad88..2d74f31220a 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -402,11 +402,31 @@ static void perf_group_detach(struct perf_event *event)
}
}
+static inline int
+event_filter_match(struct perf_event *event)
+{
+ return event->cpu == -1 || event->cpu == smp_processor_id();
+}
+
static void
event_sched_out(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
+ u64 delta;
+ /*
+ * An event which could not be activated because of
+ * filter mismatch still needs to have its timings
+ * maintained, otherwise bogus information is return
+ * via read() for time_enabled, time_running:
+ */
+ if (event->state == PERF_EVENT_STATE_INACTIVE
+ && !event_filter_match(event)) {
+ delta = ctx->time - event->tstamp_stopped;
+ event->tstamp_running += delta;
+ event->tstamp_stopped = ctx->time;
+ }
+
if (event->state != PERF_EVENT_STATE_ACTIVE)
return;
@@ -432,9 +452,7 @@ group_sched_out(struct perf_event *group_event,
struct perf_event_context *ctx)
{
struct perf_event *event;
-
- if (group_event->state != PERF_EVENT_STATE_ACTIVE)
- return;
+ int state = group_event->state;
event_sched_out(group_event, cpuctx, ctx);
@@ -444,7 +462,7 @@ group_sched_out(struct perf_event *group_event,
list_for_each_entry(event, &group_event->sibling_list, group_entry)
event_sched_out(event, cpuctx, ctx);
- if (group_event->attr.exclusive)
+ if (state == PERF_EVENT_STATE_ACTIVE && group_event->attr.exclusive)
cpuctx->exclusive = 0;
}
@@ -5942,15 +5960,15 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
{
unsigned int cpu = (long)hcpu;
- switch (action) {
+ switch (action & ~CPU_TASKS_FROZEN) {
case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
+ case CPU_DOWN_FAILED:
perf_event_init_cpu(cpu);
break;
+ case CPU_UP_CANCELED:
case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
perf_event_exit_cpu(cpu);
break;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0d88ce9b9fb..83a16e9ee51 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -381,12 +381,19 @@ static int function_stat_show(struct seq_file *m, void *v)
{
struct ftrace_profile *rec = v;
char str[KSYM_SYMBOL_LEN];
+ int ret = 0;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- static DEFINE_MUTEX(mutex);
static struct trace_seq s;
unsigned long long avg;
unsigned long long stddev;
#endif
+ mutex_lock(&ftrace_profile_lock);
+
+ /* we raced with function_profile_reset() */
+ if (unlikely(rec->counter == 0)) {
+ ret = -EBUSY;
+ goto out;
+ }
kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
seq_printf(m, " %-30.30s %10lu", str, rec->counter);
@@ -408,7 +415,6 @@ static int function_stat_show(struct seq_file *m, void *v)
do_div(stddev, (rec->counter - 1) * 1000);
}
- mutex_lock(&mutex);
trace_seq_init(&s);
trace_print_graph_duration(rec->time, &s);
trace_seq_puts(&s, " ");
@@ -416,11 +422,12 @@ static int function_stat_show(struct seq_file *m, void *v)
trace_seq_puts(&s, " ");
trace_print_graph_duration(stddev, &s);
trace_print_seq(m, &s);
- mutex_unlock(&mutex);
#endif
seq_putc(m, '\n');
+out:
+ mutex_unlock(&ftrace_profile_lock);
- return 0;
+ return ret;
}
static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
@@ -2409,7 +2416,7 @@ static const struct file_operations ftrace_filter_fops = {
.open = ftrace_filter_open,
.read = seq_read,
.write = ftrace_filter_write,
- .llseek = ftrace_regex_lseek,
+ .llseek = no_llseek,
.release = ftrace_filter_release,
};
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 92f5477a006..f3bbcd1c90c 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -91,6 +91,8 @@ int perf_trace_init(struct perf_event *p_event)
tp_event->class && tp_event->class->reg &&
try_module_get(tp_event->mod)) {
ret = perf_trace_event_init(tp_event, p_event);
+ if (ret)
+ module_put(tp_event->mod);
break;
}
}
@@ -147,6 +149,7 @@ void perf_trace_destroy(struct perf_event *p_event)
}
}
out:
+ module_put(tp_event->mod);
mutex_unlock(&event_mutex);
}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 8b27c9849b4..544301d29de 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -514,8 +514,8 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
struct pt_regs *regs);
-/* Check the name is good for event/group */
-static int check_event_name(const char *name)
+/* Check the name is good for event/group/fields */
+static int is_good_name(const char *name)
{
if (!isalpha(*name) && *name != '_')
return 0;
@@ -557,7 +557,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
else
tp->rp.kp.pre_handler = kprobe_dispatcher;
- if (!event || !check_event_name(event)) {
+ if (!event || !is_good_name(event)) {
ret = -EINVAL;
goto error;
}
@@ -567,7 +567,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
if (!tp->call.name)
goto error;
- if (!group || !check_event_name(group)) {
+ if (!group || !is_good_name(group)) {
ret = -EINVAL;
goto error;
}
@@ -883,7 +883,7 @@ static int create_trace_probe(int argc, char **argv)
int i, ret = 0;
int is_return = 0, is_delete = 0;
char *symbol = NULL, *event = NULL, *group = NULL;
- char *arg, *tmp;
+ char *arg;
unsigned long offset = 0;
void *addr = NULL;
char buf[MAX_EVENT_NAME_LEN];
@@ -992,26 +992,36 @@ static int create_trace_probe(int argc, char **argv)
/* parse arguments */
ret = 0;
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
+ /* Increment count for freeing args in error case */
+ tp->nr_args++;
+
/* Parse argument name */
arg = strchr(argv[i], '=');
- if (arg)
+ if (arg) {
*arg++ = '\0';
- else
+ tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
+ } else {
arg = argv[i];
+ /* If argument name is omitted, set "argN" */
+ snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
+ tp->args[i].name = kstrdup(buf, GFP_KERNEL);
+ }
- tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
if (!tp->args[i].name) {
- pr_info("Failed to allocate argument%d name '%s'.\n",
- i, argv[i]);
+ pr_info("Failed to allocate argument[%d] name.\n", i);
ret = -ENOMEM;
goto error;
}
- tmp = strchr(tp->args[i].name, ':');
- if (tmp)
- *tmp = '_'; /* convert : to _ */
+
+ if (!is_good_name(tp->args[i].name)) {
+ pr_info("Invalid argument[%d] name: %s\n",
+ i, tp->args[i].name);
+ ret = -EINVAL;
+ goto error;
+ }
if (conflict_field_name(tp->args[i].name, tp->args, i)) {
- pr_info("Argument%d name '%s' conflicts with "
+ pr_info("Argument[%d] name '%s' conflicts with "
"another field.\n", i, argv[i]);
ret = -EINVAL;
goto error;
@@ -1020,12 +1030,9 @@ static int create_trace_probe(int argc, char **argv)
/* Parse fetch argument */
ret = parse_probe_arg(arg, tp, &tp->args[i], is_return);
if (ret) {
- pr_info("Parse error at argument%d. (%d)\n", i, ret);
- kfree(tp->args[i].name);
+ pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
goto error;
}
-
- tp->nr_args++;
}
ret = register_trace_probe(tp);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 5b1ee4f4ca0..fa71aebda4f 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -121,7 +121,7 @@ static void __touch_watchdog(void)
void touch_softlockup_watchdog(void)
{
- __get_cpu_var(watchdog_touch_ts) = 0;
+ __raw_get_cpu_var(watchdog_touch_ts) = 0;
}
EXPORT_SYMBOL(touch_softlockup_watchdog);
@@ -141,7 +141,14 @@ void touch_all_softlockup_watchdogs(void)
#ifdef CONFIG_HARDLOCKUP_DETECTOR
void touch_nmi_watchdog(void)
{
- __get_cpu_var(watchdog_nmi_touch) = true;
+ if (watchdog_enabled) {
+ unsigned cpu;
+
+ for_each_present_cpu(cpu) {
+ if (per_cpu(watchdog_nmi_touch, cpu) != true)
+ per_cpu(watchdog_nmi_touch, cpu) = true;
+ }
+ }
touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);
@@ -422,6 +429,9 @@ static int watchdog_enable(int cpu)
wake_up_process(p);
}
+ /* if any cpu succeeds, watchdog is considered enabled for the system */
+ watchdog_enabled = 1;
+
return 0;
}
@@ -444,9 +454,6 @@ static void watchdog_disable(int cpu)
per_cpu(softlockup_watchdog, cpu) = NULL;
kthread_stop(p);
}
-
- /* if any cpu succeeds, watchdog is considered enabled for the system */
- watchdog_enabled = 1;
}
static void watchdog_enable_all_cpus(void)
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index e72f05c3bef..fcc16e4349d 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -1539,6 +1539,7 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
goto error;
}
tev->point.offset = pev->point.offset;
+ tev->point.retprobe = pev->point.retprobe;
tev->nargs = pev->nargs;
if (tev->nargs) {
tev->args = zalloc(sizeof(struct probe_trace_arg)
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 525136684d4..32b81f707ff 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -686,6 +686,25 @@ static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf)
char buf[32], *ptr;
int ret, nscopes;
+ if (!is_c_varname(pf->pvar->var)) {
+ /* Copy raw parameters */
+ pf->tvar->value = strdup(pf->pvar->var);
+ if (pf->tvar->value == NULL)
+ return -ENOMEM;
+ if (pf->pvar->type) {
+ pf->tvar->type = strdup(pf->pvar->type);
+ if (pf->tvar->type == NULL)
+ return -ENOMEM;
+ }
+ if (pf->pvar->name) {
+ pf->tvar->name = strdup(pf->pvar->name);
+ if (pf->tvar->name == NULL)
+ return -ENOMEM;
+ } else
+ pf->tvar->name = NULL;
+ return 0;
+ }
+
if (pf->pvar->name)
pf->tvar->name = strdup(pf->pvar->name);
else {
@@ -700,19 +719,6 @@ static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf)
if (pf->tvar->name == NULL)
return -ENOMEM;
- if (!is_c_varname(pf->pvar->var)) {
- /* Copy raw parameters */
- pf->tvar->value = strdup(pf->pvar->var);
- if (pf->tvar->value == NULL)
- return -ENOMEM;
- if (pf->pvar->type) {
- pf->tvar->type = strdup(pf->pvar->type);
- if (pf->tvar->type == NULL)
- return -ENOMEM;
- }
- return 0;
- }
-
pr_debug("Searching '%s' variable in context.\n",
pf->pvar->var);
/* Search child die for local variables and parameters. */
@@ -783,6 +789,16 @@ static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf)
/* This function has no name. */
tev->point.offset = (unsigned long)pf->addr;
+ /* Return probe must be on the head of a subprogram */
+ if (pf->pev->point.retprobe) {
+ if (tev->point.offset != 0) {
+ pr_warning("Return probe must be on the head of"
+ " a real function\n");
+ return -EINVAL;
+ }
+ tev->point.retprobe = true;
+ }
+
pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
tev->point.offset);