From 1fb9d6ad2766a1dd70d167552988375049a97f21 Mon Sep 17 00:00:00 2001
From: Don Zickus
Date: Fri, 5 Feb 2010 21:47:04 -0500
Subject: nmi_watchdog: Add new, generic implementation, using perf events

This is a new generic nmi_watchdog implementation using the perf events
infrastructure as suggested by Ingo.

The implementation is simple: just create an in-kernel perf event and
register an overflow handler to check for cpu lockups.

I created a generic implementation that lives in kernel/ and the
hardware specific part that for now lives in arch/x86.

This approach has a number of advantages:

 - It simplifies the x86 PMU implementation in the long run, in that it
   removes the hardcoded low-level PMU implementation that was the NMI
   watchdog before.

 - It allows new NMI watchdog features to be added in a central place.

 - It allows other architectures to enable the NMI watchdog, as long as
   they have perf events (that provide NMIs) implemented.

 - It also allows for more graceful co-existence of existing perf events
   apps and the NMI watchdog - before these changes the relationship was
   exclusive. (The NMI watchdog will 'spend' a perf event when enabled.
   In later iterations we might be able to piggyback from an existing
   NMI event without having to allocate a hardware event for the NMI
   watchdog - turning this into a no-hardware-cost feature.)

As for compatibility, we'll keep the old NMI watchdog code as well until
the new one can 100% replace it on all CPUs, old and new alike.  That
might take some time as the NMI watchdog has been ported to many CPU
models.

I have done light testing to make sure the framework works correctly
and it does.

v2: Set the correct timeout values based on the old nmi watchdog

Signed-off-by: Don Zickus
Cc: Linus Torvalds
Cc: Andrew Morton
Cc: gorcunov@gmail.com
Cc: aris@redhat.com
Cc: peterz@infradead.org
LKML-Reference: <1265424425-31562-3-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar
---
 kernel/nmi_watchdog.c | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 191 insertions(+)
 create mode 100644 kernel/nmi_watchdog.c

(limited to 'kernel')

diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
new file mode 100644
index 00000000000..36817b214d6
--- /dev/null
+++ b/kernel/nmi_watchdog.c
@@ -0,0 +1,191 @@
+/*
+ * Detect Hard Lockups using the NMI
+ *
+ * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
+ *
+ * this code detects hard lockups: incidents in where on a CPU
+ * the kernel does not respond to anything except NMI.
+ *
+ * Note: Most of this code is borrowed heavily from softlockup.c,
+ * so thanks to Ingo for the initial implementation.
+ * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks
+ * to those contributors as well.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+static DEFINE_PER_CPU(struct perf_event *, nmi_watchdog_ev);
+static DEFINE_PER_CPU(int, nmi_watchdog_touch);
+static DEFINE_PER_CPU(long, alert_counter);
+
+void touch_nmi_watchdog(void)
+{
+	__raw_get_cpu_var(nmi_watchdog_touch) = 1;
+	touch_softlockup_watchdog();
+}
+EXPORT_SYMBOL(touch_nmi_watchdog);
+
+void touch_all_nmi_watchdog(void)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		per_cpu(nmi_watchdog_touch, cpu) = 1;
+	touch_softlockup_watchdog();
+}
+
+#ifdef CONFIG_SYSCTL
+/*
+ * proc handler for /proc/sys/kernel/nmi_watchdog
+ */
+int proc_nmi_enabled(struct ctl_table *table, int write,
+		     void __user *buffer, size_t *length, loff_t *ppos)
+{
+	int cpu;
+
+	if (per_cpu(nmi_watchdog_ev, smp_processor_id()) == NULL)
+		nmi_watchdog_enabled = 0;
+	else
+		nmi_watchdog_enabled = 1;
+
+	touch_all_nmi_watchdog();
+	proc_dointvec(table, write, buffer, length, ppos);
+	if (nmi_watchdog_enabled)
+		for_each_online_cpu(cpu)
+			perf_event_enable(per_cpu(nmi_watchdog_ev, cpu));
+	else
+		for_each_online_cpu(cpu)
+			perf_event_disable(per_cpu(nmi_watchdog_ev, cpu));
+	return 0;
+}
+
+#endif /* CONFIG_SYSCTL */
+
+struct perf_event_attr wd_attr = {
+	.type = PERF_TYPE_HARDWARE,
+	.config = PERF_COUNT_HW_CPU_CYCLES,
+	.size = sizeof(struct perf_event_attr),
+	.pinned = 1,
+	.disabled = 1,
+};
+
+static int panic_on_timeout;
+
+void wd_overflow(struct perf_event *event, int nmi,
+		 struct perf_sample_data *data,
+		 struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+	int touched = 0;
+
+	if (__get_cpu_var(nmi_watchdog_touch)) {
+		per_cpu(nmi_watchdog_touch, cpu) = 0;
+		touched = 1;
+	}
+
+	/* check to see if the cpu is doing anything */
+	if (!touched && hw_nmi_is_cpu_stuck(regs)) {
+		/*
+		 * Ayiee, looks like this CPU is stuck ...
+		 * wait a few IRQs (5 seconds) before doing the oops ...
+		 */
+		per_cpu(alert_counter,cpu) += 1;
+		if (per_cpu(alert_counter,cpu) == 5) {
+			/*
+			 * die_nmi will return ONLY if NOTIFY_STOP happens..
+			 */
+			die_nmi("BUG: NMI Watchdog detected LOCKUP",
+				regs, panic_on_timeout);
+		}
+	} else {
+		per_cpu(alert_counter,cpu) = 0;
+	}
+
+	return;
+}
+
+/*
+ * Create/destroy watchdog threads as CPUs come and go:
+ */
+static int __cpuinit
+cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+	int hotcpu = (unsigned long)hcpu;
+	struct perf_event *event;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		per_cpu(nmi_watchdog_touch, hotcpu) = 0;
+		break;
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		/* originally wanted the below chunk to be in CPU_UP_PREPARE, but caps is unpriv for non-CPU0 */
+		wd_attr.sample_period = cpu_khz * 1000;
+		event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
+		if (IS_ERR(event)) {
+			printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
+			return NOTIFY_BAD;
+		}
+		per_cpu(nmi_watchdog_ev, hotcpu) = event;
+		perf_event_enable(per_cpu(nmi_watchdog_ev, hotcpu));
+		break;
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+		perf_event_disable(per_cpu(nmi_watchdog_ev, hotcpu));
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		event = per_cpu(nmi_watchdog_ev, hotcpu);
+		per_cpu(nmi_watchdog_ev, hotcpu) = NULL;
+		perf_event_release_kernel(event);
+		break;
+#endif /* CONFIG_HOTPLUG_CPU */
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata cpu_nfb = {
+	.notifier_call = cpu_callback
+};
+
+static int __initdata nonmi_watchdog;
+
+static int __init nonmi_watchdog_setup(char *str)
+{
+	nonmi_watchdog = 1;
+	return 1;
+}
+__setup("nonmi_watchdog", nonmi_watchdog_setup);
+
+static int __init spawn_nmi_watchdog_task(void)
+{
+	void *cpu = (void *)(long)smp_processor_id();
+	int err;
+
+	if (nonmi_watchdog)
+		return 0;
+
+	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
+	if (err == NOTIFY_BAD) {
+		BUG();
+		return 1;
+	}
+	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
+	register_cpu_notifier(&cpu_nfb);
+
+	return 0;
+}
+early_initcall(spawn_nmi_watchdog_task);
--
cgit v1.2.3

From 84e478c6f1eb9c4bfa1fff2f8108e9a061b46428 Mon Sep 17 00:00:00 2001
From: Don Zickus
Date: Fri, 5 Feb 2010 21:47:05 -0500
Subject: nmi_watchdog: Config option to enable new nmi_watchdog

These are the bits that enable the new nmi_watchdog and safely isolate
the old nmi_watchdog.  Only one or the other can run, not both at the
same time.

Signed-off-by: Don Zickus
Cc: Linus Torvalds
Cc: Andrew Morton
Cc: gorcunov@gmail.com
Cc: aris@redhat.com
Cc: peterz@infradead.org
LKML-Reference: <1265424425-31562-4-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar
---
 kernel/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/Makefile b/kernel/Makefile
index 864ff75d65f..8a5abe53eba 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -76,6 +76,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += kgdb.o
 obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
+obj-$(CONFIG_NMI_WATCHDOG) += nmi_watchdog.o
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
--
cgit v1.2.3

From 504d7cf10ee42bb76b9556859f23d4121dee0a77 Mon Sep 17 00:00:00 2001
From: Don Zickus
Date: Fri, 12 Feb 2010 17:19:19 -0500
Subject: nmi_watchdog: Compile and portability fixes

The original patch was x86_64 centric.  Changed the code to make it
less so.

Tested by building and running on a powerpc.
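For reference, the pattern the series is built on can be reduced to a few
lines.  The sketch below is illustrative only (the sketch_* names are
invented here); it mirrors the four-argument
perf_event_create_kernel_counter() call used in the diffs above, a
signature that later kernels changed:

	/* one pinned, initially-disabled hardware counter per CPU;
	   its overflow handler runs in NMI context */
	static struct perf_event_attr sketch_attr = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_CPU_CYCLES,
		.size		= sizeof(struct perf_event_attr),
		.pinned		= 1,	/* never multiplexed off the PMU */
		.disabled	= 1,	/* created stopped, enabled explicitly */
	};

	static void sketch_overflow(struct perf_event *event, int nmi,
				    struct perf_sample_data *data,
				    struct pt_regs *regs)
	{
		/* NMI context: no sleeping, no locks shared with interrupts */
	}

	static int sketch_start(int cpu)
	{
		struct perf_event *event;

		/* cpu_khz * 1000 cycles ~= one overflow per second, as in patch 1 */
		sketch_attr.sample_period = cpu_khz * 1000;
		event = perf_event_create_kernel_counter(&sketch_attr, cpu, -1,
							 sketch_overflow);
		if (IS_ERR(event))
			return PTR_ERR(event);
		perf_event_enable(event);
		return 0;
	}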
Signed-off-by: Don Zickus Cc: peterz@infradead.org Cc: gorcunov@gmail.com Cc: aris@redhat.com LKML-Reference: <1266013161-31197-2-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- kernel/nmi_watchdog.c | 52 ++++++++++++++++++++++++++++++++++++++++----------- kernel/sysctl.c | 15 ++++++++++++++- 2 files changed, 55 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c index 36817b214d6..73c1954a97b 100644 --- a/kernel/nmi_watchdog.c +++ b/kernel/nmi_watchdog.c @@ -30,6 +30,8 @@ static DEFINE_PER_CPU(struct perf_event *, nmi_watchdog_ev); static DEFINE_PER_CPU(int, nmi_watchdog_touch); static DEFINE_PER_CPU(long, alert_counter); +static int panic_on_timeout; + void touch_nmi_watchdog(void) { __raw_get_cpu_var(nmi_watchdog_touch) = 1; @@ -46,19 +48,49 @@ void touch_all_nmi_watchdog(void) touch_softlockup_watchdog(); } +static int __init setup_nmi_watchdog(char *str) +{ + if (!strncmp(str, "panic", 5)) { + panic_on_timeout = 1; + str = strchr(str, ','); + if (!str) + return 1; + ++str; + } + return 1; +} +__setup("nmi_watchdog=", setup_nmi_watchdog); + #ifdef CONFIG_SYSCTL /* * proc handler for /proc/sys/kernel/nmi_watchdog */ +int nmi_watchdog_enabled; + int proc_nmi_enabled(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { int cpu; - if (per_cpu(nmi_watchdog_ev, smp_processor_id()) == NULL) + if (!write) { + struct perf_event *event; + for_each_online_cpu(cpu) { + event = per_cpu(nmi_watchdog_ev, cpu); + if (event->state > PERF_EVENT_STATE_OFF) { + nmi_watchdog_enabled = 1; + break; + } + } + proc_dointvec(table, write, buffer, length, ppos); + return 0; + } + + if (per_cpu(nmi_watchdog_ev, smp_processor_id()) == NULL) { nmi_watchdog_enabled = 0; - else - nmi_watchdog_enabled = 1; + proc_dointvec(table, write, buffer, length, ppos); + printk("NMI watchdog failed configuration, can not be enabled\n"); + return 0; + } touch_all_nmi_watchdog(); proc_dointvec(table, write, buffer, length, ppos); @@ -81,8 +113,6 @@ struct perf_event_attr wd_attr = { .disabled = 1, }; -static int panic_on_timeout; - void wd_overflow(struct perf_event *event, int nmi, struct perf_sample_data *data, struct pt_regs *regs) @@ -103,11 +133,11 @@ void wd_overflow(struct perf_event *event, int nmi, */ per_cpu(alert_counter,cpu) += 1; if (per_cpu(alert_counter,cpu) == 5) { - /* - * die_nmi will return ONLY if NOTIFY_STOP happens.. 
-			 */
-			die_nmi("BUG: NMI Watchdog detected LOCKUP",
-				regs, panic_on_timeout);
+			if (panic_on_timeout) {
+				panic("NMI Watchdog detected LOCKUP on cpu %d", cpu);
+			} else {
+				WARN(1, "NMI Watchdog detected LOCKUP on cpu %d", cpu);
+			}
 		}
 	} else {
 		per_cpu(alert_counter,cpu) = 0;
@@ -133,7 +163,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
 		/* originally wanted the below chunk to be in CPU_UP_PREPARE, but caps is unpriv for non-CPU0 */
-		wd_attr.sample_period = cpu_khz * 1000;
+		wd_attr.sample_period = hw_nmi_get_sample_period();
 		event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
 		if (IS_ERR(event)) {
 			printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8a68b244846..ac72c9e6bd9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -60,6 +60,10 @@
 #include
 #endif
 
+#ifdef CONFIG_NMI_WATCHDOG
+#include
+#endif
+
 #if defined(CONFIG_SYSCTL)
 
@@ -692,7 +696,16 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0444,
 		.proc_handler	= proc_dointvec,
 	},
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
+#if defined(CONFIG_NMI_WATCHDOG)
+	{
+		.procname	= "nmi_watchdog",
+		.data		= &nmi_watchdog_enabled,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= proc_nmi_enabled,
+	},
+#endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_NMI_WATCHDOG)
 	{
 		.procname	= "unknown_nmi_panic",
 		.data		= &unknown_nmi_panic,
--
cgit v1.2.3

From cf454aecb31741a0438ed1201b3dd153c7c7b19a Mon Sep 17 00:00:00 2001
From: Don Zickus
Date: Fri, 12 Feb 2010 17:19:20 -0500
Subject: nmi_watchdog: Fallback to software events when no hardware pmu detected

Not all arches have a PMU or have perf_event support for their PMU.
The nmi_watchdog will fail in those cases.  Fall back to using software
events to generate nmi_watchdog traffic with local apic interrupts.

Tested on a Pentium4 and it worked as expected, except for detecting
cpu lockups.

The problem with using software events as a cpu lockup detector is that
the nmi_watchdog uses the logic that if local apic interrupts stop
incrementing then the cpu is probably locked up.  But with software
events we use the local apic to trigger the nmi_watchdog callback to
see if local apic interrupts are still firing, which obviously they
are, otherwise we wouldn't have been triggered.

The algorithm to detect cpu lockups is the same as the old
nmi_watchdog.  Perhaps we need to find a better way to detect lockups?
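The fallback in the diff that follows amounts to retrying the same
creation call with a software event type.  A condensed sketch (the
sketch_create name is invented; wd_overflow is the handler from patch 1):

	static struct perf_event *sketch_create(struct perf_event_attr *attr, int cpu)
	{
		struct perf_event *event;

		/* hardware PMU first */
		event = perf_event_create_kernel_counter(attr, cpu, -1, wd_overflow);
		if (!IS_ERR(event))
			return event;

		/* no usable PMU: same callback, driven by a software event.
		   Software events ride on the very interrupts whose absence
		   defines a hard lockup, hence the caveat above: the callback
		   keeps firing, but a locked-up cpu can no longer be caught. */
		attr->type = PERF_TYPE_SOFTWARE;
		return perf_event_create_kernel_counter(attr, cpu, -1, wd_overflow);
	}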
Signed-off-by: Don Zickus
Cc: peterz@infradead.org
Cc: gorcunov@gmail.com
Cc: aris@redhat.com
LKML-Reference: <1266013161-31197-3-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar
---
 kernel/nmi_watchdog.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
index 73c1954a97b..4f23505d887 100644
--- a/kernel/nmi_watchdog.c
+++ b/kernel/nmi_watchdog.c
@@ -166,8 +166,12 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		wd_attr.sample_period = hw_nmi_get_sample_period();
 		event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
 		if (IS_ERR(event)) {
-			printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
-			return NOTIFY_BAD;
+			wd_attr.type = PERF_TYPE_SOFTWARE;
+			event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
+			if (IS_ERR(event)) {
+				printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
+				return NOTIFY_BAD;
+			}
 		}
 		per_cpu(nmi_watchdog_ev, hotcpu) = event;
 		perf_event_enable(per_cpu(nmi_watchdog_ev, hotcpu));
--
cgit v1.2.3

From 6081b6cd9702967889de34fe5da1f96bb96d0ab8 Mon Sep 17 00:00:00 2001
From: Don Zickus
Date: Tue, 16 Feb 2010 17:04:52 -0500
Subject: nmi_watchdog: support for oprofile

Re-arrange the code so that when someone disables nmi_watchdog with:

	echo 0 > /proc/sys/kernel/nmi_watchdog

it releases the hardware reservation on the PMUs.  This allows the
oprofile module to grab those PMUs and do its thing.  Otherwise
oprofile fails to load because the hardware is reserved by the
perf_events subsystem.

Tested using:

	oprofile --vm-linux --start

and watched it fail when nmi_watchdog is enabled and succeed when:

	oprofile --deinit && echo 0 > /proc/sys/kernel/nmi_watchdog

is run.

Note: this has the side quirk of having the nmi_watchdog latch onto the
software events instead of hardware events if oprofile has already
reserved the hardware first.  User beware!
:-) Signed-off-by: Don Zickus Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: gorcunov@gmail.com Cc: aris@redhat.com Cc: eranian@google.com LKML-Reference: <1266357892-30504-1-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- kernel/nmi_watchdog.c | 144 ++++++++++++++++++++++++++++---------------------- 1 file changed, 82 insertions(+), 62 deletions(-) (limited to 'kernel') diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c index 4f23505d887..633b230c231 100644 --- a/kernel/nmi_watchdog.c +++ b/kernel/nmi_watchdog.c @@ -61,50 +61,6 @@ static int __init setup_nmi_watchdog(char *str) } __setup("nmi_watchdog=", setup_nmi_watchdog); -#ifdef CONFIG_SYSCTL -/* - * proc handler for /proc/sys/kernel/nmi_watchdog - */ -int nmi_watchdog_enabled; - -int proc_nmi_enabled(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) -{ - int cpu; - - if (!write) { - struct perf_event *event; - for_each_online_cpu(cpu) { - event = per_cpu(nmi_watchdog_ev, cpu); - if (event->state > PERF_EVENT_STATE_OFF) { - nmi_watchdog_enabled = 1; - break; - } - } - proc_dointvec(table, write, buffer, length, ppos); - return 0; - } - - if (per_cpu(nmi_watchdog_ev, smp_processor_id()) == NULL) { - nmi_watchdog_enabled = 0; - proc_dointvec(table, write, buffer, length, ppos); - printk("NMI watchdog failed configuration, can not be enabled\n"); - return 0; - } - - touch_all_nmi_watchdog(); - proc_dointvec(table, write, buffer, length, ppos); - if (nmi_watchdog_enabled) - for_each_online_cpu(cpu) - perf_event_enable(per_cpu(nmi_watchdog_ev, cpu)); - else - for_each_online_cpu(cpu) - perf_event_disable(per_cpu(nmi_watchdog_ev, cpu)); - return 0; -} - -#endif /* CONFIG_SYSCTL */ - struct perf_event_attr wd_attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, @@ -146,6 +102,85 @@ void wd_overflow(struct perf_event *event, int nmi, return; } +static int enable_nmi_watchdog(int cpu) +{ + struct perf_event *event; + + event = per_cpu(nmi_watchdog_ev, cpu); + if (event && event->state > PERF_EVENT_STATE_OFF) + return 0; + + if (event == NULL) { + /* Try to register using hardware perf events first */ + wd_attr.sample_period = hw_nmi_get_sample_period(); + event = perf_event_create_kernel_counter(&wd_attr, cpu, -1, wd_overflow); + if (IS_ERR(event)) { + wd_attr.type = PERF_TYPE_SOFTWARE; + event = perf_event_create_kernel_counter(&wd_attr, cpu, -1, wd_overflow); + if (IS_ERR(event)) { + printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", cpu, event); + return -1; + } + } + per_cpu(nmi_watchdog_ev, cpu) = event; + } + perf_event_enable(per_cpu(nmi_watchdog_ev, cpu)); + return 0; +} + +static void disable_nmi_watchdog(int cpu) +{ + struct perf_event *event; + + event = per_cpu(nmi_watchdog_ev, cpu); + if (event) { + perf_event_disable(per_cpu(nmi_watchdog_ev, cpu)); + per_cpu(nmi_watchdog_ev, cpu) = NULL; + perf_event_release_kernel(event); + } +} + +#ifdef CONFIG_SYSCTL +/* + * proc handler for /proc/sys/kernel/nmi_watchdog + */ +int nmi_watchdog_enabled; + +int proc_nmi_enabled(struct ctl_table *table, int write, + void __user *buffer, size_t *length, loff_t *ppos) +{ + int cpu; + + if (!write) { + struct perf_event *event; + for_each_online_cpu(cpu) { + event = per_cpu(nmi_watchdog_ev, cpu); + if (event && event->state > PERF_EVENT_STATE_OFF) { + nmi_watchdog_enabled = 1; + break; + } + } + proc_dointvec(table, write, buffer, length, ppos); + return 0; + } + + 
touch_all_nmi_watchdog();
+	proc_dointvec(table, write, buffer, length, ppos);
+	if (nmi_watchdog_enabled) {
+		for_each_online_cpu(cpu)
+			if (enable_nmi_watchdog(cpu)) {
+				printk("NMI watchdog failed configuration, "
+					" can not be enabled\n");
+			}
+	} else {
+		for_each_online_cpu(cpu)
+			disable_nmi_watchdog(cpu);
+	}
+	return 0;
+}
+
+#endif /* CONFIG_SYSCTL */
+
 /*
  * Create/destroy watchdog threads as CPUs come and go:
  */
@@ -153,7 +188,6 @@ static int __cpuinit
 cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
 	int hotcpu = (unsigned long)hcpu;
-	struct perf_event *event;
 
 	switch (action) {
 	case CPU_UP_PREPARE:
@@ -162,29 +196,15 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		break;
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-		/* originally wanted the below chunk to be in CPU_UP_PREPARE, but caps is unpriv for non-CPU0 */
-		wd_attr.sample_period = hw_nmi_get_sample_period();
-		event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
-		if (IS_ERR(event)) {
-			wd_attr.type = PERF_TYPE_SOFTWARE;
-			event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
-			if (IS_ERR(event)) {
-				printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
-				return NOTIFY_BAD;
-			}
-		}
-		per_cpu(nmi_watchdog_ev, hotcpu) = event;
-		perf_event_enable(per_cpu(nmi_watchdog_ev, hotcpu));
+		if (enable_nmi_watchdog(hotcpu))
+			return NOTIFY_BAD;
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_UP_CANCELED:
 	case CPU_UP_CANCELED_FROZEN:
-		perf_event_disable(per_cpu(nmi_watchdog_ev, hotcpu));
+		disable_nmi_watchdog(hotcpu);
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
-		event = per_cpu(nmi_watchdog_ev, hotcpu);
-		per_cpu(nmi_watchdog_ev, hotcpu) = NULL;
-		perf_event_release_kernel(event);
 		break;
 #endif /* CONFIG_HOTPLUG_CPU */
 	}
--
cgit v1.2.3

From 96ca4028aca0638d0e11dcbfabc4283054072933 Mon Sep 17 00:00:00 2001
From: Don Zickus
Date: Tue, 16 Feb 2010 17:02:25 -0500
Subject: nmi_watchdog: Properly configure for software events

Paul Mackerras brought up a good point that when falling back to
software events, I may have been lucky in my configuration.

Modified the code to explicitly provide a new configuration for
software events.
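Concretely, hardware and software events share neither a config
namespace nor period units, which is what the diff that follows fixes.
In outline (values as in the patch):

	/* PERF_TYPE_HARDWARE: .config indexes PERF_COUNT_HW_*,
	   sample_period counts cycles */
	wd_hw_attr.sample_period = hw_nmi_get_sample_period();

	/* PERF_TYPE_SOFTWARE: .config indexes PERF_COUNT_SW_*; for
	   PERF_COUNT_SW_CPU_CLOCK the period is in nanoseconds */
	wd_sw_attr.sample_period = NSEC_PER_SEC;	/* one callback per second */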
Suggested-by: Paul Mackerras Signed-off-by: Don Zickus Cc: gorcunov@gmail.com Cc: aris@redhat.com Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <1266357745-26671-1-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- kernel/nmi_watchdog.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c index 633b230c231..3c75cbf3acb 100644 --- a/kernel/nmi_watchdog.c +++ b/kernel/nmi_watchdog.c @@ -61,7 +61,7 @@ static int __init setup_nmi_watchdog(char *str) } __setup("nmi_watchdog=", setup_nmi_watchdog); -struct perf_event_attr wd_attr = { +struct perf_event_attr wd_hw_attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, .size = sizeof(struct perf_event_attr), @@ -69,6 +69,14 @@ struct perf_event_attr wd_attr = { .disabled = 1, }; +struct perf_event_attr wd_sw_attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_CPU_CLOCK, + .size = sizeof(struct perf_event_attr), + .pinned = 1, + .disabled = 1, +}; + void wd_overflow(struct perf_event *event, int nmi, struct perf_sample_data *data, struct pt_regs *regs) @@ -105,6 +113,7 @@ void wd_overflow(struct perf_event *event, int nmi, static int enable_nmi_watchdog(int cpu) { struct perf_event *event; + struct perf_event_attr *wd_attr; event = per_cpu(nmi_watchdog_ev, cpu); if (event && event->state > PERF_EVENT_STATE_OFF) @@ -112,11 +121,15 @@ static int enable_nmi_watchdog(int cpu) if (event == NULL) { /* Try to register using hardware perf events first */ - wd_attr.sample_period = hw_nmi_get_sample_period(); - event = perf_event_create_kernel_counter(&wd_attr, cpu, -1, wd_overflow); + wd_attr = &wd_hw_attr; + wd_attr->sample_period = hw_nmi_get_sample_period(); + event = perf_event_create_kernel_counter(wd_attr, cpu, -1, wd_overflow); if (IS_ERR(event)) { - wd_attr.type = PERF_TYPE_SOFTWARE; - event = perf_event_create_kernel_counter(&wd_attr, cpu, -1, wd_overflow); + /* hardware doesn't exist or not supported, fallback to software events */ + printk("nmi_watchdog: hardware not available, trying software events\n"); + wd_attr = &wd_sw_attr; + wd_attr->sample_period = NSEC_PER_SEC; + event = perf_event_create_kernel_counter(wd_attr, cpu, -1, wd_overflow); if (IS_ERR(event)) { printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", cpu, event); return -1; -- cgit v1.2.3 From 47195d57636604ff6048b0d7aa3e4ed9643f6073 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Mon, 22 Feb 2010 18:09:03 -0500 Subject: nmi_watchdog: Clean up various small details Mostly copy/paste whitespace damage with a couple of nitpicks by the checkpatch script. Fix the struct definition as requested by Ingo too. 
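The struct fix referred to is the tab-aligned initializer style the diff
below switches to, e.g.:

	/* before */
	struct perf_event_attr wd_hw_attr = {
		.type = PERF_TYPE_HARDWARE,
		.config = PERF_COUNT_HW_CPU_CYCLES,
	};

	/* after (checkpatch-clean, as in the diff below) */
	struct perf_event_attr wd_hw_attr = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_CPU_CYCLES,
	};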
Signed-off-by: Don Zickus Cc: peterz@infradead.org Cc: gorcunov@gmail.com Cc: aris@redhat.com LKML-Reference: <1266880143-24943-1-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar -- arch/x86/kernel/apic/hw_nmi.c | 14 +++++------ arch/x86/kernel/traps.c | 6 ++-- include/linux/nmi.h | 2 - kernel/nmi_watchdog.c | 51 ++++++++++++++++++++---------------------- 4 files changed, 36 insertions(+), 37 deletions(-) --- kernel/nmi_watchdog.c | 51 +++++++++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 26 deletions(-) (limited to 'kernel') diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c index 3c75cbf3acb..0a6f57f537a 100644 --- a/kernel/nmi_watchdog.c +++ b/kernel/nmi_watchdog.c @@ -50,31 +50,31 @@ void touch_all_nmi_watchdog(void) static int __init setup_nmi_watchdog(char *str) { - if (!strncmp(str, "panic", 5)) { - panic_on_timeout = 1; - str = strchr(str, ','); - if (!str) - return 1; - ++str; - } - return 1; + if (!strncmp(str, "panic", 5)) { + panic_on_timeout = 1; + str = strchr(str, ','); + if (!str) + return 1; + ++str; + } + return 1; } __setup("nmi_watchdog=", setup_nmi_watchdog); struct perf_event_attr wd_hw_attr = { - .type = PERF_TYPE_HARDWARE, - .config = PERF_COUNT_HW_CPU_CYCLES, - .size = sizeof(struct perf_event_attr), - .pinned = 1, - .disabled = 1, + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .size = sizeof(struct perf_event_attr), + .pinned = 1, + .disabled = 1, }; struct perf_event_attr wd_sw_attr = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_CPU_CLOCK, - .size = sizeof(struct perf_event_attr), - .pinned = 1, - .disabled = 1, + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_CPU_CLOCK, + .size = sizeof(struct perf_event_attr), + .pinned = 1, + .disabled = 1, }; void wd_overflow(struct perf_event *event, int nmi, @@ -95,16 +95,15 @@ void wd_overflow(struct perf_event *event, int nmi, * Ayiee, looks like this CPU is stuck ... * wait a few IRQs (5 seconds) before doing the oops ... */ - per_cpu(alert_counter,cpu) += 1; - if (per_cpu(alert_counter,cpu) == 5) { - if (panic_on_timeout) { + per_cpu(alert_counter, cpu) += 1; + if (per_cpu(alert_counter, cpu) == 5) { + if (panic_on_timeout) panic("NMI Watchdog detected LOCKUP on cpu %d", cpu); - } else { + else WARN(1, "NMI Watchdog detected LOCKUP on cpu %d", cpu); - } } } else { - per_cpu(alert_counter,cpu) = 0; + per_cpu(alert_counter, cpu) = 0; } return; @@ -126,7 +125,7 @@ static int enable_nmi_watchdog(int cpu) event = perf_event_create_kernel_counter(wd_attr, cpu, -1, wd_overflow); if (IS_ERR(event)) { /* hardware doesn't exist or not supported, fallback to software events */ - printk("nmi_watchdog: hardware not available, trying software events\n"); + printk(KERN_INFO "nmi_watchdog: hardware not available, trying software events\n"); wd_attr = &wd_sw_attr; wd_attr->sample_period = NSEC_PER_SEC; event = perf_event_create_kernel_counter(wd_attr, cpu, -1, wd_overflow); @@ -182,7 +181,7 @@ int proc_nmi_enabled(struct ctl_table *table, int write, if (nmi_watchdog_enabled) { for_each_online_cpu(cpu) if (enable_nmi_watchdog(cpu)) { - printk("NMI watchdog failed configuration, " + printk(KERN_ERR "NMI watchdog failed configuration, " " can not be enabled\n"); } } else { -- cgit v1.2.3 From 5671a10e2bc7f99d9157c6044faf8be2ef302361 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Mar 2010 14:20:14 +0100 Subject: nmi_watchdog: Tell the world we're active Because I was wondering why perf stat wasn't working as expected.. 
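The one-line printk below is the whole patch; the underlying point is
that the watchdog's event was created with .pinned = 1 on every CPU, so
one generic hardware counter per CPU is permanently claimed and tools
such as perf stat have one fewer counter to schedule.  A sketch of how
other code can tell (using only fields this series already relies on):

	struct perf_event *event = per_cpu(nmi_watchdog_ev, cpu);

	/* an active watchdog event means one less free PMU counter */
	if (event && event->state > PERF_EVENT_STATE_OFF)
		printk(KERN_INFO "cpu%d: NMI watchdog holds a PMU counter\n", cpu);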
Signed-off-by: Peter Zijlstra
Cc: Don Zickus
LKML-Reference:
Signed-off-by: Ingo Molnar
---
 kernel/nmi_watchdog.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
index 0a6f57f537a..a79d211c30d 100644
--- a/kernel/nmi_watchdog.c
+++ b/kernel/nmi_watchdog.c
@@ -244,6 +244,8 @@ static int __init spawn_nmi_watchdog_task(void)
 	if (nonmi_watchdog)
 		return 0;
 
+	printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
+
 	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
 	if (err == NOTIFY_BAD) {
 		BUG();
--
cgit v1.2.3

From 58687acba59266735adb8ccd9b5b9aa2c7cd205b Mon Sep 17 00:00:00 2001
From: Don Zickus
Date: Fri, 7 May 2010 17:11:44 -0400
Subject: lockup_detector: Combine nmi_watchdog and softlockup detector

The new nmi_watchdog (which uses the perf event subsystem) is very
similar in structure to the softlockup detector.  Using Ingo's
suggestion, I combined the two functionalities into one file:
kernel/watchdog.c.

Now both the nmi_watchdog (or hardlockup detector) and softlockup
detector sit on top of the perf event subsystem, which is run every
60 seconds or so to see if there are any lockups.

To detect hardlockups, cpus not responding to interrupts, I implemented
an hrtimer that runs 5 times for every perf event overflow event.  If
that stops counting on a cpu, then the cpu is most likely in trouble.

To detect softlockups, tasks not yielding to the scheduler, I used the
previous kthread idea that now gets kicked every time the hrtimer
fires.  If the kthread isn't being scheduled, neither is anyone else,
and the warning is printed to the console.

I tested this on x86_64 and both the softlockup and hardlockup paths
work.

V2:
- cleaned up the Kconfig and softlockup combination
- surrounded hardlockup cases with #ifdef CONFIG_PERF_EVENTS_NMI
- separated out the softlockup case from perf event subsystem
- re-arranged the enabling/disabling nmi watchdog from proc space
- added cpumasks for hardlockup failure cases
- removed fallback to soft events if no PMU exists for hard events

V3:
- comment cleanups
- drop support for older softlockup code
- per_cpu cleanups
- completely remove software clock base hardlockup detector
- use per_cpu masking on hard/soft lockup detection
- #ifdef cleanups
- rename config option NMI_WATCHDOG to LOCKUP_DETECTOR
- documentation additions

V4:
- documentation fixes
- convert per_cpu to __get_cpu_var
- powerpc compile fixes

V5:
- split apart warn flags for hard and soft lockups

TODO:
- figure out how to make an arch-agnostic clock2cycles call (if
  possible) to feed into perf events as a sample period

[fweisbec: merged conflict patch]

Signed-off-by: Don Zickus
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Cyrill Gorcunov
Cc: Eric Paris
Cc: Randy Dunlap
LKML-Reference: <1273266711-18706-2-git-send-email-dzickus@redhat.com>
Signed-off-by: Frederic Weisbecker
---
 kernel/Makefile   |   3 +-
 kernel/sysctl.c   |  21 +-
 kernel/watchdog.c | 592 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 608 insertions(+), 8 deletions(-)
 create mode 100644 kernel/watchdog.c

(limited to 'kernel')

diff --git a/kernel/Makefile b/kernel/Makefile
index d5c30060ac1..6adeafc3e25 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -76,9 +76,8 @@ obj-$(CONFIG_GCOV_KERNEL) += gcov/
 obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += kgdb.o
-obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
-obj-$(CONFIG_NMI_WATCHDOG) += nmi_watchdog.o
 obj-$(CONFIG_DETECT_HUNG_TASK) +=
hung_task.o +obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a38af430f0d..0f9adda85f9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -74,7 +74,7 @@ #include #endif -#ifdef CONFIG_NMI_WATCHDOG +#ifdef CONFIG_LOCKUP_DETECTOR #include #endif @@ -686,16 +686,25 @@ static struct ctl_table kern_table[] = { .mode = 0444, .proc_handler = proc_dointvec, }, -#if defined(CONFIG_NMI_WATCHDOG) +#if defined(CONFIG_LOCKUP_DETECTOR) { - .procname = "nmi_watchdog", - .data = &nmi_watchdog_enabled, + .procname = "watchdog", + .data = &watchdog_enabled, .maxlen = sizeof (int), .mode = 0644, - .proc_handler = proc_nmi_enabled, + .proc_handler = proc_dowatchdog_enabled, + }, + { + .procname = "watchdog_thresh", + .data = &softlockup_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dowatchdog_thresh, + .extra1 = &neg_one, + .extra2 = &sixty, }, #endif -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_NMI_WATCHDOG) +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR) { .procname = "unknown_nmi_panic", .data = &unknown_nmi_panic, diff --git a/kernel/watchdog.c b/kernel/watchdog.c new file mode 100644 index 00000000000..6b7fad8497a --- /dev/null +++ b/kernel/watchdog.c @@ -0,0 +1,592 @@ +/* + * Detect hard and soft lockups on a system + * + * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. + * + * this code detects hard lockups: incidents in where on a CPU + * the kernel does not respond to anything except NMI. + * + * Note: Most of this code is borrowed heavily from softlockup.c, + * so thanks to Ingo for the initial implementation. + * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks + * to those contributors as well. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +int watchdog_enabled; +int __read_mostly softlockup_thresh = 60; + +static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); +static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); +static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer); +static DEFINE_PER_CPU(bool, softlockup_touch_sync); +static DEFINE_PER_CPU(bool, hard_watchdog_warn); +static DEFINE_PER_CPU(bool, soft_watchdog_warn); +#ifdef CONFIG_PERF_EVENTS_NMI +static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); +static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); +static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); +#endif + +static int __read_mostly did_panic; +static int __initdata no_watchdog; + + +/* boot commands */ +/* + * Should we panic when a soft-lockup or hard-lockup occurs: + */ +#ifdef CONFIG_PERF_EVENTS_NMI +static int hardlockup_panic; + +static int __init hardlockup_panic_setup(char *str) +{ + if (!strncmp(str, "panic", 5)) + hardlockup_panic = 1; + return 1; +} +__setup("nmi_watchdog=", hardlockup_panic_setup); +#endif + +unsigned int __read_mostly softlockup_panic = + CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; + +static int __init softlockup_panic_setup(char *str) +{ + softlockup_panic = simple_strtoul(str, NULL, 0); + + return 1; +} +__setup("softlockup_panic=", softlockup_panic_setup); + +static int __init nowatchdog_setup(char *str) +{ + no_watchdog = 1; + return 1; +} +__setup("nowatchdog", nowatchdog_setup); + +/* deprecated */ +static int __init nosoftlockup_setup(char *str) +{ + no_watchdog = 1; + return 1; +} +__setup("nosoftlockup", nosoftlockup_setup); +/* */ + + +/* + * Returns seconds, approximately. We don't need nanosecond + * resolution, and we don't need to waste time with a big divide when + * 2^30ns == 1.074s. + */ +static unsigned long get_timestamp(int this_cpu) +{ + return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */ +} + +static unsigned long get_sample_period(void) +{ + /* + * convert softlockup_thresh from seconds to ns + * the divide by 5 is to give hrtimer 5 chances to + * increment before the hardlockup detector generates + * a warning + */ + return softlockup_thresh / 5 * NSEC_PER_SEC; +} + +/* Commands for resetting the watchdog */ +static void __touch_watchdog(void) +{ + int this_cpu = raw_smp_processor_id(); + + __get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu); +} + +void touch_watchdog(void) +{ + __get_cpu_var(watchdog_touch_ts) = 0; +} +EXPORT_SYMBOL(touch_watchdog); + +void touch_all_watchdog(void) +{ + int cpu; + + /* + * this is done lockless + * do we care if a 0 races with a timestamp? 
+ * all it means is the softlock check starts one cycle later + */ + for_each_online_cpu(cpu) + per_cpu(watchdog_touch_ts, cpu) = 0; +} + +void touch_nmi_watchdog(void) +{ + touch_watchdog(); +} +EXPORT_SYMBOL(touch_nmi_watchdog); + +void touch_all_nmi_watchdog(void) +{ + touch_all_watchdog(); +} + +void touch_softlockup_watchdog(void) +{ + touch_watchdog(); +} + +void touch_all_softlockup_watchdogs(void) +{ + touch_all_watchdog(); +} + +void touch_softlockup_watchdog_sync(void) +{ + __raw_get_cpu_var(softlockup_touch_sync) = true; + __raw_get_cpu_var(watchdog_touch_ts) = 0; +} + +void softlockup_tick(void) +{ +} + +#ifdef CONFIG_PERF_EVENTS_NMI +/* watchdog detector functions */ +static int is_hardlockup(int cpu) +{ + unsigned long hrint = per_cpu(hrtimer_interrupts, cpu); + + if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint) + return 1; + + per_cpu(hrtimer_interrupts_saved, cpu) = hrint; + return 0; +} +#endif + +static int is_softlockup(unsigned long touch_ts, int cpu) +{ + unsigned long now = get_timestamp(cpu); + + /* Warn about unreasonable delays: */ + if (time_after(now, touch_ts + softlockup_thresh)) + return now - touch_ts; + + return 0; +} + +static int +watchdog_panic(struct notifier_block *this, unsigned long event, void *ptr) +{ + did_panic = 1; + + return NOTIFY_DONE; +} + +static struct notifier_block panic_block = { + .notifier_call = watchdog_panic, +}; + +#ifdef CONFIG_PERF_EVENTS_NMI +static struct perf_event_attr wd_hw_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .size = sizeof(struct perf_event_attr), + .pinned = 1, + .disabled = 1, +}; + +/* Callback function for perf event subsystem */ +void watchdog_overflow_callback(struct perf_event *event, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + int this_cpu = smp_processor_id(); + unsigned long touch_ts = per_cpu(watchdog_touch_ts, this_cpu); + + if (touch_ts == 0) { + __touch_watchdog(); + return; + } + + /* check for a hardlockup + * This is done by making sure our timer interrupt + * is incrementing. The timer interrupt should have + * fired multiple times before we overflow'd. If it hasn't + * then this is a good indication the cpu is stuck + */ + if (is_hardlockup(this_cpu)) { + /* only print hardlockups once */ + if (__get_cpu_var(hard_watchdog_warn) == true) + return; + + if (hardlockup_panic) + panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu); + else + WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu); + + __get_cpu_var(hard_watchdog_warn) = true; + return; + } + + __get_cpu_var(hard_watchdog_warn) = false; + return; +} +static void watchdog_interrupt_count(void) +{ + __get_cpu_var(hrtimer_interrupts)++; +} +#else +static inline void watchdog_interrupt_count(void) { return; } +#endif /* CONFIG_PERF_EVENTS_NMI */ + +/* watchdog kicker functions */ +static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) +{ + int this_cpu = smp_processor_id(); + unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts); + struct pt_regs *regs = get_irq_regs(); + int duration; + + /* kick the hardlockup detector */ + watchdog_interrupt_count(); + + /* kick the softlockup detector */ + wake_up_process(__get_cpu_var(softlockup_watchdog)); + + /* .. and repeat */ + hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period())); + + if (touch_ts == 0) { + if (unlikely(per_cpu(softlockup_touch_sync, this_cpu))) { + /* + * If the time stamp was touched atomically + * make sure the scheduler tick is up to date. 
+ */ + per_cpu(softlockup_touch_sync, this_cpu) = false; + sched_clock_tick(); + } + __touch_watchdog(); + return HRTIMER_RESTART; + } + + /* check for a softlockup + * This is done by making sure a high priority task is + * being scheduled. The task touches the watchdog to + * indicate it is getting cpu time. If it hasn't then + * this is a good indication some task is hogging the cpu + */ + duration = is_softlockup(touch_ts, this_cpu); + if (unlikely(duration)) { + /* only warn once */ + if (__get_cpu_var(soft_watchdog_warn) == true) + return HRTIMER_RESTART; + + printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", + this_cpu, duration, + current->comm, task_pid_nr(current)); + print_modules(); + print_irqtrace_events(current); + if (regs) + show_regs(regs); + else + dump_stack(); + + if (softlockup_panic) + panic("softlockup: hung tasks"); + __get_cpu_var(soft_watchdog_warn) = true; + } else + __get_cpu_var(soft_watchdog_warn) = false; + + return HRTIMER_RESTART; +} + + +/* + * The watchdog thread - touches the timestamp. + */ +static int watchdog(void *__bind_cpu) +{ + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, (unsigned long)__bind_cpu); + + sched_setscheduler(current, SCHED_FIFO, ¶m); + + /* initialize timestamp */ + __touch_watchdog(); + + /* kick off the timer for the hardlockup detector */ + /* done here because hrtimer_start can only pin to smp_processor_id() */ + hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()), + HRTIMER_MODE_REL_PINNED); + + set_current_state(TASK_INTERRUPTIBLE); + /* + * Run briefly once per second to reset the softlockup timestamp. + * If this gets delayed for more than 60 seconds then the + * debug-printout triggers in softlockup_tick(). + */ + while (!kthread_should_stop()) { + __touch_watchdog(); + schedule(); + + if (kthread_should_stop()) + break; + + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); + + return 0; +} + + +#ifdef CONFIG_PERF_EVENTS_NMI +static int watchdog_nmi_enable(int cpu) +{ + struct perf_event_attr *wd_attr; + struct perf_event *event = per_cpu(watchdog_ev, cpu); + + /* is it already setup and enabled? 
*/ + if (event && event->state > PERF_EVENT_STATE_OFF) + goto out; + + /* it is setup but not enabled */ + if (event != NULL) + goto out_enable; + + /* Try to register using hardware perf events */ + wd_attr = &wd_hw_attr; + wd_attr->sample_period = hw_nmi_get_sample_period(); + event = perf_event_create_kernel_counter(wd_attr, cpu, -1, watchdog_overflow_callback); + if (!IS_ERR(event)) { + printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); + goto out_save; + } + + printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event); + return -1; + + /* success path */ +out_save: + per_cpu(watchdog_ev, cpu) = event; +out_enable: + perf_event_enable(per_cpu(watchdog_ev, cpu)); +out: + return 0; +} + +static void watchdog_nmi_disable(int cpu) +{ + struct perf_event *event = per_cpu(watchdog_ev, cpu); + + if (event) { + perf_event_disable(event); + per_cpu(watchdog_ev, cpu) = NULL; + + /* should be in cleanup, but blocks oprofile */ + perf_event_release_kernel(event); + } + return; +} +#else +static int watchdog_nmi_enable(int cpu) { return 0; } +static void watchdog_nmi_disable(int cpu) { return; } +#endif /* CONFIG_PERF_EVENTS_NMI */ + +/* prepare/enable/disable routines */ +static int watchdog_prepare_cpu(int cpu) +{ + struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu); + + WARN_ON(per_cpu(softlockup_watchdog, cpu)); + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer->function = watchdog_timer_fn; + + return 0; +} + +static int watchdog_enable(int cpu) +{ + struct task_struct *p = per_cpu(softlockup_watchdog, cpu); + + /* enable the perf event */ + if (watchdog_nmi_enable(cpu) != 0) + return -1; + + /* create the watchdog thread */ + if (!p) { + p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); + if (IS_ERR(p)) { + printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); + return -1; + } + kthread_bind(p, cpu); + per_cpu(watchdog_touch_ts, cpu) = 0; + per_cpu(softlockup_watchdog, cpu) = p; + wake_up_process(p); + } + + return 0; +} + +static void watchdog_disable(int cpu) +{ + struct task_struct *p = per_cpu(softlockup_watchdog, cpu); + struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu); + + /* + * cancel the timer first to stop incrementing the stats + * and waking up the kthread + */ + hrtimer_cancel(hrtimer); + + /* disable the perf event */ + watchdog_nmi_disable(cpu); + + /* stop the watchdog thread */ + if (p) { + per_cpu(softlockup_watchdog, cpu) = NULL; + kthread_stop(p); + } + + /* if any cpu succeeds, watchdog is considered enabled for the system */ + watchdog_enabled = 1; +} + +static void watchdog_enable_all_cpus(void) +{ + int cpu; + int result; + + for_each_online_cpu(cpu) + result += watchdog_enable(cpu); + + if (result) + printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n"); + +} + +static void watchdog_disable_all_cpus(void) +{ + int cpu; + + for_each_online_cpu(cpu) + watchdog_disable(cpu); + + /* if all watchdogs are disabled, then they are disabled for the system */ + watchdog_enabled = 0; +} + + +/* sysctl functions */ +#ifdef CONFIG_SYSCTL +/* + * proc handler for /proc/sys/kernel/nmi_watchdog + */ + +int proc_dowatchdog_enabled(struct ctl_table *table, int write, + void __user *buffer, size_t *length, loff_t *ppos) +{ + proc_dointvec(table, write, buffer, length, ppos); + + if (watchdog_enabled) + watchdog_enable_all_cpus(); + else + watchdog_disable_all_cpus(); + return 0; +} + +int proc_dowatchdog_thresh(struct ctl_table *table, int write, + void 
__user *buffer,
+			   size_t *lenp, loff_t *ppos)
+{
+	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+}
+
+/* stub functions */
+int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
+			     void __user *buffer,
+			     size_t *lenp, loff_t *ppos)
+{
+	return proc_dowatchdog_thresh(table, write, buffer, lenp, ppos);
+}
+/* end of stub functions */
+#endif /* CONFIG_SYSCTL */
+
+
+/*
+ * Create/destroy watchdog threads as CPUs come and go:
+ */
+static int __cpuinit
+cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+	int hotcpu = (unsigned long)hcpu;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		if (watchdog_prepare_cpu(hotcpu))
+			return NOTIFY_BAD;
+		break;
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		if (watchdog_enable(hotcpu))
+			return NOTIFY_BAD;
+		break;
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+		watchdog_disable(hotcpu);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		watchdog_disable(hotcpu);
+		break;
+#endif /* CONFIG_HOTPLUG_CPU */
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata cpu_nfb = {
+	.notifier_call = cpu_callback
+};
+
+static int __init spawn_watchdog_task(void)
+{
+	void *cpu = (void *)(long)smp_processor_id();
+	int err;
+
+	if (no_watchdog)
+		return 0;
+
+	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
+	WARN_ON(err == NOTIFY_BAD);
+
+	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
+	register_cpu_notifier(&cpu_nfb);
+
+	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
+
+	return 0;
+}
+early_initcall(spawn_watchdog_task);
--
cgit v1.2.3

From 332fbdbca3f7716c5620970755ae054d213bcc4e Mon Sep 17 00:00:00 2001
From: Don Zickus
Date: Fri, 7 May 2010 17:11:45 -0400
Subject: lockup_detector: Touch_softlockup cleanups and softlockup_tick removal

Just some code cleanup to make touch_softlockup clearer and remove the
softlockup_tick function as it is no longer needed.

Also remove the /proc softlockup_thresh call as it has been changed to
watchdog_thresh.
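The touch convention that survives this cleanup fits in a few lines.  A
sketch (the demo_* names are invented; get_timestamp() is the helper
defined in watchdog.c above):

	static DEFINE_PER_CPU(unsigned long, demo_touch_ts);

	void demo_touch(void)
	{
		/* 0 means: skip the next check and restart the clock */
		__get_cpu_var(demo_touch_ts) = 0;
	}

	static void demo_timer_check(void)
	{
		if (__get_cpu_var(demo_touch_ts) == 0) {
			/* freshly touched: re-arm instead of checking */
			__get_cpu_var(demo_touch_ts) =
				get_timestamp(raw_smp_processor_id());
			return;
		}
		/* otherwise compare the timestamp's age to the threshold */
	}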
Signed-off-by: Don Zickus
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Cyrill Gorcunov
Cc: Eric Paris
Cc: Randy Dunlap
LKML-Reference: <1273266711-18706-3-git-send-email-dzickus@redhat.com>
Signed-off-by: Frederic Weisbecker
---
 kernel/sysctl.c   |  9 ---------
 kernel/timer.c    |  1 -
 kernel/watchdog.c | 35 +++--------------------------------
 3 files changed, 3 insertions(+), 42 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0f9adda85f9..999bc3fccf4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -817,15 +817,6 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
-	{
-		.procname	= "softlockup_thresh",
-		.data		= &softlockup_thresh,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dosoftlockup_thresh,
-		.extra1		= &neg_one,
-		.extra2		= &sixty,
-	},
 #endif
 #ifdef CONFIG_DETECT_HUNG_TASK
 	{
diff --git a/kernel/timer.c b/kernel/timer.c
index aeb6a54f277..e8de5eb07a0 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1225,7 +1225,6 @@ void run_local_timers(void)
 {
 	hrtimer_run_queues();
 	raise_softirq(TIMER_SOFTIRQ);
-	softlockup_tick();
 }
 
 /*
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6b7fad8497a..f1541b7e324 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -119,13 +119,12 @@ static void __touch_watchdog(void)
 	__get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu);
 }
 
-void touch_watchdog(void)
+void touch_softlockup_watchdog(void)
 {
 	__get_cpu_var(watchdog_touch_ts) = 0;
 }
-EXPORT_SYMBOL(touch_watchdog);
 
-void touch_all_watchdog(void)
+void touch_all_softlockup_watchdogs(void)
 {
 	int cpu;
 
@@ -140,35 +139,16 @@ void touch_all_watchdog(void)
 
 void touch_nmi_watchdog(void)
 {
-	touch_watchdog();
+	touch_softlockup_watchdog();
 }
 EXPORT_SYMBOL(touch_nmi_watchdog);
 
-void touch_all_nmi_watchdog(void)
-{
-	touch_all_watchdog();
-}
-
-void touch_softlockup_watchdog(void)
-{
-	touch_watchdog();
-}
-
-void touch_all_softlockup_watchdogs(void)
-{
-	touch_all_watchdog();
-}
-
 void touch_softlockup_watchdog_sync(void)
 {
 	__raw_get_cpu_var(softlockup_touch_sync) = true;
 	__raw_get_cpu_var(watchdog_touch_ts) = 0;
 }
 
-void softlockup_tick(void)
-{
-}
-
 #ifdef CONFIG_PERF_EVENTS_NMI
 /* watchdog detector functions */
 static int is_hardlockup(int cpu)
@@ -522,15 +502,6 @@ int proc_dowatchdog_thresh(struct ctl_table *table, int write,
 {
 	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 }
-
-/* stub functions */
-int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
-			     void __user *buffer,
-			     size_t *lenp, loff_t *ppos)
-{
-	return proc_dowatchdog_thresh(table, write, buffer, lenp, ppos);
-}
-/* end of stub functions */
 #endif /* CONFIG_SYSCTL */
--
cgit v1.2.3

From 2508ce1845a3b256798532b2c6b7997c2dc6533b Mon Sep 17 00:00:00 2001
From: Don Zickus
Date: Fri, 7 May 2010 17:11:46 -0400
Subject: lockup_detector: Remove old softlockup code

Now that it is no longer compiled or used, just remove it.

Also move some of the code wrapped with DETECT_SOFTLOCKUP to the
LOCKUP_DETECTOR wrappers because that is the code that uses it now.
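With the old implementation gone, the combined detector's timing reduces
to a couple of numbers (assuming the default softlockup_thresh of 60
seconds, per the code above): get_sample_period() returns
60 / 5 * NSEC_PER_SEC = 12 seconds, so the hrtimer ticks five times per
threshold window, and the NMI-side check is just "did the hrtimer fire
since my last overflow?":

	/* same logic as is_hardlockup() in kernel/watchdog.c above */
	static int sketch_is_hardlockup(int cpu)
	{
		unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);

		if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
			return 1;	/* timer interrupts stopped: cpu is stuck */

		per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
		return 0;
	}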
Signed-off-by: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Cyrill Gorcunov Cc: Eric Paris Cc: Randy Dunlap LKML-Reference: <1273266711-18706-4-git-send-email-dzickus@redhat.com> Signed-off-by: Frederic Weisbecker --- kernel/softlockup.c | 293 ---------------------------------------------------- kernel/sysctl.c | 22 ++-- 2 files changed, 10 insertions(+), 305 deletions(-) delete mode 100644 kernel/softlockup.c (limited to 'kernel') diff --git a/kernel/softlockup.c b/kernel/softlockup.c deleted file mode 100644 index 4b493f67dcb..00000000000 --- a/kernel/softlockup.c +++ /dev/null @@ -1,293 +0,0 @@ -/* - * Detect Soft Lockups - * - * started by Ingo Molnar, Copyright (C) 2005, 2006 Red Hat, Inc. - * - * this code detects soft lockups: incidents in where on a CPU - * the kernel does not reschedule for 10 seconds or more. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -static DEFINE_SPINLOCK(print_lock); - -static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */ -static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */ -static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); -static DEFINE_PER_CPU(bool, softlock_touch_sync); - -static int __read_mostly did_panic; -int __read_mostly softlockup_thresh = 60; - -/* - * Should we panic (and reboot, if panic_timeout= is set) when a - * soft-lockup occurs: - */ -unsigned int __read_mostly softlockup_panic = - CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; - -static int __init softlockup_panic_setup(char *str) -{ - softlockup_panic = simple_strtoul(str, NULL, 0); - - return 1; -} -__setup("softlockup_panic=", softlockup_panic_setup); - -static int -softlock_panic(struct notifier_block *this, unsigned long event, void *ptr) -{ - did_panic = 1; - - return NOTIFY_DONE; -} - -static struct notifier_block panic_block = { - .notifier_call = softlock_panic, -}; - -/* - * Returns seconds, approximately. We don't need nanosecond - * resolution, and we don't need to waste time with a big divide when - * 2^30ns == 1.074s. - */ -static unsigned long get_timestamp(int this_cpu) -{ - return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */ -} - -static void __touch_softlockup_watchdog(void) -{ - int this_cpu = raw_smp_processor_id(); - - __raw_get_cpu_var(softlockup_touch_ts) = get_timestamp(this_cpu); -} - -void touch_softlockup_watchdog(void) -{ - __raw_get_cpu_var(softlockup_touch_ts) = 0; -} -EXPORT_SYMBOL(touch_softlockup_watchdog); - -void touch_softlockup_watchdog_sync(void) -{ - __raw_get_cpu_var(softlock_touch_sync) = true; - __raw_get_cpu_var(softlockup_touch_ts) = 0; -} - -void touch_all_softlockup_watchdogs(void) -{ - int cpu; - - /* Cause each CPU to re-update its timestamp rather than complain */ - for_each_online_cpu(cpu) - per_cpu(softlockup_touch_ts, cpu) = 0; -} -EXPORT_SYMBOL(touch_all_softlockup_watchdogs); - -int proc_dosoftlockup_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) -{ - touch_all_softlockup_watchdogs(); - return proc_dointvec_minmax(table, write, buffer, lenp, ppos); -} - -/* - * This callback runs from the timer interrupt, and checks - * whether the watchdog thread has hung or not: - */ -void softlockup_tick(void) -{ - int this_cpu = smp_processor_id(); - unsigned long touch_ts = per_cpu(softlockup_touch_ts, this_cpu); - unsigned long print_ts; - struct pt_regs *regs = get_irq_regs(); - unsigned long now; - - /* Is detection switched off? 
*/ - if (!per_cpu(softlockup_watchdog, this_cpu) || softlockup_thresh <= 0) { - /* Be sure we don't false trigger if switched back on */ - if (touch_ts) - per_cpu(softlockup_touch_ts, this_cpu) = 0; - return; - } - - if (touch_ts == 0) { - if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) { - /* - * If the time stamp was touched atomically - * make sure the scheduler tick is up to date. - */ - per_cpu(softlock_touch_sync, this_cpu) = false; - sched_clock_tick(); - } - __touch_softlockup_watchdog(); - return; - } - - print_ts = per_cpu(softlockup_print_ts, this_cpu); - - /* report at most once a second */ - if (print_ts == touch_ts || did_panic) - return; - - /* do not print during early bootup: */ - if (unlikely(system_state != SYSTEM_RUNNING)) { - __touch_softlockup_watchdog(); - return; - } - - now = get_timestamp(this_cpu); - - /* - * Wake up the high-prio watchdog task twice per - * threshold timespan. - */ - if (time_after(now - softlockup_thresh/2, touch_ts)) - wake_up_process(per_cpu(softlockup_watchdog, this_cpu)); - - /* Warn about unreasonable delays: */ - if (time_before_eq(now - softlockup_thresh, touch_ts)) - return; - - per_cpu(softlockup_print_ts, this_cpu) = touch_ts; - - spin_lock(&print_lock); - printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n", - this_cpu, now - touch_ts, - current->comm, task_pid_nr(current)); - print_modules(); - print_irqtrace_events(current); - if (regs) - show_regs(regs); - else - dump_stack(); - spin_unlock(&print_lock); - - if (softlockup_panic) - panic("softlockup: hung tasks"); -} - -/* - * The watchdog thread - runs every second and touches the timestamp. - */ -static int watchdog(void *__bind_cpu) -{ - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; - - sched_setscheduler(current, SCHED_FIFO, ¶m); - - /* initialize timestamp */ - __touch_softlockup_watchdog(); - - set_current_state(TASK_INTERRUPTIBLE); - /* - * Run briefly once per second to reset the softlockup timestamp. - * If this gets delayed for more than 60 seconds then the - * debug-printout triggers in softlockup_tick(). - */ - while (!kthread_should_stop()) { - __touch_softlockup_watchdog(); - schedule(); - - if (kthread_should_stop()) - break; - - set_current_state(TASK_INTERRUPTIBLE); - } - __set_current_state(TASK_RUNNING); - - return 0; -} - -/* - * Create/destroy watchdog threads as CPUs come and go: - */ -static int __cpuinit -cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) -{ - int hotcpu = (unsigned long)hcpu; - struct task_struct *p; - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - BUG_ON(per_cpu(softlockup_watchdog, hotcpu)); - p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu); - if (IS_ERR(p)) { - printk(KERN_ERR "watchdog for %i failed\n", hotcpu); - return NOTIFY_BAD; - } - per_cpu(softlockup_touch_ts, hotcpu) = 0; - per_cpu(softlockup_watchdog, hotcpu) = p; - kthread_bind(p, hotcpu); - break; - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - wake_up_process(per_cpu(softlockup_watchdog, hotcpu)); - break; -#ifdef CONFIG_HOTPLUG_CPU - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - if (!per_cpu(softlockup_watchdog, hotcpu)) - break; - /* Unbind so it can run. Fall thru. 
-		kthread_bind(per_cpu(softlockup_watchdog, hotcpu),
-			     cpumask_any(cpu_online_mask));
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		p = per_cpu(softlockup_watchdog, hotcpu);
-		per_cpu(softlockup_watchdog, hotcpu) = NULL;
-		kthread_stop(p);
-		break;
-#endif /* CONFIG_HOTPLUG_CPU */
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block __cpuinitdata cpu_nfb = {
-	.notifier_call = cpu_callback
-};
-
-static int __initdata nosoftlockup;
-
-static int __init nosoftlockup_setup(char *str)
-{
-	nosoftlockup = 1;
-	return 1;
-}
-__setup("nosoftlockup", nosoftlockup_setup);
-
-static int __init spawn_softlockup_task(void)
-{
-	void *cpu = (void *)(long)smp_processor_id();
-	int err;
-
-	if (nosoftlockup)
-		return 0;
-
-	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
-	if (err == NOTIFY_BAD) {
-		BUG();
-		return 1;
-	}
-	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
-	register_cpu_notifier(&cpu_nfb);
-
-	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
-
-	return 0;
-}
-early_initcall(spawn_softlockup_task);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 999bc3fccf4..04bcd8abe09 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -108,7 +108,7 @@ extern int blk_iopoll_enabled;
 #endif
 
 /* Constants used for minimum and maximum */
-#ifdef CONFIG_DETECT_SOFTLOCKUP
+#ifdef CONFIG_LOCKUP_DETECTOR
 static int sixty = 60;
 static int neg_one = -1;
 #endif
@@ -703,6 +703,15 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &neg_one,
 		.extra2		= &sixty,
 	},
+	{
+		.procname	= "softlockup_panic",
+		.data		= &softlockup_panic,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
 #endif
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
 	{
@@ -807,17 +816,6 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= proc_dointvec,
 	},
 #endif
-#ifdef CONFIG_DETECT_SOFTLOCKUP
-	{
-		.procname	= "softlockup_panic",
-		.data		= &softlockup_panic,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &one,
-	},
-#endif
 #ifdef CONFIG_DETECT_HUNG_TASK
 	{
 		.procname	= "hung_task_panic",
-- cgit v1.2.3

From f69bcf60c3f17aa367e16eef7bc6ab001ea6d58a Mon Sep 17 00:00:00 2001
From: Don Zickus
Date: Fri, 7 May 2010 17:11:47 -0400
Subject: lockup_detector: Remove nmi_watchdog.c file

This file was migrated to kernel/watchdog.c and then combined with kernel/softlockup.c. As a result, kernel/nmi_watchdog.c is no longer needed. Just remove it.

Signed-off-by: Don Zickus
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Cyrill Gorcunov
Cc: Eric Paris
Cc: Randy Dunlap
LKML-Reference: <1273266711-18706-5-git-send-email-dzickus@redhat.com>
Signed-off-by: Frederic Weisbecker
---
 kernel/nmi_watchdog.c | 259 --------------------------------------------------
 1 file changed, 259 deletions(-)
 delete mode 100644 kernel/nmi_watchdog.c

(limited to 'kernel')

diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
deleted file mode 100644
index a79d211c30d..00000000000
--- a/kernel/nmi_watchdog.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Detect Hard Lockups using the NMI
- *
- * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
- *
- * this code detects hard lockups: incidents in where on a CPU
- * the kernel does not respond to anything except NMI.
- *
- * Note: Most of this code is borrowed heavily from softlockup.c,
- * so thanks to Ingo for the initial implementation.
- * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks
- * to those contributors as well.
- */
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-
-static DEFINE_PER_CPU(struct perf_event *, nmi_watchdog_ev);
-static DEFINE_PER_CPU(int, nmi_watchdog_touch);
-static DEFINE_PER_CPU(long, alert_counter);
-
-static int panic_on_timeout;
-
-void touch_nmi_watchdog(void)
-{
-	__raw_get_cpu_var(nmi_watchdog_touch) = 1;
-	touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL(touch_nmi_watchdog);
-
-void touch_all_nmi_watchdog(void)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu)
-		per_cpu(nmi_watchdog_touch, cpu) = 1;
-	touch_softlockup_watchdog();
-}
-
-static int __init setup_nmi_watchdog(char *str)
-{
-	if (!strncmp(str, "panic", 5)) {
-		panic_on_timeout = 1;
-		str = strchr(str, ',');
-		if (!str)
-			return 1;
-		++str;
-	}
-	return 1;
-}
-__setup("nmi_watchdog=", setup_nmi_watchdog);
-
-struct perf_event_attr wd_hw_attr = {
-	.type		= PERF_TYPE_HARDWARE,
-	.config		= PERF_COUNT_HW_CPU_CYCLES,
-	.size		= sizeof(struct perf_event_attr),
-	.pinned		= 1,
-	.disabled	= 1,
-};
-
-struct perf_event_attr wd_sw_attr = {
-	.type		= PERF_TYPE_SOFTWARE,
-	.config		= PERF_COUNT_SW_CPU_CLOCK,
-	.size		= sizeof(struct perf_event_attr),
-	.pinned		= 1,
-	.disabled	= 1,
-};
-
-void wd_overflow(struct perf_event *event, int nmi,
-		 struct perf_sample_data *data,
-		 struct pt_regs *regs)
-{
-	int cpu = smp_processor_id();
-	int touched = 0;
-
-	if (__get_cpu_var(nmi_watchdog_touch)) {
-		per_cpu(nmi_watchdog_touch, cpu) = 0;
-		touched = 1;
-	}
-
-	/* check to see if the cpu is doing anything */
-	if (!touched && hw_nmi_is_cpu_stuck(regs)) {
-		/*
-		 * Ayiee, looks like this CPU is stuck ...
-		 * wait a few IRQs (5 seconds) before doing the oops ...
- */ - per_cpu(alert_counter, cpu) += 1; - if (per_cpu(alert_counter, cpu) == 5) { - if (panic_on_timeout) - panic("NMI Watchdog detected LOCKUP on cpu %d", cpu); - else - WARN(1, "NMI Watchdog detected LOCKUP on cpu %d", cpu); - } - } else { - per_cpu(alert_counter, cpu) = 0; - } - - return; -} - -static int enable_nmi_watchdog(int cpu) -{ - struct perf_event *event; - struct perf_event_attr *wd_attr; - - event = per_cpu(nmi_watchdog_ev, cpu); - if (event && event->state > PERF_EVENT_STATE_OFF) - return 0; - - if (event == NULL) { - /* Try to register using hardware perf events first */ - wd_attr = &wd_hw_attr; - wd_attr->sample_period = hw_nmi_get_sample_period(); - event = perf_event_create_kernel_counter(wd_attr, cpu, -1, wd_overflow); - if (IS_ERR(event)) { - /* hardware doesn't exist or not supported, fallback to software events */ - printk(KERN_INFO "nmi_watchdog: hardware not available, trying software events\n"); - wd_attr = &wd_sw_attr; - wd_attr->sample_period = NSEC_PER_SEC; - event = perf_event_create_kernel_counter(wd_attr, cpu, -1, wd_overflow); - if (IS_ERR(event)) { - printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", cpu, event); - return -1; - } - } - per_cpu(nmi_watchdog_ev, cpu) = event; - } - perf_event_enable(per_cpu(nmi_watchdog_ev, cpu)); - return 0; -} - -static void disable_nmi_watchdog(int cpu) -{ - struct perf_event *event; - - event = per_cpu(nmi_watchdog_ev, cpu); - if (event) { - perf_event_disable(per_cpu(nmi_watchdog_ev, cpu)); - per_cpu(nmi_watchdog_ev, cpu) = NULL; - perf_event_release_kernel(event); - } -} - -#ifdef CONFIG_SYSCTL -/* - * proc handler for /proc/sys/kernel/nmi_watchdog - */ -int nmi_watchdog_enabled; - -int proc_nmi_enabled(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) -{ - int cpu; - - if (!write) { - struct perf_event *event; - for_each_online_cpu(cpu) { - event = per_cpu(nmi_watchdog_ev, cpu); - if (event && event->state > PERF_EVENT_STATE_OFF) { - nmi_watchdog_enabled = 1; - break; - } - } - proc_dointvec(table, write, buffer, length, ppos); - return 0; - } - - touch_all_nmi_watchdog(); - proc_dointvec(table, write, buffer, length, ppos); - if (nmi_watchdog_enabled) { - for_each_online_cpu(cpu) - if (enable_nmi_watchdog(cpu)) { - printk(KERN_ERR "NMI watchdog failed configuration, " - " can not be enabled\n"); - } - } else { - for_each_online_cpu(cpu) - disable_nmi_watchdog(cpu); - } - return 0; -} - -#endif /* CONFIG_SYSCTL */ - -/* - * Create/destroy watchdog threads as CPUs come and go: - */ -static int __cpuinit -cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) -{ - int hotcpu = (unsigned long)hcpu; - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - per_cpu(nmi_watchdog_touch, hotcpu) = 0; - break; - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - if (enable_nmi_watchdog(hotcpu)) - return NOTIFY_BAD; - break; -#ifdef CONFIG_HOTPLUG_CPU - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - disable_nmi_watchdog(hotcpu); - case CPU_DEAD: - case CPU_DEAD_FROZEN: - break; -#endif /* CONFIG_HOTPLUG_CPU */ - } - return NOTIFY_OK; -} - -static struct notifier_block __cpuinitdata cpu_nfb = { - .notifier_call = cpu_callback -}; - -static int __initdata nonmi_watchdog; - -static int __init nonmi_watchdog_setup(char *str) -{ - nonmi_watchdog = 1; - return 1; -} -__setup("nonmi_watchdog", nonmi_watchdog_setup); - -static int __init spawn_nmi_watchdog_task(void) -{ - void *cpu = (void *)(long)smp_processor_id(); - int err; - - if 
-		return 0;
-
-	printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
-
-	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
-	if (err == NOTIFY_BAD) {
-		BUG();
-		return 1;
-	}
-	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
-	register_cpu_notifier(&cpu_nfb);
-
-	return 0;
-}
-early_initcall(spawn_nmi_watchdog_task);
-- cgit v1.2.3

From d7c547335fa6b0090fa09c46ea0e965ac273a27e Mon Sep 17 00:00:00 2001
From: Don Zickus
Date: Fri, 7 May 2010 17:11:51 -0400
Subject: lockup_detector: Separate touch_nmi_watchdog code path from touch_watchdog

When I combined the nmi_watchdog (hardlockup) and softlockup code, I also combined the paths that touch_watchdog and touch_nmi_watchdog took. As Frederic W. pointed out, this may not be the best idea: the touch_watchdog case probably should not reset the hardlockup count. Therefore the patch below falls back to the previous approach of keeping touch_nmi_watchdog a superset of touch_watchdog.

Signed-off-by: Don Zickus
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Cyrill Gorcunov
Cc: Eric Paris
Cc: Randy Dunlap
LKML-Reference: <1273266711-18706-9-git-send-email-dzickus@redhat.com>
Signed-off-by: Frederic Weisbecker
---
 kernel/watchdog.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index f1541b7e324..57b8e2c25ed 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -31,6 +31,7 @@ int watchdog_enabled;
 int __read_mostly softlockup_thresh = 60;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
+static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
@@ -139,6 +140,7 @@ void touch_all_softlockup_watchdogs(void)
 
 void touch_nmi_watchdog(void)
 {
+	__get_cpu_var(watchdog_nmi_touch) = true;
 	touch_softlockup_watchdog();
 }
 EXPORT_SYMBOL(touch_nmi_watchdog);
@@ -201,10 +203,9 @@ void watchdog_overflow_callback(struct perf_event *event, int nmi,
 		 struct pt_regs *regs)
 {
 	int this_cpu = smp_processor_id();
-	unsigned long touch_ts = per_cpu(watchdog_touch_ts, this_cpu);
 
-	if (touch_ts == 0) {
-		__touch_watchdog();
+	if (__get_cpu_var(watchdog_nmi_touch) == true) {
+		__get_cpu_var(watchdog_nmi_touch) = false;
 		return;
 	}
-- cgit v1.2.3

From 0167c781907fcdc3e1f144ef5ce31d402c91eb94 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Thu, 13 May 2010 08:53:33 +0200
Subject: watchdog: Export touch_softlockup_watchdog

There are modules that rely on it:

  ERROR: "touch_softlockup_watchdog" [drivers/video/nvidia/nvidiafb.ko] undefined!
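[ Editor's note: a minimal sketch of the failure mode, not part of the
  patch. The module below is hypothetical (demo_init/demo_exit are made
  up names), and in this era touch_softlockup_watchdog() was declared
  via <linux/sched.h>. Built out of tree against a kernel lacking the
  EXPORT_SYMBOL() added below, modpost fails with exactly the
  "undefined!" error quoted above. ]

/*
 * Hypothetical out-of-tree module: it calls touch_softlockup_watchdog()
 * after a slow, CPU-bound hardware operation so the softlockup detector
 * does not report a false positive.  Linking this as a module requires
 * the symbol to be exported by the kernel.
 */
#include <linux/module.h>
#include <linux/sched.h>	/* declares touch_softlockup_watchdog() */

static int __init demo_init(void)
{
	/* ... long hardware setup would go here ... */
	touch_softlockup_watchdog();	/* reset the softlockup timestamp */
	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");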
Cc: Frederic Weisbecker
Cc: Don Zickus
Cc: Peter Zijlstra
Cc: Cyrill Gorcunov
LKML-Reference: <1273713674-8434-1-git-send-regression-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar
---
 kernel/watchdog.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 57b8e2c25ed..be5e74e62be 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -124,6 +124,7 @@ void touch_softlockup_watchdog(void)
 {
 	__get_cpu_var(watchdog_touch_ts) = 0;
 }
+EXPORT_SYMBOL(touch_softlockup_watchdog);
 
 void touch_all_softlockup_watchdogs(void)
 {
-- cgit v1.2.3

From 23637d477c1f53acbb176a02c241d60a25888fae Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Sat, 15 May 2010 23:15:20 +0200
Subject: lockup_detector: Introduce CONFIG_HARDLOCKUP_DETECTOR

This new config further simplifies the lockup detector dependencies and makes it easier to sort archs that support the new generic lockup detector from those that still have their own, especially archs that are in the middle of this migration.

Instead of checking for CONFIG_LOCKUP_DETECTOR + CONFIG_PERF_EVENTS_NMI each time an arch wants to know whether it needs to build its own lockup detector, take a shortcut with this new config. It is enabled only if the hardlockup detection part of the whole lockup detector is on.

Signed-off-by: Frederic Weisbecker
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Don Zickus
Cc: Cyrill Gorcunov
---
 kernel/watchdog.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index be5e74e62be..83fb63155cb 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -37,7 +37,7 @@ static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
 static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 static DEFINE_PER_CPU(bool, soft_watchdog_warn);
-#ifdef CONFIG_PERF_EVENTS_NMI
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
@@ -51,7 +51,7 @@ static int __initdata no_watchdog;
 /*
  * Should we panic when a soft-lockup or hard-lockup occurs:
  */
-#ifdef CONFIG_PERF_EVENTS_NMI
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 static int hardlockup_panic;
 
 static int __init hardlockup_panic_setup(char *str)
@@ -152,7 +152,7 @@ void touch_softlockup_watchdog_sync(void)
 	__raw_get_cpu_var(watchdog_touch_ts) = 0;
 }
 
-#ifdef CONFIG_PERF_EVENTS_NMI
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 /* watchdog detector functions */
 static int is_hardlockup(int cpu)
 {
@@ -189,7 +189,7 @@ static struct notifier_block panic_block = {
 	.notifier_call = watchdog_panic,
 };
 
-#ifdef CONFIG_PERF_EVENTS_NMI
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 static struct perf_event_attr wd_hw_attr = {
 	.type		= PERF_TYPE_HARDWARE,
 	.config		= PERF_COUNT_HW_CPU_CYCLES,
@@ -239,7 +239,7 @@ static void watchdog_interrupt_count(void)
 }
 #else
 static inline void watchdog_interrupt_count(void) { return; }
-#endif /* CONFIG_PERF_EVENTS_NMI */
+#endif /* CONFIG_HARDLOCKUP_DETECTOR */
 
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
@@ -342,7 +342,7 @@ static int watchdog(void *__bind_cpu)
 }
 
 
-#ifdef CONFIG_PERF_EVENTS_NMI
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 static int watchdog_nmi_enable(int cpu)
 {
 	struct perf_event_attr *wd_attr;
@@ -393,7 +393,7 @@ static void watchdog_nmi_disable(int cpu)
 #else
 static int watchdog_nmi_enable(int cpu) { return 0; }
 static void watchdog_nmi_disable(int cpu) { return; }
-#endif /* CONFIG_PERF_EVENTS_NMI */
+#endif /* CONFIG_HARDLOCKUP_DETECTOR */
 
 /* prepare/enable/disable routines */
 static int watchdog_prepare_cpu(int cpu)
-- cgit v1.2.3

From cafcd80d216bc2136b8edbb794327e495792c666 Mon Sep 17 00:00:00 2001
From: Don Zickus
Date: Fri, 14 May 2010 11:11:21 -0400
Subject: lockup_detector: Cross arch compile fixes

Combining the softlockup and hardlockup code causes watchdog.c to build even without hardlockup detection support. So if an arch that has the previous and the new NMI watchdog implementations cohabiting wants to know whether the generic one is in use, CONFIG_LOCKUP_DETECTOR is not a reliable check. We need to use CONFIG_HARDLOCKUP_DETECTOR instead.

Fixes:

  kernel/built-in.o: In function `touch_nmi_watchdog':
  (.text+0x449bc): multiple definition of `touch_nmi_watchdog'
  arch/sparc/kernel/built-in.o:(.text+0x11b28): first defined here

Signed-off-by: Don Zickus
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Don Zickus
Cc: Cyrill Gorcunov
LKML-Reference: <20100514151121.GR15159@redhat.com>
[ use CONFIG_HARDLOCKUP_DETECTOR instead of CONFIG_PERF_EVENTS_NMI ]
Signed-off-by: Frederic Weisbecker
---
 kernel/watchdog.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 83fb63155cb..e53622c1465 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -31,13 +31,13 @@ int watchdog_enabled;
 int __read_mostly softlockup_thresh = 60;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
-static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
-static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 static DEFINE_PER_CPU(bool, soft_watchdog_warn);
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
+static DEFINE_PER_CPU(bool, hard_watchdog_warn);
+static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
@@ -139,6 +139,7 @@ void touch_all_softlockup_watchdogs(void)
 		per_cpu(watchdog_touch_ts, cpu) = 0;
 }
 
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 void touch_nmi_watchdog(void)
 {
 	__get_cpu_var(watchdog_nmi_touch) = true;
@@ -146,6 +147,8 @@ void touch_nmi_watchdog(void)
 }
 EXPORT_SYMBOL(touch_nmi_watchdog);
 
+#endif
+
 void touch_softlockup_watchdog_sync(void)
 {
 	__raw_get_cpu_var(softlockup_touch_sync) = true;
-- cgit v1.2.3

From 26e09c6eee14f4827b55137ba0eedc4e77cd50ab Mon Sep 17 00:00:00 2001
From: Don Zickus
Date: Mon, 17 May 2010 18:06:04 -0400
Subject: lockup_detector: Convert per_cpu to __get_cpu_var for readability

Just a bunch of conversions, as suggested by Frederic W. __get_cpu_var() provides preemption-disabled checks. It also improves readability, as it makes it obvious that we are now dealing with these variables locally.
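[ Editor's note: a small sketch of the conversion pattern, using a
  hypothetical per-CPU variable rather than one from watchdog.c. Per the
  commit message, __get_cpu_var() both reads as an explicitly local
  access and provides preemption-disabled checking. ]

/* Hypothetical per-CPU counter, for illustration only. */
static DEFINE_PER_CPU(unsigned long, demo_hits);

static void demo_record_hit(void)
{
	/* Before: the CPU must be spelled out, even for a local access. */
	per_cpu(demo_hits, smp_processor_id())++;

	/*
	 * After: obviously "this CPU", and with CONFIG_DEBUG_PREEMPT the
	 * accessor complains if preemption is not disabled here.
	 */
	__get_cpu_var(demo_hits)++;
}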
Signed-off-by: Don Zickus
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Don Zickus
Cc: Cyrill Gorcunov
LKML-Reference: <1274133966-18415-2-git-send-email-dzickus@redhat.com>
Signed-off-by: Frederic Weisbecker
---
 kernel/watchdog.c | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

(limited to 'kernel')

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index e53622c1465..91b0b26adc6 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -115,7 +115,7 @@ static unsigned long get_sample_period(void)
 /* Commands for resetting the watchdog */
 static void __touch_watchdog(void)
 {
-	int this_cpu = raw_smp_processor_id();
+	int this_cpu = smp_processor_id();
 
 	__get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu);
 }
@@ -157,21 +157,21 @@ void touch_softlockup_watchdog_sync(void)
 
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 /* watchdog detector functions */
-static int is_hardlockup(int cpu)
+static int is_hardlockup(void)
 {
-	unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
+	unsigned long hrint = __get_cpu_var(hrtimer_interrupts);
 
-	if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
+	if (__get_cpu_var(hrtimer_interrupts_saved) == hrint)
 		return 1;
 
-	per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
+	__get_cpu_var(hrtimer_interrupts_saved) = hrint;
 	return 0;
 }
 #endif
 
-static int is_softlockup(unsigned long touch_ts, int cpu)
+static int is_softlockup(unsigned long touch_ts)
 {
-	unsigned long now = get_timestamp(cpu);
+	unsigned long now = get_timestamp(smp_processor_id());
 
 	/* Warn about unreasonable delays: */
 	if (time_after(now, touch_ts + softlockup_thresh))
@@ -206,8 +206,6 @@ void watchdog_overflow_callback(struct perf_event *event, int nmi,
 		 struct perf_sample_data *data,
 		 struct pt_regs *regs)
 {
-	int this_cpu = smp_processor_id();
-
 	if (__get_cpu_var(watchdog_nmi_touch) == true) {
 		__get_cpu_var(watchdog_nmi_touch) = false;
 		return;
@@ -219,7 +217,9 @@ void watchdog_overflow_callback(struct perf_event *event, int nmi,
 	 * fired multiple times before we overflow'd.  If it hasn't
 	 * then this is a good indication the cpu is stuck
 	 */
-	if (is_hardlockup(this_cpu)) {
+	if (is_hardlockup()) {
+		int this_cpu = smp_processor_id();
+
 		/* only print hardlockups once */
 		if (__get_cpu_var(hard_watchdog_warn) == true)
 			return;
@@ -247,7 +247,6 @@ static inline void watchdog_interrupt_count(void) { return; }
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
-	int this_cpu = smp_processor_id();
 	unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts);
 	struct pt_regs *regs = get_irq_regs();
 	int duration;
@@ -262,12 +261,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
 
 	if (touch_ts == 0) {
-		if (unlikely(per_cpu(softlockup_touch_sync, this_cpu))) {
+		if (unlikely(__get_cpu_var(softlockup_touch_sync))) {
 			/*
 			 * If the time stamp was touched atomically
 			 * make sure the scheduler tick is up to date.
 			 */
-			per_cpu(softlockup_touch_sync, this_cpu) = false;
+			__get_cpu_var(softlockup_touch_sync) = false;
 			sched_clock_tick();
 		}
 		__touch_watchdog();
 		return;
@@ -280,14 +279,14 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	 * indicate it is getting cpu time.  If it hasn't then
 	 * this is a good indication some task is hogging the cpu
 	 */
-	duration = is_softlockup(touch_ts, this_cpu);
+	duration = is_softlockup(touch_ts);
 	if (unlikely(duration)) {
 		/* only warn once */
 		if (__get_cpu_var(soft_watchdog_warn) == true)
 			return HRTIMER_RESTART;
 
 		printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
-			this_cpu, duration,
+			smp_processor_id(), duration,
 			current->comm, task_pid_nr(current));
 		print_modules();
 		print_irqtrace_events(current);
@@ -309,10 +308,10 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 /*
  * The watchdog thread - touches the timestamp.
  */
-static int watchdog(void *__bind_cpu)
+static int watchdog(void *unused)
 {
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, (unsigned long)__bind_cpu);
+	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
 
 	sched_setscheduler(current, SCHED_FIFO, &param);
 
@@ -328,7 +327,7 @@ static int watchdog(void *__bind_cpu)
 	/*
 	 * Run briefly once per second to reset the softlockup timestamp.
 	 * If this gets delayed for more than 60 seconds then the
-	 * debug-printout triggers in softlockup_tick().
+	 * debug-printout triggers in watchdog_timer_fn().
 	 */
 	while (!kthread_should_stop()) {
 		__touch_watchdog();
-- cgit v1.2.3

From eb703f98191a505f78d0066712ad67d5dedc4c90 Mon Sep 17 00:00:00 2001
From: Kulikov Vasiliy
Date: Mon, 5 Jul 2010 12:00:54 +0400
Subject: kernel/watchdog: Initialize 'result'

The variable on the stack is not initialized to zero; initialize it explicitly. This bug was found by a compiler warning:

  kernel/watchdog.c:463: warning: 'result' may be used uninitialized in this function

Signed-off-by: Kulikov Vasiliy
Acked-by: Don Zickus
Cc: Frederic Weisbecker
Cc: Peter Zijlstra
Cc: Arnaldo Carvalho de Melo
Cc: Frederic Weisbecker
Cc: Paul Mackerras
Cc: Mike Galbraith
Cc: Steven Rostedt
LKML-Reference: <1278316854-28442-1-git-send-email-segooon@gmail.com>
Signed-off-by: Ingo Molnar
---
 kernel/watchdog.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 91b0b26adc6..613bc1f0461 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -460,7 +460,7 @@ static void watchdog_disable(int cpu)
 static void watchdog_enable_all_cpus(void)
 {
 	int cpu;
-	int result;
+	int result = 0;
 
 	for_each_online_cpu(cpu)
 		result += watchdog_enable(cpu);
-- cgit v1.2.3
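[ Editor's note: a self-contained userspace reduction of the bug fixed
  above, with hypothetical names; it is not kernel code. Accumulating
  into an uninitialized stack variable reads indeterminate garbage,
  which is exactly what the "= 0" initializer prevents. ]

#include <stdio.h>

/* Stand-in for watchdog_enable(): a nonzero return means failure. */
static int enable_one(int cpu)
{
	return (cpu == 2) ? 1 : 0;	/* pretend CPU 2 fails to enable */
}

int main(void)
{
	int cpu;
	int result = 0;	/* without "= 0", "result +=" reads stack garbage */

	for (cpu = 0; cpu < 4; cpu++)
		result += enable_one(cpu);

	printf("%d cpu(s) failed to enable\n", result);
	return result ? 1 : 0;
}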