aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ftrace.txt1353
-rw-r--r--Makefile2
-rw-r--r--arch/mips/mm/c-r3k.c6
-rw-r--r--arch/mips/mm/page.c61
-rw-r--r--arch/mips/mm/sc-rm7k.c4
-rw-r--r--arch/powerpc/kernel/legacy_serial.c5
-rw-r--r--arch/powerpc/kernel/of_platform.c2
-rw-r--r--arch/um/Makefile1
-rw-r--r--arch/um/Makefile-i3867
-rw-r--r--arch/um/Makefile-x86_643
-rw-r--r--arch/x86/kernel/.gitignore1
-rw-r--r--arch/x86/pci/common.c8
-rw-r--r--crypto/chainiv.c10
-rw-r--r--crypto/tcrypt.c10
-rw-r--r--drivers/ata/ahci.c16
-rw-r--r--drivers/ide/arm/palm_bk3710.c38
-rw-r--r--drivers/ide/ide-probe.c10
-rw-r--r--drivers/ide/ide.c24
-rw-r--r--drivers/ide/pci/it8213.c3
-rw-r--r--drivers/ide/pci/ns87415.c6
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c4
-rw-r--r--drivers/md/raid5.c7
-rw-r--r--drivers/mmc/host/pxamci.c13
-rw-r--r--drivers/net/ehea/ehea.h8
-rw-r--r--drivers/net/ehea/ehea_main.c42
-rw-r--r--drivers/net/forcedeth.c15
-rw-r--r--drivers/net/fs_enet/mac-fcc.c3
-rw-r--r--drivers/net/ibm_newemac/core.c8
-rw-r--r--drivers/net/pasemi_mac.c2
-rw-r--r--drivers/net/wan/hdlc_fr.c1
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-3945.c6
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-4965.c6
-rw-r--r--drivers/net/wireless/iwlwifi/iwl3945-base.c3
-rw-r--r--drivers/net/wireless/iwlwifi/iwl4965-base.c3
-rw-r--r--drivers/net/wireless/libertas/if_usb.c1
-rw-r--r--drivers/usb/core/hcd.c9
-rw-r--r--drivers/usb/core/hcd.h2
-rw-r--r--drivers/usb/core/hub.c9
-rw-r--r--drivers/usb/host/ohci-at91.c1
-rw-r--r--drivers/usb/host/ohci-au1xxx.c1
-rw-r--r--drivers/usb/host/ohci-ep93xx.c1
-rw-r--r--drivers/usb/host/ohci-hub.c53
-rw-r--r--drivers/usb/host/ohci-lh7a404.c1
-rw-r--r--drivers/usb/host/ohci-omap.c1
-rw-r--r--drivers/usb/host/ohci-pci.c1
-rw-r--r--drivers/usb/host/ohci-pnx4008.c1
-rw-r--r--drivers/usb/host/ohci-pnx8550.c1
-rw-r--r--drivers/usb/host/ohci-ppc-of.c1
-rw-r--r--drivers/usb/host/ohci-ppc-soc.c1
-rw-r--r--drivers/usb/host/ohci-ps3.c1
-rw-r--r--drivers/usb/host/ohci-pxa27x.c1
-rw-r--r--drivers/usb/host/ohci-s3c2410.c1
-rw-r--r--drivers/usb/host/ohci-sa1111.c1
-rw-r--r--drivers/usb/host/ohci-sh.c1
-rw-r--r--drivers/usb/host/ohci-sm501.c1
-rw-r--r--drivers/usb/host/ohci-ssb.c1
-rw-r--r--drivers/usb/host/u132-hcd.c11
-rw-r--r--drivers/video/fsl-diu-fb.c4
-rw-r--r--fs/nfs/dir.c2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c2
-rw-r--r--fs/proc/task_mmu.c80
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--include/asm-powerpc/hugetlb.h6
-rw-r--r--include/asm-powerpc/pgtable-ppc64.h10
-rw-r--r--include/asm-s390/pgtable.h5
-rw-r--r--include/asm-x86/kvm_para.h15
-rw-r--r--include/linux/debug_locks.h10
-rw-r--r--include/linux/ide.h15
-rw-r--r--include/linux/kernel.h3
-rw-r--r--kernel/kprobes.c2
-rw-r--r--kernel/printk.c2
-rw-r--r--kernel/rcupreempt.c20
-rw-r--r--kernel/softlockup.c1
-rw-r--r--lib/vsprintf.c128
-rw-r--r--mm/mempolicy.c6
-rw-r--r--net/bridge/br_if.c10
-rw-r--r--net/can/af_can.c10
-rw-r--r--net/can/bcm.c23
-rw-r--r--net/can/raw.c3
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/sunrpc/rpcb_clnt.c23
-rw-r--r--virt/kvm/ioapic.c2
82 files changed, 1878 insertions, 303 deletions
diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt
new file mode 100644
index 00000000000..13e4bf054c3
--- /dev/null
+++ b/Documentation/ftrace.txt
@@ -0,0 +1,1353 @@
+ ftrace - Function Tracer
+ ========================
+
+Copyright 2008 Red Hat Inc.
+Author: Steven Rostedt <srostedt@redhat.com>
+
+
+Introduction
+------------
+
+Ftrace is an internal tracer designed to help out developers and
+designers of systems to find what is going on inside the kernel.
+It can be used for debugging or analyzing latencies and performance
+issues that take place outside of user-space.
+
+Although ftrace is the function tracer, it also includes an
+infrastructure that allows for other types of tracing. Some of the
+tracers that are currently in ftrace is a tracer to trace
+context switches, the time it takes for a high priority task to
+run after it was woken up, the time interrupts are disabled, and
+more.
+
+
+The File System
+---------------
+
+Ftrace uses the debugfs file system to hold the control files as well
+as the files to display output.
+
+To mount the debugfs system:
+
+ # mkdir /debug
+ # mount -t debugfs nodev /debug
+
+
+That's it! (assuming that you have ftrace configured into your kernel)
+
+After mounting the debugfs, you can see a directory called
+"tracing". This directory contains the control and output files
+of ftrace. Here is a list of some of the key files:
+
+
+ Note: all time values are in microseconds.
+
+ current_tracer : This is used to set or display the current tracer
+ that is configured.
+
+ available_tracers : This holds the different types of tracers that
+ has been compiled into the kernel. The tracers
+ listed here can be configured by echoing in their
+ name into current_tracer.
+
+ tracing_enabled : This sets or displays whether the current_tracer
+ is activated and tracing or not. Echo 0 into this
+ file to disable the tracer or 1 (or non-zero) to
+ enable it.
+
+ trace : This file holds the output of the trace in a human readable
+ format.
+
+ latency_trace : This file shows the same trace but the information
+ is organized more to display possible latencies
+ in the system.
+
+ trace_pipe : The output is the same as the "trace" file but this
+ file is meant to be streamed with live tracing.
+ Reads from this file will block until new data
+ is retrieved. Unlike the "trace" and "latency_trace"
+ files, this file is a consumer. This means reading
+ from this file causes sequential reads to display
+ more current data. Once data is read from this
+ file, it is consumed, and will not be read
+ again with a sequential read. The "trace" and
+ "latency_trace" files are static, and if the
+ tracer isn't adding more data, they will display
+ the same information every time they are read.
+
+ iter_ctrl : This file lets the user control the amount of data
+ that is displayed in one of the above output
+ files.
+
+ trace_max_latency : Some of the tracers record the max latency.
+ For example, the time interrupts are disabled.
+ This time is saved in this file. The max trace
+ will also be stored, and displayed by either
+ "trace" or "latency_trace". A new max trace will
+ only be recorded if the latency is greater than
+ the value in this file. (in microseconds)
+
+ trace_entries : This sets or displays the number of trace
+ entries each CPU buffer can hold. The tracer buffers
+ are the same size for each CPU, so care must be
+ taken when modifying the trace_entries. The number
+ of actually entries will be the number given
+ times the number of possible CPUS. The buffers
+ are saved as individual pages, and the actual entries
+ will always be rounded up to entries per page.
+
+ This can only be updated when the current_tracer
+ is set to "none".
+
+ NOTE: It is planned on changing the allocated buffers
+ from being the number of possible CPUS to
+ the number of online CPUS.
+
+ tracing_cpumask : This is a mask that lets the user only trace
+ on specified CPUS. The format is a hex string
+ representing the CPUS.
+
+ set_ftrace_filter : When dynamic ftrace is configured in, the
+ code is dynamically modified to disable calling
+ of the function profiler (mcount). This lets
+ tracing be configured in with practically no overhead
+ in performance. This also has a side effect of
+ enabling or disabling specific functions to be
+ traced. Echoing in names of functions into this
+ file will limit the trace to only those files.
+
+ set_ftrace_notrace: This has the opposite effect that
+ set_ftrace_filter has. Any function that is added
+ here will not be traced. If a function exists
+ in both set_ftrace_filter and set_ftrace_notrace
+ the function will _not_ bet traced.
+
+ available_filter_functions : When a function is encountered the first
+ time by the dynamic tracer, it is recorded and
+ later the call is converted into a nop. This file
+ lists the functions that have been recorded
+ by the dynamic tracer and these functions can
+ be used to set the ftrace filter by the above
+ "set_ftrace_filter" file.
+
+
+The Tracers
+-----------
+
+Here are the list of current tracers that can be configured.
+
+ ftrace - function tracer that uses mcount to trace all functions.
+ It is possible to filter out which functions that are
+ traced when dynamic ftrace is configured in.
+
+ sched_switch - traces the context switches between tasks.
+
+ irqsoff - traces the areas that disable interrupts and saves off
+ the trace with the longest max latency.
+ See tracing_max_latency. When a new max is recorded,
+ it replaces the old trace. It is best to view this
+ trace with the latency_trace file.
+
+ preemptoff - Similar to irqsoff but traces and records the time
+ preemption is disabled.
+
+ preemptirqsoff - Similar to irqsoff and preemptoff, but traces and
+ records the largest time irqs and/or preemption is
+ disabled.
+
+ wakeup - Traces and records the max latency that it takes for
+ the highest priority task to get scheduled after
+ it has been woken up.
+
+ none - This is not a tracer. To remove all tracers from tracing
+ simply echo "none" into current_tracer.
+
+
+Examples of using the tracer
+----------------------------
+
+Here are typical examples of using the tracers with only controlling
+them with the debugfs interface (without using any user-land utilities).
+
+Output format:
+--------------
+
+Here's an example of the output format of the file "trace"
+
+ --------
+# tracer: ftrace
+#
+# TASK-PID CPU# TIMESTAMP FUNCTION
+# | | | | |
+ bash-4251 [01] 10152.583854: path_put <-path_walk
+ bash-4251 [01] 10152.583855: dput <-path_put
+ bash-4251 [01] 10152.583855: _atomic_dec_and_lock <-dput
+ --------
+
+A header is printed with the trace that is represented. In this case
+the tracer is "ftrace". Then a header showing the format. Task name
+"bash", the task PID "4251", the CPU that it was running on
+"01", the timestamp in <secs>.<usecs> format, the function name that was
+traced "path_put" and the parent function that called this function
+"path_walk".
+
+The sched_switch tracer also includes tracing of task wake ups and
+context switches.
+
+ ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 2916:115:S
+ ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 10:115:S
+ ksoftirqd/1-7 [01] 1453.070013: 7:115:R ==> 10:115:R
+ events/1-10 [01] 1453.070013: 10:115:S ==> 2916:115:R
+ kondemand/1-2916 [01] 1453.070013: 2916:115:S ==> 7:115:R
+ ksoftirqd/1-7 [01] 1453.070013: 7:115:S ==> 0:140:R
+
+Wake ups are represented by a "+" and the context switches show
+"==>". The format is:
+
+ Context switches:
+
+ Previous task Next Task
+
+ <pid>:<prio>:<state> ==> <pid>:<prio>:<state>
+
+ Wake ups:
+
+ Current task Task waking up
+
+ <pid>:<prio>:<state> + <pid>:<prio>:<state>
+
+The prio is the internal kernel priority, which is inverse to the
+priority that is usually displayed by user-space tools. Zero represents
+the highest priority (99). Prio 100 starts the "nice" priorities with
+100 being equal to nice -20 and 139 being nice 19. The prio "140" is
+reserved for the idle task which is the lowest priority thread (pid 0).
+
+
+Latency trace format
+--------------------
+
+For traces that display latency times, the latency_trace file gives
+a bit more information to see why a latency happened. Here's a typical
+trace.
+
+# tracer: irqsoff
+#
+irqsoff latency trace v1.1.5 on 2.6.26-rc8
+--------------------------------------------------------------------
+ latency: 97 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
+ -----------------
+ | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0)
+ -----------------
+ => started at: apic_timer_interrupt
+ => ended at: do_softirq
+
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| /
+# ||||| delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+ <idle>-0 0d..1 0us+: trace_hardirqs_off_thunk (apic_timer_interrupt)
+ <idle>-0 0d.s. 97us : __do_softirq (do_softirq)
+ <idle>-0 0d.s1 98us : trace_hardirqs_on (do_softirq)
+
+
+vim:ft=help
+
+
+This shows that the current tracer is "irqsoff" tracing the time
+interrupts are disabled. It gives the trace version and the kernel
+this was executed on (2.6.26-rc8). Then it displays the max latency
+in microsecs (97 us). The number of trace entries displayed
+by the total number recorded (both are three: #3/3). The type of
+preemption that was used (PREEMPT). VP, KP, SP, and HP are always zero
+and reserved for later use. #P is the number of online CPUS (#P:2).
+
+The task is the process that was running when the latency happened.
+(swapper pid: 0).
+
+The start and stop that caused the latencies:
+
+ apic_timer_interrupt is where the interrupts were disabled.
+ do_softirq is where they were enabled again.
+
+The next lines after the header are the trace itself. The header
+explains which is which.
+
+ cmd: The name of the process in the trace.
+
+ pid: The PID of that process.
+
+ CPU#: The CPU that the process was running on.
+
+ irqs-off: 'd' interrupts are disabled. '.' otherwise.
+
+ need-resched: 'N' task need_resched is set, '.' otherwise.
+
+ hardirq/softirq:
+ 'H' - hard irq happened inside a softirq.
+ 'h' - hard irq is running
+ 's' - soft irq is running
+ '.' - normal context.
+
+ preempt-depth: The level of preempt_disabled
+
+The above is mostly meaningful for kernel developers.
+
+ time: This differs from the trace output where as the trace output
+ contained a absolute timestamp. This timestamp is relative
+ to the start of the first entry in the the trace.
+
+ delay: This is just to help catch your eye a bit better. And
+ needs to be fixed to be only relative to the same CPU.
+ The marks is determined by the difference between this
+ current trace and the next trace.
+ '!' - greater than preempt_mark_thresh (default 100)
+ '+' - greater than 1 microsecond
+ ' ' - less than or equal to 1 microsecond.
+
+ The rest is the same as the 'trace' file.
+
+
+iter_ctrl
+---------
+
+The iter_ctrl file is used to control what gets printed in the trace
+output. To see what is available, simply cat the file:
+
+ cat /debug/tracing/iter_ctrl
+ print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \
+ noblock nostacktrace nosched-tree
+
+To disable one of the options, echo in the option appended with "no".
+
+ echo noprint-parent > /debug/tracing/iter_ctrl
+
+To enable an option, leave off the "no".
+
+ echo sym-offest > /debug/tracing/iter_ctrl
+
+Here are the available options:
+
+ print-parent - On function traces, display the calling function
+ as well as the function being traced.
+
+ print-parent:
+ bash-4000 [01] 1477.606694: simple_strtoul <-strict_strtoul
+
+ noprint-parent:
+ bash-4000 [01] 1477.606694: simple_strtoul
+
+
+ sym-offset - Display not only the function name, but also the offset
+ in the function. For example, instead of seeing just
+ "ktime_get" you will see "ktime_get+0xb/0x20"
+
+ sym-offset:
+ bash-4000 [01] 1477.606694: simple_strtoul+0x6/0xa0
+
+ sym-addr - this will also display the function address as well as
+ the function name.
+
+ sym-addr:
+ bash-4000 [01] 1477.606694: simple_strtoul <c0339346>
+
+ verbose - This deals with the latency_trace file.
+
+ bash 4000 1 0 00000000 00010a95 [58127d26] 1720.415ms \
+ (+0.000ms): simple_strtoul (strict_strtoul)
+
+ raw - This will display raw numbers. This option is best for use with
+ user applications that can translate the raw numbers better than
+ having it done in the kernel.
+
+ hex - similar to raw, but the numbers will be in a hexadecimal format.
+
+ bin - This will print out the formats in raw binary.
+
+ block - TBD (needs update)
+
+ stacktrace - This is one of the options that changes the trace itself.
+ When a trace is recorded, so is the stack of functions.
+ This allows for back traces of trace sites.
+
+ sched-tree - TBD (any users??)
+
+
+sched_switch
+------------
+
+This tracer simply records schedule switches. Here's an example
+on how to implement it.
+
+ # echo sched_switch > /debug/tracing/current_tracer
+ # echo 1 > /debug/tracing/tracing_enabled
+ # sleep 1
+ # echo 0 > /debug/tracing/tracing_enabled
+ # cat /debug/tracing/trace
+
+# tracer: sched_switch
+#
+# TASK-PID CPU# TIMESTAMP FUNCTION
+# | | | | |
+ bash-3997 [01] 240.132281: 3997:120:R + 4055:120:R
+ bash-3997 [01] 240.132284: 3997:120:R ==> 4055:120:R
+ sleep-4055 [01] 240.132371: 4055:120:S ==> 3997:120:R
+ bash-3997 [01] 240.132454: 3997:120:R + 4055:120:S
+ bash-3997 [01] 240.132457: 3997:120:R ==> 4055:120:R
+ sleep-4055 [01] 240.132460: 4055:120:D ==> 3997:120:R
+ bash-3997 [01] 240.132463: 3997:120:R + 4055:120:D
+ bash-3997 [01] 240.132465: 3997:120:R ==> 4055:120:R
+ <idle>-0 [00] 240.132589: 0:140:R + 4:115:S
+ <idle>-0 [00] 240.132591: 0:140:R ==> 4:115:R
+ ksoftirqd/0-4 [00] 240.132595: 4:115:S ==> 0:140:R
+ <idle>-0 [00] 240.132598: 0:140:R + 4:115:S
+ <idle>-0 [00] 240.132599: 0:140:R ==> 4:115:R
+ ksoftirqd/0-4 [00] 240.132603: 4:115:S ==> 0:140:R
+ sleep-4055 [01] 240.133058: 4055:120:S ==> 3997:120:R
+ [...]
+
+
+As we have discussed previously about this format, the header shows
+the name of the trace and points to the options. The "FUNCTION"
+is a misnomer since here it represents the wake ups and context
+switches.
+
+The sched_switch only lists the wake ups (represented with '+')
+and context switches ('==>') with the previous task or current
+first followed by the next task or task waking up. The format for both
+of these is PID:KERNEL-PRIO:TASK-STATE. Remember that the KERNEL-PRIO
+is the inverse of the actual priority with zero (0) being the highest
+priority and the nice values starting at 100 (nice -20). Below is
+a quick chart to map the kernel priority to user land priorities.
+
+ Kernel priority: 0 to 99 ==> user RT priority 99 to 0
+ Kernel priority: 100 to 139 ==> user nice -20 to 19
+ Kernel priority: 140 ==> idle task priority
+
+The task states are:
+
+ R - running : wants to run, may not actually be running
+ S - sleep : process is waiting to be woken up (handles signals)
+ D - deep sleep : process must be woken up (ignores signals)
+ T - stopped : process suspended
+ t - traced : process is being traced (with something like gdb)
+ Z - zombie : process waiting to be cleaned up
+ X - unknown
+
+
+ftrace_enabled
+--------------
+
+The following tracers give different output depending on whether
+or not the sysctl ftrace_enabled is set. To set ftrace_enabled,
+one can either use the sysctl function or set it via the proc
+file system interface.
+
+ sysctl kernel.ftrace_enabled=1
+
+ or
+
+ echo 1 > /proc/sys/kernel/ftrace_enabled
+
+To disable ftrace_enabled simply replace the '1' with '0' in
+the above commands.
+
+When ftrace_enabled is set the tracers will also record the functions
+that are within the trace. The descriptions of the tracers
+will also show an example with ftrace enabled.
+
+
+irqsoff
+-------
+
+When interrupts are disabled, the CPU can not react to any other
+external event (besides NMIs and SMIs). This prevents the timer
+interrupt from triggering or the mouse interrupt from letting the
+kernel know of a new mouse event. The result is a latency with the
+reaction time.
+
+The irqsoff tracer tracks the time interrupts are disabled and when
+they are re-enabled. When a new maximum latency is hit, it saves off
+the trace so that it may be retrieved at a later time. Every time a
+new maximum in reached, the old saved trace is discarded and the new
+trace is saved.
+
+To reset the maximum, echo 0 into tracing_max_latency. Here's an
+example:
+
+ # echo irqsoff > /debug/tracing/current_tracer
+ # echo 0 > /debug/tracing/tracing_max_latency
+ # echo 1 > /debug/tracing/tracing_enabled
+ # ls -ltr
+ [...]
+ # echo 0 > /debug/tracing/tracing_enabled
+ # cat /debug/tracing/latency_trace
+# tracer: irqsoff
+#
+irqsoff latency trace v1.1.5 on 2.6.26-rc8
+--------------------------------------------------------------------
+ latency: 6 us, #3/3, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
+ -----------------
+ | task: bash-4269 (uid:0 nice:0 policy:0 rt_prio:0)
+ -----------------
+ => started at: copy_page_range
+ => ended at: copy_page_range
+
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| /
+# ||||| delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+ bash-4269 1...1 0us+: _spin_lock (copy_page_range)
+ bash-4269 1...1 7us : _spin_unlock (copy_page_range)
+ bash-4269 1...2 7us : trace_preempt_on (copy_page_range)
+
+
+vim:ft=help
+
+Here we see that that we had a latency of 6 microsecs (which is
+very good). The spin_lock in copy_page_range disabled interrupts.
+The difference between the 6 and the displayed timestamp 7us is
+because the clock must have incremented between the time of recording
+the max latency and recording the function that had that latency.
+
+Note the above had ftrace_enabled not set. If we set the ftrace_enabled
+we get a much larger output:
+
+# tracer: irqsoff
+#
+irqsoff latency trace v1.1.5 on 2.6.26-rc8
+--------------------------------------------------------------------
+ latency: 50 us, #101/101, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
+ -----------------
+ | task: ls-4339 (uid:0 nice:0 policy:0 rt_prio:0)
+ -----------------
+ => started at: __alloc_pages_internal
+ => ended at: __alloc_pages_internal
+
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| /
+# ||||| delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+ ls-4339 0...1 0us+: get_page_from_freelist (__alloc_pages_internal)
+ ls-4339 0d..1 3us : rmqueue_bulk (get_page_from_freelist)
+ ls-4339 0d..1 3us : _spin_lock (rmqueue_bulk)
+ ls-4339 0d..1 4us : add_preempt_count (_spin_lock)
+ ls-4339 0d..2 4us : __rmqueue (rmqueue_bulk)
+ ls-4339 0d..2 5us : __rmqueue_smallest (__rmqueue)
+ ls-4339 0d..2 5us : __mod_zone_page_state (__rmqueue_smallest)
+ ls-4339 0d..2 6us : __rmqueue (rmqueue_bulk)
+ ls-4339 0d..2 6us : __rmqueue_smallest (__rmqueue)
+ ls-4339 0d..2 7us : __mod_zone_page_state (__rmqueue_smallest)
+ ls-4339 0d..2 7us : __rmqueue (rmqueue_bulk)
+ ls-4339 0d..2 8us : __rmqueue_smallest (__rmqueue)
+[...]
+ ls-4339 0d..2 46us : __rmqueue_smallest (__rmqueue)
+ ls-4339 0d..2 47us : __mod_zone_page_state (__rmqueue_smallest)
+ ls-4339 0d..2 47us : __rmqueue (rmqueue_bulk)
+ ls-4339 0d..2 48us : __rmqueue_smallest (__rmqueue)
+ ls-4339 0d..2 48us : __mod_zone_page_state (__rmqueue_smallest)
+ ls-4339 0d..2 49us : _spin_unlock (rmqueue_bulk)
+ ls-4339 0d..2 49us : sub_preempt_count (_spin_unlock)
+ ls-4339 0d..1 50us : get_page_from_freelist (__alloc_pages_internal)
+ ls-4339 0d..2 51us : trace_hardirqs_on (__alloc_pages_internal)
+
+
+vim:ft=help
+
+
+Here we traced a 50 microsecond latency. But we also see all the
+functions that were called during that time. Note that enabling
+function tracing we endure an added overhead. This overhead may
+extend the latency times. But never the less, this trace has provided
+some very helpful debugging.
+
+
+preemptoff
+----------
+
+When preemption is disabled we may be able to receive interrupts but
+the task can not be preempted and a higher priority task must wait
+for preemption to be enabled again before it can preempt a lower
+priority task.
+
+The preemptoff tracer traces the places that disables preemption.
+Like the irqsoff, it records the maximum latency that preemption
+was disabled. The control of preemptoff is much like the irqsoff.
+
+ # echo preemptoff > /debug/tracing/current_tracer
+ # echo 0 > /debug/tracing/tracing_max_latency
+ # echo 1 > /debug/tracing/tracing_enabled
+ # ls -ltr
+ [...]
+ # echo 0 > /debug/tracing/tracing_enabled
+ # cat /debug/tracing/latency_trace
+# tracer: preemptoff
+#
+preemptoff latency trace v1.1.5 on 2.6.26-rc8
+--------------------------------------------------------------------
+ latency: 29 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
+ -----------------
+ | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0)
+ -----------------
+ => started at: do_IRQ
+ => ended at: __do_softirq
+
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| /
+# ||||| delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+ sshd-4261 0d.h. 0us+: irq_enter (do_IRQ)
+ sshd-4261 0d.s. 29us : _local_bh_enable (__do_softirq)
+ sshd-4261 0d.s1 30us : trace_preempt_on (__do_softirq)
+
+
+vim:ft=help
+
+This has some more changes. Preemption was disabled when an interrupt
+came in (notice the 'h'), and was enabled while doing a softirq.
+(notice the 's'). But we also see that interrupts have been disabled
+when entering the preempt off section and leaving it (the 'd').
+We do not know if interrupts were enabled in the mean time.
+
+# tracer: preemptoff
+#
+preemptoff latency trace v1.1.5 on 2.6.26-rc8
+--------------------------------------------------------------------
+ latency: 63 us, #87/87, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
+ -----------------
+ | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0)
+ -----------------
+ => started at: remove_wait_queue
+ => ended at: __do_softirq
+
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| /
+# ||||| delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+ sshd-4261 0d..1 0us : _spin_lock_irqsave (remove_wait_queue)
+ sshd-4261 0d..1 1us : _spin_unlock_irqrestore (remove_wait_queue)
+ sshd-4261 0d..1 2us : do_IRQ (common_interrupt)
+ sshd-4261 0d..1 2us : irq_enter (do_IRQ)
+ sshd-4261 0d..1 2us : idle_cpu (irq_enter)
+ sshd-4261 0d..1 3us : add_preempt_count (irq_enter)
+ sshd-4261 0d.h1 3us : idle_cpu (irq_enter)
+ sshd-4261 0d.h. 4us : handle_fasteoi_irq (do_IRQ)
+[...]
+ sshd-4261 0d.h. 12us : add_preempt_count (_spin_lock)
+ sshd-4261 0d.h1 12us : ack_ioapic_quirk_irq (handle_fasteoi_irq)
+ sshd-4261 0d.h1 13us : move_native_irq (ack_ioapic_quirk_irq)
+ sshd-4261 0d.h1 13us : _spin_unlock (handle_fasteoi_irq)
+ sshd-4261 0d.h1 14us : sub_preempt_count (_spin_unlock)
+ sshd-4261 0d.h1 14us : irq_exit (do_IRQ)
+ sshd-4261 0d.h1 15us : sub_preempt_count (irq_exit)
+ sshd-4261 0d..2 15us : do_softirq (irq_exit)
+ sshd-4261 0d... 15us : __do_softirq (do_softirq)
+ sshd-4261 0d... 16us : __local_bh_disable (__do_softirq)
+ sshd-4261 0d... 16us+: add_preempt_count (__local_bh_disable)
+ sshd-4261 0d.s4 20us : add_preempt_count (__local_bh_disable)
+ sshd-4261 0d.s4 21us : sub_preempt_count (local_bh_enable)
+ sshd-4261 0d.s5 21us : sub_preempt_count (local_bh_enable)
+[...]
+ sshd-4261 0d.s6 41us : add_preempt_count (__local_bh_disable)
+ sshd-4261 0d.s6 42us : sub_preempt_count (local_bh_enable)
+ sshd-4261 0d.s7 42us : sub_preempt_count (local_bh_enable)
+ sshd-4261 0d.s5 43us : add_preempt_count (__local_bh_disable)
+ sshd-4261 0d.s5 43us : sub_preempt_count (local_bh_enable_ip)
+ sshd-4261 0d.s6 44us : sub_preempt_count (local_bh_enable_ip)
+ sshd-4261 0d.s5 44us : add_preempt_count (__local_bh_disable)
+ sshd-4261 0d.s5 45us : sub_preempt_count (local_bh_enable)
+[...]
+ sshd-4261 0d.s. 63us : _local_bh_enable (__do_softirq)
+ sshd-4261 0d.s1 64us : trace_preempt_on (__do_softirq)
+
+
+The above is an example of the preemptoff trace with ftrace_enabled
+set. Here we see that interrupts were disabled the entire time.
+The irq_enter code lets us know that we entered an interrupt 'h'.
+Before that, the functions being traced still show that it is not
+in an interrupt, but we can see by the functions themselves that
+this is not the case.
+
+Notice that the __do_softirq when called doesn't have a preempt_count.
+It may seem that we missed a preempt enabled. What really happened
+is that the preempt count is held on the threads stack and we
+switched to the softirq stack (4K stacks in effect). The code
+does not copy the preempt count, but because interrupts are disabled
+we don't need to worry about it. Having a tracer like this is good
+to let people know what really happens inside the kernel.
+
+
+preemptirqsoff
+--------------
+
+Knowing the locations that have interrupts disabled or preemption
+disabled for the longest times is helpful. But sometimes we would
+like to know when either preemption and/or interrupts are disabled.
+
+The following code:
+
+ local_irq_disable();
+ call_function_with_irqs_off();
+ preempt_disable();
+ call_function_with_irqs_and_preemption_off();
+ local_irq_enable();
+ call_function_with_preemption_off();
+ preempt_enable();
+
+The irqsoff tracer will record the total length of
+call_function_with_irqs_off() and
+call_function_with_irqs_and_preemption_off().
+
+The preemptoff tracer will record the total length of
+call_function_with_irqs_and_preemption_off() and
+call_function_with_preemption_off().
+
+But neither will trace the time that interrupts and/or preemption
+is disabled. This total time is the time that we can not schedule.
+To record this time, use the preemptirqsoff tracer.
+
+Again, using this trace is much like the irqsoff and preemptoff tracers.
+
+ # echo preemptoff > /debug/tracing/current_tracer
+ # echo 0 > /debug/tracing/tracing_max_latency
+ # echo 1 > /debug/tracing/tracing_enabled
+ # ls -ltr
+ [...]
+ # echo 0 > /debug/tracing/tracing_enabled
+ # cat /debug/tracing/latency_trace
+# tracer: preemptirqsoff
+#
+preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8
+--------------------------------------------------------------------
+ latency: 293 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
+ -----------------
+ | task: ls-4860 (uid:0 nice:0 policy:0 rt_prio:0)
+ -----------------
+ => started at: apic_timer_interrupt
+ => ended at: __do_softirq
+
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| /
+# ||||| delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+ ls-4860 0d... 0us!: trace_hardirqs_off_thunk (apic_timer_interrupt)
+ ls-4860 0d.s. 294us : _local_bh_enable (__do_softirq)
+ ls-4860 0d.s1 294us : trace_preempt_on (__do_softirq)
+
+
+vim:ft=help
+
+
+The trace_hardirqs_off_thunk is called from assembly on x86 when
+interrupts are disabled in the assembly code. Without the function
+tracing, we don't know if interrupts were enabled within the preemption
+points. We do see that it started with preemption enabled.
+
+Here is a trace with ftrace_enabled set:
+
+
+# tracer: preemptirqsoff
+#
+preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8
+--------------------------------------------------------------------
+ latency: 105 us, #183/183, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
+ -----------------
+ | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0)
+ -----------------
+ => started at: write_chan
+ => ended at: __do_softirq
+
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| /
+# ||||| delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+ ls-4473 0.N.. 0us : preempt_schedule (write_chan)
+ ls-4473 0dN.1 1us : _spin_lock (schedule)
+ ls-4473 0dN.1 2us : add_preempt_count (_spin_lock)
+ ls-4473 0d..2 2us : put_prev_task_fair (schedule)
+[...]
+ ls-4473 0d..2 13us : set_normalized_timespec (ktime_get_ts)
+ ls-4473 0d..2 13us : __switch_to (schedule)
+ sshd-4261 0d..2 14us : finish_task_switch (schedule)
+ sshd-4261 0d..2 14us : _spin_unlock_irq (finish_task_switch)
+ sshd-4261 0d..1 15us : add_preempt_count (_spin_lock_irqsave)
+ sshd-4261 0d..2 16us : _spin_unlock_irqrestore (hrtick_set)
+ sshd-4261 0d..2 16us : do_IRQ (common_interrupt)
+ sshd-4261 0d..2 17us : irq_enter (do_IRQ)
+ sshd-4261 0d..2 17us : idle_cpu (irq_enter)
+ sshd-4261 0d..2 18us : add_preempt_count (irq_enter)
+ sshd-4261 0d.h2 18us : idle_cpu (irq_enter)
+ sshd-4261 0d.h. 18us : handle_fasteoi_irq (do_IRQ)
+ sshd-4261 0d.h. 19us : _spin_lock (handle_fasteoi_irq)
+ sshd-4261 0d.h. 19us : add_preempt_count (_spin_lock)
+ sshd-4261 0d.h1 20us : _spin_unlock (handle_fasteoi_irq)
+ sshd-4261 0d.h1 20us : sub_preempt_count (_spin_unlock)
+[...]
+ sshd-4261 0d.h1 28us : _spin_unlock (handle_fasteoi_irq)
+ sshd-4261 0d.h1 29us : sub_preempt_count (_spin_unlock)
+ sshd-4261 0d.h2 29us : irq_exit (do_IRQ)
+ sshd-4261 0d.h2 29us : sub_preempt_count (irq_exit)
+ sshd-4261 0d..3 30us : do_softirq (irq_exit)
+ sshd-4261 0d... 30us : __do_softirq (do_softirq)
+ sshd-4261 0d... 31us : __local_bh_disable (__do_softirq)
+ sshd-4261 0d... 31us+: add_preempt_count (__local_bh_disable)
+ sshd-4261 0d.s4 34us : add_preempt_count (__local_bh_disable)
+[...]
+ sshd-4261 0d.s3 43us : sub_preempt_count (local_bh_enable_ip)
+ sshd-4261 0d.s4 44us : sub_preempt_count (local_bh_enable_ip)
+ sshd-4261 0d.s3 44us : smp_apic_timer_interrupt (apic_timer_interrupt)
+ sshd-4261 0d.s3 45us : irq_enter (smp_apic_timer_interrupt)
+ sshd-4261 0d.s3 45us : idle_cpu (irq_enter)
+ sshd-4261 0d.s3 46us : add_preempt_count (irq_enter)
+ sshd-4261 0d.H3 46us : idle_cpu (irq_enter)
+ sshd-4261 0d.H3 47us : hrtimer_interrupt (smp_apic_timer_interrupt)
+ sshd-4261 0d.H3 47us : ktime_get (hrtimer_interrupt)
+[...]
+ sshd-4261 0d.H3 81us : tick_program_event (hrtimer_interrupt)
+ sshd-4261 0d.H3 82us : ktime_get (tick_program_event)
+ sshd-4261 0d.H3 82us : ktime_get_ts (ktime_get)
+ sshd-4261 0d.H3 83us : getnstimeofday (ktime_get_ts)
+ sshd-4261 0d.H3 83us : set_normalized_timespec (ktime_get_ts)
+ sshd-4261 0d.H3 84us : clockevents_program_event (tick_program_event)
+ sshd-4261 0d.H3 84us : lapic_next_event (clockevents_program_event)
+ sshd-4261 0d.H3 85us : irq_exit (smp_apic_timer_interrupt)
+ sshd-4261 0d.H3 85us : sub_preempt_count (irq_exit)
+ sshd-4261 0d.s4 86us : sub_preempt_count (irq_exit)
+ sshd-4261 0d.s3 86us : add_preempt_count (__local_bh_disable)
+[...]
+ sshd-4261 0d.s1 98us : sub_preempt_count (net_rx_action)
+ sshd-4261 0d.s. 99us : add_preempt_count (_spin_lock_irq)
+ sshd-4261 0d.s1 99us+: _spin_unlock_irq (run_timer_softirq)
+ sshd-4261 0d.s. 104us : _local_bh_enable (__do_softirq)
+ sshd-4261 0d.s. 104us : sub_preempt_count (_local_bh_enable)
+ sshd-4261 0d.s. 105us : _local_bh_enable (__do_softirq)
+ sshd-4261 0d.s1 105us : trace_preempt_on (__do_softirq)
+
+
+This is a very interesting trace. It started with the preemption of
+the ls task. We see that the task had the "need_resched" bit set
+with the 'N' in the trace. Interrupts are disabled in the spin_lock
+and the trace started. We see that a schedule took place to run
+sshd. When the interrupts were enabled we took an interrupt.
+On return of the interrupt the softirq ran. We took another interrupt
+while running the softirq as we see with the capital 'H'.
+
+
+wakeup
+------
+
+In Real-Time environment it is very important to know the wakeup
+time it takes for the highest priority task that wakes up to the
+time it executes. This is also known as "schedule latency".
+I stress the point that this is about RT tasks. It is also important
+to know the scheduling latency of non-RT tasks, but the average
+schedule latency is better for non-RT tasks. Tools like
+LatencyTop is more appropriate for such measurements.
+
+Real-Time environments is interested in the worst case latency.
+That is the longest latency it takes for something to happen, and
+not the average. We can have a very fast scheduler that may only
+have a large latency once in a while, but that would not work well
+with Real-Time tasks. The wakeup tracer was designed to record
+the worst case wakeups of RT tasks. Non-RT tasks are not recorded
+because the tracer only records one worst case and tracing non-RT
+tasks that are unpredictable will overwrite the worst case latency
+of RT tasks.
+
+Since this tracer only deals with RT tasks, we will run this slightly
+different than we did with the previous tracers. Instead of performing
+an 'ls' we will run 'sleep 1' under 'chrt' which changes the
+priority of the task.
+
+ # echo wakeup > /debug/tracing/current_tracer
+ # echo 0 > /debug/tracing/tracing_max_latency
+ # echo 1 > /debug/tracing/tracing_enabled
+ # chrt -f 5 sleep 1
+ # echo 0 > /debug/tracing/tracing_enabled
+ # cat /debug/tracing/latency_trace
+# tracer: wakeup
+#
+wakeup latency trace v1.1.5 on 2.6.26-rc8
+--------------------------------------------------------------------
+ latency: 4 us, #2/2, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
+ -----------------
+ | task: sleep-4901 (uid:0 nice:0 policy:1 rt_prio:5)
+ -----------------
+
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| /
+# ||||| delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+ <idle>-0 1d.h4 0us+: try_to_wake_up (wake_up_process)
+ <idle>-0 1d..4 4us : schedule (cpu_idle)
+
+
+vim:ft=help
+
+
+Running this on an idle system we see that it only took 4 microseconds
+to perform the task switch. Note, since the trace marker in the
+schedule is before the actual "switch" we stop the tracing when
+the recorded task is about to schedule in. This may change if
+we add a new marker at the end of the scheduler.
+
+Notice that the recorded task is 'sleep' with the PID of 4901 and it
+has an rt_prio of 5. This priority is user-space priority and not
+the internal kernel priority. The policy is 1 for SCHED_FIFO and 2
+for SCHED_RR.
+
+Doing the same with chrt -r 5 and ftrace_enabled set.
+
+# tracer: wakeup
+#
+wakeup latency trace v1.1.5 on 2.6.26-rc8
+--------------------------------------------------------------------
+ latency: 50 us, #60/60, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
+ -----------------
+ | task: sleep-4068 (uid:0 nice:0 policy:2 rt_prio:5)
+ -----------------
+
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| /
+# ||||| delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+ksoftirq-7 1d.H3 0us : try_to_wake_up (wake_up_process)
+ksoftirq-7 1d.H4 1us : sub_preempt_count (marker_probe_cb)
+ksoftirq-7 1d.H3 2us : check_preempt_wakeup (try_to_wake_up)
+ksoftirq-7 1d.H3 3us : update_curr (check_preempt_wakeup)
+ksoftirq-7 1d.H3 4us : calc_delta_mine (update_curr)
+ksoftirq-7 1d.H3 5us : __resched_task (check_preempt_wakeup)
+ksoftirq-7 1d.H3 6us : task_wake_up_rt (try_to_wake_up)
+ksoftirq-7 1d.H3 7us : _spin_unlock_irqrestore (try_to_wake_up)
+[...]
+ksoftirq-7 1d.H2 17us : irq_exit (smp_apic_timer_interrupt)
+ksoftirq-7 1d.H2 18us : sub_preempt_count (irq_exit)
+ksoftirq-7 1d.s3 19us : sub_preempt_count (irq_exit)
+ksoftirq-7 1..s2 20us : rcu_process_callbacks (__do_softirq)
+[...]
+ksoftirq-7 1..s2 26us : __rcu_process_callbacks (rcu_process_callbacks)
+ksoftirq-7 1d.s2 27us : _local_bh_enable (__do_softirq)
+ksoftirq-7 1d.s2 28us : sub_preempt_count (_local_bh_enable)
+ksoftirq-7 1.N.3 29us : sub_preempt_count (ksoftirqd)
+ksoftirq-7 1.N.2 30us : _cond_resched (ksoftirqd)
+ksoftirq-7 1.N.2 31us : __cond_resched (_cond_resched)
+ksoftirq-7 1.N.2 32us : add_preempt_count (__cond_resched)
+ksoftirq-7 1.N.2 33us : schedule (__cond_resched)
+ksoftirq-7 1.N.2 33us : add_preempt_count (schedule)
+ksoftirq-7 1.N.3 34us : hrtick_clear (schedule)
+ksoftirq-7 1dN.3 35us : _spin_lock (schedule)
+ksoftirq-7 1dN.3 36us : add_preempt_count (_spin_lock)
+ksoftirq-7 1d..4 37us : put_prev_task_fair (schedule)
+ksoftirq-7 1d..4 38us : update_curr (put_prev_task_fair)
+[...]
+ksoftirq-7 1d..5 47us : _spin_trylock (tracing_record_cmdline)
+ksoftirq-7 1d..5 48us : add_preempt_count (_spin_trylock)
+ksoftirq-7 1d..6 49us : _spin_unlock (tracing_record_cmdline)
+ksoftirq-7 1d..6 49us : sub_preempt_count (_spin_unlock)
+ksoftirq-7 1d..4 50us : schedule (__cond_resched)
+
+The interrupt went off while running ksoftirqd. This task runs at
+SCHED_OTHER. Why didn't we see the 'N' set early? This may be
+a harmless bug with x86_32 and 4K stacks. The need_reched() function
+that tests if we need to reschedule looks on the actual stack.
+Where as the setting of the NEED_RESCHED bit happens on the
+task's stack. But because we are in a hard interrupt, the test
+is with the interrupts stack which has that to be false. We don't
+see the 'N' until we switch back to the task's stack.
+
+ftrace
+------
+
+ftrace is not only the name of the tracing infrastructure, but it
+is also a name of one of the tracers. The tracer is the function
+tracer. Enabling the function tracer can be done from the
+debug file system. Make sure the ftrace_enabled is set otherwise
+this tracer is a nop.
+
+ # sysctl kernel.ftrace_enabled=1
+ # echo ftrace > /debug/tracing/current_tracer
+ # echo 1 > /debug/tracing/tracing_enabled
+ # usleep 1
+ # echo 0 > /debug/tracing/tracing_enabled
+ # cat /debug/tracing/trace
+# tracer: ftrace
+#
+# TASK-PID CPU# TIMESTAMP FUNCTION
+# | | | | |
+ bash-4003 [00] 123.638713: finish_task_switch <-schedule
+ bash-4003 [00] 123.638714: _spin_unlock_irq <-finish_task_switch
+ bash-4003 [00] 123.638714: sub_preempt_count <-_spin_unlock_irq
+ bash-4003 [00] 123.638715: hrtick_set <-schedule
+ bash-4003 [00] 123.638715: _spin_lock_irqsave <-hrtick_set
+ bash-4003 [00] 123.638716: add_preempt_count <-_spin_lock_irqsave
+ bash-4003 [00] 123.638716: _spin_unlock_irqrestore <-hrtick_set
+ bash-4003 [00] 123.638717: sub_preempt_count <-_spin_unlock_irqrestore
+ bash-4003 [00] 123.638717: hrtick_clear <-hrtick_set
+ bash-4003 [00] 123.638718: sub_preempt_count <-schedule
+ bash-4003 [00] 123.638718: sub_preempt_count <-preempt_schedule
+ bash-4003 [00] 123.638719: wait_for_completion <-__stop_machine_run
+ bash-4003 [00] 123.638719: wait_for_common <-wait_for_completion
+ bash-4003 [00] 123.638720: _spin_lock_irq <-wait_for_common
+ bash-4003 [00] 123.638720: add_preempt_count <-_spin_lock_irq
+[...]
+
+
+Note: It is sometimes better to enable or disable tracing directly from
+a program, because the buffer may be overflowed by the echo commands
+before you get to the point you want to trace. It is also easier to
+stop the tracing at the point that you hit the part that you are
+interested in. Since the ftrace buffer is a ring buffer with the
+oldest data being overwritten, usually it is sufficient to start the
+tracer with an echo command but have you code stop it. Something
+like the following is usually appropriate for this.
+
+int trace_fd;
+[...]
+int main(int argc, char *argv[]) {
+ [...]
+ trace_fd = open("/debug/tracing/tracing_enabled", O_WRONLY);
+ [...]
+ if (condition_hit()) {
+ write(trace_fd, "0", 1);
+ }
+ [...]
+}
+
+
+dynamic ftrace
+--------------
+
+If CONFIG_DYNAMIC_FTRACE is set, then the system will run with
+virtually no overhead when function tracing is disabled. The way
+this works is the mcount function call (placed at the start of
+every kernel function, produced by the -pg switch in gcc), starts
+of pointing to a simple return.
+
+When dynamic ftrace is initialized, it calls kstop_machine to make it
+act like a uniprocessor so that it can freely modify code without
+worrying about other processors executing that same code. At
+initialization, the mcount calls are change to call a "record_ip"
+function. After this, the first time a kernel function is called,
+it has the calling address saved in a hash table.
+
+Later on the ftraced kernel thread is awoken and will again call
+kstop_machine if new functions have been recorded. The ftraced thread
+will change all calls to mcount to "nop". Just calling mcount
+and having mcount return has shown a 10% overhead. By converting
+it to a nop, there is no recordable overhead to the system.
+
+One special side-effect to the recording of the functions being
+traced, is that we can now selectively choose which functions we
+want to trace and which ones we want the mcount calls to remain as
+nops.
+
+Two files that contain to the enabling and disabling of recorded
+functions are:
+
+ set_ftrace_filter
+
+and
+
+ set_ftrace_notrace
+
+A list of available functions that you can add to this files is listed
+in:
+
+ available_filter_functions
+
+ # cat /debug/tracing/available_filter_functions
+put_prev_task_idle
+kmem_cache_create
+pick_next_task_rt
+get_online_cpus
+pick_next_task_fair
+mutex_lock
+[...]
+
+If I'm only interested in sys_nanosleep and hrtimer_interrupt:
+
+ # echo sys_nanosleep hrtimer_interrupt \
+ > /debug/tracing/set_ftrace_filter
+ # echo ftrace > /debug/tracing/current_tracer
+ # echo 1 > /debug/tracing/tracing_enabled
+ # usleep 1
+ # echo 0 > /debug/tracing/tracing_enabled
+ # cat /debug/tracing/trace
+# tracer: ftrace
+#
+# TASK-PID CPU# TIMESTAMP FUNCTION
+# | | | | |
+ usleep-4134 [00] 1317.070017: hrtimer_interrupt <-smp_apic_timer_interrupt
+ usleep-4134 [00] 1317.070111: sys_nanosleep <-syscall_call
+ <idle>-0 [00] 1317.070115: hrtimer_interrupt <-smp_apic_timer_interrupt
+
+To see what functions are being traced, you can cat the file:
+
+ # cat /debug/tracing/set_ftrace_filter
+hrtimer_interrupt
+sys_nanosleep
+
+
+Perhaps this isn't enough. The filters also allow simple wild cards.
+Only the following is currently available
+
+ <match>* - will match functions that begins with <match>
+ *<match> - will match functions that end with <match>
+ *<match>* - will match functions that have <match> in it
+
+Thats all the wild cards that are allowed.
+
+ <match>*<match> will not work.
+
+ # echo hrtimer_* > /debug/tracing/set_ftrace_filter
+
+Produces:
+
+# tracer: ftrace
+#
+# TASK-PID CPU# TIMESTAMP FUNCTION
+# | | | | |
+ bash-4003 [00] 1480.611794: hrtimer_init <-copy_process
+ bash-4003 [00] 1480.611941: hrtimer_start <-hrtick_set
+ bash-4003 [00] 1480.611956: hrtimer_cancel <-hrtick_clear
+ bash-4003 [00] 1480.611956: hrtimer_try_to_cancel <-hrtimer_cancel
+ <idle>-0 [00] 1480.612019: hrtimer_get_next_event <-get_next_timer_interrupt
+ <idle>-0 [00] 1480.612025: hrtimer_get_next_event <-get_next_timer_interrupt
+ <idle>-0 [00] 1480.612032: hrtimer_get_next_event <-get_next_timer_interrupt
+ <idle>-0 [00] 1480.612037: hrtimer_get_next_event <-get_next_timer_interrupt
+ <idle>-0 [00] 1480.612382: hrtimer_get_next_event <-get_next_timer_interrupt
+
+
+Notice that we lost the sys_nanosleep.
+
+ # cat /debug/tracing/set_ftrace_filter
+hrtimer_run_queues
+hrtimer_run_pending
+hrtimer_init
+hrtimer_cancel
+hrtimer_try_to_cancel
+hrtimer_forward
+hrtimer_start
+hrtimer_reprogram
+hrtimer_force_reprogram
+hrtimer_get_next_event
+hrtimer_interrupt
+hrtimer_nanosleep
+hrtimer_wakeup
+hrtimer_get_remaining
+hrtimer_get_res
+hrtimer_init_sleeper
+
+
+This is because the '>' and '>>' act just like they do in bash.
+To rewrite the filters, use '>'
+To append to the filters, use '>>'
+
+To clear out a filter so that all functions will be recorded again.
+
+ # echo > /debug/tracing/set_ftrace_filter
+ # cat /debug/tracing/set_ftrace_filter
+ #
+
+Again, now we want to append.
+
+ # echo sys_nanosleep > /debug/tracing/set_ftrace_filter
+ # cat /debug/tracing/set_ftrace_filter
+sys_nanosleep
+ # echo hrtimer_* >> /debug/tracing/set_ftrace_filter
+ # cat /debug/tracing/set_ftrace_filter
+hrtimer_run_queues
+hrtimer_run_pending
+hrtimer_init
+hrtimer_cancel
+hrtimer_try_to_cancel
+hrtimer_forward
+hrtimer_start
+hrtimer_reprogram
+hrtimer_force_reprogram
+hrtimer_get_next_event
+hrtimer_interrupt
+sys_nanosleep
+hrtimer_nanosleep
+hrtimer_wakeup
+hrtimer_get_remaining
+hrtimer_get_res
+hrtimer_init_sleeper
+
+
+The set_ftrace_notrace prevents those functions from being traced.
+
+ # echo '*preempt*' '*lock*' > /debug/tracing/set_ftrace_notrace
+
+Produces:
+
+# tracer: ftrace
+#
+# TASK-PID CPU# TIMESTAMP FUNCTION
+# | | | | |
+ bash-4043 [01] 115.281644: finish_task_switch <-schedule
+ bash-4043 [01] 115.281645: hrtick_set <-schedule
+ bash-4043 [01] 115.281645: hrtick_clear <-hrtick_set
+ bash-4043 [01] 115.281646: wait_for_completion <-__stop_machine_run
+ bash-4043 [01] 115.281647: wait_for_common <-wait_for_completion
+ bash-4043 [01] 115.281647: kthread_stop <-stop_machine_run
+ bash-4043 [01] 115.281648: init_waitqueue_head <-kthread_stop
+ bash-4043 [01] 115.281648: wake_up_process <-kthread_stop
+ bash-4043 [01] 115.281649: try_to_wake_up <-wake_up_process
+
+We can see that there's no more lock or preempt tracing.
+
+ftraced
+-------
+
+As mentioned above, when dynamic ftrace is configured in, a kernel
+thread wakes up once a second and checks to see if there are mcount
+calls that need to be converted into nops. If there is not, then
+it simply goes back to sleep. But if there is, it will call
+kstop_machine to convert the calls to nops.
+
+There may be a case that you do not want this added latency.
+Perhaps you are doing some audio recording and this activity might
+cause skips in the playback. There is an interface to disable
+and enable the ftraced kernel thread.
+
+ # echo 0 > /debug/tracing/ftraced_enabled
+
+This will disable the calling of the kstop_machine to update the
+mcount calls to nops. Remember that there's a large overhead
+to calling mcount. Without this kernel thread, that overhead will
+exist.
+
+Any write to the ftraced_enabled file will cause the kstop_machine
+to run if there are recorded calls to mcount. This means that a
+user can manually perform the updates when they want to by simply
+echoing a '0' into the ftraced_enabled file.
+
+The updates are also done at the beginning of enabling a tracer
+that uses ftrace function recording.
+
+
+trace_pipe
+----------
+
+The trace_pipe outputs the same as trace, but the effect on the
+tracing is different. Every read from trace_pipe is consumed.
+This means that subsequent reads will be different. The trace
+is live.
+
+ # echo ftrace > /debug/tracing/current_tracer
+ # cat /debug/tracing/trace_pipe > /tmp/trace.out &
+[1] 4153
+ # echo 1 > /debug/tracing/tracing_enabled
+ # usleep 1
+ # echo 0 > /debug/tracing/tracing_enabled
+ # cat /debug/tracing/trace
+# tracer: ftrace
+#
+# TASK-PID CPU# TIMESTAMP FUNCTION
+# | | | | |
+
+ #
+ # cat /tmp/trace.out
+ bash-4043 [00] 41.267106: finish_task_switch <-schedule
+ bash-4043 [00] 41.267106: hrtick_set <-schedule
+ bash-4043 [00] 41.267107: hrtick_clear <-hrtick_set
+ bash-4043 [00] 41.267108: wait_for_completion <-__stop_machine_run
+ bash-4043 [00] 41.267108: wait_for_common <-wait_for_completion
+ bash-4043 [00] 41.267109: kthread_stop <-stop_machine_run
+ bash-4043 [00] 41.267109: init_waitqueue_head <-kthread_stop
+ bash-4043 [00] 41.267110: wake_up_process <-kthread_stop
+ bash-4043 [00] 41.267110: try_to_wake_up <-wake_up_process
+ bash-4043 [00] 41.267111: select_task_rq_rt <-try_to_wake_up
+
+
+Note, reading the trace_pipe will block until more input is added.
+By changing the tracer, trace_pipe will issue an EOF. We needed
+to set the ftrace tracer _before_ cating the trace_pipe file.
+
+
+trace entries
+-------------
+
+Having too much or not enough data can be troublesome in diagnosing
+some issue in the kernel. The file trace_entries is used to modify
+the size of the internal trace buffers. The numbers listed
+is the number of entries that can be recorded per CPU. To know
+the full size, multiply the number of possible CPUS with the
+number of entries.
+
+ # cat /debug/tracing/trace_entries
+65620
+
+Note, to modify this you must have tracing fulling disabled. To do that,
+echo "none" into the current_tracer.
+
+ # echo none > /debug/tracing/current_tracer
+ # echo 100000 > /debug/tracing/trace_entries
+ # cat /debug/tracing/trace_entries
+100045
+
+
+Notice that we echoed in 100,000 but the size is 100,045. The entries
+are held by individual pages. It allocates the number of pages it takes
+to fulfill the request. If more entries may fit on the last page
+it will add them.
+
+ # echo 1 > /debug/tracing/trace_entries
+ # cat /debug/tracing/trace_entries
+85
+
+This shows us that 85 entries can fit on a single page.
+
+The number of pages that will be allocated is a percentage of available
+memory. Allocating too much will produces an error.
+
+ # echo 1000000000000 > /debug/tracing/trace_entries
+-bash: echo: write error: Cannot allocate memory
+ # cat /debug/tracing/trace_entries
+85
+
diff --git a/Makefile b/Makefile
index 6aff5f47c21..6315424a00b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 26
-EXTRAVERSION = -rc8
+EXTRAVERSION = -rc9
NAME = Rotary Wombat
# *DOCUMENTATION*
diff --git a/arch/mips/mm/c-r3k.c b/arch/mips/mm/c-r3k.c
index 76935e32021..27a5b466c85 100644
--- a/arch/mips/mm/c-r3k.c
+++ b/arch/mips/mm/c-r3k.c
@@ -26,7 +26,7 @@
static unsigned long icache_size, dcache_size; /* Size in bytes */
static unsigned long icache_lsize, dcache_lsize; /* Size in bytes */
-unsigned long __init r3k_cache_size(unsigned long ca_flags)
+unsigned long __cpuinit r3k_cache_size(unsigned long ca_flags)
{
unsigned long flags, status, dummy, size;
volatile unsigned long *p;
@@ -61,7 +61,7 @@ unsigned long __init r3k_cache_size(unsigned long ca_flags)
return size * sizeof(*p);
}
-unsigned long __init r3k_cache_lsize(unsigned long ca_flags)
+unsigned long __cpuinit r3k_cache_lsize(unsigned long ca_flags)
{
unsigned long flags, status, lsize, i;
volatile unsigned long *p;
@@ -90,7 +90,7 @@ unsigned long __init r3k_cache_lsize(unsigned long ca_flags)
return lsize * sizeof(*p);
}
-static void __init r3k_probe_cache(void)
+static void __cpuinit r3k_probe_cache(void)
{
dcache_size = r3k_cache_size(ST0_ISC);
if (dcache_size)
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index 1edf0cbbeed..1417c649485 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -235,13 +235,12 @@ static void __cpuinit set_prefetch_parameters(void)
}
/*
* Too much unrolling will overflow the available space in
- * clear_space_array / copy_page_array. 8 words sounds generous,
- * but a R4000 with 128 byte L2 line length can exceed even that.
+ * clear_space_array / copy_page_array.
*/
- half_clear_loop_size = min(8 * clear_word_size,
+ half_clear_loop_size = min(16 * clear_word_size,
max(cache_line_size >> 1,
4 * clear_word_size));
- half_copy_loop_size = min(8 * copy_word_size,
+ half_copy_loop_size = min(16 * copy_word_size,
max(cache_line_size >> 1,
4 * copy_word_size));
}
@@ -263,21 +262,23 @@ static inline void __cpuinit build_clear_pref(u32 **buf, int off)
if (pref_bias_clear_store) {
uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
A0);
- } else if (cpu_has_cache_cdex_s) {
- uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
- } else if (cpu_has_cache_cdex_p) {
- if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
- uasm_i_nop(buf);
- uasm_i_nop(buf);
- uasm_i_nop(buf);
- uasm_i_nop(buf);
- }
+ } else if (cache_line_size == (half_clear_loop_size << 1)) {
+ if (cpu_has_cache_cdex_s) {
+ uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
+ } else if (cpu_has_cache_cdex_p) {
+ if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
+ uasm_i_nop(buf);
+ uasm_i_nop(buf);
+ uasm_i_nop(buf);
+ uasm_i_nop(buf);
+ }
- if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
- uasm_i_lw(buf, ZERO, ZERO, AT);
+ if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+ uasm_i_lw(buf, ZERO, ZERO, AT);
- uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
- }
+ uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
+ }
+ }
}
void __cpuinit build_clear_page(void)
@@ -403,20 +404,22 @@ static inline void build_copy_store_pref(u32 **buf, int off)
if (pref_bias_copy_store) {
uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
A0);
- } else if (cpu_has_cache_cdex_s) {
- uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
- } else if (cpu_has_cache_cdex_p) {
- if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
- uasm_i_nop(buf);
- uasm_i_nop(buf);
- uasm_i_nop(buf);
- uasm_i_nop(buf);
- }
+ } else if (cache_line_size == (half_copy_loop_size << 1)) {
+ if (cpu_has_cache_cdex_s) {
+ uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
+ } else if (cpu_has_cache_cdex_p) {
+ if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
+ uasm_i_nop(buf);
+ uasm_i_nop(buf);
+ uasm_i_nop(buf);
+ uasm_i_nop(buf);
+ }
- if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
- uasm_i_lw(buf, ZERO, ZERO, AT);
+ if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+ uasm_i_lw(buf, ZERO, ZERO, AT);
- uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
+ uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
+ }
}
}
diff --git a/arch/mips/mm/sc-rm7k.c b/arch/mips/mm/sc-rm7k.c
index fc227f3b119..e3abfb2d7e8 100644
--- a/arch/mips/mm/sc-rm7k.c
+++ b/arch/mips/mm/sc-rm7k.c
@@ -86,7 +86,7 @@ static void rm7k_sc_inv(unsigned long addr, unsigned long size)
/*
* This function is executed in uncached address space.
*/
-static __init void __rm7k_sc_enable(void)
+static __cpuinit void __rm7k_sc_enable(void)
{
int i;
@@ -107,7 +107,7 @@ static __init void __rm7k_sc_enable(void)
}
}
-static __init void rm7k_sc_enable(void)
+static __cpuinit void rm7k_sc_enable(void)
{
if (read_c0_config() & RM7K_CONF_SE)
return;
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index cf37f5ca4b7..4d96e1db55e 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -33,13 +33,14 @@ static struct legacy_serial_info {
phys_addr_t taddr;
} legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS];
-static struct __initdata of_device_id parents[] = {
+static struct __initdata of_device_id legacy_serial_parents[] = {
{.type = "soc",},
{.type = "tsi-bridge",},
{.type = "opb", },
{.compatible = "ibm,opb",},
{.compatible = "simple-bus",},
{.compatible = "wrs,epld-localbus",},
+ {},
};
static unsigned int legacy_serial_count;
@@ -327,7 +328,7 @@ void __init find_legacy_serial_ports(void)
struct device_node *parent = of_get_parent(np);
if (!parent)
continue;
- if (of_match_node(parents, parent) != NULL) {
+ if (of_match_node(legacy_serial_parents, parent) != NULL) {
index = add_legacy_soc_port(np, np);
if (index >= 0 && np == stdout)
legacy_serial_console = index;
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index e79ad8afda0..3f37a6e6277 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -76,6 +76,8 @@ struct of_device* of_platform_device_create(struct device_node *np,
return NULL;
dev->dma_mask = 0xffffffffUL;
+ dev->dev.coherent_dma_mask = DMA_32BIT_MASK;
+
dev->dev.bus = &of_platform_bus_type;
/* We do not fill the DMA ops for platform devices by default.
diff --git a/arch/um/Makefile b/arch/um/Makefile
index dbeab15e7bb..ca40397017b 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -77,7 +77,6 @@ include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS)
KERNEL_DEFINES = $(strip -Derrno=kernel_errno -Dsigprocmask=kernel_sigprocmask \
-Dmktime=kernel_mktime $(ARCH_KERNEL_DEFINES))
KBUILD_CFLAGS += $(KERNEL_DEFINES)
-KBUILD_CFLAGS += $(call cc-option,-fno-unit-at-a-time,)
PHONY += linux
diff --git a/arch/um/Makefile-i386 b/arch/um/Makefile-i386
index 561e373bd85..302cbe50454 100644
--- a/arch/um/Makefile-i386
+++ b/arch/um/Makefile-i386
@@ -32,4 +32,11 @@ cflags-y += $(call cc-option,-mpreferred-stack-boundary=2)
# an unresolved reference.
cflags-y += -ffreestanding
+# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
+# a lot more stack due to the lack of sharing of stacklots. Also, gcc
+# 4.3.0 needs -funit-at-a-time for extern inline functions.
+KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \
+ echo $(call cc-option,-fno-unit-at-a-time); \
+ else echo $(call cc-option,-funit-at-a-time); fi ;)
+
KBUILD_CFLAGS += $(cflags-y)
diff --git a/arch/um/Makefile-x86_64 b/arch/um/Makefile-x86_64
index 8ed362f9358..a9cd7e77a7a 100644
--- a/arch/um/Makefile-x86_64
+++ b/arch/um/Makefile-x86_64
@@ -21,3 +21,6 @@ HEADER_ARCH := x86
LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64
LINK-y += -m64
+
+# Do unit-at-a-time unconditionally on x86_64, following the host
+KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
diff --git a/arch/x86/kernel/.gitignore b/arch/x86/kernel/.gitignore
index 4ea38a39aed..08f4fd73146 100644
--- a/arch/x86/kernel/.gitignore
+++ b/arch/x86/kernel/.gitignore
@@ -1,2 +1,3 @@
vsyscall.lds
vsyscall_32.lds
+vmlinux.lds
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 940185ecaed..6e64aaf00d1 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -328,18 +328,18 @@ static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = {
#endif
{
.callback = set_bf_sort,
- .ident = "HP ProLiant DL360",
+ .ident = "HP ProLiant DL385 G2",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "HP"),
- DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL360"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL385 G2"),
},
},
{
.callback = set_bf_sort,
- .ident = "HP ProLiant DL380",
+ .ident = "HP ProLiant DL585 G2",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "HP"),
- DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL380"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL585 G2"),
},
},
{}
diff --git a/crypto/chainiv.c b/crypto/chainiv.c
index 6da3f577e4d..9affadee328 100644
--- a/crypto/chainiv.c
+++ b/crypto/chainiv.c
@@ -117,6 +117,7 @@ static int chainiv_init(struct crypto_tfm *tfm)
static int async_chainiv_schedule_work(struct async_chainiv_ctx *ctx)
{
int queued;
+ int err = ctx->err;
if (!ctx->queue.qlen) {
smp_mb__before_clear_bit();
@@ -131,7 +132,7 @@ static int async_chainiv_schedule_work(struct async_chainiv_ctx *ctx)
BUG_ON(!queued);
out:
- return ctx->err;
+ return err;
}
static int async_chainiv_postpone_request(struct skcipher_givcrypt_request *req)
@@ -227,6 +228,7 @@ static void async_chainiv_do_postponed(struct work_struct *work)
postponed);
struct skcipher_givcrypt_request *req;
struct ablkcipher_request *subreq;
+ int err;
/* Only handle one request at a time to avoid hogging keventd. */
spin_lock_bh(&ctx->lock);
@@ -241,7 +243,11 @@ static void async_chainiv_do_postponed(struct work_struct *work)
subreq = skcipher_givcrypt_reqctx(req);
subreq->base.flags |= CRYPTO_TFM_REQ_MAY_SLEEP;
- async_chainiv_givencrypt_tail(req);
+ err = async_chainiv_givencrypt_tail(req);
+
+ local_bh_disable();
+ skcipher_givcrypt_complete(req, err);
+ local_bh_enable();
}
static int async_chainiv_init(struct crypto_tfm *tfm)
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 6beabc5abd0..e47f6e02133 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -586,12 +586,6 @@ static void test_cipher(char *algo, int enc,
j = 0;
for (i = 0; i < tcount; i++) {
- data = kzalloc(template[i].ilen, GFP_KERNEL);
- if (!data)
- continue;
-
- memcpy(data, template[i].input, template[i].ilen);
-
if (template[i].iv)
memcpy(iv, template[i].iv, MAX_IVLEN);
else
@@ -613,10 +607,8 @@ static void test_cipher(char *algo, int enc,
printk("setkey() failed flags=%x\n",
crypto_ablkcipher_get_flags(tfm));
- if (!template[i].fail) {
- kfree(data);
+ if (!template[i].fail)
goto out;
- }
}
temp = 0;
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 061817a3a0e..5e6468a7ca4 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1777,7 +1777,7 @@ static irqreturn_t ahci_interrupt(int irq, void *dev_instance)
struct ahci_host_priv *hpriv;
unsigned int i, handled = 0;
void __iomem *mmio;
- u32 irq_stat;
+ u32 irq_stat, irq_masked;
VPRINTK("ENTER\n");
@@ -1786,16 +1786,17 @@ static irqreturn_t ahci_interrupt(int irq, void *dev_instance)
/* sigh. 0xffffffff is a valid return from h/w */
irq_stat = readl(mmio + HOST_IRQ_STAT);
- irq_stat &= hpriv->port_map;
if (!irq_stat)
return IRQ_NONE;
+ irq_masked = irq_stat & hpriv->port_map;
+
spin_lock(&host->lock);
for (i = 0; i < host->n_ports; i++) {
struct ata_port *ap;
- if (!(irq_stat & (1 << i)))
+ if (!(irq_masked & (1 << i)))
continue;
ap = host->ports[i];
@@ -1812,6 +1813,15 @@ static irqreturn_t ahci_interrupt(int irq, void *dev_instance)
handled = 1;
}
+ /* HOST_IRQ_STAT behaves as level triggered latch meaning that
+ * it should be cleared after all the port events are cleared;
+ * otherwise, it will raise a spurious interrupt after each
+ * valid one. Please read section 10.6.2 of ahci 1.1 for more
+ * information.
+ *
+ * Also, use the unmasked value to clear interrupt as spurious
+ * pending event on a dummy port might cause screaming IRQ.
+ */
writel(irq_stat, mmio + HOST_IRQ_STAT);
spin_unlock(&host->lock);
diff --git a/drivers/ide/arm/palm_bk3710.c b/drivers/ide/arm/palm_bk3710.c
index cc24803fadf..2f2b4f4cf22 100644
--- a/drivers/ide/arm/palm_bk3710.c
+++ b/drivers/ide/arm/palm_bk3710.c
@@ -76,7 +76,7 @@ struct palm_bk3710_udmatiming {
#include "../ide-timing.h"
-static long ide_palm_clk;
+static unsigned ideclk_period; /* in nanoseconds */
static const struct palm_bk3710_udmatiming palm_bk3710_udmatimings[6] = {
{160, 240}, /* UDMA Mode 0 */
@@ -86,8 +86,6 @@ static const struct palm_bk3710_udmatiming palm_bk3710_udmatimings[6] = {
{85, 60}, /* UDMA Mode 4 */
};
-static struct clk *ideclkp;
-
static void palm_bk3710_setudmamode(void __iomem *base, unsigned int dev,
unsigned int mode)
{
@@ -97,10 +95,10 @@ static void palm_bk3710_setudmamode(void __iomem *base, unsigned int dev,
/* DMA Data Setup */
t0 = DIV_ROUND_UP(palm_bk3710_udmatimings[mode].cycletime,
- ide_palm_clk) - 1;
- tenv = DIV_ROUND_UP(20, ide_palm_clk) - 1;
+ ideclk_period) - 1;
+ tenv = DIV_ROUND_UP(20, ideclk_period) - 1;
trp = DIV_ROUND_UP(palm_bk3710_udmatimings[mode].rptime,
- ide_palm_clk) - 1;
+ ideclk_period) - 1;
/* udmatim Register */
val16 = readw(base + BK3710_UDMATIM) & (dev ? 0xFF0F : 0xFFF0);
@@ -141,8 +139,8 @@ static void palm_bk3710_setdmamode(void __iomem *base, unsigned int dev,
cycletime = max_t(int, t->cycle, min_cycle);
/* DMA Data Setup */
- t0 = DIV_ROUND_UP(cycletime, ide_palm_clk);
- td = DIV_ROUND_UP(t->active, ide_palm_clk);
+ t0 = DIV_ROUND_UP(cycletime, ideclk_period);
+ td = DIV_ROUND_UP(t->active, ideclk_period);
tkw = t0 - td - 1;
td -= 1;
@@ -168,9 +166,9 @@ static void palm_bk3710_setpiomode(void __iomem *base, ide_drive_t *mate,
struct ide_timing *t;
/* PIO Data Setup */
- t0 = DIV_ROUND_UP(cycletime, ide_palm_clk);
+ t0 = DIV_ROUND_UP(cycletime, ideclk_period);
t2 = DIV_ROUND_UP(ide_timing_find_mode(XFER_PIO_0 + mode)->active,
- ide_palm_clk);
+ ideclk_period);
t2i = t0 - t2 - 1;
t2 -= 1;
@@ -192,8 +190,8 @@ static void palm_bk3710_setpiomode(void __iomem *base, ide_drive_t *mate,
/* TASKFILE Setup */
t = ide_timing_find_mode(XFER_PIO_0 + mode);
- t0 = DIV_ROUND_UP(t->cyc8b, ide_palm_clk);
- t2 = DIV_ROUND_UP(t->act8b, ide_palm_clk);
+ t0 = DIV_ROUND_UP(t->cyc8b, ideclk_period);
+ t2 = DIV_ROUND_UP(t->act8b, ideclk_period);
t2i = t0 - t2 - 1;
t2 -= 1;
@@ -350,22 +348,22 @@ static const struct ide_port_info __devinitdata palm_bk3710_port_info = {
static int __devinit palm_bk3710_probe(struct platform_device *pdev)
{
- struct clk *clkp;
+ struct clk *clk;
struct resource *mem, *irq;
ide_hwif_t *hwif;
- unsigned long base;
+ unsigned long base, rate;
int i;
hw_regs_t hw;
u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
- clkp = clk_get(NULL, "IDECLK");
- if (IS_ERR(clkp))
+ clk = clk_get(NULL, "IDECLK");
+ if (IS_ERR(clk))
return -ENODEV;
- ideclkp = clkp;
- clk_enable(ideclkp);
- ide_palm_clk = clk_get_rate(ideclkp)/100000;
- ide_palm_clk = (10000/ide_palm_clk) + 1;
+ clk_enable(clk);
+ rate = clk_get_rate(clk);
+ ideclk_period = 1000000000UL / rate;
+
/* Register the IDE interface with Linux ATA Interface */
memset(&hw, 0, sizeof(hw));
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 380fa0c8cc8..26e68b65b7c 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -646,8 +646,6 @@ static int ide_register_port(ide_hwif_t *hwif)
goto out;
}
- get_device(&hwif->gendev);
-
hwif->portdev = device_create_drvdata(ide_port_class, &hwif->gendev,
MKDEV(0, 0), hwif, hwif->name);
if (IS_ERR(hwif->portdev)) {
@@ -1220,16 +1218,12 @@ static void drive_release_dev (struct device *dev)
complete(&drive->gendev_rel_comp);
}
-#ifndef ide_default_irq
-#define ide_default_irq(irq) 0
-#endif
-
static int hwif_init(ide_hwif_t *hwif)
{
int old_irq;
if (!hwif->irq) {
- hwif->irq = ide_default_irq(hwif->io_ports.data_addr);
+ hwif->irq = __ide_default_irq(hwif->io_ports.data_addr);
if (!hwif->irq) {
printk("%s: DISABLED, NO IRQ\n", hwif->name);
return 0;
@@ -1259,7 +1253,7 @@ static int hwif_init(ide_hwif_t *hwif)
* It failed to initialise. Find the default IRQ for
* this port and try that.
*/
- hwif->irq = ide_default_irq(hwif->io_ports.data_addr);
+ hwif->irq = __ide_default_irq(hwif->io_ports.data_addr);
if (!hwif->irq) {
printk("%s: Disabled unable to get IRQ %d.\n",
hwif->name, old_irq);
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index c758dcb13b1..300431d080a 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -315,13 +315,14 @@ void ide_unregister(ide_hwif_t *hwif)
BUG_ON(in_interrupt());
BUG_ON(irqs_disabled());
+
mutex_lock(&ide_cfg_mtx);
- spin_lock_irq(&ide_lock);
- if (!hwif->present)
- goto abort;
- __ide_port_unregister_devices(hwif);
- hwif->present = 0;
+ spin_lock_irq(&ide_lock);
+ if (hwif->present) {
+ __ide_port_unregister_devices(hwif);
+ hwif->present = 0;
+ }
spin_unlock_irq(&ide_lock);
ide_proc_unregister_port(hwif);
@@ -351,16 +352,15 @@ void ide_unregister(ide_hwif_t *hwif)
blk_unregister_region(MKDEV(hwif->major, 0), MAX_DRIVES<<PARTN_BITS);
kfree(hwif->sg_table);
unregister_blkdev(hwif->major, hwif->name);
- spin_lock_irq(&ide_lock);
if (hwif->dma_base)
ide_release_dma_engine(hwif);
+ spin_lock_irq(&ide_lock);
/* restore hwif data to pristine status */
ide_init_port_data(hwif, hwif->index);
-
-abort:
spin_unlock_irq(&ide_lock);
+
mutex_unlock(&ide_cfg_mtx);
}
@@ -1094,13 +1094,6 @@ struct bus_type ide_bus_type = {
EXPORT_SYMBOL_GPL(ide_bus_type);
-static void ide_port_class_release(struct device *portdev)
-{
- ide_hwif_t *hwif = dev_get_drvdata(portdev);
-
- put_device(&hwif->gendev);
-}
-
int ide_vlb_clk;
EXPORT_SYMBOL_GPL(ide_vlb_clk);
@@ -1305,7 +1298,6 @@ static int __init ide_init(void)
ret = PTR_ERR(ide_port_class);
goto out_port_class;
}
- ide_port_class->dev_release = ide_port_class_release;
init_ide_data();
diff --git a/drivers/ide/pci/it8213.c b/drivers/ide/pci/it8213.c
index 9053c8771e6..2b71bdf74e7 100644
--- a/drivers/ide/pci/it8213.c
+++ b/drivers/ide/pci/it8213.c
@@ -184,8 +184,7 @@ static const struct ide_port_info it8213_chipsets[] __devinitdata = {
static int __devinit it8213_init_one(struct pci_dev *dev, const struct pci_device_id *id)
{
- ide_setup_pci_device(dev, &it8213_chipsets[id->driver_data]);
- return 0;
+ return ide_setup_pci_device(dev, &it8213_chipsets[id->driver_data]);
}
static const struct pci_device_id it8213_pci_tbl[] = {
diff --git a/drivers/ide/pci/ns87415.c b/drivers/ide/pci/ns87415.c
index fec4955f449..a7a41bb8277 100644
--- a/drivers/ide/pci/ns87415.c
+++ b/drivers/ide/pci/ns87415.c
@@ -225,10 +225,6 @@ static int ns87415_dma_setup(ide_drive_t *drive)
return 1;
}
-#ifndef ide_default_irq
-#define ide_default_irq(irq) 0
-#endif
-
static void __devinit init_hwif_ns87415 (ide_hwif_t *hwif)
{
struct pci_dev *dev = to_pci_dev(hwif->dev);
@@ -288,7 +284,7 @@ static void __devinit init_hwif_ns87415 (ide_hwif_t *hwif)
}
if (!using_inta)
- hwif->irq = ide_default_irq(hwif->io_ports.data_addr);
+ hwif->irq = __ide_default_irq(hwif->io_ports.data_addr);
else if (!hwif->irq && hwif->mate && hwif->mate->irq)
hwif->irq = hwif->mate->irq; /* share IRQ with mate */
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 8934178a23e..95f82cfb6c5 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1096,7 +1096,9 @@ static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, ch
struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
PDBG("%s dev 0x%p\n", __func__, dev);
+ rtnl_lock();
lldev->ethtool_ops->get_drvinfo(lldev, &info);
+ rtnl_unlock();
return sprintf(buf, "%s\n", info.fw_version);
}
@@ -1109,7 +1111,9 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
PDBG("%s dev 0x%p\n", __func__, dev);
+ rtnl_lock();
lldev->ethtool_ops->get_drvinfo(lldev, &info);
+ rtnl_unlock();
return sprintf(buf, "%s\n", info.driver);
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 54c8ee28fcc..3b27df52456 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2017,12 +2017,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
*/
s->uptodate++;
return 0; /* uptodate + compute == disks */
- } else if ((s->uptodate < disks - 1) &&
- test_bit(R5_Insync, &dev->flags)) {
- /* Note: we hold off compute operations while checks are
- * in flight, but we still prefer 'compute' over 'read'
- * hence we only read if (uptodate < * disks-1)
- */
+ } else if (test_bit(R5_Insync, &dev->flags)) {
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantread, &dev->flags);
if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index 65210fca37e..d89475d3698 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -114,6 +114,7 @@ static void pxamci_setup_data(struct pxamci_host *host, struct mmc_data *data)
unsigned int nob = data->blocks;
unsigned long long clks;
unsigned int timeout;
+ bool dalgn = 0;
u32 dcmd;
int i;
@@ -152,6 +153,9 @@ static void pxamci_setup_data(struct pxamci_host *host, struct mmc_data *data)
host->sg_cpu[i].dcmd = dcmd | length;
if (length & 31 && !(data->flags & MMC_DATA_READ))
host->sg_cpu[i].dcmd |= DCMD_ENDIRQEN;
+ /* Not aligned to 8-byte boundary? */
+ if (sg_dma_address(&data->sg[i]) & 0x7)
+ dalgn = 1;
if (data->flags & MMC_DATA_READ) {
host->sg_cpu[i].dsadr = host->res->start + MMC_RXFIFO;
host->sg_cpu[i].dtadr = sg_dma_address(&data->sg[i]);
@@ -165,6 +169,15 @@ static void pxamci_setup_data(struct pxamci_host *host, struct mmc_data *data)
host->sg_cpu[host->dma_len - 1].ddadr = DDADR_STOP;
wmb();
+ /*
+ * The PXA27x DMA controller encounters overhead when working with
+ * unaligned (to 8-byte boundaries) data, so switch on byte alignment
+ * mode only if we have unaligned data.
+ */
+ if (dalgn)
+ DALGN |= (1 << host->dma);
+ else
+ DALGN &= (1 << host->dma);
DDADR(host->dma) = host->sg_dma;
DCSR(host->dma) = DCSR_RUN;
}
diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h
index fe872fbd671..e01926b7b5b 100644
--- a/drivers/net/ehea/ehea.h
+++ b/drivers/net/ehea/ehea.h
@@ -40,7 +40,7 @@
#include <asm/io.h>
#define DRV_NAME "ehea"
-#define DRV_VERSION "EHEA_0091"
+#define DRV_VERSION "EHEA_0092"
/* eHEA capability flags */
#define DLPAR_PORT_ADD_REM 1
@@ -452,7 +452,7 @@ struct ehea_bcmc_reg_entry {
struct ehea_bcmc_reg_array {
struct ehea_bcmc_reg_entry *arr;
int num_entries;
- struct mutex lock;
+ spinlock_t lock;
};
#define EHEA_PORT_UP 1
@@ -478,6 +478,7 @@ struct ehea_port {
int num_add_tx_qps;
int num_mcs;
int resets;
+ u64 flags;
u64 mac_addr;
u32 logical_port_id;
u32 port_speed;
@@ -501,7 +502,8 @@ struct port_res_cfg {
};
enum ehea_flag_bits {
- __EHEA_STOP_XFER
+ __EHEA_STOP_XFER,
+ __EHEA_DISABLE_PORT_RESET
};
void ehea_set_ethtool_ops(struct net_device *netdev);
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 075fd547421..0920b796bd7 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -118,6 +118,7 @@ static struct of_device_id ehea_device_table[] = {
},
{},
};
+MODULE_DEVICE_TABLE(of, ehea_device_table);
static struct of_platform_driver ehea_driver = {
.name = "ehea",
@@ -137,6 +138,12 @@ void ehea_dump(void *adr, int len, char *msg)
}
}
+void ehea_schedule_port_reset(struct ehea_port *port)
+{
+ if (!test_bit(__EHEA_DISABLE_PORT_RESET, &port->flags))
+ schedule_work(&port->reset_task);
+}
+
static void ehea_update_firmware_handles(void)
{
struct ehea_fw_handle_entry *arr = NULL;
@@ -241,7 +248,7 @@ static void ehea_update_bcmc_registrations(void)
}
if (num_registrations) {
- arr = kzalloc(num_registrations * sizeof(*arr), GFP_KERNEL);
+ arr = kzalloc(num_registrations * sizeof(*arr), GFP_ATOMIC);
if (!arr)
return; /* Keep the existing array */
} else
@@ -301,7 +308,7 @@ static struct net_device_stats *ehea_get_stats(struct net_device *dev)
memset(stats, 0, sizeof(*stats));
- cb2 = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ cb2 = kzalloc(PAGE_SIZE, GFP_ATOMIC);
if (!cb2) {
ehea_error("no mem for cb2");
goto out;
@@ -587,7 +594,7 @@ static int ehea_treat_poll_error(struct ehea_port_res *pr, int rq,
"Resetting port.", pr->qp->init_attr.qp_nr);
ehea_dump(cqe, sizeof(*cqe), "CQE");
}
- schedule_work(&pr->port->reset_task);
+ ehea_schedule_port_reset(pr->port);
return 1;
}
@@ -616,7 +623,7 @@ static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
*tcph = tcp_hdr(skb);
/* check if ip header and tcp header are complete */
- if (iph->tot_len < ip_len + tcp_hdrlen(skb))
+ if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
return -1;
*hdr_flags = LRO_IPV4 | LRO_TCP;
@@ -765,7 +772,7 @@ static struct ehea_cqe *ehea_proc_cqes(struct ehea_port_res *pr, int my_quota)
ehea_error("Send Completion Error: Resetting port");
if (netif_msg_tx_err(pr->port))
ehea_dump(cqe, sizeof(*cqe), "Send CQE");
- schedule_work(&pr->port->reset_task);
+ ehea_schedule_port_reset(pr->port);
break;
}
@@ -885,7 +892,7 @@ static irqreturn_t ehea_qp_aff_irq_handler(int irq, void *param)
eqe = ehea_poll_eq(port->qp_eq);
}
- schedule_work(&port->reset_task);
+ ehea_schedule_port_reset(port);
return IRQ_HANDLED;
}
@@ -1763,7 +1770,7 @@ static int ehea_set_mac_addr(struct net_device *dev, void *sa)
memcpy(dev->dev_addr, mac_addr->sa_data, dev->addr_len);
- mutex_lock(&ehea_bcmc_regs.lock);
+ spin_lock(&ehea_bcmc_regs.lock);
/* Deregister old MAC in pHYP */
if (port->state == EHEA_PORT_UP) {
@@ -1785,7 +1792,7 @@ static int ehea_set_mac_addr(struct net_device *dev, void *sa)
out_upregs:
ehea_update_bcmc_registrations();
- mutex_unlock(&ehea_bcmc_regs.lock);
+ spin_unlock(&ehea_bcmc_regs.lock);
out_free:
kfree(cb0);
out:
@@ -1947,7 +1954,7 @@ static void ehea_set_multicast_list(struct net_device *dev)
}
ehea_promiscuous(dev, 0);
- mutex_lock(&ehea_bcmc_regs.lock);
+ spin_lock(&ehea_bcmc_regs.lock);
if (dev->flags & IFF_ALLMULTI) {
ehea_allmulti(dev, 1);
@@ -1978,7 +1985,7 @@ static void ehea_set_multicast_list(struct net_device *dev)
}
out:
ehea_update_bcmc_registrations();
- mutex_unlock(&ehea_bcmc_regs.lock);
+ spin_unlock(&ehea_bcmc_regs.lock);
return;
}
@@ -2497,7 +2504,7 @@ static int ehea_up(struct net_device *dev)
}
}
- mutex_lock(&ehea_bcmc_regs.lock);
+ spin_lock(&ehea_bcmc_regs.lock);
ret = ehea_broadcast_reg_helper(port, H_REG_BCMC);
if (ret) {
@@ -2520,7 +2527,7 @@ out:
ehea_info("Failed starting %s. ret=%i", dev->name, ret);
ehea_update_bcmc_registrations();
- mutex_unlock(&ehea_bcmc_regs.lock);
+ spin_unlock(&ehea_bcmc_regs.lock);
ehea_update_firmware_handles();
mutex_unlock(&ehea_fw_handles.lock);
@@ -2575,7 +2582,7 @@ static int ehea_down(struct net_device *dev)
mutex_lock(&ehea_fw_handles.lock);
- mutex_lock(&ehea_bcmc_regs.lock);
+ spin_lock(&ehea_bcmc_regs.lock);
ehea_drop_multicast_list(dev);
ehea_broadcast_reg_helper(port, H_DEREG_BCMC);
@@ -2584,7 +2591,7 @@ static int ehea_down(struct net_device *dev)
port->state = EHEA_PORT_DOWN;
ehea_update_bcmc_registrations();
- mutex_unlock(&ehea_bcmc_regs.lock);
+ spin_unlock(&ehea_bcmc_regs.lock);
ret = ehea_clean_all_portres(port);
if (ret)
@@ -2605,13 +2612,14 @@ static int ehea_stop(struct net_device *dev)
if (netif_msg_ifdown(port))
ehea_info("disabling port %s", dev->name);
+ set_bit(__EHEA_DISABLE_PORT_RESET, &port->flags);
cancel_work_sync(&port->reset_task);
-
mutex_lock(&port->port_lock);
netif_stop_queue(dev);
port_napi_disable(port);
ret = ehea_down(dev);
mutex_unlock(&port->port_lock);
+ clear_bit(__EHEA_DISABLE_PORT_RESET, &port->flags);
return ret;
}
@@ -2941,7 +2949,7 @@ static void ehea_tx_watchdog(struct net_device *dev)
if (netif_carrier_ok(dev) &&
!test_bit(__EHEA_STOP_XFER, &ehea_driver_flags))
- schedule_work(&port->reset_task);
+ ehea_schedule_port_reset(port);
}
int ehea_sense_adapter_attr(struct ehea_adapter *adapter)
@@ -3590,7 +3598,7 @@ int __init ehea_module_init(void)
memset(&ehea_bcmc_regs, 0, sizeof(ehea_bcmc_regs));
mutex_init(&ehea_fw_handles.lock);
- mutex_init(&ehea_bcmc_regs.lock);
+ spin_lock_init(&ehea_bcmc_regs.lock);
ret = check_module_parm();
if (ret)
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 2cb24476329..20d4fe96a81 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -4194,12 +4194,23 @@ static int nv_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
netif_carrier_off(dev);
if (netif_running(dev)) {
+ unsigned long flags;
+
nv_disable_irq(dev);
netif_tx_lock_bh(dev);
- spin_lock(&np->lock);
+ /* with plain spinlock lockdep complains */
+ spin_lock_irqsave(&np->lock, flags);
/* stop engines */
+ /* FIXME:
+ * this can take some time, and interrupts are disabled
+ * due to spin_lock_irqsave, but let's hope no daemon
+ * is going to change the settings very often...
+ * Worst case:
+ * NV_RXSTOP_DELAY1MAX + NV_TXSTOP_DELAY1MAX
+ * + some minor delays, which is up to a second approximately
+ */
nv_stop_rxtx(dev);
- spin_unlock(&np->lock);
+ spin_unlock_irqrestore(&np->lock, flags);
netif_tx_unlock_bh(dev);
}
diff --git a/drivers/net/fs_enet/mac-fcc.c b/drivers/net/fs_enet/mac-fcc.c
index e36321152d5..8268b3535b3 100644
--- a/drivers/net/fs_enet/mac-fcc.c
+++ b/drivers/net/fs_enet/mac-fcc.c
@@ -463,6 +463,9 @@ static void restart(struct net_device *dev)
else
C32(fccp, fcc_fpsmr, FCC_PSMR_FDE | FCC_PSMR_LPB);
+ /* Restore multicast and promiscuous settings */
+ set_multicast_list(dev);
+
S32(fccp, fcc_gfmr, FCC_GFMR_ENR | FCC_GFMR_ENT);
}
diff --git a/drivers/net/ibm_newemac/core.c b/drivers/net/ibm_newemac/core.c
index 5d2108c5ac7..babc79ad490 100644
--- a/drivers/net/ibm_newemac/core.c
+++ b/drivers/net/ibm_newemac/core.c
@@ -1636,6 +1636,12 @@ static int emac_poll_rx(void *param, int budget)
goto next;
}
+ if (len < ETH_HLEN) {
+ ++dev->estats.rx_dropped_stack;
+ emac_recycle_rx_skb(dev, slot, len);
+ goto next;
+ }
+
if (len && len < EMAC_RX_COPY_THRESH) {
struct sk_buff *copy_skb =
alloc_skb(len + EMAC_RX_SKB_HEADROOM + 2, GFP_ATOMIC);
@@ -2719,6 +2725,8 @@ static int __devinit emac_probe(struct of_device *ofdev,
/* Clean rings */
memset(dev->tx_desc, 0, NUM_TX_BUFF * sizeof(struct mal_descriptor));
memset(dev->rx_desc, 0, NUM_RX_BUFF * sizeof(struct mal_descriptor));
+ memset(dev->tx_skb, 0, NUM_TX_BUFF * sizeof(struct sk_buff *));
+ memset(dev->rx_skb, 0, NUM_RX_BUFF * sizeof(struct sk_buff *));
/* Attach to ZMII, if needed */
if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII) &&
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c
index 3b2a6c59808..993d87c9296 100644
--- a/drivers/net/pasemi_mac.c
+++ b/drivers/net/pasemi_mac.c
@@ -277,7 +277,7 @@ static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
*tcph = tcp_hdr(skb);
/* check if ip header and tcp header are complete */
- if (iph->tot_len < ip_len + tcp_hdrlen(skb))
+ if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
return -1;
*hdr_flags = LRO_IPV4 | LRO_TCP;
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 520bb0b1a9a..6d35155c714 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -1008,6 +1008,7 @@ static int fr_rx(struct sk_buff *skb)
stats->rx_bytes += skb->len;
if (pvc->state.becn)
stats->rx_compressed++;
+ skb->dev = dev;
netif_rx(skb);
return NET_RX_SUCCESS;
} else {
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c
index 62a3d8f8563..f5387a7a76c 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945.c
@@ -588,8 +588,12 @@ static void iwl3945_add_radiotap(struct iwl3945_priv *priv,
if (rate == -1)
iwl3945_rt->rt_rate = 0;
- else
+ else {
+ if (stats->band == IEEE80211_BAND_5GHZ)
+ rate += IWL_FIRST_OFDM_RATE;
+
iwl3945_rt->rt_rate = iwl3945_rates[rate].ieee;
+ }
/* antenna number */
antenna = phy_flags_hw & RX_RES_PHY_FLAGS_ANTENNA_MSK;
diff --git a/drivers/net/wireless/iwlwifi/iwl-4965.c b/drivers/net/wireless/iwlwifi/iwl-4965.c
index bf19eb8aafd..de330ae0ca9 100644
--- a/drivers/net/wireless/iwlwifi/iwl-4965.c
+++ b/drivers/net/wireless/iwlwifi/iwl-4965.c
@@ -3528,8 +3528,12 @@ static void iwl4965_add_radiotap(struct iwl_priv *priv,
if (rate == -1)
iwl4965_rt->rt_rate = 0;
- else
+ else {
+ if (stats->band == IEEE80211_BAND_5GHZ)
+ rate += IWL_FIRST_OFDM_RATE;
+
iwl4965_rt->rt_rate = iwl4965_rates[rate].ieee;
+ }
/*
* "antenna number"
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index b1b3c523185..6027e1119c3 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -6687,7 +6687,8 @@ static int iwl3945_mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
if (priv->iw_mode == IEEE80211_IF_TYPE_MNTR) {
IWL_DEBUG_MAC80211("leave - monitor\n");
- return -1;
+ dev_kfree_skb_any(skb);
+ return 0;
}
IWL_DEBUG_TX("dev->xmit(%d bytes) at rate 0x%02x\n", skb->len,
diff --git a/drivers/net/wireless/iwlwifi/iwl4965-base.c b/drivers/net/wireless/iwlwifi/iwl4965-base.c
index 5ed16ce7846..0bd55bb1973 100644
--- a/drivers/net/wireless/iwlwifi/iwl4965-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl4965-base.c
@@ -6237,7 +6237,8 @@ static int iwl4965_mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
if (priv->iw_mode == IEEE80211_IF_TYPE_MNTR) {
IWL_DEBUG_MAC80211("leave - monitor\n");
- return -1;
+ dev_kfree_skb_any(skb);
+ return 0;
}
IWL_DEBUG_TX("dev->xmit(%d bytes) at rate 0x%02x\n", skb->len,
diff --git a/drivers/net/wireless/libertas/if_usb.c b/drivers/net/wireless/libertas/if_usb.c
index 8032df72aaa..36288b29abf 100644
--- a/drivers/net/wireless/libertas/if_usb.c
+++ b/drivers/net/wireless/libertas/if_usb.c
@@ -925,6 +925,7 @@ static struct usb_driver if_usb_driver = {
.id_table = if_usb_table,
.suspend = if_usb_suspend,
.resume = if_usb_resume,
+ .reset_resume = if_usb_resume,
};
static int __init if_usb_init_module(void)
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 7158dbb6e4b..42a436478b7 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -924,6 +924,15 @@ static int register_root_hub(struct usb_hcd *hcd)
return retval;
}
+void usb_enable_root_hub_irq (struct usb_bus *bus)
+{
+ struct usb_hcd *hcd;
+
+ hcd = container_of (bus, struct usb_hcd, self);
+ if (hcd->driver->hub_irq_enable && hcd->state != HC_STATE_HALT)
+ hcd->driver->hub_irq_enable (hcd);
+}
+
/*-------------------------------------------------------------------------*/
diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h
index a0bf5df6cb6..b9de1569b39 100644
--- a/drivers/usb/core/hcd.h
+++ b/drivers/usb/core/hcd.h
@@ -210,6 +210,8 @@ struct hc_driver {
int (*bus_suspend)(struct usb_hcd *);
int (*bus_resume)(struct usb_hcd *);
int (*start_port_reset)(struct usb_hcd *, unsigned port_num);
+ void (*hub_irq_enable)(struct usb_hcd *);
+ /* Needed only if port-change IRQs are level-triggered */
/* force handover of high-speed port to full-speed companion */
void (*relinquish_port)(struct usb_hcd *, int);
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 512d2d57d41..4cfe32a16c3 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2073,6 +2073,8 @@ int usb_port_resume(struct usb_device *udev)
}
clear_bit(port1, hub->busy_bits);
+ if (!hub->hdev->parent && !hub->busy_bits[0])
+ usb_enable_root_hub_irq(hub->hdev->bus);
if (status == 0)
status = finish_port_resume(udev);
@@ -3002,6 +3004,11 @@ static void hub_events(void)
hub->activating = 0;
+ /* If this is a root hub, tell the HCD it's okay to
+ * re-enable port-change interrupts now. */
+ if (!hdev->parent && !hub->busy_bits[0])
+ usb_enable_root_hub_irq(hdev->bus);
+
loop_autopm:
/* Allow autosuspend if we're not going to run again */
if (list_empty(&hub->event_list))
@@ -3227,6 +3234,8 @@ int usb_reset_device(struct usb_device *udev)
break;
}
clear_bit(port1, parent_hub->busy_bits);
+ if (!parent_hdev->parent && !parent_hub->busy_bits[0])
+ usb_enable_root_hub_irq(parent_hdev->bus);
if (ret < 0)
goto re_enumerate;
diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c
index c96db1153dc..e534f9de0f0 100644
--- a/drivers/usb/host/ohci-at91.c
+++ b/drivers/usb/host/ohci-at91.c
@@ -261,6 +261,7 @@ static const struct hc_driver ohci_at91_hc_driver = {
*/
.hub_status_data = ohci_hub_status_data,
.hub_control = ohci_hub_control,
+ .hub_irq_enable = ohci_rhsc_enable,
#ifdef CONFIG_PM
.bus_suspend = ohci_bus_suspend,
.bus_resume = ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-au1xxx.c b/drivers/usb/host/ohci-au1xxx.c
index 1b9abdba920..f90fe0c7373 100644
--- a/drivers/usb/host/ohci-au1xxx.c
+++ b/drivers/usb/host/ohci-au1xxx.c
@@ -288,6 +288,7 @@ static const struct hc_driver ohci_au1xxx_hc_driver = {
*/
.hub_status_data = ohci_hub_status_data,
.hub_control = ohci_hub_control,
+ .hub_irq_enable = ohci_rhsc_enable,
#ifdef CONFIG_PM
.bus_suspend = ohci_bus_suspend,
.bus_resume = ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-ep93xx.c b/drivers/usb/host/ohci-ep93xx.c
index 06aadfb0ec2..5adaf36e47d 100644
--- a/drivers/usb/host/ohci-ep93xx.c
+++ b/drivers/usb/host/ohci-ep93xx.c
@@ -135,6 +135,7 @@ static struct hc_driver ohci_ep93xx_hc_driver = {
.get_frame_number = ohci_get_frame,
.hub_status_data = ohci_hub_status_data,
.hub_control = ohci_hub_control,
+ .hub_irq_enable = ohci_rhsc_enable,
#ifdef CONFIG_PM
.bus_suspend = ohci_bus_suspend,
.bus_resume = ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-hub.c b/drivers/usb/host/ohci-hub.c
index 79a78029f89..b56739221d1 100644
--- a/drivers/usb/host/ohci-hub.c
+++ b/drivers/usb/host/ohci-hub.c
@@ -36,6 +36,18 @@
/*-------------------------------------------------------------------------*/
+/* hcd->hub_irq_enable() */
+static void ohci_rhsc_enable (struct usb_hcd *hcd)
+{
+ struct ohci_hcd *ohci = hcd_to_ohci (hcd);
+
+ spin_lock_irq(&ohci->lock);
+ if (!ohci->autostop)
+ del_timer(&hcd->rh_timer); /* Prevent next poll */
+ ohci_writel(ohci, OHCI_INTR_RHSC, &ohci->regs->intrenable);
+ spin_unlock_irq(&ohci->lock);
+}
+
#define OHCI_SCHED_ENABLES \
(OHCI_CTRL_CLE|OHCI_CTRL_BLE|OHCI_CTRL_PLE|OHCI_CTRL_IE)
@@ -362,28 +374,18 @@ static int ohci_root_hub_state_changes(struct ohci_hcd *ohci, int changed,
int any_connected)
{
int poll_rh = 1;
- int rhsc;
- rhsc = ohci_readl(ohci, &ohci->regs->intrenable) & OHCI_INTR_RHSC;
switch (ohci->hc_control & OHCI_CTRL_HCFS) {
case OHCI_USB_OPER:
- /* If no status changes are pending, enable status-change
- * interrupts.
- */
- if (!rhsc && !changed) {
- rhsc = OHCI_INTR_RHSC;
- ohci_writel(ohci, rhsc, &ohci->regs->intrenable);
- }
-
- /* Keep on polling until we know a device is connected
- * and RHSC is enabled, or until we autostop.
- */
+ /* keep on polling until we know a device is connected
+ * and RHSC is enabled */
if (!ohci->autostop) {
if (any_connected ||
!device_may_wakeup(&ohci_to_hcd(ohci)
->self.root_hub->dev)) {
- if (rhsc)
+ if (ohci_readl(ohci, &ohci->regs->intrenable) &
+ OHCI_INTR_RHSC)
poll_rh = 0;
} else {
ohci->autostop = 1;
@@ -396,13 +398,12 @@ static int ohci_root_hub_state_changes(struct ohci_hcd *ohci, int changed,
ohci->autostop = 0;
ohci->next_statechange = jiffies +
STATECHANGE_DELAY;
- } else if (rhsc && time_after_eq(jiffies,
+ } else if (time_after_eq(jiffies,
ohci->next_statechange)
&& !ohci->ed_rm_list
&& !(ohci->hc_control &
OHCI_SCHED_ENABLES)) {
ohci_rh_suspend(ohci, 1);
- poll_rh = 0;
}
}
break;
@@ -416,12 +417,6 @@ static int ohci_root_hub_state_changes(struct ohci_hcd *ohci, int changed,
else
usb_hcd_resume_root_hub(ohci_to_hcd(ohci));
} else {
- if (!rhsc && (ohci->autostop ||
- ohci_to_hcd(ohci)->self.root_hub->
- do_remote_wakeup))
- ohci_writel(ohci, OHCI_INTR_RHSC,
- &ohci->regs->intrenable);
-
/* everything is idle, no need for polling */
poll_rh = 0;
}
@@ -443,16 +438,12 @@ static inline int ohci_rh_resume(struct ohci_hcd *ohci)
static int ohci_root_hub_state_changes(struct ohci_hcd *ohci, int changed,
int any_connected)
{
- /* If RHSC is enabled, don't poll */
- if (ohci_readl(ohci, &ohci->regs->intrenable) & OHCI_INTR_RHSC)
- return 0;
+ int poll_rh = 1;
- /* If no status changes are pending, enable status-change interrupts */
- if (!changed) {
- ohci_writel(ohci, OHCI_INTR_RHSC, &ohci->regs->intrenable);
- return 0;
- }
- return 1;
+ /* keep on polling until RHSC is enabled */
+ if (ohci_readl(ohci, &ohci->regs->intrenable) & OHCI_INTR_RHSC)
+ poll_rh = 0;
+ return poll_rh;
}
#endif /* CONFIG_PM */
diff --git a/drivers/usb/host/ohci-lh7a404.c b/drivers/usb/host/ohci-lh7a404.c
index 96d14fa1d83..13c12ed2225 100644
--- a/drivers/usb/host/ohci-lh7a404.c
+++ b/drivers/usb/host/ohci-lh7a404.c
@@ -193,6 +193,7 @@ static const struct hc_driver ohci_lh7a404_hc_driver = {
*/
.hub_status_data = ohci_hub_status_data,
.hub_control = ohci_hub_control,
+ .hub_irq_enable = ohci_rhsc_enable,
#ifdef CONFIG_PM
.bus_suspend = ohci_bus_suspend,
.bus_resume = ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-omap.c b/drivers/usb/host/ohci-omap.c
index 6859fb5f1d6..3a7c24c0367 100644
--- a/drivers/usb/host/ohci-omap.c
+++ b/drivers/usb/host/ohci-omap.c
@@ -466,6 +466,7 @@ static const struct hc_driver ohci_omap_hc_driver = {
*/
.hub_status_data = ohci_hub_status_data,
.hub_control = ohci_hub_control,
+ .hub_irq_enable = ohci_rhsc_enable,
#ifdef CONFIG_PM
.bus_suspend = ohci_bus_suspend,
.bus_resume = ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c
index 3bf175d95a2..4696cc912e1 100644
--- a/drivers/usb/host/ohci-pci.c
+++ b/drivers/usb/host/ohci-pci.c
@@ -327,6 +327,7 @@ static const struct hc_driver ohci_pci_hc_driver = {
*/
.hub_status_data = ohci_hub_status_data,
.hub_control = ohci_hub_control,
+ .hub_irq_enable = ohci_rhsc_enable,
#ifdef CONFIG_PM
.bus_suspend = ohci_bus_suspend,
.bus_resume = ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-pnx4008.c b/drivers/usb/host/ohci-pnx4008.c
index 664f07ee873..28b458f20cc 100644
--- a/drivers/usb/host/ohci-pnx4008.c
+++ b/drivers/usb/host/ohci-pnx4008.c
@@ -280,6 +280,7 @@ static const struct hc_driver ohci_pnx4008_hc_driver = {
*/
.hub_status_data = ohci_hub_status_data,
.hub_control = ohci_hub_control,
+ .hub_irq_enable = ohci_rhsc_enable,
#ifdef CONFIG_PM
.bus_suspend = ohci_bus_suspend,
.bus_resume = ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-pnx8550.c b/drivers/usb/host/ohci-pnx8550.c
index 28467e288a9..605d59cba28 100644
--- a/drivers/usb/host/ohci-pnx8550.c
+++ b/drivers/usb/host/ohci-pnx8550.c
@@ -201,6 +201,7 @@ static const struct hc_driver ohci_pnx8550_hc_driver = {
*/
.hub_status_data = ohci_hub_status_data,
.hub_control = ohci_hub_control,
+ .hub_irq_enable = ohci_rhsc_enable,
#ifdef CONFIG_PM
.bus_suspend = ohci_bus_suspend,
.bus_resume = ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-ppc-of.c b/drivers/usb/host/ohci-ppc-of.c
index 50e55db1363..a6725279122 100644
--- a/drivers/usb/host/ohci-ppc-of.c
+++ b/drivers/usb/host/ohci-ppc-of.c
@@ -72,6 +72,7 @@ static const struct hc_driver ohci_ppc_of_hc_driver = {
*/
.hub_status_data = ohci_hub_status_data,
.hub_control = ohci_hub_control,
+ .hub_irq_enable = ohci_rhsc_enable,
#ifdef CONFIG_PM
.bus_suspend = ohci_bus_suspend,
.bus_resume = ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-ppc-soc.c b/drivers/usb/host/ohci-ppc-soc.c
index cd3398b675b..523c3012557 100644
--- a/drivers/usb/host/ohci-ppc-soc.c
+++ b/drivers/usb/host/ohci-ppc-soc.c
@@ -172,6 +172,7 @@ static const struct hc_driver ohci_ppc_soc_hc_driver = {
*/
.hub_status_data = ohci_hub_status_data,
.hub_control = ohci_hub_control,
+ .hub_irq_enable = ohci_rhsc_enable,
#ifdef CONFIG_PM
.bus_suspend = ohci_bus_suspend,
.bus_resume = ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-ps3.c b/drivers/usb/host/ohci-ps3.c
index bfdeb0d22d0..c1935ae537f 100644
--- a/drivers/usb/host/ohci-ps3.c
+++ b/drivers/usb/host/ohci-ps3.c
@@ -68,6 +68,7 @@ static const struct hc_driver ps3_ohci_hc_driver = {
.get_frame_number = ohci_get_frame,
.hub_status_data = ohci_hub_status_data,
.hub_control = ohci_hub_control,
+ .hub_irq_enable = ohci_rhsc_enable,
.start_port_reset = ohci_start_port_reset,
#if defined(CONFIG_PM)
.bus_suspend = ohci_bus_suspend,
diff --git a/drivers/usb/host/ohci-pxa27x.c b/drivers/usb/host/ohci-pxa27x.c
index 70b0d4b459e..d4ee27d92be 100644
--- a/drivers/usb/host/ohci-pxa27x.c
+++ b/drivers/usb/host/ohci-pxa27x.c
@@ -298,6 +298,7 @@ static const struct hc_driver ohci_pxa27x_hc_driver = {
*/
.hub_status_data = ohci_hub_status_data,
.hub_control = ohci_hub_control,
+ .hub_irq_enable = ohci_rhsc_enable,
#ifdef CONFIG_PM
.bus_suspend = ohci_bus_suspend,
.bus_resume = ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-s3c2410.c b/drivers/usb/host/ohci-s3c2410.c
index a73d2ff322e..ead4772f0f2 100644
--- a/drivers/usb/host/ohci-s3c2410.c
+++ b/drivers/usb/host/ohci-s3c2410.c
@@ -466,6 +466,7 @@ static const struct hc_driver ohci_s3c2410_hc_driver = {
*/
.hub_status_data = ohci_s3c2410_hub_status_data,
.hub_control = ohci_s3c2410_hub_control,
+ .hub_irq_enable = ohci_rhsc_enable,
#ifdef CONFIG_PM
.bus_suspend = ohci_bus_suspend,
.bus_resume = ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-sa1111.c b/drivers/usb/host/ohci-sa1111.c
index 99438c65981..0f48f2d9922 100644
--- a/drivers/usb/host/ohci-sa1111.c
+++ b/drivers/usb/host/ohci-sa1111.c
@@ -231,6 +231,7 @@ static const struct hc_driver ohci_sa1111_hc_driver = {
*/
.hub_status_data = ohci_hub_status_data,
.hub_control = ohci_hub_control,
+ .hub_irq_enable = ohci_rhsc_enable,
#ifdef CONFIG_PM
.bus_suspend = ohci_bus_suspend,
.bus_resume = ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-sh.c b/drivers/usb/host/ohci-sh.c
index 60f03cc7ec4..e7ee607278f 100644
--- a/drivers/usb/host/ohci-sh.c
+++ b/drivers/usb/host/ohci-sh.c
@@ -68,6 +68,7 @@ static const struct hc_driver ohci_sh_hc_driver = {
*/
.hub_status_data = ohci_hub_status_data,
.hub_control = ohci_hub_control,
+ .hub_irq_enable = ohci_rhsc_enable,
#ifdef CONFIG_PM
.bus_suspend = ohci_bus_suspend,
.bus_resume = ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-sm501.c b/drivers/usb/host/ohci-sm501.c
index e899a77dfb8..e610698c6b6 100644
--- a/drivers/usb/host/ohci-sm501.c
+++ b/drivers/usb/host/ohci-sm501.c
@@ -75,6 +75,7 @@ static const struct hc_driver ohci_sm501_hc_driver = {
*/
.hub_status_data = ohci_hub_status_data,
.hub_control = ohci_hub_control,
+ .hub_irq_enable = ohci_rhsc_enable,
#ifdef CONFIG_PM
.bus_suspend = ohci_bus_suspend,
.bus_resume = ohci_bus_resume,
diff --git a/drivers/usb/host/ohci-ssb.c b/drivers/usb/host/ohci-ssb.c
index c4265caec78..7275186db31 100644
--- a/drivers/usb/host/ohci-ssb.c
+++ b/drivers/usb/host/ohci-ssb.c
@@ -81,6 +81,7 @@ static const struct hc_driver ssb_ohci_hc_driver = {
.hub_status_data = ohci_hub_status_data,
.hub_control = ohci_hub_control,
+ .hub_irq_enable = ohci_rhsc_enable,
#ifdef CONFIG_PM
.bus_suspend = ohci_bus_suspend,
.bus_resume = ohci_bus_resume,
diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c
index f29307405bb..9b6323f768b 100644
--- a/drivers/usb/host/u132-hcd.c
+++ b/drivers/usb/host/u132-hcd.c
@@ -2934,6 +2934,16 @@ static int u132_start_port_reset(struct usb_hcd *hcd, unsigned port_num)
return 0;
}
+static void u132_hub_irq_enable(struct usb_hcd *hcd)
+{
+ struct u132 *u132 = hcd_to_u132(hcd);
+ if (u132->going > 1) {
+ dev_err(&u132->platform_dev->dev, "device has been removed %d\n"
+ , u132->going);
+ } else if (u132->going > 0)
+ dev_err(&u132->platform_dev->dev, "device is being removed\n");
+}
+
#ifdef CONFIG_PM
static int u132_bus_suspend(struct usb_hcd *hcd)
@@ -2985,6 +2995,7 @@ static struct hc_driver u132_hc_driver = {
.bus_suspend = u132_bus_suspend,
.bus_resume = u132_bus_resume,
.start_port_reset = u132_start_port_reset,
+ .hub_irq_enable = u132_hub_irq_enable,
};
/*
diff --git a/drivers/video/fsl-diu-fb.c b/drivers/video/fsl-diu-fb.c
index 712dabc6269..09d7e22c6fe 100644
--- a/drivers/video/fsl-diu-fb.c
+++ b/drivers/video/fsl-diu-fb.c
@@ -1324,7 +1324,7 @@ static int fsl_diu_suspend(struct of_device *ofdev, pm_message_t state)
{
struct fsl_diu_data *machine_data;
- machine_data = dev_get_drvdata(&dev->dev);
+ machine_data = dev_get_drvdata(&ofdev->dev);
disable_lcdc(machine_data->fsl_diu_info[0]);
return 0;
@@ -1334,7 +1334,7 @@ static int fsl_diu_resume(struct of_device *ofdev)
{
struct fsl_diu_data *machine_data;
- machine_data = dev_get_drvdata(&dev->dev);
+ machine_data = dev_get_drvdata(&ofdev->dev);
enable_lcdc(machine_data->fsl_diu_info[0]);
return 0;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 58d43daec08..982a2064fe4 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -204,7 +204,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
* Note: assumes we have exclusive access to this mapping either
* through inode->i_mutex or some other mechanism.
*/
- if (page->index == 0 && invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1) < 0) {
+ if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
/* Should never happen */
nfs_zap_mapping(inode, inode->i_mapping);
}
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index efc015c6128..44f87caf368 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -606,7 +606,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
res->last_used = 0;
+ spin_lock(&dlm->spinlock);
list_add_tail(&res->tracking, &dlm->tracking_list);
+ spin_unlock(&dlm->spinlock);
memset(res->lvb, 0, DLM_LVB_LEN);
memset(res->refmap, 0, sizeof(res->refmap));
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ab8ccc9d14f..c492449f3b4 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -476,10 +476,10 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
return -ESRCH;
mm = get_task_mm(task);
if (mm) {
- static struct mm_walk clear_refs_walk;
- memset(&clear_refs_walk, 0, sizeof(clear_refs_walk));
- clear_refs_walk.pmd_entry = clear_refs_pte_range;
- clear_refs_walk.mm = mm;
+ struct mm_walk clear_refs_walk = {
+ .pmd_entry = clear_refs_pte_range,
+ .mm = mm,
+ };
down_read(&mm->mmap_sem);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
clear_refs_walk.private = vma;
@@ -602,11 +602,6 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
return err;
}
-static struct mm_walk pagemap_walk = {
- .pmd_entry = pagemap_pte_range,
- .pte_hole = pagemap_pte_hole
-};
-
/*
* /proc/pid/pagemap - an array mapping virtual pages to pfns
*
@@ -641,6 +636,11 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
struct pagemapread pm;
int pagecount;
int ret = -ESRCH;
+ struct mm_walk pagemap_walk;
+ unsigned long src;
+ unsigned long svpfn;
+ unsigned long start_vaddr;
+ unsigned long end_vaddr;
if (!task)
goto out;
@@ -659,11 +659,15 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
if (!mm)
goto out_task;
- ret = -ENOMEM;
+
uaddr = (unsigned long)buf & PAGE_MASK;
uend = (unsigned long)(buf + count);
pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
- pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL);
+ ret = 0;
+ if (pagecount == 0)
+ goto out_mm;
+ pages = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
+ ret = -ENOMEM;
if (!pages)
goto out_mm;
@@ -684,33 +688,33 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
pm.out = (u64 *)buf;
pm.end = (u64 *)(buf + count);
- if (!ptrace_may_attach(task)) {
- ret = -EIO;
- } else {
- unsigned long src = *ppos;
- unsigned long svpfn = src / PM_ENTRY_BYTES;
- unsigned long start_vaddr = svpfn << PAGE_SHIFT;
- unsigned long end_vaddr = TASK_SIZE_OF(task);
-
- /* watch out for wraparound */
- if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
- start_vaddr = end_vaddr;
-
- /*
- * The odds are that this will stop walking way
- * before end_vaddr, because the length of the
- * user buffer is tracked in "pm", and the walk
- * will stop when we hit the end of the buffer.
- */
- ret = walk_page_range(start_vaddr, end_vaddr,
- &pagemap_walk);
- if (ret == PM_END_OF_BUFFER)
- ret = 0;
- /* don't need mmap_sem for these, but this looks cleaner */
- *ppos += (char *)pm.out - buf;
- if (!ret)
- ret = (char *)pm.out - buf;
- }
+ pagemap_walk.pmd_entry = pagemap_pte_range;
+ pagemap_walk.pte_hole = pagemap_pte_hole;
+ pagemap_walk.mm = mm;
+ pagemap_walk.private = &pm;
+
+ src = *ppos;
+ svpfn = src / PM_ENTRY_BYTES;
+ start_vaddr = svpfn << PAGE_SHIFT;
+ end_vaddr = TASK_SIZE_OF(task);
+
+ /* watch out for wraparound */
+ if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
+ start_vaddr = end_vaddr;
+
+ /*
+ * The odds are that this will stop walking way
+ * before end_vaddr, because the length of the
+ * user buffer is tracked in "pm", and the walk
+ * will stop when we hit the end of the buffer.
+ */
+ ret = walk_page_range(start_vaddr, end_vaddr, &pagemap_walk);
+ if (ret == PM_END_OF_BUFFER)
+ ret = 0;
+ /* don't need mmap_sem for these, but this looks cleaner */
+ *ppos += (char *)pm.out - buf;
+ if (!ret)
+ ret = (char *)pm.out - buf;
out_pages:
for (; pagecount; pagecount--) {
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 57917932212..192269698a8 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -45,6 +45,8 @@ void reiserfs_delete_inode(struct inode *inode)
goto out;
reiserfs_update_inode_transaction(inode);
+ reiserfs_discard_prealloc(&th, inode);
+
err = reiserfs_delete_object(&th, inode);
/* Do quota update inside a transaction for journaled quotas. We must do that
diff --git a/include/asm-powerpc/hugetlb.h b/include/asm-powerpc/hugetlb.h
index 649c6c3b87b..be32ff02f4a 100644
--- a/include/asm-powerpc/hugetlb.h
+++ b/include/asm-powerpc/hugetlb.h
@@ -49,12 +49,6 @@ static inline pte_t huge_pte_wrprotect(pte_t pte)
return pte_wrprotect(pte);
}
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
-{
- ptep_set_wrprotect(mm, addr, ptep);
-}
-
static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty)
diff --git a/include/asm-powerpc/pgtable-ppc64.h b/include/asm-powerpc/pgtable-ppc64.h
index cc6a43ba41d..7686569a0be 100644
--- a/include/asm-powerpc/pgtable-ppc64.h
+++ b/include/asm-powerpc/pgtable-ppc64.h
@@ -314,6 +314,16 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
old = pte_update(mm, addr, ptep, _PAGE_RW, 0);
}
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ unsigned long old;
+
+ if ((pte_val(*ptep) & _PAGE_RW) == 0)
+ return;
+ old = pte_update(mm, addr, ptep, _PAGE_RW, 1);
+}
+
/*
* We currently remove entries from the hashtable regardless of whether
* the entry was young or dirty. The generic routines only flush if the
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index c7f4f8e3e29..bd0ea191dfa 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -223,6 +223,9 @@ extern char empty_zero_page[PAGE_SIZE];
#define _PAGE_SPECIAL 0x004 /* SW associated with special page */
#define __HAVE_ARCH_PTE_SPECIAL
+/* Set of bits not changed in pte_modify */
+#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL)
+
/* Six different types of pages. */
#define _PAGE_TYPE_EMPTY 0x400
#define _PAGE_TYPE_NONE 0x401
@@ -681,7 +684,7 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
*/
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
- pte_val(pte) &= PAGE_MASK;
+ pte_val(pte) &= _PAGE_CHG_MASK;
pte_val(pte) |= pgprot_val(newprot);
return pte;
}
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index bfd9900742b..76f392146da 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -71,7 +71,8 @@ static inline long kvm_hypercall0(unsigned int nr)
long ret;
asm volatile(KVM_HYPERCALL
: "=a"(ret)
- : "a"(nr));
+ : "a"(nr)
+ : "memory");
return ret;
}
@@ -80,7 +81,8 @@ static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
long ret;
asm volatile(KVM_HYPERCALL
: "=a"(ret)
- : "a"(nr), "b"(p1));
+ : "a"(nr), "b"(p1)
+ : "memory");
return ret;
}
@@ -90,7 +92,8 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
long ret;
asm volatile(KVM_HYPERCALL
: "=a"(ret)
- : "a"(nr), "b"(p1), "c"(p2));
+ : "a"(nr), "b"(p1), "c"(p2)
+ : "memory");
return ret;
}
@@ -100,7 +103,8 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
long ret;
asm volatile(KVM_HYPERCALL
: "=a"(ret)
- : "a"(nr), "b"(p1), "c"(p2), "d"(p3));
+ : "a"(nr), "b"(p1), "c"(p2), "d"(p3)
+ : "memory");
return ret;
}
@@ -111,7 +115,8 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
long ret;
asm volatile(KVM_HYPERCALL
: "=a"(ret)
- : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4));
+ : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4)
+ : "memory");
return ret;
}
diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index f4a5871767f..4aaa4afb1cb 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -1,6 +1,8 @@
#ifndef __LINUX_DEBUG_LOCKING_H
#define __LINUX_DEBUG_LOCKING_H
+#include <linux/kernel.h>
+
struct task_struct;
extern int debug_locks;
@@ -11,14 +13,6 @@ extern int debug_locks_silent;
*/
extern int debug_locks_off(void);
-/*
- * In the debug case we carry the caller's instruction pointer into
- * other functions, but we dont want the function argument overhead
- * in the nondebug case - hence these macros:
- */
-#define _RET_IP_ (unsigned long)__builtin_return_address(0)
-#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
-
#define DEBUG_LOCKS_WARN_ON(c) \
({ \
int __ret = 0; \
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 9918772bf27..eddb6daadf4 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -189,6 +189,21 @@ static inline void ide_std_init_ports(hw_regs_t *hw,
hw->io_ports.ctl_addr = ctl_addr;
}
+/* for IDE PCI controllers in legacy mode, temporary */
+static inline int __ide_default_irq(unsigned long base)
+{
+ switch (base) {
+#ifdef CONFIG_IA64
+ case 0x1f0: return isa_irq_to_vector(14);
+ case 0x170: return isa_irq_to_vector(15);
+#else
+ case 0x1f0: return 14;
+ case 0x170: return 15;
+#endif
+ }
+ return 0;
+}
+
#include <asm/ide.h>
#if !defined(MAX_HWIFS) || defined(CONFIG_EMBEDDED)
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 792bf0aa779..2e70006c7fa 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -46,6 +46,9 @@ extern const char linux_proc_banner[];
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
+#define _RET_IP_ (unsigned long)__builtin_return_address(0)
+#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
+
#ifdef CONFIG_LBD
# include <asm/div64.h>
# define sector_div(a, b) do_div(a, b)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index d4998f81e22..1485ca8d0e0 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -79,7 +79,7 @@ static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
*
* For such cases, we now have a blacklist
*/
-struct kprobe_blackpoint kprobe_blacklist[] = {
+static struct kprobe_blackpoint kprobe_blacklist[] = {
{"preempt_schedule",},
{NULL} /* Terminator */
};
diff --git a/kernel/printk.c b/kernel/printk.c
index 8fb01c32aa3..e2129e83fd7 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -666,7 +666,7 @@ static int acquire_console_semaphore_for_printk(unsigned int cpu)
return retval;
}
-const char printk_recursion_bug_msg [] =
+static const char printk_recursion_bug_msg [] =
KERN_CRIT "BUG: recent printk recursion!\n";
static int printk_recursion_bug;
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 5e02b774070..41d275a81df 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -925,26 +925,22 @@ void rcu_offline_cpu(int cpu)
spin_unlock_irqrestore(&rdp->lock, flags);
}
-void __devinit rcu_online_cpu(int cpu)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
- cpu_set(cpu, rcu_cpu_online_map);
- spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
-}
-
#else /* #ifdef CONFIG_HOTPLUG_CPU */
void rcu_offline_cpu(int cpu)
{
}
-void __devinit rcu_online_cpu(int cpu)
+#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
+
+void __cpuinit rcu_online_cpu(int cpu)
{
-}
+ unsigned long flags;
-#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
+ spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
+ cpu_set(cpu, rcu_cpu_online_map);
+ spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
+}
static void rcu_process_callbacks(struct softirq_action *unused)
{
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index c828c2339cc..a272d78185e 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -120,6 +120,7 @@ void softlockup_tick(void)
printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
this_cpu, now - touch_timestamp,
current->comm, task_pid_nr(current));
+ print_modules();
if (regs)
show_regs(regs);
else
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 6021757a449..1dc2d1d18fa 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -22,6 +22,8 @@
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/kernel.h>
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
#include <asm/page.h> /* for PAGE_SIZE */
#include <asm/div64.h>
@@ -482,6 +484,89 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
return buf;
}
+static char *string(char *buf, char *end, char *s, int field_width, int precision, int flags)
+{
+ int len, i;
+
+ if ((unsigned long)s < PAGE_SIZE)
+ s = "<NULL>";
+
+ len = strnlen(s, precision);
+
+ if (!(flags & LEFT)) {
+ while (len < field_width--) {
+ if (buf < end)
+ *buf = ' ';
+ ++buf;
+ }
+ }
+ for (i = 0; i < len; ++i) {
+ if (buf < end)
+ *buf = *s;
+ ++buf; ++s;
+ }
+ while (len < field_width--) {
+ if (buf < end)
+ *buf = ' ';
+ ++buf;
+ }
+ return buf;
+}
+
+static inline void *dereference_function_descriptor(void *ptr)
+{
+#if defined(CONFIG_IA64) || defined(CONFIG_PPC64)
+ void *p;
+ if (!probe_kernel_address(ptr, p))
+ ptr = p;
+#endif
+ return ptr;
+}
+
+static char *symbol_string(char *buf, char *end, void *ptr, int field_width, int precision, int flags)
+{
+ unsigned long value = (unsigned long) ptr;
+#ifdef CONFIG_KALLSYMS
+ char sym[KSYM_SYMBOL_LEN];
+ sprint_symbol(sym, value);
+ return string(buf, end, sym, field_width, precision, flags);
+#else
+ field_width = 2*sizeof(void *);
+ flags |= SPECIAL | SMALL | ZEROPAD;
+ return number(buf, end, value, 16, field_width, precision, flags);
+#endif
+}
+
+/*
+ * Show a '%p' thing. A kernel extension is that the '%p' is followed
+ * by an extra set of alphanumeric characters that are extended format
+ * specifiers.
+ *
+ * Right now we just handle 'F' (for symbolic Function descriptor pointers)
+ * and 'S' (for Symbolic direct pointers), but this can easily be
+ * extended in the future (network address types etc).
+ *
+ * The difference between 'S' and 'F' is that on ia64 and ppc64 function
+ * pointers are really function descriptors, which contain a pointer the
+ * real address.
+ */
+static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field_width, int precision, int flags)
+{
+ switch (*fmt) {
+ case 'F':
+ ptr = dereference_function_descriptor(ptr);
+ /* Fallthrough */
+ case 'S':
+ return symbol_string(buf, end, ptr, field_width, precision, flags);
+ }
+ flags |= SMALL;
+ if (field_width == -1) {
+ field_width = 2*sizeof(void *);
+ flags |= ZEROPAD;
+ }
+ return number(buf, end, (unsigned long) ptr, 16, field_width, precision, flags);
+}
+
/**
* vsnprintf - Format a string and place it in a buffer
* @buf: The buffer to place the result into
@@ -502,11 +587,9 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
*/
int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
{
- int len;
unsigned long long num;
- int i, base;
+ int base;
char *str, *end, c;
- const char *s;
int flags; /* flags to number() */
@@ -622,43 +705,18 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
continue;
case 's':
- s = va_arg(args, char *);
- if ((unsigned long)s < PAGE_SIZE)
- s = "<NULL>";
-
- len = strnlen(s, precision);
-
- if (!(flags & LEFT)) {
- while (len < field_width--) {
- if (str < end)
- *str = ' ';
- ++str;
- }
- }
- for (i = 0; i < len; ++i) {
- if (str < end)
- *str = *s;
- ++str; ++s;
- }
- while (len < field_width--) {
- if (str < end)
- *str = ' ';
- ++str;
- }
+ str = string(str, end, va_arg(args, char *), field_width, precision, flags);
continue;
case 'p':
- flags |= SMALL;
- if (field_width == -1) {
- field_width = 2*sizeof(void *);
- flags |= ZEROPAD;
- }
- str = number(str, end,
- (unsigned long) va_arg(args, void *),
- 16, field_width, precision, flags);
+ str = pointer(fmt+1, str, end,
+ va_arg(args, void *),
+ field_width, precision, flags);
+ /* Skip all alphanumeric pointer suffixes */
+ while (isalnum(fmt[1]))
+ fmt++;
continue;
-
case 'n':
/* FIXME:
* What does C99 say about the overflow case here? */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a37a5034f63..c94e58b192c 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -729,7 +729,11 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
} else {
*policy = pol == &default_policy ? MPOL_DEFAULT :
pol->mode;
- *policy |= pol->flags;
+ /*
+ * Internal mempolicy flags must be masked off before exposing
+ * the policy to userspace.
+ */
+ *policy |= (pol->flags & MPOL_MODE_FLAGS);
}
if (vma) {
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index c2397f503b0..f38cc5317b8 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -442,12 +442,16 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
void __exit br_cleanup_bridges(void)
{
- struct net_device *dev, *nxt;
+ struct net_device *dev;
rtnl_lock();
- for_each_netdev_safe(&init_net, dev, nxt)
- if (dev->priv_flags & IFF_EBRIDGE)
+restart:
+ for_each_netdev(&init_net, dev) {
+ if (dev->priv_flags & IFF_EBRIDGE) {
del_br(dev->priv);
+ goto restart;
+ }
+ }
rtnl_unlock();
}
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 7e8ca283645..484bbf6dd03 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -205,12 +205,19 @@ static int can_create(struct net *net, struct socket *sock, int protocol)
* -ENOBUFS on full driver queue (see net_xmit_errno())
* -ENOMEM when local loopback failed at calling skb_clone()
* -EPERM when trying to send on a non-CAN interface
+ * -EINVAL when the skb->data does not contain a valid CAN frame
*/
int can_send(struct sk_buff *skb, int loop)
{
struct sk_buff *newskb = NULL;
+ struct can_frame *cf = (struct can_frame *)skb->data;
int err;
+ if (skb->len != sizeof(struct can_frame) || cf->can_dlc > 8) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
if (skb->dev->type != ARPHRD_CAN) {
kfree_skb(skb);
return -EPERM;
@@ -605,6 +612,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
struct dev_rcv_lists *d;
+ struct can_frame *cf = (struct can_frame *)skb->data;
int matches;
if (dev->type != ARPHRD_CAN || dev_net(dev) != &init_net) {
@@ -612,6 +620,8 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
return 0;
}
+ BUG_ON(skb->len != sizeof(struct can_frame) || cf->can_dlc > 8);
+
/* update statistics */
can_stats.rx_frames++;
can_stats.rx_frames_delta++;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index d9a3a9d13be..72c2ce904f8 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -298,7 +298,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
if (head->nframes) {
/* can_frames starting here */
- firstframe = (struct can_frame *) skb_tail_pointer(skb);
+ firstframe = (struct can_frame *)skb_tail_pointer(skb);
memcpy(skb_put(skb, datalen), frames, datalen);
@@ -826,6 +826,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
for (i = 0; i < msg_head->nframes; i++) {
err = memcpy_fromiovec((u8 *)&op->frames[i],
msg->msg_iov, CFSIZ);
+
+ if (op->frames[i].can_dlc > 8)
+ err = -EINVAL;
+
if (err < 0)
return err;
@@ -858,6 +862,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
for (i = 0; i < msg_head->nframes; i++) {
err = memcpy_fromiovec((u8 *)&op->frames[i],
msg->msg_iov, CFSIZ);
+
+ if (op->frames[i].can_dlc > 8)
+ err = -EINVAL;
+
if (err < 0) {
if (op->frames != &op->sframe)
kfree(op->frames);
@@ -1164,9 +1172,12 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
skb->dev = dev;
skb->sk = sk;
- can_send(skb, 1); /* send with loopback */
+ err = can_send(skb, 1); /* send with loopback */
dev_put(dev);
+ if (err)
+ return err;
+
return CFSIZ + MHSIZ;
}
@@ -1185,6 +1196,10 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock,
if (!bo->bound)
return -ENOTCONN;
+ /* check for valid message length from userspace */
+ if (size < MHSIZ || (size - MHSIZ) % CFSIZ)
+ return -EINVAL;
+
/* check for alternative ifindex for this bcm_op */
if (!ifindex && msg->msg_name) {
@@ -1259,8 +1274,8 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock,
break;
case TX_SEND:
- /* we need at least one can_frame */
- if (msg_head.nframes < 1)
+ /* we need exactly one can_frame behind the msg head */
+ if ((msg_head.nframes != 1) || (size != CFSIZ + MHSIZ))
ret = -EINVAL;
else
ret = bcm_tx_send(msg, ifindex, sk);
diff --git a/net/can/raw.c b/net/can/raw.c
index 69877b8e7e9..3e46ee36a1a 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -632,6 +632,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
} else
ifindex = ro->ifindex;
+ if (size != sizeof(struct can_frame))
+ return -EINVAL;
+
dev = dev_get_by_index(&init_net, ifindex);
if (!dev)
return -ENXIO;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 850825dc86e..1d723de1868 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -255,6 +255,7 @@
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/skbuff.h>
+#include <linux/scatterlist.h>
#include <linux/splice.h>
#include <linux/net.h>
#include <linux/socket.h>
@@ -1208,7 +1209,8 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
return -ENOTCONN;
while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
if (offset < skb->len) {
- size_t used, len;
+ int used;
+ size_t len;
len = skb->len - offset;
/* Stop reading if we hit a patch of urgent data */
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 0517967a68b..e6fb21b19b8 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -243,10 +243,10 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
}
EXPORT_SYMBOL_GPL(rpcb_getport_sync);
-static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, int version)
+static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc)
{
struct rpc_message msg = {
- .rpc_proc = rpcb_next_version[version].rpc_proc,
+ .rpc_proc = proc,
.rpc_argp = map,
.rpc_resp = &map->r_port,
};
@@ -271,6 +271,7 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
void rpcb_getport_async(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
+ struct rpc_procinfo *proc;
u32 bind_version;
struct rpc_xprt *xprt = task->tk_xprt;
struct rpc_clnt *rpcb_clnt;
@@ -280,7 +281,6 @@ void rpcb_getport_async(struct rpc_task *task)
struct sockaddr *sap = (struct sockaddr *)&addr;
size_t salen;
int status;
- struct rpcb_info *info;
dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
task->tk_pid, __func__,
@@ -313,10 +313,12 @@ void rpcb_getport_async(struct rpc_task *task)
/* Don't ever use rpcbind v2 for AF_INET6 requests */
switch (sap->sa_family) {
case AF_INET:
- info = rpcb_next_version;
+ proc = rpcb_next_version[xprt->bind_index].rpc_proc;
+ bind_version = rpcb_next_version[xprt->bind_index].rpc_vers;
break;
case AF_INET6:
- info = rpcb_next_version6;
+ proc = rpcb_next_version6[xprt->bind_index].rpc_proc;
+ bind_version = rpcb_next_version6[xprt->bind_index].rpc_vers;
break;
default:
status = -EAFNOSUPPORT;
@@ -324,14 +326,13 @@ void rpcb_getport_async(struct rpc_task *task)
task->tk_pid, __func__);
goto bailout_nofree;
}
- if (info[xprt->bind_index].rpc_proc == NULL) {
+ if (proc == NULL) {
xprt->bind_index = 0;
status = -EPFNOSUPPORT;
dprintk("RPC: %5u %s: no more getport versions available\n",
task->tk_pid, __func__);
goto bailout_nofree;
}
- bind_version = info[xprt->bind_index].rpc_vers;
dprintk("RPC: %5u %s: trying rpcbind version %u\n",
task->tk_pid, __func__, bind_version);
@@ -361,22 +362,20 @@ void rpcb_getport_async(struct rpc_task *task)
map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR);
map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */
- child = rpcb_call_async(rpcb_clnt, map, xprt->bind_index);
+ child = rpcb_call_async(rpcb_clnt, map, proc);
rpc_release_client(rpcb_clnt);
if (IS_ERR(child)) {
status = -EIO;
+ /* rpcb_map_release() has freed the arguments */
dprintk("RPC: %5u %s: rpc_run_task failed\n",
task->tk_pid, __func__);
- goto bailout;
+ goto bailout_nofree;
}
rpc_put_task(child);
task->tk_xprt->stat.bind_count++;
return;
-bailout:
- kfree(map);
- xprt_put(xprt);
bailout_nofree:
rpcb_wake_rpcbind_waiters(xprt, status);
bailout_nowake:
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 1dcf9f3d110..44589088941 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -278,7 +278,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi)
ent->fields.remote_irr = 0;
if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
- ioapic_deliver(ioapic, gsi);
+ ioapic_service(ioapic, gsi);
}
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)