diff options
135 files changed, 11616 insertions, 274 deletions
diff --git a/Documentation/devicetree/bindings/arm/cci.txt b/Documentation/devicetree/bindings/arm/cci.txt new file mode 100644 index 00000000000..92d36e2aa87 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/cci.txt @@ -0,0 +1,172 @@ +======================================================= +ARM CCI cache coherent interconnect binding description +======================================================= + +ARM multi-cluster systems maintain intra-cluster coherency through a +cache coherent interconnect (CCI) that is capable of monitoring bus +transactions and manage coherency, TLB invalidations and memory barriers. + +It allows snooping and distributed virtual memory message broadcast across +clusters, through memory mapped interface, with a global control register +space and multiple sets of interface control registers, one per slave +interface. + +Bindings for the CCI node follow the ePAPR standard, available from: + +www.power.org/documentation/epapr-version-1-1/ + +with the addition of the bindings described in this document which are +specific to ARM. + +* CCI interconnect node + + Description: Describes a CCI cache coherent Interconnect component + + Node name must be "cci". + Node's parent must be the root node /, and the address space visible + through the CCI interconnect is the same as the one seen from the + root node (ie from CPUs perspective as per DT standard). + Every CCI node has to define the following properties: + + - compatible + Usage: required + Value type: <string> + Definition: must be set to + "arm,cci-400" + + - reg + Usage: required + Value type: <prop-encoded-array> + Definition: A standard property. Specifies base physical + address of CCI control registers common to all + interfaces. + + - ranges: + Usage: required + Value type: <prop-encoded-array> + Definition: A standard property. Follow rules in the ePAPR for + hierarchical bus addressing. CCI interfaces + addresses refer to the parent node addressing + scheme to declare their register bases. + + CCI interconnect node can define the following child nodes: + + - CCI control interface nodes + + Node name must be "slave-if". + Parent node must be CCI interconnect node. + + A CCI control interface node must contain the following + properties: + + - compatible + Usage: required + Value type: <string> + Definition: must be set to + "arm,cci-400-ctrl-if" + + - interface-type: + Usage: required + Value type: <string> + Definition: must be set to one of {"ace", "ace-lite"} + depending on the interface type the node + represents. + + - reg: + Usage: required + Value type: <prop-encoded-array> + Definition: the base address and size of the + corresponding interface programming + registers. + +* CCI interconnect bus masters + + Description: masters in the device tree connected to a CCI port + (inclusive of CPUs and their cpu nodes). + + A CCI interconnect bus master node must contain the following + properties: + + - cci-control-port: + Usage: required + Value type: <phandle> + Definition: a phandle containing the CCI control interface node + the master is connected to. + +Example: + + cpus { + #size-cells = <0>; + #address-cells = <1>; + + CPU0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + cci-control-port = <&cci_control1>; + reg = <0x0>; + }; + + CPU1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + cci-control-port = <&cci_control1>; + reg = <0x1>; + }; + + CPU2: cpu@100 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + cci-control-port = <&cci_control2>; + reg = <0x100>; + }; + + CPU3: cpu@101 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + cci-control-port = <&cci_control2>; + reg = <0x101>; + }; + + }; + + dma0: dma@3000000 { + compatible = "arm,pl330", "arm,primecell"; + cci-control-port = <&cci_control0>; + reg = <0x0 0x3000000 0x0 0x1000>; + interrupts = <10>; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; + }; + + cci@2c090000 { + compatible = "arm,cci-400"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x0 0x2c090000 0 0x1000>; + ranges = <0x0 0x0 0x2c090000 0x6000>; + + cci_control0: slave-if@1000 { + compatible = "arm,cci-400-ctrl-if"; + interface-type = "ace-lite"; + reg = <0x1000 0x1000>; + }; + + cci_control1: slave-if@4000 { + compatible = "arm,cci-400-ctrl-if"; + interface-type = "ace"; + reg = <0x4000 0x1000>; + }; + + cci_control2: slave-if@5000 { + compatible = "arm,cci-400-ctrl-if"; + interface-type = "ace"; + reg = <0x5000 0x1000>; + }; + }; + +This CCI node corresponds to a CCI component whose control registers sits +at address 0x000000002c090000. +CCI slave interface @0x000000002c091000 is connected to dma controller dma0. +CCI slave interface @0x000000002c094000 is connected to CPUs {CPU0, CPU1}; +CCI slave interface @0x000000002c095000 is connected to CPUs {CPU2, CPU3}; diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt index 343781b9f24..4ce82d045a6 100644 --- a/Documentation/devicetree/bindings/arm/pmu.txt +++ b/Documentation/devicetree/bindings/arm/pmu.txt @@ -16,6 +16,9 @@ Required properties: "arm,arm1176-pmu" "arm,arm1136-pmu" - interrupts : 1 combined interrupt or 1 per core. +- cluster : a phandle to the cluster to which it belongs + If there are more than one cluster with same CPU type + then there should be separate PMU nodes per cluster. Example: diff --git a/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt b/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt new file mode 100644 index 00000000000..3b8fbf3c00c --- /dev/null +++ b/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt @@ -0,0 +1,19 @@ +ARM Dual Cluster System Configuration Block +------------------------------------------- + +The Dual Cluster System Configuration Block (DCSCB) provides basic +functionality for controlling clocks, resets and configuration pins in +the Dual Cluster System implemented by the Real-Time System Model (RTSM). + +Required properties: + +- compatible : should be "arm,rtsm,dcscb" + +- reg : physical base address and the size of the registers window + +Example: + + dcscb@60000000 { + compatible = "arm,rtsm,dcscb"; + reg = <0x60000000 0x1000>; + }; diff --git a/Documentation/devicetree/bindings/mfd/vexpress-spc.txt b/Documentation/devicetree/bindings/mfd/vexpress-spc.txt new file mode 100644 index 00000000000..1d71dc2ff15 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/vexpress-spc.txt @@ -0,0 +1,35 @@ +* ARM Versatile Express Serial Power Controller device tree bindings + +Latest ARM development boards implement a power management interface (serial +power controller - SPC) that is capable of managing power/voltage and +operating point transitions, through memory mapped registers interface. + +On testchips like TC2 it also provides a configuration interface that can +be used to read/write values which cannot be read/written through simple +memory mapped reads/writes. + +- spc node + + - compatible: + Usage: required + Value type: <stringlist> + Definition: must be + "arm,vexpress-spc,v2p-ca15_a7","arm,vexpress-spc" + - reg: + Usage: required + Value type: <prop-encode-array> + Definition: A standard property that specifies the base address + and the size of the SPC address space + - interrupts: + Usage: required + Value type: <prop-encoded-array> + Definition: SPC interrupt configuration. A standard property + that follows ePAPR interrupts specifications + +Example: + +spc: spc@7fff0000 { + compatible = "arm,vexpress-spc,v2p-ca15_a7","arm,vexpress-spc"; + reg = <0 0x7FFF0000 0 0x1000>; + interrupts = <0 95 4>; +}; diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 2fe6e767b3d..ba7daaa688f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1240,6 +1240,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. See comment before ip2_setup() in drivers/char/ip2/ip2base.c. + irqaffinity= [SMP] Set the default irq affinity mask + Format: + <cpu number>,...,<cpu number> + or + <cpu number>-<cpu number> + (must be a positive range in ascending order) + or a mixture + <cpu number>,...,<cpu number>-<cpu number> + irqfixup [HW] When an interrupt is not handled search all handlers for it. Intended to get systems with badly broken @@ -3341,6 +3350,21 @@ bytes respectively. Such letter suffixes can also be entirely omitted. that this also can be controlled per-workqueue for workqueues visible under /sys/bus/workqueue/. + workqueue.power_efficient + Per-cpu workqueues are generally preferred because + they show better performance thanks to cache + locality; unfortunately, per-cpu workqueues tend to + be more power hungry than unbound workqueues. + + Enabling this makes the per-cpu workqueues which + were observed to contribute significantly to power + consumption unbound, leading to measurably lower + power usage at the cost of small performance + overhead. + + The default value of this parameter is determined by + the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT. + x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of default x2apic cluster mode on platforms supporting x2apic. diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 18a9f5ef643..0460846b8a1 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1494,6 +1494,90 @@ config SCHED_SMT MultiThreading at a cost of slightly increased overhead in some places. If unsure say N here. +config DISABLE_CPU_SCHED_DOMAIN_BALANCE + bool "(EXPERIMENTAL) Disable CPU level scheduler load-balancing" + help + Disables scheduler load-balancing at CPU sched domain level. + +config SCHED_HMP + bool "(EXPERIMENTAL) Heterogenous multiprocessor scheduling" + depends on DISABLE_CPU_SCHED_DOMAIN_BALANCE && SCHED_MC && FAIR_GROUP_SCHED && !SCHED_AUTOGROUP + help + Experimental scheduler optimizations for heterogeneous platforms. + Attempts to introspectively select task affinity to optimize power + and performance. Basic support for multiple (>2) cpu types is in place, + but it has only been tested with two types of cpus. + There is currently no support for migration of task groups, hence + !SCHED_AUTOGROUP. Furthermore, normal load-balancing must be disabled + between cpus of different type (DISABLE_CPU_SCHED_DOMAIN_BALANCE). + +config SCHED_HMP_PRIO_FILTER + bool "(EXPERIMENTAL) Filter HMP migrations by task priority" + depends on SCHED_HMP + help + Enables task priority based HMP migration filter. Any task with + a NICE value above the threshold will always be on low-power cpus + with less compute capacity. + +config SCHED_HMP_PRIO_FILTER_VAL + int "NICE priority threshold" + default 5 + depends on SCHED_HMP_PRIO_FILTER + +config HMP_FAST_CPU_MASK + string "HMP scheduler fast CPU mask" + depends on SCHED_HMP + help + Leave empty to use device tree information. + Specify the cpuids of the fast CPUs in the system as a list string, + e.g. cpuid 0+1 should be specified as 0-1. + +config HMP_SLOW_CPU_MASK + string "HMP scheduler slow CPU mask" + depends on SCHED_HMP + help + Leave empty to use device tree information. + Specify the cpuids of the slow CPUs in the system as a list string, + e.g. cpuid 0+1 should be specified as 0-1. + +config HMP_VARIABLE_SCALE + bool "Allows changing the load tracking scale through sysfs" + depends on SCHED_HMP + help + When turned on, this option exports the thresholds and load average + period value for the load tracking patches through sysfs. + The values can be modified to change the rate of load accumulation + and the thresholds used for HMP migration. + The load_avg_period_ms is the time in ms to reach a load average of + 0.5 for an idle task of 0 load average ratio that start a busy loop. + The up_threshold and down_threshold is the value to go to a faster + CPU or to go back to a slower cpu. + The {up,down}_threshold are devided by 1024 before being compared + to the load average. + For examples, with load_avg_period_ms = 128 and up_threshold = 512, + a running task with a load of 0 will be migrated to a bigger CPU after + 128ms, because after 128ms its load_avg_ratio is 0.5 and the real + up_threshold is 0.5. + This patch has the same behavior as changing the Y of the load + average computation to + (1002/1024)^(LOAD_AVG_PERIOD/load_avg_period_ms) + but it remove intermadiate overflows in computation. + +config HMP_FREQUENCY_INVARIANT_SCALE + bool "(EXPERIMENTAL) Frequency-Invariant Tracked Load for HMP" + depends on HMP_VARIABLE_SCALE && CPU_FREQ + help + Scales the current load contribution in line with the frequency + of the CPU that the task was executed on. + In this version, we use a simple linear scale derived from the + maximum frequency reported by CPUFreq. + Restricting tracked load to be scaled by the CPU's frequency + represents the consumption of possible compute capacity + (rather than consumption of actual instantaneous capacity as + normal) and allows the HMP migration's simple threshold + migration strategy to interact more predictably with CPUFreq's + asynchronous compute capacity changes. + config HAVE_ARM_SCU bool help @@ -1521,6 +1605,31 @@ config MCPM for (multi-)cluster based systems, such as big.LITTLE based systems. +config BIG_LITTLE + bool "big.LITTLE support (Experimental)" + depends on CPU_V7 && SMP + select MCPM + help + This option enables support for the big.LITTLE architecture. + +config BL_SWITCHER + bool "big.LITTLE switcher support" + depends on BIG_LITTLE && MCPM && HOTPLUG_CPU + select CPU_PM + select ARM_CPU_SUSPEND + help + The big.LITTLE "switcher" provides the core functionality to + transparently handle transition between a cluster of A15's + and a cluster of A7's in a big.LITTLE system. + +config BL_SWITCHER_DUMMY_IF + tristate "Simple big.LITTLE switcher user interface" + depends on BL_SWITCHER && DEBUG_KERNEL + help + This is a simple and dummy char dev interface to control + the big.LITTLE switcher core code. It is meant for + debugging purposes only. + choice prompt "Memory split" default VMSPLIT_3G diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index f0895c581a8..00baf9f5766 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -202,7 +202,14 @@ dtb-$(CONFIG_ARCH_VERSATILE) += versatile-ab.dtb \ dtb-$(CONFIG_ARCH_VEXPRESS) += vexpress-v2p-ca5s.dtb \ vexpress-v2p-ca9.dtb \ vexpress-v2p-ca15-tc1.dtb \ - vexpress-v2p-ca15_a7.dtb + vexpress-v2p-ca15_a7.dtb \ + rtsm_ve-cortex_a9x2.dtb \ + rtsm_ve-cortex_a9x4.dtb \ + rtsm_ve-cortex_a15x1.dtb \ + rtsm_ve-cortex_a15x2.dtb \ + rtsm_ve-cortex_a15x4.dtb \ + rtsm_ve-v2p-ca15x1-ca7x1.dtb \ + rtsm_ve-v2p-ca15x4-ca7x4.dtb dtb-$(CONFIG_ARCH_VIRT) += xenvm-4.2.dtb dtb-$(CONFIG_ARCH_VT8500) += vt8500-bv07.dtb \ wm8505-ref.dtb \ diff --git a/arch/arm/boot/dts/clcd-panels.dtsi b/arch/arm/boot/dts/clcd-panels.dtsi new file mode 100644 index 00000000000..0b0ff6ead4b --- /dev/null +++ b/arch/arm/boot/dts/clcd-panels.dtsi @@ -0,0 +1,52 @@ +/* + * ARM Ltd. Versatile Express + * + */ + +/ { + panels { + panel@0 { + compatible = "panel"; + mode = "VGA"; + refresh = <60>; + xres = <640>; + yres = <480>; + pixclock = <39721>; + left_margin = <40>; + right_margin = <24>; + upper_margin = <32>; + lower_margin = <11>; + hsync_len = <96>; + vsync_len = <2>; + sync = <0>; + vmode = "FB_VMODE_NONINTERLACED"; + + tim2 = "TIM2_BCD", "TIM2_IPC"; + cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)"; + caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888"; + bpp = <16>; + }; + + panel@1 { + compatible = "panel"; + mode = "XVGA"; + refresh = <60>; + xres = <1024>; + yres = <768>; + pixclock = <15748>; + left_margin = <152>; + right_margin = <48>; + upper_margin = <23>; + lower_margin = <3>; + hsync_len = <104>; + vsync_len = <4>; + sync = <0>; + vmode = "FB_VMODE_NONINTERLACED"; + + tim2 = "TIM2_BCD", "TIM2_IPC"; + cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)"; + caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888"; + bpp = <16>; + }; + }; +}; diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts new file mode 100644 index 00000000000..c9eee916aa7 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts @@ -0,0 +1,159 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x1CT + * + * RTSM_VE_Cortex_A15x1.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA15x1"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x1", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts new file mode 100644 index 00000000000..853a166e3c3 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts @@ -0,0 +1,165 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x2CT + * + * RTSM_VE_Cortex_A15x2.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA15x2"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x2", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <1>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts new file mode 100644 index 00000000000..c1947a3a5c8 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts @@ -0,0 +1,177 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x4CT + * + * RTSM_VE_Cortex_A15x4.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA15x4"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x4", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <1>; + }; + + cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <2>; + }; + + cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <3>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts new file mode 100644 index 00000000000..fca6b2f7967 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts @@ -0,0 +1,171 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA9MPx2CT + * + * RTSM_VE_Cortex_A9x2.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA9x2"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a9x2", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <1>; + #size-cells = <1>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <1>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0x80000000>; + }; + + scu@2c000000 { + compatible = "arm,cortex-a9-scu"; + reg = <0x2c000000 0x58>; + }; + + timer@2c000600 { + compatible = "arm,cortex-a9-twd-timer"; + reg = <0x2c000600 0x20>; + interrupts = <1 13 0xf04>; + }; + + watchdog@2c000620 { + compatible = "arm,cortex-a9-twd-wdt"; + reg = <0x2c000620 0x20>; + interrupts = <1 14 0xf04>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0x2c001000 0x1000>, + <0x2c000100 0x100>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0x08000000 0x04000000>, + <1 0 0x14000000 0x04000000>, + <2 0 0x18000000 0x04000000>, + <3 0 0x1c000000 0x04000000>, + <4 0 0x0c000000 0x04000000>, + <5 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts new file mode 100644 index 00000000000..fd8a6ed97a0 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts @@ -0,0 +1,183 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA9MPx4CT + * + * RTSM_VE_Cortex_A9x4.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA9x4"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a9x4", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <1>; + #size-cells = <1>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <1>; + }; + + cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <2>; + }; + + cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <3>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0x80000000>; + }; + + scu@2c000000 { + compatible = "arm,cortex-a9-scu"; + reg = <0x2c000000 0x58>; + }; + + timer@2c000600 { + compatible = "arm,cortex-a9-twd-timer"; + reg = <0x2c000600 0x20>; + interrupts = <1 13 0xf04>; + }; + + watchdog@2c000620 { + compatible = "arm,cortex-a9-twd-wdt"; + reg = <0x2c000620 0x20>; + interrupts = <1 14 0xf04>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0x2c001000 0x1000>, + <0x2c000100 0x100>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0x08000000 0x04000000>, + <1 0 0x14000000 0x04000000>, + <2 0 0x18000000 0x04000000>, + <3 0 0x1c000000 0x04000000>, + <4 0 0x0c000000 0x04000000>, + <5 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi b/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi new file mode 100644 index 00000000000..6d125662612 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi @@ -0,0 +1,224 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * Motherboard component + * + * VEMotherBoard.lisa + */ + + motherboard { + compatible = "arm,vexpress,v2m-p1", "simple-bus"; + arm,hbi = <0x190>; + arm,vexpress,site = <0>; + arm,v2m-memory-map = "rs1"; + #address-cells = <2>; /* SMB chipselect number and offset */ + #size-cells = <1>; + #interrupt-cells = <1>; + ranges; + + flash@0,00000000 { + compatible = "arm,vexpress-flash", "cfi-flash"; + reg = <0 0x00000000 0x04000000>, + <4 0x00000000 0x04000000>; + bank-width = <4>; + }; + + vram@2,00000000 { + compatible = "arm,vexpress-vram"; + reg = <2 0x00000000 0x00800000>; + }; + + ethernet@2,02000000 { + compatible = "smsc,lan91c111"; + reg = <2 0x02000000 0x10000>; + interrupts = <15>; + }; + + iofpga@3,00000000 { + compatible = "arm,amba-bus", "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 3 0 0x200000>; + + v2m_sysreg: sysreg@010000 { + compatible = "arm,vexpress-sysreg"; + reg = <0x010000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + }; + + v2m_sysctl: sysctl@020000 { + compatible = "arm,sp810", "arm,primecell"; + reg = <0x020000 0x1000>; + clocks = <&v2m_refclk32khz>, <&v2m_refclk1mhz>, <&smbclk>; + clock-names = "refclk", "timclk", "apb_pclk"; + #clock-cells = <1>; + clock-output-names = "timerclken0", "timerclken1", "timerclken2", "timerclken3"; + }; + + aaci@040000 { + compatible = "arm,pl041", "arm,primecell"; + reg = <0x040000 0x1000>; + interrupts = <11>; + clocks = <&smbclk>; + clock-names = "apb_pclk"; + }; + + mmci@050000 { + compatible = "arm,pl180", "arm,primecell"; + reg = <0x050000 0x1000>; + interrupts = <9 10>; + cd-gpios = <&v2m_sysreg 0 0>; + wp-gpios = <&v2m_sysreg 1 0>; + max-frequency = <12000000>; + vmmc-supply = <&v2m_fixed_3v3>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "mclk", "apb_pclk"; + }; + + kmi@060000 { + compatible = "arm,pl050", "arm,primecell"; + reg = <0x060000 0x1000>; + interrupts = <12>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "KMIREFCLK", "apb_pclk"; + }; + + kmi@070000 { + compatible = "arm,pl050", "arm,primecell"; + reg = <0x070000 0x1000>; + interrupts = <13>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "KMIREFCLK", "apb_pclk"; + }; + + v2m_serial0: uart@090000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x090000 0x1000>; + interrupts = <5>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "uartclk", "apb_pclk"; + }; + + v2m_serial1: uart@0a0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0a0000 0x1000>; + interrupts = <6>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "uartclk", "apb_pclk"; + }; + + v2m_serial2: uart@0b0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0b0000 0x1000>; + interrupts = <7>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "uartclk", "apb_pclk"; + }; + + v2m_serial3: uart@0c0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0c0000 0x1000>; + interrupts = <8>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "uartclk", "apb_pclk"; + }; + + wdt@0f0000 { + compatible = "arm,sp805", "arm,primecell"; + reg = <0x0f0000 0x1000>; + interrupts = <0>; + clocks = <&v2m_refclk32khz>, <&smbclk>; + clock-names = "wdogclk", "apb_pclk"; + }; + + v2m_timer01: timer@110000 { + compatible = "arm,sp804", "arm,primecell"; + reg = <0x110000 0x1000>; + interrupts = <2>; + clocks = <&v2m_sysctl 0>, <&v2m_sysctl 1>, <&smbclk>; + clock-names = "timclken1", "timclken2", "apb_pclk"; + }; + + v2m_timer23: timer@120000 { + compatible = "arm,sp804", "arm,primecell"; + reg = <0x120000 0x1000>; + interrupts = <3>; + clocks = <&v2m_sysctl 2>, <&v2m_sysctl 3>, <&smbclk>; + clock-names = "timclken1", "timclken2", "apb_pclk"; + }; + + rtc@170000 { + compatible = "arm,pl031", "arm,primecell"; + reg = <0x170000 0x1000>; + interrupts = <4>; + clocks = <&smbclk>; + clock-names = "apb_pclk"; + }; + + clcd@1f0000 { + compatible = "arm,pl111", "arm,primecell"; + reg = <0x1f0000 0x1000>; + interrupts = <14>; + clocks = <&v2m_oscclk1>, <&smbclk>; + clock-names = "v2m:oscclk1", "apb_pclk"; + mode = "VGA"; + use_dma = <0>; + framebuffer = <0x18000000 0x00180000>; + }; + }; + + v2m_fixed_3v3: fixedregulator@0 { + compatible = "regulator-fixed"; + regulator-name = "3V3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + + v2m_clk24mhz: clk24mhz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <24000000>; + clock-output-names = "v2m:clk24mhz"; + }; + + v2m_refclk1mhz: refclk1mhz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <1000000>; + clock-output-names = "v2m:refclk1mhz"; + }; + + v2m_refclk32khz: refclk32khz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + clock-output-names = "v2m:refclk32khz"; + }; + + mcc { + compatible = "simple-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + v2m_oscclk1: osc@1 { + /* CLCD clock */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <23750000 63500000>; + #clock-cells = <0>; + clock-output-names = "v2m:oscclk1"; + }; + + muxfpga@0 { + compatible = "arm,vexpress-muxfpga"; + arm,vexpress-sysreg,func = <7 0>; + }; + + shutdown@0 { + compatible = "arm,vexpress-shutdown"; + arm,vexpress-sysreg,func = <8 0>; + }; + }; + }; diff --git a/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts new file mode 100644 index 00000000000..fe8cf5dc857 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts @@ -0,0 +1,244 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x4CT + * ARMCortexA7x4CT + * RTSM_VE_Cortex_A15x1_A7x1.lisa + */ + +/dts-v1/; + +/memreserve/ 0xff000000 0x01000000; + +/ { + model = "RTSM_VE_CortexA15x1-A7x1"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x1_a7x1", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + clusters { + #address-cells = <1>; + #size-cells = <0>; + + cluster0: cluster@0 { + reg = <0>; +// freqs = <500000000 600000000 700000000 800000000 900000000 1000000000 1100000000 1200000000>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core0: core@0 { + reg = <0>; + }; + + }; + }; + + cluster1: cluster@1 { + reg = <1>; +// freqs = <350000000 400000000 500000000 600000000 700000000 800000000 900000000 1000000000>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core1: core@0 { + reg = <0>; + }; + + }; + }; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + cluster = <&cluster0>; + core = <&core0>; +// clock-frequency = <1000000000>; + cci-control-port = <&cci_control1>; + }; + + cpu1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x100>; + cluster = <&cluster1>; + core = <&core1>; +// clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + cci@2c090000 { + compatible = "arm,cci-400", "arm,cci"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0 0x2c090000 0 0x1000>; + ranges = <0x0 0x0 0x2c090000 0x10000>; + + cci_control1: slave-if@4000 { + compatible = "arm,cci-400-ctrl-if"; + interface-type = "ace"; + reg = <0x4000 0x1000>; + }; + + cci_control2: slave-if@5000 { + compatible = "arm,cci-400-ctrl-if"; + interface-type = "ace"; + reg = <0x5000 0x1000>; + }; + }; + + dcscb@60000000 { + compatible = "arm,rtsm,dcscb"; + reg = <0 0x60000000 0 0x1000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + + gic-cpuif@0 { + compatible = "arm,gic-cpuif"; + cpuif-id = <0>; + cpu = <&cpu0>; + }; + gic-cpuif@1 { + compatible = "arm,gic-cpuif"; + cpuif-id = <1>; + cpu = <&cpu1>; + }; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts new file mode 100644 index 00000000000..f715285131d --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts @@ -0,0 +1,358 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x4CT + * ARMCortexA7x4CT + * RTSM_VE_Cortex_A15x4_A7x4.lisa + */ + +/dts-v1/; + +/memreserve/ 0xff000000 0x01000000; + +/ { + model = "RTSM_VE_CortexA15x4-A7x4"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x4_a7x4", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + clusters { + #address-cells = <1>; + #size-cells = <0>; + + cluster0: cluster@0 { + reg = <0>; +// freqs = <500000000 600000000 700000000 800000000 900000000 1000000000 1100000000 1200000000>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core0: core@0 { + reg = <0>; + }; + + core1: core@1 { + reg = <1>; + }; + + core2: core@2 { + reg = <2>; + }; + + core3: core@3 { + reg = <3>; + }; + + }; + }; + + cluster1: cluster@1 { + reg = <1>; +// freqs = <350000000 400000000 500000000 600000000 700000000 800000000 900000000 1000000000>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core4: core@0 { + reg = <0>; + }; + + core5: core@1 { + reg = <1>; + }; + + core6: core@2 { + reg = <2>; + }; + + core7: core@3 { + reg = <3>; + }; + + }; + }; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + cluster = <&cluster0>; + core = <&core0>; +// clock-frequency = <1000000000>; + cci-control-port = <&cci_control1>; + }; + + cpu1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <1>; + cluster = <&cluster0>; + core = <&core1>; +// clock-frequency = <1000000000>; + cci-control-port = <&cci_control1>; + }; + + cpu2: cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <2>; + cluster = <&cluster0>; + core = <&core2>; +// clock-frequency = <1000000000>; + cci-control-port = <&cci_control1>; + }; + + cpu3: cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <3>; + cluster = <&cluster0>; + core = <&core3>; +// clock-frequency = <1000000000>; + cci-control-port = <&cci_control1>; + }; + + cpu4: cpu@4 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x100>; + cluster = <&cluster1>; + core = <&core4>; +// clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; + }; + + cpu5: cpu@5 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x101>; + cluster = <&cluster1>; + core = <&core5>; +// clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; + }; + + cpu6: cpu@6 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x102>; + cluster = <&cluster1>; + core = <&core6>; +// clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; + }; + + cpu7: cpu@7 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x103>; + cluster = <&cluster1>; + core = <&core7>; +// clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + cci@2c090000 { + compatible = "arm,cci-400", "arm,cci"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0 0x2c090000 0 0x1000>; + ranges = <0x0 0x0 0x2c090000 0x10000>; + + cci_control1: slave-if@4000 { + compatible = "arm,cci-400-ctrl-if"; + interface-type = "ace"; + reg = <0x4000 0x1000>; + }; + + cci_control2: slave-if@5000 { + compatible = "arm,cci-400-ctrl-if"; + interface-type = "ace"; + reg = <0x5000 0x1000>; + }; + }; + + dcscb@60000000 { + compatible = "arm,rtsm,dcscb"; + reg = <0 0x60000000 0 0x1000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + + gic-cpuif@0 { + compatible = "arm,gic-cpuif"; + cpuif-id = <0>; + cpu = <&cpu0>; + }; + gic-cpuif@1 { + compatible = "arm,gic-cpuif"; + cpuif-id = <1>; + cpu = <&cpu1>; + }; + gic-cpuif@2 { + compatible = "arm,gic-cpuif"; + cpuif-id = <2>; + cpu = <&cpu2>; + }; + gic-cpuif@3 { + compatible = "arm,gic-cpuif"; + cpuif-id = <3>; + cpu = <&cpu3>; + }; + gic-cpuif@4 { + compatible = "arm,gic-cpuif"; + cpuif-id = <4>; + cpu = <&cpu4>; + }; + gic-cpuif@5 { + compatible = "arm,gic-cpuif"; + cpuif-id = <5>; + cpu = <&cpu5>; + }; + gic-cpuif@6 { + compatible = "arm,gic-cpuif"; + cpuif-id = <6>; + cpu = <&cpu6>; + }; + gic-cpuif@7 { + compatible = "arm,gic-cpuif"; + cpuif-id = <7>; + cpu = <&cpu7>; + }; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi index ac870fb3fa0..9584232ee6b 100644 --- a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi +++ b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi @@ -228,6 +228,7 @@ }; clcd@1f0000 { + status = "disabled"; compatible = "arm,pl111", "arm,primecell"; reg = <0x1f0000 0x1000>; interrupts = <14>; diff --git a/arch/arm/boot/dts/vexpress-v2m.dtsi b/arch/arm/boot/dts/vexpress-v2m.dtsi index f1420368355..6593398c11a 100644 --- a/arch/arm/boot/dts/vexpress-v2m.dtsi +++ b/arch/arm/boot/dts/vexpress-v2m.dtsi @@ -227,6 +227,7 @@ }; clcd@1f000 { + status = "disabled"; compatible = "arm,pl111", "arm,primecell"; reg = <0x1f000 0x1000>; interrupts = <14>; diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts b/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts index 9420053acc1..cc6a8c0cfe3 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts @@ -9,6 +9,8 @@ /dts-v1/; +/memreserve/ 0xbf000000 0x01000000; + / { model = "V2P-CA15"; arm,hbi = <0x237>; @@ -57,6 +59,8 @@ interrupts = <0 85 4>; clocks = <&oscclk5>; clock-names = "pxlclk"; + mode = "1024x768-16@60"; + framebuffer = <0 0xff000000 0 0x01000000>; }; memory-controller@2b0a0000 { diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts index d2803be4e1a..f1dc620c5c4 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts @@ -9,11 +9,13 @@ /dts-v1/; +/memreserve/ 0xff000000 0x01000000; + / { model = "V2P-CA15_CA7"; arm,hbi = <0x249>; arm,vexpress,site = <0xf>; - compatible = "arm,vexpress,v2p-ca15_a7", "arm,vexpress"; + compatible = "arm,vexpress,v2p-ca15_a7", "arm,vexpress", "arm,generic"; interrupt-parent = <&gic>; #address-cells = <2>; #size-cells = <2>; @@ -29,44 +31,106 @@ i2c1 = &v2m_i2c_pcie; }; - cpus { + clusters { #address-cells = <1>; #size-cells = <0>; - cpu0: cpu@0 { - device_type = "cpu"; - compatible = "arm,cortex-a15"; + cluster0: cluster@0 { reg = <0>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core0: core@0 { + reg = <0>; + }; + + core1: core@1 { + reg = <1>; + }; + + }; }; - cpu1: cpu@1 { - device_type = "cpu"; - compatible = "arm,cortex-a15"; + cluster1: cluster@1 { reg = <1>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core2: core@0 { + reg = <0>; + }; + + core3: core@1 { + reg = <1>; + }; + + core4: core@2 { + reg = <2>; + }; + }; }; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; cpu2: cpu@2 { device_type = "cpu"; compatible = "arm,cortex-a7"; reg = <0x100>; + cluster = <&cluster1>; + core = <&core2>; + clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; }; cpu3: cpu@3 { device_type = "cpu"; compatible = "arm,cortex-a7"; reg = <0x101>; + cluster = <&cluster1>; + core = <&core3>; + clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; }; cpu4: cpu@4 { device_type = "cpu"; compatible = "arm,cortex-a7"; reg = <0x102>; + cluster = <&cluster1>; + core = <&core4>; + clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; + }; + + cpu0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + cluster = <&cluster0>; + core = <&core0>; + clock-frequency = <1000000000>; + cci-control-port = <&cci_control1>; + }; + + cpu1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <1>; + cluster = <&cluster0>; + core = <&core1>; + clock-frequency = <1000000000>; + cci-control-port = <&cci_control1>; }; }; memory@80000000 { device_type = "memory"; - reg = <0 0x80000000 0 0x40000000>; + reg = <0 0x80000000 0 0x80000000>; }; wdt@2a490000 { @@ -81,6 +145,8 @@ compatible = "arm,hdlcd"; reg = <0 0x2b000000 0 0x1000>; interrupts = <0 85 4>; + mode = "1024x768-16@60"; + framebuffer = <0 0xff000000 0 0x01000000>; clocks = <&oscclk5>; clock-names = "pxlclk"; }; @@ -102,6 +168,64 @@ <0 0x2c004000 0 0x2000>, <0 0x2c006000 0 0x2000>; interrupts = <1 9 0xf04>; + + gic-cpuif@0 { + compatible = "arm,gic-cpuif"; + cpuif-id = <0>; + cpu = <&cpu0>; + }; + gic-cpuif@1 { + compatible = "arm,gic-cpuif"; + cpuif-id = <1>; + cpu = <&cpu1>; + }; + gic-cpuif@2 { + compatible = "arm,gic-cpuif"; + cpuif-id = <2>; + cpu = <&cpu2>; + }; + + gic-cpuif@3 { + compatible = "arm,gic-cpuif"; + cpuif-id = <3>; + cpu = <&cpu3>; + }; + + gic-cpuif@4 { + compatible = "arm,gic-cpuif"; + cpuif-id = <4>; + cpu = <&cpu4>; + }; + }; + + cci@2c090000 { + compatible = "arm,cci-400"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0 0x2c090000 0 0x1000>; + ranges = <0x0 0x0 0x2c090000 0x10000>; + + cci_control1: slave-if@4000 { + compatible = "arm,cci-400-ctrl-if"; + interface-type = "ace"; + reg = <0x4000 0x1000>; + }; + + cci_control2: slave-if@5000 { + compatible = "arm,cci-400-ctrl-if"; + interface-type = "ace"; + reg = <0x5000 0x1000>; + }; + }; + + cci-pmu@2c099000 { + compatible = "arm,cci-400-pmu"; + reg = <0 0x2c099000 0 0x6000>; + interrupts = <0 101 4>, + <0 102 4>, + <0 103 4>, + <0 104 4>, + <0 105 4>; }; memory-controller@7ffd0000 { @@ -125,6 +249,12 @@ clock-names = "apb_pclk"; }; + spc@7fff0000 { + compatible = "arm,vexpress-spc,v2p-ca15_a7","arm,vexpress-spc"; + reg = <0 0x7fff0000 0 0x1000>; + interrupts = <0 95 4>; + }; + timer { compatible = "arm,armv7-timer"; interrupts = <1 13 0xf08>, @@ -133,12 +263,21 @@ <1 10 0xf08>; }; - pmu { + pmu_a15 { compatible = "arm,cortex-a15-pmu"; + cluster = <&cluster0>; interrupts = <0 68 4>, <0 69 4>; }; + pmu_a7 { + compatible = "arm,cortex-a7-pmu"; + cluster = <&cluster1>; + interrupts = <0 128 4>, + <0 129 4>, + <0 130 4>; + }; + oscclk6a: oscclk6a { /* Reference 24MHz clock */ compatible = "fixed-clock"; @@ -147,6 +286,15 @@ clock-output-names = "oscclk6a"; }; + psci { + compatible = "arm,psci"; + method = "smc"; + cpu_suspend = <0x80100001>; + cpu_off = <0x80100002>; + cpu_on = <0x80100003>; + migrate = <0x80100004>; + }; + dcc { compatible = "arm,vexpress,config-bus"; arm,vexpress,config-bridge = <&v2m_sysreg>; diff --git a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts index c544a550459..cf633ed6a1b 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts @@ -9,6 +9,8 @@ /dts-v1/; +/memreserve/ 0xbf000000 0x01000000; + / { model = "V2P-CA5s"; arm,hbi = <0x225>; @@ -59,6 +61,8 @@ interrupts = <0 85 4>; clocks = <&oscclk3>; clock-names = "pxlclk"; + mode = "640x480-16@60"; + framebuffer = <0xbf000000 0x01000000>; }; memory-controller@2a150000 { diff --git a/arch/arm/boot/dts/vexpress-v2p-ca9.dts b/arch/arm/boot/dts/vexpress-v2p-ca9.dts index 62d9b225dcc..f83706bd3f9 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca9.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca9.dts @@ -9,6 +9,8 @@ /dts-v1/; +/include/ "clcd-panels.dtsi" + / { model = "V2P-CA9"; arm,hbi = <0x191>; @@ -73,6 +75,8 @@ interrupts = <0 44 4>; clocks = <&oscclk1>, <&oscclk2>; clock-names = "clcdclk", "apb_pclk"; + mode = "XVGA"; + use_dma = <1>; }; memory-controller@100e0000 { diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 48434cbe3e8..462cd580fc2 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -14,5 +14,9 @@ obj-$(CONFIG_SHARP_SCOOP) += scoop.o obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o +obj-$(CONFIG_BL_SWITCHER) += bL_switcher.o +obj-$(CONFIG_BL_SWITCHER_DUMMY_IF) += bL_switcher_dummy_if.o + AFLAGS_mcpm_head.o := -march=armv7-a AFLAGS_vlock.o := -march=armv7-a +CFLAGS_REMOVE_mcpm_entry.o = -pg diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c new file mode 100644 index 00000000000..8fee70dfb30 --- /dev/null +++ b/arch/arm/common/bL_switcher.c @@ -0,0 +1,864 @@ +/* + * arch/arm/common/bL_switcher.c -- big.LITTLE cluster switcher core driver + * + * Created by: Nicolas Pitre, March 2012 + * Copyright: (C) 2012 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/atomic.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/cpu_pm.h> +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/kthread.h> +#include <linux/wait.h> +#include <linux/time.h> +#include <linux/clockchips.h> +#include <linux/hrtimer.h> +#include <linux/tick.h> +#include <linux/notifier.h> +#include <linux/mm.h> +#include <linux/mutex.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/sysfs.h> +#include <linux/irqchip/arm-gic.h> +#include <linux/moduleparam.h> + +#include <asm/smp_plat.h> +#include <asm/cacheflush.h> +#include <asm/cputype.h> +#include <asm/suspend.h> +#include <asm/mcpm.h> +#include <asm/bL_switcher.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/power_cpu_migrate.h> + + +/* + * Use our own MPIDR accessors as the generic ones in asm/cputype.h have + * __attribute_const__ and we don't want the compiler to assume any + * constness here as the value _does_ change along some code paths. + */ + +static int read_mpidr(void) +{ + unsigned int id; + asm volatile ("mrc\tp15, 0, %0, c0, c0, 5" : "=r" (id)); + return id & MPIDR_HWID_BITMASK; +} + +/* + * Get a global nanosecond time stamp for tracing. + */ +static s64 get_ns(void) +{ + struct timespec ts; + getnstimeofday(&ts); + return timespec_to_ns(&ts); +} + +/* + * bL switcher core code. + */ + +static void bL_do_switch(void *_arg) +{ + unsigned ib_mpidr, ib_cpu, ib_cluster; + long volatile handshake, **handshake_ptr = _arg; + + pr_debug("%s\n", __func__); + + ib_mpidr = cpu_logical_map(smp_processor_id()); + ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0); + ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1); + + /* Advertise our handshake location */ + if (handshake_ptr) { + handshake = 0; + *handshake_ptr = &handshake; + } else + handshake = -1; + + /* + * Our state has been saved at this point. Let's release our + * inbound CPU. + */ + mcpm_set_entry_vector(ib_cpu, ib_cluster, cpu_resume); + sev(); + + /* + * From this point, we must assume that our counterpart CPU might + * have taken over in its parallel world already, as if execution + * just returned from cpu_suspend(). It is therefore important to + * be very careful not to make any change the other guy is not + * expecting. This is why we need stack isolation. + * + * Fancy under cover tasks could be performed here. For now + * we have none. + */ + + /* + * Let's wait until our inbound is alive. + */ + while (!handshake) { + wfe(); + smp_mb(); + } + + /* Let's put ourself down. */ + mcpm_cpu_power_down(); + + /* should never get here */ + BUG(); +} + +/* + * Stack isolation. To ensure 'current' remains valid, we just use another + * piece of our thread's stack space which should be fairly lightly used. + * The selected area starts just above the thread_info structure located + * at the very bottom of the stack, aligned to a cache line, and indexed + * with the cluster number. + */ +#define STACK_SIZE 512 +extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); +static int bL_switchpoint(unsigned long _arg) +{ + unsigned int mpidr = read_mpidr(); + unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); + void *stack = current_thread_info() + 1; + stack = PTR_ALIGN(stack, L1_CACHE_BYTES); + stack += clusterid * STACK_SIZE + STACK_SIZE; + call_with_stack(bL_do_switch, (void *)_arg, stack); + BUG(); +} + +/* + * Generic switcher interface + */ + +static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS]; +static int bL_switcher_cpu_pairing[NR_CPUS]; + +/* + * bL_switch_to - Switch to a specific cluster for the current CPU + * @new_cluster_id: the ID of the cluster to switch to. + * + * This function must be called on the CPU to be switched. + * Returns 0 on success, else a negative status code. + */ +static int bL_switch_to(unsigned int new_cluster_id) +{ + unsigned int mpidr, this_cpu, that_cpu; + unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster; + struct completion inbound_alive; + struct tick_device *tdev; + enum clock_event_mode tdev_mode; + long volatile *handshake_ptr; + int ipi_nr, ret; + + this_cpu = smp_processor_id(); + ob_mpidr = read_mpidr(); + ob_cpu = MPIDR_AFFINITY_LEVEL(ob_mpidr, 0); + ob_cluster = MPIDR_AFFINITY_LEVEL(ob_mpidr, 1); + BUG_ON(cpu_logical_map(this_cpu) != ob_mpidr); + + if (new_cluster_id == ob_cluster) + return 0; + + that_cpu = bL_switcher_cpu_pairing[this_cpu]; + ib_mpidr = cpu_logical_map(that_cpu); + ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0); + ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1); + + pr_debug("before switch: CPU %d MPIDR %#x -> %#x\n", + this_cpu, ob_mpidr, ib_mpidr); + + this_cpu = smp_processor_id(); + + /* Close the gate for our entry vectors */ + mcpm_set_entry_vector(ob_cpu, ob_cluster, NULL); + mcpm_set_entry_vector(ib_cpu, ib_cluster, NULL); + + /* Install our "inbound alive" notifier. */ + init_completion(&inbound_alive); + ipi_nr = register_ipi_completion(&inbound_alive, this_cpu); + ipi_nr |= ((1 << 16) << bL_gic_id[ob_cpu][ob_cluster]); + mcpm_set_early_poke(ib_cpu, ib_cluster, gic_get_sgir_physaddr(), ipi_nr); + + /* + * Let's wake up the inbound CPU now in case it requires some delay + * to come online, but leave it gated in our entry vector code. + */ + ret = mcpm_cpu_power_up(ib_cpu, ib_cluster); + if (ret) { + pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret); + return ret; + } + + /* + * Raise a SGI on the inbound CPU to make sure it doesn't stall + * in a possible WFI, such as in bL_power_down(). + */ + gic_send_sgi(bL_gic_id[ib_cpu][ib_cluster], 0); + + /* + * Wait for the inbound to come up. This allows for other + * tasks to be scheduled in the mean time. + */ + wait_for_completion(&inbound_alive); + mcpm_set_early_poke(ib_cpu, ib_cluster, 0, 0); + + /* + * From this point we are entering the switch critical zone + * and can't sleep/schedule anymore. + */ + local_irq_disable(); + local_fiq_disable(); + trace_cpu_migrate_begin(get_ns(), ob_mpidr); + + /* redirect GIC's SGIs to our counterpart */ + gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]); + + tdev = tick_get_device(this_cpu); + if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu))) + tdev = NULL; + if (tdev) { + tdev_mode = tdev->evtdev->mode; + clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN); + } + + ret = cpu_pm_enter(); + + /* we can not tolerate errors at this point */ + if (ret) + panic("%s: cpu_pm_enter() returned %d\n", __func__, ret); + + /* + * Swap the physical CPUs in the logical map for this logical CPU. + * This must be flushed to RAM as the resume code + * needs to access it while the caches are still disabled. + */ + cpu_logical_map(this_cpu) = ib_mpidr; + cpu_logical_map(that_cpu) = ob_mpidr; + sync_cache_w(&cpu_logical_map(this_cpu)); + + /* Let's do the actual CPU switch. */ + ret = cpu_suspend((unsigned long)&handshake_ptr, bL_switchpoint); + if (ret > 0) + panic("%s: cpu_suspend() returned %d\n", __func__, ret); + + /* We are executing on the inbound CPU at this point */ + mpidr = read_mpidr(); + pr_debug("after switch: CPU %d MPIDR %#x\n", this_cpu, mpidr); + BUG_ON(mpidr != ib_mpidr); + + mcpm_cpu_powered_up(); + + ret = cpu_pm_exit(); + + if (tdev) { + clockevents_set_mode(tdev->evtdev, tdev_mode); + clockevents_program_event(tdev->evtdev, + tdev->evtdev->next_event, 1); + } + + trace_cpu_migrate_finish(get_ns(), ib_mpidr); + local_fiq_enable(); + local_irq_enable(); + + *handshake_ptr = 1; + dsb_sev(); + + if (ret) + pr_err("%s exiting with error %d\n", __func__, ret); + return ret; +} + +struct bL_thread { + spinlock_t lock; + struct task_struct *task; + wait_queue_head_t wq; + int wanted_cluster; + struct completion started; + bL_switch_completion_handler completer; + void *completer_cookie; +}; + +static struct bL_thread bL_threads[NR_CPUS]; + +static int bL_switcher_thread(void *arg) +{ + struct bL_thread *t = arg; + struct sched_param param = { .sched_priority = 1 }; + int cluster; + bL_switch_completion_handler completer; + void *completer_cookie; + + sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); + complete(&t->started); + + do { + if (signal_pending(current)) + flush_signals(current); + wait_event_interruptible(t->wq, + t->wanted_cluster != -1 || + kthread_should_stop()); + + spin_lock(&t->lock); + cluster = t->wanted_cluster; + completer = t->completer; + completer_cookie = t->completer_cookie; + t->wanted_cluster = -1; + t->completer = NULL; + spin_unlock(&t->lock); + + if (cluster != -1) { + bL_switch_to(cluster); + + if (completer) + completer(completer_cookie); + } + } while (!kthread_should_stop()); + + return 0; +} + +static struct task_struct * bL_switcher_thread_create(int cpu, void *arg) +{ + struct task_struct *task; + + task = kthread_create_on_node(bL_switcher_thread, arg, + cpu_to_node(cpu), "kswitcher_%d", cpu); + if (!IS_ERR(task)) { + kthread_bind(task, cpu); + wake_up_process(task); + } else + pr_err("%s failed for CPU %d\n", __func__, cpu); + return task; +} + +/* + * bL_switch_request_cb - Switch to a specific cluster for the given CPU, + * with completion notification via a callback + * + * @cpu: the CPU to switch + * @new_cluster_id: the ID of the cluster to switch to. + * @completer: switch completion callback. if non-NULL, + * @completer(@completer_cookie) will be called on completion of + * the switch, in non-atomic context. + * @completer_cookie: opaque context argument for @completer. + * + * This function causes a cluster switch on the given CPU by waking up + * the appropriate switcher thread. This function may or may not return + * before the switch has occurred. + * + * If a @completer callback function is supplied, it will be called when + * the switch is complete. This can be used to determine asynchronously + * when the switch is complete, regardless of when bL_switch_request() + * returns. When @completer is supplied, no new switch request is permitted + * for the affected CPU until after the switch is complete, and @completer + * has returned. + */ +int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id, + bL_switch_completion_handler completer, + void *completer_cookie) +{ + struct bL_thread *t; + + if (cpu >= ARRAY_SIZE(bL_threads)) { + pr_err("%s: cpu %d out of bounds\n", __func__, cpu); + return -EINVAL; + } + + t = &bL_threads[cpu]; + + if (IS_ERR(t->task)) + return PTR_ERR(t->task); + if (!t->task) + return -ESRCH; + + spin_lock(&t->lock); + if (t->completer) { + spin_unlock(&t->lock); + return -EBUSY; + } + t->completer = completer; + t->completer_cookie = completer_cookie; + t->wanted_cluster = new_cluster_id; + spin_unlock(&t->lock); + wake_up(&t->wq); + return 0; +} + +EXPORT_SYMBOL_GPL(bL_switch_request_cb); + +/* + * Detach an outstanding switch request. + * + * The switcher will continue with the switch request in the background, + * but the completer function will not be called. + * + * This may be necessary if the completer is in a kernel module which is + * about to be unloaded. + */ +void bL_switch_request_detach(unsigned int cpu, + bL_switch_completion_handler completer) +{ + struct bL_thread *t; + + if (cpu >= ARRAY_SIZE(bL_threads)) { + pr_err("%s: cpu %d out of bounds\n", __func__, cpu); + return; + } + + t = &bL_threads[cpu]; + + if (IS_ERR(t->task) || !t->task) + return; + + spin_lock(&t->lock); + if (t->completer == completer) + t->completer = NULL; + spin_unlock(&t->lock); +} + +EXPORT_SYMBOL_GPL(bL_switch_request_detach); + +/* + * Activation and configuration code. + */ + +static DEFINE_MUTEX(bL_switcher_activation_lock); +static BLOCKING_NOTIFIER_HEAD(bL_activation_notifier); +static unsigned int bL_switcher_active; +static unsigned int bL_switcher_cpu_original_cluster[NR_CPUS]; +static cpumask_t bL_switcher_removed_logical_cpus; + +int bL_switcher_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&bL_activation_notifier, nb); +} +EXPORT_SYMBOL_GPL(bL_switcher_register_notifier); + +int bL_switcher_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&bL_activation_notifier, nb); +} +EXPORT_SYMBOL_GPL(bL_switcher_unregister_notifier); + +static int bL_activation_notify(unsigned long val) +{ + int ret; + + ret = blocking_notifier_call_chain(&bL_activation_notifier, val, NULL); + if (ret & NOTIFY_STOP_MASK) + pr_err("%s: notifier chain failed with status 0x%x\n", + __func__, ret); + return notifier_to_errno(ret); +} + +static void bL_switcher_restore_cpus(void) +{ + int i; + + for_each_cpu(i, &bL_switcher_removed_logical_cpus) + cpu_up(i); +} + +static int bL_switcher_halve_cpus(void) +{ + int i, j, cluster_0, gic_id, ret; + unsigned int cpu, cluster, mask; + cpumask_t available_cpus; + + /* First pass to validate what we have */ + mask = 0; + for_each_online_cpu(i) { + cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0); + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1); + if (cluster >= 2) { + pr_err("%s: only dual cluster systems are supported\n", __func__); + return -EINVAL; + } + if (WARN_ON(cpu >= MAX_CPUS_PER_CLUSTER)) + return -EINVAL; + mask |= (1 << cluster); + } + if (mask != 3) { + pr_err("%s: no CPU pairing possible\n", __func__); + return -EINVAL; + } + + /* + * Now let's do the pairing. We match each CPU with another CPU + * from a different cluster. To get a uniform scheduling behavior + * without fiddling with CPU topology and compute capacity data, + * we'll use logical CPUs initially belonging to the same cluster. + */ + memset(bL_switcher_cpu_pairing, -1, sizeof(bL_switcher_cpu_pairing)); + cpumask_copy(&available_cpus, cpu_online_mask); + cluster_0 = -1; + for_each_cpu(i, &available_cpus) { + int match = -1; + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1); + if (cluster_0 == -1) + cluster_0 = cluster; + if (cluster != cluster_0) + continue; + cpumask_clear_cpu(i, &available_cpus); + for_each_cpu(j, &available_cpus) { + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(j), 1); + /* + * Let's remember the last match to create "odd" + * pairing on purpose in order for other code not + * to assume any relation between physical and + * logical CPU numbers. + */ + if (cluster != cluster_0) + match = j; + } + if (match != -1) { + bL_switcher_cpu_pairing[i] = match; + cpumask_clear_cpu(match, &available_cpus); + pr_info("CPU%d paired with CPU%d\n", i, match); + } + } + + /* + * Now we disable the unwanted CPUs i.e. everything that has no + * pairing information (that includes the pairing counterparts). + */ + cpumask_clear(&bL_switcher_removed_logical_cpus); + for_each_online_cpu(i) { + cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0); + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1); + + /* Let's take note of the GIC ID for this CPU */ + gic_id = gic_get_cpu_id(i); + if (gic_id < 0) { + pr_err("%s: bad GIC ID for CPU %d\n", __func__, i); + bL_switcher_restore_cpus(); + return -EINVAL; + } + bL_gic_id[cpu][cluster] = gic_id; + pr_info("GIC ID for CPU %u cluster %u is %u\n", + cpu, cluster, gic_id); + + if (bL_switcher_cpu_pairing[i] != -1) { + bL_switcher_cpu_original_cluster[i] = cluster; + continue; + } + + ret = cpu_down(i); + if (ret) { + bL_switcher_restore_cpus(); + return ret; + } + cpumask_set_cpu(i, &bL_switcher_removed_logical_cpus); + } + + return 0; +} + +/* Determine the logical CPU a given physical CPU is grouped on. */ +int bL_switcher_get_logical_index(u32 mpidr) +{ + int cpu; + + if (!bL_switcher_active) + return -EUNATCH; + + mpidr &= MPIDR_HWID_BITMASK; + for_each_online_cpu(cpu) { + int pairing = bL_switcher_cpu_pairing[cpu]; + if (pairing == -1) + continue; + if ((mpidr == cpu_logical_map(cpu)) || + (mpidr == cpu_logical_map(pairing))) + return cpu; + } + return -EINVAL; +} + +static void bL_switcher_trace_trigger_cpu(void *__always_unused info) +{ + trace_cpu_migrate_current(get_ns(), read_mpidr()); +} + +int bL_switcher_trace_trigger(void) +{ + int ret; + + preempt_disable(); + + bL_switcher_trace_trigger_cpu(NULL); + ret = smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true); + + preempt_enable(); + + return ret; +} +EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger); + +static int bL_switcher_enable(void) +{ + int cpu, ret; + + mutex_lock(&bL_switcher_activation_lock); + cpu_hotplug_driver_lock(); + if (bL_switcher_active) { + cpu_hotplug_driver_unlock(); + mutex_unlock(&bL_switcher_activation_lock); + return 0; + } + + pr_info("big.LITTLE switcher initializing\n"); + + ret = bL_activation_notify(BL_NOTIFY_PRE_ENABLE); + if (ret) + goto error; + + ret = bL_switcher_halve_cpus(); + if (ret) + goto error; + + bL_switcher_trace_trigger(); + + for_each_online_cpu(cpu) { + struct bL_thread *t = &bL_threads[cpu]; + spin_lock_init(&t->lock); + init_waitqueue_head(&t->wq); + init_completion(&t->started); + t->wanted_cluster = -1; + t->task = bL_switcher_thread_create(cpu, t); + } + + bL_switcher_active = 1; + bL_activation_notify(BL_NOTIFY_POST_ENABLE); + pr_info("big.LITTLE switcher initialized\n"); + goto out; + +error: + pr_warning("big.LITTLE switcher initialization failed\n"); + bL_activation_notify(BL_NOTIFY_POST_DISABLE); + +out: + cpu_hotplug_driver_unlock(); + mutex_unlock(&bL_switcher_activation_lock); + return ret; +} + +#ifdef CONFIG_SYSFS + +static void bL_switcher_disable(void) +{ + unsigned int cpu, cluster; + struct bL_thread *t; + struct task_struct *task; + + mutex_lock(&bL_switcher_activation_lock); + cpu_hotplug_driver_lock(); + + if (!bL_switcher_active) + goto out; + + if (bL_activation_notify(BL_NOTIFY_PRE_DISABLE) != 0) { + bL_activation_notify(BL_NOTIFY_POST_ENABLE); + goto out; + } + + bL_switcher_active = 0; + + /* + * To deactivate the switcher, we must shut down the switcher + * threads to prevent any other requests from being accepted. + * Then, if the final cluster for given logical CPU is not the + * same as the original one, we'll recreate a switcher thread + * just for the purpose of switching the CPU back without any + * possibility for interference from external requests. + */ + for_each_online_cpu(cpu) { + t = &bL_threads[cpu]; + task = t->task; + t->task = NULL; + if (!task || IS_ERR(task)) + continue; + kthread_stop(task); + /* no more switch may happen on this CPU at this point */ + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1); + if (cluster == bL_switcher_cpu_original_cluster[cpu]) + continue; + init_completion(&t->started); + t->wanted_cluster = bL_switcher_cpu_original_cluster[cpu]; + task = bL_switcher_thread_create(cpu, t); + if (!IS_ERR(task)) { + wait_for_completion(&t->started); + kthread_stop(task); + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1); + if (cluster == bL_switcher_cpu_original_cluster[cpu]) + continue; + } + /* If execution gets here, we're in trouble. */ + pr_crit("%s: unable to restore original cluster for CPU %d\n", + __func__, cpu); + pr_crit("%s: CPU %d can't be restored\n", + __func__, bL_switcher_cpu_pairing[cpu]); + cpumask_clear_cpu(bL_switcher_cpu_pairing[cpu], + &bL_switcher_removed_logical_cpus); + } + + bL_switcher_restore_cpus(); + bL_switcher_trace_trigger(); + + bL_activation_notify(BL_NOTIFY_POST_DISABLE); + +out: + cpu_hotplug_driver_unlock(); + mutex_unlock(&bL_switcher_activation_lock); +} + +static ssize_t bL_switcher_active_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", bL_switcher_active); +} + +static ssize_t bL_switcher_active_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + int ret; + + switch (buf[0]) { + case '0': + bL_switcher_disable(); + ret = 0; + break; + case '1': + ret = bL_switcher_enable(); + break; + default: + ret = -EINVAL; + } + + return (ret >= 0) ? count : ret; +} + +static ssize_t bL_switcher_trace_trigger_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + int ret = bL_switcher_trace_trigger(); + + return ret ? ret : count; +} + +static struct kobj_attribute bL_switcher_active_attr = + __ATTR(active, 0644, bL_switcher_active_show, bL_switcher_active_store); + +static struct kobj_attribute bL_switcher_trace_trigger_attr = + __ATTR(trace_trigger, 0200, NULL, bL_switcher_trace_trigger_store); + +static struct attribute *bL_switcher_attrs[] = { + &bL_switcher_active_attr.attr, + &bL_switcher_trace_trigger_attr.attr, + NULL, +}; + +static struct attribute_group bL_switcher_attr_group = { + .attrs = bL_switcher_attrs, +}; + +static struct kobject *bL_switcher_kobj; + +static int __init bL_switcher_sysfs_init(void) +{ + int ret; + + bL_switcher_kobj = kobject_create_and_add("bL_switcher", kernel_kobj); + if (!bL_switcher_kobj) + return -ENOMEM; + ret = sysfs_create_group(bL_switcher_kobj, &bL_switcher_attr_group); + if (ret) + kobject_put(bL_switcher_kobj); + return ret; +} + +#endif /* CONFIG_SYSFS */ + +bool bL_switcher_get_enabled(void) +{ + mutex_lock(&bL_switcher_activation_lock); + + return bL_switcher_active; +} +EXPORT_SYMBOL_GPL(bL_switcher_get_enabled); + +void bL_switcher_put_enabled(void) +{ + mutex_unlock(&bL_switcher_activation_lock); +} +EXPORT_SYMBOL_GPL(bL_switcher_put_enabled); + +/* + * Veto any CPU hotplug operation while the switcher is active. + * We're just not ready to deal with that given the trickery involved. + */ +static int bL_switcher_hotplug_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + switch (action) { + case CPU_UP_PREPARE: + case CPU_DOWN_PREPARE: + if (bL_switcher_active) + return NOTIFY_BAD; + } + return NOTIFY_DONE; +} + +static struct notifier_block bL_switcher_hotplug_notifier = + { &bL_switcher_hotplug_callback, NULL, 0 }; + +#ifdef CONFIG_SCHED_HMP +static bool no_bL_switcher = true; +#else +static bool no_bL_switcher; +#endif +core_param(no_bL_switcher, no_bL_switcher, bool, 0644); + +static int __init bL_switcher_init(void) +{ + int ret; + + if (MAX_NR_CLUSTERS != 2) { + pr_err("%s: only dual cluster systems are supported\n", __func__); + return -EINVAL; + } + + register_cpu_notifier(&bL_switcher_hotplug_notifier); + + if (!no_bL_switcher) { + ret = bL_switcher_enable(); + if (ret) + return ret; + } + +#ifdef CONFIG_SYSFS + ret = bL_switcher_sysfs_init(); + if (ret) + pr_err("%s: unable to create sysfs entry\n", __func__); +#endif + + return 0; +} + +late_initcall(bL_switcher_init); diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c new file mode 100644 index 00000000000..5e2dd197e72 --- /dev/null +++ b/arch/arm/common/bL_switcher_dummy_if.c @@ -0,0 +1,71 @@ +/* + * arch/arm/common/bL_switcher_dummy_if.c -- b.L switcher dummy interface + * + * Created by: Nicolas Pitre, November 2012 + * Copyright: (C) 2012 Linaro Limited + * + * Dummy interface to user space for debugging purpose only. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> +#include <asm/uaccess.h> +#include <asm/bL_switcher.h> + +static ssize_t bL_switcher_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + unsigned char val[3]; + unsigned int cpu, cluster; + int ret; + + pr_debug("%s\n", __func__); + + if (len < 3) + return -EINVAL; + + if (copy_from_user(val, buf, 3)) + return -EFAULT; + + /* format: <cpu#>,<cluster#> */ + if (val[0] < '0' || val[0] > '4' || + val[1] != ',' || + val[2] < '0' || val[2] > '1') + return -EINVAL; + + cpu = val[0] - '0'; + cluster = val[2] - '0'; + ret = bL_switch_request(cpu, cluster); + + return ret ? : len; +} + +static const struct file_operations bL_switcher_fops = { + .write = bL_switcher_write, + .owner = THIS_MODULE, +}; + +static struct miscdevice bL_switcher_device = { + MISC_DYNAMIC_MINOR, + "b.L_switcher", + &bL_switcher_fops +}; + +static int __init bL_switcher_dummy_if_init(void) +{ + return misc_register(&bL_switcher_device); +} + +static void __exit bL_switcher_dummy_if_exit(void) +{ + misc_deregister(&bL_switcher_device); +} + +module_init(bL_switcher_dummy_if_init); +module_exit(bL_switcher_dummy_if_exit); diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index 370236dd1a0..4a2b32fd53a 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -27,6 +27,18 @@ void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr) sync_cache_w(&mcpm_entry_vectors[cluster][cpu]); } +extern unsigned long mcpm_entry_early_pokes[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER][2]; + +void mcpm_set_early_poke(unsigned cpu, unsigned cluster, + unsigned long poke_phys_addr, unsigned long poke_val) +{ + unsigned long *poke = &mcpm_entry_early_pokes[cluster][cpu][0]; + poke[0] = poke_phys_addr; + poke[1] = poke_val; + __cpuc_flush_dcache_area((void *)poke, 8); + outer_clean_range(__pa(poke), __pa(poke + 2)); +} + static const struct mcpm_platform_ops *platform_ops; int __init mcpm_platform_register(const struct mcpm_platform_ops *ops) diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S index 8178705c4b2..057e9c5a9e1 100644 --- a/arch/arm/common/mcpm_head.S +++ b/arch/arm/common/mcpm_head.S @@ -71,12 +71,19 @@ ENTRY(mcpm_entry_point) * position independent way. */ adr r5, 3f - ldmia r5, {r6, r7, r8, r11} + ldmia r5, {r0, r6, r7, r8, r11} + add r0, r5, r0 @ r0 = mcpm_entry_early_pokes add r6, r5, r6 @ r6 = mcpm_entry_vectors ldr r7, [r5, r7] @ r7 = mcpm_power_up_setup_phys add r8, r5, r8 @ r8 = mcpm_sync add r11, r5, r11 @ r11 = first_man_locks + @ Perform an early poke, if any + add r0, r0, r4, lsl #3 + ldmia r0, {r0, r1} + teq r0, #0 + strne r1, [r0] + mov r0, #MCPM_SYNC_CLUSTER_SIZE mla r8, r0, r10, r8 @ r8 = sync cluster base @@ -195,7 +202,8 @@ mcpm_entry_gated: .align 2 -3: .word mcpm_entry_vectors - . +3: .word mcpm_entry_early_pokes - . + .word mcpm_entry_vectors - 3b .word mcpm_power_up_setup_phys - 3b .word mcpm_sync - 3b .word first_man_locks - 3b @@ -214,6 +222,10 @@ first_man_locks: ENTRY(mcpm_entry_vectors) .space 4 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER + .type mcpm_entry_early_pokes, #object +ENTRY(mcpm_entry_early_pokes) + .space 8 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER + .type mcpm_power_up_setup_phys, #object ENTRY(mcpm_power_up_setup_phys) .space 4 @ set by mcpm_sync_init() diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index 7c1bfc0aea0..4928cdacedf 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -105,7 +105,7 @@ static inline void __cpuinit arch_counter_set_user_access(void) asm volatile("mrc p15, 0, %0, c14, c1, 0" : "=r" (cntkctl)); /* disable user access to everything */ - cntkctl &= ~((3 << 8) | (7 << 0)); + cntkctl &= ~((3 << 8) | (3 << 0)); asm volatile("mcr p15, 0, %0, c14, c1, 0" : : "r" (cntkctl)); } diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h new file mode 100644 index 00000000000..482383b45c9 --- /dev/null +++ b/arch/arm/include/asm/bL_switcher.h @@ -0,0 +1,83 @@ +/* + * arch/arm/include/asm/bL_switcher.h + * + * Created by: Nicolas Pitre, April 2012 + * Copyright: (C) 2012 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef ASM_BL_SWITCHER_H +#define ASM_BL_SWITCHER_H + +#include <linux/compiler.h> +#include <linux/types.h> + +typedef void (*bL_switch_completion_handler)(void *cookie); + +int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id, + bL_switch_completion_handler completer, + void *completer_cookie); +static inline int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id) +{ + return bL_switch_request_cb(cpu, new_cluster_id, NULL, NULL); +} + +/* + * Register here to be notified about runtime enabling/disabling of + * the switcher. + * + * The notifier chain is called with the switcher activation lock held: + * the switcher will not be enabled or disabled during callbacks. + * Callbacks must not call bL_switcher_{get,put}_enabled(). + */ +#define BL_NOTIFY_PRE_ENABLE 0 +#define BL_NOTIFY_POST_ENABLE 1 +#define BL_NOTIFY_PRE_DISABLE 2 +#define BL_NOTIFY_POST_DISABLE 3 + +#ifdef CONFIG_BL_SWITCHER + +void bL_switch_request_detach(unsigned int cpu, + bL_switch_completion_handler completer); + +int bL_switcher_register_notifier(struct notifier_block *nb); +int bL_switcher_unregister_notifier(struct notifier_block *nb); + +/* + * Use these functions to temporarily prevent enabling/disabling of + * the switcher. + * bL_switcher_get_enabled() returns true if the switcher is currently + * enabled. Each call to bL_switcher_get_enabled() must be followed + * by a call to bL_switcher_put_enabled(). These functions are not + * recursive. + */ +bool bL_switcher_get_enabled(void); +void bL_switcher_put_enabled(void); + +int bL_switcher_trace_trigger(void); +int bL_switcher_get_logical_index(u32 mpidr); + +#else +static void bL_switch_request_detach(unsigned int cpu, + bL_switch_completion_handler completer) { } + +static inline int bL_switcher_register_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int bL_switcher_unregister_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline bool bL_switcher_get_enabled(void) { return false; } +static inline void bL_switcher_put_enabled(void) { } +static inline int bL_switcher_trace_trigger(void) { return 0; } +static inline int bL_switcher_get_logical_index(u32 mpidr) { return -EUNATCH; } +#endif /* CONFIG_BL_SWITCHER */ + +#endif diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h index 1f3262e99d8..cedd3721318 100644 --- a/arch/arm/include/asm/cp15.h +++ b/arch/arm/include/asm/cp15.h @@ -61,6 +61,20 @@ static inline void set_cr(unsigned int val) isb(); } +static inline unsigned int get_auxcr(void) +{ + unsigned int val; + asm("mrc p15, 0, %0, c1, c0, 1 @ get AUXCR" : "=r" (val)); + return val; +} + +static inline void set_auxcr(unsigned int val) +{ + asm volatile("mcr p15, 0, %0, c1, c0, 1 @ set AUXCR" + : : "r" (val)); + isb(); +} + #ifndef CONFIG_SMP extern void adjust_cr(unsigned long mask, unsigned long set); #endif diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h index 2740c2a2df6..3d7351c844a 100644 --- a/arch/arm/include/asm/hardirq.h +++ b/arch/arm/include/asm/hardirq.h @@ -5,7 +5,7 @@ #include <linux/threads.h> #include <asm/irq.h> -#define NR_IPI 6 +#define NR_IPI 7 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index 308ad7d6f98..75bf07910b8 100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h @@ -8,6 +8,8 @@ * published by the Free Software Foundation. */ +#include <linux/types.h> + #ifndef __ASSEMBLY__ struct tag; @@ -16,8 +18,10 @@ struct pt_regs; struct smp_operations; #ifdef CONFIG_SMP #define smp_ops(ops) (&(ops)) +#define smp_init_ops(ops) (&(ops)) #else #define smp_ops(ops) (struct smp_operations *)NULL +#define smp_init_ops(ops) (bool (*)(void))NULL #endif struct machine_desc { @@ -41,6 +45,7 @@ struct machine_desc { unsigned char reserve_lp2 :1; /* never has lp2 */ char restart_mode; /* default restart mode */ struct smp_operations *smp; /* SMP operations */ + bool (*smp_init)(void); void (*fixup)(struct tag *, char **, struct meminfo *); void (*reserve)(void);/* reserve mem blocks */ diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h index 0f7b7620e9a..7626a7fd493 100644 --- a/arch/arm/include/asm/mcpm.h +++ b/arch/arm/include/asm/mcpm.h @@ -42,6 +42,14 @@ extern void mcpm_entry_point(void); void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr); /* + * This sets an early poke i.e a value to be poked into some address + * from very early assembly code before the CPU is ungated. The + * address must be physical, and if 0 then nothing will happen. + */ +void mcpm_set_early_poke(unsigned cpu, unsigned cluster, + unsigned long poke_phys_addr, unsigned long poke_val); + +/* * CPU/cluster power operations API for higher subsystems to use. */ diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index f24edad26c7..0cd7824ca76 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -62,9 +62,19 @@ struct pmu_hw_events { raw_spinlock_t pmu_lock; }; +struct cpupmu_regs { + u32 pmc; + u32 pmcntenset; + u32 pmuseren; + u32 pmintenset; + u32 pmxevttype[8]; + u32 pmxevtcnt[8]; +}; + struct arm_pmu { struct pmu pmu; cpumask_t active_irqs; + cpumask_t valid_cpus; char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct perf_event *event); @@ -81,6 +91,8 @@ struct arm_pmu { int (*request_irq)(struct arm_pmu *, irq_handler_t handler); void (*free_irq)(struct arm_pmu *); int (*map_event)(struct perf_event *event); + void (*save_regs)(struct arm_pmu *, struct cpupmu_regs *); + void (*restore_regs)(struct arm_pmu *, struct cpupmu_regs *); int num_events; atomic_t active_events; struct mutex reserve_mutex; diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h index ce0dbe7c162..f0a8627c9f1 100644 --- a/arch/arm/include/asm/psci.h +++ b/arch/arm/include/asm/psci.h @@ -16,6 +16,10 @@ #define PSCI_POWER_STATE_TYPE_STANDBY 0 #define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 +#define PSCI_POWER_STATE_AFFINITY_LEVEL0 0 +#define PSCI_POWER_STATE_AFFINITY_LEVEL1 1 +#define PSCI_POWER_STATE_AFFINITY_LEVEL2 2 +#define PSCI_POWER_STATE_AFFINITY_LEVEL3 3 struct psci_power_state { u16 id; @@ -32,5 +36,22 @@ struct psci_operations { }; extern struct psci_operations psci_ops; +extern struct smp_operations psci_smp_ops; +#ifdef CONFIG_ARM_PSCI +void psci_init(void); +bool psci_smp_available(void); +#else +static inline void psci_init(void) { } +static inline bool psci_smp_available(void) { return false; } +#endif + +#ifdef CONFIG_ARM_PSCI +extern int __init psci_probe(void); +#else +static inline int psci_probe(void) +{ + return -ENODEV; +} +#endif #endif /* __ASM_ARM_PSCI_H */ diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index d3a22bebe6c..610ccf33f5e 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -81,6 +81,8 @@ extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask); +extern int register_ipi_completion(struct completion *completion, int cpu); + struct smp_operations { #ifdef CONFIG_SMP /* diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index 58b8b84adcd..983fa7c153a 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -26,11 +26,45 @@ extern struct cputopo_arm cpu_topology[NR_CPUS]; void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); const struct cpumask *cpu_coregroup_mask(int cpu); +int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask); + +#ifdef CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE +/* Common values for CPUs */ +#ifndef SD_CPU_INIT +#define SD_CPU_INIT (struct sched_domain) { \ + .min_interval = 1, \ + .max_interval = 4, \ + .busy_factor = 64, \ + .imbalance_pct = 125, \ + .cache_nice_tries = 1, \ + .busy_idx = 2, \ + .idle_idx = 1, \ + .newidle_idx = 0, \ + .wake_idx = 0, \ + .forkexec_idx = 0, \ + \ + .flags = 0*SD_LOAD_BALANCE \ + | 1*SD_BALANCE_NEWIDLE \ + | 1*SD_BALANCE_EXEC \ + | 1*SD_BALANCE_FORK \ + | 0*SD_BALANCE_WAKE \ + | 1*SD_WAKE_AFFINE \ + | 0*SD_SHARE_CPUPOWER \ + | 0*SD_SHARE_PKG_RESOURCES \ + | 0*SD_SERIALIZE \ + , \ + .last_balance = jiffies, \ + .balance_interval = 1, \ +} +#endif +#endif /* CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE */ #else static inline void init_cpu_topology(void) { } static inline void store_cpu_topology(unsigned int cpuid) { } +static inline int cluster_to_logical_mask(unsigned int socket_id, + cpumask_t *cluster_mask) { return -EINVAL; } #endif diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5f3338eacad..dd9d90ab65d 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -82,6 +82,9 @@ obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o -obj-$(CONFIG_ARM_PSCI) += psci.o +ifeq ($(CONFIG_ARM_PSCI),y) +obj-y += psci.o +obj-$(CONFIG_SMP) += psci_smp.o +endif extra-y := $(head-y) vmlinux.lds diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 8bac553fe21..2725c87fade 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -342,7 +342,6 @@ __turn_mmu_on_loc: .long __turn_mmu_on_end #if defined(CONFIG_SMP) - __CPUINIT ENTRY(secondary_startup) /* * Common entry point for secondary CPUs. diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 1fd749ee4a1..1b803117ed9 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -1049,7 +1049,8 @@ static struct notifier_block dbg_cpu_pm_nb = { static void __init pm_init(void) { - cpu_pm_register_notifier(&dbg_cpu_pm_nb); + if (has_ossr) + cpu_pm_register_notifier(&dbg_cpu_pm_nb); } #else static inline void pm_init(void) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index e19edc6f2d1..3a9d627cab6 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -12,6 +12,7 @@ */ #define pr_fmt(fmt) "hw perfevents: " fmt +#include <linux/cpumask.h> #include <linux/kernel.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> @@ -86,6 +87,9 @@ armpmu_map_event(struct perf_event *event, return armpmu_map_cache_event(cache_map, config); case PERF_TYPE_RAW: return armpmu_map_raw_event(raw_event_mask, config); + default: + if (event->attr.type >= PERF_TYPE_MAX) + return armpmu_map_raw_event(raw_event_mask, config); } return -ENOENT; @@ -163,6 +167,8 @@ armpmu_stop(struct perf_event *event, int flags) struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus)) + return; /* * ARM pmu always has to update the counter, so ignore * PERF_EF_UPDATE, see comments in armpmu_start(). @@ -179,6 +185,8 @@ static void armpmu_start(struct perf_event *event, int flags) struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus)) + return; /* * ARM pmu always has to reprogram the period, so ignore * PERF_EF_RELOAD, see the comment below. @@ -206,6 +214,9 @@ armpmu_del(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus)) + return; + armpmu_stop(event, PERF_EF_UPDATE); hw_events->events[idx] = NULL; clear_bit(idx, hw_events->used_mask); @@ -222,6 +233,10 @@ armpmu_add(struct perf_event *event, int flags) int idx; int err = 0; + /* An event following a process won't be stopped earlier */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus)) + return 0; + perf_pmu_disable(event->pmu); /* If we don't have a space for the counter then finish early. */ @@ -424,6 +439,10 @@ static int armpmu_event_init(struct perf_event *event) int err = 0; atomic_t *active_events = &armpmu->active_events; + if (event->cpu != -1 && + !cpumask_test_cpu(event->cpu, &armpmu->valid_cpus)) + return -ENOENT; + /* does not support taken branch sampling */ if (has_branch_stack(event)) return -EOPNOTSUPP; diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 1f2740e3dbc..0b48a38e3cf 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -19,6 +19,7 @@ #define pr_fmt(fmt) "CPU PMU: " fmt #include <linux/bitmap.h> +#include <linux/cpu_pm.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/of.h> @@ -31,33 +32,36 @@ #include <asm/pmu.h> /* Set at runtime when we know what CPU type we are. */ -static struct arm_pmu *cpu_pmu; +static DEFINE_PER_CPU(struct arm_pmu *, cpu_pmu); static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events); static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask); static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); +static DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs); + /* * Despite the names, these two functions are CPU-specific and are used * by the OProfile/perf code. */ const char *perf_pmu_name(void) { - if (!cpu_pmu) + struct arm_pmu *pmu = per_cpu(cpu_pmu, 0); + if (!pmu) return NULL; - return cpu_pmu->name; + return pmu->name; } EXPORT_SYMBOL_GPL(perf_pmu_name); int perf_num_counters(void) { - int max_events = 0; + struct arm_pmu *pmu = per_cpu(cpu_pmu, 0); - if (cpu_pmu != NULL) - max_events = cpu_pmu->num_events; + if (!pmu) + return 0; - return max_events; + return pmu->num_events; } EXPORT_SYMBOL_GPL(perf_num_counters); @@ -75,11 +79,13 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) { int i, irq, irqs; struct platform_device *pmu_device = cpu_pmu->plat_device; + int cpu = -1; irqs = min(pmu_device->num_resources, num_possible_cpus()); for (i = 0; i < irqs; ++i) { - if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs)) + cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus); + if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) continue; irq = platform_get_irq(pmu_device, i); if (irq >= 0) @@ -91,6 +97,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) { int i, err, irq, irqs; struct platform_device *pmu_device = cpu_pmu->plat_device; + int cpu = -1; if (!pmu_device) return -ENODEV; @@ -103,6 +110,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) for (i = 0; i < irqs; ++i) { err = 0; + cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus); irq = platform_get_irq(pmu_device, i); if (irq < 0) continue; @@ -112,7 +120,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) * assume that we're running on a uniprocessor machine and * continue. Otherwise, continue without this interrupt. */ - if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { + if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", irq, i); continue; @@ -126,7 +134,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) return err; } - cpumask_set_cpu(i, &cpu_pmu->active_irqs); + cpumask_set_cpu(cpu, &cpu_pmu->active_irqs); } return 0; @@ -135,7 +143,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) static void cpu_pmu_init(struct arm_pmu *cpu_pmu) { int cpu; - for_each_possible_cpu(cpu) { + for_each_cpu_mask(cpu, cpu_pmu->valid_cpus) { struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu); events->events = per_cpu(hw_events, cpu); events->used_mask = per_cpu(used_mask, cpu); @@ -148,7 +156,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu) /* Ensure the PMU has sane values out of reset. */ if (cpu_pmu->reset) - on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); + on_each_cpu_mask(&cpu_pmu->valid_cpus, cpu_pmu->reset, cpu_pmu, 1); } /* @@ -160,21 +168,46 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu) static int __cpuinit cpu_pmu_notify(struct notifier_block *b, unsigned long action, void *hcpu) { + struct arm_pmu *pmu = per_cpu(cpu_pmu, (long)hcpu); + if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) return NOTIFY_DONE; - if (cpu_pmu && cpu_pmu->reset) - cpu_pmu->reset(cpu_pmu); + if (pmu && pmu->reset) + pmu->reset(pmu); else return NOTIFY_DONE; return NOTIFY_OK; } +static int cpu_pmu_pm_notify(struct notifier_block *b, + unsigned long action, void *hcpu) +{ + int cpu = smp_processor_id(); + struct arm_pmu *pmu = per_cpu(cpu_pmu, cpu); + struct cpupmu_regs *pmuregs = &per_cpu(cpu_pmu_regs, cpu); + + if (!pmu) + return NOTIFY_DONE; + + if (action == CPU_PM_ENTER && pmu->save_regs) { + pmu->save_regs(pmu, pmuregs); + } else if (action == CPU_PM_EXIT && pmu->restore_regs) { + pmu->restore_regs(pmu, pmuregs); + } + + return NOTIFY_OK; +} + static struct notifier_block __cpuinitdata cpu_pmu_hotplug_notifier = { .notifier_call = cpu_pmu_notify, }; +static struct notifier_block __cpuinitdata cpu_pmu_pm_notifier = { + .notifier_call = cpu_pmu_pm_notify, +}; + /* * PMU platform driver and devicetree bindings. */ @@ -246,6 +279,9 @@ static int probe_current_pmu(struct arm_pmu *pmu) } } + /* assume PMU support all the CPUs in this case */ + cpumask_setall(&pmu->valid_cpus); + put_cpu(); return ret; } @@ -253,15 +289,10 @@ static int probe_current_pmu(struct arm_pmu *pmu) static int cpu_pmu_device_probe(struct platform_device *pdev) { const struct of_device_id *of_id; - int (*init_fn)(struct arm_pmu *); struct device_node *node = pdev->dev.of_node; struct arm_pmu *pmu; - int ret = -ENODEV; - - if (cpu_pmu) { - pr_info("attempt to register multiple PMU devices!"); - return -ENOSPC; - } + int ret = 0; + int cpu; pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); if (!pmu) { @@ -270,8 +301,28 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) } if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) { - init_fn = of_id->data; - ret = init_fn(pmu); + smp_call_func_t init_fn = (smp_call_func_t)of_id->data; + struct device_node *ncluster; + int cluster = -1; + cpumask_t sibling_mask; + + ncluster = of_parse_phandle(node, "cluster", 0); + if (ncluster) { + int len; + const u32 *hwid; + hwid = of_get_property(ncluster, "reg", &len); + if (hwid && len == 4) + cluster = be32_to_cpup(hwid); + } + /* set sibling mask to all cpu mask if socket is not specified */ + if (cluster == -1 || + cluster_to_logical_mask(cluster, &sibling_mask)) + cpumask_setall(&sibling_mask); + + smp_call_function_any(&sibling_mask, init_fn, pmu, 1); + + /* now set the valid_cpus after init */ + cpumask_copy(&pmu->valid_cpus, &sibling_mask); } else { ret = probe_current_pmu(pmu); } @@ -281,10 +332,12 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) goto out_free; } - cpu_pmu = pmu; - cpu_pmu->plat_device = pdev; - cpu_pmu_init(cpu_pmu); - ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW); + for_each_cpu_mask(cpu, pmu->valid_cpus) + per_cpu(cpu_pmu, cpu) = pmu; + + pmu->plat_device = pdev; + cpu_pmu_init(pmu); + ret = armpmu_register(pmu, -1); if (!ret) return 0; @@ -313,9 +366,17 @@ static int __init register_pmu_driver(void) if (err) return err; + err = cpu_pm_register_notifier(&cpu_pmu_pm_notifier); + if (err) { + unregister_cpu_notifier(&cpu_pmu_hotplug_notifier); + return err; + } + err = platform_driver_register(&cpu_pmu_driver); - if (err) + if (err) { + cpu_pm_unregister_notifier(&cpu_pmu_pm_notifier); unregister_cpu_notifier(&cpu_pmu_hotplug_notifier); + } return err; } diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 039cffb053a..654db5030c3 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -950,6 +950,51 @@ static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu) } #endif +static void armv7pmu_save_regs(struct arm_pmu *cpu_pmu, + struct cpupmu_regs *regs) +{ + unsigned int cnt; + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (regs->pmc)); + if (!(regs->pmc & ARMV7_PMNC_E)) + return; + + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (regs->pmcntenset)); + asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r" (regs->pmuseren)); + asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (regs->pmintenset)); + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (regs->pmxevtcnt[0])); + for (cnt = ARMV7_IDX_COUNTER0; + cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) { + armv7_pmnc_select_counter(cnt); + asm volatile("mrc p15, 0, %0, c9, c13, 1" + : "=r"(regs->pmxevttype[cnt])); + asm volatile("mrc p15, 0, %0, c9, c13, 2" + : "=r"(regs->pmxevtcnt[cnt])); + } + return; +} + +static void armv7pmu_restore_regs(struct arm_pmu *cpu_pmu, + struct cpupmu_regs *regs) +{ + unsigned int cnt; + if (!(regs->pmc & ARMV7_PMNC_E)) + return; + + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (regs->pmcntenset)); + asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r" (regs->pmuseren)); + asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (regs->pmintenset)); + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (regs->pmxevtcnt[0])); + for (cnt = ARMV7_IDX_COUNTER0; + cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) { + armv7_pmnc_select_counter(cnt); + asm volatile("mcr p15, 0, %0, c9, c13, 1" + : : "r"(regs->pmxevttype[cnt])); + asm volatile("mcr p15, 0, %0, c9, c13, 2" + : : "r"(regs->pmxevtcnt[cnt])); + } + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (regs->pmc)); +} + static void armv7pmu_enable_event(struct perf_event *event) { unsigned long flags; @@ -1223,6 +1268,8 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->start = armv7pmu_start; cpu_pmu->stop = armv7pmu_stop; cpu_pmu->reset = armv7pmu_reset; + cpu_pmu->save_regs = armv7pmu_save_regs; + cpu_pmu->restore_regs = armv7pmu_restore_regs; cpu_pmu->max_period = (1LLU << 32) - 1; }; @@ -1240,7 +1287,7 @@ static u32 armv7_read_num_pmnc_events(void) static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A8"; + cpu_pmu->name = "ARMv7_Cortex_A8"; cpu_pmu->map_event = armv7_a8_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); return 0; @@ -1249,7 +1296,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A9"; + cpu_pmu->name = "ARMv7_Cortex_A9"; cpu_pmu->map_event = armv7_a9_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); return 0; @@ -1258,7 +1305,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A5"; + cpu_pmu->name = "ARMv7_Cortex_A5"; cpu_pmu->map_event = armv7_a5_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); return 0; @@ -1267,7 +1314,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A15"; + cpu_pmu->name = "ARMv7_Cortex_A15"; cpu_pmu->map_event = armv7_a15_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; @@ -1277,7 +1324,7 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A7"; + cpu_pmu->name = "ARMv7_Cortex_A7"; cpu_pmu->map_event = armv7_a7_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c index 36531643cc2..0daf4f25228 100644 --- a/arch/arm/kernel/psci.c +++ b/arch/arm/kernel/psci.c @@ -17,6 +17,7 @@ #include <linux/init.h> #include <linux/of.h> +#include <linux/string.h> #include <asm/compiler.h> #include <asm/errno.h> @@ -26,6 +27,11 @@ struct psci_operations psci_ops; +/* Type of psci support. Currently can only be enabled or disabled */ +#define PSCI_SUP_DISABLED 0 +#define PSCI_SUP_ENABLED 1 + +static unsigned int psci; static int (*invoke_psci_fn)(u32, u32, u32, u32); enum psci_function { @@ -42,6 +48,7 @@ static u32 psci_function_id[PSCI_FN_MAX]; #define PSCI_RET_EOPNOTSUPP -1 #define PSCI_RET_EINVAL -2 #define PSCI_RET_EPERM -3 +#define PSCI_RET_EALREADYON -4 static int psci_to_linux_errno(int errno) { @@ -54,6 +61,8 @@ static int psci_to_linux_errno(int errno) return -EINVAL; case PSCI_RET_EPERM: return -EPERM; + case PSCI_RET_EALREADYON: + return -EAGAIN; }; return -EINVAL; @@ -158,15 +167,18 @@ static const struct of_device_id psci_of_match[] __initconst = { {}, }; -static int __init psci_init(void) +void __init psci_init(void) { struct device_node *np; const char *method; u32 id; + if (psci == PSCI_SUP_DISABLED) + return; + np = of_find_matching_node(NULL, psci_of_match); if (!np) - return 0; + return; pr_info("probing function IDs from device-tree\n"); @@ -206,6 +218,35 @@ static int __init psci_init(void) out_put_node: of_node_put(np); - return 0; + return; +} + +int __init psci_probe(void) +{ + struct device_node *np; + int ret = -ENODEV; + + if (psci == PSCI_SUP_ENABLED) { + np = of_find_matching_node(NULL, psci_of_match); + if (np) + ret = 0; + } + + of_node_put(np); + return ret; +} + +static int __init early_psci(char *val) +{ + int ret = 0; + + if (strcmp(val, "enable") == 0) + psci = PSCI_SUP_ENABLED; + else if (strcmp(val, "disable") == 0) + psci = PSCI_SUP_DISABLED; + else + ret = -EINVAL; + + return ret; } -early_initcall(psci_init); +early_param("psci", early_psci); diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c new file mode 100644 index 00000000000..23a11424c56 --- /dev/null +++ b/arch/arm/kernel/psci_smp.c @@ -0,0 +1,84 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2012 ARM Limited + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/init.h> +#include <linux/irqchip/arm-gic.h> +#include <linux/smp.h> +#include <linux/of.h> + +#include <asm/psci.h> +#include <asm/smp_plat.h> + +/* + * psci_smp assumes that the following is true about PSCI: + * + * cpu_suspend Suspend the execution on a CPU + * @state we don't currently describe affinity levels, so just pass 0. + * @entry_point the first instruction to be executed on return + * returns 0 success, < 0 on failure + * + * cpu_off Power down a CPU + * @state we don't currently describe affinity levels, so just pass 0. + * no return on successful call + * + * cpu_on Power up a CPU + * @cpuid cpuid of target CPU, as from MPIDR + * @entry_point the first instruction to be executed on return + * returns 0 success, < 0 on failure + * + * migrate Migrate the context to a different CPU + * @cpuid cpuid of target CPU, as from MPIDR + * returns 0 success, < 0 on failure + * + */ + +extern void secondary_startup(void); + +static int __cpuinit psci_boot_secondary(unsigned int cpu, + struct task_struct *idle) +{ + if (psci_ops.cpu_on) + return psci_ops.cpu_on(cpu_logical_map(cpu), + __pa(secondary_startup)); + return -ENODEV; +} + +#ifdef CONFIG_HOTPLUG_CPU +void __ref psci_cpu_die(unsigned int cpu) +{ + const struct psci_power_state ps = { + .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, + }; + + if (psci_ops.cpu_off) + psci_ops.cpu_off(ps); + + /* We should never return */ + panic("psci: cpu %d failed to shutdown\n", cpu); +} +#else +#define psci_cpu_die NULL +#endif + +bool __init psci_smp_available(void) +{ + /* is cpu_on available at least? */ + return (psci_ops.cpu_on != NULL); +} + +struct smp_operations __initdata psci_smp_ops = { + .smp_boot_secondary = psci_boot_secondary, + .cpu_die = psci_cpu_die, +}; diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index b4b1d397592..6002fe06b12 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -37,6 +37,7 @@ #include <asm/cputype.h> #include <asm/elf.h> #include <asm/procinfo.h> +#include <asm/psci.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/smp_plat.h> @@ -261,6 +262,19 @@ static int cpu_has_aliasing_icache(unsigned int arch) int aliasing_icache; unsigned int id_reg, num_sets, line_size; +#ifdef CONFIG_BIG_LITTLE + /* + * We expect a combination of Cortex-A15 and Cortex-A7 cores. + * A7 = VIPT aliasing I-cache + * A15 = PIPT (non-aliasing) I-cache + * To cater for this discrepancy, let's assume aliasing I-cache + * all the time. This means unneeded extra work on the A15 but + * only ptrace is affected which is not performance critical. + */ + if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc0f0) + return 1; +#endif + /* PIPT caches never alias. */ if (icache_is_pipt()) return 0; @@ -796,9 +810,15 @@ void __init setup_arch(char **cmdline_p) unflatten_device_tree(); arm_dt_init_cpu_maps(); + psci_init(); #ifdef CONFIG_SMP if (is_smp()) { - smp_set_ops(mdesc->smp); + if (!mdesc->smp_init || !mdesc->smp_init()) { + if (psci_smp_available()) + smp_set_ops(&psci_smp_ops); + else if (mdesc->smp) + smp_set_ops(mdesc->smp); + } smp_init_cpus(); } #endif diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 987dcf33415..b5c1e636ed8 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -4,6 +4,7 @@ #include <asm/assembler.h> #include <asm/glue-cache.h> #include <asm/glue-proc.h> +#include "entry-header.S" .text /* @@ -30,9 +31,8 @@ ENTRY(__cpu_suspend) mov r2, r5 @ virtual SP ldr r3, =sleep_save_sp #ifdef CONFIG_SMP - ALT_SMP(mrc p15, 0, lr, c0, c0, 5) - ALT_UP(mov lr, #0) - and lr, lr, #15 + get_thread_info r5 + ldr lr, [r5, #TI_CPU] @ cpu logical index add r3, r3, lr, lsl #2 #endif bl __cpu_suspend_save @@ -82,10 +82,13 @@ ENDPROC(cpu_resume_after_mmu) .align ENTRY(cpu_resume) #ifdef CONFIG_SMP + mov r1, #0 @ fall-back logical index for UP + ALT_SMP(mrc p15, 0, r0, c0, c0, 5) + ALT_UP_B(1f) + bic r0, #0xff000000 + bl cpu_logical_index @ return logical index in r1 +1: adr r0, sleep_save_sp - ALT_SMP(mrc p15, 0, r1, c0, c0, 5) - ALT_UP(mov r1, #0) - and r1, r1, #15 ldr r0, [r0, r1, lsl #2] @ stack phys addr #else ldr r0, sleep_save_sp @ stack phys addr @@ -102,3 +105,20 @@ sleep_save_sp: .rept CONFIG_NR_CPUS .long 0 @ preserve stack phys ptr here .endr + +#ifdef CONFIG_SMP +cpu_logical_index: + adr r3, cpu_map_ptr + ldr r2, [r3] + add r3, r3, r2 @ virt_to_phys(__cpu_logical_map) + mov r1, #0 +1: + ldr r2, [r3, r1, lsl #2] + cmp r2, r0 + moveq pc, lr + add r1, r1, #1 + b 1b + +cpu_map_ptr: + .long __cpu_logical_map - . +#endif diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 5919eb451bb..46e8ab384e6 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -57,7 +57,7 @@ struct secondary_data secondary_data; * control for which core is the next to come out of the secondary * boot "holding pen" */ -volatile int __cpuinitdata pen_release = -1; +volatile int pen_release = -1; enum ipi_msg_type { IPI_WAKEUP, @@ -66,6 +66,7 @@ enum ipi_msg_type { IPI_CALL_FUNC, IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, + IPI_COMPLETION, }; static DECLARE_COMPLETION(cpu_running); @@ -463,6 +464,7 @@ static const char *ipi_types[NR_IPI] = { S(IPI_CALL_FUNC, "Function call interrupts"), S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), + S(IPI_COMPLETION, "completion interrupts"), }; void show_ipi_list(struct seq_file *p, int prec) @@ -588,6 +590,19 @@ static void ipi_cpu_stop(unsigned int cpu) cpu_relax(); } +static DEFINE_PER_CPU(struct completion *, cpu_completion); + +int register_ipi_completion(struct completion *completion, int cpu) +{ + per_cpu(cpu_completion, cpu) = completion; + return IPI_COMPLETION; +} + +static void ipi_complete(unsigned int cpu) +{ + complete(per_cpu(cpu_completion, cpu)); +} + /* * Main handler for inter-processor interrupts */ @@ -638,6 +653,12 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; + case IPI_COMPLETION: + irq_enter(); + ipi_complete(cpu); + irq_exit(); + break; + default: printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index c5a59546a25..677da58d9e8 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -23,6 +23,7 @@ #include <linux/slab.h> #include <asm/cputype.h> +#include <asm/smp_plat.h> #include <asm/topology.h> /* @@ -289,6 +290,140 @@ void store_cpu_topology(unsigned int cpuid) cpu_topology[cpuid].socket_id, mpidr); } + +#ifdef CONFIG_SCHED_HMP + +static const char * const little_cores[] = { + "arm,cortex-a7", + NULL, +}; + +static bool is_little_cpu(struct device_node *cn) +{ + const char * const *lc; + for (lc = little_cores; *lc; lc++) + if (of_device_is_compatible(cn, *lc)) + return true; + return false; +} + +void __init arch_get_fast_and_slow_cpus(struct cpumask *fast, + struct cpumask *slow) +{ + struct device_node *cn = NULL; + int cpu; + + cpumask_clear(fast); + cpumask_clear(slow); + + /* + * Use the config options if they are given. This helps testing + * HMP scheduling on systems without a big.LITTLE architecture. + */ + if (strlen(CONFIG_HMP_FAST_CPU_MASK) && strlen(CONFIG_HMP_SLOW_CPU_MASK)) { + if (cpulist_parse(CONFIG_HMP_FAST_CPU_MASK, fast)) + WARN(1, "Failed to parse HMP fast cpu mask!\n"); + if (cpulist_parse(CONFIG_HMP_SLOW_CPU_MASK, slow)) + WARN(1, "Failed to parse HMP slow cpu mask!\n"); + return; + } + + /* + * Else, parse device tree for little cores. + */ + while ((cn = of_find_node_by_type(cn, "cpu"))) { + + const u32 *mpidr; + int len; + + mpidr = of_get_property(cn, "reg", &len); + if (!mpidr || len != 4) { + pr_err("* %s missing reg property\n", cn->full_name); + continue; + } + + cpu = get_logical_index(be32_to_cpup(mpidr)); + if (cpu == -EINVAL) { + pr_err("couldn't get logical index for mpidr %x\n", + be32_to_cpup(mpidr)); + break; + } + + if (is_little_cpu(cn)) + cpumask_set_cpu(cpu, slow); + else + cpumask_set_cpu(cpu, fast); + } + + if (!cpumask_empty(fast) && !cpumask_empty(slow)) + return; + + /* + * We didn't find both big and little cores so let's call all cores + * fast as this will keep the system running, with all cores being + * treated equal. + */ + cpumask_setall(fast); + cpumask_clear(slow); +} + +struct cpumask hmp_slow_cpu_mask; + +void __init arch_get_hmp_domains(struct list_head *hmp_domains_list) +{ + struct cpumask hmp_fast_cpu_mask; + struct hmp_domain *domain; + + arch_get_fast_and_slow_cpus(&hmp_fast_cpu_mask, &hmp_slow_cpu_mask); + + /* + * Initialize hmp_domains + * Must be ordered with respect to compute capacity. + * Fastest domain at head of list. + */ + if(!cpumask_empty(&hmp_slow_cpu_mask)) { + domain = (struct hmp_domain *) + kmalloc(sizeof(struct hmp_domain), GFP_KERNEL); + cpumask_copy(&domain->possible_cpus, &hmp_slow_cpu_mask); + cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus); + list_add(&domain->hmp_domains, hmp_domains_list); + } + domain = (struct hmp_domain *) + kmalloc(sizeof(struct hmp_domain), GFP_KERNEL); + cpumask_copy(&domain->possible_cpus, &hmp_fast_cpu_mask); + cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus); + list_add(&domain->hmp_domains, hmp_domains_list); +} +#endif /* CONFIG_SCHED_HMP */ + + +/* + * cluster_to_logical_mask - return cpu logical mask of CPUs in a cluster + * @socket_id: cluster HW identifier + * @cluster_mask: the cpumask location to be initialized, modified by the + * function only if return value == 0 + * + * Return: + * + * 0 on success + * -EINVAL if cluster_mask is NULL or there is no record matching socket_id + */ +int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask) +{ + int cpu; + + if (!cluster_mask) + return -EINVAL; + + for_each_online_cpu(cpu) + if (socket_id == topology_physical_package_id(cpu)) { + cpumask_copy(cluster_mask, topology_core_cpumask(cpu)); + return 0; + } + + return -EINVAL; +} + /* * init_cpu_topology is called at boot when only one cpu is running * which prevent simultaneous write access to cpu_topology array diff --git a/arch/arm/mach-exynos/mach-exynos5-dt.c b/arch/arm/mach-exynos/mach-exynos5-dt.c index 753b94f3fca..d88234e14f9 100644 --- a/arch/arm/mach-exynos/mach-exynos5-dt.c +++ b/arch/arm/mach-exynos/mach-exynos5-dt.c @@ -14,6 +14,7 @@ #include <linux/memblock.h> #include <linux/io.h> #include <linux/clocksource.h> +#include <linux/dma-mapping.h> #include <asm/mach/arch.h> #include <mach/regs-pmu.h> @@ -23,11 +24,31 @@ #include "common.h" +static u64 dma_mask64 = DMA_BIT_MASK(64); + static void __init exynos5_dt_map_io(void) { exynos_init_io(NULL, 0); } +static int exynos5250_platform_notifier(struct notifier_block *nb, + unsigned long event, void *__dev) +{ + struct device *dev = __dev; + + if (event != BUS_NOTIFY_ADD_DEVICE) + return NOTIFY_DONE; + + dev->dma_mask = &dma_mask64; + dev->coherent_dma_mask = DMA_BIT_MASK(64); + + return NOTIFY_OK; +} + +static struct notifier_block exynos5250_platform_nb = { + .notifier_call = exynos5250_platform_notifier, +}; + static void __init exynos5_dt_machine_init(void) { struct device_node *i2c_np; @@ -52,6 +73,11 @@ static void __init exynos5_dt_machine_init(void) } } + if (config_enabled(CONFIG_ARM_LPAE) && + of_machine_is_compatible("samsung,exynos5250")) + bus_register_notifier(&platform_bus_type, + &exynos5250_platform_nb); + of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); } diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig index 5907e10c37f..dd3d5975a5c 100644 --- a/arch/arm/mach-vexpress/Kconfig +++ b/arch/arm/mach-vexpress/Kconfig @@ -1,5 +1,7 @@ config ARCH_VEXPRESS bool "ARM Ltd. Versatile Express family" if ARCH_MULTI_V7 + select ARCH_HAS_CPUFREQ + select ARCH_HAS_OPP select ARCH_REQUIRE_GPIOLIB select ARM_AMBA select ARM_GIC @@ -56,5 +58,23 @@ config ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA config ARCH_VEXPRESS_CA9X4 bool "Versatile Express Cortex-A9x4 tile" + select ARM_ERRATA_643719 + +config ARCH_VEXPRESS_DCSCB + bool "Dual Cluster System Control Block (DCSCB) support" + depends on MCPM + select ARM_CCI + help + Support for the Dual Cluster System Configuration Block (DCSCB). + This is needed to provide CPU and cluster power management + on RTSM implementing big.LITTLE. + +config ARCH_VEXPRESS_TC2 + bool "TC2 cluster management" + depends on MCPM + select VEXPRESS_SPC + select ARM_CCI + help + Support for CPU and cluster power management on TC2. endmenu diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile index 42703e8b4d3..14193dc7e6e 100644 --- a/arch/arm/mach-vexpress/Makefile +++ b/arch/arm/mach-vexpress/Makefile @@ -6,5 +6,13 @@ ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \ obj-y := v2m.o obj-$(CONFIG_ARCH_VEXPRESS_CA9X4) += ct-ca9x4.o +obj-$(CONFIG_ARCH_VEXPRESS_DCSCB) += dcscb.o dcscb_setup.o +CFLAGS_REMOVE_dcscb.o = -pg +obj-$(CONFIG_ARCH_VEXPRESS_TC2) += tc2_pm.o tc2_pm_setup.o +CFLAGS_REMOVE_tc2_pm.o = -pg +ifeq ($(CONFIG_ARCH_VEXPRESS_TC2),y) +obj-$(CONFIG_ARM_PSCI) += tc2_pm_psci.o +CFLAGS_REMOVE_tc2_pm_psci.o = -pg +endif obj-$(CONFIG_SMP) += platsmp.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o diff --git a/arch/arm/mach-vexpress/core.h b/arch/arm/mach-vexpress/core.h index f134cd4a85f..bde4374ab6d 100644 --- a/arch/arm/mach-vexpress/core.h +++ b/arch/arm/mach-vexpress/core.h @@ -6,6 +6,8 @@ void vexpress_dt_smp_map_io(void); +bool vexpress_smp_init_ops(void); + extern struct smp_operations vexpress_smp_ops; extern void vexpress_cpu_die(unsigned int cpu); diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c new file mode 100644 index 00000000000..948aec0890f --- /dev/null +++ b/arch/arm/mach-vexpress/dcscb.c @@ -0,0 +1,260 @@ +/* + * arch/arm/mach-vexpress/dcscb.c - Dual Cluster System Configuration Block + * + * Created by: Nicolas Pitre, May 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/io.h> +#include <linux/spinlock.h> +#include <linux/errno.h> +#include <linux/of_address.h> +#include <linux/vexpress.h> +#include <linux/arm-cci.h> + +#include <asm/mcpm.h> +#include <asm/proc-fns.h> +#include <asm/cacheflush.h> +#include <asm/cputype.h> +#include <asm/cp15.h> +#include <asm/psci.h> + + +#define RST_HOLD0 0x0 +#define RST_HOLD1 0x4 +#define SYS_SWRESET 0x8 +#define RST_STAT0 0xc +#define RST_STAT1 0x10 +#define EAG_CFG_R 0x20 +#define EAG_CFG_W 0x24 +#define KFC_CFG_R 0x28 +#define KFC_CFG_W 0x2c +#define DCS_CFG_R 0x30 + +/* + * We can't use regular spinlocks. In the switcher case, it is possible + * for an outbound CPU to call power_down() while its inbound counterpart + * is already live using the same logical CPU number which trips lockdep + * debugging. + */ +static arch_spinlock_t dcscb_lock = __ARCH_SPIN_LOCK_UNLOCKED; + +static void __iomem *dcscb_base; +static int dcscb_use_count[4][2]; +static int dcscb_allcpus_mask[2]; + +static int dcscb_power_up(unsigned int cpu, unsigned int cluster) +{ + unsigned int rst_hold, cpumask = (1 << cpu); + unsigned int all_mask = dcscb_allcpus_mask[cluster]; + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + if (cpu >= 4 || cluster >= 2) + return -EINVAL; + + /* + * Since this is called with IRQs enabled, and no arch_spin_lock_irq + * variant exists, we need to disable IRQs manually here. + */ + local_irq_disable(); + arch_spin_lock(&dcscb_lock); + + dcscb_use_count[cpu][cluster]++; + if (dcscb_use_count[cpu][cluster] == 1) { + rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4); + if (rst_hold & (1 << 8)) { + /* remove cluster reset and add individual CPU's reset */ + rst_hold &= ~(1 << 8); + rst_hold |= all_mask; + } + rst_hold &= ~(cpumask | (cpumask << 4)); + writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4); + } else if (dcscb_use_count[cpu][cluster] != 2) { + /* + * The only possible values are: + * 0 = CPU down + * 1 = CPU (still) up + * 2 = CPU requested to be up before it had a chance + * to actually make itself down. + * Any other value is a bug. + */ + BUG(); + } + + arch_spin_unlock(&dcscb_lock); + local_irq_enable(); + + return 0; +} + +static void dcscb_power_down(void) +{ + unsigned int mpidr, cpu, cluster, rst_hold, cpumask, all_mask; + bool last_man = false, skip_wfi = false; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + cpumask = (1 << cpu); + all_mask = dcscb_allcpus_mask[cluster]; + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + BUG_ON(cpu >= 4 || cluster >= 2); + + __mcpm_cpu_going_down(cpu, cluster); + + arch_spin_lock(&dcscb_lock); + BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP); + dcscb_use_count[cpu][cluster]--; + if (dcscb_use_count[cpu][cluster] == 0) { + rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4); + rst_hold |= cpumask; + if (((rst_hold | (rst_hold >> 4)) & all_mask) == all_mask) { + rst_hold |= (1 << 8); + last_man = true; + } + writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4); + } else if (dcscb_use_count[cpu][cluster] == 1) { + /* + * A power_up request went ahead of us. + * Even if we do not want to shut this CPU down, + * the caller expects a certain state as if the WFI + * was aborted. So let's continue with cache cleaning. + */ + skip_wfi = true; + } else + BUG(); + + if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { + arch_spin_unlock(&dcscb_lock); + + /* + * Flush all cache levels for this cluster. + * + * A15/A7 can hit in the cache with SCTLR.C=0, so we don't need + * a preliminary flush here for those CPUs. At least, that's + * the theory -- without the extra flush, Linux explodes on + * RTSM (to be investigated). + */ + flush_cache_all(); + set_cr(get_cr() & ~CR_C); + flush_cache_all(); + + /* + * This is a harmless no-op. On platforms with a real + * outer cache this might either be needed or not, + * depending on where the outer cache sits. + */ + outer_flush_all(); + + /* Disable local coherency by clearing the ACTLR "SMP" bit: */ + set_auxcr(get_auxcr() & ~(1 << 6)); + + /* + * Disable cluster-level coherency by masking + * incoming snoops and DVM messages: + */ + cci_disable_port_by_cpu(mpidr); + + __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN); + } else { + arch_spin_unlock(&dcscb_lock); + + /* + * Flush the local CPU cache. + * + * A15/A7 can hit in the cache with SCTLR.C=0, so we don't need + * a preliminary flush here for those CPUs. At least, that's + * the theory -- without the extra flush, Linux explodes on + * RTSM (to be investigated). + */ + flush_cache_louis(); + set_cr(get_cr() & ~CR_C); + flush_cache_louis(); + + /* Disable local coherency by clearing the ACTLR "SMP" bit: */ + set_auxcr(get_auxcr() & ~(1 << 6)); + } + + __mcpm_cpu_down(cpu, cluster); + + /* Now we are prepared for power-down, do it: */ + dsb(); + if (!skip_wfi) + wfi(); + + /* Not dead at this point? Let our caller cope. */ +} + +static const struct mcpm_platform_ops dcscb_power_ops = { + .power_up = dcscb_power_up, + .power_down = dcscb_power_down, +}; + +static void __init dcscb_usage_count_init(void) +{ + unsigned int mpidr, cpu, cluster; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + BUG_ON(cpu >= 4 || cluster >= 2); + dcscb_use_count[cpu][cluster] = 1; +} + +extern void dcscb_power_up_setup(unsigned int affinity_level); + +static int __init dcscb_init(void) +{ + struct device_node *node; + unsigned int cfg; + int ret; + + ret = psci_probe(); + if (!ret) { + pr_debug("psci found. Aborting native init\n"); + return -ENODEV; + } + + if (!cci_probed()) + return -ENODEV; + + node = of_find_compatible_node(NULL, NULL, "arm,rtsm,dcscb"); + if (!node) + return -ENODEV; + dcscb_base = of_iomap(node, 0); + if (!dcscb_base) + return -EADDRNOTAVAIL; + cfg = readl_relaxed(dcscb_base + DCS_CFG_R); + dcscb_allcpus_mask[0] = (1 << (((cfg >> 16) >> (0 << 2)) & 0xf)) - 1; + dcscb_allcpus_mask[1] = (1 << (((cfg >> 16) >> (1 << 2)) & 0xf)) - 1; + dcscb_usage_count_init(); + + ret = mcpm_platform_register(&dcscb_power_ops); + if (!ret) + ret = mcpm_sync_init(dcscb_power_up_setup); + if (ret) { + iounmap(dcscb_base); + return ret; + } + + pr_info("VExpress DCSCB support installed\n"); + + /* + * Future entries into the kernel can now go + * through the cluster entry vectors. + */ + vexpress_flags_set(virt_to_phys(mcpm_entry_point)); + + return 0; +} + +early_initcall(dcscb_init); diff --git a/arch/arm/mach-vexpress/dcscb_setup.S b/arch/arm/mach-vexpress/dcscb_setup.S new file mode 100644 index 00000000000..4bb7fbe0f62 --- /dev/null +++ b/arch/arm/mach-vexpress/dcscb_setup.S @@ -0,0 +1,38 @@ +/* + * arch/arm/include/asm/dcscb_setup.S + * + * Created by: Dave Martin, 2012-06-22 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> + + +ENTRY(dcscb_power_up_setup) + + cmp r0, #0 @ check affinity level + beq 2f + +/* + * Enable cluster-level coherency, in preparation for turning on the MMU. + * The ACTLR SMP bit does not need to be set here, because cpu_resume() + * already restores that. + * + * A15/A7 may not require explicit L2 invalidation on reset, dependent + * on hardware integration decisions. + * For now, this code assumes that L2 is either already invalidated, + * or invalidation is not required. + */ + + b cci_enable_port_for_self + +2: @ Implementation-specific local CPU setup operations should go here, + @ if any. In this case, there is nothing to do. + + bx lr + +ENDPROC(dcscb_power_up_setup) diff --git a/arch/arm/mach-vexpress/include/mach/tc2.h b/arch/arm/mach-vexpress/include/mach/tc2.h new file mode 100644 index 00000000000..d3b5a2225a0 --- /dev/null +++ b/arch/arm/mach-vexpress/include/mach/tc2.h @@ -0,0 +1,10 @@ +#ifndef __MACH_TC2_H +#define __MACH_TC2_H + +/* + * cpu and cluster limits + */ +#define TC2_MAX_CPUS 3 +#define TC2_MAX_CLUSTERS 2 + +#endif diff --git a/arch/arm/mach-vexpress/platsmp.c b/arch/arm/mach-vexpress/platsmp.c index dc1ace55d55..993c9ae5dc5 100644 --- a/arch/arm/mach-vexpress/platsmp.c +++ b/arch/arm/mach-vexpress/platsmp.c @@ -12,9 +12,11 @@ #include <linux/errno.h> #include <linux/smp.h> #include <linux/io.h> +#include <linux/of.h> #include <linux/of_fdt.h> #include <linux/vexpress.h> +#include <asm/mcpm.h> #include <asm/smp_scu.h> #include <asm/mach/map.h> @@ -203,3 +205,21 @@ struct smp_operations __initdata vexpress_smp_ops = { .cpu_die = vexpress_cpu_die, #endif }; + +bool __init vexpress_smp_init_ops(void) +{ +#ifdef CONFIG_MCPM + /* + * The best way to detect a multi-cluster configuration at the moment + * is to look for the presence of a CCI in the system. + * Override the default vexpress_smp_ops if so. + */ + struct device_node *node; + node = of_find_compatible_node(NULL, NULL, "arm,cci-400"); + if (node && of_device_is_available(node)) { + mcpm_smp_set_ops(); + return true; + } +#endif + return false; +} diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c new file mode 100644 index 00000000000..9c742edb7ae --- /dev/null +++ b/arch/arm/mach-vexpress/tc2_pm.c @@ -0,0 +1,271 @@ +/* + * arch/arm/mach-vexpress/tc2_pm.c - TC2 power management support + * + * Created by: Nicolas Pitre, October 2012 + * Copyright: (C) 2012 Linaro Limited + * + * Some portions of this file were originally written by Achin Gupta + * Copyright: (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/errno.h> +#include <linux/irqchip/arm-gic.h> + +#include <asm/mcpm.h> +#include <asm/proc-fns.h> +#include <asm/cacheflush.h> +#include <asm/cputype.h> +#include <asm/cp15.h> +#include <asm/psci.h> + +#include <mach/motherboard.h> +#include <mach/tc2.h> + +#include <linux/vexpress.h> +#include <linux/arm-cci.h> + +/* + * We can't use regular spinlocks. In the switcher case, it is possible + * for an outbound CPU to call power_down() after its inbound counterpart + * is already live using the same logical CPU number which trips lockdep + * debugging. + */ +static arch_spinlock_t tc2_pm_lock = __ARCH_SPIN_LOCK_UNLOCKED; + +static int tc2_pm_use_count[TC2_MAX_CPUS][TC2_MAX_CLUSTERS]; + +static int tc2_pm_power_up(unsigned int cpu, unsigned int cluster) +{ + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + if (cluster >= TC2_MAX_CLUSTERS || + cpu >= vexpress_spc_get_nb_cpus(cluster)) + return -EINVAL; + + /* + * Since this is called with IRQs enabled, and no arch_spin_lock_irq + * variant exists, we need to disable IRQs manually here. + */ + local_irq_disable(); + arch_spin_lock(&tc2_pm_lock); + + if (!tc2_pm_use_count[0][cluster] && + !tc2_pm_use_count[1][cluster] && + !tc2_pm_use_count[2][cluster]) + vexpress_spc_powerdown_enable(cluster, 0); + + tc2_pm_use_count[cpu][cluster]++; + if (tc2_pm_use_count[cpu][cluster] == 1) { + vexpress_spc_write_resume_reg(cluster, cpu, + virt_to_phys(mcpm_entry_point)); + vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 1); + } else if (tc2_pm_use_count[cpu][cluster] != 2) { + /* + * The only possible values are: + * 0 = CPU down + * 1 = CPU (still) up + * 2 = CPU requested to be up before it had a chance + * to actually make itself down. + * Any other value is a bug. + */ + BUG(); + } + + arch_spin_unlock(&tc2_pm_lock); + local_irq_enable(); + + return 0; +} + +static void tc2_pm_down(u64 residency) +{ + unsigned int mpidr, cpu, cluster; + bool last_man = false, skip_wfi = false; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + BUG_ON(cluster >= TC2_MAX_CLUSTERS || + cpu >= vexpress_spc_get_nb_cpus(cluster)); + + __mcpm_cpu_going_down(cpu, cluster); + + arch_spin_lock(&tc2_pm_lock); + BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP); + tc2_pm_use_count[cpu][cluster]--; + if (tc2_pm_use_count[cpu][cluster] == 0) { + vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 1); + if (!tc2_pm_use_count[0][cluster] && + !tc2_pm_use_count[1][cluster] && + !tc2_pm_use_count[2][cluster] && + (!residency || residency > 5000)) { + vexpress_spc_powerdown_enable(cluster, 1); + vexpress_spc_set_global_wakeup_intr(1); + last_man = true; + } + } else if (tc2_pm_use_count[cpu][cluster] == 1) { + /* + * A power_up request went ahead of us. + * Even if we do not want to shut this CPU down, + * the caller expects a certain state as if the WFI + * was aborted. So let's continue with cache cleaning. + */ + skip_wfi = true; + } else + BUG(); + + gic_cpu_if_down(); + + if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { + arch_spin_unlock(&tc2_pm_lock); + + set_cr(get_cr() & ~CR_C); + flush_cache_all(); + asm volatile ("clrex"); + set_auxcr(get_auxcr() & ~(1 << 6)); + + cci_disable_port_by_cpu(mpidr); + + /* + * Ensure that both C & I bits are disabled in the SCTLR + * before disabling ACE snoops. This ensures that no + * coherency traffic will originate from this cpu after + * ACE snoops are turned off. + */ + cpu_proc_fin(); + + __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN); + } else { + /* + * If last man then undo any setup done previously. + */ + if (last_man) { + vexpress_spc_powerdown_enable(cluster, 0); + vexpress_spc_set_global_wakeup_intr(0); + } + + arch_spin_unlock(&tc2_pm_lock); + + set_cr(get_cr() & ~CR_C); + flush_cache_louis(); + asm volatile ("clrex"); + set_auxcr(get_auxcr() & ~(1 << 6)); + } + + __mcpm_cpu_down(cpu, cluster); + + /* Now we are prepared for power-down, do it: */ + if (!skip_wfi) + wfi(); + + /* Not dead at this point? Let our caller cope. */ +} + +static void tc2_pm_power_down(void) +{ + tc2_pm_down(0); +} + +static void tc2_pm_suspend(u64 residency) +{ + extern void tc2_resume(void); + unsigned int mpidr, cpu, cluster; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + vexpress_spc_write_resume_reg(cluster, cpu, + virt_to_phys(tc2_resume)); + + tc2_pm_down(residency); +} + +static void tc2_pm_powered_up(void) +{ + unsigned int mpidr, cpu, cluster; + unsigned long flags; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + BUG_ON(cluster >= TC2_MAX_CLUSTERS || + cpu >= vexpress_spc_get_nb_cpus(cluster)); + + local_irq_save(flags); + arch_spin_lock(&tc2_pm_lock); + + if (!tc2_pm_use_count[0][cluster] && + !tc2_pm_use_count[1][cluster] && + !tc2_pm_use_count[2][cluster]) { + vexpress_spc_powerdown_enable(cluster, 0); + vexpress_spc_set_global_wakeup_intr(0); + } + + if (!tc2_pm_use_count[cpu][cluster]) + tc2_pm_use_count[cpu][cluster] = 1; + + vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 0); + vexpress_spc_write_resume_reg(cluster, cpu, 0); + + arch_spin_unlock(&tc2_pm_lock); + local_irq_restore(flags); +} + +static const struct mcpm_platform_ops tc2_pm_power_ops = { + .power_up = tc2_pm_power_up, + .power_down = tc2_pm_power_down, + .suspend = tc2_pm_suspend, + .powered_up = tc2_pm_powered_up, +}; + +static void __init tc2_pm_usage_count_init(void) +{ + unsigned int mpidr, cpu, cluster; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + BUG_ON(cluster >= TC2_MAX_CLUSTERS || + cpu >= vexpress_spc_get_nb_cpus(cluster)); + + tc2_pm_use_count[cpu][cluster] = 1; +} + +extern void tc2_pm_power_up_setup(unsigned int affinity_level); + +static int __init tc2_pm_init(void) +{ + int ret; + + ret = psci_probe(); + if (!ret) { + pr_debug("psci found. Aborting native init\n"); + return -ENODEV; + } + + if (!vexpress_spc_check_loaded()) + return -ENODEV; + + tc2_pm_usage_count_init(); + + ret = mcpm_platform_register(&tc2_pm_power_ops); + if (!ret) + ret = mcpm_sync_init(tc2_pm_power_up_setup); + if (!ret) + pr_info("TC2 power management initialized\n"); + return ret; +} + +early_initcall(tc2_pm_init); diff --git a/arch/arm/mach-vexpress/tc2_pm_psci.c b/arch/arm/mach-vexpress/tc2_pm_psci.c new file mode 100644 index 00000000000..c2fdc22e4c0 --- /dev/null +++ b/arch/arm/mach-vexpress/tc2_pm_psci.c @@ -0,0 +1,173 @@ +/* + * arch/arm/mach-vexpress/tc2_pm_psci.c - TC2 PSCI support + * + * Created by: Achin Gupta, December 2012 + * Copyright: (C) 2012 ARM Limited + * + * Some portions of this file were originally written by Nicolas Pitre + * Copyright: (C) 2012 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/errno.h> + +#include <asm/mcpm.h> +#include <asm/proc-fns.h> +#include <asm/cacheflush.h> +#include <asm/psci.h> +#include <asm/atomic.h> +#include <asm/cputype.h> +#include <asm/cp15.h> + +#include <mach/motherboard.h> +#include <mach/tc2.h> + +#include <linux/vexpress.h> + +/* + * Platform specific state id understood by the firmware and used to + * program the power controller + */ +#define PSCI_POWER_STATE_ID 0 + +static atomic_t tc2_pm_use_count[TC2_MAX_CPUS][TC2_MAX_CLUSTERS]; + +static int tc2_pm_psci_power_up(unsigned int cpu, unsigned int cluster) +{ + unsigned int mpidr = (cluster << 8) | cpu; + int ret = 0; + + BUG_ON(!psci_ops.cpu_on); + + switch (atomic_inc_return(&tc2_pm_use_count[cpu][cluster])) { + case 1: + /* + * This is a request to power up a cpu that linux thinks has + * been powered down. Retries are needed if the firmware has + * seen the power down request as yet. + */ + do + ret = psci_ops.cpu_on(mpidr, + virt_to_phys(mcpm_entry_point)); + while (ret == -EAGAIN); + + return ret; + case 2: + /* This power up request has overtaken a power down request */ + return ret; + default: + /* Any other value is a bug */ + BUG(); + } +} + +static void tc2_pm_psci_power_down(void) +{ + struct psci_power_state power_state; + unsigned int mpidr, cpu, cluster; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + BUG_ON(!psci_ops.cpu_off); + + switch (atomic_dec_return(&tc2_pm_use_count[cpu][cluster])) { + case 1: + /* + * Overtaken by a power up. Flush caches, exit coherency, + * return & fake a reset + */ + set_cr(get_cr() & ~CR_C); + + flush_cache_louis(); + + asm volatile ("clrex"); + set_auxcr(get_auxcr() & ~(1 << 6)); + + return; + case 0: + /* A normal request to possibly power down the cluster */ + power_state.id = PSCI_POWER_STATE_ID; + power_state.type = PSCI_POWER_STATE_TYPE_POWER_DOWN; + power_state.affinity_level = PSCI_POWER_STATE_AFFINITY_LEVEL1; + + psci_ops.cpu_off(power_state); + + /* On success this function never returns */ + default: + /* Any other value is a bug */ + BUG(); + } +} + +static void tc2_pm_psci_suspend(u64 unused) +{ + struct psci_power_state power_state; + + BUG_ON(!psci_ops.cpu_suspend); + + /* On TC2 always attempt to power down the cluster */ + power_state.id = PSCI_POWER_STATE_ID; + power_state.type = PSCI_POWER_STATE_TYPE_POWER_DOWN; + power_state.affinity_level = PSCI_POWER_STATE_AFFINITY_LEVEL1; + + psci_ops.cpu_suspend(power_state, virt_to_phys(mcpm_entry_point)); + + /* On success this function never returns */ + BUG(); +} + +static const struct mcpm_platform_ops tc2_pm_power_ops = { + .power_up = tc2_pm_psci_power_up, + .power_down = tc2_pm_psci_power_down, + .suspend = tc2_pm_psci_suspend, +}; + +static void __init tc2_pm_usage_count_init(void) +{ + unsigned int mpidr, cpu, cluster; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + BUG_ON(cluster >= TC2_MAX_CLUSTERS || + cpu >= vexpress_spc_get_nb_cpus(cluster)); + + atomic_set(&tc2_pm_use_count[cpu][cluster], 1); +} + +static int __init tc2_pm_psci_init(void) +{ + int ret; + + ret = psci_probe(); + if (ret) { + pr_debug("psci not found. Aborting psci init\n"); + return -ENODEV; + } + + if (!vexpress_spc_check_loaded()) { + pr_debug("spc not found. Aborting psci init\n"); + return -ENODEV; + } + + tc2_pm_usage_count_init(); + + ret = mcpm_platform_register(&tc2_pm_power_ops); + if (!ret) + ret = mcpm_sync_init(NULL); + if (!ret) + pr_info("TC2 power management initialized\n"); + return ret; +} + +early_initcall(tc2_pm_psci_init); diff --git a/arch/arm/mach-vexpress/tc2_pm_setup.S b/arch/arm/mach-vexpress/tc2_pm_setup.S new file mode 100644 index 00000000000..a18dafeeb0e --- /dev/null +++ b/arch/arm/mach-vexpress/tc2_pm_setup.S @@ -0,0 +1,68 @@ +/* + * arch/arm/include/asm/tc2_pm_setup.S + * + * Created by: Nicolas Pitre, October 2012 + ( (based on dcscb_setup.S by Dave Martin) + * Copyright: (C) 2012 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + +#include <linux/linkage.h> +#include <asm/mcpm.h> + + +#define SPC_PHYS_BASE 0x7FFF0000 +#define SPC_WAKE_INT_STAT 0xb2c + +#define SNOOP_CTL_A15 0x404 +#define SNOOP_CTL_A7 0x504 + +#define A15_SNOOP_MASK (0x3 << 7) +#define A7_SNOOP_MASK (0x1 << 13) + +#define A15_BX_ADDR0 0xB68 + + +ENTRY(tc2_resume) + mrc p15, 0, r0, c0, c0, 5 + ubfx r1, r0, #0, #4 @ r1 = cpu + ubfx r2, r0, #8, #4 @ r2 = cluster + add r1, r1, r2, lsl #2 @ r1 = index of CPU in WAKE_INT_STAT + ldr r3, =SPC_PHYS_BASE + SPC_WAKE_INT_STAT + ldr r3, [r3] + lsr r3, r1 + tst r3, #1 + wfieq @ if no pending IRQ reenters wfi + b mcpm_entry_point +ENDPROC(tc2_resume) + +/* + * Enable cluster-level coherency, in preparation for turning on the MMU. + * The ACTLR SMP bit does not need to be set here, because cpu_resume() + * already restores that. + */ + +ENTRY(tc2_pm_power_up_setup) + + cmp r0, #0 + beq 2f + + b cci_enable_port_for_self + +2: @ Clear the BX addr register + ldr r3, =SPC_PHYS_BASE + A15_BX_ADDR0 + mrc p15, 0, r0, c0, c0, 5 @ MPIDR + ubfx r1, r0, #8, #4 @ cluster + ubfx r0, r0, #0, #4 @ cpu + add r3, r3, r1, lsl #4 + mov r1, #0 + str r1, [r3, r0, lsl #2] + dsb + + bx lr + +ENDPROC(tc2_pm_power_up_setup) diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c index 8802030df98..057f99b62ea 100644 --- a/arch/arm/mach-vexpress/v2m.c +++ b/arch/arm/mach-vexpress/v2m.c @@ -10,6 +10,7 @@ #include <linux/smp.h> #include <linux/init.h> #include <linux/irqchip.h> +#include <linux/memblock.h> #include <linux/of_address.h> #include <linux/of_fdt.h> #include <linux/of_irq.h> @@ -373,6 +374,31 @@ MACHINE_START(VEXPRESS, "ARM-Versatile Express") .init_machine = v2m_init, MACHINE_END +static void __init v2m_dt_hdlcd_init(void) +{ + struct device_node *node; + int len, na, ns; + const __be32 *prop; + phys_addr_t fb_base, fb_size; + + node = of_find_compatible_node(NULL, NULL, "arm,hdlcd"); + if (!node) + return; + + na = of_n_addr_cells(node); + ns = of_n_size_cells(node); + + prop = of_get_property(node, "framebuffer", &len); + if (WARN_ON(!prop || len < (na + ns) * sizeof(*prop))) + return; + + fb_base = of_read_number(prop, na); + fb_size = of_read_number(prop + na, ns); + + if (WARN_ON(memblock_remove(fb_base, fb_size))) + return; +}; + static struct map_desc v2m_rs1_io_desc __initdata = { .virtual = V2M_PERIPH, .pfn = __phys_to_pfn(0x1c000000), @@ -423,6 +449,8 @@ void __init v2m_dt_init_early(void) pr_warning("vexpress: DT HBI (%x) is not matching " "hardware (%x)!\n", dt_hbi, hbi); } + + v2m_dt_hdlcd_init(); } static void __init v2m_dt_timer_init(void) @@ -456,6 +484,7 @@ static const char * const v2m_dt_match[] __initconst = { DT_MACHINE_START(VEXPRESS_DT, "ARM-Versatile Express") .dt_compat = v2m_dt_match, .smp = smp_ops(vexpress_smp_ops), + .smp_init = smp_init_ops(vexpress_smp_init_ops), .map_io = v2m_dt_map_io, .init_early = v2m_dt_init_early, .init_irq = irqchip_init, diff --git a/arch/arm/mach-virt/Makefile b/arch/arm/mach-virt/Makefile index 042afc1f8c4..7ddbfa60227 100644 --- a/arch/arm/mach-virt/Makefile +++ b/arch/arm/mach-virt/Makefile @@ -3,4 +3,3 @@ # obj-y := virt.o -obj-$(CONFIG_SMP) += platsmp.o diff --git a/arch/arm/mach-virt/platsmp.c b/arch/arm/mach-virt/platsmp.c deleted file mode 100644 index f4143f5bfa5..00000000000 --- a/arch/arm/mach-virt/platsmp.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Dummy Virtual Machine - does what it says on the tin. - * - * Copyright (C) 2012 ARM Ltd - * Author: Will Deacon <will.deacon@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/of.h> - -#include <asm/psci.h> -#include <asm/smp_plat.h> - -extern void secondary_startup(void); - -static void __init virt_smp_init_cpus(void) -{ -} - -static void __init virt_smp_prepare_cpus(unsigned int max_cpus) -{ -} - -static int __cpuinit virt_boot_secondary(unsigned int cpu, - struct task_struct *idle) -{ - if (psci_ops.cpu_on) - return psci_ops.cpu_on(cpu_logical_map(cpu), - __pa(secondary_startup)); - return -ENODEV; -} - -struct smp_operations __initdata virt_smp_ops = { - .smp_init_cpus = virt_smp_init_cpus, - .smp_prepare_cpus = virt_smp_prepare_cpus, - .smp_boot_secondary = virt_boot_secondary, -}; diff --git a/arch/arm/mach-virt/virt.c b/arch/arm/mach-virt/virt.c index 061f283f579..a67d2dd5bb6 100644 --- a/arch/arm/mach-virt/virt.c +++ b/arch/arm/mach-virt/virt.c @@ -36,11 +36,8 @@ static const char *virt_dt_match[] = { NULL }; -extern struct smp_operations virt_smp_ops; - DT_MACHINE_START(VIRT, "Dummy Virtual Machine") .init_irq = irqchip_init, .init_machine = virt_init, - .smp = smp_ops(virt_smp_ops), .dt_compat = virt_dt_match, MACHINE_END diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 5dbf13f954f..e207aa5f846 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -446,8 +446,16 @@ do_translation_fault(unsigned long addr, unsigned int fsr, if (pud_none(*pud_k)) goto bad_area; - if (!pud_present(*pud)) + if (!pud_present(*pud)) { set_pud(pud, *pud_k); + /* + * There is a small window during free_pgtables() where the + * user *pud entry is 0 but the TLB has not been invalidated + * and we get a level 2 (pmd) translation fault caused by the + * intermediate TLB caching of the old level 1 (pud) entry. + */ + flush_tlb_kernel_page(addr); + } pmd = pmd_offset(pud, addr); pmd_k = pmd_offset(pud_k, addr); @@ -470,8 +478,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, #endif if (pmd_none(pmd_k[index])) goto bad_area; + if (!pmd_present(pmd[index])) + copy_pmd(pmd, pmd_k); - copy_pmd(pmd, pmd_k); return 0; bad_area: diff --git a/arch/arm/plat-versatile/headsmp.S b/arch/arm/plat-versatile/headsmp.S index b178d44e9ea..2677bc3762d 100644 --- a/arch/arm/plat-versatile/headsmp.S +++ b/arch/arm/plat-versatile/headsmp.S @@ -11,8 +11,6 @@ #include <linux/linkage.h> #include <linux/init.h> - __INIT - /* * Realview/Versatile Express specific entry point for secondary CPUs. * This provides a "holding pen" into which all secondary cores are held diff --git a/block/blk-core.c b/block/blk-core.c index d5745b5833c..0852e5d4343 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -3180,7 +3180,8 @@ int __init blk_dev_init(void) /* used for unplugging and affects IO latency/throughput - HIGHPRI */ kblockd_workqueue = alloc_workqueue("kblockd", - WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); + WQ_MEM_RECLAIM | WQ_HIGHPRI | + WQ_POWER_EFFICIENT, 0); if (!kblockd_workqueue) panic("Failed to create kblockd\n"); diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 9c4bb8266bc..4464c823cff 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -144,7 +144,8 @@ void put_io_context(struct io_context *ioc) if (atomic_long_dec_and_test(&ioc->refcount)) { spin_lock_irqsave(&ioc->lock, flags); if (!hlist_empty(&ioc->icq_list)) - schedule_work(&ioc->release_work); + queue_work(system_power_efficient_wq, + &ioc->release_work); else free_ioc = true; spin_unlock_irqrestore(&ioc->lock, flags); diff --git a/block/genhd.c b/block/genhd.c index cdeb5277dfd..dadf42b454a 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1489,9 +1489,11 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now) intv = disk_events_poll_jiffies(disk); set_timer_slack(&ev->dwork.timer, intv / 4); if (check_now) - queue_delayed_work(system_freezable_wq, &ev->dwork, 0); + queue_delayed_work(system_freezable_power_efficient_wq, + &ev->dwork, 0); else if (intv) - queue_delayed_work(system_freezable_wq, &ev->dwork, intv); + queue_delayed_work(system_freezable_power_efficient_wq, + &ev->dwork, intv); out_unlock: spin_unlock_irqrestore(&ev->lock, flags); } @@ -1534,7 +1536,8 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask) spin_lock_irq(&ev->lock); ev->clearing |= mask; if (!ev->block) - mod_delayed_work(system_freezable_wq, &ev->dwork, 0); + mod_delayed_work(system_freezable_power_efficient_wq, + &ev->dwork, 0); spin_unlock_irq(&ev->lock); } @@ -1627,7 +1630,8 @@ static void disk_check_events(struct disk_events *ev, intv = disk_events_poll_jiffies(disk); if (!ev->block && intv) - queue_delayed_work(system_freezable_wq, &ev->dwork, intv); + queue_delayed_work(system_freezable_power_efficient_wq, + &ev->dwork, intv); spin_unlock_irq(&ev->lock); diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig index b05ecab915c..5286e2d333b 100644 --- a/drivers/bus/Kconfig +++ b/drivers/bus/Kconfig @@ -26,4 +26,11 @@ config OMAP_INTERCONNECT help Driver to enable OMAP interconnect error handling driver. + +config ARM_CCI + bool "ARM CCI driver support" + depends on ARM + help + Driver supporting the CCI cache coherent interconnect for ARM + platforms. endmenu diff --git a/drivers/bus/Makefile b/drivers/bus/Makefile index 3c7b53c1209..670cea44380 100644 --- a/drivers/bus/Makefile +++ b/drivers/bus/Makefile @@ -7,3 +7,5 @@ obj-$(CONFIG_OMAP_OCP2SCP) += omap-ocp2scp.o # Interconnect bus driver for OMAP SoCs. obj-$(CONFIG_OMAP_INTERCONNECT) += omap_l3_smx.o omap_l3_noc.o +# CCI cache coherent interconnect for ARM platforms +obj-$(CONFIG_ARM_CCI) += arm-cci.o diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c new file mode 100644 index 00000000000..2d1387bf172 --- /dev/null +++ b/drivers/bus/arm-cci.c @@ -0,0 +1,945 @@ +/* + * CCI cache coherent interconnect driver + * + * Copyright (C) 2013 ARM Ltd. + * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/arm-cci.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/of_address.h> +#include <linux/slab.h> + +#include <asm/cacheflush.h> +#include <asm/irq_regs.h> +#include <asm/pmu.h> +#include <asm/smp_plat.h> + +#define DRIVER_NAME "CCI" + +#define CCI_PORT_CTRL 0x0 +#define CCI_CTRL_STATUS 0xc + +#define CCI_ENABLE_SNOOP_REQ 0x1 +#define CCI_ENABLE_DVM_REQ 0x2 +#define CCI_ENABLE_REQ (CCI_ENABLE_SNOOP_REQ | CCI_ENABLE_DVM_REQ) + +struct cci_nb_ports { + unsigned int nb_ace; + unsigned int nb_ace_lite; +}; + +enum cci_ace_port_type { + ACE_INVALID_PORT = 0x0, + ACE_PORT, + ACE_LITE_PORT, +}; + +struct cci_ace_port { + void __iomem *base; + unsigned long phys; + enum cci_ace_port_type type; + struct device_node *dn; +}; + +static struct cci_ace_port *ports; +static unsigned int nb_cci_ports; + +static void __iomem *cci_ctrl_base; +static unsigned long cci_ctrl_phys; + +#ifdef CONFIG_HW_PERF_EVENTS + +static void __iomem *cci_pmu_base; + +#define CCI400_PMCR 0x0100 + +#define CCI400_PMU_CYCLE_CNTR_BASE 0x0000 +#define CCI400_PMU_CNTR_BASE(idx) (CCI400_PMU_CYCLE_CNTR_BASE + (idx) * 0x1000) + +#define CCI400_PMCR_CEN 0x00000001 +#define CCI400_PMCR_RST 0x00000002 +#define CCI400_PMCR_CCR 0x00000004 +#define CCI400_PMCR_CCD 0x00000008 +#define CCI400_PMCR_EX 0x00000010 +#define CCI400_PMCR_DP 0x00000020 +#define CCI400_PMCR_NCNT_MASK 0x0000F800 +#define CCI400_PMCR_NCNT_SHIFT 11 + +#define CCI400_PMU_EVT_SEL 0x000 +#define CCI400_PMU_CNTR 0x004 +#define CCI400_PMU_CNTR_CTRL 0x008 +#define CCI400_PMU_OVERFLOW 0x00C + +#define CCI400_PMU_OVERFLOW_FLAG 1 + +enum cci400_perf_events { + CCI400_PMU_CYCLES = 0xFF +}; + +#define CCI400_PMU_EVENT_MASK 0xff +#define CCI400_PMU_EVENT_SOURCE(event) ((event >> 5) & 0x7) +#define CCI400_PMU_EVENT_CODE(event) (event & 0x1f) + +#define CCI400_PMU_EVENT_SOURCE_S0 0 +#define CCI400_PMU_EVENT_SOURCE_S4 4 +#define CCI400_PMU_EVENT_SOURCE_M0 5 +#define CCI400_PMU_EVENT_SOURCE_M2 7 + +#define CCI400_PMU_EVENT_SLAVE_MIN 0x0 +#define CCI400_PMU_EVENT_SLAVE_MAX 0x13 + +#define CCI400_PMU_EVENT_MASTER_MIN 0x14 +#define CCI400_PMU_EVENT_MASTER_MAX 0x1A + +#define CCI400_PMU_MAX_HW_EVENTS 5 /* CCI PMU has 4 counters + 1 cycle counter */ + +#define CCI400_PMU_CYCLE_COUNTER_IDX 0 +#define CCI400_PMU_COUNTER0_IDX 1 +#define CCI400_PMU_COUNTER_LAST(cci_pmu) (CCI400_PMU_CYCLE_COUNTER_IDX + cci_pmu->num_events - 1) + + +static struct perf_event *events[CCI400_PMU_MAX_HW_EVENTS]; +static unsigned long used_mask[BITS_TO_LONGS(CCI400_PMU_MAX_HW_EVENTS)]; +static struct pmu_hw_events cci_hw_events = { + .events = events, + .used_mask = used_mask, +}; + +static int cci_pmu_validate_hw_event(u8 hw_event) +{ + u8 ev_source = CCI400_PMU_EVENT_SOURCE(hw_event); + u8 ev_code = CCI400_PMU_EVENT_CODE(hw_event); + + if (ev_source <= CCI400_PMU_EVENT_SOURCE_S4 && + ev_code <= CCI400_PMU_EVENT_SLAVE_MAX) + return hw_event; + else if (CCI400_PMU_EVENT_SOURCE_M0 <= ev_source && + ev_source <= CCI400_PMU_EVENT_SOURCE_M2 && + CCI400_PMU_EVENT_MASTER_MIN <= ev_code && + ev_code <= CCI400_PMU_EVENT_MASTER_MAX) + return hw_event; + + return -EINVAL; +} + +static inline int cci_pmu_counter_is_valid(struct arm_pmu *cci_pmu, int idx) +{ + return CCI400_PMU_CYCLE_COUNTER_IDX <= idx && + idx <= CCI400_PMU_COUNTER_LAST(cci_pmu); +} + +static inline u32 cci_pmu_read_register(int idx, unsigned int offset) +{ + return readl_relaxed(cci_pmu_base + CCI400_PMU_CNTR_BASE(idx) + offset); +} + +static inline void cci_pmu_write_register(u32 value, int idx, unsigned int offset) +{ + return writel_relaxed(value, cci_pmu_base + CCI400_PMU_CNTR_BASE(idx) + offset); +} + +static inline void cci_pmu_disable_counter(int idx) +{ + cci_pmu_write_register(0, idx, CCI400_PMU_CNTR_CTRL); +} + +static inline void cci_pmu_enable_counter(int idx) +{ + cci_pmu_write_register(1, idx, CCI400_PMU_CNTR_CTRL); +} + +static inline void cci_pmu_select_event(int idx, unsigned long event) +{ + event &= CCI400_PMU_EVENT_MASK; + cci_pmu_write_register(event, idx, CCI400_PMU_EVT_SEL); +} + +static u32 cci_pmu_get_max_counters(void) +{ + u32 n_cnts = (readl_relaxed(cci_ctrl_base + CCI400_PMCR) & + CCI400_PMCR_NCNT_MASK) >> CCI400_PMCR_NCNT_SHIFT; + + /* add 1 for cycle counter */ + return n_cnts + 1; +} + +static struct pmu_hw_events *cci_pmu_get_hw_events(void) +{ + return &cci_hw_events; +} + +static int cci_pmu_get_event_idx(struct pmu_hw_events *hw, struct perf_event *event) +{ + struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hw_event = &event->hw; + unsigned long cci_event = hw_event->config_base & CCI400_PMU_EVENT_MASK; + int idx; + + if (cci_event == CCI400_PMU_CYCLES) { + if (test_and_set_bit(CCI400_PMU_CYCLE_COUNTER_IDX, hw->used_mask)) + return -EAGAIN; + + return CCI400_PMU_CYCLE_COUNTER_IDX; + } + + for (idx = CCI400_PMU_COUNTER0_IDX; idx <= CCI400_PMU_COUNTER_LAST(cci_pmu); ++idx) { + if (!test_and_set_bit(idx, hw->used_mask)) + return idx; + } + + /* No counters available */ + return -EAGAIN; +} + +static int cci_pmu_map_event(struct perf_event *event) +{ + int mapping; + u8 config = event->attr.config & CCI400_PMU_EVENT_MASK; + + if (event->attr.type < PERF_TYPE_MAX) + return -ENOENT; + + /* 0xff is used to represent CCI Cycles */ + if (config == 0xff) + mapping = config; + else + mapping = cci_pmu_validate_hw_event(config); + + return mapping; +} + +static int cci_pmu_request_irq(struct arm_pmu *cci_pmu, irq_handler_t handler) +{ + int irq, err, i = 0; + struct platform_device *pmu_device = cci_pmu->plat_device; + + if (unlikely(!pmu_device)) + return -ENODEV; + + /* CCI exports 6 interrupts - 1 nERRORIRQ + 5 nEVNTCNTOVERFLOW (PMU) + nERRORIRQ will be handled by secure firmware on TC2. So we + assume that all CCI interrupts listed in the linux device + tree are PMU interrupts. + + The following code should then be able to handle different routing + of the CCI PMU interrupts. + */ + while ((irq = platform_get_irq(pmu_device, i)) > 0) { + err = request_irq(irq, handler, 0, "arm-cci-pmu", cci_pmu); + if (err) { + dev_err(&pmu_device->dev, "unable to request IRQ%d for ARM CCI PMU counters\n", + irq); + return err; + } + i++; + } + + return 0; +} + +static irqreturn_t cci_pmu_handle_irq(int irq_num, void *dev) +{ + struct arm_pmu *cci_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *events = cci_pmu->get_hw_events(); + struct perf_sample_data data; + struct pt_regs *regs; + int idx; + + regs = get_irq_regs(); + + /* Iterate over counters and update the corresponding perf events. + This should work regardless of whether we have per-counter overflow + interrupt or a combined overflow interrupt. */ + for (idx = CCI400_PMU_CYCLE_COUNTER_IDX; idx <= CCI400_PMU_COUNTER_LAST(cci_pmu); idx++) { + struct perf_event *event = events->events[idx]; + struct hw_perf_event *hw_counter; + + if (!event) + continue; + + hw_counter = &event->hw; + + /* Did this counter overflow? */ + if (!(cci_pmu_read_register(idx, CCI400_PMU_OVERFLOW) & CCI400_PMU_OVERFLOW_FLAG)) + continue; + cci_pmu_write_register(CCI400_PMU_OVERFLOW_FLAG, idx, CCI400_PMU_OVERFLOW); + + armpmu_event_update(event); + perf_sample_data_init(&data, 0, hw_counter->last_period); + if (!armpmu_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + cci_pmu->disable(event); + } + + irq_work_run(); + return IRQ_HANDLED; +} + +static void cci_pmu_free_irq(struct arm_pmu *cci_pmu) +{ + int irq, i = 0; + struct platform_device *pmu_device = cci_pmu->plat_device; + + while ((irq = platform_get_irq(pmu_device, i)) > 0) { + free_irq(irq, cci_pmu); + i++; + } +} + +static void cci_pmu_enable_event(struct perf_event *event) +{ + unsigned long flags; + struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = cci_pmu->get_hw_events(); + struct hw_perf_event *hw_counter = &event->hw; + int idx = hw_counter->idx; + + if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) { + dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); + return; + } + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Configure the event to count, unless you are counting cycles */ + if (idx != CCI400_PMU_CYCLE_COUNTER_IDX) + cci_pmu_select_event(idx, hw_counter->config_base); + + cci_pmu_enable_counter(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void cci_pmu_disable_event(struct perf_event *event) +{ + unsigned long flags; + struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = cci_pmu->get_hw_events(); + struct hw_perf_event *hw_counter = &event->hw; + int idx = hw_counter->idx; + + if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) { + dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); + return; + } + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + cci_pmu_disable_counter(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void cci_pmu_start(struct arm_pmu *cci_pmu) +{ + u32 val; + unsigned long flags; + struct pmu_hw_events *events = cci_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Enable all the PMU counters. */ + val = readl(cci_ctrl_base + CCI400_PMCR) | CCI400_PMCR_CEN; + writel(val, cci_ctrl_base + CCI400_PMCR); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void cci_pmu_stop(struct arm_pmu *cci_pmu) +{ + u32 val; + unsigned long flags; + struct pmu_hw_events *events = cci_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Disable all the PMU counters. */ + val = readl(cci_ctrl_base + CCI400_PMCR) & ~CCI400_PMCR_CEN; + writel(val, cci_ctrl_base + CCI400_PMCR); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static u32 cci_pmu_read_counter(struct perf_event *event) +{ + struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hw_counter = &event->hw; + int idx = hw_counter->idx; + u32 value; + + if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) { + dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); + return 0; + } + value = cci_pmu_read_register(idx, CCI400_PMU_CNTR); + + return value; +} + +static void cci_pmu_write_counter(struct perf_event *event, u32 value) +{ + struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hw_counter = &event->hw; + int idx = hw_counter->idx; + + if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) + dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); + else + cci_pmu_write_register(value, idx, CCI400_PMU_CNTR); +} + +static struct arm_pmu cci_pmu = { + .name = DRIVER_NAME, + .max_period = (1LLU << 32) - 1, + .get_hw_events = cci_pmu_get_hw_events, + .get_event_idx = cci_pmu_get_event_idx, + .map_event = cci_pmu_map_event, + .request_irq = cci_pmu_request_irq, + .handle_irq = cci_pmu_handle_irq, + .free_irq = cci_pmu_free_irq, + .enable = cci_pmu_enable_event, + .disable = cci_pmu_disable_event, + .start = cci_pmu_start, + .stop = cci_pmu_stop, + .read_counter = cci_pmu_read_counter, + .write_counter = cci_pmu_write_counter, +}; + +static int cci_pmu_probe(struct platform_device *pdev) +{ + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + cci_pmu_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(cci_pmu_base)) + return PTR_ERR(cci_pmu_base); + + cci_pmu.plat_device = pdev; + cci_pmu.num_events = cci_pmu_get_max_counters(); + raw_spin_lock_init(&cci_hw_events.pmu_lock); + cpumask_setall(&cci_pmu.valid_cpus); + + return armpmu_register(&cci_pmu, -1); +} + +static const struct of_device_id arm_cci_pmu_matches[] = { + {.compatible = "arm,cci-400-pmu"}, + {}, +}; + +static struct platform_driver cci_pmu_platform_driver = { + .driver = { + .name = DRIVER_NAME, + .of_match_table = arm_cci_pmu_matches, + }, + .probe = cci_pmu_probe, +}; + +static int __init cci_pmu_init(void) +{ + if (platform_driver_register(&cci_pmu_platform_driver)) + WARN(1, "unable to register CCI platform driver\n"); + return 0; +} + +#else + +static int __init cci_pmu_init(void) +{ + return 0; +} + +#endif /* CONFIG_HW_PERF_EVENTS */ + +struct cpu_port { + u64 mpidr; + u32 port; +}; + +/* + * Use the port MSB as valid flag, shift can be made dynamic + * by computing number of bits required for port indexes. + * Code disabling CCI cpu ports runs with D-cache invalidated + * and SCTLR bit clear so data accesses must be kept to a minimum + * to improve performance; for now shift is left static to + * avoid one more data access while disabling the CCI port. + */ +#define PORT_VALID_SHIFT 31 +#define PORT_VALID (0x1 << PORT_VALID_SHIFT) + +static inline void init_cpu_port(struct cpu_port *port, u32 index, u64 mpidr) +{ + port->port = PORT_VALID | index; + port->mpidr = mpidr; +} + +static inline bool cpu_port_is_valid(struct cpu_port *port) +{ + return !!(port->port & PORT_VALID); +} + +static inline bool cpu_port_match(struct cpu_port *port, u64 mpidr) +{ + return port->mpidr == (mpidr & MPIDR_HWID_BITMASK); +} + +static struct cpu_port cpu_port[NR_CPUS]; + +/** + * __cci_ace_get_port - Function to retrieve the port index connected to + * a cpu or device. + * + * @dn: device node of the device to look-up + * @type: port type + * + * Return value: + * - CCI port index if success + * - -ENODEV if failure + */ +static int __cci_ace_get_port(struct device_node *dn, int type) +{ + int i; + bool ace_match; + struct device_node *cci_portn; + + cci_portn = of_parse_phandle(dn, "cci-control-port", 0); + for (i = 0; i < nb_cci_ports; i++) { + ace_match = ports[i].type == type; + if (ace_match && cci_portn == ports[i].dn) + return i; + } + return -ENODEV; +} + +int cci_ace_get_port(struct device_node *dn) +{ + return __cci_ace_get_port(dn, ACE_LITE_PORT); +} +EXPORT_SYMBOL_GPL(cci_ace_get_port); + +static void __init cci_ace_init_ports(void) +{ + int port, ac, cpu; + u64 hwid; + const u32 *cell; + struct device_node *cpun, *cpus; + + cpus = of_find_node_by_path("/cpus"); + if (WARN(!cpus, "Missing cpus node, bailing out\n")) + return; + + if (WARN_ON(of_property_read_u32(cpus, "#address-cells", &ac))) + ac = of_n_addr_cells(cpus); + + /* + * Port index look-up speeds up the function disabling ports by CPU, + * since the logical to port index mapping is done once and does + * not change after system boot. + * The stashed index array is initialized for all possible CPUs + * at probe time. + */ + for_each_child_of_node(cpus, cpun) { + if (of_node_cmp(cpun->type, "cpu")) + continue; + cell = of_get_property(cpun, "reg", NULL); + if (WARN(!cell, "%s: missing reg property\n", cpun->full_name)) + continue; + + hwid = of_read_number(cell, ac); + cpu = get_logical_index(hwid & MPIDR_HWID_BITMASK); + + if (cpu < 0 || !cpu_possible(cpu)) + continue; + port = __cci_ace_get_port(cpun, ACE_PORT); + if (port < 0) + continue; + + init_cpu_port(&cpu_port[cpu], port, cpu_logical_map(cpu)); + } + + for_each_possible_cpu(cpu) { + WARN(!cpu_port_is_valid(&cpu_port[cpu]), + "CPU %u does not have an associated CCI port\n", + cpu); + } +} +/* + * Functions to enable/disable a CCI interconnect slave port + * + * They are called by low-level power management code to disable slave + * interfaces snoops and DVM broadcast. + * Since they may execute with cache data allocation disabled and + * after the caches have been cleaned and invalidated the functions provide + * no explicit locking since they may run with D-cache disabled, so normal + * cacheable kernel locks based on ldrex/strex may not work. + * Locking has to be provided by BSP implementations to ensure proper + * operations. + */ + +/** + * cci_port_control() - function to control a CCI port + * + * @port: index of the port to setup + * @enable: if true enables the port, if false disables it + */ +static void notrace cci_port_control(unsigned int port, bool enable) +{ + void __iomem *base = ports[port].base; + + writel_relaxed(enable ? CCI_ENABLE_REQ : 0, base + CCI_PORT_CTRL); + /* + * This function is called from power down procedures + * and must not execute any instruction that might + * cause the processor to be put in a quiescent state + * (eg wfi). Hence, cpu_relax() can not be added to this + * read loop to optimize power, since it might hide possibly + * disruptive operations. + */ + while (readl_relaxed(cci_ctrl_base + CCI_CTRL_STATUS) & 0x1) + ; +} + +/** + * cci_disable_port_by_cpu() - function to disable a CCI port by CPU + * reference + * + * @mpidr: mpidr of the CPU whose CCI port should be disabled + * + * Disabling a CCI port for a CPU implies disabling the CCI port + * controlling that CPU cluster. Code disabling CPU CCI ports + * must make sure that the CPU running the code is the last active CPU + * in the cluster ie all other CPUs are quiescent in a low power state. + * + * Return: + * 0 on success + * -ENODEV on port look-up failure + */ +int notrace cci_disable_port_by_cpu(u64 mpidr) +{ + int cpu; + bool is_valid; + for (cpu = 0; cpu < nr_cpu_ids; cpu++) { + is_valid = cpu_port_is_valid(&cpu_port[cpu]); + if (is_valid && cpu_port_match(&cpu_port[cpu], mpidr)) { + cci_port_control(cpu_port[cpu].port, false); + return 0; + } + } + return -ENODEV; +} +EXPORT_SYMBOL_GPL(cci_disable_port_by_cpu); + +/** + * cci_enable_port_for_self() - enable a CCI port for calling CPU + * + * Enabling a CCI port for the calling CPU implies enabling the CCI + * port controlling that CPU's cluster. Caller must make sure that the + * CPU running the code is the first active CPU in the cluster and all + * other CPUs are quiescent in a low power state or waiting for this CPU + * to complete the CCI initialization. + * + * Because this is called when the MMU is still off and with no stack, + * the code must be position independent and ideally rely on callee + * clobbered registers only. To achieve this we must code this function + * entirely in assembler. + * + * On success this returns with the proper CCI port enabled. In case of + * any failure this never returns as the inability to enable the CCI is + * fatal and there is no possible recovery at this stage. + */ +asmlinkage void __naked cci_enable_port_for_self(void) +{ + asm volatile ("\n" + +" mrc p15, 0, r0, c0, c0, 5 @ get MPIDR value \n" +" and r0, r0, #"__stringify(MPIDR_HWID_BITMASK)" \n" +" adr r1, 5f \n" +" ldr r2, [r1] \n" +" add r1, r1, r2 @ &cpu_port \n" +" add ip, r1, %[sizeof_cpu_port] \n" + + /* Loop over the cpu_port array looking for a matching MPIDR */ +"1: ldr r2, [r1, %[offsetof_cpu_port_mpidr_lsb]] \n" +" cmp r2, r0 @ compare MPIDR \n" +" bne 2f \n" + + /* Found a match, now test port validity */ +" ldr r3, [r1, %[offsetof_cpu_port_port]] \n" +" tst r3, #"__stringify(PORT_VALID)" \n" +" bne 3f \n" + + /* no match, loop with the next cpu_port entry */ +"2: add r1, r1, %[sizeof_struct_cpu_port] \n" +" cmp r1, ip @ done? \n" +" blo 1b \n" + + /* CCI port not found -- cheaply try to stall this CPU */ +"cci_port_not_found: \n" +" wfi \n" +" wfe \n" +" b cci_port_not_found \n" + + /* Use matched port index to look up the corresponding ports entry */ +"3: bic r3, r3, #"__stringify(PORT_VALID)" \n" +" adr r0, 6f \n" +" ldmia r0, {r1, r2} \n" +" sub r1, r1, r0 @ virt - phys \n" +" ldr r0, [r0, r2] @ *(&ports) \n" +" mov r2, %[sizeof_struct_ace_port] \n" +" mla r0, r2, r3, r0 @ &ports[index] \n" +" sub r0, r0, r1 @ virt_to_phys() \n" + + /* Enable the CCI port */ +" ldr r0, [r0, %[offsetof_port_phys]] \n" +" mov r3, #"__stringify(CCI_ENABLE_REQ)" \n" +" str r3, [r0, #"__stringify(CCI_PORT_CTRL)"] \n" + + /* poll the status reg for completion */ +" adr r1, 7f \n" +" ldr r0, [r1] \n" +" ldr r0, [r0, r1] @ cci_ctrl_base \n" +"4: ldr r1, [r0, #"__stringify(CCI_CTRL_STATUS)"] \n" +" tst r1, #1 \n" +" bne 4b \n" + +" mov r0, #0 \n" +" bx lr \n" + +" .align 2 \n" +"5: .word cpu_port - . \n" +"6: .word . \n" +" .word ports - 6b \n" +"7: .word cci_ctrl_phys - . \n" + : : + [sizeof_cpu_port] "i" (sizeof(cpu_port)), +#ifndef __ARMEB__ + [offsetof_cpu_port_mpidr_lsb] "i" (offsetof(struct cpu_port, mpidr)), +#else + [offsetof_cpu_port_mpidr_lsb] "i" (offsetof(struct cpu_port, mpidr)+4), +#endif + [offsetof_cpu_port_port] "i" (offsetof(struct cpu_port, port)), + [sizeof_struct_cpu_port] "i" (sizeof(struct cpu_port)), + [sizeof_struct_ace_port] "i" (sizeof(struct cci_ace_port)), + [offsetof_port_phys] "i" (offsetof(struct cci_ace_port, phys)) ); + + unreachable(); +} + +/** + * __cci_control_port_by_device() - function to control a CCI port by device + * reference + * + * @dn: device node pointer of the device whose CCI port should be + * controlled + * @enable: if true enables the port, if false disables it + * + * Return: + * 0 on success + * -ENODEV on port look-up failure + */ +int notrace __cci_control_port_by_device(struct device_node *dn, bool enable) +{ + int port; + + if (!dn) + return -ENODEV; + + port = __cci_ace_get_port(dn, ACE_LITE_PORT); + if (WARN_ONCE(port < 0, "node %s ACE lite port look-up failure\n", + dn->full_name)) + return -ENODEV; + cci_port_control(port, enable); + return 0; +} +EXPORT_SYMBOL_GPL(__cci_control_port_by_device); + +/** + * __cci_control_port_by_index() - function to control a CCI port by port index + * + * @port: port index previously retrieved with cci_ace_get_port() + * @enable: if true enables the port, if false disables it + * + * Return: + * 0 on success + * -ENODEV on port index out of range + * -EPERM if operation carried out on an ACE PORT + */ +int notrace __cci_control_port_by_index(u32 port, bool enable) +{ + if (port >= nb_cci_ports || ports[port].type == ACE_INVALID_PORT) + return -ENODEV; + /* + * CCI control for ports connected to CPUS is extremely fragile + * and must be made to go through a specific and controlled + * interface (ie cci_disable_port_by_cpu(); control by general purpose + * indexing is therefore disabled for ACE ports. + */ + if (ports[port].type == ACE_PORT) + return -EPERM; + + cci_port_control(port, enable); + return 0; +} +EXPORT_SYMBOL_GPL(__cci_control_port_by_index); + +static const struct cci_nb_ports cci400_ports = { + .nb_ace = 2, + .nb_ace_lite = 3 +}; + +static const struct of_device_id arm_cci_matches[] = { + {.compatible = "arm,cci-400", .data = &cci400_ports }, + {}, +}; + +static const struct of_device_id arm_cci_ctrl_if_matches[] = { + {.compatible = "arm,cci-400-ctrl-if", }, + {}, +}; + +static int __init cci_probe(void) +{ + struct cci_nb_ports const *cci_config; + int ret, i, nb_ace = 0, nb_ace_lite = 0; + struct device_node *np, *cp; + struct resource res; + const char *match_str; + bool is_ace; + + np = of_find_matching_node(NULL, arm_cci_matches); + if (!np) + return -ENODEV; + + cci_config = of_match_node(arm_cci_matches, np)->data; + if (!cci_config) + return -ENODEV; + + nb_cci_ports = cci_config->nb_ace + cci_config->nb_ace_lite; + + ports = kcalloc(sizeof(*ports), nb_cci_ports, GFP_KERNEL); + if (!ports) + return -ENOMEM; + + ret = of_address_to_resource(np, 0, &res); + if (!ret) { + cci_ctrl_base = ioremap(res.start, resource_size(&res)); + cci_ctrl_phys = res.start; + } + if (ret || !cci_ctrl_base) { + WARN(1, "unable to ioremap CCI ctrl\n"); + ret = -ENXIO; + goto memalloc_err; + } + + for_each_child_of_node(np, cp) { + if (!of_match_node(arm_cci_ctrl_if_matches, cp)) + continue; + + i = nb_ace + nb_ace_lite; + + if (i >= nb_cci_ports) + break; + + if (of_property_read_string(cp, "interface-type", + &match_str)) { + WARN(1, "node %s missing interface-type property\n", + cp->full_name); + continue; + } + is_ace = strcmp(match_str, "ace") == 0; + if (!is_ace && strcmp(match_str, "ace-lite")) { + WARN(1, "node %s containing invalid interface-type property, skipping it\n", + cp->full_name); + continue; + } + + ret = of_address_to_resource(cp, 0, &res); + if (!ret) { + ports[i].base = ioremap(res.start, resource_size(&res)); + ports[i].phys = res.start; + } + if (ret || !ports[i].base) { + WARN(1, "unable to ioremap CCI port %d\n", i); + continue; + } + + if (is_ace) { + if (WARN_ON(nb_ace >= cci_config->nb_ace)) + continue; + ports[i].type = ACE_PORT; + ++nb_ace; + } else { + if (WARN_ON(nb_ace_lite >= cci_config->nb_ace_lite)) + continue; + ports[i].type = ACE_LITE_PORT; + ++nb_ace_lite; + } + ports[i].dn = cp; + } + + /* initialize a stashed array of ACE ports to speed-up look-up */ + cci_ace_init_ports(); + + /* + * Multi-cluster systems may need this data when non-coherent, during + * cluster power-up/power-down. Make sure it reaches main memory. + */ + sync_cache_w(&cci_ctrl_base); + sync_cache_w(&cci_ctrl_phys); + sync_cache_w(&ports); + sync_cache_w(&cpu_port); + __sync_cache_range_w(ports, sizeof(*ports) * nb_cci_ports); + pr_info("ARM CCI driver probed\n"); + return 0; + +memalloc_err: + + kfree(ports); + return ret; +} + +static int cci_init_status = -EAGAIN; +static DEFINE_MUTEX(cci_probing); + +static int __init cci_init(void) +{ + if (cci_init_status != -EAGAIN) + return cci_init_status; + + mutex_lock(&cci_probing); + if (cci_init_status == -EAGAIN) + cci_init_status = cci_probe(); + mutex_unlock(&cci_probing); + return cci_init_status; +} + +/* + * To sort out early init calls ordering a helper function is provided to + * check if the CCI driver has beed initialized. Function check if the driver + * has been initialized, if not it calls the init function that probes + * the driver and updates the return value. + */ +bool __init cci_probed(void) +{ + return cci_init() == 0; +} +EXPORT_SYMBOL_GPL(cci_probed); + +early_initcall(cci_init); +core_initcall(cci_pmu_init); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ARM CCI support"); diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 0357ac44638..d0d9b212475 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -42,7 +42,7 @@ config COMMON_CLK_WM831X config COMMON_CLK_VERSATILE bool "Clock driver for ARM Reference designs" - depends on ARCH_INTEGRATOR || ARCH_REALVIEW || ARCH_VEXPRESS + depends on ARCH_INTEGRATOR || ARCH_REALVIEW || ARCH_VEXPRESS || ARM64 ---help--- Supports clocking on ARM Reference designs: - Integrator/AP and Integrator/CP diff --git a/drivers/clk/versatile/Makefile b/drivers/clk/versatile/Makefile index c16ca787170..6e76bf87ca8 100644 --- a/drivers/clk/versatile/Makefile +++ b/drivers/clk/versatile/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_ARCH_INTEGRATOR) += clk-integrator.o obj-$(CONFIG_INTEGRATOR_IMPD1) += clk-impd1.o obj-$(CONFIG_ARCH_REALVIEW) += clk-realview.o obj-$(CONFIG_ARCH_VEXPRESS) += clk-vexpress.o clk-sp810.o -obj-$(CONFIG_VEXPRESS_CONFIG) += clk-vexpress-osc.o +obj-$(CONFIG_VEXPRESS_CONFIG) += clk-vexpress-osc.o clk-vexpress-spc.o diff --git a/drivers/clk/versatile/clk-vexpress-osc.c b/drivers/clk/versatile/clk-vexpress-osc.c index 256c8be74df..2dc8b41a339 100644 --- a/drivers/clk/versatile/clk-vexpress-osc.c +++ b/drivers/clk/versatile/clk-vexpress-osc.c @@ -107,7 +107,7 @@ void __init vexpress_osc_of_setup(struct device_node *node) osc->func = vexpress_config_func_get_by_node(node); if (!osc->func) { pr_err("Failed to obtain config func for node '%s'!\n", - node->name); + node->full_name); goto error; } @@ -119,7 +119,7 @@ void __init vexpress_osc_of_setup(struct device_node *node) of_property_read_string(node, "clock-output-names", &init.name); if (!init.name) - init.name = node->name; + init.name = node->full_name; init.ops = &vexpress_osc_ops; init.flags = CLK_IS_ROOT; diff --git a/drivers/clk/versatile/clk-vexpress-spc.c b/drivers/clk/versatile/clk-vexpress-spc.c new file mode 100644 index 00000000000..bb566e244b0 --- /dev/null +++ b/drivers/clk/versatile/clk-vexpress-spc.c @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2012 ARM Limited + * Copyright (C) 2012 Linaro + * + * Author: Viresh Kumar <viresh.kumar@linaro.org> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +/* SPC clock programming interface for Vexpress cpus */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/clk-provider.h> +#include <linux/clkdev.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/vexpress.h> + +struct clk_spc { + struct clk_hw hw; + spinlock_t *lock; + int cluster; +}; + +#define to_clk_spc(spc) container_of(spc, struct clk_spc, hw) + +static unsigned long spc_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_spc *spc = to_clk_spc(hw); + u32 freq; + + if (vexpress_spc_get_performance(spc->cluster, &freq)) { + return -EIO; + pr_err("%s: Failed", __func__); + } + + return freq * 1000; +} + +static long spc_round_rate(struct clk_hw *hw, unsigned long drate, + unsigned long *parent_rate) +{ + return drate; +} + +static int spc_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_spc *spc = to_clk_spc(hw); + + return vexpress_spc_set_performance(spc->cluster, rate / 1000); +} + +static struct clk_ops clk_spc_ops = { + .recalc_rate = spc_recalc_rate, + .round_rate = spc_round_rate, + .set_rate = spc_set_rate, +}; + +struct clk *vexpress_clk_register_spc(const char *name, int cluster_id) +{ + struct clk_init_data init; + struct clk_spc *spc; + struct clk *clk; + + if (!name) { + pr_err("Invalid name passed"); + return ERR_PTR(-EINVAL); + } + + spc = kzalloc(sizeof(*spc), GFP_KERNEL); + if (!spc) { + pr_err("could not allocate spc clk\n"); + return ERR_PTR(-ENOMEM); + } + + spc->hw.init = &init; + spc->cluster = cluster_id; + + init.name = name; + init.ops = &clk_spc_ops; + init.flags = CLK_IS_ROOT | CLK_GET_RATE_NOCACHE; + init.num_parents = 0; + + clk = clk_register(NULL, &spc->hw); + if (!IS_ERR_OR_NULL(clk)) + return clk; + + pr_err("clk register failed\n"); + kfree(spc); + + return NULL; +} + +#if defined(CONFIG_OF) +void __init vexpress_clk_of_register_spc(void) +{ + char name[14] = "cpu-cluster.X"; + struct device_node *node = NULL; + struct clk *clk; + const u32 *val; + int cluster_id = 0, len; + + if (!of_find_compatible_node(NULL, NULL, "arm,vexpress-spc")) { + pr_debug("%s: No SPC found, Exiting!!\n", __func__); + return; + } + + while ((node = of_find_node_by_name(node, "cluster"))) { + val = of_get_property(node, "reg", &len); + if (val && len == 4) + cluster_id = be32_to_cpup(val); + + name[12] = cluster_id + '0'; + clk = vexpress_clk_register_spc(name, cluster_id); + if (IS_ERR(clk)) + return; + + pr_debug("Registered clock '%s'\n", name); + clk_register_clkdev(clk, NULL, name); + } +} +CLK_OF_DECLARE(spc, "arm,vexpress-spc", vexpress_clk_of_register_spc); +#endif diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 6e57543fe0b..8327444b76c 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -4,7 +4,7 @@ config ARM_BIG_LITTLE_CPUFREQ tristate "Generic ARM big LITTLE CPUfreq driver" - depends on ARM_CPU_TOPOLOGY && PM_OPP && HAVE_CLK + depends on ARM_CPU_TOPOLOGY && PM_OPP && HAVE_CLK && BIG_LITTLE help This enables the Generic CPUfreq driver for ARM big.LITTLE platforms. @@ -15,6 +15,14 @@ config ARM_DT_BL_CPUFREQ This enables probing via DT for Generic CPUfreq driver for ARM big.LITTLE platform. This gets frequency tables from DT. +config ARM_VEXPRESS_BL_CPUFREQ + tristate "ARM Vexpress big LITTLE CPUfreq driver" + select ARM_BIG_LITTLE_CPUFREQ + depends on VEXPRESS_SPC + help + This enables the CPUfreq driver for ARM Vexpress big.LITTLE platform. + If in doubt, say N. + config ARM_EXYNOS_CPUFREQ bool "SAMSUNG EXYNOS SoCs" depends on ARCH_EXYNOS diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 315b9231feb..1db9b4929cf 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY) += amd_freq_sensitivity.o obj-$(CONFIG_ARM_BIG_LITTLE_CPUFREQ) += arm_big_little.o # big LITTLE per platform glues. Keep DT_BL_CPUFREQ as the last entry in all big # LITTLE drivers, so that it is probed last. +obj-$(CONFIG_ARM_VEXPRESS_BL_CPUFREQ) += vexpress_big_little.o obj-$(CONFIG_ARM_DT_BL_CPUFREQ) += arm_big_little_dt.o obj-$(CONFIG_ARCH_DAVINCI_DA850) += davinci-cpufreq.o diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index 5d7f53fcd6f..076f25a59e0 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c @@ -24,27 +24,148 @@ #include <linux/cpufreq.h> #include <linux/cpumask.h> #include <linux/export.h> +#include <linux/mutex.h> #include <linux/of_platform.h> #include <linux/opp.h> #include <linux/slab.h> #include <linux/topology.h> #include <linux/types.h> +#include <asm/bL_switcher.h> #include "arm_big_little.h" -/* Currently we support only two clusters */ -#define MAX_CLUSTERS 2 +#ifdef CONFIG_BL_SWITCHER +bool bL_switching_enabled; +#endif + +#define ACTUAL_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq << 1 : freq) +#define VIRT_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq >> 1 : freq) static struct cpufreq_arm_bL_ops *arm_bL_ops; static struct clk *clk[MAX_CLUSTERS]; -static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS]; -static atomic_t cluster_usage[MAX_CLUSTERS] = {ATOMIC_INIT(0), ATOMIC_INIT(0)}; +static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1]; +static atomic_t cluster_usage[MAX_CLUSTERS + 1] = {ATOMIC_INIT(0), + ATOMIC_INIT(0)}; + +static unsigned int clk_big_min; /* (Big) clock frequencies */ +static unsigned int clk_little_max; /* Maximum clock frequency (Little) */ + +static DEFINE_PER_CPU(unsigned int, physical_cluster); +static DEFINE_PER_CPU(unsigned int, cpu_last_req_freq); + +static struct mutex cluster_lock[MAX_CLUSTERS]; + +static unsigned int find_cluster_maxfreq(int cluster) +{ + int j; + u32 max_freq = 0, cpu_freq; + + for_each_online_cpu(j) { + cpu_freq = per_cpu(cpu_last_req_freq, j); + + if ((cluster == per_cpu(physical_cluster, j)) && + (max_freq < cpu_freq)) + max_freq = cpu_freq; + } -static unsigned int bL_cpufreq_get(unsigned int cpu) + pr_debug("%s: cluster: %d, max freq: %d\n", __func__, cluster, + max_freq); + + return max_freq; +} + +static unsigned int clk_get_cpu_rate(unsigned int cpu) { - u32 cur_cluster = cpu_to_cluster(cpu); + u32 cur_cluster = per_cpu(physical_cluster, cpu); + u32 rate = clk_get_rate(clk[cur_cluster]) / 1000; + + /* For switcher we use virtual A15 clock rates */ + if (is_bL_switching_enabled()) + rate = VIRT_FREQ(cur_cluster, rate); + + pr_debug("%s: cpu: %d, cluster: %d, freq: %u\n", __func__, cpu, + cur_cluster, rate); - return clk_get_rate(clk[cur_cluster]) / 1000; + return rate; +} + +static unsigned int bL_cpufreq_get_rate(unsigned int cpu) +{ + if (is_bL_switching_enabled()) { + pr_debug("%s: freq: %d\n", __func__, per_cpu(cpu_last_req_freq, + cpu)); + + return per_cpu(cpu_last_req_freq, cpu); + } else { + return clk_get_cpu_rate(cpu); + } +} + +static unsigned int +bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate) +{ + u32 new_rate, prev_rate; + int ret; + bool bLs = is_bL_switching_enabled(); + + mutex_lock(&cluster_lock[new_cluster]); + + if (bLs) { + prev_rate = per_cpu(cpu_last_req_freq, cpu); + per_cpu(cpu_last_req_freq, cpu) = rate; + per_cpu(physical_cluster, cpu) = new_cluster; + + new_rate = find_cluster_maxfreq(new_cluster); + new_rate = ACTUAL_FREQ(new_cluster, new_rate); + } else { + new_rate = rate; + } + + pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d, freq: %d\n", + __func__, cpu, old_cluster, new_cluster, new_rate); + + ret = clk_set_rate(clk[new_cluster], new_rate * 1000); + if (WARN_ON(ret)) { + pr_err("clk_set_rate failed: %d, new cluster: %d\n", ret, + new_cluster); + if (bLs) { + per_cpu(cpu_last_req_freq, cpu) = prev_rate; + per_cpu(physical_cluster, cpu) = old_cluster; + } + + mutex_unlock(&cluster_lock[new_cluster]); + + return ret; + } + + mutex_unlock(&cluster_lock[new_cluster]); + + /* Recalc freq for old cluster when switching clusters */ + if (old_cluster != new_cluster) { + pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d\n", + __func__, cpu, old_cluster, new_cluster); + + /* Switch cluster */ + bL_switch_request(cpu, new_cluster); + + mutex_lock(&cluster_lock[old_cluster]); + + /* Set freq of old cluster if there are cpus left on it */ + new_rate = find_cluster_maxfreq(old_cluster); + new_rate = ACTUAL_FREQ(old_cluster, new_rate); + + if (new_rate) { + pr_debug("%s: Updating rate of old cluster: %d, to freq: %d\n", + __func__, old_cluster, new_rate); + + if (clk_set_rate(clk[old_cluster], new_rate * 1000)) + pr_err("%s: clk_set_rate failed: %d, old cluster: %d\n", + __func__, ret, old_cluster); + } + mutex_unlock(&cluster_lock[old_cluster]); + } + + return 0; } /* Validate policy frequency range */ @@ -60,12 +181,14 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { struct cpufreq_freqs freqs; - u32 cpu = policy->cpu, freq_tab_idx, cur_cluster; + u32 cpu = policy->cpu, freq_tab_idx, cur_cluster, new_cluster, + actual_cluster; int ret = 0; - cur_cluster = cpu_to_cluster(policy->cpu); + cur_cluster = cpu_to_cluster(cpu); + new_cluster = actual_cluster = per_cpu(physical_cluster, cpu); - freqs.old = bL_cpufreq_get(policy->cpu); + freqs.old = bL_cpufreq_get_rate(cpu); /* Determine valid target frequency using freq_table */ cpufreq_frequency_table_target(policy, freq_table[cur_cluster], @@ -79,13 +202,21 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy, if (freqs.old == freqs.new) return 0; + if (is_bL_switching_enabled()) { + if ((actual_cluster == A15_CLUSTER) && + (freqs.new < clk_big_min)) { + new_cluster = A7_CLUSTER; + } else if ((actual_cluster == A7_CLUSTER) && + (freqs.new > clk_little_max)) { + new_cluster = A15_CLUSTER; + } + } + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); - ret = clk_set_rate(clk[cur_cluster], freqs.new * 1000); - if (ret) { - pr_err("clk_set_rate failed: %d\n", ret); + ret = bL_cpufreq_set_rate(cpu, actual_cluster, new_cluster, freqs.new); + if (ret) return ret; - } policy->cur = freqs.new; @@ -94,7 +225,73 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy, return ret; } -static void put_cluster_clk_and_freq_table(struct device *cpu_dev) +static inline u32 get_table_count(struct cpufreq_frequency_table *table) +{ + int count; + + for (count = 0; table[count].frequency != CPUFREQ_TABLE_END; count++) + ; + + return count; +} + +/* get the minimum frequency in the cpufreq_frequency_table */ +static inline u32 get_table_min(struct cpufreq_frequency_table *table) +{ + int i; + uint32_t min_freq = ~0; + for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) + if (table[i].frequency < min_freq) + min_freq = table[i].frequency; + return min_freq; +} + +/* get the maximum frequency in the cpufreq_frequency_table */ +static inline u32 get_table_max(struct cpufreq_frequency_table *table) +{ + int i; + uint32_t max_freq = 0; + for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) + if (table[i].frequency > max_freq) + max_freq = table[i].frequency; + return max_freq; +} + +static int merge_cluster_tables(void) +{ + int i, j, k = 0, count = 1; + struct cpufreq_frequency_table *table; + + for (i = 0; i < MAX_CLUSTERS; i++) + count += get_table_count(freq_table[i]); + + table = kzalloc(sizeof(*table) * count, GFP_KERNEL); + if (!table) + return -ENOMEM; + + freq_table[MAX_CLUSTERS] = table; + + /* Add in reverse order to get freqs in increasing order */ + for (i = MAX_CLUSTERS - 1; i >= 0; i--) { + for (j = 0; freq_table[i][j].frequency != CPUFREQ_TABLE_END; + j++) { + table[k].frequency = VIRT_FREQ(i, + freq_table[i][j].frequency); + pr_debug("%s: index: %d, freq: %d\n", __func__, k, + table[k].frequency); + k++; + } + } + + table[k].index = k; + table[k].frequency = CPUFREQ_TABLE_END; + + pr_debug("%s: End, table: %p, count: %d\n", __func__, table, k); + + return 0; +} + +static void _put_cluster_clk_and_freq_table(struct device *cpu_dev) { u32 cluster = cpu_to_cluster(cpu_dev->id); @@ -105,10 +302,35 @@ static void put_cluster_clk_and_freq_table(struct device *cpu_dev) } } -static int get_cluster_clk_and_freq_table(struct device *cpu_dev) +static void put_cluster_clk_and_freq_table(struct device *cpu_dev) { u32 cluster = cpu_to_cluster(cpu_dev->id); - char name[14] = "cpu-cluster."; + int i; + + if (cluster < MAX_CLUSTERS) + return _put_cluster_clk_and_freq_table(cpu_dev); + + if (atomic_dec_return(&cluster_usage[MAX_CLUSTERS])) + return; + + for (i = 0; i < MAX_CLUSTERS; i++) { + struct device *cdev = get_cpu_device(i); + if (!cdev) { + pr_err("%s: failed to get cpu%d device\n", __func__, i); + return; + } + + _put_cluster_clk_and_freq_table(cdev); + } + + /* free virtual table */ + kfree(freq_table[MAX_CLUSTERS]); +} + +static int _get_cluster_clk_and_freq_table(struct device *cpu_dev) +{ + u32 cluster = cpu_to_cluster(cpu_dev->id); + char name[14] = "cpu-cluster.X"; int ret; if (atomic_inc_return(&cluster_usage[cluster]) != 1) @@ -149,6 +371,62 @@ atomic_dec: return ret; } +static int get_cluster_clk_and_freq_table(struct device *cpu_dev) +{ + u32 cluster = cpu_to_cluster(cpu_dev->id); + int i, ret; + + if (cluster < MAX_CLUSTERS) + return _get_cluster_clk_and_freq_table(cpu_dev); + + if (atomic_inc_return(&cluster_usage[MAX_CLUSTERS]) != 1) + return 0; + + /* + * Get data for all clusters and fill virtual cluster with a merge of + * both + */ + for (i = 0; i < MAX_CLUSTERS; i++) { + struct device *cdev = get_cpu_device(i); + if (!cdev) { + pr_err("%s: failed to get cpu%d device\n", __func__, i); + return -ENODEV; + } + + ret = _get_cluster_clk_and_freq_table(cdev); + if (ret) + goto put_clusters; + } + + ret = merge_cluster_tables(); + if (ret) + goto put_clusters; + + /* Assuming 2 cluster, set clk_big_min and clk_little_max */ + clk_big_min = get_table_min(freq_table[0]); + clk_little_max = VIRT_FREQ(1, get_table_max(freq_table[1])); + + pr_debug("%s: cluster: %d, clk_big_min: %d, clk_little_max: %d\n", + __func__, cluster, clk_big_min, clk_little_max); + + return 0; + +put_clusters: + while (i--) { + struct device *cdev = get_cpu_device(i); + if (!cdev) { + pr_err("%s: failed to get cpu%d device\n", __func__, i); + return -ENODEV; + } + + _put_cluster_clk_and_freq_table(cdev); + } + + atomic_dec(&cluster_usage[MAX_CLUSTERS]); + + return ret; +} + /* Per-CPU initialization */ static int bL_cpufreq_init(struct cpufreq_policy *policy) { @@ -177,37 +455,30 @@ static int bL_cpufreq_init(struct cpufreq_policy *policy) cpufreq_frequency_table_get_attr(freq_table[cur_cluster], policy->cpu); + if (cur_cluster < MAX_CLUSTERS) { + cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu)); + + per_cpu(physical_cluster, policy->cpu) = cur_cluster; + } else { + /* Assumption: during init, we are always running on A15 */ + per_cpu(physical_cluster, policy->cpu) = A15_CLUSTER; + } + if (arm_bL_ops->get_transition_latency) policy->cpuinfo.transition_latency = arm_bL_ops->get_transition_latency(cpu_dev); else policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - policy->cur = bL_cpufreq_get(policy->cpu); + policy->cur = clk_get_cpu_rate(policy->cpu); - cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu)); + if (is_bL_switching_enabled()) + per_cpu(cpu_last_req_freq, policy->cpu) = policy->cur; dev_info(cpu_dev, "%s: CPU %d initialized\n", __func__, policy->cpu); return 0; } -static int bL_cpufreq_exit(struct cpufreq_policy *policy) -{ - struct device *cpu_dev; - - cpu_dev = get_cpu_device(policy->cpu); - if (!cpu_dev) { - pr_err("%s: failed to get cpu%d device\n", __func__, - policy->cpu); - return -ENODEV; - } - - put_cluster_clk_and_freq_table(cpu_dev); - dev_dbg(cpu_dev, "%s: Exited, cpu: %d\n", __func__, policy->cpu); - - return 0; -} - /* Export freq_table to sysfs */ static struct freq_attr *bL_cpufreq_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, @@ -219,16 +490,47 @@ static struct cpufreq_driver bL_cpufreq_driver = { .flags = CPUFREQ_STICKY, .verify = bL_cpufreq_verify_policy, .target = bL_cpufreq_set_target, - .get = bL_cpufreq_get, + .get = bL_cpufreq_get_rate, .init = bL_cpufreq_init, - .exit = bL_cpufreq_exit, .have_governor_per_policy = true, .attr = bL_cpufreq_attr, }; +static int bL_cpufreq_switcher_notifier(struct notifier_block *nfb, + unsigned long action, void *_arg) +{ + pr_debug("%s: action: %ld\n", __func__, action); + + switch (action) { + case BL_NOTIFY_PRE_ENABLE: + case BL_NOTIFY_PRE_DISABLE: + cpufreq_unregister_driver(&bL_cpufreq_driver); + break; + + case BL_NOTIFY_POST_ENABLE: + set_switching_enabled(true); + cpufreq_register_driver(&bL_cpufreq_driver); + break; + + case BL_NOTIFY_POST_DISABLE: + set_switching_enabled(false); + cpufreq_register_driver(&bL_cpufreq_driver); + break; + + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +static struct notifier_block bL_switcher_notifier = { + .notifier_call = bL_cpufreq_switcher_notifier, +}; + int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops) { - int ret; + int ret, i; if (arm_bL_ops) { pr_debug("%s: Already registered: %s, exiting\n", __func__, @@ -243,16 +545,29 @@ int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops) arm_bL_ops = ops; + ret = bL_switcher_get_enabled(); + set_switching_enabled(ret); + + for (i = 0; i < MAX_CLUSTERS; i++) + mutex_init(&cluster_lock[i]); + ret = cpufreq_register_driver(&bL_cpufreq_driver); if (ret) { pr_info("%s: Failed registering platform driver: %s, err: %d\n", __func__, ops->name, ret); arm_bL_ops = NULL; } else { - pr_info("%s: Registered platform driver: %s\n", __func__, - ops->name); + ret = bL_switcher_register_notifier(&bL_switcher_notifier); + if (ret) { + cpufreq_unregister_driver(&bL_cpufreq_driver); + arm_bL_ops = NULL; + } else { + pr_info("%s: Registered platform driver: %s\n", + __func__, ops->name); + } } + bL_switcher_put_enabled(); return ret; } EXPORT_SYMBOL_GPL(bL_cpufreq_register); @@ -265,9 +580,31 @@ void bL_cpufreq_unregister(struct cpufreq_arm_bL_ops *ops) return; } + bL_switcher_get_enabled(); + bL_switcher_unregister_notifier(&bL_switcher_notifier); cpufreq_unregister_driver(&bL_cpufreq_driver); + bL_switcher_put_enabled(); pr_info("%s: Un-registered platform driver: %s\n", __func__, arm_bL_ops->name); + + /* For saving table get/put on every cpu in/out */ + if (is_bL_switching_enabled()) { + put_cluster_clk_and_freq_table(get_cpu_device(0)); + } else { + int i; + + for (i = 0; i < MAX_CLUSTERS; i++) { + struct device *cdev = get_cpu_device(i); + if (!cdev) { + pr_err("%s: failed to get cpu%d device\n", + __func__, i); + return; + } + + put_cluster_clk_and_freq_table(cdev); + } + } + arm_bL_ops = NULL; } EXPORT_SYMBOL_GPL(bL_cpufreq_unregister); diff --git a/drivers/cpufreq/arm_big_little.h b/drivers/cpufreq/arm_big_little.h index 79b2ce17884..4f5a03d3aef 100644 --- a/drivers/cpufreq/arm_big_little.h +++ b/drivers/cpufreq/arm_big_little.h @@ -23,6 +23,20 @@ #include <linux/device.h> #include <linux/types.h> +/* Currently we support only two clusters */ +#define A15_CLUSTER 0 +#define A7_CLUSTER 1 +#define MAX_CLUSTERS 2 + +#ifdef CONFIG_BL_SWITCHER +extern bool bL_switching_enabled; +#define is_bL_switching_enabled() bL_switching_enabled +#define set_switching_enabled(x) (bL_switching_enabled = (x)) +#else +#define is_bL_switching_enabled() false +#define set_switching_enabled(x) do { } while (0) +#endif + struct cpufreq_arm_bL_ops { char name[CPUFREQ_NAME_LEN]; int (*get_transition_latency)(struct device *cpu_dev); @@ -36,7 +50,8 @@ struct cpufreq_arm_bL_ops { static inline int cpu_to_cluster(int cpu) { - return topology_physical_package_id(cpu); + return is_bL_switching_enabled() ? MAX_CLUSTERS: + topology_physical_package_id(cpu); } int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops); diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index bfd6273fd87..1db214b2660 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -21,6 +21,7 @@ #include <linux/spinlock.h> #include <linux/notifier.h> #include <asm/cputime.h> +#include <asm/bL_switcher.h> static spinlock_t cpufreq_stats_lock; @@ -378,7 +379,7 @@ static struct notifier_block notifier_trans_block = { .notifier_call = cpufreq_stat_notifier_trans }; -static int __init cpufreq_stats_init(void) +static int cpufreq_stats_setup(void) { int ret; unsigned int cpu; @@ -406,7 +407,8 @@ static int __init cpufreq_stats_init(void) return 0; } -static void __exit cpufreq_stats_exit(void) + +static void cpufreq_stats_cleanup(void) { unsigned int cpu; @@ -421,6 +423,49 @@ static void __exit cpufreq_stats_exit(void) } } +static int cpufreq_stats_switcher_notifier(struct notifier_block *nfb, + unsigned long action, void *_arg) +{ + switch (action) { + case BL_NOTIFY_PRE_ENABLE: + case BL_NOTIFY_PRE_DISABLE: + cpufreq_stats_cleanup(); + break; + + case BL_NOTIFY_POST_ENABLE: + case BL_NOTIFY_POST_DISABLE: + cpufreq_stats_setup(); + break; + + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +static struct notifier_block switcher_notifier = { + .notifier_call = cpufreq_stats_switcher_notifier, +}; + +static int __init cpufreq_stats_init(void) +{ + int ret; + spin_lock_init(&cpufreq_stats_lock); + + ret = cpufreq_stats_setup(); + if (!ret) + bL_switcher_register_notifier(&switcher_notifier); + + return ret; +} + +static void __exit cpufreq_stats_exit(void) +{ + bL_switcher_unregister_notifier(&switcher_notifier); + cpufreq_stats_cleanup(); +} + MODULE_AUTHOR("Zou Nan hai <nanhai.zou@intel.com>"); MODULE_DESCRIPTION("'cpufreq_stats' - A driver to export cpufreq stats " "through sysfs filesystem"); diff --git a/drivers/cpufreq/vexpress_big_little.c b/drivers/cpufreq/vexpress_big_little.c new file mode 100644 index 00000000000..1abb883c051 --- /dev/null +++ b/drivers/cpufreq/vexpress_big_little.c @@ -0,0 +1,86 @@ +/* + * Vexpress big.LITTLE CPUFreq Interface driver + * + * It provides necessary ops to arm_big_little cpufreq driver and gets + * Frequency information from Device Tree. Freq table in DT must be in KHz. + * + * Copyright (C) 2013 Linaro. + * Viresh Kumar <viresh.kumar@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/cpufreq.h> +#include <linux/export.h> +#include <linux/opp.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/vexpress.h> +#include "arm_big_little.h" + +static int vexpress_init_opp_table(struct device *cpu_dev) +{ + int i = -1, count, cluster = cpu_to_cluster(cpu_dev->id); + u32 *table; + int ret; + + count = vexpress_spc_get_freq_table(cluster, &table); + if (!table || !count) { + pr_err("SPC controller returned invalid freq table"); + return -EINVAL; + } + + while (++i < count) { + /* FIXME: Voltage value */ + ret = opp_add(cpu_dev, table[i] * 1000, 900000); + if (ret) { + dev_warn(cpu_dev, "%s: Failed to add OPP %d, err: %d\n", + __func__, table[i] * 1000, ret); + return ret; + } + } + + return 0; +} + +static int vexpress_get_transition_latency(struct device *cpu_dev) +{ + /* 1 ms */ + return 1000000; +} + +static struct cpufreq_arm_bL_ops vexpress_bL_ops = { + .name = "vexpress-bL", + .get_transition_latency = vexpress_get_transition_latency, + .init_opp_table = vexpress_init_opp_table, +}; + +static int vexpress_bL_init(void) +{ + if (!vexpress_spc_check_loaded()) { + pr_info("%s: No SPC found\n", __func__); + return -ENOENT; + } + + return bL_cpufreq_register(&vexpress_bL_ops); +} +module_init(vexpress_bL_init); + +static void vexpress_bL_exit(void) +{ + return bL_cpufreq_unregister(&vexpress_bL_ops); +} +module_exit(vexpress_bL_exit); + +MODULE_AUTHOR("Viresh Kumar <viresh.kumar@linaro.org>"); +MODULE_DESCRIPTION("ARM Vexpress big LITTLE cpufreq driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile index 0d8bd55e776..7d8256a5ea9 100644 --- a/drivers/cpuidle/Makefile +++ b/drivers/cpuidle/Makefile @@ -4,6 +4,6 @@ obj-y += cpuidle.o driver.o governor.o sysfs.o governors/ obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o - +obj-$(CONFIG_BIG_LITTLE) += arm_big_little.o obj-$(CONFIG_CPU_IDLE_CALXEDA) += cpuidle-calxeda.o obj-$(CONFIG_ARCH_KIRKWOOD) += cpuidle-kirkwood.o diff --git a/drivers/cpuidle/arm_big_little.c b/drivers/cpuidle/arm_big_little.c new file mode 100644 index 00000000000..e5378896a8c --- /dev/null +++ b/drivers/cpuidle/arm_big_little.c @@ -0,0 +1,183 @@ +/* + * big.LITTLE CPU idle driver. + * + * Copyright (C) 2012 ARM Ltd. + * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/arm-cci.h> +#include <linux/bitmap.h> +#include <linux/cpuidle.h> +#include <linux/cpu_pm.h> +#include <linux/clockchips.h> +#include <linux/debugfs.h> +#include <linux/hrtimer.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/tick.h> +#include <linux/vexpress.h> +#include <asm/mcpm.h> +#include <asm/cpuidle.h> +#include <asm/cputype.h> +#include <asm/idmap.h> +#include <asm/proc-fns.h> +#include <asm/suspend.h> +#include <linux/of.h> + +static int bl_cpuidle_simple_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + ktime_t time_start, time_end; + s64 diff; + + time_start = ktime_get(); + + cpu_do_idle(); + + time_end = ktime_get(); + + local_irq_enable(); + + diff = ktime_to_us(ktime_sub(time_end, time_start)); + if (diff > INT_MAX) + diff = INT_MAX; + + dev->last_residency = (int) diff; + + return index; +} + +static int bl_enter_powerdown(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx); + +static struct cpuidle_state bl_cpuidle_set[] __initdata = { + [0] = { + .enter = bl_cpuidle_simple_enter, + .exit_latency = 1, + .target_residency = 1, + .power_usage = UINT_MAX, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = "WFI", + .desc = "ARM WFI", + }, + [1] = { + .enter = bl_enter_powerdown, + .exit_latency = 300, + .target_residency = 1000, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = "C1", + .desc = "ARM power down", + }, +}; + +struct cpuidle_driver bl_idle_driver = { + .name = "bl_idle", + .owner = THIS_MODULE, + .safe_state_index = 0 +}; + +static DEFINE_PER_CPU(struct cpuidle_device, bl_idle_dev); + +static int notrace bl_powerdown_finisher(unsigned long arg) +{ + unsigned int mpidr = read_cpuid_mpidr(); + unsigned int cluster = (mpidr >> 8) & 0xf; + unsigned int cpu = mpidr & 0xf; + + mcpm_set_entry_vector(cpu, cluster, cpu_resume); + mcpm_cpu_suspend(0); /* 0 should be replaced with better value here */ + return 1; +} + +/* + * bl_enter_powerdown - Programs CPU to enter the specified state + * @dev: cpuidle device + * @drv: The target state to be programmed + * @idx: state index + * + * Called from the CPUidle framework to program the device to the + * specified target state selected by the governor. + */ +static int bl_enter_powerdown(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx) +{ + struct timespec ts_preidle, ts_postidle, ts_idle; + int ret; + + /* Used to keep track of the total time in idle */ + getnstimeofday(&ts_preidle); + + BUG_ON(!irqs_disabled()); + + cpu_pm_enter(); + + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + + ret = cpu_suspend((unsigned long) dev, bl_powerdown_finisher); + if (ret) + BUG(); + + mcpm_cpu_powered_up(); + + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + + cpu_pm_exit(); + + getnstimeofday(&ts_postidle); + local_irq_enable(); + ts_idle = timespec_sub(ts_postidle, ts_preidle); + + dev->last_residency = ts_idle.tv_nsec / NSEC_PER_USEC + + ts_idle.tv_sec * USEC_PER_SEC; + return idx; +} + +/* + * bl_idle_init + * + * Registers the bl specific cpuidle driver with the cpuidle + * framework with the valid set of states. + */ +int __init bl_idle_init(void) +{ + struct cpuidle_device *dev; + int i, cpu_id; + struct cpuidle_driver *drv = &bl_idle_driver; + + if (!of_find_compatible_node(NULL, NULL, "arm,generic")) { + pr_info("%s: No compatible node found\n", __func__); + return -ENODEV; + } + + drv->state_count = (sizeof(bl_cpuidle_set) / + sizeof(struct cpuidle_state)); + + for (i = 0; i < drv->state_count; i++) { + memcpy(&drv->states[i], &bl_cpuidle_set[i], + sizeof(struct cpuidle_state)); + } + + cpuidle_register_driver(drv); + + for_each_cpu(cpu_id, cpu_online_mask) { + pr_err("CPUidle for CPU%d registered\n", cpu_id); + dev = &per_cpu(bl_idle_dev, cpu_id); + dev->cpu = cpu_id; + + dev->state_count = drv->state_count; + + if (cpuidle_register_device(dev)) { + printk(KERN_ERR "%s: Cpuidle register device failed\n", + __func__); + return -EIO; + } + } + + return 0; +} + +device_initcall(bl_idle_init); diff --git a/drivers/cpuidle/cpuidle-calxeda.c b/drivers/cpuidle/cpuidle-calxeda.c index 223379169cb..0e6e408c0a6 100644 --- a/drivers/cpuidle/cpuidle-calxeda.c +++ b/drivers/cpuidle/cpuidle-calxeda.c @@ -37,20 +37,6 @@ extern void highbank_set_cpu_jump(int cpu, void *jump_addr); extern void *scu_base_addr; -static inline unsigned int get_auxcr(void) -{ - unsigned int val; - asm("mrc p15, 0, %0, c1, c0, 1 @ get AUXCR" : "=r" (val) : : "cc"); - return val; -} - -static inline void set_auxcr(unsigned int val) -{ - asm volatile("mcr p15, 0, %0, c1, c0, 1 @ set AUXCR" - : : "r" (val) : "cc"); - isb(); -} - static noinline void calxeda_idle_restore(void) { set_cr(get_cr() | CR_C); diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 19ceaa60e0f..6f00dfa7d4f 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -253,10 +253,9 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids) return -EINVAL; + raw_spin_lock(&irq_controller_lock); mask = 0xff << shift; bit = gic_cpu_map[cpu] << shift; - - raw_spin_lock(&irq_controller_lock); val = readl_relaxed(reg) & ~mask; writel_relaxed(val | bit, reg); raw_spin_unlock(&irq_controller_lock); @@ -453,6 +452,12 @@ static void __cpuinit gic_cpu_init(struct gic_chip_data *gic) writel_relaxed(1, base + GIC_CPU_CTRL); } +void gic_cpu_if_down(void) +{ + void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]); + writel_relaxed(0, cpu_base + GIC_CPU_CTRL); +} + #ifdef CONFIG_CPU_PM /* * Saves the GIC distributor registers during suspend or idle. Must be called @@ -646,7 +651,9 @@ static void __init gic_pm_init(struct gic_chip_data *gic) void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) { int cpu; - unsigned long map = 0; + unsigned long flags, map = 0; + + raw_spin_lock_irqsave(&irq_controller_lock, flags); /* Convert our logical CPU mask into a physical one. */ for_each_cpu(cpu, mask) @@ -660,7 +667,143 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) /* this always happens on GIC0 */ writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT); + + raw_spin_unlock_irqrestore(&irq_controller_lock, flags); +} +#endif + +#ifdef CONFIG_BL_SWITCHER +/* + * gic_send_sgi - send a SGI directly to given CPU interface number + * + * cpu_id: the ID for the destination CPU interface + * irq: the IPI number to send a SGI for + */ +void gic_send_sgi(unsigned int cpu_id, unsigned int irq) +{ + BUG_ON(cpu_id >= NR_GIC_CPU_IF); + cpu_id = 1 << cpu_id; + /* this always happens on GIC0 */ + writel_relaxed((cpu_id << 16) | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT); +} + +/* + * gic_get_cpu_id - get the CPU interface ID for the specified CPU + * + * @cpu: the logical CPU number to get the GIC ID for. + * + * Return the CPU interface ID for the given logical CPU number, + * or -1 if the CPU number is too large or the interface ID is + * unknown (more than one bit set). + */ +int gic_get_cpu_id(unsigned int cpu) +{ + unsigned int cpu_bit; + + if (cpu >= NR_GIC_CPU_IF) + return -1; + cpu_bit = gic_cpu_map[cpu]; + if (cpu_bit & (cpu_bit - 1)) + return -1; + return __ffs(cpu_bit); +} + +/* + * gic_migrate_target - migrate IRQs to another PU interface + * + * @new_cpu_id: the CPU target ID to migrate IRQs to + * + * Migrate all peripheral interrupts with a target matching the current CPU + * to the interface corresponding to @new_cpu_id. The CPU interface mapping + * is also updated. Targets to other CPU interfaces are unchanged. + * This must be called with IRQs locally disabled. + */ +void gic_migrate_target(unsigned int new_cpu_id) +{ + unsigned int old_cpu_id, gic_irqs, gic_nr = 0; + void __iomem *dist_base; + int i, ror_val, cpu = smp_processor_id(); + u32 val, old_mask, active_mask; + + if (gic_nr >= MAX_GIC_NR) + BUG(); + + dist_base = gic_data_dist_base(&gic_data[gic_nr]); + if (!dist_base) + return; + gic_irqs = gic_data[gic_nr].gic_irqs; + + old_cpu_id = __ffs(gic_cpu_map[cpu]); + old_mask = 0x01010101 << old_cpu_id; + ror_val = (old_cpu_id - new_cpu_id) & 31; + + raw_spin_lock(&irq_controller_lock); + + gic_cpu_map[cpu] = 1 << new_cpu_id; + + for (i = 8; i < DIV_ROUND_UP(gic_irqs, 4); i++) { + val = readl_relaxed(dist_base + GIC_DIST_TARGET + i * 4); + active_mask = val & old_mask; + if (active_mask) { + val &= ~active_mask; + val |= ror32(active_mask, ror_val); + writel_relaxed(val, dist_base + GIC_DIST_TARGET + i * 4); + } + } + + raw_spin_unlock(&irq_controller_lock); + + /* + * Now let's migrate and clear any potential SGIs that might be + * pending for us (old_cpu_id). Since GIC_DIST_SGI_PENDING_SET + * is a banked register, we can only forward the SGI using + * GIC_DIST_SOFTINT. The original SGI source is lost but Linux + * doesn't use that information anyway. + * + * For the same reason we do not adjust SGI source information + * for previously sent SGIs by us to other CPUs either. + */ + for (i = 0; i < 16; i += 4) { + int j; + val = readl_relaxed(dist_base + GIC_DIST_SGI_PENDING_SET + i); + if (!val) + continue; + writel_relaxed(val, dist_base + GIC_DIST_SGI_PENDING_CLEAR + i); + for (j = i; j < i + 4; j++) { + if (val & 0xff) + writel_relaxed((1 << (new_cpu_id + 16)) | j, + dist_base + GIC_DIST_SOFTINT); + val >>= 8; + } + } } + +/* + * gic_get_sgir_physaddr - get the physical address for the SGI register + * + * REturn the physical address of the SGI register to be used + * by some early assembly code when the kernel is not yet available. + */ +static unsigned long gic_dist_physaddr; + +unsigned long gic_get_sgir_physaddr(void) +{ + if (!gic_dist_physaddr) + return 0; + return gic_dist_physaddr + GIC_DIST_SOFTINT; +} + +void __init gic_init_physaddr(struct device_node *node) +{ + struct resource res; + if (of_address_to_resource(node, 0, &res) == 0) { + gic_dist_physaddr = res.start; + pr_info("GIC physical location is %#lx\n", gic_dist_physaddr); + } +} + +#else +#define gic_init_physaddr(node) do { } while(0) #endif static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, @@ -844,6 +987,8 @@ int __init gic_of_init(struct device_node *node, struct device_node *parent) percpu_offset = 0; gic_init_bases(gic_cnt, -1, dist_base, cpu_base, percpu_offset, node); + if (!gic_cnt) + gic_init_physaddr(node); if (parent) { irq = irq_of_parse_and_map(node, 0); diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index d54e985748b..a5e54f0d6a7 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1144,7 +1144,15 @@ config MCP_UCB1200_TS endmenu config VEXPRESS_CONFIG - bool + bool "ARM Versatile Express platform infrastructure" + depends on ARM || ARM64 help Platform configuration infrastructure for the ARM Ltd. Versatile Express. + +config VEXPRESS_SPC + bool "Versatile Express SPC driver support" + depends on ARM + depends on VEXPRESS_CONFIG + help + Serial Power Controller driver for ARM Ltd. test chips. diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 718e94a2a9a..3a0120315aa 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -153,5 +153,6 @@ obj-$(CONFIG_MFD_SEC_CORE) += sec-core.o sec-irq.o obj-$(CONFIG_MFD_SYSCON) += syscon.o obj-$(CONFIG_MFD_LM3533) += lm3533-core.o lm3533-ctrlbank.o obj-$(CONFIG_VEXPRESS_CONFIG) += vexpress-config.o vexpress-sysreg.o +obj-$(CONFIG_VEXPRESS_SPC) += vexpress-spc.o obj-$(CONFIG_MFD_RETU) += retu-mfd.o obj-$(CONFIG_MFD_AS3711) += as3711.o diff --git a/drivers/mfd/vexpress-config.c b/drivers/mfd/vexpress-config.c index 84ce6b9daa3..1af2b0e0182 100644 --- a/drivers/mfd/vexpress-config.c +++ b/drivers/mfd/vexpress-config.c @@ -86,29 +86,13 @@ void vexpress_config_bridge_unregister(struct vexpress_config_bridge *bridge) } EXPORT_SYMBOL(vexpress_config_bridge_unregister); - -struct vexpress_config_func { - struct vexpress_config_bridge *bridge; - void *func; -}; - -struct vexpress_config_func *__vexpress_config_func_get(struct device *dev, - struct device_node *node) +static struct vexpress_config_bridge * + vexpress_config_bridge_find(struct device_node *node) { - struct device_node *bridge_node; - struct vexpress_config_func *func; int i; + struct vexpress_config_bridge *res = NULL; + struct device_node *bridge_node = of_node_get(node); - if (WARN_ON(dev && node && dev->of_node != node)) - return NULL; - if (dev && !node) - node = dev->of_node; - - func = kzalloc(sizeof(*func), GFP_KERNEL); - if (!func) - return NULL; - - bridge_node = of_node_get(node); while (bridge_node) { const __be32 *prop = of_get_property(bridge_node, "arm,vexpress,config-bridge", NULL); @@ -129,13 +113,46 @@ struct vexpress_config_func *__vexpress_config_func_get(struct device *dev, if (test_bit(i, vexpress_config_bridges_map) && bridge->node == bridge_node) { - func->bridge = bridge; - func->func = bridge->info->func_get(dev, node); + res = bridge; break; } } mutex_unlock(&vexpress_config_bridges_mutex); + return res; +} + + +struct vexpress_config_func { + struct vexpress_config_bridge *bridge; + void *func; +}; + +struct vexpress_config_func *__vexpress_config_func_get( + struct vexpress_config_bridge *bridge, + struct device *dev, + struct device_node *node, + const char *id) +{ + struct vexpress_config_func *func; + + if (WARN_ON(dev && node && dev->of_node != node)) + return NULL; + if (dev && !node) + node = dev->of_node; + + if (!bridge) + bridge = vexpress_config_bridge_find(node); + if (!bridge) + return NULL; + + func = kzalloc(sizeof(*func), GFP_KERNEL); + if (!func) + return NULL; + + func->bridge = bridge; + func->func = bridge->info->func_get(dev, node, id); + if (!func->func) { of_node_put(node); kfree(func); diff --git a/drivers/mfd/vexpress-spc.c b/drivers/mfd/vexpress-spc.c new file mode 100644 index 00000000000..0c6718abf1b --- /dev/null +++ b/drivers/mfd/vexpress-spc.c @@ -0,0 +1,633 @@ +/* + * Versatile Express Serial Power Controller (SPC) support + * + * Copyright (C) 2013 ARM Ltd. + * + * Authors: Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com> + * Achin Gupta <achin.gupta@arm.com> + * Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/device.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/slab.h> +#include <linux/vexpress.h> + +#include <asm/cacheflush.h> + +#define SCC_CFGREG19 0x120 +#define SCC_CFGREG20 0x124 +#define A15_CONF 0x400 +#define A7_CONF 0x500 +#define SYS_INFO 0x700 +#define PERF_LVL_A15 0xB00 +#define PERF_REQ_A15 0xB04 +#define PERF_LVL_A7 0xB08 +#define PERF_REQ_A7 0xB0c +#define SYS_CFGCTRL 0xB10 +#define SYS_CFGCTRL_REQ 0xB14 +#define PWC_STATUS 0xB18 +#define PWC_FLAG 0xB1c +#define WAKE_INT_MASK 0xB24 +#define WAKE_INT_RAW 0xB28 +#define WAKE_INT_STAT 0xB2c +#define A15_PWRDN_EN 0xB30 +#define A7_PWRDN_EN 0xB34 +#define A7_PWRDNACK 0xB54 +#define A15_BX_ADDR0 0xB68 +#define SYS_CFG_WDATA 0xB70 +#define SYS_CFG_RDATA 0xB74 +#define A7_BX_ADDR0 0xB78 + +#define GBL_WAKEUP_INT_MSK (0x3 << 10) + +#define CLKF_SHIFT 16 +#define CLKF_MASK 0x1FFF +#define CLKR_SHIFT 0 +#define CLKR_MASK 0x3F +#define CLKOD_SHIFT 8 +#define CLKOD_MASK 0xF + +#define OPP_FUNCTION 6 +#define OPP_BASE_DEVICE 0x300 +#define OPP_A15_OFFSET 0x4 +#define OPP_A7_OFFSET 0xc + +#define SYS_CFGCTRL_START (1 << 31) +#define SYS_CFGCTRL_WRITE (1 << 30) +#define SYS_CFGCTRL_FUNC(n) (((n) & 0x3f) << 20) +#define SYS_CFGCTRL_DEVICE(n) (((n) & 0xfff) << 0) + +#define MAX_OPPS 8 +#define MAX_CLUSTERS 2 + +enum { + A15_OPP_TYPE = 0, + A7_OPP_TYPE = 1, + SYS_CFGCTRL_TYPE = 2, + INVALID_TYPE +}; + +#define STAT_COMPLETE(type) ((1 << 0) << (type << 2)) +#define STAT_ERR(type) ((1 << 1) << (type << 2)) +#define RESPONSE_MASK(type) (STAT_COMPLETE(type) | STAT_ERR(type)) + +struct vexpress_spc_drvdata { + void __iomem *baseaddr; + u32 a15_clusid; + int irq; + u32 cur_req_type; + u32 freqs[MAX_CLUSTERS][MAX_OPPS]; + int freqs_cnt[MAX_CLUSTERS]; +}; + +enum spc_func_type { + CONFIG_FUNC = 0, + PERF_FUNC = 1, +}; + +struct vexpress_spc_func { + enum spc_func_type type; + u32 function; + u32 device; +}; + +static struct vexpress_spc_drvdata *info; +static u32 *vexpress_spc_config_data; +static struct vexpress_config_bridge *vexpress_spc_config_bridge; +static struct vexpress_config_func *opp_func, *perf_func; + +static int vexpress_spc_load_result = -EAGAIN; + +static bool vexpress_spc_initialized(void) +{ + return vexpress_spc_load_result == 0; +} + +/** + * vexpress_spc_write_resume_reg() - set the jump address used for warm boot + * + * @cluster: mpidr[15:8] bitfield describing cluster affinity level + * @cpu: mpidr[7:0] bitfield describing cpu affinity level + * @addr: physical resume address + */ +void vexpress_spc_write_resume_reg(u32 cluster, u32 cpu, u32 addr) +{ + void __iomem *baseaddr; + + if (WARN_ON_ONCE(cluster >= MAX_CLUSTERS)) + return; + + if (cluster != info->a15_clusid) + baseaddr = info->baseaddr + A7_BX_ADDR0 + (cpu << 2); + else + baseaddr = info->baseaddr + A15_BX_ADDR0 + (cpu << 2); + + writel_relaxed(addr, baseaddr); +} + +/** + * vexpress_spc_get_nb_cpus() - get number of cpus in a cluster + * + * @cluster: mpidr[15:8] bitfield describing cluster affinity level + * + * Return: number of cpus in the cluster + * -EINVAL if cluster number invalid + */ +int vexpress_spc_get_nb_cpus(u32 cluster) +{ + u32 val; + + if (WARN_ON_ONCE(cluster >= MAX_CLUSTERS)) + return -EINVAL; + + val = readl_relaxed(info->baseaddr + SYS_INFO); + val = (cluster != info->a15_clusid) ? (val >> 20) : (val >> 16); + return val & 0xf; +} +EXPORT_SYMBOL_GPL(vexpress_spc_get_nb_cpus); + +/** + * vexpress_spc_get_performance - get current performance level of cluster + * @cluster: mpidr[15:8] bitfield describing cluster affinity level + * @freq: pointer to the performance level to be assigned + * + * Return: 0 on success + * < 0 on read error + */ +int vexpress_spc_get_performance(u32 cluster, u32 *freq) +{ + u32 perf_cfg_reg; + int perf, ret; + + if (!vexpress_spc_initialized() || (cluster >= MAX_CLUSTERS)) + return -EINVAL; + + perf_cfg_reg = cluster != info->a15_clusid ? PERF_LVL_A7 : PERF_LVL_A15; + ret = vexpress_config_read(perf_func, perf_cfg_reg, &perf); + + if (!ret) + *freq = info->freqs[cluster][perf]; + + return ret; +} +EXPORT_SYMBOL_GPL(vexpress_spc_get_performance); + +/** + * vexpress_spc_get_perf_index - get performance level corresponding to + * a frequency + * @cluster: mpidr[15:8] bitfield describing cluster affinity level + * @freq: frequency to be looked-up + * + * Return: perf level index on success + * -EINVAL on error + */ +static int vexpress_spc_find_perf_index(u32 cluster, u32 freq) +{ + int idx; + + for (idx = 0; idx < info->freqs_cnt[cluster]; idx++) + if (info->freqs[cluster][idx] == freq) + break; + return (idx == info->freqs_cnt[cluster]) ? -EINVAL : idx; +} + +/** + * vexpress_spc_set_performance - set current performance level of cluster + * + * @cluster: mpidr[15:8] bitfield describing cluster affinity level + * @freq: performance level to be programmed + * + * Returns: 0 on success + * < 0 on write error + */ +int vexpress_spc_set_performance(u32 cluster, u32 freq) +{ + int ret, perf, offset; + + if (!vexpress_spc_initialized() || (cluster >= MAX_CLUSTERS)) + return -EINVAL; + + offset = (cluster != info->a15_clusid) ? PERF_LVL_A7 : PERF_LVL_A15; + + perf = vexpress_spc_find_perf_index(cluster, freq); + + if (perf < 0 || perf >= MAX_OPPS) + return -EINVAL; + + ret = vexpress_config_write(perf_func, offset, perf); + + return ret; +} +EXPORT_SYMBOL_GPL(vexpress_spc_set_performance); + +static void vexpress_spc_set_wake_intr(u32 mask) +{ + writel_relaxed(mask & VEXPRESS_SPC_WAKE_INTR_MASK, + info->baseaddr + WAKE_INT_MASK); +} + +static inline void reg_bitmask(u32 *reg, u32 mask, bool set) +{ + if (set) + *reg |= mask; + else + *reg &= ~mask; +} + +/** + * vexpress_spc_set_global_wakeup_intr() + * + * Function to set/clear global wakeup IRQs. Not protected by locking since + * it might be used in code paths where normal cacheable locks are not + * working. Locking must be provided by the caller to ensure atomicity. + * + * @set: if true, global wake-up IRQs are set, if false they are cleared + */ +void vexpress_spc_set_global_wakeup_intr(bool set) +{ + u32 wake_int_mask_reg = 0; + + wake_int_mask_reg = readl_relaxed(info->baseaddr + WAKE_INT_MASK); + reg_bitmask(&wake_int_mask_reg, GBL_WAKEUP_INT_MSK, set); + vexpress_spc_set_wake_intr(wake_int_mask_reg); +} + +/** + * vexpress_spc_set_cpu_wakeup_irq() + * + * Function to set/clear per-CPU wake-up IRQs. Not protected by locking since + * it might be used in code paths where normal cacheable locks are not + * working. Locking must be provided by the caller to ensure atomicity. + * + * @cpu: mpidr[7:0] bitfield describing cpu affinity level + * @cluster: mpidr[15:8] bitfield describing cluster affinity level + * @set: if true, wake-up IRQs are set, if false they are cleared + */ +void vexpress_spc_set_cpu_wakeup_irq(u32 cpu, u32 cluster, bool set) +{ + u32 mask = 0; + u32 wake_int_mask_reg = 0; + + mask = 1 << cpu; + if (info->a15_clusid != cluster) + mask <<= 4; + + wake_int_mask_reg = readl_relaxed(info->baseaddr + WAKE_INT_MASK); + reg_bitmask(&wake_int_mask_reg, mask, set); + vexpress_spc_set_wake_intr(wake_int_mask_reg); +} + +/** + * vexpress_spc_powerdown_enable() + * + * Function to enable/disable cluster powerdown. Not protected by locking + * since it might be used in code paths where normal cacheable locks are not + * working. Locking must be provided by the caller to ensure atomicity. + * + * @cluster: mpidr[15:8] bitfield describing cluster affinity level + * @enable: if true enables powerdown, if false disables it + */ +void vexpress_spc_powerdown_enable(u32 cluster, bool enable) +{ + u32 pwdrn_reg = 0; + + if (cluster >= MAX_CLUSTERS) + return; + pwdrn_reg = cluster != info->a15_clusid ? A7_PWRDN_EN : A15_PWRDN_EN; + writel_relaxed(enable, info->baseaddr + pwdrn_reg); +} + +irqreturn_t vexpress_spc_irq_handler(int irq, void *data) +{ + int ret; + u32 status = readl_relaxed(info->baseaddr + PWC_STATUS); + + if (!(status & RESPONSE_MASK(info->cur_req_type))) + return IRQ_NONE; + + if ((status == STAT_COMPLETE(SYS_CFGCTRL_TYPE)) + && vexpress_spc_config_data) { + *vexpress_spc_config_data = + readl_relaxed(info->baseaddr + SYS_CFG_RDATA); + vexpress_spc_config_data = NULL; + } + + ret = STAT_COMPLETE(info->cur_req_type) ? 0 : -EIO; + info->cur_req_type = INVALID_TYPE; + vexpress_config_complete(vexpress_spc_config_bridge, ret); + return IRQ_HANDLED; +} + +/** + * Based on the firmware documentation, this is always fixed to 20 + * All the 4 OSC: A15 PLL0/1, A7 PLL0/1 must be programmed same + * values for both control and value registers. + * This function uses A15 PLL 0 registers to compute multiple factor + * F out = F in * (CLKF + 1) / ((CLKOD + 1) * (CLKR + 1)) + */ +static inline int __get_mult_factor(void) +{ + int i_div, o_div, f_div; + u32 tmp; + + tmp = readl(info->baseaddr + SCC_CFGREG19); + f_div = (tmp >> CLKF_SHIFT) & CLKF_MASK; + + tmp = readl(info->baseaddr + SCC_CFGREG20); + o_div = (tmp >> CLKOD_SHIFT) & CLKOD_MASK; + i_div = (tmp >> CLKR_SHIFT) & CLKR_MASK; + + return (f_div + 1) / ((o_div + 1) * (i_div + 1)); +} + +/** + * vexpress_spc_populate_opps() - initialize opp tables from microcontroller + * + * @cluster: mpidr[15:8] bitfield describing cluster affinity level + * + * Return: 0 on success + * < 0 on error + */ +static int vexpress_spc_populate_opps(u32 cluster) +{ + u32 data = 0, ret, i, offset; + int mult_fact = __get_mult_factor(); + + if (WARN_ON_ONCE(cluster >= MAX_CLUSTERS)) + return -EINVAL; + + offset = cluster != info->a15_clusid ? OPP_A7_OFFSET : OPP_A15_OFFSET; + for (i = 0; i < MAX_OPPS; i++) { + ret = vexpress_config_read(opp_func, i + offset, &data); + if (!ret) + info->freqs[cluster][i] = (data & 0xFFFFF) * mult_fact; + else + break; + } + + info->freqs_cnt[cluster] = i; + return ret; +} + +/** + * vexpress_spc_get_freq_table() - Retrieve a pointer to the frequency + * table for a given cluster + * + * @cluster: mpidr[15:8] bitfield describing cluster affinity level + * @fptr: pointer to be initialized + * Return: operating points count on success + * -EINVAL on pointer error + */ +int vexpress_spc_get_freq_table(u32 cluster, u32 **fptr) +{ + if (WARN_ON_ONCE(!fptr || cluster >= MAX_CLUSTERS)) + return -EINVAL; + *fptr = info->freqs[cluster]; + return info->freqs_cnt[cluster]; +} +EXPORT_SYMBOL_GPL(vexpress_spc_get_freq_table); + +static void *vexpress_spc_func_get(struct device *dev, + struct device_node *node, const char *id) +{ + struct vexpress_spc_func *spc_func; + u32 func_device[2]; + int err = 0; + + spc_func = kzalloc(sizeof(*spc_func), GFP_KERNEL); + if (!spc_func) + return NULL; + + if (strcmp(id, "opp") == 0) { + spc_func->type = CONFIG_FUNC; + spc_func->function = OPP_FUNCTION; + spc_func->device = OPP_BASE_DEVICE; + } else if (strcmp(id, "perf") == 0) { + spc_func->type = PERF_FUNC; + } else if (node) { + of_node_get(node); + err = of_property_read_u32_array(node, + "arm,vexpress-sysreg,func", func_device, + ARRAY_SIZE(func_device)); + of_node_put(node); + spc_func->type = CONFIG_FUNC; + spc_func->function = func_device[0]; + spc_func->device = func_device[1]; + } + + if (WARN_ON(err)) { + kfree(spc_func); + return NULL; + } + + pr_debug("func 0x%p = 0x%x, %d %d\n", spc_func, + spc_func->function, + spc_func->device, + spc_func->type); + + return spc_func; +} + +static void vexpress_spc_func_put(void *func) +{ + kfree(func); +} + +static int vexpress_spc_func_exec(void *func, int offset, bool write, + u32 *data) +{ + struct vexpress_spc_func *spc_func = func; + u32 command; + + if (!data) + return -EINVAL; + /* + * Setting and retrieval of operating points is not part of + * DCC config interface. It was made to go through the same + * code path so that requests to the M3 can be serialized + * properly with config reads/writes through the common + * vexpress config interface + */ + switch (spc_func->type) { + case PERF_FUNC: + if (write) { + info->cur_req_type = (offset == PERF_LVL_A15) ? + A15_OPP_TYPE : A7_OPP_TYPE; + writel_relaxed(*data, info->baseaddr + offset); + return VEXPRESS_CONFIG_STATUS_WAIT; + } else { + *data = readl_relaxed(info->baseaddr + offset); + return VEXPRESS_CONFIG_STATUS_DONE; + } + case CONFIG_FUNC: + info->cur_req_type = SYS_CFGCTRL_TYPE; + + command = SYS_CFGCTRL_START; + command |= write ? SYS_CFGCTRL_WRITE : 0; + command |= SYS_CFGCTRL_FUNC(spc_func->function); + command |= SYS_CFGCTRL_DEVICE(spc_func->device + offset); + + pr_debug("command %x\n", command); + + if (!write) + vexpress_spc_config_data = data; + else + writel_relaxed(*data, info->baseaddr + SYS_CFG_WDATA); + writel_relaxed(command, info->baseaddr + SYS_CFGCTRL); + + return VEXPRESS_CONFIG_STATUS_WAIT; + default: + return -EINVAL; + } +} + +struct vexpress_config_bridge_info vexpress_spc_config_bridge_info = { + .name = "vexpress-spc", + .func_get = vexpress_spc_func_get, + .func_put = vexpress_spc_func_put, + .func_exec = vexpress_spc_func_exec, +}; + +static const struct of_device_id vexpress_spc_ids[] __initconst = { + { .compatible = "arm,vexpress-spc,v2p-ca15_a7" }, + { .compatible = "arm,vexpress-spc" }, + {}, +}; + +static int __init vexpress_spc_init(void) +{ + int ret; + struct device_node *node = of_find_matching_node(NULL, + vexpress_spc_ids); + + if (!node) + return -ENODEV; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + pr_err("%s: unable to allocate mem\n", __func__); + return -ENOMEM; + } + info->cur_req_type = INVALID_TYPE; + + info->baseaddr = of_iomap(node, 0); + if (WARN_ON(!info->baseaddr)) { + ret = -ENXIO; + goto mem_free; + } + + info->irq = irq_of_parse_and_map(node, 0); + + if (WARN_ON(!info->irq)) { + ret = -ENXIO; + goto unmap; + } + + readl_relaxed(info->baseaddr + PWC_STATUS); + + ret = request_irq(info->irq, vexpress_spc_irq_handler, + IRQF_DISABLED | IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + "arm-spc", info); + + if (ret) { + pr_err("IRQ %d request failed\n", info->irq); + ret = -ENODEV; + goto unmap; + } + + info->a15_clusid = readl_relaxed(info->baseaddr + A15_CONF) & 0xf; + + vexpress_spc_config_bridge = vexpress_config_bridge_register( + node, &vexpress_spc_config_bridge_info); + + if (WARN_ON(!vexpress_spc_config_bridge)) { + ret = -ENODEV; + goto unmap; + } + + opp_func = vexpress_config_func_get(vexpress_spc_config_bridge, "opp"); + perf_func = + vexpress_config_func_get(vexpress_spc_config_bridge, "perf"); + + if (!opp_func || !perf_func) { + ret = -ENODEV; + goto unmap; + } + + if (vexpress_spc_populate_opps(0) || vexpress_spc_populate_opps(1)) { + if (info->irq) + free_irq(info->irq, info); + pr_err("failed to build OPP table\n"); + ret = -ENODEV; + goto unmap; + } + /* + * Multi-cluster systems may need this data when non-coherent, during + * cluster power-up/power-down. Make sure it reaches main memory: + */ + sync_cache_w(info); + sync_cache_w(&info); + pr_info("vexpress-spc loaded at %p\n", info->baseaddr); + return 0; + +unmap: + iounmap(info->baseaddr); + +mem_free: + kfree(info); + return ret; +} + +static bool __init __vexpress_spc_check_loaded(void); +/* + * Pointer spc_check_loaded is swapped after init hence it is safe + * to initialize it to a function in the __init section + */ +static bool (*spc_check_loaded)(void) __refdata = &__vexpress_spc_check_loaded; + +static bool __init __vexpress_spc_check_loaded(void) +{ + if (vexpress_spc_load_result == -EAGAIN) + vexpress_spc_load_result = vexpress_spc_init(); + spc_check_loaded = &vexpress_spc_initialized; + return vexpress_spc_initialized(); +} + +/* + * Function exported to manage early_initcall ordering. + * SPC code is needed very early in the boot process + * to bring CPUs out of reset and initialize power + * management back-end. After boot swap pointers to + * make the functionality check available to loadable + * modules, when early boot init functions have been + * already freed from kernel address space. + */ +bool vexpress_spc_check_loaded(void) +{ + return spc_check_loaded(); +} +EXPORT_SYMBOL_GPL(vexpress_spc_check_loaded); + +static int __init vexpress_spc_early_init(void) +{ + __vexpress_spc_check_loaded(); + return vexpress_spc_load_result; +} +early_initcall(vexpress_spc_early_init); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Serial Power Controller (SPC) support"); diff --git a/drivers/mfd/vexpress-sysreg.c b/drivers/mfd/vexpress-sysreg.c index 96a020b1dcd..7f429afce11 100644 --- a/drivers/mfd/vexpress-sysreg.c +++ b/drivers/mfd/vexpress-sysreg.c @@ -165,7 +165,7 @@ static u32 *vexpress_sysreg_config_data; static int vexpress_sysreg_config_tries; static void *vexpress_sysreg_config_func_get(struct device *dev, - struct device_node *node) + struct device_node *node, const char *id) { struct vexpress_sysreg_config_func *config_func; u32 site; @@ -351,6 +351,8 @@ void __init vexpress_sysreg_of_early_init(void) } +#ifdef CONFIG_GPIOLIB + #define VEXPRESS_SYSREG_GPIO(_name, _reg, _value) \ [VEXPRESS_GPIO_##_name] = { \ .reg = _reg, \ @@ -445,6 +447,8 @@ struct gpio_led_platform_data vexpress_sysreg_leds_pdata = { .leds = vexpress_sysreg_leds, }; +#endif + static ssize_t vexpress_sysreg_sys_id_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -480,6 +484,9 @@ static int vexpress_sysreg_probe(struct platform_device *pdev) setup_timer(&vexpress_sysreg_config_timer, vexpress_sysreg_config_complete, 0); + vexpress_sysreg_dev = &pdev->dev; + +#ifdef CONFIG_GPIOLIB vexpress_sysreg_gpio_chip.dev = &pdev->dev; err = gpiochip_add(&vexpress_sysreg_gpio_chip); if (err) { @@ -490,11 +497,10 @@ static int vexpress_sysreg_probe(struct platform_device *pdev) return err; } - vexpress_sysreg_dev = &pdev->dev; - platform_device_register_data(vexpress_sysreg_dev, "leds-gpio", PLATFORM_DEVID_AUTO, &vexpress_sysreg_leds_pdata, sizeof(vexpress_sysreg_leds_pdata)); +#endif device_create_file(vexpress_sysreg_dev, &dev_attr_sys_id); diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index dfbf978315d..bdd703c6bf1 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -1896,7 +1896,7 @@ static int smc_probe(struct net_device *dev, void __iomem *ioaddr, SMC_SELECT_BANK(lp, 1); val = SMC_GET_BASE(lp); val = ((val & 0x1F00) >> 3) << SMC_IO_SHIFT; - if (((unsigned int)ioaddr & (0x3e0 << SMC_IO_SHIFT)) != val) { + if (((unsigned long)ioaddr & (0x3e0 << SMC_IO_SHIFT)) != val) { printk("%s: IOADDR %p doesn't match configuration (%x).\n", CARDNAME, ioaddr, val); } diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 38f0b312ff8..663d2d0448b 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -439,7 +439,7 @@ void phy_start_machine(struct phy_device *phydev, { phydev->adjust_state = handler; - schedule_delayed_work(&phydev->state_queue, HZ); + queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, HZ); } /** @@ -500,7 +500,7 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) disable_irq_nosync(irq); atomic_inc(&phydev->irq_disable); - schedule_work(&phydev->phy_queue); + queue_work(system_power_efficient_wq, &phydev->phy_queue); return IRQ_HANDLED; } @@ -655,7 +655,7 @@ static void phy_change(struct work_struct *work) /* reschedule state queue work to run as soon as possible */ cancel_delayed_work_sync(&phydev->state_queue); - schedule_delayed_work(&phydev->state_queue, 0); + queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, 0); return; @@ -918,7 +918,8 @@ void phy_state_machine(struct work_struct *work) if (err < 0) phy_error(phydev); - schedule_delayed_work(&phydev->state_queue, PHY_STATE_TIME * HZ); + queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, + PHY_STATE_TIME * HZ); } static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad, diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig index 349e9ae8090..ee039dcead0 100644 --- a/drivers/power/reset/Kconfig +++ b/drivers/power/reset/Kconfig @@ -32,7 +32,8 @@ config POWER_RESET_RESTART user presses a key. u-boot then boots into Linux. config POWER_RESET_VEXPRESS - bool + bool "ARM Versatile Express power-off and reset driver" + depends on ARM || ARM64 depends on POWER_RESET help Power off and reset support for the ARM Ltd. Versatile diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index 2e937bdace6..d0d9c5ea9e3 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -39,6 +39,11 @@ config VIDEOMODE_HELPERS config HDMI bool +config VEXPRESS_DVI_CONTROL + bool "Versatile Express DVI control" + depends on FB && VEXPRESS_CONFIG + default y + menuconfig FB tristate "Support for frame buffer devices" ---help--- @@ -326,6 +331,21 @@ config FB_ARMCLCD here and read <file:Documentation/kbuild/modules.txt>. The module will be called amba-clcd. +config FB_ARMHDLCD + tristate "ARM High Definition LCD support" + depends on FB && ARM + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This framebuffer device driver is for the ARM High Definition + Colour LCD controller. + + If you want to compile this as a module (=code which can be + inserted into and removed from the running kernel), say M + here and read <file:Documentation/kbuild/modules.txt>. The module + will be called arm-hdlcd. + config FB_ACORN bool "Acorn VIDC support" depends on (FB = y) && ARM && ARCH_ACORN diff --git a/drivers/video/Makefile b/drivers/video/Makefile index e8bae8dd480..33869eea498 100644 --- a/drivers/video/Makefile +++ b/drivers/video/Makefile @@ -99,6 +99,7 @@ obj-$(CONFIG_FB_ATMEL) += atmel_lcdfb.o obj-$(CONFIG_FB_PVR2) += pvr2fb.o obj-$(CONFIG_FB_VOODOO1) += sstfb.o obj-$(CONFIG_FB_ARMCLCD) += amba-clcd.o +obj-$(CONFIG_FB_ARMHDLCD) += arm-hdlcd.o obj-$(CONFIG_FB_GOLDFISH) += goldfishfb.o obj-$(CONFIG_FB_68328) += 68328fb.o obj-$(CONFIG_FB_GBE) += gbefb.o @@ -177,3 +178,6 @@ obj-$(CONFIG_VIDEOMODE_HELPERS) += display_timing.o videomode.o ifeq ($(CONFIG_OF),y) obj-$(CONFIG_VIDEOMODE_HELPERS) += of_display_timing.o of_videomode.o endif + +# platform specific output drivers +obj-$(CONFIG_VEXPRESS_DVI_CONTROL) += vexpress-dvi.o diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c index 0a2cce7285b..94a1998338d 100644 --- a/drivers/video/amba-clcd.c +++ b/drivers/video/amba-clcd.c @@ -16,7 +16,10 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/memblock.h> #include <linux/mm.h> +#include <linux/of.h> #include <linux/fb.h> #include <linux/init.h> #include <linux/ioport.h> @@ -30,6 +33,16 @@ #define to_clcd(info) container_of(info, struct clcd_fb, fb) +#ifdef CONFIG_ARM +#define clcdfb_dma_alloc dma_alloc_writecombine +#define clcdfb_dma_free dma_free_writecombine +#define clcdfb_dma_mmap dma_mmap_writecombine +#else +#define clcdfb_dma_alloc dma_alloc_coherent +#define clcdfb_dma_free dma_free_coherent +#define clcdfb_dma_mmap dma_mmap_coherent +#endif + /* This is limited to 16 characters when displayed by X startup */ static const char *clcd_name = "CLCD FB"; @@ -392,6 +405,44 @@ static int clcdfb_blank(int blank_mode, struct fb_info *info) return 0; } +int clcdfb_mmap_dma(struct clcd_fb *fb, struct vm_area_struct *vma) +{ + return clcdfb_dma_mmap(&fb->dev->dev, vma, + fb->fb.screen_base, + fb->fb.fix.smem_start, + fb->fb.fix.smem_len); +} + +int clcdfb_mmap_io(struct clcd_fb *fb, struct vm_area_struct *vma) +{ + unsigned long user_count, count, pfn, off; + + user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + count = PAGE_ALIGN(fb->fb.fix.smem_len) >> PAGE_SHIFT; + pfn = fb->fb.fix.smem_start >> PAGE_SHIFT; + off = vma->vm_pgoff; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (off < count && user_count <= (count - off)) + return remap_pfn_range(vma, vma->vm_start, pfn + off, + user_count << PAGE_SHIFT, + vma->vm_page_prot); + + return -ENXIO; +} + +void clcdfb_remove_dma(struct clcd_fb *fb) +{ + clcdfb_dma_free(&fb->dev->dev, fb->fb.fix.smem_len, + fb->fb.screen_base, fb->fb.fix.smem_start); +} + +void clcdfb_remove_io(struct clcd_fb *fb) +{ + iounmap(fb->fb.screen_base); +} + static int clcdfb_mmap(struct fb_info *info, struct vm_area_struct *vma) { @@ -542,14 +593,239 @@ static int clcdfb_register(struct clcd_fb *fb) return ret; } +struct string_lookup { + const char *string; + const u32 val; +}; + +static struct string_lookup vmode_lookups[] = { + { "FB_VMODE_NONINTERLACED", FB_VMODE_NONINTERLACED}, + { "FB_VMODE_INTERLACED", FB_VMODE_INTERLACED}, + { "FB_VMODE_DOUBLE", FB_VMODE_DOUBLE}, + { "FB_VMODE_ODD_FLD_FIRST", FB_VMODE_ODD_FLD_FIRST}, + { NULL, 0 }, +}; + +static struct string_lookup tim2_lookups[] = { + { "TIM2_CLKSEL", TIM2_CLKSEL}, + { "TIM2_IVS", TIM2_IVS}, + { "TIM2_IHS", TIM2_IHS}, + { "TIM2_IPC", TIM2_IPC}, + { "TIM2_IOE", TIM2_IOE}, + { "TIM2_BCD", TIM2_BCD}, + { NULL, 0}, +}; +static struct string_lookup cntl_lookups[] = { + {"CNTL_LCDEN", CNTL_LCDEN}, + {"CNTL_LCDBPP1", CNTL_LCDBPP1}, + {"CNTL_LCDBPP2", CNTL_LCDBPP2}, + {"CNTL_LCDBPP4", CNTL_LCDBPP4}, + {"CNTL_LCDBPP8", CNTL_LCDBPP8}, + {"CNTL_LCDBPP16", CNTL_LCDBPP16}, + {"CNTL_LCDBPP16_565", CNTL_LCDBPP16_565}, + {"CNTL_LCDBPP16_444", CNTL_LCDBPP16_444}, + {"CNTL_LCDBPP24", CNTL_LCDBPP24}, + {"CNTL_LCDBW", CNTL_LCDBW}, + {"CNTL_LCDTFT", CNTL_LCDTFT}, + {"CNTL_LCDMONO8", CNTL_LCDMONO8}, + {"CNTL_LCDDUAL", CNTL_LCDDUAL}, + {"CNTL_BGR", CNTL_BGR}, + {"CNTL_BEBO", CNTL_BEBO}, + {"CNTL_BEPO", CNTL_BEPO}, + {"CNTL_LCDPWR", CNTL_LCDPWR}, + {"CNTL_LCDVCOMP(1)", CNTL_LCDVCOMP(1)}, + {"CNTL_LCDVCOMP(2)", CNTL_LCDVCOMP(2)}, + {"CNTL_LCDVCOMP(3)", CNTL_LCDVCOMP(3)}, + {"CNTL_LCDVCOMP(4)", CNTL_LCDVCOMP(4)}, + {"CNTL_LCDVCOMP(5)", CNTL_LCDVCOMP(5)}, + {"CNTL_LCDVCOMP(6)", CNTL_LCDVCOMP(6)}, + {"CNTL_LCDVCOMP(7)", CNTL_LCDVCOMP(7)}, + {"CNTL_LDMAFIFOTIME", CNTL_LDMAFIFOTIME}, + {"CNTL_WATERMARK", CNTL_WATERMARK}, + { NULL, 0}, +}; +static struct string_lookup caps_lookups[] = { + {"CLCD_CAP_RGB444", CLCD_CAP_RGB444}, + {"CLCD_CAP_RGB5551", CLCD_CAP_RGB5551}, + {"CLCD_CAP_RGB565", CLCD_CAP_RGB565}, + {"CLCD_CAP_RGB888", CLCD_CAP_RGB888}, + {"CLCD_CAP_BGR444", CLCD_CAP_BGR444}, + {"CLCD_CAP_BGR5551", CLCD_CAP_BGR5551}, + {"CLCD_CAP_BGR565", CLCD_CAP_BGR565}, + {"CLCD_CAP_BGR888", CLCD_CAP_BGR888}, + {"CLCD_CAP_444", CLCD_CAP_444}, + {"CLCD_CAP_5551", CLCD_CAP_5551}, + {"CLCD_CAP_565", CLCD_CAP_565}, + {"CLCD_CAP_888", CLCD_CAP_888}, + {"CLCD_CAP_RGB", CLCD_CAP_RGB}, + {"CLCD_CAP_BGR", CLCD_CAP_BGR}, + {"CLCD_CAP_ALL", CLCD_CAP_ALL}, + { NULL, 0}, +}; + +u32 parse_setting(struct string_lookup *lookup, const char *name) +{ + int i = 0; + while (lookup[i].string != NULL) { + if (strcmp(lookup[i].string, name) == 0) + return lookup[i].val; + ++i; + } + return -EINVAL; +} + +u32 get_string_lookup(struct device_node *node, const char *name, + struct string_lookup *lookup) +{ + const char *string; + int count, i, ret = 0; + + count = of_property_count_strings(node, name); + if (count >= 0) + for (i = 0; i < count; i++) + if (of_property_read_string_index(node, name, i, + &string) == 0) + ret |= parse_setting(lookup, string); + return ret; +} + +int get_val(struct device_node *node, const char *string) +{ + u32 ret = 0; + + if (of_property_read_u32(node, string, &ret)) + ret = -1; + return ret; +} + +struct clcd_panel *getPanel(struct device_node *node) +{ + static struct clcd_panel panel; + + panel.mode.refresh = get_val(node, "refresh"); + panel.mode.xres = get_val(node, "xres"); + panel.mode.yres = get_val(node, "yres"); + panel.mode.pixclock = get_val(node, "pixclock"); + panel.mode.left_margin = get_val(node, "left_margin"); + panel.mode.right_margin = get_val(node, "right_margin"); + panel.mode.upper_margin = get_val(node, "upper_margin"); + panel.mode.lower_margin = get_val(node, "lower_margin"); + panel.mode.hsync_len = get_val(node, "hsync_len"); + panel.mode.vsync_len = get_val(node, "vsync_len"); + panel.mode.sync = get_val(node, "sync"); + panel.bpp = get_val(node, "bpp"); + panel.width = (signed short) get_val(node, "width"); + panel.height = (signed short) get_val(node, "height"); + + panel.mode.vmode = get_string_lookup(node, "vmode", vmode_lookups); + panel.tim2 = get_string_lookup(node, "tim2", tim2_lookups); + panel.cntl = get_string_lookup(node, "cntl", cntl_lookups); + panel.caps = get_string_lookup(node, "caps", caps_lookups); + + return &panel; +} + +struct clcd_panel *clcdfb_get_panel(const char *name) +{ + struct device_node *node = NULL; + const char *mode; + struct clcd_panel *panel = NULL; + + do { + node = of_find_compatible_node(node, NULL, "panel"); + if (node) + if (of_property_read_string(node, "mode", &mode) == 0) + if (strcmp(mode, name) == 0) { + panel = getPanel(node); + panel->mode.name = name; + } + } while (node != NULL); + + return panel; +} + +#ifdef CONFIG_OF +static int clcdfb_dt_init(struct clcd_fb *fb) +{ + int err = 0; + struct device_node *node; + const char *mode; + dma_addr_t dma; + u32 use_dma; + const __be32 *prop; + int len, na, ns; + phys_addr_t fb_base, fb_size; + + node = fb->dev->dev.of_node; + if (!node) + return -ENODEV; + + na = of_n_addr_cells(node); + ns = of_n_size_cells(node); + + if (WARN_ON(of_property_read_string(node, "mode", &mode))) + return -ENODEV; + + fb->panel = clcdfb_get_panel(mode); + if (!fb->panel) + return -EINVAL; + fb->fb.fix.smem_len = fb->panel->mode.xres * fb->panel->mode.yres * 2; + + fb->board->name = "Device Tree CLCD PL111"; + fb->board->caps = CLCD_CAP_5551 | CLCD_CAP_565; + fb->board->check = clcdfb_check; + fb->board->decode = clcdfb_decode; + + if (of_property_read_u32(node, "use_dma", &use_dma)) + use_dma = 0; + + if (use_dma) { + fb->fb.screen_base = clcdfb_dma_alloc(&fb->dev->dev, + fb->fb.fix.smem_len, + &dma, GFP_KERNEL); + if (!fb->fb.screen_base) { + pr_err("CLCD: unable to map framebuffer\n"); + return -ENOMEM; + } + + fb->fb.fix.smem_start = dma; + fb->board->mmap = clcdfb_mmap_dma; + fb->board->remove = clcdfb_remove_dma; + } else { + prop = of_get_property(node, "framebuffer", &len); + if (WARN_ON(!prop || len < (na + ns) * sizeof(*prop))) + return -EINVAL; + + fb_base = of_read_number(prop, na); + fb_size = of_read_number(prop + na, ns); + + fb->fb.fix.smem_start = fb_base; + fb->fb.screen_base = ioremap_wc(fb_base, fb_size); + fb->board->mmap = clcdfb_mmap_io; + fb->board->remove = clcdfb_remove_io; + } + + return err; +} +#endif /* CONFIG_OF */ + static int clcdfb_probe(struct amba_device *dev, const struct amba_id *id) { struct clcd_board *board = dev->dev.platform_data; struct clcd_fb *fb; int ret; - if (!board) - return -EINVAL; + if (!board) { +#ifdef CONFIG_OF + if (dev->dev.of_node) { + board = kzalloc(sizeof(struct clcd_board), GFP_KERNEL); + if (!board) + return -ENOMEM; + board->setup = clcdfb_dt_init; + } else +#endif + return -EINVAL; + } ret = amba_request_regions(dev, NULL); if (ret) { diff --git a/drivers/video/arm-hdlcd.c b/drivers/video/arm-hdlcd.c new file mode 100644 index 00000000000..cfd631e3dc5 --- /dev/null +++ b/drivers/video/arm-hdlcd.c @@ -0,0 +1,844 @@ +/* + * drivers/video/arm-hdlcd.c + * + * Copyright (C) 2011 ARM Limited + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + * + * ARM HDLCD Controller + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/mm.h> +#include <linux/delay.h> +#include <linux/of.h> +#include <linux/fb.h> +#include <linux/clk.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/dma-mapping.h> +#include <linux/platform_device.h> +#include <linux/memblock.h> +#include <linux/arm-hdlcd.h> +#ifdef HDLCD_COUNT_BUFFERUNDERRUNS +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#endif + +#include "edid.h" + +#ifdef CONFIG_SERIAL_AMBA_PCU_UART +int get_edid(u8 *msgbuf); +#else +#endif + +#define to_hdlcd_device(info) container_of(info, struct hdlcd_device, fb) + +static struct of_device_id hdlcd_of_matches[] = { + { .compatible = "arm,hdlcd" }, + {}, +}; + +/* Framebuffer size. */ +static unsigned long framebuffer_size; + +#ifdef HDLCD_COUNT_BUFFERUNDERRUNS +static unsigned long buffer_underrun_events; +static DEFINE_SPINLOCK(hdlcd_underrun_lock); + +static void hdlcd_underrun_set(unsigned long val) +{ + spin_lock(&hdlcd_underrun_lock); + buffer_underrun_events = val; + spin_unlock(&hdlcd_underrun_lock); +} + +static unsigned long hdlcd_underrun_get(void) +{ + unsigned long val; + spin_lock(&hdlcd_underrun_lock); + val = buffer_underrun_events; + spin_unlock(&hdlcd_underrun_lock); + return val; +} + +#ifdef CONFIG_PROC_FS +static int hdlcd_underrun_show(struct seq_file *m, void *v) +{ + unsigned char underrun_string[32]; + snprintf(underrun_string, 32, "%lu\n", hdlcd_underrun_get()); + seq_puts(m, underrun_string); + return 0; +} + +static int proc_hdlcd_underrun_open(struct inode *inode, struct file *file) +{ + return single_open(file, hdlcd_underrun_show, NULL); +} + +static const struct file_operations proc_hdlcd_underrun_operations = { + .open = proc_hdlcd_underrun_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int hdlcd_underrun_init(void) +{ + hdlcd_underrun_set(0); + proc_create("hdlcd_underrun", 0, NULL, &proc_hdlcd_underrun_operations); + return 0; +} +static void hdlcd_underrun_close(void) +{ + remove_proc_entry("hdlcd_underrun", NULL); +} +#else +static int hdlcd_underrun_init(void) { return 0; } +static void hdlcd_underrun_close(void) { } +#endif +#endif + +static char *fb_mode = "1680x1050-32@60\0\0\0\0\0"; + +static struct fb_var_screeninfo cached_var_screeninfo; + +static struct fb_videomode hdlcd_default_mode = { + .refresh = 60, + .xres = 1680, + .yres = 1050, + .pixclock = 8403, + .left_margin = 80, + .right_margin = 48, + .upper_margin = 21, + .lower_margin = 3, + .hsync_len = 32, + .vsync_len = 6, + .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, + .vmode = FB_VMODE_NONINTERLACED +}; + +static inline void hdlcd_enable(struct hdlcd_device *hdlcd) +{ + dev_dbg(hdlcd->dev, "HDLCD: output enabled\n"); + writel(1, hdlcd->base + HDLCD_REG_COMMAND); +} + +static inline void hdlcd_disable(struct hdlcd_device *hdlcd) +{ + dev_dbg(hdlcd->dev, "HDLCD: output disabled\n"); + writel(0, hdlcd->base + HDLCD_REG_COMMAND); +} + +static int hdlcd_set_bitfields(struct hdlcd_device *hdlcd, + struct fb_var_screeninfo *var) +{ + int ret = 0; + + memset(&var->transp, 0, sizeof(var->transp)); + var->red.msb_right = 0; + var->green.msb_right = 0; + var->blue.msb_right = 0; + var->blue.offset = 0; + + switch (var->bits_per_pixel) { + case 8: + /* pseudocolor */ + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + break; + case 16: + /* 565 format */ + var->red.length = 5; + var->green.length = 6; + var->blue.length = 5; + break; + case 32: + var->transp.length = 8; + case 24: + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + break; + default: + ret = -EINVAL; + break; + } + + if (!ret) { + if(var->bits_per_pixel != 32) + { + var->green.offset = var->blue.length; + var->red.offset = var->green.offset + var->green.length; + } + else + { + /* Previously, the byte ordering for 32-bit color was + * (msb)<alpha><red><green><blue>(lsb) + * but this does not match what android expects and + * the colors are odd. Instead, use + * <alpha><blue><green><red> + * Since we tell fb what we are doing, console + * , X and directfb access should work fine. + */ + var->green.offset = var->red.length; + var->blue.offset = var->green.offset + var->green.length; + var->transp.offset = var->blue.offset + var->blue.length; + } + } + + return ret; +} + +static int hdlcd_check_var(struct fb_var_screeninfo *var, struct fb_info *info) +{ + struct hdlcd_device *hdlcd = to_hdlcd_device(info); + int bytes_per_pixel = var->bits_per_pixel / 8; + +#ifdef HDLCD_NO_VIRTUAL_SCREEN + var->yres_virtual = var->yres; +#else + var->yres_virtual = 2 * var->yres; +#endif + + if ((var->xres_virtual * bytes_per_pixel * var->yres_virtual) > hdlcd->fb.fix.smem_len) + return -ENOMEM; + + if (var->xres > HDLCD_MAX_XRES || var->yres > HDLCD_MAX_YRES) + return -EINVAL; + + /* make sure the bitfields are set appropriately */ + return hdlcd_set_bitfields(hdlcd, var); +} + +/* prototype */ +static int hdlcd_pan_display(struct fb_var_screeninfo *var, + struct fb_info *info); + +#define WRITE_HDLCD_REG(reg, value) writel((value), hdlcd->base + (reg)) +#define READ_HDLCD_REG(reg) readl(hdlcd->base + (reg)) + +static int hdlcd_set_par(struct fb_info *info) +{ + struct hdlcd_device *hdlcd = to_hdlcd_device(info); + int bytes_per_pixel = hdlcd->fb.var.bits_per_pixel / 8; + int polarities; + int old_yoffset; + + /* check for shortcuts */ + old_yoffset = cached_var_screeninfo.yoffset; + cached_var_screeninfo.yoffset = info->var.yoffset; + if (!memcmp(&info->var, &cached_var_screeninfo, + sizeof(struct fb_var_screeninfo))) { + if(old_yoffset != info->var.yoffset) { + /* we only changed yoffset, and we already + * already recorded it a couple lines up + */ + hdlcd_pan_display(&info->var, info); + } + /* or no change */ + return 0; + } + + hdlcd->fb.fix.line_length = hdlcd->fb.var.xres * bytes_per_pixel; + + if (hdlcd->fb.var.bits_per_pixel >= 16) + hdlcd->fb.fix.visual = FB_VISUAL_TRUECOLOR; + else + hdlcd->fb.fix.visual = FB_VISUAL_PSEUDOCOLOR; + + memcpy(&cached_var_screeninfo, &info->var, sizeof(struct fb_var_screeninfo)); + + polarities = HDLCD_POLARITY_DATAEN | +#ifndef CONFIG_ARCH_TUSCAN + HDLCD_POLARITY_PIXELCLK | +#endif + HDLCD_POLARITY_DATA; + polarities |= (hdlcd->fb.var.sync & FB_SYNC_HOR_HIGH_ACT) ? HDLCD_POLARITY_HSYNC : 0; + polarities |= (hdlcd->fb.var.sync & FB_SYNC_VERT_HIGH_ACT) ? HDLCD_POLARITY_VSYNC : 0; + + hdlcd_disable(hdlcd); + + WRITE_HDLCD_REG(HDLCD_REG_FB_LINE_LENGTH, hdlcd->fb.var.xres * bytes_per_pixel); + WRITE_HDLCD_REG(HDLCD_REG_FB_LINE_PITCH, hdlcd->fb.var.xres * bytes_per_pixel); + WRITE_HDLCD_REG(HDLCD_REG_FB_LINE_COUNT, hdlcd->fb.var.yres - 1); + WRITE_HDLCD_REG(HDLCD_REG_V_SYNC, hdlcd->fb.var.vsync_len - 1); + WRITE_HDLCD_REG(HDLCD_REG_V_BACK_PORCH, hdlcd->fb.var.upper_margin - 1); + WRITE_HDLCD_REG(HDLCD_REG_V_DATA, hdlcd->fb.var.yres - 1); + WRITE_HDLCD_REG(HDLCD_REG_V_FRONT_PORCH, hdlcd->fb.var.lower_margin - 1); + WRITE_HDLCD_REG(HDLCD_REG_H_SYNC, hdlcd->fb.var.hsync_len - 1); + WRITE_HDLCD_REG(HDLCD_REG_H_BACK_PORCH, hdlcd->fb.var.left_margin - 1); + WRITE_HDLCD_REG(HDLCD_REG_H_DATA, hdlcd->fb.var.xres - 1); + WRITE_HDLCD_REG(HDLCD_REG_H_FRONT_PORCH, hdlcd->fb.var.right_margin - 1); + WRITE_HDLCD_REG(HDLCD_REG_POLARITIES, polarities); + WRITE_HDLCD_REG(HDLCD_REG_PIXEL_FORMAT, (bytes_per_pixel - 1) << 3); +#ifdef HDLCD_RED_DEFAULT_COLOUR + WRITE_HDLCD_REG(HDLCD_REG_RED_SELECT, (0x00ff0000 | (hdlcd->fb.var.red.length & 0xf) << 8) \ + | hdlcd->fb.var.red.offset); +#else + WRITE_HDLCD_REG(HDLCD_REG_RED_SELECT, ((hdlcd->fb.var.red.length & 0xf) << 8) | hdlcd->fb.var.red.offset); +#endif + WRITE_HDLCD_REG(HDLCD_REG_GREEN_SELECT, ((hdlcd->fb.var.green.length & 0xf) << 8) | hdlcd->fb.var.green.offset); + WRITE_HDLCD_REG(HDLCD_REG_BLUE_SELECT, ((hdlcd->fb.var.blue.length & 0xf) << 8) | hdlcd->fb.var.blue.offset); + + clk_set_rate(hdlcd->clk, (1000000000 / hdlcd->fb.var.pixclock) * 1000); + clk_enable(hdlcd->clk); + + hdlcd_enable(hdlcd); + + return 0; +} + +static int hdlcd_setcolreg(unsigned int regno, unsigned int red, unsigned int green, + unsigned int blue, unsigned int transp, struct fb_info *info) +{ + if (regno < 16) { + u32 *pal = info->pseudo_palette; + + pal[regno] = ((red >> 8) << info->var.red.offset) | + ((green >> 8) << info->var.green.offset) | + ((blue >> 8) << info->var.blue.offset); + } + + return 0; +} + +static irqreturn_t hdlcd_irq(int irq, void *data) +{ + struct hdlcd_device *hdlcd = data; + unsigned long irq_mask, irq_status; + + irq_mask = READ_HDLCD_REG(HDLCD_REG_INT_MASK); + irq_status = READ_HDLCD_REG(HDLCD_REG_INT_STATUS); + + /* acknowledge interrupt(s) */ + WRITE_HDLCD_REG(HDLCD_REG_INT_CLEAR, irq_status); +#ifdef HDLCD_COUNT_BUFFERUNDERRUNS + if (irq_status & HDLCD_INTERRUPT_UNDERRUN) { + /* increment the count */ + hdlcd_underrun_set(hdlcd_underrun_get() + 1); + } +#endif + if (irq_status & HDLCD_INTERRUPT_VSYNC) { + /* disable future VSYNC interrupts */ + WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, irq_mask & ~HDLCD_INTERRUPT_VSYNC); + + complete(&hdlcd->vsync_completion); + } + + return IRQ_HANDLED; +} + +static int hdlcd_wait_for_vsync(struct fb_info *info) +{ + struct hdlcd_device *hdlcd = to_hdlcd_device(info); + unsigned long irq_mask; + int err; + + /* enable VSYNC interrupt */ + irq_mask = READ_HDLCD_REG(HDLCD_REG_INT_MASK); + WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, irq_mask | HDLCD_INTERRUPT_VSYNC); + + err = wait_for_completion_interruptible_timeout(&hdlcd->vsync_completion, + msecs_to_jiffies(100)); + + if (!err) + return -ETIMEDOUT; + + return 0; +} + +static int hdlcd_blank(int blank_mode, struct fb_info *info) +{ + struct hdlcd_device *hdlcd = to_hdlcd_device(info); + + switch (blank_mode) { + case FB_BLANK_POWERDOWN: + clk_disable(hdlcd->clk); + case FB_BLANK_NORMAL: + hdlcd_disable(hdlcd); + break; + case FB_BLANK_UNBLANK: + clk_enable(hdlcd->clk); + hdlcd_enable(hdlcd); + break; + case FB_BLANK_VSYNC_SUSPEND: + case FB_BLANK_HSYNC_SUSPEND: + default: + return 1; + } + + return 0; +} + +static void hdlcd_mmap_open(struct vm_area_struct *vma) +{ +} + +static void hdlcd_mmap_close(struct vm_area_struct *vma) +{ +} + +static struct vm_operations_struct hdlcd_mmap_ops = { + .open = hdlcd_mmap_open, + .close = hdlcd_mmap_close, +}; + +static int hdlcd_mmap(struct fb_info *info, struct vm_area_struct *vma) +{ + struct hdlcd_device *hdlcd = to_hdlcd_device(info); + unsigned long off; + unsigned long start; + unsigned long len = hdlcd->fb.fix.smem_len; + + if (vma->vm_end - vma->vm_start == 0) + return 0; + if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT)) + return -EINVAL; + + off = vma->vm_pgoff << PAGE_SHIFT; + if ((off >= len) || (vma->vm_end - vma->vm_start + off) > len) + return -EINVAL; + + start = hdlcd->fb.fix.smem_start; + off += start; + + vma->vm_pgoff = off >> PAGE_SHIFT; + vma->vm_flags |= VM_IO; + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + vma->vm_ops = &hdlcd_mmap_ops; + if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT, + vma->vm_end - vma->vm_start, + vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +static int hdlcd_pan_display(struct fb_var_screeninfo *var, struct fb_info *info) +{ + struct hdlcd_device *hdlcd = to_hdlcd_device(info); + + hdlcd->fb.var.yoffset = var->yoffset; + WRITE_HDLCD_REG(HDLCD_REG_FB_BASE, hdlcd->fb.fix.smem_start + + (var->yoffset * hdlcd->fb.fix.line_length)); + + hdlcd_wait_for_vsync(info); + + return 0; +} + +static int hdlcd_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg) +{ + int err; + + switch (cmd) { + case FBIO_WAITFORVSYNC: + err = hdlcd_wait_for_vsync(info); + break; + default: + err = -ENOIOCTLCMD; + break; + } + + return err; +} + +static struct fb_ops hdlcd_ops = { + .owner = THIS_MODULE, + .fb_check_var = hdlcd_check_var, + .fb_set_par = hdlcd_set_par, + .fb_setcolreg = hdlcd_setcolreg, + .fb_blank = hdlcd_blank, + .fb_fillrect = cfb_fillrect, + .fb_copyarea = cfb_copyarea, + .fb_imageblit = cfb_imageblit, + .fb_mmap = hdlcd_mmap, + .fb_pan_display = hdlcd_pan_display, + .fb_ioctl = hdlcd_ioctl, + .fb_compat_ioctl = hdlcd_ioctl +}; + +static int hdlcd_setup(struct hdlcd_device *hdlcd) +{ + u32 version; + int err = -EFAULT; + + hdlcd->fb.device = hdlcd->dev; + + hdlcd->clk = clk_get(hdlcd->dev, NULL); + if (IS_ERR(hdlcd->clk)) { + dev_err(hdlcd->dev, "HDLCD: unable to find clock data\n"); + return PTR_ERR(hdlcd->clk); + } + + err = clk_prepare(hdlcd->clk); + if (err) + goto clk_prepare_err; + + hdlcd->base = ioremap_nocache(hdlcd->fb.fix.mmio_start, hdlcd->fb.fix.mmio_len); + if (!hdlcd->base) { + dev_err(hdlcd->dev, "HDLCD: unable to map registers\n"); + goto remap_err; + } + + hdlcd->fb.pseudo_palette = kmalloc(sizeof(u32) * 16, GFP_KERNEL); + if (!hdlcd->fb.pseudo_palette) { + dev_err(hdlcd->dev, "HDLCD: unable to allocate pseudo_palette memory\n"); + err = -ENOMEM; + goto kmalloc_err; + } + + version = readl(hdlcd->base + HDLCD_REG_VERSION); + if ((version & HDLCD_PRODUCT_MASK) != HDLCD_PRODUCT_ID) { + dev_err(hdlcd->dev, "HDLCD: unknown product id: 0x%x\n", version); + err = -EINVAL; + goto kmalloc_err; + } + dev_info(hdlcd->dev, "HDLCD: found ARM HDLCD version r%dp%d\n", + (version & HDLCD_VERSION_MAJOR_MASK) >> 8, + version & HDLCD_VERSION_MINOR_MASK); + + strcpy(hdlcd->fb.fix.id, "hdlcd"); + hdlcd->fb.fbops = &hdlcd_ops; + hdlcd->fb.flags = FBINFO_FLAG_DEFAULT/* | FBINFO_VIRTFB*/; + + hdlcd->fb.fix.type = FB_TYPE_PACKED_PIXELS; + hdlcd->fb.fix.type_aux = 0; + hdlcd->fb.fix.xpanstep = 0; + hdlcd->fb.fix.ypanstep = 1; + hdlcd->fb.fix.ywrapstep = 0; + hdlcd->fb.fix.accel = FB_ACCEL_NONE; + + hdlcd->fb.var.nonstd = 0; + hdlcd->fb.var.activate = FB_ACTIVATE_NOW; + hdlcd->fb.var.height = -1; + hdlcd->fb.var.width = -1; + hdlcd->fb.var.accel_flags = 0; + + init_completion(&hdlcd->vsync_completion); + + if (hdlcd->edid) { + /* build modedb from EDID */ + fb_edid_to_monspecs(hdlcd->edid, &hdlcd->fb.monspecs); + fb_videomode_to_modelist(hdlcd->fb.monspecs.modedb, + hdlcd->fb.monspecs.modedb_len, + &hdlcd->fb.modelist); + fb_find_mode(&hdlcd->fb.var, &hdlcd->fb, fb_mode, + hdlcd->fb.monspecs.modedb, + hdlcd->fb.monspecs.modedb_len, + &hdlcd_default_mode, 32); + } else { + hdlcd->fb.monspecs.hfmin = 0; + hdlcd->fb.monspecs.hfmax = 100000; + hdlcd->fb.monspecs.vfmin = 0; + hdlcd->fb.monspecs.vfmax = 400; + hdlcd->fb.monspecs.dclkmin = 1000000; + hdlcd->fb.monspecs.dclkmax = 100000000; + fb_find_mode(&hdlcd->fb.var, &hdlcd->fb, fb_mode, NULL, 0, &hdlcd_default_mode, 32); + } + + dev_info(hdlcd->dev, "using %dx%d-%d@%d mode\n", hdlcd->fb.var.xres, + hdlcd->fb.var.yres, hdlcd->fb.var.bits_per_pixel, + hdlcd->fb.mode ? hdlcd->fb.mode->refresh : 60); + hdlcd->fb.var.xres_virtual = hdlcd->fb.var.xres; +#ifdef HDLCD_NO_VIRTUAL_SCREEN + hdlcd->fb.var.yres_virtual = hdlcd->fb.var.yres; +#else + hdlcd->fb.var.yres_virtual = hdlcd->fb.var.yres * 2; +#endif + + /* initialise and set the palette */ + if (fb_alloc_cmap(&hdlcd->fb.cmap, NR_PALETTE, 0)) { + dev_err(hdlcd->dev, "failed to allocate cmap memory\n"); + err = -ENOMEM; + goto setup_err; + } + fb_set_cmap(&hdlcd->fb.cmap, &hdlcd->fb); + + /* Allow max number of outstanding requests with the largest beat burst */ + WRITE_HDLCD_REG(HDLCD_REG_BUS_OPTIONS, HDLCD_BUS_MAX_OUTSTAND | HDLCD_BUS_BURST_16); + /* Set the framebuffer base to start of allocated memory */ + WRITE_HDLCD_REG(HDLCD_REG_FB_BASE, hdlcd->fb.fix.smem_start); +#ifdef HDLCD_COUNT_BUFFERUNDERRUNS + /* turn on underrun interrupt for counting */ + WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, HDLCD_INTERRUPT_UNDERRUN); +#else + /* Ensure interrupts are disabled */ + WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, 0); +#endif + fb_set_var(&hdlcd->fb, &hdlcd->fb.var); + + if (!register_framebuffer(&hdlcd->fb)) { + return 0; + } + + dev_err(hdlcd->dev, "HDLCD: cannot register framebuffer\n"); + + fb_dealloc_cmap(&hdlcd->fb.cmap); +setup_err: + iounmap(hdlcd->base); +kmalloc_err: + kfree(hdlcd->fb.pseudo_palette); +remap_err: + clk_unprepare(hdlcd->clk); +clk_prepare_err: + clk_put(hdlcd->clk); + return err; +} + +static inline unsigned char atohex(u8 data) +{ + if (!isxdigit(data)) + return 0; + /* truncate the upper nibble and add 9 to non-digit values */ + return (data > 0x39) ? ((data & 0xf) + 9) : (data & 0xf); +} + +/* EDID data is passed from devicetree in a literal string that can contain spaces and + the hexadecimal dump of the data */ +static int parse_edid_data(struct hdlcd_device *hdlcd, const u8 *edid_data, int data_len) +{ + int i, j; + + if (!edid_data) + return -EINVAL; + + hdlcd->edid = kzalloc(EDID_LENGTH, GFP_KERNEL); + if (!hdlcd->edid) + return -ENOMEM; + + for (i = 0, j = 0; i < data_len; i++) { + if (isspace(edid_data[i])) + continue; + hdlcd->edid[j++] = atohex(edid_data[i]); + if (j >= EDID_LENGTH) + break; + } + + if (j < EDID_LENGTH) { + kfree(hdlcd->edid); + hdlcd->edid = NULL; + return -EINVAL; + } + + return 0; +} + +static int hdlcd_probe(struct platform_device *pdev) +{ + int err = 0, i; + struct hdlcd_device *hdlcd; + struct resource *mem; +#ifdef CONFIG_OF + struct device_node *of_node; +#endif + + memset(&cached_var_screeninfo, 0, sizeof(struct fb_var_screeninfo)); + + dev_dbg(&pdev->dev, "HDLCD: probing\n"); + + hdlcd = kzalloc(sizeof(*hdlcd), GFP_KERNEL); + if (!hdlcd) + return -ENOMEM; + +#ifdef CONFIG_OF + of_node = pdev->dev.of_node; + if (of_node) { + int len; + const u8 *edid; + const __be32 *prop = of_get_property(of_node, "mode", &len); + if (prop) + strncpy(fb_mode, (char *)prop, len); + prop = of_get_property(of_node, "framebuffer", &len); + if (prop) { + hdlcd->fb.fix.smem_start = of_read_ulong(prop, + of_n_addr_cells(of_node)); + prop += of_n_addr_cells(of_node); + framebuffer_size = of_read_ulong(prop, + of_n_size_cells(of_node)); + if (framebuffer_size > HDLCD_MAX_FRAMEBUFFER_SIZE) + framebuffer_size = HDLCD_MAX_FRAMEBUFFER_SIZE; + dev_dbg(&pdev->dev, "HDLCD: phys_addr = 0x%lx, size = 0x%lx\n", + hdlcd->fb.fix.smem_start, framebuffer_size); + } + edid = of_get_property(of_node, "edid", &len); + if (edid) { + err = parse_edid_data(hdlcd, edid, len); +#ifdef CONFIG_SERIAL_AMBA_PCU_UART + } else { + /* ask the firmware to fetch the EDID */ + dev_dbg(&pdev->dev, "HDLCD: Requesting EDID data\n"); + hdlcd->edid = kzalloc(EDID_LENGTH, GFP_KERNEL); + if (!hdlcd->edid) + return -ENOMEM; + err = get_edid(hdlcd->edid); +#endif /* CONFIG_SERIAL_AMBA_PCU_UART */ + } + if (err) + dev_info(&pdev->dev, "HDLCD: Failed to parse EDID data\n"); + } +#endif /* CONFIG_OF */ + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) { + dev_err(&pdev->dev, "HDLCD: cannot get platform resources\n"); + err = -EINVAL; + goto resource_err; + } + + i = platform_get_irq(pdev, 0); + if (i < 0) { + dev_err(&pdev->dev, "HDLCD: no irq defined for vsync\n"); + err = -ENOENT; + goto resource_err; + } else { + err = request_irq(i, hdlcd_irq, 0, dev_name(&pdev->dev), hdlcd); + if (err) { + dev_err(&pdev->dev, "HDLCD: unable to request irq\n"); + goto resource_err; + } + hdlcd->irq = i; + } + + if (!request_mem_region(mem->start, resource_size(mem), dev_name(&pdev->dev))) { + err = -ENXIO; + goto request_err; + } + + if (!hdlcd->fb.fix.smem_start) { + dev_err(&pdev->dev, "platform did not allocate frame buffer memory\n"); + err = -ENOMEM; + goto memalloc_err; + } + hdlcd->fb.screen_base = ioremap_wc(hdlcd->fb.fix.smem_start, framebuffer_size); + if (!hdlcd->fb.screen_base) { + dev_err(&pdev->dev, "unable to ioremap framebuffer\n"); + err = -ENOMEM; + goto probe_err; + } + + hdlcd->fb.screen_size = framebuffer_size; + hdlcd->fb.fix.smem_len = framebuffer_size; + hdlcd->fb.fix.mmio_start = mem->start; + hdlcd->fb.fix.mmio_len = resource_size(mem); + + /* Clear the framebuffer */ + memset(hdlcd->fb.screen_base, 0, framebuffer_size); + + hdlcd->dev = &pdev->dev; + + dev_dbg(&pdev->dev, "HDLCD: framebuffer virt base %p, phys base 0x%lX\n", + hdlcd->fb.screen_base, (unsigned long)hdlcd->fb.fix.smem_start); + + err = hdlcd_setup(hdlcd); + + if (err) + goto probe_err; + + platform_set_drvdata(pdev, hdlcd); + return 0; + +probe_err: + iounmap(hdlcd->fb.screen_base); + memblock_free(hdlcd->fb.fix.smem_start, hdlcd->fb.fix.smem_start); + +memalloc_err: + release_mem_region(mem->start, resource_size(mem)); + +request_err: + free_irq(hdlcd->irq, hdlcd); + +resource_err: + kfree(hdlcd); + + return err; +} + +static int hdlcd_remove(struct platform_device *pdev) +{ + struct hdlcd_device *hdlcd = platform_get_drvdata(pdev); + + clk_disable(hdlcd->clk); + clk_unprepare(hdlcd->clk); + clk_put(hdlcd->clk); + + /* unmap memory */ + iounmap(hdlcd->fb.screen_base); + iounmap(hdlcd->base); + + /* deallocate fb memory */ + fb_dealloc_cmap(&hdlcd->fb.cmap); + kfree(hdlcd->fb.pseudo_palette); + memblock_free(hdlcd->fb.fix.smem_start, hdlcd->fb.fix.smem_start); + release_mem_region(hdlcd->fb.fix.mmio_start, hdlcd->fb.fix.mmio_len); + + free_irq(hdlcd->irq, NULL); + kfree(hdlcd); + + return 0; +} + +#ifdef CONFIG_PM +static int hdlcd_suspend(struct platform_device *pdev, pm_message_t state) +{ + /* not implemented yet */ + return 0; +} + +static int hdlcd_resume(struct platform_device *pdev) +{ + /* not implemented yet */ + return 0; +} +#else +#define hdlcd_suspend NULL +#define hdlcd_resume NULL +#endif + +static struct platform_driver hdlcd_driver = { + .probe = hdlcd_probe, + .remove = hdlcd_remove, + .suspend = hdlcd_suspend, + .resume = hdlcd_resume, + .driver = { + .name = "hdlcd", + .owner = THIS_MODULE, + .of_match_table = hdlcd_of_matches, + }, +}; + +static int __init hdlcd_init(void) +{ +#ifdef HDLCD_COUNT_BUFFERUNDERRUNS + int err = platform_driver_register(&hdlcd_driver); + if (!err) + hdlcd_underrun_init(); + return err; +#else + return platform_driver_register(&hdlcd_driver); +#endif +} + +void __exit hdlcd_exit(void) +{ +#ifdef HDLCD_COUNT_BUFFERUNDERRUNS + hdlcd_underrun_close(); +#endif + platform_driver_unregister(&hdlcd_driver); +} + +module_init(hdlcd_init); +module_exit(hdlcd_exit); + +MODULE_AUTHOR("Liviu Dudau"); +MODULE_DESCRIPTION("ARM HDLCD core driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index a92783e480e..0d8f98c79a6 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -404,7 +404,7 @@ static void cursor_timer_handler(unsigned long dev_addr) struct fb_info *info = (struct fb_info *) dev_addr; struct fbcon_ops *ops = info->fbcon_par; - schedule_work(&info->queue); + queue_work(system_power_efficient_wq, &info->queue); mod_timer(&ops->cursor_timer, jiffies + HZ/5); } diff --git a/drivers/video/vexpress-dvi.c b/drivers/video/vexpress-dvi.c new file mode 100644 index 00000000000..f08753450ee --- /dev/null +++ b/drivers/video/vexpress-dvi.c @@ -0,0 +1,220 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2012 ARM Limited + */ + +#define pr_fmt(fmt) "vexpress-dvi: " fmt + +#include <linux/fb.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/vexpress.h> + + +static struct vexpress_config_func *vexpress_dvimode_func; + +static struct { + u32 xres, yres, mode; +} vexpress_dvi_dvimodes[] = { + { 640, 480, 0 }, /* VGA */ + { 800, 600, 1 }, /* SVGA */ + { 1024, 768, 2 }, /* XGA */ + { 1280, 1024, 3 }, /* SXGA */ + { 1600, 1200, 4 }, /* UXGA */ + { 1920, 1080, 5 }, /* HD1080 */ +}; + +static void vexpress_dvi_mode_set(struct fb_info *info, u32 xres, u32 yres) +{ + int err = -ENOENT; + int i; + + if (!vexpress_dvimode_func) + return; + + for (i = 0; i < ARRAY_SIZE(vexpress_dvi_dvimodes); i++) { + if (vexpress_dvi_dvimodes[i].xres == xres && + vexpress_dvi_dvimodes[i].yres == yres) { + pr_debug("mode: %ux%u = %d\n", xres, yres, + vexpress_dvi_dvimodes[i].mode); + err = vexpress_config_write(vexpress_dvimode_func, 0, + vexpress_dvi_dvimodes[i].mode); + break; + } + } + + if (err) + pr_warn("Failed to set %ux%u mode! (%d)\n", xres, yres, err); +} + + +static struct vexpress_config_func *vexpress_muxfpga_func; +static int vexpress_dvi_fb = -1; + +static int vexpress_dvi_mux_set(struct fb_info *info) +{ + int err; + u32 site = vexpress_get_site_by_dev(info->device); + + if (!vexpress_muxfpga_func) + return -ENXIO; + + err = vexpress_config_write(vexpress_muxfpga_func, 0, site); + if (!err) { + pr_debug("Selected MUXFPGA input %d (fb%d)\n", site, + info->node); + vexpress_dvi_fb = info->node; + vexpress_dvi_mode_set(info, info->var.xres, + info->var.yres); + } else { + pr_warn("Failed to select MUXFPGA input %d (fb%d)! (%d)\n", + site, info->node, err); + } + + return err; +} + +static int vexpress_dvi_fb_select(int fb) +{ + int err; + struct fb_info *info; + + /* fb0 is the default */ + if (fb < 0) + fb = 0; + + info = registered_fb[fb]; + if (!info || !lock_fb_info(info)) + return -ENODEV; + + err = vexpress_dvi_mux_set(info); + + unlock_fb_info(info); + + return err; +} + +static ssize_t vexpress_dvi_fb_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", vexpress_dvi_fb); +} + +static ssize_t vexpress_dvi_fb_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + long value; + int err = kstrtol(buf, 0, &value); + + if (!err) + err = vexpress_dvi_fb_select(value); + + return err ? err : count; +} + +DEVICE_ATTR(fb, S_IRUGO | S_IWUSR, vexpress_dvi_fb_show, + vexpress_dvi_fb_store); + + +static int vexpress_dvi_fb_event_notify(struct notifier_block *self, + unsigned long action, void *data) +{ + struct fb_event *event = data; + struct fb_info *info = event->info; + struct fb_videomode *mode = event->data; + + switch (action) { + case FB_EVENT_FB_REGISTERED: + if (vexpress_dvi_fb < 0) + vexpress_dvi_mux_set(info); + break; + case FB_EVENT_MODE_CHANGE: + case FB_EVENT_MODE_CHANGE_ALL: + if (info->node == vexpress_dvi_fb) + vexpress_dvi_mode_set(info, mode->xres, mode->yres); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block vexpress_dvi_fb_notifier = { + .notifier_call = vexpress_dvi_fb_event_notify, +}; +static bool vexpress_dvi_fb_notifier_registered; + + +enum vexpress_dvi_func { FUNC_MUXFPGA, FUNC_DVIMODE }; + +static struct of_device_id vexpress_dvi_of_match[] = { + { + .compatible = "arm,vexpress-muxfpga", + .data = (void *)FUNC_MUXFPGA, + }, { + .compatible = "arm,vexpress-dvimode", + .data = (void *)FUNC_DVIMODE, + }, + {} +}; + +static int vexpress_dvi_probe(struct platform_device *pdev) +{ + enum vexpress_dvi_func func; + const struct of_device_id *match = + of_match_device(vexpress_dvi_of_match, &pdev->dev); + + if (match) + func = (enum vexpress_dvi_func)match->data; + else + func = pdev->id_entry->driver_data; + + switch (func) { + case FUNC_MUXFPGA: + vexpress_muxfpga_func = + vexpress_config_func_get_by_dev(&pdev->dev); + device_create_file(&pdev->dev, &dev_attr_fb); + break; + case FUNC_DVIMODE: + vexpress_dvimode_func = + vexpress_config_func_get_by_dev(&pdev->dev); + break; + } + + if (!vexpress_dvi_fb_notifier_registered) { + fb_register_client(&vexpress_dvi_fb_notifier); + vexpress_dvi_fb_notifier_registered = true; + } + + vexpress_dvi_fb_select(vexpress_dvi_fb); + + return 0; +} + +static const struct platform_device_id vexpress_dvi_id_table[] = { + { .name = "vexpress-muxfpga", .driver_data = FUNC_MUXFPGA, }, + { .name = "vexpress-dvimode", .driver_data = FUNC_DVIMODE, }, + {} +}; + +static struct platform_driver vexpress_dvi_driver = { + .probe = vexpress_dvi_probe, + .driver = { + .name = "vexpress-dvi", + .of_match_table = vexpress_dvi_of_match, + }, + .id_table = vexpress_dvi_id_table, +}; + +static int __init vexpress_dvi_init(void) +{ + return platform_driver_register(&vexpress_dvi_driver); +} +device_initcall(vexpress_dvi_init); diff --git a/include/linux/arm-cci.h b/include/linux/arm-cci.h new file mode 100644 index 00000000000..79d6edf446d --- /dev/null +++ b/include/linux/arm-cci.h @@ -0,0 +1,61 @@ +/* + * CCI cache coherent interconnect support + * + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __LINUX_ARM_CCI_H +#define __LINUX_ARM_CCI_H + +#include <linux/errno.h> +#include <linux/types.h> + +struct device_node; + +#ifdef CONFIG_ARM_CCI +extern bool cci_probed(void); +extern int cci_ace_get_port(struct device_node *dn); +extern int cci_disable_port_by_cpu(u64 mpidr); +extern int __cci_control_port_by_device(struct device_node *dn, bool enable); +extern int __cci_control_port_by_index(u32 port, bool enable); +#else +static inline bool cci_probed(void) { return false; } +static inline int cci_ace_get_port(struct device_node *dn) +{ + return -ENODEV; +} +static inline int cci_disable_port_by_cpu(u64 mpidr) { return -ENODEV; } +static inline int __cci_control_port_by_device(struct device_node *dn, + bool enable) +{ + return -ENODEV; +} +static inline int __cci_control_port_by_index(u32 port, bool enable) +{ + return -ENODEV; +} +#endif +#define cci_disable_port_by_device(dev) \ + __cci_control_port_by_device(dev, false) +#define cci_enable_port_by_device(dev) \ + __cci_control_port_by_device(dev, true) +#define cci_disable_port_by_index(dev) \ + __cci_control_port_by_index(dev, false) +#define cci_enable_port_by_index(dev) \ + __cci_control_port_by_index(dev, true) + +#endif diff --git a/include/linux/arm-hdlcd.h b/include/linux/arm-hdlcd.h new file mode 100644 index 00000000000..939f3a81d56 --- /dev/null +++ b/include/linux/arm-hdlcd.h @@ -0,0 +1,122 @@ +/* + * include/linux/arm-hdlcd.h + * + * Copyright (C) 2011 ARM Limited + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + * + * ARM HDLCD Controller register definition + */ + +#include <linux/fb.h> +#include <linux/completion.h> + +/* register offsets */ +#define HDLCD_REG_VERSION 0x0000 /* ro */ +#define HDLCD_REG_INT_RAWSTAT 0x0010 /* rw */ +#define HDLCD_REG_INT_CLEAR 0x0014 /* wo */ +#define HDLCD_REG_INT_MASK 0x0018 /* rw */ +#define HDLCD_REG_INT_STATUS 0x001c /* ro */ +#define HDLCD_REG_USER_OUT 0x0020 /* rw */ +#define HDLCD_REG_FB_BASE 0x0100 /* rw */ +#define HDLCD_REG_FB_LINE_LENGTH 0x0104 /* rw */ +#define HDLCD_REG_FB_LINE_COUNT 0x0108 /* rw */ +#define HDLCD_REG_FB_LINE_PITCH 0x010c /* rw */ +#define HDLCD_REG_BUS_OPTIONS 0x0110 /* rw */ +#define HDLCD_REG_V_SYNC 0x0200 /* rw */ +#define HDLCD_REG_V_BACK_PORCH 0x0204 /* rw */ +#define HDLCD_REG_V_DATA 0x0208 /* rw */ +#define HDLCD_REG_V_FRONT_PORCH 0x020c /* rw */ +#define HDLCD_REG_H_SYNC 0x0210 /* rw */ +#define HDLCD_REG_H_BACK_PORCH 0x0214 /* rw */ +#define HDLCD_REG_H_DATA 0x0218 /* rw */ +#define HDLCD_REG_H_FRONT_PORCH 0x021c /* rw */ +#define HDLCD_REG_POLARITIES 0x0220 /* rw */ +#define HDLCD_REG_COMMAND 0x0230 /* rw */ +#define HDLCD_REG_PIXEL_FORMAT 0x0240 /* rw */ +#define HDLCD_REG_BLUE_SELECT 0x0244 /* rw */ +#define HDLCD_REG_GREEN_SELECT 0x0248 /* rw */ +#define HDLCD_REG_RED_SELECT 0x024c /* rw */ + +/* version */ +#define HDLCD_PRODUCT_ID 0x1CDC0000 +#define HDLCD_PRODUCT_MASK 0xFFFF0000 +#define HDLCD_VERSION_MAJOR_MASK 0x0000FF00 +#define HDLCD_VERSION_MINOR_MASK 0x000000FF + +/* interrupts */ +#define HDLCD_INTERRUPT_DMA_END (1 << 0) +#define HDLCD_INTERRUPT_BUS_ERROR (1 << 1) +#define HDLCD_INTERRUPT_VSYNC (1 << 2) +#define HDLCD_INTERRUPT_UNDERRUN (1 << 3) + +/* polarity */ +#define HDLCD_POLARITY_VSYNC (1 << 0) +#define HDLCD_POLARITY_HSYNC (1 << 1) +#define HDLCD_POLARITY_DATAEN (1 << 2) +#define HDLCD_POLARITY_DATA (1 << 3) +#define HDLCD_POLARITY_PIXELCLK (1 << 4) + +/* commands */ +#define HDLCD_COMMAND_DISABLE (0 << 0) +#define HDLCD_COMMAND_ENABLE (1 << 0) + +/* pixel format */ +#define HDLCD_PIXEL_FMT_LITTLE_ENDIAN (0 << 31) +#define HDLCD_PIXEL_FMT_BIG_ENDIAN (1 << 31) +#define HDLCD_BYTES_PER_PIXEL_MASK (3 << 3) + +/* bus options */ +#define HDLCD_BUS_BURST_MASK 0x01f +#define HDLCD_BUS_MAX_OUTSTAND 0xf00 +#define HDLCD_BUS_BURST_NONE (0 << 0) +#define HDLCD_BUS_BURST_1 (1 << 0) +#define HDLCD_BUS_BURST_2 (1 << 1) +#define HDLCD_BUS_BURST_4 (1 << 2) +#define HDLCD_BUS_BURST_8 (1 << 3) +#define HDLCD_BUS_BURST_16 (1 << 4) + +/* Max resolution supported is 4096x4096, 8 bit per color component, + 8 bit alpha, but we are going to choose the usual hardware default + (2048x2048, 32 bpp) and enable double buffering */ +#define HDLCD_MAX_XRES 2048 +#define HDLCD_MAX_YRES 2048 +#define HDLCD_MAX_FRAMEBUFFER_SIZE (HDLCD_MAX_XRES * HDLCD_MAX_YRES << 2) + +#define HDLCD_MEM_BASE (CONFIG_PAGE_OFFSET - 0x1000000) + +#define NR_PALETTE 256 + +/* OEMs using HDLCD may wish to enable these settings if + * display disruption is apparent and you suspect HDLCD + * access to RAM may be starved. + */ +/* Turn HDLCD default color red instead of black so + * that it's easy to see pixel clock data underruns + * (compared to other visual disruption) + */ +//#define HDLCD_RED_DEFAULT_COLOUR +/* Add a counter in the IRQ handler to count buffer underruns + * and /proc/hdlcd_underrun to read the counter + */ +//#define HDLCD_COUNT_BUFFERUNDERRUNS +/* Restrict height to 1x screen size + * + */ +//#define HDLCD_NO_VIRTUAL_SCREEN + +#ifdef CONFIG_ANDROID +#define HDLCD_NO_VIRTUAL_SCREEN +#endif + +struct hdlcd_device { + struct fb_info fb; + struct device *dev; + struct clk *clk; + void __iomem *base; + int irq; + struct completion vsync_completion; + unsigned char *edid; +}; diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 3e203eb23cc..40643ca79cd 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -31,6 +31,8 @@ #define GIC_DIST_TARGET 0x800 #define GIC_DIST_CONFIG 0xc00 #define GIC_DIST_SOFTINT 0xf00 +#define GIC_DIST_SGI_PENDING_CLEAR 0xf10 +#define GIC_DIST_SGI_PENDING_SET 0xf20 #define GICH_HCR 0x0 #define GICH_VTR 0x4 @@ -67,12 +69,19 @@ void gic_init_bases(unsigned int, int, void __iomem *, void __iomem *, u32 offset, struct device_node *); void gic_cascade_irq(unsigned int gic_nr, unsigned int irq); +void gic_cpu_if_down(void); + static inline void gic_init(unsigned int nr, int start, void __iomem *dist , void __iomem *cpu) { gic_init_bases(nr, start, dist, cpu, 0, NULL); } +void gic_send_sgi(unsigned int cpu_id, unsigned int irq); +int gic_get_cpu_id(unsigned int cpu); +void gic_migrate_target(unsigned int new_cpu_id); +unsigned long gic_get_sgir_physaddr(void); + #endif /* __ASSEMBLY */ #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 178a8d909f1..0e2a546cdad 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -885,6 +885,13 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); bool cpus_share_cache(int this_cpu, int that_cpu); +#ifdef CONFIG_SCHED_HMP +struct hmp_domain { + struct cpumask cpus; + struct cpumask possible_cpus; + struct list_head hmp_domains; +}; +#endif /* CONFIG_SCHED_HMP */ #else /* CONFIG_SMP */ struct sched_domain_attr; @@ -931,6 +938,12 @@ struct sched_avg { u64 last_runnable_update; s64 decay_count; unsigned long load_avg_contrib; + unsigned long load_avg_ratio; +#ifdef CONFIG_SCHED_HMP + u64 hmp_last_up_migration; + u64 hmp_last_down_migration; +#endif + u32 usage_avg_sum; }; #ifdef CONFIG_SCHEDSTATS diff --git a/include/linux/vexpress.h b/include/linux/vexpress.h index ea7168a6808..c1191ab4cb9 100644 --- a/include/linux/vexpress.h +++ b/include/linux/vexpress.h @@ -68,7 +68,8 @@ */ struct vexpress_config_bridge_info { const char *name; - void *(*func_get)(struct device *dev, struct device_node *node); + void *(*func_get)(struct device *dev, struct device_node *node, + const char *id); void (*func_put)(void *func); int (*func_exec)(void *func, int offset, bool write, u32 *data); }; @@ -87,12 +88,17 @@ void vexpress_config_complete(struct vexpress_config_bridge *bridge, struct vexpress_config_func; -struct vexpress_config_func *__vexpress_config_func_get(struct device *dev, - struct device_node *node); +struct vexpress_config_func *__vexpress_config_func_get( + struct vexpress_config_bridge *bridge, + struct device *dev, + struct device_node *node, + const char *id); +#define vexpress_config_func_get(bridge, id) \ + __vexpress_config_func_get(bridge, NULL, NULL, id) #define vexpress_config_func_get_by_dev(dev) \ - __vexpress_config_func_get(dev, NULL) + __vexpress_config_func_get(NULL, dev, NULL, NULL) #define vexpress_config_func_get_by_node(node) \ - __vexpress_config_func_get(NULL, node) + __vexpress_config_func_get(NULL, NULL, node, NULL) void vexpress_config_func_put(struct vexpress_config_func *func); /* Both may sleep! */ @@ -120,7 +126,53 @@ void vexpress_sysreg_of_early_init(void); struct clk *vexpress_osc_setup(struct device *dev); void vexpress_osc_of_setup(struct device_node *node); +struct clk *vexpress_clk_register_spc(const char *name, int cluster_id); +void vexpress_clk_of_register_spc(void); + void vexpress_clk_init(void __iomem *sp810_base); void vexpress_clk_of_init(void); +/* SPC */ + +#define VEXPRESS_SPC_WAKE_INTR_IRQ(cluster, cpu) \ + (1 << (4 * (cluster) + (cpu))) +#define VEXPRESS_SPC_WAKE_INTR_FIQ(cluster, cpu) \ + (1 << (7 * (cluster) + (cpu))) +#define VEXPRESS_SPC_WAKE_INTR_SWDOG (1 << 10) +#define VEXPRESS_SPC_WAKE_INTR_GTIMER (1 << 11) +#define VEXPRESS_SPC_WAKE_INTR_MASK 0xFFF + +#ifdef CONFIG_VEXPRESS_SPC +extern bool vexpress_spc_check_loaded(void); +extern void vexpress_spc_set_cpu_wakeup_irq(u32 cpu, u32 cluster, bool set); +extern void vexpress_spc_set_global_wakeup_intr(bool set); +extern int vexpress_spc_get_freq_table(u32 cluster, u32 **fptr); +extern int vexpress_spc_get_performance(u32 cluster, u32 *freq); +extern int vexpress_spc_set_performance(u32 cluster, u32 freq); +extern void vexpress_spc_write_resume_reg(u32 cluster, u32 cpu, u32 addr); +extern int vexpress_spc_get_nb_cpus(u32 cluster); +extern void vexpress_spc_powerdown_enable(u32 cluster, bool enable); +#else +static inline bool vexpress_spc_check_loaded(void) { return false; } +static inline void vexpress_spc_set_cpu_wakeup_irq(u32 cpu, u32 cluster, + bool set) { } +static inline void vexpress_spc_set_global_wakeup_intr(bool set) { } +static inline int vexpress_spc_get_freq_table(u32 cluster, u32 **fptr) +{ + return -ENODEV; +} +static inline int vexpress_spc_get_performance(u32 cluster, u32 *freq) +{ + return -ENODEV; +} +static inline int vexpress_spc_set_performance(u32 cluster, u32 freq) +{ + return -ENODEV; +} +static inline void vexpress_spc_write_resume_reg(u32 cluster, + u32 cpu, u32 addr) { } +static inline int vexpress_spc_get_nb_cpus(u32 cluster) { return -ENODEV; } +static inline void vexpress_spc_powerdown_enable(u32 cluster, bool enable) { } +#endif + #endif diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index c586679b6fe..a30ab7910ff 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -198,7 +198,7 @@ extern void __inc_zone_state(struct zone *, enum zone_stat_item); extern void dec_zone_state(struct zone *, enum zone_stat_item); extern void __dec_zone_state(struct zone *, enum zone_stat_item); -void refresh_cpu_vm_stats(int); +bool refresh_cpu_vm_stats(int); void refresh_zone_stat_thresholds(void); void drain_zonestat(struct zone *zone, struct per_cpu_pageset *); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 623488fdc1f..a9f4119c7e2 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -303,6 +303,33 @@ enum { WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ WQ_SYSFS = 1 << 6, /* visible in sysfs, see wq_sysfs_register() */ + /* + * Per-cpu workqueues are generally preferred because they tend to + * show better performance thanks to cache locality. Per-cpu + * workqueues exclude the scheduler from choosing the CPU to + * execute the worker threads, which has an unfortunate side effect + * of increasing power consumption. + * + * The scheduler considers a CPU idle if it doesn't have any task + * to execute and tries to keep idle cores idle to conserve power; + * however, for example, a per-cpu work item scheduled from an + * interrupt handler on an idle CPU will force the scheduler to + * excute the work item on that CPU breaking the idleness, which in + * turn may lead to more scheduling choices which are sub-optimal + * in terms of power consumption. + * + * Workqueues marked with WQ_POWER_EFFICIENT are per-cpu by default + * but become unbound if workqueue.power_efficient kernel param is + * specified. Per-cpu workqueues which are identified to + * contribute significantly to power-consumption are identified and + * marked with this flag and enabling the power_efficient mode + * leads to noticeable power saving at the cost of small + * performance disadvantage. + * + * http://thread.gmane.org/gmane.linux.kernel/1480396 + */ + WQ_POWER_EFFICIENT = 1 << 7, + __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ @@ -333,11 +360,19 @@ enum { * * system_freezable_wq is equivalent to system_wq except that it's * freezable. + * + * *_power_efficient_wq are inclined towards saving power and converted + * into WQ_UNBOUND variants if 'wq_power_efficient' is enabled; otherwise, + * they are same as their non-power-efficient counterparts - e.g. + * system_power_efficient_wq is identical to system_wq if + * 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info. */ extern struct workqueue_struct *system_wq; extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_unbound_wq; extern struct workqueue_struct *system_freezable_wq; +extern struct workqueue_struct *system_power_efficient_wq; +extern struct workqueue_struct *system_freezable_power_efficient_wq; static inline struct workqueue_struct * __deprecated __system_nrt_wq(void) { diff --git a/include/trace/events/power_cpu_migrate.h b/include/trace/events/power_cpu_migrate.h new file mode 100644 index 00000000000..f76dd4de625 --- /dev/null +++ b/include/trace/events/power_cpu_migrate.h @@ -0,0 +1,67 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM power + +#if !defined(_TRACE_POWER_CPU_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_POWER_CPU_MIGRATE_H + +#include <linux/tracepoint.h> + +#define __cpu_migrate_proto \ + TP_PROTO(u64 timestamp, \ + u32 cpu_hwid) +#define __cpu_migrate_args \ + TP_ARGS(timestamp, \ + cpu_hwid) + +DECLARE_EVENT_CLASS(cpu_migrate, + + __cpu_migrate_proto, + __cpu_migrate_args, + + TP_STRUCT__entry( + __field(u64, timestamp ) + __field(u32, cpu_hwid ) + ), + + TP_fast_assign( + __entry->timestamp = timestamp; + __entry->cpu_hwid = cpu_hwid; + ), + + TP_printk("timestamp=%llu cpu_hwid=0x%08lX", + (unsigned long long)__entry->timestamp, + (unsigned long)__entry->cpu_hwid + ) +); + +#define __define_cpu_migrate_event(name) \ + DEFINE_EVENT(cpu_migrate, cpu_migrate_##name, \ + __cpu_migrate_proto, \ + __cpu_migrate_args \ + ) + +__define_cpu_migrate_event(begin); +__define_cpu_migrate_event(finish); +__define_cpu_migrate_event(current); + +#undef __define_cpu_migrate +#undef __cpu_migrate_proto +#undef __cpu_migrate_args + +/* This file can get included multiple times, TRACE_HEADER_MULTI_READ at top */ +#ifndef _PWR_CPU_MIGRATE_EVENT_AVOID_DOUBLE_DEFINING +#define _PWR_CPU_MIGRATE_EVENT_AVOID_DOUBLE_DEFINING + +/* + * Set from_phys_cpu and to_phys_cpu to CPU_MIGRATE_ALL_CPUS to indicate + * a whole-cluster migration: + */ +#define CPU_MIGRATE_ALL_CPUS 0x80000000U +#endif + +#endif /* _TRACE_POWER_CPU_MIGRATE_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE power_cpu_migrate +#include <trace/define_trace.h> diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index e5586caff67..203e8e9933b 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -430,6 +430,159 @@ TRACE_EVENT(sched_pi_setprio, __entry->oldprio, __entry->newprio) ); +/* + * Tracepoint for showing tracked load contribution. + */ +TRACE_EVENT(sched_task_load_contrib, + + TP_PROTO(struct task_struct *tsk, unsigned long load_contrib), + + TP_ARGS(tsk, load_contrib), + + TP_STRUCT__entry( + __array(char, comm, TASK_COMM_LEN) + __field(pid_t, pid) + __field(unsigned long, load_contrib) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->load_contrib = load_contrib; + ), + + TP_printk("comm=%s pid=%d load_contrib=%lu", + __entry->comm, __entry->pid, + __entry->load_contrib) +); + +/* + * Tracepoint for showing tracked task runnable ratio [0..1023]. + */ +TRACE_EVENT(sched_task_runnable_ratio, + + TP_PROTO(struct task_struct *tsk, unsigned long ratio), + + TP_ARGS(tsk, ratio), + + TP_STRUCT__entry( + __array(char, comm, TASK_COMM_LEN) + __field(pid_t, pid) + __field(unsigned long, ratio) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->ratio = ratio; + ), + + TP_printk("comm=%s pid=%d ratio=%lu", + __entry->comm, __entry->pid, + __entry->ratio) +); + +/* + * Tracepoint for showing tracked rq runnable ratio [0..1023]. + */ +TRACE_EVENT(sched_rq_runnable_ratio, + + TP_PROTO(int cpu, unsigned long ratio), + + TP_ARGS(cpu, ratio), + + TP_STRUCT__entry( + __field(int, cpu) + __field(unsigned long, ratio) + ), + + TP_fast_assign( + __entry->cpu = cpu; + __entry->ratio = ratio; + ), + + TP_printk("cpu=%d ratio=%lu", + __entry->cpu, + __entry->ratio) +); + +/* + * Tracepoint for showing tracked rq runnable load. + */ +TRACE_EVENT(sched_rq_runnable_load, + + TP_PROTO(int cpu, u64 load), + + TP_ARGS(cpu, load), + + TP_STRUCT__entry( + __field(int, cpu) + __field(u64, load) + ), + + TP_fast_assign( + __entry->cpu = cpu; + __entry->load = load; + ), + + TP_printk("cpu=%d load=%llu", + __entry->cpu, + __entry->load) +); + +/* + * Tracepoint for showing tracked task cpu usage ratio [0..1023]. + */ +TRACE_EVENT(sched_task_usage_ratio, + + TP_PROTO(struct task_struct *tsk, unsigned long ratio), + + TP_ARGS(tsk, ratio), + + TP_STRUCT__entry( + __array(char, comm, TASK_COMM_LEN) + __field(pid_t, pid) + __field(unsigned long, ratio) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->ratio = ratio; + ), + + TP_printk("comm=%s pid=%d ratio=%lu", + __entry->comm, __entry->pid, + __entry->ratio) +); + +/* + * Tracepoint for HMP (CONFIG_SCHED_HMP) task migrations. + */ +TRACE_EVENT(sched_hmp_migrate, + + TP_PROTO(struct task_struct *tsk, int dest, int force), + + TP_ARGS(tsk, dest, force), + + TP_STRUCT__entry( + __array(char, comm, TASK_COMM_LEN) + __field(pid_t, pid) + __field(int, dest) + __field(int, force) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->dest = dest; + __entry->force = force; + ), + + TP_printk("comm=%s pid=%d dest=%d force=%d", + __entry->comm, __entry->pid, + __entry->dest, __entry->force) +); #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 6bd4a90d199..4d230eafd5e 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -423,7 +423,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, f->lsm_rule = NULL; /* Support legacy tests for a valid loginuid */ - if ((f->type == AUDIT_LOGINUID) && (f->val == 4294967295)) { + if ((f->type == AUDIT_LOGINUID) && (f->val == ~0U)) { f->type = AUDIT_LOGINUID_SET; f->val = 0; } diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 192a302d6cf..473b2b6eccb 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -23,10 +23,27 @@ static struct lock_class_key irq_desc_lock_class; #if defined(CONFIG_SMP) +static int __init irq_affinity_setup(char *str) +{ + zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); + cpulist_parse(str, irq_default_affinity); + /* + * Set at least the boot cpu. We don't want to end up with + * bugreports caused by random comandline masks + */ + cpumask_set_cpu(smp_processor_id(), irq_default_affinity); + return 1; +} +__setup("irqaffinity=", irq_affinity_setup); + static void __init init_irq_default_affinity(void) { - alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); - cpumask_setall(irq_default_affinity); +#ifdef CONFIG_CPUMASK_OFFSTACK + if (!irq_default_affinity) + zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); +#endif + if (cpumask_empty(irq_default_affinity)) + cpumask_setall(irq_default_affinity); } #else static void __init init_irq_default_affinity(void) diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 5dfdc9ea180..46455961a88 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -263,6 +263,26 @@ config PM_GENERIC_DOMAINS bool depends on PM +config WQ_POWER_EFFICIENT_DEFAULT + bool "Enable workqueue power-efficient mode by default" + depends on PM + default n + help + Per-cpu workqueues are generally preferred because they show + better performance thanks to cache locality; unfortunately, + per-cpu workqueues tend to be more power hungry than unbound + workqueues. + + Enabling workqueue.power_efficient kernel parameter makes the + per-cpu workqueues which were observed to contribute + significantly to power consumption unbound, leading to measurably + lower power usage at the cost of small performance overhead. + + This config option determines whether workqueue.power_efficient + is enabled by default. + + If in doubt, say N. + config PM_GENERIC_DOMAINS_SLEEP def_bool y depends on PM_SLEEP && PM_GENERIC_DOMAINS diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e8b335016c5..fb9b7b74a83 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1617,6 +1617,20 @@ static void __sched_fork(struct task_struct *p) #if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED) p->se.avg.runnable_avg_period = 0; p->se.avg.runnable_avg_sum = 0; +#ifdef CONFIG_SCHED_HMP + /* keep LOAD_AVG_MAX in sync with fair.c if load avg series is changed */ +#define LOAD_AVG_MAX 47742 + if (p->mm) { + p->se.avg.hmp_last_up_migration = 0; + p->se.avg.hmp_last_down_migration = 0; + p->se.avg.load_avg_ratio = 1023; + p->se.avg.load_avg_contrib = + (1023 * scale_load_down(p->se.load.weight)); + p->se.avg.runnable_avg_period = LOAD_AVG_MAX; + p->se.avg.runnable_avg_sum = LOAD_AVG_MAX; + p->se.avg.usage_avg_sum = LOAD_AVG_MAX; + } +#endif #endif #ifdef CONFIG_SCHEDSTATS memset(&p->se.statistics, 0, sizeof(p->se.statistics)); @@ -3813,6 +3827,8 @@ static struct task_struct *find_process_by_pid(pid_t pid) return pid ? find_task_by_vpid(pid) : current; } +extern struct cpumask hmp_slow_cpu_mask; + /* Actually do priority change: must hold rq lock. */ static void __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) @@ -3822,8 +3838,14 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) p->normal_prio = normal_prio(p); /* we are holding p->pi_lock already */ p->prio = rt_mutex_getprio(p); - if (rt_prio(p->prio)) + if (rt_prio(p->prio)) { p->sched_class = &rt_sched_class; +#ifdef CONFIG_SCHED_HMP + if (!cpumask_empty(&hmp_slow_cpu_mask)) + if (cpumask_equal(&p->cpus_allowed, cpu_all_mask)) + do_set_cpus_allowed(p, &hmp_slow_cpu_mask); +#endif + } else p->sched_class = &fair_sched_class; set_load_weight(p); diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 75024a67352..fbd8caa83ef 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -94,6 +94,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group #ifdef CONFIG_SMP P(se->avg.runnable_avg_sum); P(se->avg.runnable_avg_period); + P(se->avg.usage_avg_sum); P(se->avg.load_avg_contrib); P(se->avg.decay_count); #endif @@ -223,6 +224,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) cfs_rq->tg_runnable_contrib); SEQ_printf(m, " .%-30s: %d\n", "tg->runnable_avg", atomic_read(&cfs_rq->tg->runnable_avg)); + SEQ_printf(m, " .%-30s: %d\n", "tg->usage_avg", + atomic_read(&cfs_rq->tg->usage_avg)); #endif print_cfs_group_stats(m, cpu, cfs_rq->tg); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 03b73bea33d..5f277f2d34d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -31,9 +31,20 @@ #include <linux/task_work.h> #include <trace/events/sched.h> +#ifdef CONFIG_HMP_VARIABLE_SCALE +#include <linux/sysfs.h> +#include <linux/vmalloc.h> +#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE +/* Include cpufreq header to add a notifier so that cpu frequency + * scaling can track the current CPU frequency + */ +#include <linux/cpufreq.h> +#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */ +#endif /* CONFIG_HMP_VARIABLE_SCALE */ #include "sched.h" + /* * Targeted preemption latency for CPU-bound tasks: * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds) @@ -1201,8 +1212,95 @@ static u32 __compute_runnable_contrib(u64 n) return contrib + runnable_avg_yN_sum[n]; } -/* - * We can represent the historical contribution to runnable average as the +#ifdef CONFIG_HMP_VARIABLE_SCALE + +#define HMP_VARIABLE_SCALE_SHIFT 16ULL +struct hmp_global_attr { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, + struct attribute *attr, char *buf); + ssize_t (*store)(struct kobject *a, struct attribute *b, + const char *c, size_t count); + int *value; + int (*to_sysfs)(int); + int (*from_sysfs)(int); +}; + +#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE +#define HMP_DATA_SYSFS_MAX 4 +#else +#define HMP_DATA_SYSFS_MAX 3 +#endif + +struct hmp_data_struct { +#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE + int freqinvar_load_scale_enabled; +#endif + int multiplier; /* used to scale the time delta */ + struct attribute_group attr_group; + struct attribute *attributes[HMP_DATA_SYSFS_MAX + 1]; + struct hmp_global_attr attr[HMP_DATA_SYSFS_MAX]; +} hmp_data; + +static u64 hmp_variable_scale_convert(u64 delta); +#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE +/* Frequency-Invariant Load Modification: + * Loads are calculated as in PJT's patch however we also scale the current + * contribution in line with the frequency of the CPU that the task was + * executed on. + * In this version, we use a simple linear scale derived from the maximum + * frequency reported by CPUFreq. As an example: + * + * Consider that we ran a task for 100% of the previous interval. + * + * Our CPU was under asynchronous frequency control through one of the + * CPUFreq governors. + * + * The CPUFreq governor reports that it is able to scale the CPU between + * 500MHz and 1GHz. + * + * During the period, the CPU was running at 1GHz. + * + * In this case, our load contribution for that period is calculated as + * 1 * (number_of_active_microseconds) + * + * This results in our task being able to accumulate maximum load as normal. + * + * + * Consider now that our CPU was executing at 500MHz. + * + * We now scale the load contribution such that it is calculated as + * 0.5 * (number_of_active_microseconds) + * + * Our task can only record 50% maximum load during this period. + * + * This represents the task consuming 50% of the CPU's *possible* compute + * capacity. However the task did consume 100% of the CPU's *available* + * compute capacity which is the value seen by the CPUFreq governor and + * user-side CPU Utilization tools. + * + * Restricting tracked load to be scaled by the CPU's frequency accurately + * represents the consumption of possible compute capacity and allows the + * HMP migration's simple threshold migration strategy to interact more + * predictably with CPUFreq's asynchronous compute capacity changes. + */ +#define SCHED_FREQSCALE_SHIFT 10 +struct cpufreq_extents { + u32 curr_scale; + u32 min; + u32 max; + u32 flags; +}; +/* Flag set when the governor in use only allows one frequency. + * Disables scaling. + */ +#define SCHED_LOAD_FREQINVAR_SINGLEFREQ 0x01 + +static struct cpufreq_extents freq_scale[CONFIG_NR_CPUS]; +#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */ +#endif /* CONFIG_HMP_VARIABLE_SCALE */ + +/* We can represent the historical contribution to runnable average as the * coefficients of a geometric series. To do this we sub-divide our runnable * history into segments of approximately 1ms (1024us); label the segment that * occurred N-ms ago p_N, with p_0 corresponding to the current period, e.g. @@ -1231,13 +1329,24 @@ static u32 __compute_runnable_contrib(u64 n) */ static __always_inline int __update_entity_runnable_avg(u64 now, struct sched_avg *sa, - int runnable) + int runnable, + int running, + int cpu) { u64 delta, periods; u32 runnable_contrib; int delta_w, decayed = 0; +#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE + u64 scaled_delta; + u32 scaled_runnable_contrib; + int scaled_delta_w; + u32 curr_scale = 1024; +#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */ delta = now - sa->last_runnable_update; +#ifdef CONFIG_HMP_VARIABLE_SCALE + delta = hmp_variable_scale_convert(delta); +#endif /* * This should only happen when time goes backwards, which it * unfortunately does during sched clock init when we swap over to TSC. @@ -1256,6 +1365,12 @@ static __always_inline int __update_entity_runnable_avg(u64 now, return 0; sa->last_runnable_update = now; +#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE + /* retrieve scale factor for load */ + if (hmp_data.freqinvar_load_scale_enabled) + curr_scale = freq_scale[cpu].curr_scale; +#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */ + /* delta_w is the amount already accumulated against our next period */ delta_w = sa->runnable_avg_period % 1024; if (delta + delta_w >= 1024) { @@ -1268,8 +1383,20 @@ static __always_inline int __update_entity_runnable_avg(u64 now, * period and accrue it. */ delta_w = 1024 - delta_w; + /* scale runnable time if necessary */ +#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE + scaled_delta_w = (delta_w * curr_scale) + >> SCHED_FREQSCALE_SHIFT; + if (runnable) + sa->runnable_avg_sum += scaled_delta_w; + if (running) + sa->usage_avg_sum += scaled_delta_w; +#else if (runnable) sa->runnable_avg_sum += delta_w; + if (running) + sa->usage_avg_sum += delta_w; +#endif /* #ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */ sa->runnable_avg_period += delta_w; delta -= delta_w; @@ -1277,22 +1404,49 @@ static __always_inline int __update_entity_runnable_avg(u64 now, /* Figure out how many additional periods this update spans */ periods = delta / 1024; delta %= 1024; - + /* decay the load we have accumulated so far */ sa->runnable_avg_sum = decay_load(sa->runnable_avg_sum, periods + 1); sa->runnable_avg_period = decay_load(sa->runnable_avg_period, periods + 1); - + sa->usage_avg_sum = decay_load(sa->usage_avg_sum, periods + 1); + /* add the contribution from this period */ /* Efficiently calculate \sum (1..n_period) 1024*y^i */ runnable_contrib = __compute_runnable_contrib(periods); + /* Apply load scaling if necessary. + * Note that multiplying the whole series is same as + * multiplying all terms + */ +#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE + scaled_runnable_contrib = (runnable_contrib * curr_scale) + >> SCHED_FREQSCALE_SHIFT; + if (runnable) + sa->runnable_avg_sum += scaled_runnable_contrib; + if (running) + sa->usage_avg_sum += scaled_runnable_contrib; +#else if (runnable) sa->runnable_avg_sum += runnable_contrib; + if (running) + sa->usage_avg_sum += runnable_contrib; +#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */ sa->runnable_avg_period += runnable_contrib; } /* Remainder of delta accrued against u_0` */ + /* scale if necessary */ +#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE + scaled_delta = ((delta * curr_scale) >> SCHED_FREQSCALE_SHIFT); + if (runnable) + sa->runnable_avg_sum += scaled_delta; + if (running) + sa->usage_avg_sum += scaled_delta; +#else if (runnable) sa->runnable_avg_sum += delta; + if (running) + sa->usage_avg_sum += delta; +#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */ sa->runnable_avg_period += delta; return decayed; @@ -1338,16 +1492,28 @@ static inline void __update_tg_runnable_avg(struct sched_avg *sa, struct cfs_rq *cfs_rq) { struct task_group *tg = cfs_rq->tg; - long contrib; + long contrib, usage_contrib; /* The fraction of a cpu used by this cfs_rq */ contrib = div_u64(sa->runnable_avg_sum << NICE_0_SHIFT, sa->runnable_avg_period + 1); contrib -= cfs_rq->tg_runnable_contrib; - if (abs(contrib) > cfs_rq->tg_runnable_contrib / 64) { + usage_contrib = div_u64(sa->usage_avg_sum << NICE_0_SHIFT, + sa->runnable_avg_period + 1); + usage_contrib -= cfs_rq->tg_usage_contrib; + + /* + * contrib/usage at this point represent deltas, only update if they + * are substantive. + */ + if ((abs(contrib) > cfs_rq->tg_runnable_contrib / 64) || + (abs(usage_contrib) > cfs_rq->tg_usage_contrib / 64)) { atomic_add(contrib, &tg->runnable_avg); cfs_rq->tg_runnable_contrib += contrib; + + atomic_add(usage_contrib, &tg->usage_avg); + cfs_rq->tg_usage_contrib += usage_contrib; } } @@ -1408,12 +1574,18 @@ static inline void __update_task_entity_contrib(struct sched_entity *se) contrib = se->avg.runnable_avg_sum * scale_load_down(se->load.weight); contrib /= (se->avg.runnable_avg_period + 1); se->avg.load_avg_contrib = scale_load(contrib); + trace_sched_task_load_contrib(task_of(se), se->avg.load_avg_contrib); + contrib = se->avg.runnable_avg_sum * scale_load_down(NICE_0_LOAD); + contrib /= (se->avg.runnable_avg_period + 1); + se->avg.load_avg_ratio = scale_load(contrib); + trace_sched_task_runnable_ratio(task_of(se), se->avg.load_avg_ratio); } /* Compute the current contribution to load_avg by se, return any delta */ -static long __update_entity_load_avg_contrib(struct sched_entity *se) +static long __update_entity_load_avg_contrib(struct sched_entity *se, long *ratio) { long old_contrib = se->avg.load_avg_contrib; + long old_ratio = se->avg.load_avg_ratio; if (entity_is_task(se)) { __update_task_entity_contrib(se); @@ -1422,6 +1594,8 @@ static long __update_entity_load_avg_contrib(struct sched_entity *se) __update_group_entity_contrib(se); } + if (ratio) + *ratio = se->avg.load_avg_ratio - old_ratio; return se->avg.load_avg_contrib - old_contrib; } @@ -1441,9 +1615,13 @@ static inline void update_entity_load_avg(struct sched_entity *se, int update_cfs_rq) { struct cfs_rq *cfs_rq = cfs_rq_of(se); - long contrib_delta; + long contrib_delta, ratio_delta; u64 now; + int cpu = -1; /* not used in normal case */ +#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE + cpu = cfs_rq->rq->cpu; +#endif /* * For a group entity we need to use their owned cfs_rq_clock_task() in * case they are the parent of a throttled hierarchy. @@ -1453,18 +1631,21 @@ static inline void update_entity_load_avg(struct sched_entity *se, else now = cfs_rq_clock_task(group_cfs_rq(se)); - if (!__update_entity_runnable_avg(now, &se->avg, se->on_rq)) + if (!__update_entity_runnable_avg(now, &se->avg, se->on_rq, + cfs_rq->curr == se, cpu)) return; - contrib_delta = __update_entity_load_avg_contrib(se); + contrib_delta = __update_entity_load_avg_contrib(se, &ratio_delta); if (!update_cfs_rq) return; - if (se->on_rq) + if (se->on_rq) { cfs_rq->runnable_load_avg += contrib_delta; - else + rq_of(cfs_rq)->avg.load_avg_ratio += ratio_delta; + } else { subtract_blocked_load_contrib(cfs_rq, -contrib_delta); + } } /* @@ -1497,8 +1678,16 @@ static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update) static inline void update_rq_runnable_avg(struct rq *rq, int runnable) { - __update_entity_runnable_avg(rq->clock_task, &rq->avg, runnable); + int cpu = -1; /* not used in normal case */ + +#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE + cpu = rq->cpu; +#endif + __update_entity_runnable_avg(rq->clock_task, &rq->avg, runnable, + runnable, cpu); __update_tg_runnable_avg(&rq->avg, &rq->cfs); + trace_sched_rq_runnable_ratio(cpu_of(rq), rq->avg.load_avg_ratio); + trace_sched_rq_runnable_load(cpu_of(rq), rq->cfs.runnable_load_avg); } /* Add the load generated by se into cfs_rq's child load-average */ @@ -1540,6 +1729,8 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, } cfs_rq->runnable_load_avg += se->avg.load_avg_contrib; + rq_of(cfs_rq)->avg.load_avg_ratio += se->avg.load_avg_ratio; + /* we force update consideration on load-balancer moves */ update_cfs_rq_blocked_load(cfs_rq, !wakeup); } @@ -1558,6 +1749,8 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq, update_cfs_rq_blocked_load(cfs_rq, !sleep); cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib; + rq_of(cfs_rq)->avg.load_avg_ratio -= se->avg.load_avg_ratio; + if (sleep) { cfs_rq->blocked_load_avg += se->avg.load_avg_contrib; se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter); @@ -1886,6 +2079,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) */ update_stats_wait_end(cfs_rq, se); __dequeue_entity(cfs_rq, se); + update_entity_load_avg(se, 1); } update_stats_curr_start(cfs_rq, se); @@ -3315,6 +3509,547 @@ done: return target; } +#ifdef CONFIG_SCHED_HMP +/* + * Heterogenous multiprocessor (HMP) optimizations + * + * The cpu types are distinguished using a list of hmp_domains + * which each represent one cpu type using a cpumask. + * The list is assumed ordered by compute capacity with the + * fastest domain first. + */ +DEFINE_PER_CPU(struct hmp_domain *, hmp_cpu_domain); +static const int hmp_max_tasks = 5; + +extern void __init arch_get_hmp_domains(struct list_head *hmp_domains_list); + +/* Setup hmp_domains */ +static int __init hmp_cpu_mask_setup(void) +{ + char buf[64]; + struct hmp_domain *domain; + struct list_head *pos; + int dc, cpu; + + pr_debug("Initializing HMP scheduler:\n"); + + /* Initialize hmp_domains using platform code */ + arch_get_hmp_domains(&hmp_domains); + if (list_empty(&hmp_domains)) { + pr_debug("HMP domain list is empty!\n"); + return 0; + } + + /* Print hmp_domains */ + dc = 0; + list_for_each(pos, &hmp_domains) { + domain = list_entry(pos, struct hmp_domain, hmp_domains); + cpulist_scnprintf(buf, 64, &domain->possible_cpus); + pr_debug(" HMP domain %d: %s\n", dc, buf); + + for_each_cpu_mask(cpu, domain->possible_cpus) { + per_cpu(hmp_cpu_domain, cpu) = domain; + } + dc++; + } + + return 1; +} + +static struct hmp_domain *hmp_get_hmp_domain_for_cpu(int cpu) +{ + struct hmp_domain *domain; + struct list_head *pos; + + list_for_each(pos, &hmp_domains) { + domain = list_entry(pos, struct hmp_domain, hmp_domains); + if(cpumask_test_cpu(cpu, &domain->possible_cpus)) + return domain; + } + return NULL; +} + +static void hmp_online_cpu(int cpu) +{ + struct hmp_domain *domain = hmp_get_hmp_domain_for_cpu(cpu); + + if(domain) + cpumask_set_cpu(cpu, &domain->cpus); +} + +static void hmp_offline_cpu(int cpu) +{ + struct hmp_domain *domain = hmp_get_hmp_domain_for_cpu(cpu); + + if(domain) + cpumask_clear_cpu(cpu, &domain->cpus); +} +/* + * Needed to determine heaviest tasks etc. + */ +static inline unsigned int hmp_cpu_is_fastest(int cpu); +static inline unsigned int hmp_cpu_is_slowest(int cpu); +static inline struct hmp_domain *hmp_slower_domain(int cpu); +static inline struct hmp_domain *hmp_faster_domain(int cpu); + +/* must hold runqueue lock for queue se is currently on */ +static struct sched_entity *hmp_get_heaviest_task( + struct sched_entity *se, int migrate_up) +{ + int num_tasks = hmp_max_tasks; + struct sched_entity *max_se = se; + unsigned long int max_ratio = se->avg.load_avg_ratio; + const struct cpumask *hmp_target_mask = NULL; + + if (migrate_up) { + struct hmp_domain *hmp; + if (hmp_cpu_is_fastest(cpu_of(se->cfs_rq->rq))) + return max_se; + + hmp = hmp_faster_domain(cpu_of(se->cfs_rq->rq)); + hmp_target_mask = &hmp->cpus; + } + /* The currently running task is not on the runqueue */ + se = __pick_first_entity(cfs_rq_of(se)); + + while (num_tasks && se) { + if (entity_is_task(se) && + (se->avg.load_avg_ratio > max_ratio && + hmp_target_mask && + cpumask_intersects(hmp_target_mask, + tsk_cpus_allowed(task_of(se))))) { + max_se = se; + max_ratio = se->avg.load_avg_ratio; + } + se = __pick_next_entity(se); + num_tasks--; + } + return max_se; +} + +static struct sched_entity *hmp_get_lightest_task( + struct sched_entity *se, int migrate_down) +{ + int num_tasks = hmp_max_tasks; + struct sched_entity *min_se = se; + unsigned long int min_ratio = se->avg.load_avg_ratio; + const struct cpumask *hmp_target_mask = NULL; + + if (migrate_down) { + struct hmp_domain *hmp; + if (hmp_cpu_is_slowest(cpu_of(se->cfs_rq->rq))) + return min_se; + hmp = hmp_slower_domain(cpu_of(se->cfs_rq->rq)); + hmp_target_mask = &hmp->cpus; + } + /* The currently running task is not on the runqueue */ + se = __pick_first_entity(cfs_rq_of(se)); + + while (num_tasks && se) { + if (entity_is_task(se) && + (se->avg.load_avg_ratio < min_ratio && + hmp_target_mask && + cpumask_intersects(hmp_target_mask, + tsk_cpus_allowed(task_of(se))))) { + min_se = se; + min_ratio = se->avg.load_avg_ratio; + } + se = __pick_next_entity(se); + num_tasks--; + } + return min_se; +} + +/* + * Migration thresholds should be in the range [0..1023] + * hmp_up_threshold: min. load required for migrating tasks to a faster cpu + * hmp_down_threshold: max. load allowed for tasks migrating to a slower cpu + * The default values (512, 256) offer good responsiveness, but may need + * tweaking suit particular needs. + * + * hmp_up_prio: Only up migrate task with high priority (<hmp_up_prio) + * hmp_next_up_threshold: Delay before next up migration (1024 ~= 1 ms) + * hmp_next_down_threshold: Delay before next down migration (1024 ~= 1 ms) + */ +unsigned int hmp_up_threshold = 512; +unsigned int hmp_down_threshold = 256; +#ifdef CONFIG_SCHED_HMP_PRIO_FILTER +unsigned int hmp_up_prio = NICE_TO_PRIO(CONFIG_SCHED_HMP_PRIO_FILTER_VAL); +#endif +unsigned int hmp_next_up_threshold = 4096; +unsigned int hmp_next_down_threshold = 4096; + +static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se); +static unsigned int hmp_down_migration(int cpu, struct sched_entity *se); +static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd, + int *min_cpu); + +/* Check if cpu is in fastest hmp_domain */ +static inline unsigned int hmp_cpu_is_fastest(int cpu) +{ + struct list_head *pos; + + pos = &hmp_cpu_domain(cpu)->hmp_domains; + return pos == hmp_domains.next; +} + +/* Check if cpu is in slowest hmp_domain */ +static inline unsigned int hmp_cpu_is_slowest(int cpu) +{ + struct list_head *pos; + + pos = &hmp_cpu_domain(cpu)->hmp_domains; + return list_is_last(pos, &hmp_domains); +} + +/* Next (slower) hmp_domain relative to cpu */ +static inline struct hmp_domain *hmp_slower_domain(int cpu) +{ + struct list_head *pos; + + pos = &hmp_cpu_domain(cpu)->hmp_domains; + return list_entry(pos->next, struct hmp_domain, hmp_domains); +} + +/* Previous (faster) hmp_domain relative to cpu */ +static inline struct hmp_domain *hmp_faster_domain(int cpu) +{ + struct list_head *pos; + + pos = &hmp_cpu_domain(cpu)->hmp_domains; + return list_entry(pos->prev, struct hmp_domain, hmp_domains); +} + +/* + * Selects a cpu in previous (faster) hmp_domain + * Note that cpumask_any_and() returns the first cpu in the cpumask + */ +static inline unsigned int hmp_select_faster_cpu(struct task_struct *tsk, + int cpu) +{ + int lowest_cpu=NR_CPUS; + __always_unused int lowest_ratio = hmp_domain_min_load(hmp_faster_domain(cpu), &lowest_cpu); + /* + * If the lowest-loaded CPU in the domain is allowed by the task affinity + * select that one, otherwise select one which is allowed + */ + if(lowest_cpu != NR_CPUS && cpumask_test_cpu(lowest_cpu,tsk_cpus_allowed(tsk))) + return lowest_cpu; + else + return cpumask_any_and(&hmp_faster_domain(cpu)->cpus, + tsk_cpus_allowed(tsk)); +} + +/* + * Selects a cpu in next (slower) hmp_domain + * Note that cpumask_any_and() returns the first cpu in the cpumask + */ +static inline unsigned int hmp_select_slower_cpu(struct task_struct *tsk, + int cpu) +{ + int lowest_cpu=NR_CPUS; + struct hmp_domain *hmp; + __always_unused int lowest_ratio; + + if (hmp_cpu_is_slowest(cpu)) + hmp = hmp_cpu_domain(cpu); + else + hmp = hmp_slower_domain(cpu); + + lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu); + /* + * If the lowest-loaded CPU in the domain is allowed by the task affinity + * select that one, otherwise select one which is allowed + */ + if(lowest_cpu != NR_CPUS && cpumask_test_cpu(lowest_cpu,tsk_cpus_allowed(tsk))) + return lowest_cpu; + else + return cpumask_any_and(&hmp_slower_domain(cpu)->cpus, + tsk_cpus_allowed(tsk)); +} + +static inline void hmp_next_up_delay(struct sched_entity *se, int cpu) +{ + /* hack - always use clock from first online CPU */ + u64 now = cpu_rq(cpumask_first(cpu_online_mask))->clock_task; + se->avg.hmp_last_up_migration = now; + se->avg.hmp_last_down_migration = 0; + cpu_rq(cpu)->avg.hmp_last_up_migration = now; + cpu_rq(cpu)->avg.hmp_last_down_migration = 0; +} + +static inline void hmp_next_down_delay(struct sched_entity *se, int cpu) +{ + /* hack - always use clock from first online CPU */ + u64 now = cpu_rq(cpumask_first(cpu_online_mask))->clock_task; + se->avg.hmp_last_down_migration = now; + se->avg.hmp_last_up_migration = 0; + cpu_rq(cpu)->avg.hmp_last_down_migration = now; + cpu_rq(cpu)->avg.hmp_last_up_migration = 0; +} + +#ifdef CONFIG_HMP_VARIABLE_SCALE +/* + * Heterogenous multiprocessor (HMP) optimizations + * + * These functions allow to change the growing speed of the load_avg_ratio + * by default it goes from 0 to 0.5 in LOAD_AVG_PERIOD = 32ms + * This can now be changed with /sys/kernel/hmp/load_avg_period_ms. + * + * These functions also allow to change the up and down threshold of HMP + * using /sys/kernel/hmp/{up,down}_threshold. + * Both must be between 0 and 1023. The threshold that is compared + * to the load_avg_ratio is up_threshold/1024 and down_threshold/1024. + * + * For instance, if load_avg_period = 64 and up_threshold = 512, an idle + * task with a load of 0 will reach the threshold after 64ms of busy loop. + * + * Changing load_avg_periods_ms has the same effect than changing the + * default scaling factor Y=1002/1024 in the load_avg_ratio computation to + * (1002/1024.0)^(LOAD_AVG_PERIOD/load_avg_period_ms), but the last one + * could trigger overflows. + * For instance, with Y = 1023/1024 in __update_task_entity_contrib() + * "contrib = se->avg.runnable_avg_sum * scale_load_down(se->load.weight);" + * could be overflowed for a weight > 2^12 even is the load_avg_contrib + * should still be a 32bits result. This would not happen by multiplicating + * delta time by 1/22 and setting load_avg_period_ms = 706. + */ + +/* + * By scaling the delta time it end-up increasing or decrease the + * growing speed of the per entity load_avg_ratio + * The scale factor hmp_data.multiplier is a fixed point + * number: (32-HMP_VARIABLE_SCALE_SHIFT).HMP_VARIABLE_SCALE_SHIFT + */ +static u64 hmp_variable_scale_convert(u64 delta) +{ + u64 high = delta >> 32ULL; + u64 low = delta & 0xffffffffULL; + low *= hmp_data.multiplier; + high *= hmp_data.multiplier; + return (low >> HMP_VARIABLE_SCALE_SHIFT) + + (high << (32ULL - HMP_VARIABLE_SCALE_SHIFT)); +} + +static ssize_t hmp_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + ssize_t ret = 0; + struct hmp_global_attr *hmp_attr = + container_of(attr, struct hmp_global_attr, attr); + int temp = *(hmp_attr->value); + if (hmp_attr->to_sysfs != NULL) + temp = hmp_attr->to_sysfs(temp); + ret = sprintf(buf, "%d\n", temp); + return ret; +} + +static ssize_t hmp_store(struct kobject *a, struct attribute *attr, + const char *buf, size_t count) +{ + int temp; + ssize_t ret = count; + struct hmp_global_attr *hmp_attr = + container_of(attr, struct hmp_global_attr, attr); + char *str = vmalloc(count + 1); + if (str == NULL) + return -ENOMEM; + memcpy(str, buf, count); + str[count] = 0; + if (sscanf(str, "%d", &temp) < 1) + ret = -EINVAL; + else { + if (hmp_attr->from_sysfs != NULL) + temp = hmp_attr->from_sysfs(temp); + if (temp < 0) + ret = -EINVAL; + else + *(hmp_attr->value) = temp; + } + vfree(str); + return ret; +} + +static int hmp_period_tofrom_sysfs(int value) +{ + return (LOAD_AVG_PERIOD << HMP_VARIABLE_SCALE_SHIFT) / value; +} + +/* max value for threshold is 1024 */ +static int hmp_theshold_from_sysfs(int value) +{ + if (value > 1024) + return -1; + return value; +} +#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE +/* freqinvar control is only 0,1 off/on */ +static int hmp_freqinvar_from_sysfs(int value) +{ + if (value < 0 || value > 1) + return -1; + return value; +} +#endif +static void hmp_attr_add( + const char *name, + int *value, + int (*to_sysfs)(int), + int (*from_sysfs)(int)) +{ + int i = 0; + while (hmp_data.attributes[i] != NULL) { + i++; + if (i >= HMP_DATA_SYSFS_MAX) + return; + } + hmp_data.attr[i].attr.mode = 0644; + hmp_data.attr[i].show = hmp_show; + hmp_data.attr[i].store = hmp_store; + hmp_data.attr[i].attr.name = name; + hmp_data.attr[i].value = value; + hmp_data.attr[i].to_sysfs = to_sysfs; + hmp_data.attr[i].from_sysfs = from_sysfs; + hmp_data.attributes[i] = &hmp_data.attr[i].attr; + hmp_data.attributes[i + 1] = NULL; +} + +static int hmp_attr_init(void) +{ + int ret; + memset(&hmp_data, sizeof(hmp_data), 0); + /* by default load_avg_period_ms == LOAD_AVG_PERIOD + * meaning no change + */ + hmp_data.multiplier = hmp_period_tofrom_sysfs(LOAD_AVG_PERIOD); + + hmp_attr_add("load_avg_period_ms", + &hmp_data.multiplier, + hmp_period_tofrom_sysfs, + hmp_period_tofrom_sysfs); + hmp_attr_add("up_threshold", + &hmp_up_threshold, + NULL, + hmp_theshold_from_sysfs); + hmp_attr_add("down_threshold", + &hmp_down_threshold, + NULL, + hmp_theshold_from_sysfs); +#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE + /* default frequency-invariant scaling ON */ + hmp_data.freqinvar_load_scale_enabled = 1; + hmp_attr_add("frequency_invariant_load_scale", + &hmp_data.freqinvar_load_scale_enabled, + NULL, + hmp_freqinvar_from_sysfs); +#endif + hmp_data.attr_group.name = "hmp"; + hmp_data.attr_group.attrs = hmp_data.attributes; + ret = sysfs_create_group(kernel_kobj, + &hmp_data.attr_group); + return 0; +} +late_initcall(hmp_attr_init); +#endif /* CONFIG_HMP_VARIABLE_SCALE */ + +static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd, + int *min_cpu) +{ + int cpu; + int min_cpu_runnable_temp = NR_CPUS; + u64 min_target_last_migration = ULLONG_MAX; + u64 curr_last_migration; + unsigned long min_runnable_load = INT_MAX; + unsigned long contrib; + struct sched_avg *avg; + + for_each_cpu_mask(cpu, hmpd->cpus) { + avg = &cpu_rq(cpu)->avg; + /* used for both up and down migration */ + curr_last_migration = avg->hmp_last_up_migration ? + avg->hmp_last_up_migration : avg->hmp_last_down_migration; + + contrib = avg->load_avg_ratio; + /* + * Consider a runqueue completely busy if there is any load + * on it. Definitely not the best for overall fairness, but + * does well in typical Android use cases. + */ + if (contrib) + contrib = 1023; + + if ((contrib < min_runnable_load) || + (contrib == min_runnable_load && + curr_last_migration < min_target_last_migration)) { + /* + * if the load is the same target the CPU with + * the longest time since a migration. + * This is to spread migration load between + * members of a domain more evenly when the + * domain is fully loaded + */ + min_runnable_load = contrib; + min_cpu_runnable_temp = cpu; + min_target_last_migration = curr_last_migration; + } + } + + if (min_cpu) + *min_cpu = min_cpu_runnable_temp; + + return min_runnable_load; +} + +/* + * Calculate the task starvation + * This is the ratio of actually running time vs. runnable time. + * If the two are equal the task is getting the cpu time it needs or + * it is alone on the cpu and the cpu is fully utilized. + */ +static inline unsigned int hmp_task_starvation(struct sched_entity *se) +{ + u32 starvation; + + starvation = se->avg.usage_avg_sum * scale_load_down(NICE_0_LOAD); + starvation /= (se->avg.runnable_avg_sum + 1); + + return scale_load(starvation); +} + +static inline unsigned int hmp_offload_down(int cpu, struct sched_entity *se) +{ + int min_usage; + int dest_cpu = NR_CPUS; + + if (hmp_cpu_is_slowest(cpu)) + return NR_CPUS; + + /* Is there an idle CPU in the current domain */ + min_usage = hmp_domain_min_load(hmp_cpu_domain(cpu), NULL); + if (min_usage == 0) + return NR_CPUS; + + /* Is the task alone on the cpu? */ + if (cpu_rq(cpu)->cfs.h_nr_running < 2) + return NR_CPUS; + + /* Is the task actually starving? */ + /* >=25% ratio running/runnable = starving */ + if (hmp_task_starvation(se) > 768) + return NR_CPUS; + + /* Does the slower domain have any idle CPUs? */ + min_usage = hmp_domain_min_load(hmp_slower_domain(cpu), &dest_cpu); + if (min_usage > 0) + return NR_CPUS; + + if (cpumask_test_cpu(dest_cpu, &hmp_slower_domain(cpu)->cpus)) + return dest_cpu; + + return NR_CPUS; +} +#endif /* CONFIG_SCHED_HMP */ + /* * sched_balance_self: balance the current task (running on cpu) in domains * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and @@ -3339,6 +4074,36 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) if (p->nr_cpus_allowed == 1) return prev_cpu; +#ifdef CONFIG_SCHED_HMP + /* always put non-kernel forking tasks on a big domain */ + if (p->mm && (sd_flag & SD_BALANCE_FORK)) { + if(hmp_cpu_is_fastest(prev_cpu)) { + struct hmp_domain *hmpdom = list_entry(&hmp_cpu_domain(prev_cpu)->hmp_domains, struct hmp_domain, hmp_domains); + __always_unused int lowest_ratio = hmp_domain_min_load(hmpdom, &new_cpu); + if (new_cpu != NR_CPUS && + cpumask_test_cpu(new_cpu, + tsk_cpus_allowed(p))) { + hmp_next_up_delay(&p->se, new_cpu); + return new_cpu; + } else { + new_cpu = cpumask_any_and( + &hmp_faster_domain(cpu)->cpus, + tsk_cpus_allowed(p)); + if (new_cpu < nr_cpu_ids) { + hmp_next_up_delay(&p->se, new_cpu); + return new_cpu; + } + } + } else { + new_cpu = hmp_select_faster_cpu(p, prev_cpu); + if (new_cpu != NR_CPUS) { + hmp_next_up_delay(&p->se, new_cpu); + return new_cpu; + } + } + } +#endif + if (sd_flag & SD_BALANCE_WAKE) { if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) want_affine = 1; @@ -3413,6 +4178,23 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) unlock: rcu_read_unlock(); +#ifdef CONFIG_SCHED_HMP + if (hmp_up_migration(prev_cpu, &new_cpu, &p->se)) { + hmp_next_up_delay(&p->se, new_cpu); + trace_sched_hmp_migrate(p, new_cpu, 0); + return new_cpu; + } + if (hmp_down_migration(prev_cpu, &p->se)) { + new_cpu = hmp_select_slower_cpu(p, prev_cpu); + hmp_next_down_delay(&p->se, new_cpu); + trace_sched_hmp_migrate(p, new_cpu, 0); + return new_cpu; + } + /* Make sure that the task stays in its previous hmp domain */ + if (!cpumask_test_cpu(new_cpu, &hmp_cpu_domain(prev_cpu)->cpus)) + return prev_cpu; +#endif + return new_cpu; } @@ -3946,7 +4728,6 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) * 1) task is cache cold, or * 2) too many balance attempts have failed. */ - tsk_cache_hot = task_hot(p, env->src_rq->clock_task, env->sd); if (!tsk_cache_hot || env->sd->nr_balance_failed > env->sd->cache_nice_tries) { @@ -5231,7 +6012,9 @@ out_one_pinned: out: return ld_moved; } - +#ifdef CONFIG_SCHED_HMP +static unsigned int hmp_idle_pull(int this_cpu); +#endif /* * idle_balance is called by schedule() if this_cpu is about to become * idle. Attempts to pull tasks from other CPUs. @@ -5276,7 +6059,10 @@ void idle_balance(int this_cpu, struct rq *this_rq) } } rcu_read_unlock(); - +#ifdef CONFIG_SCHED_HMP + if (!pulled_task) + pulled_task = hmp_idle_pull(this_cpu); +#endif raw_spin_lock(&this_rq->lock); if (pulled_task || time_after(jiffies, this_rq->next_balance)) { @@ -5372,7 +6158,11 @@ static struct { static inline int find_new_ilb(int call_cpu) { int ilb = cpumask_first(nohz.idle_cpus_mask); - +#ifdef CONFIG_SCHED_HMP + /* restrict nohz balancing to occur in the same hmp domain */ + ilb = cpumask_first_and(nohz.idle_cpus_mask, + &((struct hmp_domain *)hmp_cpu_domain(call_cpu))->cpus); +#endif if (ilb < nr_cpu_ids && idle_cpu(ilb)) return ilb; @@ -5651,6 +6441,18 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu) if (time_before(now, nohz.next_balance)) return 0; +#ifdef CONFIG_SCHED_HMP + /* + * Bail out if there are no nohz CPUs in our + * HMP domain, since we will move tasks between + * domains through wakeup and force balancing + * as necessary based upon task load. + */ + if (cpumask_first_and(nohz.idle_cpus_mask, + &((struct hmp_domain *)hmp_cpu_domain(cpu))->cpus) >= nr_cpu_ids) + return 0; +#endif + if (rq->nr_running >= 2) goto need_kick; @@ -5683,6 +6485,458 @@ need_kick: static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { } #endif +#ifdef CONFIG_SCHED_HMP +/* Check if task should migrate to a faster cpu */ +static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se) +{ + struct task_struct *p = task_of(se); + u64 now; + + if (target_cpu) + *target_cpu = NR_CPUS; + + if (hmp_cpu_is_fastest(cpu)) + return 0; + +#ifdef CONFIG_SCHED_HMP_PRIO_FILTER + /* Filter by task priority */ + if (p->prio >= hmp_up_prio) + return 0; +#endif + if (se->avg.load_avg_ratio < hmp_up_threshold) + return 0; + + /* Let the task load settle before doing another up migration */ + /* hack - always use clock from first online CPU */ + now = cpu_rq(cpumask_first(cpu_online_mask))->clock_task; + if (((now - se->avg.hmp_last_up_migration) >> 10) + < hmp_next_up_threshold) + return 0; + + /* hmp_domain_min_load only returns 0 for an + * idle CPU or 1023 for any partly-busy one. + * Be explicit about requirement for an idle CPU. + */ + if (hmp_domain_min_load(hmp_faster_domain(cpu), target_cpu) != 0) + return 0; + + if (cpumask_intersects(&hmp_faster_domain(cpu)->cpus, + tsk_cpus_allowed(p))) + return 1; + + return 0; +} + +/* Check if task should migrate to a slower cpu */ +static unsigned int hmp_down_migration(int cpu, struct sched_entity *se) +{ + struct task_struct *p = task_of(se); + u64 now; + + if (hmp_cpu_is_slowest(cpu)) + return 0; + +#ifdef CONFIG_SCHED_HMP_PRIO_FILTER + /* Filter by task priority */ + if ((p->prio >= hmp_up_prio) && + cpumask_intersects(&hmp_slower_domain(cpu)->cpus, + tsk_cpus_allowed(p))) { + return 1; + } +#endif + + /* Let the task load settle before doing another down migration */ + /* hack - always use clock from first online CPU */ + now = cpu_rq(cpumask_first(cpu_online_mask))->clock_task; + if (((now - se->avg.hmp_last_down_migration) >> 10) + < hmp_next_down_threshold) + return 0; + + if (cpumask_intersects(&hmp_slower_domain(cpu)->cpus, + tsk_cpus_allowed(p)) + && se->avg.load_avg_ratio < hmp_down_threshold) { + return 1; + } + return 0; +} + +/* + * hmp_can_migrate_task - may task p from runqueue rq be migrated to this_cpu? + * Ideally this function should be merged with can_migrate_task() to avoid + * redundant code. + */ +static int hmp_can_migrate_task(struct task_struct *p, struct lb_env *env) +{ + int tsk_cache_hot = 0; + + /* + * We do not migrate tasks that are: + * 1) running (obviously), or + * 2) cannot be migrated to this CPU due to cpus_allowed + */ + if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) { + schedstat_inc(p, se.statistics.nr_failed_migrations_affine); + return 0; + } + env->flags &= ~LBF_ALL_PINNED; + + if (task_running(env->src_rq, p)) { + schedstat_inc(p, se.statistics.nr_failed_migrations_running); + return 0; + } + + /* + * Aggressive migration if: + * 1) task is cache cold, or + * 2) too many balance attempts have failed. + */ + + tsk_cache_hot = task_hot(p, env->src_rq->clock_task, env->sd); + if (!tsk_cache_hot || + env->sd->nr_balance_failed > env->sd->cache_nice_tries) { +#ifdef CONFIG_SCHEDSTATS + if (tsk_cache_hot) { + schedstat_inc(env->sd, lb_hot_gained[env->idle]); + schedstat_inc(p, se.statistics.nr_forced_migrations); + } +#endif + return 1; + } + + return 1; +} + +/* + * move_specific_task tries to move a specific task. + * Returns 1 if successful and 0 otherwise. + * Called with both runqueues locked. + */ +static int move_specific_task(struct lb_env *env, struct task_struct *pm) +{ + struct task_struct *p, *n; + + list_for_each_entry_safe(p, n, &env->src_rq->cfs_tasks, se.group_node) { + if (throttled_lb_pair(task_group(p), env->src_rq->cpu, + env->dst_cpu)) + continue; + + if (!hmp_can_migrate_task(p, env)) + continue; + /* Check if we found the right task */ + if (p != pm) + continue; + + move_task(p, env); + /* + * Right now, this is only the third place move_task() + * is called, so we can safely collect move_task() + * stats here rather than inside move_task(). + */ + schedstat_inc(env->sd, lb_gained[env->idle]); + return 1; + } + return 0; +} + +/* + * hmp_active_task_migration_cpu_stop is run by cpu stopper and used to + * migrate a specific task from one runqueue to another. + * hmp_force_up_migration uses this to push a currently running task + * off a runqueue. + * Based on active_load_balance_stop_cpu and can potentially be merged. + */ +static int hmp_active_task_migration_cpu_stop(void *data) +{ + struct rq *busiest_rq = data; + struct task_struct *p = busiest_rq->migrate_task; + int busiest_cpu = cpu_of(busiest_rq); + int target_cpu = busiest_rq->push_cpu; + struct rq *target_rq = cpu_rq(target_cpu); + struct sched_domain *sd; + + raw_spin_lock_irq(&busiest_rq->lock); + /* make sure the requested cpu hasn't gone down in the meantime */ + if (unlikely(busiest_cpu != smp_processor_id() || + !busiest_rq->active_balance)) { + goto out_unlock; + } + /* Is there any task to move? */ + if (busiest_rq->nr_running <= 1) + goto out_unlock; + /* Task has migrated meanwhile, abort forced migration */ + if (task_rq(p) != busiest_rq) + goto out_unlock; + /* + * This condition is "impossible", if it occurs + * we need to fix it. Originally reported by + * Bjorn Helgaas on a 128-cpu setup. + */ + BUG_ON(busiest_rq == target_rq); + + /* move a task from busiest_rq to target_rq */ + double_lock_balance(busiest_rq, target_rq); + + /* Search for an sd spanning us and the target CPU. */ + rcu_read_lock(); + for_each_domain(target_cpu, sd) { + if (cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) + break; + } + + if (likely(sd)) { + struct lb_env env = { + .sd = sd, + .dst_cpu = target_cpu, + .dst_rq = target_rq, + .src_cpu = busiest_rq->cpu, + .src_rq = busiest_rq, + .idle = CPU_IDLE, + }; + + schedstat_inc(sd, alb_count); + + if (move_specific_task(&env, p)) + schedstat_inc(sd, alb_pushed); + else + schedstat_inc(sd, alb_failed); + } + rcu_read_unlock(); + double_unlock_balance(busiest_rq, target_rq); +out_unlock: + busiest_rq->active_balance = 0; + raw_spin_unlock_irq(&busiest_rq->lock); + return 0; +} + +/* + * hmp_idle_pull_cpu_stop is run by cpu stopper and used to + * migrate a specific task from one runqueue to another. + * hmp_idle_pull uses this to push a currently running task + * off a runqueue to a faster CPU. + * Locking is slightly different than usual. + * Based on active_load_balance_stop_cpu and can potentially be merged. + */ +static int hmp_idle_pull_cpu_stop(void *data) +{ + struct rq *busiest_rq = data; + struct task_struct *p = busiest_rq->migrate_task; + int busiest_cpu = cpu_of(busiest_rq); + int target_cpu = busiest_rq->push_cpu; + struct rq *target_rq = cpu_rq(target_cpu); + struct sched_domain *sd; + + raw_spin_lock_irq(&busiest_rq->lock); + + /* make sure the requested cpu hasn't gone down in the meantime */ + if (unlikely(busiest_cpu != smp_processor_id() || + !busiest_rq->active_balance)) + goto out_unlock; + + /* Is there any task to move? */ + if (busiest_rq->nr_running <= 1) + goto out_unlock; + + /* Task has migrated meanwhile, abort forced migration */ + if (task_rq(p) != busiest_rq) + goto out_unlock; + + /* + * This condition is "impossible", if it occurs + * we need to fix it. Originally reported by + * Bjorn Helgaas on a 128-cpu setup. + */ + BUG_ON(busiest_rq == target_rq); + + /* move a task from busiest_rq to target_rq */ + double_lock_balance(busiest_rq, target_rq); + + /* Search for an sd spanning us and the target CPU. */ + rcu_read_lock(); + for_each_domain(target_cpu, sd) { + if (cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) + break; + } + if (likely(sd)) { + struct lb_env env = { + .sd = sd, + .dst_cpu = target_cpu, + .dst_rq = target_rq, + .src_cpu = busiest_rq->cpu, + .src_rq = busiest_rq, + .idle = CPU_IDLE, + }; + + schedstat_inc(sd, alb_count); + + if (move_specific_task(&env, p)) + schedstat_inc(sd, alb_pushed); + else + schedstat_inc(sd, alb_failed); + } + rcu_read_unlock(); + double_unlock_balance(busiest_rq, target_rq); +out_unlock: + busiest_rq->active_balance = 0; + raw_spin_unlock_irq(&busiest_rq->lock); + return 0; +} + +static DEFINE_SPINLOCK(hmp_force_migration); + +/* + * hmp_force_up_migration checks runqueues for tasks that need to + * be actively migrated to a faster cpu. + */ +static void hmp_force_up_migration(int this_cpu) +{ + int cpu, target_cpu; + struct sched_entity *curr, *orig; + struct rq *target; + unsigned long flags; + unsigned int force; + struct task_struct *p; + + if (!spin_trylock(&hmp_force_migration)) + return; + for_each_online_cpu(cpu) { + force = 0; + target = cpu_rq(cpu); + raw_spin_lock_irqsave(&target->lock, flags); + curr = target->cfs.curr; + if (!curr) { + raw_spin_unlock_irqrestore(&target->lock, flags); + continue; + } + if (!entity_is_task(curr)) { + struct cfs_rq *cfs_rq; + + cfs_rq = group_cfs_rq(curr); + while (cfs_rq) { + curr = cfs_rq->curr; + cfs_rq = group_cfs_rq(curr); + } + } + orig = curr; + curr = hmp_get_heaviest_task(curr, 1); + p = task_of(curr); + if (hmp_up_migration(cpu, &target_cpu, curr)) { + if (!target->active_balance) { + target->active_balance = 1; + target->push_cpu = target_cpu; + target->migrate_task = p; + force = 1; + trace_sched_hmp_migrate(p, target->push_cpu, 1); + hmp_next_up_delay(&p->se, target->push_cpu); + } + } + if (!force && !target->active_balance) { + /* + * For now we just check the currently running task. + * Selecting the lightest task for offloading will + * require extensive book keeping. + */ + curr = hmp_get_lightest_task(orig, 1); + target->push_cpu = hmp_offload_down(cpu, curr); + if (target->push_cpu < NR_CPUS) { + target->active_balance = 1; + target->migrate_task = p; + force = 1; + trace_sched_hmp_migrate(p, target->push_cpu, 2); + hmp_next_down_delay(&p->se, target->push_cpu); + } + } + raw_spin_unlock_irqrestore(&target->lock, flags); + if (force) + stop_one_cpu_nowait(cpu_of(target), + hmp_active_task_migration_cpu_stop, + target, &target->active_balance_work); + } + spin_unlock(&hmp_force_migration); +} +/* + * hmp_idle_pull looks at little domain runqueues to see + * if a task should be pulled. + * + * Reuses hmp_force_migration spinlock. + * + */ +static unsigned int hmp_idle_pull(int this_cpu) +{ + int cpu; + struct sched_entity *curr, *orig; + struct hmp_domain *hmp_domain = NULL; + struct rq *target, *rq; + unsigned long flags, ratio = 0; + unsigned int force = 0; + struct task_struct *p = NULL; + + if (!hmp_cpu_is_slowest(this_cpu)) + hmp_domain = hmp_slower_domain(this_cpu); + if (!hmp_domain) + return 0; + + if (!spin_trylock(&hmp_force_migration)) + return 0; + + /* first select a task */ + for_each_cpu(cpu, &hmp_domain->cpus) { + rq = cpu_rq(cpu); + raw_spin_lock_irqsave(&rq->lock, flags); + curr = rq->cfs.curr; + if (!curr) { + raw_spin_unlock_irqrestore(&rq->lock, flags); + continue; + } + if (!entity_is_task(curr)) { + struct cfs_rq *cfs_rq; + + cfs_rq = group_cfs_rq(curr); + while (cfs_rq) { + curr = cfs_rq->curr; + if (!entity_is_task(curr)) + cfs_rq = group_cfs_rq(curr); + else + cfs_rq = NULL; + } + } + orig = curr; + curr = hmp_get_heaviest_task(curr, 1); + if (curr->avg.load_avg_ratio > hmp_up_threshold && + curr->avg.load_avg_ratio > ratio) { + p = task_of(curr); + target = rq; + ratio = curr->avg.load_avg_ratio; + } + raw_spin_unlock_irqrestore(&rq->lock, flags); + } + + if (!p) + goto done; + + /* now we have a candidate */ + raw_spin_lock_irqsave(&target->lock, flags); + if (!target->active_balance && task_rq(p) == target) { + target->active_balance = 1; + target->push_cpu = this_cpu; + target->migrate_task = p; + force = 1; + trace_sched_hmp_migrate(p, target->push_cpu, 3); + hmp_next_up_delay(&p->se, target->push_cpu); + } + raw_spin_unlock_irqrestore(&target->lock, flags); + if (force) { + stop_one_cpu_nowait(cpu_of(target), + hmp_idle_pull_cpu_stop, + target, &target->active_balance_work); + } +done: + spin_unlock(&hmp_force_migration); + return force; +} +#else +static void hmp_force_up_migration(int this_cpu) { } +#endif /* CONFIG_SCHED_HMP */ + /* * run_rebalance_domains is triggered when needed from the scheduler tick. * Also triggered for nohz idle balancing (with nohz_balancing_kick set). @@ -5694,6 +6948,8 @@ static void run_rebalance_domains(struct softirq_action *h) enum cpu_idle_type idle = this_rq->idle_balance ? CPU_IDLE : CPU_NOT_IDLE; + hmp_force_up_migration(this_cpu); + rebalance_domains(this_cpu, idle); /* @@ -5726,11 +6982,17 @@ void trigger_load_balance(struct rq *rq, int cpu) static void rq_online_fair(struct rq *rq) { +#ifdef CONFIG_SCHED_HMP + hmp_online_cpu(rq->cpu); +#endif update_sysctl(); } static void rq_offline_fair(struct rq *rq) { +#ifdef CONFIG_SCHED_HMP + hmp_offline_cpu(rq->cpu); +#endif update_sysctl(); /* Ensure any throttled groups are reachable by pick_next_task */ @@ -6193,6 +7455,139 @@ __init void init_sched_fair_class(void) zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); cpu_notifier(sched_ilb_notifier, 0); #endif + +#ifdef CONFIG_SCHED_HMP + hmp_cpu_mask_setup(); +#endif #endif /* SMP */ } + +#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE +static u32 cpufreq_calc_scale(u32 min, u32 max, u32 curr) +{ + u32 result = curr / max; + return result; +} + +/* Called when the CPU Frequency is changed. + * Once for each CPU. + */ +static int cpufreq_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + int cpu = freq->cpu; + struct cpufreq_extents *extents; + + if (freq->flags & CPUFREQ_CONST_LOOPS) + return NOTIFY_OK; + + if (val != CPUFREQ_POSTCHANGE) + return NOTIFY_OK; + + /* if dynamic load scale is disabled, set the load scale to 1.0 */ + if (!hmp_data.freqinvar_load_scale_enabled) { + freq_scale[cpu].curr_scale = 1024; + return NOTIFY_OK; + } + + extents = &freq_scale[cpu]; + if (extents->flags & SCHED_LOAD_FREQINVAR_SINGLEFREQ) { + /* If our governor was recognised as a single-freq governor, + * use 1.0 + */ + extents->curr_scale = 1024; + } else { + extents->curr_scale = cpufreq_calc_scale(extents->min, + extents->max, freq->new); + } + + return NOTIFY_OK; +} + +/* Called when the CPUFreq governor is changed. + * Only called for the CPUs which are actually changed by the + * userspace. + */ +static int cpufreq_policy_callback(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct cpufreq_policy *policy = data; + struct cpufreq_extents *extents; + int cpu, singleFreq = 0; + static const char performance_governor[] = "performance"; + static const char powersave_governor[] = "powersave"; + + if (event == CPUFREQ_START) + return 0; + + if (event != CPUFREQ_INCOMPATIBLE) + return 0; + + /* CPUFreq governors do not accurately report the range of + * CPU Frequencies they will choose from. + * We recognise performance and powersave governors as + * single-frequency only. + */ + if (!strncmp(policy->governor->name, performance_governor, + strlen(performance_governor)) || + !strncmp(policy->governor->name, powersave_governor, + strlen(powersave_governor))) + singleFreq = 1; + + /* Make sure that all CPUs impacted by this policy are + * updated since we will only get a notification when the + * user explicitly changes the policy on a CPU. + */ + for_each_cpu(cpu, policy->cpus) { + extents = &freq_scale[cpu]; + extents->max = policy->max >> SCHED_FREQSCALE_SHIFT; + extents->min = policy->min >> SCHED_FREQSCALE_SHIFT; + if (!hmp_data.freqinvar_load_scale_enabled) { + extents->curr_scale = 1024; + } else if (singleFreq) { + extents->flags |= SCHED_LOAD_FREQINVAR_SINGLEFREQ; + extents->curr_scale = 1024; + } else { + extents->flags &= ~SCHED_LOAD_FREQINVAR_SINGLEFREQ; + extents->curr_scale = cpufreq_calc_scale(extents->min, + extents->max, policy->cur); + } + } + + return 0; +} + +static struct notifier_block cpufreq_notifier = { + .notifier_call = cpufreq_callback, +}; +static struct notifier_block cpufreq_policy_notifier = { + .notifier_call = cpufreq_policy_callback, +}; + +static int __init register_sched_cpufreq_notifier(void) +{ + int ret = 0; + + /* init safe defaults since there are no policies at registration */ + for (ret = 0; ret < CONFIG_NR_CPUS; ret++) { + /* safe defaults */ + freq_scale[ret].max = 1024; + freq_scale[ret].min = 1024; + freq_scale[ret].curr_scale = 1024; + } + + pr_info("sched: registering cpufreq notifiers for scale-invariant loads\n"); + ret = cpufreq_register_notifier(&cpufreq_policy_notifier, + CPUFREQ_POLICY_NOTIFIER); + + if (ret != -EINVAL) + ret = cpufreq_register_notifier(&cpufreq_notifier, + CPUFREQ_TRANSITION_NOTIFIER); + + return ret; +} + +core_initcall(register_sched_cpufreq_notifier); +#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ce39224d615..27f51ac8670 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -142,7 +142,7 @@ struct task_group { atomic_t load_weight; atomic64_t load_avg; - atomic_t runnable_avg; + atomic_t runnable_avg, usage_avg; #endif #ifdef CONFIG_RT_GROUP_SCHED @@ -279,7 +279,7 @@ struct cfs_rq { #endif /* CONFIG_FAIR_GROUP_SCHED */ /* These always depend on CONFIG_FAIR_GROUP_SCHED */ #ifdef CONFIG_FAIR_GROUP_SCHED - u32 tg_runnable_contrib; + u32 tg_runnable_contrib, tg_usage_contrib; u64 tg_load_contrib; #endif /* CONFIG_FAIR_GROUP_SCHED */ @@ -464,6 +464,9 @@ struct rq { int active_balance; int push_cpu; struct cpu_stop_work active_balance_work; +#ifdef CONFIG_SCHED_HMP + struct task_struct *migrate_task; +#endif /* cpu of this runqueue: */ int cpu; int online; @@ -642,6 +645,12 @@ static inline unsigned int group_first_cpu(struct sched_group *group) extern int group_balance_cpu(struct sched_group *sg); +#ifdef CONFIG_SCHED_HMP +static LIST_HEAD(hmp_domains); +DECLARE_PER_CPU(struct hmp_domain *, hmp_cpu_domain); +#define hmp_cpu_domain(cpu) (per_cpu(hmp_cpu_domain, (cpu))) +#endif /* CONFIG_SCHED_HMP */ + #endif /* CONFIG_SMP */ #include "stats.h" diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e52d002d389..36108647386 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -272,6 +272,15 @@ static cpumask_var_t *wq_numa_possible_cpumask; static bool wq_disable_numa; module_param_named(disable_numa, wq_disable_numa, bool, 0444); +/* see the comment above the definition of WQ_POWER_EFFICIENT */ +#ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT +static bool wq_power_efficient = true; +#else +static bool wq_power_efficient; +#endif + +module_param_named(power_efficient, wq_power_efficient, bool, 0444); + static bool wq_numa_enabled; /* unbound NUMA affinity enabled */ /* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */ @@ -305,6 +314,10 @@ struct workqueue_struct *system_unbound_wq __read_mostly; EXPORT_SYMBOL_GPL(system_unbound_wq); struct workqueue_struct *system_freezable_wq __read_mostly; EXPORT_SYMBOL_GPL(system_freezable_wq); +struct workqueue_struct *system_power_efficient_wq __read_mostly; +EXPORT_SYMBOL_GPL(system_power_efficient_wq); +struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly; +EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq); static int worker_thread(void *__worker); static void copy_workqueue_attrs(struct workqueue_attrs *to, @@ -4107,6 +4120,10 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, struct workqueue_struct *wq; struct pool_workqueue *pwq; + /* see the comment above the definition of WQ_POWER_EFFICIENT */ + if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient) + flags |= WQ_UNBOUND; + /* allocate wq and format name */ if (flags & WQ_UNBOUND) tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]); @@ -5006,8 +5023,15 @@ static int __init init_workqueues(void) WQ_UNBOUND_MAX_ACTIVE); system_freezable_wq = alloc_workqueue("events_freezable", WQ_FREEZABLE, 0); + system_power_efficient_wq = alloc_workqueue("events_power_efficient", + WQ_POWER_EFFICIENT, 0); + system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient", + WQ_FREEZABLE | WQ_POWER_EFFICIENT, + 0); BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq || - !system_unbound_wq || !system_freezable_wq); + !system_unbound_wq || !system_freezable_wq || + !system_power_efficient_wq || + !system_freezable_power_efficient_wq); return 0; } early_initcall(init_workqueues); diff --git a/linaro/configs/android.conf b/linaro/configs/android.conf new file mode 100644 index 00000000000..bb90ecd9e16 --- /dev/null +++ b/linaro/configs/android.conf @@ -0,0 +1,31 @@ +CONFIG_IPV6=y +# CONFIG_IPV6_SIT is not set +CONFIG_PANIC_TIMEOUT=0 +CONFIG_HAS_WAKELOCK=y +CONFIG_WAKELOCK=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_DM_CRYPT=y +CONFIG_POWER_SUPPLY=y +CONFIG_ANDROID_PARANOID_NETWORK=y +CONFIG_NET_ACTIVITY_STATS=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +CONFIG_USB_G_ANDROID=y +CONFIG_SWITCH=y +CONFIG_STAGING=y +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_LOGGER=y +CONFIG_ANDROID_TIMED_OUTPUT=y +CONFIG_ANDROID_TIMED_GPIO=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_ANDROID_INTF_ALARM_DEV=y +CONFIG_CRYPTO_TWOFISH=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=16384 +CONFIG_FUSE_FS=y +CONFIG_CPU_FREQ_GOV_INTERACTIVE=y +CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE=y diff --git a/linaro/configs/arndale.conf b/linaro/configs/arndale.conf new file mode 100644 index 00000000000..109052f2f6e --- /dev/null +++ b/linaro/configs/arndale.conf @@ -0,0 +1,66 @@ +CONFIG_KALLSYMS_ALL=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_BSD_DISKLABEL=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_ARCH_EXYNOS=y +CONFIG_S3C_LOWLEVEL_UART_PORT=2 +CONFIG_ARCH_EXYNOS5=y +# CONFIG_EXYNOS_ATAGS is not set +CONFIG_MACH_EXYNOS4_DT=y +CONFIG_VMSPLIT_2G=y +CONFIG_NR_CPUS=2 +CONFIG_HIGHMEM=y +# CONFIG_COMPACTION is not set +CONFIG_ARM_APPENDED_DTB=y +CONFIG_ARM_ATAG_DTB_COMPAT=y +CONFIG_CMDLINE="root=/dev/ram0 rw ramdisk=8192 initrd=0x41000000,8M console=ttySAC1,115200 init= mem=256M" +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_VFP=y +CONFIG_NEON=y +CONFIG_PM_RUNTIME=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_SG=y +CONFIG_ATA=y +CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_SATA_EXYNOS=y +CONFIG_AX88796=y +CONFIG_AX88796_93CX6=y +CONFIG_INPUT_EVDEV=y +CONFIG_KEYBOARD_GPIO=y +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_SAMSUNG=y +CONFIG_SERIAL_SAMSUNG_CONSOLE=y +CONFIG_HW_RANDOM=y +CONFIG_I2C=y +CONFIG_I2C_S3C2410=y +CONFIG_THERMAL=y +CONFIG_CPU_THERMAL=y +CONFIG_EXYNOS_THERMAL=y +CONFIG_MFD_SEC_CORE=y +CONFIG_REGULATOR=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y +CONFIG_REGULATOR_S5M8767=y +CONFIG_DRM=y +CONFIG_DRM_LOAD_EDID_FIRMWARE=y +CONFIG_DRM_EXYNOS=y +CONFIG_DRM_EXYNOS_DMABUF=y +CONFIG_DRM_EXYNOS_HDMI=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_LOGO=y +CONFIG_MMC=y +CONFIG_MMC_UNSAFE_RESUME=y +CONFIG_MMC_DW=y +CONFIG_MMC_DW_IDMAC=y +CONFIG_MMC_DW_EXYNOS=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_S3C=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEBUG_RT_MUTEXES=y +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_INFO=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_DEBUG_USER=y +CONFIG_TUN=y diff --git a/linaro/configs/big-LITTLE-IKS.conf b/linaro/configs/big-LITTLE-IKS.conf new file mode 100644 index 00000000000..b067fde86ea --- /dev/null +++ b/linaro/configs/big-LITTLE-IKS.conf @@ -0,0 +1,5 @@ +CONFIG_BIG_LITTLE=y +CONFIG_BL_SWITCHER=y +CONFIG_ARM_DT_BL_CPUFREQ=y +CONFIG_ARM_VEXPRESS_BL_CPUFREQ=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y diff --git a/linaro/configs/big-LITTLE-MP.conf b/linaro/configs/big-LITTLE-MP.conf new file mode 100644 index 00000000000..0bbc603a13e --- /dev/null +++ b/linaro/configs/big-LITTLE-MP.conf @@ -0,0 +1,11 @@ +CONFIG_CGROUPS=y +CONFIG_CGROUP_SCHED=y +CONFIG_FAIR_GROUP_SCHED=y +CONFIG_NO_HZ=y +CONFIG_SCHED_MC=y +CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE=y +CONFIG_SCHED_HMP=y +CONFIG_HMP_FAST_CPU_MASK="" +CONFIG_HMP_SLOW_CPU_MASK="" +CONFIG_HMP_VARIABLE_SCALE=y +CONFIG_HMP_FREQUENCY_INVARIANT_SCALE=y diff --git a/linaro/configs/debug.conf b/linaro/configs/debug.conf new file mode 100644 index 00000000000..36980566b2d --- /dev/null +++ b/linaro/configs/debug.conf @@ -0,0 +1 @@ +CONFIG_PROVE_LOCKING=y diff --git a/linaro/configs/distribution.conf b/linaro/configs/distribution.conf new file mode 100644 index 00000000000..fbcfed1b6ce --- /dev/null +++ b/linaro/configs/distribution.conf @@ -0,0 +1,44 @@ +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_CGROUPS=y +# CONFIG_COMPAT_BRK is not set +CONFIG_DEFAULT_MMAP_MIN_ADDR=32768 +CONFIG_SECCOMP=y +CONFIG_CC_STACKPROTECTOR=y +CONFIG_SYN_COOKIES=y +CONFIG_IPV6=y +CONFIG_NETLABEL=y +CONFIG_BRIDGE_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NETFILTER_XT_CONNMARK=m +CONFIG_NETFILTER_XT_MARK=m +CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m +CONFIG_NF_CONNTRACK_IPV4=m +CONFIG_NF_NAT_IPV4=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_MANGLE=m +CONFIG_NF_CONNTRACK_IPV6=m +CONFIG_NF_NAT_IPV6=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_BRIDGE_EBT_MARK_T=m +CONFIG_BRIDGE=m +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=65536 +CONFIG_INPUT_MISC=y +CONFIG_INPUT_UINPUT=y +# CONFIG_DEVKMEM is not set +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_AUTOFS4_FS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_STRICT_DEVMEM=y +CONFIG_SECURITY=y +CONFIG_LSM_MMAP_MIN_ADDR=0 +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SMACK=y +CONFIG_SECURITY_APPARMOR=y +CONFIG_DEFAULT_SECURITY_APPARMOR=y diff --git a/linaro/configs/highbank.conf b/linaro/configs/highbank.conf new file mode 100644 index 00000000000..bf0f3c14b0d --- /dev/null +++ b/linaro/configs/highbank.conf @@ -0,0 +1,40 @@ +CONFIG_EXPERIMENTAL=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_ARCH_HIGHBANK=y +CONFIG_ARM_ERRATA_754322=y +CONFIG_SMP=y +CONFIG_SCHED_MC=y +CONFIG_AEABI=y +CONFIG_CMDLINE="console=ttyAMA0" +CONFIG_CPU_IDLE=y +CONFIG_VFP=y +CONFIG_NEON=y +CONFIG_NET=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_ATA=y +CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_SATA_HIGHBANK=y +CONFIG_NETDEVICES=y +CONFIG_NET_CALXEDA_XGMAC=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_IPMI_HANDLER=y +CONFIG_IPMI_SI=y +CONFIG_I2C=y +CONFIG_I2C_DESIGNWARE_PLATFORM=y +CONFIG_SPI=y +CONFIG_SPI_PL022=y +CONFIG_GPIO_PL061=y +CONFIG_MMC=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_EDAC=y +CONFIG_EDAC_MM_EDAC=y +CONFIG_EDAC_HIGHBANK_MC=y +CONFIG_EDAC_HIGHBANK_L2=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_PL031=y +CONFIG_DMADEVICES=y +CONFIG_PL330_DMA=y diff --git a/linaro/configs/kvm-guest.conf b/linaro/configs/kvm-guest.conf new file mode 100644 index 00000000000..00e84a3ba1e --- /dev/null +++ b/linaro/configs/kvm-guest.conf @@ -0,0 +1,11 @@ +CONFIG_BALLOON_COMPACTION=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_NET=y +CONFIG_HVC_DRIVER=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y +CONFIG_VIRTUALIZATION=y +# CONFIG_THUMB2_KERNEL is not set diff --git a/linaro/configs/kvm-host.conf b/linaro/configs/kvm-host.conf new file mode 100644 index 00000000000..21a40e03137 --- /dev/null +++ b/linaro/configs/kvm-host.conf @@ -0,0 +1,11 @@ +CONFIG_VIRTUALIZATION=y +CONFIG_ARM_LPAE=y +CONFIG_ARM_VIRT_EXT=y +CONFIG_HAVE_KVM_IRQCHIP=y +CONFIG_KVM_ARM_HOST=y +CONFIG_KVM_ARM_MAX_VCPUS=4 +CONFIG_KVM_ARM_TIMER=y +CONFIG_KVM_ARM_VGIC=y +CONFIG_KVM_MMIO=y +CONFIG_KVM=y +CONFIG_BLK_DEV_NBD=m diff --git a/linaro/configs/linaro-base.conf b/linaro/configs/linaro-base.conf new file mode 100644 index 00000000000..947ca1f5093 --- /dev/null +++ b/linaro/configs/linaro-base.conf @@ -0,0 +1,94 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=16 +CONFIG_BLK_DEV_INITRD=y +CONFIG_EMBEDDED=y +CONFIG_HOTPLUG=y +CONFIG_PERF_EVENTS=y +CONFIG_SLAB=y +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_SMP=y +CONFIG_SCHED_MC=y +CONFIG_SCHED_SMT=y +CONFIG_THUMB2_KERNEL=y +CONFIG_AEABI=y +# CONFIG_OABI_COMPAT is not set +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +CONFIG_CPU_IDLE=y +CONFIG_BINFMT_MISC=y +CONFIG_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_NET_KEY_MIGRATE=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +# CONFIG_INET_LRO is not set +CONFIG_NETFILTER=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_CONNECTOR=y +CONFIG_MTD=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_OOPS=y +CONFIG_MTD_CFI=y +CONFIG_MTD_CFI_INTELEXT=y +CONFIG_MTD_NAND=y +CONFIG_NETDEVICES=y +CONFIG_EXT2_FS=y +CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y +CONFIG_BTRFS_FS=y +CONFIG_QUOTA=y +CONFIG_QFMT_V2=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_ECRYPT_FS=y +CONFIG_JFFS2_FS=y +CONFIG_JFFS2_SUMMARY=y +CONFIG_JFFS2_FS_XATTR=y +CONFIG_JFFS2_COMPRESSION_OPTIONS=y +CONFIG_JFFS2_LZO=y +CONFIG_JFFS2_RUBIN=y +CONFIG_CRAMFS=y +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_NFS_FS=y +# CONFIG_NFS_V2 is not set +CONFIG_NFS_V3=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_ROOT_NFS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_PRINTK_TIME=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_KEYS=y +CONFIG_CRYPTO_MICHAEL_MIC=y +CONFIG_CRC_CCITT=y +CONFIG_CRC_T10DIF=y +CONFIG_CRC_ITU_T=y +CONFIG_CRC7=y +CONFIG_HW_PERF_EVENTS=y +CONFIG_FUNCTION_TRACER=y +CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_PROC_DEVICETREE=y diff --git a/linaro/configs/omap4.conf b/linaro/configs/omap4.conf new file mode 100644 index 00000000000..50fb9d9cb5b --- /dev/null +++ b/linaro/configs/omap4.conf @@ -0,0 +1,194 @@ +CONFIG_EXPERT=y +CONFIG_KPROBES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +# CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_GPIO_PCA953X=y +CONFIG_OMAP_RESET_CLOCKS=y +CONFIG_OMAP_MUX_DEBUG=y +CONFIG_ARCH_OMAP2PLUS=y +CONFIG_SOC_OMAP5=y +# CONFIG_ARCH_OMAP2 is not set +CONFIG_ARCH_VEXPRESS_CA9X4=y +CONFIG_ARM_THUMBEE=y +CONFIG_ARM_ERRATA_411920=y +CONFIG_NR_CPUS=2 +CONFIG_ZBOOT_ROM_TEXT=0x0 +CONFIG_ZBOOT_ROM_BSS=0x0 +CONFIG_CMDLINE="root=/dev/mmcblk0p2 rootwait console=ttyO2,115200" +CONFIG_KEXEC=y +CONFIG_PM_DEBUG=y +CONFIG_CAN=m +CONFIG_CAN_C_CAN=m +CONFIG_CAN_C_CAN_PLATFORM=m +CONFIG_BT=m +CONFIG_BT_HCIUART=m +CONFIG_BT_HCIUART_H4=y +CONFIG_BT_HCIUART_BCSP=y +CONFIG_BT_HCIUART_LL=y +CONFIG_BT_HCIBCM203X=m +CONFIG_BT_HCIBPA10X=m +CONFIG_CFG80211=m +CONFIG_MAC80211=m +CONFIG_MAC80211_RC_PID=y +CONFIG_MAC80211_RC_DEFAULT_PID=y +CONFIG_CMA=y +CONFIG_MTD_NAND_OMAP2=y +CONFIG_MTD_ONENAND=y +CONFIG_MTD_ONENAND_VERIFY_WRITE=y +CONFIG_MTD_ONENAND_OMAP2=y +CONFIG_MTD_UBI=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM_SIZE=16384 +CONFIG_SENSORS_TSL2550=m +CONFIG_SENSORS_LIS3_I2C=m +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_SCAN_ASYNC=y +CONFIG_KS8851=y +CONFIG_KS8851_MLL=y +CONFIG_SMC91X=y +CONFIG_SMSC911X=y +CONFIG_TI_CPSW=y +CONFIG_SMSC_PHY=y +CONFIG_USB_USBNET=y +CONFIG_USB_NET_SMSC95XX=y +CONFIG_USB_ALI_M5632=y +CONFIG_USB_AN2720=y +CONFIG_USB_EPSON2888=y +CONFIG_USB_KC2190=y +CONFIG_LIBERTAS=m +CONFIG_LIBERTAS_USB=m +CONFIG_LIBERTAS_SDIO=m +CONFIG_LIBERTAS_DEBUG=y +CONFIG_INPUT_JOYDEV=y +CONFIG_INPUT_EVDEV=y +CONFIG_KEYBOARD_GPIO=y +CONFIG_KEYBOARD_MATRIX=m +CONFIG_KEYBOARD_TWL4030=y +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_TOUCHSCREEN_ADS7846=y +CONFIG_INPUT_TWL4030_PWRBUTTON=y +CONFIG_VT_HW_CONSOLE_BINDING=y +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=32 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_RSA=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_SERIAL_OMAP=y +CONFIG_SERIAL_OMAP_CONSOLE=y +CONFIG_HW_RANDOM=y +CONFIG_I2C_CHARDEV=y +CONFIG_SPI=y +CONFIG_SPI_OMAP24XX=y +CONFIG_PINCTRL_SINGLE=y +CONFIG_DEBUG_GPIO=y +CONFIG_GPIO_SYSFS=y +CONFIG_GPIO_TWL4030=y +CONFIG_W1=y +CONFIG_SENSORS_LM75=m +CONFIG_WATCHDOG=y +CONFIG_OMAP_WATCHDOG=y +CONFIG_TWL4030_WATCHDOG=y +CONFIG_MFD_TPS65217=y +CONFIG_MFD_TPS65910=y +CONFIG_TWL6040_CORE=y +CONFIG_REGULATOR_TPS65023=y +CONFIG_REGULATOR_TPS6507X=y +CONFIG_REGULATOR_TPS65217=y +CONFIG_REGULATOR_TPS65910=y +CONFIG_REGULATOR_TWL4030=y +CONFIG_FB=y +CONFIG_FIRMWARE_EDID=y +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +CONFIG_OMAP2_DSS=m +CONFIG_OMAP2_DSS_RFBI=y +CONFIG_OMAP2_DSS_SDI=y +CONFIG_OMAP2_DSS_DSI=y +CONFIG_FB_OMAP2=m +CONFIG_PANEL_GENERIC_DPI=m +CONFIG_PANEL_TFP410=m +CONFIG_PANEL_SHARP_LS037V7DW01=m +CONFIG_PANEL_NEC_NL8048HL11_01B=m +CONFIG_PANEL_TAAL=m +CONFIG_PANEL_TPO_TD043MTEA1=m +CONFIG_PANEL_ACX565AKM=m +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_LCD_CLASS_DEVICE=y +CONFIG_LCD_PLATFORM=y +CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y +CONFIG_FONTS=y +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y +CONFIG_LOGO=y +CONFIG_SOUND=m +CONFIG_SND=m +CONFIG_SND_VERBOSE_PRINTK=y +CONFIG_SND_DEBUG=y +CONFIG_SND_USB_AUDIO=m +CONFIG_SND_SOC=m +CONFIG_SND_OMAP_SOC=m +CONFIG_SND_OMAP_SOC_OMAP_TWL4030=m +CONFIG_SND_OMAP_SOC_OMAP_ABE_TWL6040=m +CONFIG_SND_OMAP_SOC_OMAP3_PANDORA=m +CONFIG_USB=y +CONFIG_USB_DEBUG=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_MON=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_WDM=y +CONFIG_USB_STORAGE=y +CONFIG_USB_TEST=y +CONFIG_USB_PHY=y +CONFIG_NOP_USB_XCEIV=y +CONFIG_USB_GADGET=y +CONFIG_USB_GADGET_DEBUG=y +CONFIG_USB_GADGET_DEBUG_FILES=y +CONFIG_USB_GADGET_DEBUG_FS=y +CONFIG_USB_ZERO=m +CONFIG_MMC=y +CONFIG_MMC_UNSAFE_RESUME=y +CONFIG_SDIO_UART=y +CONFIG_MMC_ARMMMCI=y +CONFIG_MMC_OMAP=y +CONFIG_MMC_OMAP_HS=y +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y +CONFIG_LEDS_GPIO=y +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_TIMER=y +CONFIG_LEDS_TRIGGER_ONESHOT=y +CONFIG_LEDS_TRIGGER_HEARTBEAT=y +CONFIG_LEDS_TRIGGER_BACKLIGHT=y +CONFIG_LEDS_TRIGGER_CPU=y +CONFIG_LEDS_TRIGGER_GPIO=y +CONFIG_LEDS_TRIGGER_DEFAULT_ON=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_TWL92330=y +CONFIG_RTC_DRV_TWL4030=y +CONFIG_RTC_DRV_OMAP=y +CONFIG_DMADEVICES=y +CONFIG_DMA_OMAP=y +# CONFIG_EXT3_FS_XATTR is not set +CONFIG_UBIFS_FS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_ROOT_NFS=y +# CONFIG_DEBUG_BUGVERBOSE is not set +CONFIG_DEBUG_INFO=y +# CONFIG_CRYPTO_ANSI_CPRNG is not set +CONFIG_LIBCRC32C=y +# CONFIG_CPU_FREQ is not set diff --git a/linaro/configs/ubuntu-minimal.conf b/linaro/configs/ubuntu-minimal.conf new file mode 120000 index 00000000000..794e82f3bc1 --- /dev/null +++ b/linaro/configs/ubuntu-minimal.conf @@ -0,0 +1 @@ +distribution.conf
\ No newline at end of file diff --git a/linaro/configs/vexpress-tuning.conf b/linaro/configs/vexpress-tuning.conf new file mode 100644 index 00000000000..adea6cc66de --- /dev/null +++ b/linaro/configs/vexpress-tuning.conf @@ -0,0 +1 @@ +# CONFIG_PROVE_LOCKING is not set diff --git a/linaro/configs/vexpress.conf b/linaro/configs/vexpress.conf new file mode 100644 index 00000000000..94c36e016ad --- /dev/null +++ b/linaro/configs/vexpress.conf @@ -0,0 +1,59 @@ +CONFIG_ARCH_VEXPRESS=y +CONFIG_ARCH_VEXPRESS_CA9X4=y +CONFIG_BIG_LITTLE=y +CONFIG_ARCH_VEXPRESS_TC2=y +CONFIG_ARCH_VEXPRESS_DCSCB=y +CONFIG_ARM_VEXPRESS_BL_CPUFREQ=y +CONFIG_PM_OPP=y +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_INTERACTIVE=y +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +CONFIG_ARM_PSCI=y +CONFIG_HAVE_ARM_ARCH_TIMER=y +CONFIG_NR_CPUS=8 +CONFIG_HIGHMEM=y +CONFIG_HIGHPTE=y +CONFIG_CMDLINE="console=ttyAMA0,38400n8 root=/dev/mmcblk0p2 rootwait mmci.fmax=4000000" +CONFIG_VFP=y +CONFIG_NEON=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_SMSC911X=y +CONFIG_SMC91X=y +CONFIG_INPUT_EVDEV=y +CONFIG_SERIO_AMBAKMI=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_FB=y +CONFIG_FB_ARMCLCD=y +CONFIG_FB_ARMHDLCD=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_ARMAACI=y +CONFIG_USB=y +CONFIG_USB_ISP1760_HCD=y +CONFIG_USB_STORAGE=y +CONFIG_MMC=y +CONFIG_MMC_ARMMMCI=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_PL031=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_ROOT_NFS=y +CONFIG_VEXPRESS_CONFIG=y +CONFIG_SENSORS_VEXPRESS=y +CONFIG_REGULATOR=y +CONFIG_REGULATOR_VEXPRESS=y +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y +CONFIG_LEDS_GPIO=y +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_HEARTBEAT=y +CONFIG_LEDS_TRIGGER_CPU=y diff --git a/linaro/configs/vexpress64.conf b/linaro/configs/vexpress64.conf new file mode 100644 index 00000000000..7895612e1ed --- /dev/null +++ b/linaro/configs/vexpress64.conf @@ -0,0 +1,31 @@ +CONFIG_ARCH_VEXPRESS=y +CONFIG_SMP=y +CONFIG_CMDLINE="console=ttyAMA0" +CONFIG_COMPAT=y +CONFIG_SMC91X=y +CONFIG_INPUT_EVDEV=y +CONFIG_SERIO_AMBAKMI=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +# CONFIG_SERIO_I8042 is not set +CONFIG_FB=y +CONFIG_FB_ARMCLCD=y +CONFIG_FRAMEBUFFER_CONSOLE=y +# CONFIG_VGA_CONSOLE is not set +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_MMC=y +CONFIG_MMC_ARMMMCI=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_PL031=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_ROOT_NFS=y +CONFIG_VIRTIO=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_MMIO=y +CONFIG_REGULATOR=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y diff --git a/linaro/configs/xen.conf b/linaro/configs/xen.conf new file mode 100644 index 00000000000..d24fabbea07 --- /dev/null +++ b/linaro/configs/xen.conf @@ -0,0 +1,7 @@ +CONFIG_XEN=y +CONFIG_XEN_NETDEV_FRONTEND=y +CONFIG_XEN_NETDEV_BACKEND=y +CONFIG_XEN_BLKDEV_FRONTEND=y +CONFIG_XEN_BLKDEV_BACKEND=y +CONFIG_XENFS=y +CONFIG_XEN_COMPAT_XENFS=y diff --git a/mm/vmstat.c b/mm/vmstat.c index f42745e6578..b916a43a6b3 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/cpu.h> +#include <linux/cpumask.h> #include <linux/vmstat.h> #include <linux/sched.h> #include <linux/math64.h> @@ -432,11 +433,12 @@ EXPORT_SYMBOL(dec_zone_page_state); * with the global counters. These could cause remote node cache line * bouncing and will have to be only done when necessary. */ -void refresh_cpu_vm_stats(int cpu) +bool refresh_cpu_vm_stats(int cpu) { struct zone *zone; int i; int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; + bool vm_activity = false; for_each_populated_zone(zone) { struct per_cpu_pageset *p; @@ -483,14 +485,21 @@ void refresh_cpu_vm_stats(int cpu) if (p->expire) continue; - if (p->pcp.count) + if (p->pcp.count) { + vm_activity = true; drain_zone_pages(zone, &p->pcp); + } #endif } for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) - if (global_diff[i]) + if (global_diff[i]) { atomic_long_add(global_diff[i], &vm_stat[i]); + vm_activity = true; + } + + return vm_activity; + } /* @@ -1174,22 +1183,72 @@ static const struct file_operations proc_vmstat_file_operations = { #ifdef CONFIG_SMP static DEFINE_PER_CPU(struct delayed_work, vmstat_work); int sysctl_stat_interval __read_mostly = HZ; +static struct cpumask vmstat_off_cpus; +struct delayed_work vmstat_monitor_work; -static void vmstat_update(struct work_struct *w) +static inline bool need_vmstat(int cpu) { - refresh_cpu_vm_stats(smp_processor_id()); - schedule_delayed_work(&__get_cpu_var(vmstat_work), - round_jiffies_relative(sysctl_stat_interval)); + struct zone *zone; + int i; + + for_each_populated_zone(zone) { + struct per_cpu_pageset *p; + + p = per_cpu_ptr(zone->pageset, cpu); + + for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) + if (p->vm_stat_diff[i]) + return true; + + if (zone_to_nid(zone) != numa_node_id() && p->pcp.count) + return true; + } + + return false; } -static void __cpuinit start_cpu_timer(int cpu) +static void vmstat_update(struct work_struct *w); + +static void start_cpu_timer(int cpu) { struct delayed_work *work = &per_cpu(vmstat_work, cpu); - INIT_DEFERRABLE_WORK(work, vmstat_update); + cpumask_clear_cpu(cpu, &vmstat_off_cpus); schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu)); } +static void __cpuinit setup_cpu_timer(int cpu) +{ + struct delayed_work *work = &per_cpu(vmstat_work, cpu); + + INIT_DEFERRABLE_WORK(work, vmstat_update); + start_cpu_timer(cpu); +} + +static void vmstat_update_monitor(struct work_struct *w) +{ + int cpu; + + for_each_cpu_and(cpu, &vmstat_off_cpus, cpu_online_mask) + if (need_vmstat(cpu)) + start_cpu_timer(cpu); + + queue_delayed_work(system_unbound_wq, &vmstat_monitor_work, + round_jiffies_relative(sysctl_stat_interval)); +} + + +static void vmstat_update(struct work_struct *w) +{ + int cpu = smp_processor_id(); + + if (likely(refresh_cpu_vm_stats(cpu))) + schedule_delayed_work(&__get_cpu_var(vmstat_work), + round_jiffies_relative(sysctl_stat_interval)); + else + cpumask_set_cpu(cpu, &vmstat_off_cpus); +} + /* * Use the cpu notifier to insure that the thresholds are recalculated * when necessary. @@ -1204,17 +1263,19 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb, case CPU_ONLINE: case CPU_ONLINE_FROZEN: refresh_zone_stat_thresholds(); - start_cpu_timer(cpu); + setup_cpu_timer(cpu); node_set_state(cpu_to_node(cpu), N_CPU); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: - cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu)); - per_cpu(vmstat_work, cpu).work.func = NULL; + if (!cpumask_test_cpu(cpu, &vmstat_off_cpus)) { + cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu)); + per_cpu(vmstat_work, cpu).work.func = NULL; + } break; case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: - start_cpu_timer(cpu); + setup_cpu_timer(cpu); break; case CPU_DEAD: case CPU_DEAD_FROZEN: @@ -1237,8 +1298,14 @@ static int __init setup_vmstat(void) register_cpu_notifier(&vmstat_notifier); + INIT_DEFERRABLE_WORK(&vmstat_monitor_work, + vmstat_update_monitor); + queue_delayed_work(system_unbound_wq, + &vmstat_monitor_work, + round_jiffies_relative(HZ)); + for_each_online_cpu(cpu) - start_cpu_timer(cpu); + setup_cpu_timer(cpu); #endif #ifdef CONFIG_PROC_FS proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations); diff --git a/tools/lib/lk/Makefile b/tools/lib/lk/Makefile index 926cbf3efc7..2c5a1973335 100644 --- a/tools/lib/lk/Makefile +++ b/tools/lib/lk/Makefile @@ -1,5 +1,8 @@ include ../../scripts/Makefile.include +CC = $(CROSS_COMPILE)gcc +AR = $(CROSS_COMPILE)ar + # guard against environment variables LIB_H= LIB_OBJS= |