diff options
Diffstat (limited to 'drivers')
79 files changed, 15621 insertions, 109 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig index 9953a42809ec..d27feb5460f3 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -166,4 +166,6 @@ source "drivers/ipack/Kconfig" source "drivers/reset/Kconfig" +source "drivers/gator/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 130abc1dfd65..092a62e79688 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -152,3 +152,5 @@ obj-$(CONFIG_IIO) += iio/ obj-$(CONFIG_VME_BUS) += vme/ obj-$(CONFIG_IPACK_BUS) += ipack/ obj-$(CONFIG_NTB) += ntb/ + +obj-$(CONFIG_GATOR) += gator/ diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig index b05ecab915c4..5286e2d333b0 100644 --- a/drivers/bus/Kconfig +++ b/drivers/bus/Kconfig @@ -26,4 +26,11 @@ config OMAP_INTERCONNECT help Driver to enable OMAP interconnect error handling driver. + +config ARM_CCI + bool "ARM CCI driver support" + depends on ARM + help + Driver supporting the CCI cache coherent interconnect for ARM + platforms. endmenu diff --git a/drivers/bus/Makefile b/drivers/bus/Makefile index 3c7b53c12091..670cea443802 100644 --- a/drivers/bus/Makefile +++ b/drivers/bus/Makefile @@ -7,3 +7,5 @@ obj-$(CONFIG_OMAP_OCP2SCP) += omap-ocp2scp.o # Interconnect bus driver for OMAP SoCs. obj-$(CONFIG_OMAP_INTERCONNECT) += omap_l3_smx.o omap_l3_noc.o +# CCI cache coherent interconnect for ARM platforms +obj-$(CONFIG_ARM_CCI) += arm-cci.o diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c new file mode 100644 index 000000000000..2d1387bf1724 --- /dev/null +++ b/drivers/bus/arm-cci.c @@ -0,0 +1,945 @@ +/* + * CCI cache coherent interconnect driver + * + * Copyright (C) 2013 ARM Ltd. + * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/arm-cci.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/of_address.h> +#include <linux/slab.h> + +#include <asm/cacheflush.h> +#include <asm/irq_regs.h> +#include <asm/pmu.h> +#include <asm/smp_plat.h> + +#define DRIVER_NAME "CCI" + +#define CCI_PORT_CTRL 0x0 +#define CCI_CTRL_STATUS 0xc + +#define CCI_ENABLE_SNOOP_REQ 0x1 +#define CCI_ENABLE_DVM_REQ 0x2 +#define CCI_ENABLE_REQ (CCI_ENABLE_SNOOP_REQ | CCI_ENABLE_DVM_REQ) + +struct cci_nb_ports { + unsigned int nb_ace; + unsigned int nb_ace_lite; +}; + +enum cci_ace_port_type { + ACE_INVALID_PORT = 0x0, + ACE_PORT, + ACE_LITE_PORT, +}; + +struct cci_ace_port { + void __iomem *base; + unsigned long phys; + enum cci_ace_port_type type; + struct device_node *dn; +}; + +static struct cci_ace_port *ports; +static unsigned int nb_cci_ports; + +static void __iomem *cci_ctrl_base; +static unsigned long cci_ctrl_phys; + +#ifdef CONFIG_HW_PERF_EVENTS + +static void __iomem *cci_pmu_base; + +#define CCI400_PMCR 0x0100 + +#define CCI400_PMU_CYCLE_CNTR_BASE 0x0000 +#define CCI400_PMU_CNTR_BASE(idx) (CCI400_PMU_CYCLE_CNTR_BASE + (idx) * 0x1000) + +#define CCI400_PMCR_CEN 0x00000001 +#define CCI400_PMCR_RST 0x00000002 +#define CCI400_PMCR_CCR 0x00000004 +#define CCI400_PMCR_CCD 0x00000008 +#define CCI400_PMCR_EX 0x00000010 +#define CCI400_PMCR_DP 0x00000020 +#define CCI400_PMCR_NCNT_MASK 0x0000F800 +#define CCI400_PMCR_NCNT_SHIFT 11 + +#define CCI400_PMU_EVT_SEL 0x000 +#define CCI400_PMU_CNTR 0x004 +#define CCI400_PMU_CNTR_CTRL 0x008 +#define CCI400_PMU_OVERFLOW 0x00C + +#define CCI400_PMU_OVERFLOW_FLAG 1 + +enum cci400_perf_events { + CCI400_PMU_CYCLES = 0xFF +}; + +#define 
CCI400_PMU_EVENT_MASK 0xff +/* An event number packs the source interface in bits [7:5] and the event code in bits [4:0] */ +#define CCI400_PMU_EVENT_SOURCE(event) (((event) >> 5) & 0x7) +#define CCI400_PMU_EVENT_CODE(event) ((event) & 0x1f) + +#define CCI400_PMU_EVENT_SOURCE_S0 0 +#define CCI400_PMU_EVENT_SOURCE_S4 4 +#define CCI400_PMU_EVENT_SOURCE_M0 5 +#define CCI400_PMU_EVENT_SOURCE_M2 7 + +#define CCI400_PMU_EVENT_SLAVE_MIN 0x0 +#define CCI400_PMU_EVENT_SLAVE_MAX 0x13 + +#define CCI400_PMU_EVENT_MASTER_MIN 0x14 +#define CCI400_PMU_EVENT_MASTER_MAX 0x1A + +#define CCI400_PMU_MAX_HW_EVENTS 5 /* CCI PMU has 4 counters + 1 cycle counter */ + +#define CCI400_PMU_CYCLE_COUNTER_IDX 0 +#define CCI400_PMU_COUNTER0_IDX 1 +#define CCI400_PMU_COUNTER_LAST(cci_pmu) (CCI400_PMU_CYCLE_COUNTER_IDX + (cci_pmu)->num_events - 1) + + +static struct perf_event *events[CCI400_PMU_MAX_HW_EVENTS]; +static unsigned long used_mask[BITS_TO_LONGS(CCI400_PMU_MAX_HW_EVENTS)]; +static struct pmu_hw_events cci_hw_events = { + .events = events, + .used_mask = used_mask, +}; + +/* + * Check an 8-bit event number against the accepted ranges: sources up to + * CCI400_PMU_EVENT_SOURCE_S4 accept codes up to CCI400_PMU_EVENT_SLAVE_MAX, + * sources M0..M2 accept codes MASTER_MIN..MASTER_MAX. + * Returns the event number when accepted, -EINVAL otherwise. + */ +static int cci_pmu_validate_hw_event(u8 hw_event) +{ + u8 ev_source = CCI400_PMU_EVENT_SOURCE(hw_event); + u8 ev_code = CCI400_PMU_EVENT_CODE(hw_event); + + if (ev_source <= CCI400_PMU_EVENT_SOURCE_S4 && + ev_code <= CCI400_PMU_EVENT_SLAVE_MAX) + return hw_event; + else if (CCI400_PMU_EVENT_SOURCE_M0 <= ev_source && + ev_source <= CCI400_PMU_EVENT_SOURCE_M2 && + CCI400_PMU_EVENT_MASTER_MIN <= ev_code && + ev_code <= CCI400_PMU_EVENT_MASTER_MAX) + return hw_event; + + return -EINVAL; +} + +/* Non-zero when idx lies between the cycle counter index and the last counter of cci_pmu */ +static inline int cci_pmu_counter_is_valid(struct arm_pmu *cci_pmu, int idx) +{ + return CCI400_PMU_CYCLE_COUNTER_IDX <= idx && + idx <= CCI400_PMU_COUNTER_LAST(cci_pmu); +} + +/* Read the register at @offset inside the register block of counter @idx */ +static inline u32 cci_pmu_read_register(int idx, unsigned int offset) +{ + return readl_relaxed(cci_pmu_base + CCI400_PMU_CNTR_BASE(idx) + offset); +} + +/* Write @value to the register at @offset inside the register block of counter @idx */ +static inline void cci_pmu_write_register(u32 value, int idx, unsigned int offset) +{ + /* void function: no "return <expr>;" here */ + writel_relaxed(value, cci_pmu_base + CCI400_PMU_CNTR_BASE(idx) + offset); +} + +static inline void
cci_pmu_disable_counter(int idx) +{ + cci_pmu_write_register(0, idx, CCI400_PMU_CNTR_CTRL); +} + +static inline void cci_pmu_enable_counter(int idx) +{ + cci_pmu_write_register(1, idx, CCI400_PMU_CNTR_CTRL); +} + +static inline void cci_pmu_select_event(int idx, unsigned long event) +{ + event &= CCI400_PMU_EVENT_MASK; + cci_pmu_write_register(event, idx, CCI400_PMU_EVT_SEL); +} + +static u32 cci_pmu_get_max_counters(void) +{ + u32 n_cnts = (readl_relaxed(cci_ctrl_base + CCI400_PMCR) & + CCI400_PMCR_NCNT_MASK) >> CCI400_PMCR_NCNT_SHIFT; + + /* add 1 for cycle counter */ + return n_cnts + 1; +} + +static struct pmu_hw_events *cci_pmu_get_hw_events(void) +{ + return &cci_hw_events; +} + +static int cci_pmu_get_event_idx(struct pmu_hw_events *hw, struct perf_event *event) +{ + struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hw_event = &event->hw; + unsigned long cci_event = hw_event->config_base & CCI400_PMU_EVENT_MASK; + int idx; + + if (cci_event == CCI400_PMU_CYCLES) { + if (test_and_set_bit(CCI400_PMU_CYCLE_COUNTER_IDX, hw->used_mask)) + return -EAGAIN; + + return CCI400_PMU_CYCLE_COUNTER_IDX; + } + + for (idx = CCI400_PMU_COUNTER0_IDX; idx <= CCI400_PMU_COUNTER_LAST(cci_pmu); ++idx) { + if (!test_and_set_bit(idx, hw->used_mask)) + return idx; + } + + /* No counters available */ + return -EAGAIN; +} + +static int cci_pmu_map_event(struct perf_event *event) +{ + int mapping; + u8 config = event->attr.config & CCI400_PMU_EVENT_MASK; + + if (event->attr.type < PERF_TYPE_MAX) + return -ENOENT; + + /* 0xff is used to represent CCI Cycles */ + if (config == 0xff) + mapping = config; + else + mapping = cci_pmu_validate_hw_event(config); + + return mapping; +} + +static int cci_pmu_request_irq(struct arm_pmu *cci_pmu, irq_handler_t handler) +{ + int irq, err, i = 0; + struct platform_device *pmu_device = cci_pmu->plat_device; + + if (unlikely(!pmu_device)) + return -ENODEV; + + /* CCI exports 6 interrupts - 1 nERRORIRQ + 5 nEVNTCNTOVERFLOW (PMU) 
+ nERRORIRQ will be handled by secure firmware on TC2. So we + assume that all CCI interrupts listed in the linux device + tree are PMU interrupts. + + The following code should then be able to handle different routing + of the CCI PMU interrupts. + */ + while ((irq = platform_get_irq(pmu_device, i)) > 0) { + err = request_irq(irq, handler, 0, "arm-cci-pmu", cci_pmu); + if (err) { + dev_err(&pmu_device->dev, "unable to request IRQ%d for ARM CCI PMU counters\n", + irq); + return err; + } + i++; + } + + return 0; +} + +static irqreturn_t cci_pmu_handle_irq(int irq_num, void *dev) +{ + struct arm_pmu *cci_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *events = cci_pmu->get_hw_events(); + struct perf_sample_data data; + struct pt_regs *regs; + int idx; + + regs = get_irq_regs(); + + /* Iterate over counters and update the corresponding perf events. + This should work regardless of whether we have per-counter overflow + interrupt or a combined overflow interrupt. */ + for (idx = CCI400_PMU_CYCLE_COUNTER_IDX; idx <= CCI400_PMU_COUNTER_LAST(cci_pmu); idx++) { + struct perf_event *event = events->events[idx]; + struct hw_perf_event *hw_counter; + + if (!event) + continue; + + hw_counter = &event->hw; + + /* Did this counter overflow? 
*/ + if (!(cci_pmu_read_register(idx, CCI400_PMU_OVERFLOW) & CCI400_PMU_OVERFLOW_FLAG)) + continue; + cci_pmu_write_register(CCI400_PMU_OVERFLOW_FLAG, idx, CCI400_PMU_OVERFLOW); + + armpmu_event_update(event); + perf_sample_data_init(&data, 0, hw_counter->last_period); + if (!armpmu_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + cci_pmu->disable(event); + } + + irq_work_run(); + return IRQ_HANDLED; +} + +static void cci_pmu_free_irq(struct arm_pmu *cci_pmu) +{ + int irq, i = 0; + struct platform_device *pmu_device = cci_pmu->plat_device; + + while ((irq = platform_get_irq(pmu_device, i)) > 0) { + free_irq(irq, cci_pmu); + i++; + } +} + +static void cci_pmu_enable_event(struct perf_event *event) +{ + unsigned long flags; + struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = cci_pmu->get_hw_events(); + struct hw_perf_event *hw_counter = &event->hw; + int idx = hw_counter->idx; + + if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) { + dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); + return; + } + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Configure the event to count, unless you are counting cycles */ + if (idx != CCI400_PMU_CYCLE_COUNTER_IDX) + cci_pmu_select_event(idx, hw_counter->config_base); + + cci_pmu_enable_counter(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void cci_pmu_disable_event(struct perf_event *event) +{ + unsigned long flags; + struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = cci_pmu->get_hw_events(); + struct hw_perf_event *hw_counter = &event->hw; + int idx = hw_counter->idx; + + if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) { + dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); + return; + } + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + cci_pmu_disable_counter(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); 
+} + +static void cci_pmu_start(struct arm_pmu *cci_pmu) +{ + u32 val; + unsigned long flags; + struct pmu_hw_events *events = cci_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Enable all the PMU counters. */ + val = readl(cci_ctrl_base + CCI400_PMCR) | CCI400_PMCR_CEN; + writel(val, cci_ctrl_base + CCI400_PMCR); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void cci_pmu_stop(struct arm_pmu *cci_pmu) +{ + u32 val; + unsigned long flags; + struct pmu_hw_events *events = cci_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Disable all the PMU counters. */ + val = readl(cci_ctrl_base + CCI400_PMCR) & ~CCI400_PMCR_CEN; + writel(val, cci_ctrl_base + CCI400_PMCR); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static u32 cci_pmu_read_counter(struct perf_event *event) +{ + struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hw_counter = &event->hw; + int idx = hw_counter->idx; + u32 value; + + if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) { + dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); + return 0; + } + value = cci_pmu_read_register(idx, CCI400_PMU_CNTR); + + return value; +} + +static void cci_pmu_write_counter(struct perf_event *event, u32 value) +{ + struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hw_counter = &event->hw; + int idx = hw_counter->idx; + + if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) + dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); + else + cci_pmu_write_register(value, idx, CCI400_PMU_CNTR); +} + +static struct arm_pmu cci_pmu = { + .name = DRIVER_NAME, + .max_period = (1LLU << 32) - 1, + .get_hw_events = cci_pmu_get_hw_events, + .get_event_idx = cci_pmu_get_event_idx, + .map_event = cci_pmu_map_event, + .request_irq = cci_pmu_request_irq, + .handle_irq = cci_pmu_handle_irq, + .free_irq = cci_pmu_free_irq, + .enable = 
cci_pmu_enable_event, + .disable = cci_pmu_disable_event, + .start = cci_pmu_start, + .stop = cci_pmu_stop, + .read_counter = cci_pmu_read_counter, + .write_counter = cci_pmu_write_counter, +}; + +static int cci_pmu_probe(struct platform_device *pdev) +{ + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + cci_pmu_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(cci_pmu_base)) + return PTR_ERR(cci_pmu_base); + + cci_pmu.plat_device = pdev; + cci_pmu.num_events = cci_pmu_get_max_counters(); + raw_spin_lock_init(&cci_hw_events.pmu_lock); + cpumask_setall(&cci_pmu.valid_cpus); + + return armpmu_register(&cci_pmu, -1); +} + +static const struct of_device_id arm_cci_pmu_matches[] = { + {.compatible = "arm,cci-400-pmu"}, + {}, +}; + +static struct platform_driver cci_pmu_platform_driver = { + .driver = { + .name = DRIVER_NAME, + .of_match_table = arm_cci_pmu_matches, + }, + .probe = cci_pmu_probe, +}; + +static int __init cci_pmu_init(void) +{ + if (platform_driver_register(&cci_pmu_platform_driver)) + WARN(1, "unable to register CCI platform driver\n"); + return 0; +} + +#else + +static int __init cci_pmu_init(void) +{ + return 0; +} + +#endif /* CONFIG_HW_PERF_EVENTS */ + +struct cpu_port { + u64 mpidr; + u32 port; +}; + +/* + * Use the port MSB as valid flag, shift can be made dynamic + * by computing number of bits required for port indexes. + * Code disabling CCI cpu ports runs with D-cache invalidated + * and SCTLR bit clear so data accesses must be kept to a minimum + * to improve performance; for now shift is left static to + * avoid one more data access while disabling the CCI port. 
+ */ +#define PORT_VALID_SHIFT 31 +#define PORT_VALID (0x1 << PORT_VALID_SHIFT) + +static inline void init_cpu_port(struct cpu_port *port, u32 index, u64 mpidr) +{ + port->port = PORT_VALID | index; + port->mpidr = mpidr; +} + +static inline bool cpu_port_is_valid(struct cpu_port *port) +{ + return !!(port->port & PORT_VALID); +} + +static inline bool cpu_port_match(struct cpu_port *port, u64 mpidr) +{ + return port->mpidr == (mpidr & MPIDR_HWID_BITMASK); +} + +static struct cpu_port cpu_port[NR_CPUS]; + +/** + * __cci_ace_get_port - Function to retrieve the port index connected to + * a cpu or device. + * + * @dn: device node of the device to look-up + * @type: port type + * + * Return value: + * - CCI port index if success + * - -ENODEV if failure + */ +static int __cci_ace_get_port(struct device_node *dn, int type) +{ + int i; + bool ace_match; + struct device_node *cci_portn; + + cci_portn = of_parse_phandle(dn, "cci-control-port", 0); + for (i = 0; i < nb_cci_ports; i++) { + ace_match = ports[i].type == type; + if (ace_match && cci_portn == ports[i].dn) + return i; + } + return -ENODEV; +} + +int cci_ace_get_port(struct device_node *dn) +{ + return __cci_ace_get_port(dn, ACE_LITE_PORT); +} +EXPORT_SYMBOL_GPL(cci_ace_get_port); + +static void __init cci_ace_init_ports(void) +{ + int port, ac, cpu; + u64 hwid; + const u32 *cell; + struct device_node *cpun, *cpus; + + cpus = of_find_node_by_path("/cpus"); + if (WARN(!cpus, "Missing cpus node, bailing out\n")) + return; + + if (WARN_ON(of_property_read_u32(cpus, "#address-cells", &ac))) + ac = of_n_addr_cells(cpus); + + /* + * Port index look-up speeds up the function disabling ports by CPU, + * since the logical to port index mapping is done once and does + * not change after system boot. + * The stashed index array is initialized for all possible CPUs + * at probe time. 
+ */ + for_each_child_of_node(cpus, cpun) { + if (of_node_cmp(cpun->type, "cpu")) + continue; + cell = of_get_property(cpun, "reg", NULL); + if (WARN(!cell, "%s: missing reg property\n", cpun->full_name)) + continue; + + hwid = of_read_number(cell, ac); + cpu = get_logical_index(hwid & MPIDR_HWID_BITMASK); + + if (cpu < 0 || !cpu_possible(cpu)) + continue; + port = __cci_ace_get_port(cpun, ACE_PORT); + if (port < 0) + continue; + + init_cpu_port(&cpu_port[cpu], port, cpu_logical_map(cpu)); + } + + for_each_possible_cpu(cpu) { + WARN(!cpu_port_is_valid(&cpu_port[cpu]), + "CPU %u does not have an associated CCI port\n", + cpu); + } +} +/* + * Functions to enable/disable a CCI interconnect slave port + * + * They are called by low-level power management code to disable slave + * interfaces snoops and DVM broadcast. + * Since they may execute with cache data allocation disabled and + * after the caches have been cleaned and invalidated the functions provide + * no explicit locking since they may run with D-cache disabled, so normal + * cacheable kernel locks based on ldrex/strex may not work. + * Locking has to be provided by BSP implementations to ensure proper + * operations. + */ + +/** + * cci_port_control() - function to control a CCI port + * + * @port: index of the port to setup + * @enable: if true enables the port, if false disables it + */ +static void notrace cci_port_control(unsigned int port, bool enable) +{ + void __iomem *base = ports[port].base; + + writel_relaxed(enable ? CCI_ENABLE_REQ : 0, base + CCI_PORT_CTRL); + /* + * This function is called from power down procedures + * and must not execute any instruction that might + * cause the processor to be put in a quiescent state + * (eg wfi). Hence, cpu_relax() can not be added to this + * read loop to optimize power, since it might hide possibly + * disruptive operations. 
+ */ + while (readl_relaxed(cci_ctrl_base + CCI_CTRL_STATUS) & 0x1) + ; +} + +/** + * cci_disable_port_by_cpu() - function to disable a CCI port by CPU + * reference + * + * @mpidr: mpidr of the CPU whose CCI port should be disabled + * + * Disabling a CCI port for a CPU implies disabling the CCI port + * controlling that CPU cluster. Code disabling CPU CCI ports + * must make sure that the CPU running the code is the last active CPU + * in the cluster ie all other CPUs are quiescent in a low power state. + * + * Return: + * 0 on success + * -ENODEV on port look-up failure + */ +int notrace cci_disable_port_by_cpu(u64 mpidr) +{ + int cpu; + bool is_valid; + for (cpu = 0; cpu < nr_cpu_ids; cpu++) { + is_valid = cpu_port_is_valid(&cpu_port[cpu]); + if (is_valid && cpu_port_match(&cpu_port[cpu], mpidr)) { + cci_port_control(cpu_port[cpu].port, false); + return 0; + } + } + return -ENODEV; +} +EXPORT_SYMBOL_GPL(cci_disable_port_by_cpu); + +/** + * cci_enable_port_for_self() - enable a CCI port for calling CPU + * + * Enabling a CCI port for the calling CPU implies enabling the CCI + * port controlling that CPU's cluster. Caller must make sure that the + * CPU running the code is the first active CPU in the cluster and all + * other CPUs are quiescent in a low power state or waiting for this CPU + * to complete the CCI initialization. + * + * Because this is called when the MMU is still off and with no stack, + * the code must be position independent and ideally rely on callee + * clobbered registers only. To achieve this we must code this function + * entirely in assembler. + * + * On success this returns with the proper CCI port enabled. In case of + * any failure this never returns as the inability to enable the CCI is + * fatal and there is no possible recovery at this stage. 
+ */ +asmlinkage void __naked cci_enable_port_for_self(void) +{ + asm volatile ("\n" + +" mrc p15, 0, r0, c0, c0, 5 @ get MPIDR value \n" +" and r0, r0, #"__stringify(MPIDR_HWID_BITMASK)" \n" +" adr r1, 5f \n" +" ldr r2, [r1] \n" +" add r1, r1, r2 @ &cpu_port \n" +" add ip, r1, %[sizeof_cpu_port] \n" + + /* Loop over the cpu_port array looking for a matching MPIDR */ +"1: ldr r2, [r1, %[offsetof_cpu_port_mpidr_lsb]] \n" +" cmp r2, r0 @ compare MPIDR \n" +" bne 2f \n" + + /* Found a match, now test port validity */ +" ldr r3, [r1, %[offsetof_cpu_port_port]] \n" +" tst r3, #"__stringify(PORT_VALID)" \n" +" bne 3f \n" + + /* no match, loop with the next cpu_port entry */ +"2: add r1, r1, %[sizeof_struct_cpu_port] \n" +" cmp r1, ip @ done? \n" +" blo 1b \n" + + /* CCI port not found -- cheaply try to stall this CPU */ +"cci_port_not_found: \n" +" wfi \n" +" wfe \n" +" b cci_port_not_found \n" + + /* Use matched port index to look up the corresponding ports entry */ +"3: bic r3, r3, #"__stringify(PORT_VALID)" \n" +" adr r0, 6f \n" +" ldmia r0, {r1, r2} \n" +" sub r1, r1, r0 @ virt - phys \n" +" ldr r0, [r0, r2] @ *(&ports) \n" +" mov r2, %[sizeof_struct_ace_port] \n" +" mla r0, r2, r3, r0 @ &ports[index] \n" +" sub r0, r0, r1 @ virt_to_phys() \n" + + /* Enable the CCI port */ +" ldr r0, [r0, %[offsetof_port_phys]] \n" +" mov r3, #"__stringify(CCI_ENABLE_REQ)" \n" +" str r3, [r0, #"__stringify(CCI_PORT_CTRL)"] \n" + + /* poll the status reg for completion */ +" adr r1, 7f \n" +" ldr r0, [r1] \n" +" ldr r0, [r0, r1] @ cci_ctrl_base \n" +"4: ldr r1, [r0, #"__stringify(CCI_CTRL_STATUS)"] \n" +" tst r1, #1 \n" +" bne 4b \n" + +" mov r0, #0 \n" +" bx lr \n" + +" .align 2 \n" +"5: .word cpu_port - . \n" +"6: .word . \n" +" .word ports - 6b \n" +"7: .word cci_ctrl_phys - . 
\n" + : : + [sizeof_cpu_port] "i" (sizeof(cpu_port)), +#ifndef __ARMEB__ + [offsetof_cpu_port_mpidr_lsb] "i" (offsetof(struct cpu_port, mpidr)), +#else + [offsetof_cpu_port_mpidr_lsb] "i" (offsetof(struct cpu_port, mpidr)+4), +#endif + [offsetof_cpu_port_port] "i" (offsetof(struct cpu_port, port)), + [sizeof_struct_cpu_port] "i" (sizeof(struct cpu_port)), + [sizeof_struct_ace_port] "i" (sizeof(struct cci_ace_port)), + [offsetof_port_phys] "i" (offsetof(struct cci_ace_port, phys)) ); + + unreachable(); +} + +/** + * __cci_control_port_by_device() - function to control a CCI port by device + * reference + * + * @dn: device node pointer of the device whose CCI port should be + * controlled + * @enable: if true enables the port, if false disables it + * + * Return: + * 0 on success + * -ENODEV on port look-up failure + */ +int notrace __cci_control_port_by_device(struct device_node *dn, bool enable) +{ + int port; + + if (!dn) + return -ENODEV; + + port = __cci_ace_get_port(dn, ACE_LITE_PORT); + if (WARN_ONCE(port < 0, "node %s ACE lite port look-up failure\n", + dn->full_name)) + return -ENODEV; + cci_port_control(port, enable); + return 0; +} +EXPORT_SYMBOL_GPL(__cci_control_port_by_device); + +/** + * __cci_control_port_by_index() - function to control a CCI port by port index + * + * @port: port index previously retrieved with cci_ace_get_port() + * @enable: if true enables the port, if false disables it + * + * Return: + * 0 on success + * -ENODEV on port index out of range + * -EPERM if operation carried out on an ACE PORT + */ +int notrace __cci_control_port_by_index(u32 port, bool enable) +{ + if (port >= nb_cci_ports || ports[port].type == ACE_INVALID_PORT) + return -ENODEV; + /* + * CCI control for ports connected to CPUS is extremely fragile + * and must be made to go through a specific and controlled + * interface (ie cci_disable_port_by_cpu(); control by general purpose + * indexing is therefore disabled for ACE ports. 
+ */ + if (ports[port].type == ACE_PORT) + return -EPERM; + + cci_port_control(port, enable); + return 0; +} +EXPORT_SYMBOL_GPL(__cci_control_port_by_index); + +static const struct cci_nb_ports cci400_ports = { + .nb_ace = 2, + .nb_ace_lite = 3 +}; + +static const struct of_device_id arm_cci_matches[] = { + {.compatible = "arm,cci-400", .data = &cci400_ports }, + {}, +}; + +static const struct of_device_id arm_cci_ctrl_if_matches[] = { + {.compatible = "arm,cci-400-ctrl-if", }, + {}, +}; + +/* + * Find the node matching arm_cci_matches, size the ports array from its + * nb_ace + nb_ace_lite configuration, map the control registers, then walk + * the child nodes matching arm_cci_ctrl_if_matches to fill in one port each. + */ +static int __init cci_probe(void) +{ + struct cci_nb_ports const *cci_config; + int ret, i, nb_ace = 0, nb_ace_lite = 0; + struct device_node *np, *cp; + struct resource res; + const char *match_str; + bool is_ace; + + np = of_find_matching_node(NULL, arm_cci_matches); + if (!np) + return -ENODEV; + + cci_config = of_match_node(arm_cci_matches, np)->data; + if (!cci_config) + return -ENODEV; + + nb_cci_ports = cci_config->nb_ace + cci_config->nb_ace_lite; + + /* kcalloc() takes the element count first, then the element size */ + ports = kcalloc(nb_cci_ports, sizeof(*ports), GFP_KERNEL); + if (!ports) + return -ENOMEM; + + ret = of_address_to_resource(np, 0, &res); + if (!ret) { + cci_ctrl_base = ioremap(res.start, resource_size(&res)); + cci_ctrl_phys = res.start; + } + if (ret || !cci_ctrl_base) { + WARN(1, "unable to ioremap CCI ctrl\n"); + ret = -ENXIO; + goto memalloc_err; + } + + for_each_child_of_node(np, cp) { + if (!of_match_node(arm_cci_ctrl_if_matches, cp)) + continue; + + /* next free slot = number of ports accepted so far */ + i = nb_ace + nb_ace_lite; + + if (i >= nb_cci_ports) + break; + + if (of_property_read_string(cp, "interface-type", + &match_str)) { + WARN(1, "node %s missing interface-type property\n", + cp->full_name); + continue; + } + is_ace = strcmp(match_str, "ace") == 0; + if (!is_ace && strcmp(match_str, "ace-lite")) { + WARN(1, "node %s containing invalid interface-type property, skipping it\n", + cp->full_name); + continue; + } + + ret = of_address_to_resource(cp, 0, &res); + if (!ret) { + ports[i].base = ioremap(res.start, resource_size(&res)); + ports[i].phys = res.start;
+ } + if (ret || !ports[i].base) { + WARN(1, "unable to ioremap CCI port %d\n", i); + continue; + } + + if (is_ace) { + if (WARN_ON(nb_ace >= cci_config->nb_ace)) + continue; + ports[i].type = ACE_PORT; + ++nb_ace; + } else { + if (WARN_ON(nb_ace_lite >= cci_config->nb_ace_lite)) + continue; + ports[i].type = ACE_LITE_PORT; + ++nb_ace_lite; + } + ports[i].dn = cp; + } + + /* initialize a stashed array of ACE ports to speed-up look-up */ + cci_ace_init_ports(); + + /* + * Multi-cluster systems may need this data when non-coherent, during + * cluster power-up/power-down. Make sure it reaches main memory. + */ + sync_cache_w(&cci_ctrl_base); + sync_cache_w(&cci_ctrl_phys); + sync_cache_w(&ports); + sync_cache_w(&cpu_port); + __sync_cache_range_w(ports, sizeof(*ports) * nb_cci_ports); + pr_info("ARM CCI driver probed\n"); + return 0; + +memalloc_err: + + kfree(ports); + return ret; +} + +/* -EAGAIN means "not probed yet"; any other value is the stored cci_probe() result */ +static int cci_init_status = -EAGAIN; +static DEFINE_MUTEX(cci_probing); + +/* + * Run cci_probe() at most once and cache its result in cci_init_status. + * The status is checked before and again after taking cci_probing, so a + * caller that finds a stored result returns it without probing again. + */ +static int __init cci_init(void) +{ + if (cci_init_status != -EAGAIN) + return cci_init_status; + + mutex_lock(&cci_probing); + if (cci_init_status == -EAGAIN) + cci_init_status = cci_probe(); + mutex_unlock(&cci_probing); + return cci_init_status; +} + +/* + * To sort out early init calls ordering a helper function is provided to + * check if the CCI driver has been initialized. The function checks whether the driver + * has been initialized; if not, it calls the init function that probes + * the driver and updates the return value.
+ */ +bool __init cci_probed(void) +{ + return cci_init() == 0; +} +EXPORT_SYMBOL_GPL(cci_probed); + +early_initcall(cci_init); +core_initcall(cci_pmu_init); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ARM CCI support"); diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 0357ac44638b..d0d9b2124752 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -42,7 +42,7 @@ config COMMON_CLK_WM831X config COMMON_CLK_VERSATILE bool "Clock driver for ARM Reference designs" - depends on ARCH_INTEGRATOR || ARCH_REALVIEW || ARCH_VEXPRESS + depends on ARCH_INTEGRATOR || ARCH_REALVIEW || ARCH_VEXPRESS || ARM64 ---help--- Supports clocking on ARM Reference designs: - Integrator/AP and Integrator/CP diff --git a/drivers/clk/versatile/Makefile b/drivers/clk/versatile/Makefile index c16ca787170a..6e76bf87ca87 100644 --- a/drivers/clk/versatile/Makefile +++ b/drivers/clk/versatile/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_ARCH_INTEGRATOR) += clk-integrator.o obj-$(CONFIG_INTEGRATOR_IMPD1) += clk-impd1.o obj-$(CONFIG_ARCH_REALVIEW) += clk-realview.o obj-$(CONFIG_ARCH_VEXPRESS) += clk-vexpress.o clk-sp810.o -obj-$(CONFIG_VEXPRESS_CONFIG) += clk-vexpress-osc.o +obj-$(CONFIG_VEXPRESS_CONFIG) += clk-vexpress-osc.o clk-vexpress-spc.o diff --git a/drivers/clk/versatile/clk-vexpress-osc.c b/drivers/clk/versatile/clk-vexpress-osc.c index 256c8be74df8..2dc8b41a339d 100644 --- a/drivers/clk/versatile/clk-vexpress-osc.c +++ b/drivers/clk/versatile/clk-vexpress-osc.c @@ -107,7 +107,7 @@ void __init vexpress_osc_of_setup(struct device_node *node) osc->func = vexpress_config_func_get_by_node(node); if (!osc->func) { pr_err("Failed to obtain config func for node '%s'!\n", - node->name); + node->full_name); goto error; } @@ -119,7 +119,7 @@ void __init vexpress_osc_of_setup(struct device_node *node) of_property_read_string(node, "clock-output-names", &init.name); if (!init.name) - init.name = node->name; + init.name = node->full_name; init.ops = &vexpress_osc_ops; init.flags = 
CLK_IS_ROOT; diff --git a/drivers/clk/versatile/clk-vexpress-spc.c b/drivers/clk/versatile/clk-vexpress-spc.c new file mode 100644 index 000000000000..bb566e244b0c --- /dev/null +++ b/drivers/clk/versatile/clk-vexpress-spc.c @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2012 ARM Limited + * Copyright (C) 2012 Linaro + * + * Author: Viresh Kumar <viresh.kumar@linaro.org> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +/* SPC clock programming interface for Vexpress cpus */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/clk-provider.h> +#include <linux/clkdev.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/vexpress.h> + +struct clk_spc { + struct clk_hw hw; + spinlock_t *lock; + int cluster; +}; + +#define to_clk_spc(spc) container_of(spc, struct clk_spc, hw) + +/* + * Report the cluster clock rate. The value read from the SPC is multiplied + * by 1000 here and spc_set_rate() divides by 1000, so the SPC side is in + * units of 1/1000 of the clk rate. + * Returns 0 when the SPC query fails. + */ +static unsigned long spc_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_spc *spc = to_clk_spc(hw); + u32 freq; + + if (vexpress_spc_get_performance(spc->cluster, &freq)) { + /* log before returning; the rate is unsigned, so report failure as 0 rather than a negative errno */ + pr_err("%s: Failed", __func__); + return 0; + } + + return freq * 1000; +} + +/* Every requested rate is accepted unchanged */ +static long spc_round_rate(struct clk_hw *hw, unsigned long drate, + unsigned long *parent_rate) +{ + return drate; +} + +/* Pass rate / 1000 to the SPC for this clock's cluster and return its status */ +static int spc_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_spc *spc = to_clk_spc(hw); + + return vexpress_spc_set_performance(spc->cluster, rate / 1000); +} + +static struct clk_ops clk_spc_ops = { + .recalc_rate = spc_recalc_rate, + .round_rate = spc_round_rate, + .set_rate = spc_set_rate, +}; + +struct clk *vexpress_clk_register_spc(const char *name, int cluster_id) +{ + struct clk_init_data init; + struct clk_spc *spc; + struct clk *clk; + + if (!name) { + pr_err("Invalid name passed"); + return
ERR_PTR(-EINVAL); + } + + spc = kzalloc(sizeof(*spc), GFP_KERNEL); + if (!spc) { + pr_err("could not allocate spc clk\n"); + return ERR_PTR(-ENOMEM); + } + + spc->hw.init = &init; + spc->cluster = cluster_id; + + init.name = name; + init.ops = &clk_spc_ops; + init.flags = CLK_IS_ROOT | CLK_GET_RATE_NOCACHE; + init.num_parents = 0; + + clk = clk_register(NULL, &spc->hw); + if (!IS_ERR_OR_NULL(clk)) + return clk; + + pr_err("clk register failed\n"); + kfree(spc); + + /* NOTE: this path yields NULL while the earlier failures yield ERR_PTR(); callers must check for both */ + return NULL; +} + +#if defined(CONFIG_OF) +/* + * If an "arm,vexpress-spc" node exists, register one SPC clock and clkdev + * entry per node named "cluster", called "cpu-cluster.<id>" where <id> comes + * from the node's 4-byte "reg" property. Stops at the first registration + * failure. + */ +void __init vexpress_clk_of_register_spc(void) +{ + char name[14] = "cpu-cluster.X"; + struct device_node *node = NULL; + struct clk *clk; + const u32 *val; + int cluster_id = 0, len; + + node = of_find_compatible_node(NULL, NULL, "arm,vexpress-spc"); + if (!node) { + pr_debug("%s: No SPC found, Exiting!!\n", __func__); + return; + } + /* only the node's presence matters: drop the reference and start the scan below from the top */ + of_node_put(node); + node = NULL; + + while ((node = of_find_node_by_name(node, "cluster"))) { + val = of_get_property(node, "reg", &len); + if (val && len == 4) + cluster_id = be32_to_cpup(val); + + /* single character slot: NOTE(review) only yields a digit for ids 0..9 */ + name[12] = cluster_id + '0'; + clk = vexpress_clk_register_spc(name, cluster_id); + /* registration reports failure as either ERR_PTR() or NULL */ + if (IS_ERR_OR_NULL(clk)) { + of_node_put(node); + return; + } + + pr_debug("Registered clock '%s'\n", name); + clk_register_clkdev(clk, NULL, name); + } +} +CLK_OF_DECLARE(spc, "arm,vexpress-spc", vexpress_clk_of_register_spc); +#endif diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 534fcb825153..a9c1324843eb 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -201,7 +201,7 @@ source "drivers/cpufreq/Kconfig.x86" endmenu menu "ARM CPU frequency scaling drivers" -depends on ARM +depends on ARM || ARM64 source "drivers/cpufreq/Kconfig.arm" endmenu diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 6e57543fe0b9..8327444b76cb 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -4,7 +4,7 @@ config ARM_BIG_LITTLE_CPUFREQ tristate "Generic ARM big LITTLE CPUfreq driver" - depends on ARM_CPU_TOPOLOGY && PM_OPP && HAVE_CLK + depends on ARM_CPU_TOPOLOGY &&
PM_OPP && HAVE_CLK && BIG_LITTLE help This enables the Generic CPUfreq driver for ARM big.LITTLE platforms. @@ -15,6 +15,14 @@ config ARM_DT_BL_CPUFREQ This enables probing via DT for Generic CPUfreq driver for ARM big.LITTLE platform. This gets frequency tables from DT. +config ARM_VEXPRESS_BL_CPUFREQ + tristate "ARM Vexpress big LITTLE CPUfreq driver" + select ARM_BIG_LITTLE_CPUFREQ + depends on VEXPRESS_SPC + help + This enables the CPUfreq driver for ARM Vexpress big.LITTLE platform. + If in doubt, say N. + config ARM_EXYNOS_CPUFREQ bool "SAMSUNG EXYNOS SoCs" depends on ARCH_EXYNOS diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 315b9231feb1..1db9b4929cfa 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY) += amd_freq_sensitivity.o obj-$(CONFIG_ARM_BIG_LITTLE_CPUFREQ) += arm_big_little.o # big LITTLE per platform glues. Keep DT_BL_CPUFREQ as the last entry in all big # LITTLE drivers, so that it is probed last. +obj-$(CONFIG_ARM_VEXPRESS_BL_CPUFREQ) += vexpress_big_little.o obj-$(CONFIG_ARM_DT_BL_CPUFREQ) += arm_big_little_dt.o obj-$(CONFIG_ARCH_DAVINCI_DA850) += davinci-cpufreq.o diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index 5d7f53fcd6f5..076f25a59e00 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c @@ -24,27 +24,148 @@ #include <linux/cpufreq.h> #include <linux/cpumask.h> #include <linux/export.h> +#include <linux/mutex.h> #include <linux/of_platform.h> #include <linux/opp.h> #include <linux/slab.h> #include <linux/topology.h> #include <linux/types.h> +#include <asm/bL_switcher.h> #include "arm_big_little.h" -/* Currently we support only two clusters */ -#define MAX_CLUSTERS 2 +#ifdef CONFIG_BL_SWITCHER +bool bL_switching_enabled; +#endif + +#define ACTUAL_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq << 1 : freq) +#define VIRT_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? 
freq >> 1 : freq) static struct cpufreq_arm_bL_ops *arm_bL_ops; static struct clk *clk[MAX_CLUSTERS]; -static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS]; -static atomic_t cluster_usage[MAX_CLUSTERS] = {ATOMIC_INIT(0), ATOMIC_INIT(0)}; +static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1]; +static atomic_t cluster_usage[MAX_CLUSTERS + 1] = {ATOMIC_INIT(0), + ATOMIC_INIT(0)}; + +static unsigned int clk_big_min; /* (Big) clock frequencies */ +static unsigned int clk_little_max; /* Maximum clock frequency (Little) */ + +static DEFINE_PER_CPU(unsigned int, physical_cluster); +static DEFINE_PER_CPU(unsigned int, cpu_last_req_freq); + +static struct mutex cluster_lock[MAX_CLUSTERS]; + +static unsigned int find_cluster_maxfreq(int cluster) +{ + int j; + u32 max_freq = 0, cpu_freq; + + for_each_online_cpu(j) { + cpu_freq = per_cpu(cpu_last_req_freq, j); + + if ((cluster == per_cpu(physical_cluster, j)) && + (max_freq < cpu_freq)) + max_freq = cpu_freq; + } -static unsigned int bL_cpufreq_get(unsigned int cpu) + pr_debug("%s: cluster: %d, max freq: %d\n", __func__, cluster, + max_freq); + + return max_freq; +} + +static unsigned int clk_get_cpu_rate(unsigned int cpu) { - u32 cur_cluster = cpu_to_cluster(cpu); + u32 cur_cluster = per_cpu(physical_cluster, cpu); + u32 rate = clk_get_rate(clk[cur_cluster]) / 1000; + + /* For switcher we use virtual A15 clock rates */ + if (is_bL_switching_enabled()) + rate = VIRT_FREQ(cur_cluster, rate); + + pr_debug("%s: cpu: %d, cluster: %d, freq: %u\n", __func__, cpu, + cur_cluster, rate); - return clk_get_rate(clk[cur_cluster]) / 1000; + return rate; +} + +static unsigned int bL_cpufreq_get_rate(unsigned int cpu) +{ + if (is_bL_switching_enabled()) { + pr_debug("%s: freq: %d\n", __func__, per_cpu(cpu_last_req_freq, + cpu)); + + return per_cpu(cpu_last_req_freq, cpu); + } else { + return clk_get_cpu_rate(cpu); + } +} + +static unsigned int +bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate) 
+{
+	/*
+	 * Program the clock of @new_cluster for @cpu and, when the CPU moves
+	 * between clusters, request the switch and re-evaluate the rate of
+	 * the cluster it left.
+	 *
+	 * With the switcher enabled the requested (virtual) rate is recorded
+	 * per CPU and the cluster clock is set to the highest rate requested
+	 * by any online CPU on that cluster, converted by ACTUAL_FREQ().
+	 *
+	 * Returns 0 on success or the clk_set_rate() error for @new_cluster.
+	 * A failure to re-program the old cluster is only logged.
+	 * NOTE(review): the declared return type is unsigned int although a
+	 * negative errno is returned - confirm callers only test for non-zero.
+	 */
+	u32 new_rate, prev_rate;
+	int ret;
+	bool bLs = is_bL_switching_enabled();
+
+	mutex_lock(&cluster_lock[new_cluster]);
+
+	if (bLs) {
+		/* Remember the previous request so a failure can be undone. */
+		prev_rate = per_cpu(cpu_last_req_freq, cpu);
+		per_cpu(cpu_last_req_freq, cpu) = rate;
+		per_cpu(physical_cluster, cpu) = new_cluster;
+
+		new_rate = find_cluster_maxfreq(new_cluster);
+		new_rate = ACTUAL_FREQ(new_cluster, new_rate);
+	} else {
+		new_rate = rate;
+	}
+
+	pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d, freq: %d\n",
+			__func__, cpu, old_cluster, new_cluster, new_rate);
+
+	ret = clk_set_rate(clk[new_cluster], new_rate * 1000);
+	if (WARN_ON(ret)) {
+		pr_err("clk_set_rate failed: %d, new cluster: %d\n", ret,
+				new_cluster);
+		if (bLs) {
+			per_cpu(cpu_last_req_freq, cpu) = prev_rate;
+			per_cpu(physical_cluster, cpu) = old_cluster;
+		}
+
+		mutex_unlock(&cluster_lock[new_cluster]);
+
+		return ret;
+	}
+
+	mutex_unlock(&cluster_lock[new_cluster]);
+
+	/* Recalc freq for old cluster when switching clusters */
+	if (old_cluster != new_cluster) {
+		pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d\n",
+				__func__, cpu, old_cluster, new_cluster);
+
+		/* Switch cluster */
+		bL_switch_request(cpu, new_cluster);
+
+		mutex_lock(&cluster_lock[old_cluster]);
+
+		/* Set freq of old cluster if there are cpus left on it */
+		new_rate = find_cluster_maxfreq(old_cluster);
+		new_rate = ACTUAL_FREQ(old_cluster, new_rate);
+
+		if (new_rate) {
+			pr_debug("%s: Updating rate of old cluster: %d, to freq: %d\n",
+					__func__, old_cluster, new_rate);
+
+			/*
+			 * Capture the result so the message reports the real
+			 * error code rather than the stale 0 left in ret.
+			 */
+			ret = clk_set_rate(clk[old_cluster], new_rate * 1000);
+			if (ret)
+				pr_err("%s: clk_set_rate failed: %d, old cluster: %d\n",
+						__func__, ret, old_cluster);
+		}
+		mutex_unlock(&cluster_lock[old_cluster]);
+	}
+
+	return 0;
 }
 /* Validate policy frequency range */
@@ -60,12 +181,14 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy,
 		unsigned int target_freq, unsigned int relation)
 {
 	struct cpufreq_freqs freqs;
-	u32 cpu = policy->cpu, freq_tab_idx, cur_cluster;
+	u32 cpu =
policy->cpu, freq_tab_idx, cur_cluster, new_cluster, + actual_cluster; int ret = 0; - cur_cluster = cpu_to_cluster(policy->cpu); + cur_cluster = cpu_to_cluster(cpu); + new_cluster = actual_cluster = per_cpu(physical_cluster, cpu); - freqs.old = bL_cpufreq_get(policy->cpu); + freqs.old = bL_cpufreq_get_rate(cpu); /* Determine valid target frequency using freq_table */ cpufreq_frequency_table_target(policy, freq_table[cur_cluster], @@ -79,13 +202,21 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy, if (freqs.old == freqs.new) return 0; + if (is_bL_switching_enabled()) { + if ((actual_cluster == A15_CLUSTER) && + (freqs.new < clk_big_min)) { + new_cluster = A7_CLUSTER; + } else if ((actual_cluster == A7_CLUSTER) && + (freqs.new > clk_little_max)) { + new_cluster = A15_CLUSTER; + } + } + cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); - ret = clk_set_rate(clk[cur_cluster], freqs.new * 1000); - if (ret) { - pr_err("clk_set_rate failed: %d\n", ret); + ret = bL_cpufreq_set_rate(cpu, actual_cluster, new_cluster, freqs.new); + if (ret) return ret; - } policy->cur = freqs.new; @@ -94,7 +225,73 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy, return ret; } -static void put_cluster_clk_and_freq_table(struct device *cpu_dev) +static inline u32 get_table_count(struct cpufreq_frequency_table *table) +{ + int count; + + for (count = 0; table[count].frequency != CPUFREQ_TABLE_END; count++) + ; + + return count; +} + +/* get the minimum frequency in the cpufreq_frequency_table */ +static inline u32 get_table_min(struct cpufreq_frequency_table *table) +{ + int i; + uint32_t min_freq = ~0; + for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) + if (table[i].frequency < min_freq) + min_freq = table[i].frequency; + return min_freq; +} + +/* get the maximum frequency in the cpufreq_frequency_table */ +static inline u32 get_table_max(struct cpufreq_frequency_table *table) +{ + int i; + uint32_t max_freq = 0; + for (i = 0; 
(table[i].frequency != CPUFREQ_TABLE_END); i++) + if (table[i].frequency > max_freq) + max_freq = table[i].frequency; + return max_freq; +} + +static int merge_cluster_tables(void) +{ + int i, j, k = 0, count = 1; + struct cpufreq_frequency_table *table; + + for (i = 0; i < MAX_CLUSTERS; i++) + count += get_table_count(freq_table[i]); + + table = kzalloc(sizeof(*table) * count, GFP_KERNEL); + if (!table) + return -ENOMEM; + + freq_table[MAX_CLUSTERS] = table; + + /* Add in reverse order to get freqs in increasing order */ + for (i = MAX_CLUSTERS - 1; i >= 0; i--) { + for (j = 0; freq_table[i][j].frequency != CPUFREQ_TABLE_END; + j++) { + table[k].frequency = VIRT_FREQ(i, + freq_table[i][j].frequency); + pr_debug("%s: index: %d, freq: %d\n", __func__, k, + table[k].frequency); + k++; + } + } + + table[k].index = k; + table[k].frequency = CPUFREQ_TABLE_END; + + pr_debug("%s: End, table: %p, count: %d\n", __func__, table, k); + + return 0; +} + +static void _put_cluster_clk_and_freq_table(struct device *cpu_dev) { u32 cluster = cpu_to_cluster(cpu_dev->id); @@ -105,10 +302,35 @@ static void put_cluster_clk_and_freq_table(struct device *cpu_dev) } } -static int get_cluster_clk_and_freq_table(struct device *cpu_dev) +static void put_cluster_clk_and_freq_table(struct device *cpu_dev) { u32 cluster = cpu_to_cluster(cpu_dev->id); - char name[14] = "cpu-cluster."; + int i; + + if (cluster < MAX_CLUSTERS) + return _put_cluster_clk_and_freq_table(cpu_dev); + + if (atomic_dec_return(&cluster_usage[MAX_CLUSTERS])) + return; + + for (i = 0; i < MAX_CLUSTERS; i++) { + struct device *cdev = get_cpu_device(i); + if (!cdev) { + pr_err("%s: failed to get cpu%d device\n", __func__, i); + return; + } + + _put_cluster_clk_and_freq_table(cdev); + } + + /* free virtual table */ + kfree(freq_table[MAX_CLUSTERS]); +} + +static int _get_cluster_clk_and_freq_table(struct device *cpu_dev) +{ + u32 cluster = cpu_to_cluster(cpu_dev->id); + char name[14] = "cpu-cluster.X"; int ret; if 
(atomic_inc_return(&cluster_usage[cluster]) != 1)
@@ -149,6 +371,62 @@ atomic_dec:
 	return ret;
 }
+/*
+ * Acquire the clock and frequency table for @cpu_dev's cluster.
+ *
+ * For a physical cluster this defers to _get_cluster_clk_and_freq_table().
+ * For the virtual cluster (index MAX_CLUSTERS) the first user acquires the
+ * data of every physical cluster, builds the merged table and derives
+ * clk_big_min / clk_little_max; later users only take a usage count.
+ *
+ * Returns 0 on success or a negative errno. On failure everything acquired
+ * so far is released and the virtual-cluster usage count is dropped again.
+ */
+static int get_cluster_clk_and_freq_table(struct device *cpu_dev)
+{
+	u32 cluster = cpu_to_cluster(cpu_dev->id);
+	int i, ret;
+
+	if (cluster < MAX_CLUSTERS)
+		return _get_cluster_clk_and_freq_table(cpu_dev);
+
+	if (atomic_inc_return(&cluster_usage[MAX_CLUSTERS]) != 1)
+		return 0;
+
+	/*
+	 * Get data for all clusters and fill virtual cluster with a merge of
+	 * both
+	 */
+	for (i = 0; i < MAX_CLUSTERS; i++) {
+		struct device *cdev = get_cpu_device(i);
+		if (!cdev) {
+			pr_err("%s: failed to get cpu%d device\n", __func__, i);
+			/* Unwind instead of leaking the usage count taken above. */
+			ret = -ENODEV;
+			goto put_clusters;
+		}
+
+		ret = _get_cluster_clk_and_freq_table(cdev);
+		if (ret)
+			goto put_clusters;
+	}
+
+	ret = merge_cluster_tables();
+	if (ret)
+		goto put_clusters;
+
+	/* Assuming 2 cluster, set clk_big_min and clk_little_max */
+	clk_big_min = get_table_min(freq_table[0]);
+	clk_little_max = VIRT_FREQ(1, get_table_max(freq_table[1]));
+
+	pr_debug("%s: cluster: %d, clk_big_min: %d, clk_little_max: %d\n",
+			__func__, cluster, clk_big_min, clk_little_max);
+
+	return 0;
+
+put_clusters:
+	/* Release, in reverse order, the clusters acquired before the failure. */
+	while (i--) {
+		struct device *cdev = get_cpu_device(i);
+		if (!cdev) {
+			pr_err("%s: failed to get cpu%d device\n", __func__, i);
+			/* Keep unwinding; the usage count must still be dropped. */
+			continue;
+		}
+
+		_put_cluster_clk_and_freq_table(cdev);
+	}
+
+	atomic_dec(&cluster_usage[MAX_CLUSTERS]);
+
+	return ret;
+}
+
 /* Per-CPU initialization */
 static int bL_cpufreq_init(struct cpufreq_policy *policy)
 {
@@ -177,37 +455,30 @@ static int bL_cpufreq_init(struct cpufreq_policy *policy)
 	cpufreq_frequency_table_get_attr(freq_table[cur_cluster], policy->cpu);
+	if (cur_cluster < MAX_CLUSTERS) {
+		cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu));
+
+		per_cpu(physical_cluster, policy->cpu) = cur_cluster;
+	} else {
+		/* Assumption: during init, we are always running on A15 */
+		per_cpu(physical_cluster, policy->cpu) = A15_CLUSTER;
+	}
+
 	if (arm_bL_ops->get_transition_latency)
policy->cpuinfo.transition_latency = arm_bL_ops->get_transition_latency(cpu_dev); else policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - policy->cur = bL_cpufreq_get(policy->cpu); + policy->cur = clk_get_cpu_rate(policy->cpu); - cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu)); + if (is_bL_switching_enabled()) + per_cpu(cpu_last_req_freq, policy->cpu) = policy->cur; dev_info(cpu_dev, "%s: CPU %d initialized\n", __func__, policy->cpu); return 0; } -static int bL_cpufreq_exit(struct cpufreq_policy *policy) -{ - struct device *cpu_dev; - - cpu_dev = get_cpu_device(policy->cpu); - if (!cpu_dev) { - pr_err("%s: failed to get cpu%d device\n", __func__, - policy->cpu); - return -ENODEV; - } - - put_cluster_clk_and_freq_table(cpu_dev); - dev_dbg(cpu_dev, "%s: Exited, cpu: %d\n", __func__, policy->cpu); - - return 0; -} - /* Export freq_table to sysfs */ static struct freq_attr *bL_cpufreq_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, @@ -219,16 +490,47 @@ static struct cpufreq_driver bL_cpufreq_driver = { .flags = CPUFREQ_STICKY, .verify = bL_cpufreq_verify_policy, .target = bL_cpufreq_set_target, - .get = bL_cpufreq_get, + .get = bL_cpufreq_get_rate, .init = bL_cpufreq_init, - .exit = bL_cpufreq_exit, .have_governor_per_policy = true, .attr = bL_cpufreq_attr, }; +static int bL_cpufreq_switcher_notifier(struct notifier_block *nfb, + unsigned long action, void *_arg) +{ + pr_debug("%s: action: %ld\n", __func__, action); + + switch (action) { + case BL_NOTIFY_PRE_ENABLE: + case BL_NOTIFY_PRE_DISABLE: + cpufreq_unregister_driver(&bL_cpufreq_driver); + break; + + case BL_NOTIFY_POST_ENABLE: + set_switching_enabled(true); + cpufreq_register_driver(&bL_cpufreq_driver); + break; + + case BL_NOTIFY_POST_DISABLE: + set_switching_enabled(false); + cpufreq_register_driver(&bL_cpufreq_driver); + break; + + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +static struct notifier_block bL_switcher_notifier = { + .notifier_call = 
bL_cpufreq_switcher_notifier, +}; + int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops) { - int ret; + int ret, i; if (arm_bL_ops) { pr_debug("%s: Already registered: %s, exiting\n", __func__, @@ -243,16 +545,29 @@ int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops) arm_bL_ops = ops; + ret = bL_switcher_get_enabled(); + set_switching_enabled(ret); + + for (i = 0; i < MAX_CLUSTERS; i++) + mutex_init(&cluster_lock[i]); + ret = cpufreq_register_driver(&bL_cpufreq_driver); if (ret) { pr_info("%s: Failed registering platform driver: %s, err: %d\n", __func__, ops->name, ret); arm_bL_ops = NULL; } else { - pr_info("%s: Registered platform driver: %s\n", __func__, - ops->name); + ret = bL_switcher_register_notifier(&bL_switcher_notifier); + if (ret) { + cpufreq_unregister_driver(&bL_cpufreq_driver); + arm_bL_ops = NULL; + } else { + pr_info("%s: Registered platform driver: %s\n", + __func__, ops->name); + } } + bL_switcher_put_enabled(); return ret; } EXPORT_SYMBOL_GPL(bL_cpufreq_register); @@ -265,9 +580,31 @@ void bL_cpufreq_unregister(struct cpufreq_arm_bL_ops *ops) return; } + bL_switcher_get_enabled(); + bL_switcher_unregister_notifier(&bL_switcher_notifier); cpufreq_unregister_driver(&bL_cpufreq_driver); + bL_switcher_put_enabled(); pr_info("%s: Un-registered platform driver: %s\n", __func__, arm_bL_ops->name); + + /* For saving table get/put on every cpu in/out */ + if (is_bL_switching_enabled()) { + put_cluster_clk_and_freq_table(get_cpu_device(0)); + } else { + int i; + + for (i = 0; i < MAX_CLUSTERS; i++) { + struct device *cdev = get_cpu_device(i); + if (!cdev) { + pr_err("%s: failed to get cpu%d device\n", + __func__, i); + return; + } + + put_cluster_clk_and_freq_table(cdev); + } + } + arm_bL_ops = NULL; } EXPORT_SYMBOL_GPL(bL_cpufreq_unregister); diff --git a/drivers/cpufreq/arm_big_little.h b/drivers/cpufreq/arm_big_little.h index 79b2ce17884d..4f5a03d3aef6 100644 --- a/drivers/cpufreq/arm_big_little.h +++ b/drivers/cpufreq/arm_big_little.h @@ -23,6 
+23,20 @@ #include <linux/device.h> #include <linux/types.h> +/* Currently we support only two clusters */ +#define A15_CLUSTER 0 +#define A7_CLUSTER 1 +#define MAX_CLUSTERS 2 + +#ifdef CONFIG_BL_SWITCHER +extern bool bL_switching_enabled; +#define is_bL_switching_enabled() bL_switching_enabled +#define set_switching_enabled(x) (bL_switching_enabled = (x)) +#else +#define is_bL_switching_enabled() false +#define set_switching_enabled(x) do { } while (0) +#endif + struct cpufreq_arm_bL_ops { char name[CPUFREQ_NAME_LEN]; int (*get_transition_latency)(struct device *cpu_dev); @@ -36,7 +50,8 @@ struct cpufreq_arm_bL_ops { static inline int cpu_to_cluster(int cpu) { - return topology_physical_package_id(cpu); + return is_bL_switching_enabled() ? MAX_CLUSTERS: + topology_physical_package_id(cpu); } int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops); diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index bfd6273fd873..66733f1d55d4 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -21,6 +21,9 @@ #include <linux/spinlock.h> #include <linux/notifier.h> #include <asm/cputime.h> +#ifdef CONFIG_BL_SWITCHER +#include <asm/bL_switcher.h> +#endif static spinlock_t cpufreq_stats_lock; @@ -378,7 +381,7 @@ static struct notifier_block notifier_trans_block = { .notifier_call = cpufreq_stat_notifier_trans }; -static int __init cpufreq_stats_init(void) +static int cpufreq_stats_setup(void) { int ret; unsigned int cpu; @@ -406,7 +409,8 @@ static int __init cpufreq_stats_init(void) return 0; } -static void __exit cpufreq_stats_exit(void) + +static void cpufreq_stats_cleanup(void) { unsigned int cpu; @@ -421,6 +425,54 @@ static void __exit cpufreq_stats_exit(void) } } +#ifdef CONFIG_BL_SWITCHER +static int cpufreq_stats_switcher_notifier(struct notifier_block *nfb, + unsigned long action, void *_arg) +{ + switch (action) { + case BL_NOTIFY_PRE_ENABLE: + case BL_NOTIFY_PRE_DISABLE: + cpufreq_stats_cleanup(); + break; + + 
case BL_NOTIFY_POST_ENABLE: + case BL_NOTIFY_POST_DISABLE: + cpufreq_stats_setup(); + break; + + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +static struct notifier_block switcher_notifier = { + .notifier_call = cpufreq_stats_switcher_notifier, +}; +#endif + +static int __init cpufreq_stats_init(void) +{ + int ret; + spin_lock_init(&cpufreq_stats_lock); + + ret = cpufreq_stats_setup(); +#ifdef CONFIG_BL_SWITCHER + if (!ret) + bL_switcher_register_notifier(&switcher_notifier); +#endif + return ret; +} + +static void __exit cpufreq_stats_exit(void) +{ +#ifdef CONFIG_BL_SWITCHER + bL_switcher_unregister_notifier(&switcher_notifier); +#endif + cpufreq_stats_cleanup(); +} + MODULE_AUTHOR("Zou Nan hai <nanhai.zou@intel.com>"); MODULE_DESCRIPTION("'cpufreq_stats' - A driver to export cpufreq stats " "through sysfs filesystem"); diff --git a/drivers/cpufreq/vexpress_big_little.c b/drivers/cpufreq/vexpress_big_little.c new file mode 100644 index 000000000000..1abb883c051b --- /dev/null +++ b/drivers/cpufreq/vexpress_big_little.c @@ -0,0 +1,86 @@ +/* + * Vexpress big.LITTLE CPUFreq Interface driver + * + * It provides necessary ops to arm_big_little cpufreq driver and gets + * Frequency information from Device Tree. Freq table in DT must be in KHz. + * + * Copyright (C) 2013 Linaro. + * Viresh Kumar <viresh.kumar@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cpufreq.h>
+#include <linux/export.h>
+#include <linux/opp.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/vexpress.h>
+#include "arm_big_little.h"
+
+/*
+ * Populate the OPP table of @cpu_dev from the SPC frequency table of its
+ * cluster. Table entries are multiplied by 1000 before being added, each
+ * with a fixed placeholder voltage.
+ *
+ * Returns 0 on success, -EINVAL when the SPC yields no usable table, or the
+ * opp_add() error for the first entry that cannot be added.
+ */
+static int vexpress_init_opp_table(struct device *cpu_dev)
+{
+	int i = -1, count, cluster = cpu_to_cluster(cpu_dev->id);
+	/* Initialised so the validity check below never reads garbage. */
+	u32 *table = NULL;
+	int ret;
+
+	count = vexpress_spc_get_freq_table(cluster, &table);
+	/* NOTE(review): count <= 0 also rejects a negative return - confirm. */
+	if (!table || count <= 0) {
+		pr_err("SPC controller returned invalid freq table\n");
+		return -EINVAL;
+	}
+
+	while (++i < count) {
+		/* FIXME: Voltage value */
+		ret = opp_add(cpu_dev, table[i] * 1000, 900000);
+		if (ret) {
+			dev_warn(cpu_dev, "%s: Failed to add OPP %u, err: %d\n",
+				 __func__, table[i] * 1000, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/* Transition latency reported to the generic big.LITTLE cpufreq driver. */
+static int vexpress_get_transition_latency(struct device *cpu_dev)
+{
+	/* 1 ms */
+	return 1000000;
+}
+
+static struct cpufreq_arm_bL_ops vexpress_bL_ops = {
+	.name	= "vexpress-bL",
+	.get_transition_latency = vexpress_get_transition_latency,
+	.init_opp_table = vexpress_init_opp_table,
+};
+
+/* Register the Vexpress ops, provided the SPC reports itself loaded. */
+static int vexpress_bL_init(void)
+{
+	if (!vexpress_spc_check_loaded()) {
+		pr_info("%s: No SPC found\n", __func__);
+		return -ENOENT;
+	}
+
+	return bL_cpufreq_register(&vexpress_bL_ops);
+}
+module_init(vexpress_bL_init);
+
+/* Unregister the Vexpress ops on module removal. */
+static void vexpress_bL_exit(void)
+{
+	bL_cpufreq_unregister(&vexpress_bL_ops);
+}
+module_exit(vexpress_bL_exit);
+
+MODULE_AUTHOR("Viresh Kumar <viresh.kumar@linaro.org>");
+MODULE_DESCRIPTION("ARM Vexpress big LITTLE cpufreq driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
index 0d8bd55e776f..7d8256a5ea97 100644
--- a/drivers/cpuidle/Makefile
+++ b/drivers/cpuidle/Makefile
@@ -4,6 +4,6 @@
 obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
 obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o
-
+obj-$(CONFIG_BIG_LITTLE) += arm_big_little.o
 obj-$(CONFIG_CPU_IDLE_CALXEDA) +=
cpuidle-calxeda.o obj-$(CONFIG_ARCH_KIRKWOOD) += cpuidle-kirkwood.o diff --git a/drivers/cpuidle/arm_big_little.c b/drivers/cpuidle/arm_big_little.c new file mode 100644 index 000000000000..e5378896a8cb --- /dev/null +++ b/drivers/cpuidle/arm_big_little.c @@ -0,0 +1,183 @@ +/* + * big.LITTLE CPU idle driver. + * + * Copyright (C) 2012 ARM Ltd. + * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/arm-cci.h> +#include <linux/bitmap.h> +#include <linux/cpuidle.h> +#include <linux/cpu_pm.h> +#include <linux/clockchips.h> +#include <linux/debugfs.h> +#include <linux/hrtimer.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/tick.h> +#include <linux/vexpress.h> +#include <asm/mcpm.h> +#include <asm/cpuidle.h> +#include <asm/cputype.h> +#include <asm/idmap.h> +#include <asm/proc-fns.h> +#include <asm/suspend.h> +#include <linux/of.h> + +static int bl_cpuidle_simple_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + ktime_t time_start, time_end; + s64 diff; + + time_start = ktime_get(); + + cpu_do_idle(); + + time_end = ktime_get(); + + local_irq_enable(); + + diff = ktime_to_us(ktime_sub(time_end, time_start)); + if (diff > INT_MAX) + diff = INT_MAX; + + dev->last_residency = (int) diff; + + return index; +} + +static int bl_enter_powerdown(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx); + +static struct cpuidle_state bl_cpuidle_set[] __initdata = { + [0] = { + .enter = bl_cpuidle_simple_enter, + .exit_latency = 1, + .target_residency = 1, + .power_usage = UINT_MAX, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = "WFI", + .desc = "ARM WFI", + }, + [1] = { + .enter = bl_enter_powerdown, + .exit_latency = 300, + .target_residency = 1000, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = 
"C1", + .desc = "ARM power down", + }, +}; + +struct cpuidle_driver bl_idle_driver = { + .name = "bl_idle", + .owner = THIS_MODULE, + .safe_state_index = 0 +}; + +static DEFINE_PER_CPU(struct cpuidle_device, bl_idle_dev); + +static int notrace bl_powerdown_finisher(unsigned long arg) +{ + unsigned int mpidr = read_cpuid_mpidr(); + unsigned int cluster = (mpidr >> 8) & 0xf; + unsigned int cpu = mpidr & 0xf; + + mcpm_set_entry_vector(cpu, cluster, cpu_resume); + mcpm_cpu_suspend(0); /* 0 should be replaced with better value here */ + return 1; +} + +/* + * bl_enter_powerdown - Programs CPU to enter the specified state + * @dev: cpuidle device + * @drv: The target state to be programmed + * @idx: state index + * + * Called from the CPUidle framework to program the device to the + * specified target state selected by the governor. + */ +static int bl_enter_powerdown(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx) +{ + struct timespec ts_preidle, ts_postidle, ts_idle; + int ret; + + /* Used to keep track of the total time in idle */ + getnstimeofday(&ts_preidle); + + BUG_ON(!irqs_disabled()); + + cpu_pm_enter(); + + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + + ret = cpu_suspend((unsigned long) dev, bl_powerdown_finisher); + if (ret) + BUG(); + + mcpm_cpu_powered_up(); + + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + + cpu_pm_exit(); + + getnstimeofday(&ts_postidle); + local_irq_enable(); + ts_idle = timespec_sub(ts_postidle, ts_preidle); + + dev->last_residency = ts_idle.tv_nsec / NSEC_PER_USEC + + ts_idle.tv_sec * USEC_PER_SEC; + return idx; +} + +/* + * bl_idle_init + * + * Registers the bl specific cpuidle driver with the cpuidle + * framework with the valid set of states. 
+ */
+int __init bl_idle_init(void)
+{
+	struct cpuidle_device *dev;
+	struct cpuidle_driver *drv = &bl_idle_driver;
+	struct device_node *np;
+	int i, cpu_id, ret;
+
+	np = of_find_compatible_node(NULL, NULL, "arm,generic");
+	if (!np) {
+		pr_info("%s: No compatible node found\n", __func__);
+		return -ENODEV;
+	}
+	/* Only the node's presence matters: drop the reference we were given. */
+	of_node_put(np);
+
+	/* Copy the __initdata state templates into the driver. */
+	drv->state_count = ARRAY_SIZE(bl_cpuidle_set);
+
+	for (i = 0; i < drv->state_count; i++) {
+		memcpy(&drv->states[i], &bl_cpuidle_set[i],
+				sizeof(struct cpuidle_state));
+	}
+
+	/* Without a registered driver there is nothing to attach devices to. */
+	ret = cpuidle_register_driver(drv);
+	if (ret) {
+		pr_err("%s: Cpuidle register driver failed\n", __func__);
+		return ret;
+	}
+
+	for_each_cpu(cpu_id, cpu_online_mask) {
+		dev = &per_cpu(bl_idle_dev, cpu_id);
+		dev->cpu = cpu_id;
+
+		dev->state_count = drv->state_count;
+
+		if (cpuidle_register_device(dev)) {
+			printk(KERN_ERR "%s: Cpuidle register device failed\n",
+				__func__);
+			goto unregister;
+		}
+
+		/* Informational, and only reported once it is actually true. */
+		pr_info("CPUidle for CPU%d registered\n", cpu_id);
+	}
+
+	return 0;
+
+unregister:
+	/* Undo the devices registered before the failing CPU, then the driver. */
+	for_each_cpu(i, cpu_online_mask) {
+		if (i == cpu_id)
+			break;
+		cpuidle_unregister_device(&per_cpu(bl_idle_dev, i));
+	}
+	cpuidle_unregister_driver(drv);
+
+	return -EIO;
+}
+
+device_initcall(bl_idle_init);
diff --git a/drivers/cpuidle/cpuidle-calxeda.c b/drivers/cpuidle/cpuidle-calxeda.c
index 223379169cb0..0e6e408c0a63 100644
--- a/drivers/cpuidle/cpuidle-calxeda.c
+++ b/drivers/cpuidle/cpuidle-calxeda.c
@@ -37,20 +37,6 @@ extern void highbank_set_cpu_jump(int cpu, void *jump_addr);
 extern void *scu_base_addr;
-static inline unsigned int get_auxcr(void)
-{
-	unsigned int val;
-	asm("mrc p15, 0, %0, c1, c0, 1 @ get AUXCR" : "=r" (val) : : "cc");
-	return val;
-}
-
-static inline void set_auxcr(unsigned int val)
-{
-	asm volatile("mcr p15, 0, %0, c1, c0, 1 @ set AUXCR"
-	  : : "r" (val) : "cc");
-	isb();
-}
-
 static noinline void calxeda_idle_restore(void)
 {
 	set_cr(get_cr() | CR_C);
diff --git a/drivers/extcon/extcon-adc-jack.c b/drivers/extcon/extcon-adc-jack.c
index d0233cd18ffa..5985807e52c9 100644
--- a/drivers/extcon/extcon-adc-jack.c
+++ b/drivers/extcon/extcon-adc-jack.c
@@ -87,7 +87,8 @@ static irqreturn_t adc_jack_irq_thread(int irq, void *_data)
 {
 	struct adc_jack_data *data = _data;
-	schedule_delayed_work(&data->handler, data->handling_delay);
+
queue_delayed_work(system_power_efficient_wq, + &data->handler, data->handling_delay); return IRQ_HANDLED; } diff --git a/drivers/extcon/extcon-gpio.c b/drivers/extcon/extcon-gpio.c index 02bec32adde4..f874c30ddbff 100644 --- a/drivers/extcon/extcon-gpio.c +++ b/drivers/extcon/extcon-gpio.c @@ -56,7 +56,7 @@ static irqreturn_t gpio_irq_handler(int irq, void *dev_id) { struct gpio_extcon_data *extcon_data = dev_id; - schedule_delayed_work(&extcon_data->work, + queue_delayed_work(system_power_efficient_wq, &extcon_data->work, extcon_data->debounce_jiffies); return IRQ_HANDLED; } diff --git a/drivers/gator/Kconfig b/drivers/gator/Kconfig new file mode 100644 index 000000000000..7ea0fcc3d01d --- /dev/null +++ b/drivers/gator/Kconfig @@ -0,0 +1,33 @@ +config GATOR + tristate "Gator module for ARM's Streamline Performance Analyzer" + default m if (ARM || ARM64) + depends on PROFILING + depends on HIGH_RES_TIMERS + depends on LOCAL_TIMERS || !(ARM && SMP) + select TRACING + +config GATOR_WITH_MALI_SUPPORT + bool + +choice + prompt "Enable Mali GPU support in Gator" + depends on GATOR + optional + +config GATOR_MALI_400MP + bool "Mali-400MP" + select GATOR_WITH_MALI_SUPPORT + +config GATOR_MALI_T6XX + bool "Mali-T604 or Mali-T658" + select GATOR_WITH_MALI_SUPPORT + +endchoice + +config GATOR_MALI_PATH + string "Path to Mali driver" + depends on GATOR_WITH_MALI_SUPPORT + default "drivers/gpu/arm/mali400mp" + help + The gator code adds this to its include path so it can get the Mali + trace headers with: #include "linux/mali_linux_trace.h" diff --git a/drivers/gator/LICENSE b/drivers/gator/LICENSE new file mode 100644 index 000000000000..d159169d1050 --- /dev/null +++ b/drivers/gator/LICENSE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but 
changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. 
If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. 
You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. 
(This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. 
Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/drivers/gator/Makefile b/drivers/gator/Makefile new file mode 100644 index 000000000000..3dc9d059a4b4 --- /dev/null +++ b/drivers/gator/Makefile @@ -0,0 +1,76 @@ +ifneq ($(KERNELRELEASE),) + +# Uncomment the following line to enable kernel stack unwinding within gator, or update gator_backtrace.c +# EXTRA_CFLAGS += -DGATOR_KERNEL_STACK_UNWINDING + +CONFIG_GATOR ?= m +obj-$(CONFIG_GATOR) := gator.o + +gator-y := gator_main.o \ + gator_events_irq.o \ + gator_events_sched.o \ + gator_events_net.o \ + gator_events_block.o \ + gator_events_meminfo.o \ + gator_events_perf_pmu.o \ + gator_events_mmapped.o \ + +# Convert the old GATOR_WITH_MALI_SUPPORT to the new kernel flags +ifneq ($(GATOR_WITH_MALI_SUPPORT),) + CONFIG_GATOR_WITH_MALI_SUPPORT := y + ifeq ($(GATOR_WITH_MALI_SUPPORT),MALI_T6xx) + CONFIG_GATOR_MALI_4XXMP := n + CONFIG_GATOR_MALI_T6XX := y + else + CONFIG_GATOR_MALI_4XXMP := y + CONFIG_GATOR_MALI_T6XX := n + endif + EXTRA_CFLAGS += -DMALI_SUPPORT=$(GATOR_WITH_MALI_SUPPORT) + ifneq ($(GATOR_MALI_INTERFACE_STYLE),) + EXTRA_CFLAGS += 
-DGATOR_MALI_INTERFACE_STYLE=$(GATOR_MALI_INTERFACE_STYLE) + endif +endif + +ifeq ($(CONFIG_GATOR_WITH_MALI_SUPPORT),y) + ifeq ($(CONFIG_GATOR_MALI_T6XX),y) + gator-y += gator_events_mali_t6xx.o \ + gator_events_mali_t6xx_hw.o + include $(src)/mali_t6xx.mk + else + gator-y += gator_events_mali_4xx.o + endif + gator-y += gator_events_mali_common.o + + ifneq ($(CONFIG_GATOR_MALI_PATH),) + ccflags-y += -I$(CONFIG_GATOR_MALI_PATH) + endif + ccflags-$(CONFIG_GATOR_MALI_4XXMP) += -DMALI_SUPPORT=MALI_4xx + ccflags-$(CONFIG_GATOR_MALI_T6XX) += -DMALI_SUPPORT=MALI_T6xx +endif + +# GATOR_TEST controls whether to include (=1) or exclude (=0) test code. +GATOR_TEST ?= 0 +EXTRA_CFLAGS += -DGATOR_TEST=$(GATOR_TEST) + +gator-$(CONFIG_ARM) += gator_events_armv6.o \ + gator_events_armv7.o \ + gator_events_ccn-504.o \ + gator_events_l2c-310.o \ + gator_events_scorpion.o + +gator-$(CONFIG_ARM64) += gator_events_ccn-504.o + +else + +all: + @echo + @echo "usage:" + @echo " make -C <kernel_build_dir> M=\`pwd\` ARCH=arm CROSS_COMPILE=<...> modules" + @echo + $(error) + +clean: + rm -f *.o .*.cmd modules.order Module.symvers gator.ko gator.mod.c + rm -rf .tmp_versions + +endif diff --git a/drivers/gator/gator.h b/drivers/gator/gator.h new file mode 100644 index 000000000000..d8981ed85a6a --- /dev/null +++ b/drivers/gator/gator.h @@ -0,0 +1,142 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef GATOR_H_ +#define GATOR_H_ + +#include <linux/version.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/list.h> + +#define GATOR_PERF_SUPPORT LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0) +#define GATOR_PERF_PMU_SUPPORT GATOR_PERF_SUPPORT && defined(CONFIG_PERF_EVENTS) && (!(defined(__arm__) || defined(__aarch64__)) || defined(CONFIG_HW_PERF_EVENTS)) +#define GATOR_NO_PERF_SUPPORT (!(GATOR_PERF_SUPPORT)) +#define GATOR_CPU_FREQ_SUPPORT (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)) && defined(CONFIG_CPU_FREQ) +#define GATOR_IKS_SUPPORT defined(CONFIG_BL_SWITCHER) + +// cpu ids +#define ARM1136 0xb36 +#define ARM1156 0xb56 +#define ARM1176 0xb76 +#define ARM11MPCORE 0xb02 +#define CORTEX_A5 0xc05 +#define CORTEX_A7 0xc07 +#define CORTEX_A8 0xc08 +#define CORTEX_A9 0xc09 +#define CORTEX_A12 0xc0d +#define CORTEX_A15 0xc0f +#define SCORPION 0x00f +#define SCORPIONMP 0x02d +#define KRAITSIM 0x049 +#define KRAIT 0x04d +#define KRAIT_S4_PRO 0x06f +#define CORTEX_A53 0xd03 +#define CORTEX_A57 0xd07 +#define AARCH64 0xd0f +#define OTHER 0xfff + +#define MAXSIZE_CORE_NAME 32 + +struct gator_cpu { + const int cpuid; + // Human readable name + const char core_name[MAXSIZE_CORE_NAME]; + // Perf PMU name + const char * const pmu_name; + // gatorfs event name + const char * const pmnc_name; + // compatible from Documentation/devicetree/bindings/arm/cpus.txt + const char * const dt_name; + const int pmnc_counters; +}; + +const struct gator_cpu *gator_find_cpu_by_cpuid(const u32 cpuid); +const struct gator_cpu *gator_find_cpu_by_pmu_name(const char *const name); + +/****************************************************************************** + * Filesystem + ******************************************************************************/ +int gatorfs_create_file_perm(struct super_block *sb, struct dentry *root, + char const *name, + const struct file_operations *fops, int perm); + +struct dentry *gatorfs_mkdir(struct super_block *sb, struct 
dentry *root, + char const *name); + +int gatorfs_create_ulong(struct super_block *sb, struct dentry *root, + char const *name, unsigned long *val); + +int gatorfs_create_ro_ulong(struct super_block *sb, struct dentry *root, + char const *name, unsigned long *val); + +void gator_op_create_files(struct super_block *sb, struct dentry *root); + +/****************************************************************************** + * Tracepoints + ******************************************************************************/ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32) +# error Kernels prior to 2.6.32 not supported +#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) +# define GATOR_DEFINE_PROBE(probe_name, proto) \ + static void probe_##probe_name(PARAMS(proto)) +# define GATOR_REGISTER_TRACE(probe_name) \ + register_trace_##probe_name(probe_##probe_name) +# define GATOR_UNREGISTER_TRACE(probe_name) \ + unregister_trace_##probe_name(probe_##probe_name) +#else +# define GATOR_DEFINE_PROBE(probe_name, proto) \ + static void probe_##probe_name(void *data, PARAMS(proto)) +# define GATOR_REGISTER_TRACE(probe_name) \ + register_trace_##probe_name(probe_##probe_name, NULL) +# define GATOR_UNREGISTER_TRACE(probe_name) \ + unregister_trace_##probe_name(probe_##probe_name, NULL) +#endif + +/****************************************************************************** + * Events + ******************************************************************************/ +struct gator_interface { + void (*shutdown)(void); // Complementary function to init + int (*create_files)(struct super_block *sb, struct dentry *root); + int (*start)(void); + void (*stop)(void); // Complementary function to start + int (*online)(int **buffer, bool migrate); + int (*offline)(int **buffer, bool migrate); + void (*online_dispatch)(int cpu, bool migrate); // called in process context but may not be running on core 'cpu' + void (*offline_dispatch)(int cpu, bool migrate); // called in process context but 
may not be running on core 'cpu' + int (*read)(int **buffer); + int (*read64)(long long **buffer); + int (*read_proc)(long long **buffer, struct task_struct *); + struct list_head list; +}; + +int gator_events_install(struct gator_interface *interface); +int gator_events_get_key(void); +u32 gator_cpuid(void); + +void gator_backtrace_handler(struct pt_regs *const regs); + +#if !GATOR_IKS_SUPPORT + +#define get_physical_cpu() smp_processor_id() +#define lcpu_to_pcpu(lcpu) lcpu +#define pcpu_to_lcpu(pcpu) pcpu + +#else + +#define get_physical_cpu() lcpu_to_pcpu(get_logical_cpu()) +int lcpu_to_pcpu(const int lcpu); +int pcpu_to_lcpu(const int pcpu); + +#endif + +#define get_logical_cpu() smp_processor_id() +#define on_primary_core() (get_logical_cpu() == 0) + +#endif // GATOR_H_ diff --git a/drivers/gator/gator_annotate.c b/drivers/gator/gator_annotate.c new file mode 100644 index 000000000000..5b9399bea230 --- /dev/null +++ b/drivers/gator/gator_annotate.c @@ -0,0 +1,186 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <asm/uaccess.h> +#include <asm/current.h> +#include <linux/spinlock.h> + +static DEFINE_SPINLOCK(annotate_lock); +static bool collect_annotations = false; + +static int annotate_copy(struct file *file, char const __user *buf, size_t count) +{ + int cpu = 0; + int write = per_cpu(gator_buffer_write, cpu)[ANNOTATE_BUF]; + + if (file == NULL) { + // copy from kernel + memcpy(&per_cpu(gator_buffer, cpu)[ANNOTATE_BUF][write], buf, count); + } else { + // copy from user space + if (copy_from_user(&per_cpu(gator_buffer, cpu)[ANNOTATE_BUF][write], buf, count) != 0) + return -1; + } + per_cpu(gator_buffer_write, cpu)[ANNOTATE_BUF] = (write + count) & gator_buffer_mask[ANNOTATE_BUF]; + + return 0; +} + +static ssize_t annotate_write(struct file *file, char const __user *buf, size_t count_orig, loff_t *offset) +{ + int pid, cpu, header_size, available, contiguous, length1, length2, size, count = count_orig & 0x7fffffff; + bool interrupt_context; + + if (*offset) { + return -EINVAL; + } + + interrupt_context = in_interrupt(); + // Annotations are not supported in interrupt context, but may work if you comment out the next four lines of code. + // By doing so, annotations in interrupt context can result in deadlocks and lost data. + if (interrupt_context) { + printk(KERN_WARNING "gator: Annotations are not supported in interrupt context.
Edit gator_annotate.c in the gator driver to enable annotations in interrupt context.\n"); + return -EINVAL; + } + + retry: + // synchronize between cores and with collect_annotations + spin_lock(&annotate_lock); + + if (!collect_annotations) { + // Not collecting annotations, tell the caller everything was written + size = count_orig; + goto annotate_write_out; + } + + // Annotation only uses a single per-cpu buffer as the data must be in order to the engine + cpu = 0; + + if (current == NULL) { + pid = 0; + } else { + pid = current->pid; + } + + // determine total size of the payload + header_size = MAXSIZE_PACK32 * 3 + MAXSIZE_PACK64; + available = buffer_bytes_available(cpu, ANNOTATE_BUF) - header_size; + size = count < available ? count : available; + + if (size <= 0) { + // Buffer is full, wait until space is available + spin_unlock(&annotate_lock); + + // Drop the annotation as blocking is not allowed in interrupt context + if (interrupt_context) { + return -EINVAL; + } + + wait_event_interruptible(gator_annotate_wait, buffer_bytes_available(cpu, ANNOTATE_BUF) > header_size || !collect_annotations); + + // Check to see if a signal is pending + if (signal_pending(current)) { + return -EINTR; + } + + goto retry; + } + + // synchronize shared variables annotateBuf and annotatePos + if (per_cpu(gator_buffer, cpu)[ANNOTATE_BUF]) { + u64 time = gator_get_time(); + gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, get_physical_cpu()); + gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, pid); + gator_buffer_write_packed_int64(cpu, ANNOTATE_BUF, time); + gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, size); + + // determine the sizes to capture, length1 + length2 will equal size + contiguous = contiguous_space_available(cpu, ANNOTATE_BUF); + if (size < contiguous) { + length1 = size; + length2 = 0; + } else { + length1 = contiguous; + length2 = size - contiguous; + } + + if (annotate_copy(file, buf, length1) != 0) { + size = -EINVAL; + goto annotate_write_out; + } + 
+ if (length2 > 0 && annotate_copy(file, &buf[length1], length2) != 0) { + size = -EINVAL; + goto annotate_write_out; + } + + // Check and commit; commit is set to occur once buffer is 3/4 full + buffer_check(cpu, ANNOTATE_BUF, time); + } + +annotate_write_out: + spin_unlock(&annotate_lock); + + // return the number of bytes written + return size; +} + +#include "gator_annotate_kernel.c" + +static int annotate_release(struct inode *inode, struct file *file) +{ + int cpu = 0; + + // synchronize between cores + spin_lock(&annotate_lock); + + if (per_cpu(gator_buffer, cpu)[ANNOTATE_BUF] && buffer_check_space(cpu, ANNOTATE_BUF, MAXSIZE_PACK64 + 3 * MAXSIZE_PACK32)) { + uint32_t pid = current->pid; + gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, get_physical_cpu()); + gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, pid); + gator_buffer_write_packed_int64(cpu, ANNOTATE_BUF, 0); // time + gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, 0); // size + } + + // Check and commit; commit is set to occur once buffer is 3/4 full + buffer_check(cpu, ANNOTATE_BUF, gator_get_time()); + + spin_unlock(&annotate_lock); + + return 0; +} + +static const struct file_operations annotate_fops = { + .write = annotate_write, + .release = annotate_release +}; + +static int gator_annotate_create_files(struct super_block *sb, struct dentry *root) +{ + return gatorfs_create_file_perm(sb, root, "annotate", &annotate_fops, 0666); +} + +static int gator_annotate_start(void) +{ + collect_annotations = true; + return 0; +} + +static void gator_annotate_stop(void) +{ + // the spinlock here will ensure that when this function exits, we are not in the middle of an annotation + spin_lock(&annotate_lock); + collect_annotations = false; + wake_up(&gator_annotate_wait); + spin_unlock(&annotate_lock); +} diff --git a/drivers/gator/gator_annotate_kernel.c b/drivers/gator/gator_annotate_kernel.c new file mode 100644 index 000000000000..a406e4882974 --- /dev/null +++ b/drivers/gator/gator_annotate_kernel.c 
@@ -0,0 +1,200 @@
+/**
+ * Copyright (C) ARM Limited 2012-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#define ESCAPE_CODE 0x1c
+#define STRING_ANNOTATION 0x06
+#define NAME_CHANNEL_ANNOTATION 0x07
+#define NAME_GROUP_ANNOTATION 0x08
+#define VISUAL_ANNOTATION 0x04
+#define MARKER_ANNOTATION 0x05
+
+// Largest value that fits in the 16-bit length field of an annotation header
+#define KANNOTATE_MAX_SIZE 0xffff
+
+/*
+ * Feed size bytes starting at ptr to annotate_write(), resubmitting the
+ * remainder after each partial write. Gives up with a warning as soon as
+ * annotate_write() reports an error or makes no progress.
+ */
+static void kannotate_write(const char *ptr, unsigned int size)
+{
+	int retval;
+	unsigned int pos = 0;
+	loff_t offset = 0;
+
+	while (pos < size) {
+		retval = annotate_write(NULL, &ptr[pos], size - pos, &offset);
+		// a zero return would otherwise keep this loop spinning forever
+		if (retval <= 0) {
+			printk(KERN_WARNING "gator: kannotate_write failed with return value %d\n", retval);
+			return;
+		}
+		pos += retval;
+	}
+}
+
+/*
+ * Number of bytes of str that can be sent when `reserved` bytes of the 16-bit
+ * length field are already taken by data preceding the string. Over-long
+ * strings are truncated to what fits instead of wrapping modulo 65536, so the
+ * result never exceeds strlen(str).
+ */
+static u16 kannotate_str_size(const char *str, unsigned int reserved)
+{
+	const size_t max_size = KANNOTATE_MAX_SIZE - reserved;
+	const size_t str_size = strlen(str);
+
+	return str_size < max_size ? str_size : max_size;
+}
+
+// Store val at buf[0..1], least significant byte first
+static void marshal_u16(char *buf, u16 val)
+{
+	buf[0] = val & 0xff;
+	buf[1] = (val >> 8) & 0xff;
+}
+
+// Store val at buf[0..3], least significant byte first
+static void marshal_u32(char *buf, u32 val)
+{
+	buf[0] = val & 0xff;
+	buf[1] = (val >> 8) & 0xff;
+	buf[2] = (val >> 16) & 0xff;
+	buf[3] = (val >> 24) & 0xff;
+}
+
+/*
+ * Emit a string annotation for channel: an 8 byte header (escape code, type,
+ * 32-bit channel, 16-bit length) followed by the bytes of str without the
+ * terminating NUL.
+ */
+void gator_annotate_channel(int channel, const char *str)
+{
+	const u16 str_size = kannotate_str_size(str, 0);
+	char header[8];
+
+	header[0] = ESCAPE_CODE;
+	header[1] = STRING_ANNOTATION;
+	marshal_u32(header + 2, channel);
+	marshal_u16(header + 6, str_size);
+	kannotate_write(header, sizeof(header));
+	kannotate_write(str, str_size);
+}
+
+EXPORT_SYMBOL(gator_annotate_channel);
+
+// Same as gator_annotate_channel() on channel 0
+void gator_annotate(const char *str)
+{
+	gator_annotate_channel(0, str);
+}
+
+EXPORT_SYMBOL(gator_annotate);
+
+/*
+ * Emit a string annotation for channel whose payload is the 32-bit color
+ * followed by str. The length field covers both, hence the 4 bytes reserved
+ * when sizing the string.
+ */
+void gator_annotate_channel_color(int channel, int color, const char *str)
+{
+	const u16 str_size = kannotate_str_size(str, 4);
+	char header[12];
+
+	header[0] = ESCAPE_CODE;
+	header[1] = STRING_ANNOTATION;
+	marshal_u32(header + 2, channel);
+	marshal_u16(header + 6, str_size + 4);
+	marshal_u32(header + 8, color);
+	kannotate_write(header, sizeof(header));
+	kannotate_write(str, str_size);
+}
+
+EXPORT_SYMBOL(gator_annotate_channel_color);
+
+// Same as gator_annotate_channel_color() on channel 0
+void gator_annotate_color(int color, const char *str)
+{
+	gator_annotate_channel_color(0, color, str);
+}
+
+EXPORT_SYMBOL(gator_annotate_color);
+
+// Emit a string annotation for channel with a zero length and no payload
+void gator_annotate_channel_end(int channel)
+{
+	char header[8];
+
+	header[0] = ESCAPE_CODE;
+	header[1] = STRING_ANNOTATION;
+	marshal_u32(header + 2, channel);
+	marshal_u16(header + 6, 0);
+	kannotate_write(header, sizeof(header));
+}
+
+EXPORT_SYMBOL(gator_annotate_channel_end);
+
+// Same as gator_annotate_channel_end() on channel 0
+void gator_annotate_end(void)
+{
+	gator_annotate_channel_end(0);
+}
+
+EXPORT_SYMBOL(gator_annotate_end);
+
+/*
+ * Emit a name-channel annotation: a 12 byte header (escape code, type, 32-bit
+ * channel, 32-bit group, 16-bit length) followed by the bytes of str.
+ */
+void gator_annotate_name_channel(int channel, int group, const char* str)
+{
+	const u16 str_size = kannotate_str_size(str, 0);
+	char header[12];
+
+	header[0] = ESCAPE_CODE;
+	header[1] = NAME_CHANNEL_ANNOTATION;
+	marshal_u32(header + 2, channel);
+	marshal_u32(header + 6, group);
+	marshal_u16(header + 10, str_size);
+	kannotate_write(header, sizeof(header));
+	kannotate_write(str, str_size);
+}
+
+EXPORT_SYMBOL(gator_annotate_name_channel);
+
+/*
+ * Emit a name-group annotation: an 8 byte header (escape code, type, 32-bit
+ * group, 16-bit length) followed by the bytes of str.
+ */
+void gator_annotate_name_group(int group, const char* str)
+{
+	const u16 str_size = kannotate_str_size(str, 0);
+	char header[8];
+
+	header[0] = ESCAPE_CODE;
+	header[1] = NAME_GROUP_ANNOTATION;
+	marshal_u32(header + 2, group);
+	marshal_u16(header + 6, str_size);
+	kannotate_write(header, sizeof(header));
+	kannotate_write(str, str_size);
+}
+
+EXPORT_SYMBOL(gator_annotate_name_group);
+
+/*
+ * Emit a visual annotation: a 4 byte header (escape code, type, 16-bit string
+ * length), the bytes of str, the 32-bit length of data and finally length
+ * bytes of data.
+ */
+void gator_annotate_visual(const char *data, unsigned int length, const char *str)
+{
+	const u16 str_size = kannotate_str_size(str, 0);
+	char header[4];
+	char header_length[4];
+
+	header[0] = ESCAPE_CODE;
+	header[1] = VISUAL_ANNOTATION;
+	marshal_u16(header + 2, str_size);
+	marshal_u32(header_length, length);
+	kannotate_write(header, sizeof(header));
+	kannotate_write(str, str_size);
+	kannotate_write(header_length, sizeof(header_length));
+	kannotate_write(data, length);
+}
+
+EXPORT_SYMBOL(gator_annotate_visual);
+
+// Emit a marker annotation with a zero length and no payload
+void gator_annotate_marker(void)
+{
+	char header[4];
+
+	header[0] = ESCAPE_CODE;
+	header[1] = MARKER_ANNOTATION;
+	marshal_u16(header + 2, 0);
+	kannotate_write(header, sizeof(header));
+}
+
+EXPORT_SYMBOL(gator_annotate_marker);
+
+// Emit a marker annotation whose payload is the bytes of str
+void gator_annotate_marker_str(const char *str)
+{
+	const u16 str_size = kannotate_str_size(str, 0);
+	char header[4];
+
+	header[0] = ESCAPE_CODE;
+	header[1] = MARKER_ANNOTATION;
+	marshal_u16(header + 2, str_size);
+	kannotate_write(header, sizeof(header));
+	kannotate_write(str, str_size);
+}
+
+EXPORT_SYMBOL(gator_annotate_marker_str);
+
+// Emit a marker annotation whose 4 byte payload is the 32-bit color
+void gator_annotate_marker_color(int color)
+{
+	char header[8];
+
+	header[0] = ESCAPE_CODE;
+	header[1] = MARKER_ANNOTATION;
+	marshal_u16(header + 2, 4);
+	marshal_u32(header + 4, color);
+	kannotate_write(header, sizeof(header));
+}
+
+EXPORT_SYMBOL(gator_annotate_marker_color);
+
+/*
+ * Emit a marker annotation whose payload is the 32-bit color followed by str.
+ * The length field covers both, hence the 4 bytes reserved when sizing the
+ * string.
+ */
+void gator_annotate_marker_color_str(int color, const char *str)
+{
+	const u16 str_size = kannotate_str_size(str, 4);
+	char header[8];
+
+	header[0] = ESCAPE_CODE;
+	header[1] = MARKER_ANNOTATION;
+	marshal_u16(header + 2, str_size + 4);
+	marshal_u32(header + 4, color);
+	kannotate_write(header, sizeof(header));
+	kannotate_write(str, str_size);
+}
+
+EXPORT_SYMBOL(gator_annotate_marker_color_str);
diff --git a/drivers/gator/gator_backtrace.c b/drivers/gator/gator_backtrace.c
new file mode 100644
index 000000000000..ffacb490194c
--- /dev/null
+++ b/drivers/gator/gator_backtrace.c
@@ -0,0 +1,168 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/*
+ * EABI backtrace stores {fp,lr} on the stack.
+ */
+struct stack_frame_eabi {
+	union {
+		struct {
+			unsigned long fp;
+			// May be the fp in the case of a leaf function or clang
+			unsigned long lr;
+			// If lr is really the fp, lr2 is the corresponding lr
+			unsigned long lr2;
+		};
+		// Used to read 32 bit fp/lr from a 64 bit kernel
+		struct {
+			u32 fp_32;
+			// same as lr above
+			u32 lr_32;
+			// same as lr2 above
+			u32 lr2_32;
+		};
+	};
+};
+
+/*
+ * Walk the user space frame pointer chain described by regs and hand up to
+ * depth + 1 return addresses (the current lr plus one per visited frame) to
+ * gator_add_trace(). Returns immediately when regs is not a user mode
+ * context; compiles to an empty function on anything but arm/aarch64.
+ */
+static void arm_backtrace_eabi(int cpu, struct pt_regs *const regs, unsigned int depth)
+{
+#if defined(__arm__) || defined(__aarch64__)
+	struct stack_frame_eabi *curr;
+	struct stack_frame_eabi bufcurr;
+#if defined(__arm__)
+	const bool is_compat = false;
+	unsigned long fp = regs->ARM_fp;
+	unsigned long sp = regs->ARM_sp;
+	unsigned long lr = regs->ARM_lr;
+	const int gcc_frame_offset = sizeof(unsigned long);
+#else
+	// Is userspace aarch32 (32 bit)
+	const bool is_compat = compat_user_mode(regs);
+	unsigned long fp = (is_compat ? regs->regs[11] : regs->regs[29]);
+	unsigned long sp = (is_compat ? regs->compat_sp : regs->sp);
+	unsigned long lr = (is_compat ? regs->compat_lr : regs->regs[30]);
+	const int gcc_frame_offset = (is_compat ? sizeof(u32) : 0);
+#endif
+	// clang frame offset is always zero
+	int is_user_mode = user_mode(regs);
+
+	// pc (current function) has already been added
+
+	if (!is_user_mode) {
+		return;
+	}
+
+	// Add the lr (parent function)
+	// entry preamble may not have executed
+	gator_add_trace(cpu, lr);
+
+	// check fp is valid
+	if (fp == 0 || fp < sp) {
+		return;
+	}
+
+	// Get the current stack frame
+	curr = (struct stack_frame_eabi *)(fp - gcc_frame_offset);
+	if ((unsigned long)curr & 3) {
+		return;
+	}
+
+	while (depth-- && curr) {
+		// The frame lives in user memory: validate and copy it before use
+		if (!access_ok(VERIFY_READ, curr, sizeof(struct stack_frame_eabi)) ||
+				__copy_from_user_inatomic(&bufcurr, curr, sizeof(struct stack_frame_eabi))) {
+			return;
+		}
+
+		fp = (is_compat ? bufcurr.fp_32 : bufcurr.fp);
+		lr = (is_compat ? bufcurr.lr_32 : bufcurr.lr);
+
+#define calc_next(reg) ((reg) - gcc_frame_offset)
+		// Returns true if reg is a valid fp
+#define validate_next(reg, curr) \
+		((reg) != 0 && (calc_next(reg) & 3) == 0 && (unsigned long)(curr) < calc_next(reg))
+
+		// Try lr from the stack as the fp because gcc leaf functions do not push lr
+		// If gcc_frame_offset is non-zero, the lr will also be the clang fp
+		// This assumes code is at a lower address than the stack
+		if (validate_next(lr, curr)) {
+			fp = lr;
+			lr = (is_compat ? bufcurr.lr2_32 : bufcurr.lr2);
+		}
+
+		gator_add_trace(cpu, lr);
+
+		// Frames must be aligned and strictly ascend, otherwise stop
+		if (!validate_next(fp, curr)) {
+			return;
+		}
+
+		// Move to the next stack frame
+		curr = (struct stack_frame_eabi *)calc_next(fp);
+	}
+#endif
+}
+
+#if defined(__arm__) || defined(__aarch64__)
+/*
+ * walk_stackframe() callback. d points at the number of entries still to be
+ * reported; while it is non-zero one entry is marshalled for frame->pc and the
+ * count is decremented. Returns non-zero once the count reaches zero
+ * (presumably asking walk_stackframe() to stop - confirm against its contract).
+ */
+static int report_trace(struct stackframe *frame, void *d)
+{
+	unsigned int *depth = d, cookie = NO_COOKIE;
+	unsigned long addr = frame->pc;
+
+	if (*depth) {
+#if defined(MODULE)
+		unsigned int cpu = get_physical_cpu();
+		struct module *mod = __module_address(addr);
+		if (mod) {
+			// Report module addresses relative to the module, tagged with its cookie
+			cookie = get_cookie(cpu, current, mod->name, false);
+			addr = addr - (unsigned long)mod->module_core;
+		}
+#endif
+		marshal_backtrace(addr & ~1, cookie);
+		(*depth)--;
+	}
+
+	return *depth == 0;
+}
+#endif
+
+// Uncomment the following line to enable kernel stack unwinding within gator, note it can also be defined from the Makefile
+// #define GATOR_KERNEL_STACK_UNWINDING
+
+#if (defined(__arm__) || defined(__aarch64__)) && !defined(GATOR_KERNEL_STACK_UNWINDING)
+// Disabled by default
+MODULE_PARM_DESC(kernel_stack_unwinding, "Allow kernel stack unwinding.");
+bool kernel_stack_unwinding = 0;
+module_param(kernel_stack_unwinding, bool, 0644);
+#endif
+
+/*
+ * Report the kernel side of a sample. On arm/aarch64 the stack is walked with
+ * walk_stackframe(); at least one entry (the pc) is always reported and more
+ * only when kernel stack unwinding is enabled. Other architectures report the
+ * pc alone.
+ */
+static void kernel_backtrace(int cpu, struct pt_regs *const regs)
+{
+#if defined(__arm__) || defined(__aarch64__)
+#ifdef GATOR_KERNEL_STACK_UNWINDING
+	int depth = gator_backtrace_depth;
+#else
+	int depth = (kernel_stack_unwinding ? gator_backtrace_depth : 1);
+#endif
+	struct stackframe frame;
+	if (depth == 0)
+		depth = 1;
+#if defined(__arm__)
+	frame.fp = regs->ARM_fp;
+	frame.sp = regs->ARM_sp;
+	frame.lr = regs->ARM_lr;
+	frame.pc = regs->ARM_pc;
+#else
+	frame.fp = regs->regs[29];
+	frame.sp = regs->sp;
+	frame.pc = regs->pc;
+#endif
+	walk_stackframe(&frame, report_trace, &depth);
+#else
+	marshal_backtrace(PC_REG & ~1, NO_COOKIE);
+#endif
+}
diff --git a/drivers/gator/gator_cookies.c b/drivers/gator/gator_cookies.c
new file mode 100644
index 000000000000..eb9b946170c1
--- /dev/null
+++ b/drivers/gator/gator_cookies.c
@@ -0,0 +1,433 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#define COOKIEMAP_ENTRIES 1024 /* must be power of 2 */
+#define TRANSLATE_BUFFER_SIZE 512 // bytes; must hold a power of 2 number of struct cookie_args entries
+#define TRANSLATE_TEXT_SIZE 256
+#define MAX_COLLISIONS 2
+
+static uint32_t *gator_crc32_table;
+static unsigned int translate_buffer_mask;
+
+// One deferred app_process name lookup: the task to inspect and its original name
+struct cookie_args {
+	struct task_struct *task;
+	const char *text;
+};
+
+static DEFINE_PER_CPU(char *, translate_text);
+static DEFINE_PER_CPU(uint32_t, cookie_next_key);
+static DEFINE_PER_CPU(uint64_t *, cookie_keys);
+static DEFINE_PER_CPU(uint32_t *, cookie_values);
+static DEFINE_PER_CPU(int, translate_buffer_read);
+static DEFINE_PER_CPU(int, translate_buffer_write);
+static DEFINE_PER_CPU(struct cookie_args *, translate_buffer);
+
+static uint32_t get_cookie(int cpu, struct task_struct *task, const char *text, bool from_wq);
+static void wq_cookie_handler(struct work_struct *unused);
+DECLARE_WORK(cookie_work, wq_cookie_handler);
+static struct timer_list app_process_wake_up_timer;
+static void app_process_wake_up_handler(unsigned long unused_data);
+
+/*
+ * Hash a 64-bit key to the index of the first slot of its bucket in the
+ * per-cpu cookie_keys/cookie_values arrays. Each bucket spans MAX_COLLISIONS
+ * consecutive slots.
+ */
+static uint32_t cookiemap_code(uint64_t value64)
+{
+	uint32_t value = (uint32_t)((value64 >> 32) + value64);
+	uint32_t cookiecode = (value >> 24) & 0xff;
+	cookiecode = cookiecode * 31 + ((value >> 16) & 0xff);
+	cookiecode = cookiecode * 31 + ((value >> 8) & 0xff);
+	cookiecode = cookiecode * 31 + ((value >> 0) & 0xff);
+	cookiecode &= (COOKIEMAP_ENTRIES - 1);
+	return cookiecode * MAX_COLLISIONS;
+}
+
+/*
+ * Table driven checksum of the NUL terminated string data, using
+ * gator_crc32_table as built by cookies_initialize().
+ */
+static uint32_t gator_chksum_crc32(const char *data)
+{
+	const unsigned char *block = (const unsigned char *)data;
+	uint32_t crc = 0xFFFFFFFF;
+
+	while (*block)
+		crc = (crc >> 8) ^ gator_crc32_table[(crc ^ *block++) & 0xFF];
+
+	return crc ^ 0xFFFFFFFF;
+}
+
+/*
+ * Exists
+ *  Pre:  [0][1][v][3]..[n-1]
+ *  Post: [v][0][1][3]..[n-1]
+ *
+ * Looks key up in its bucket on the current physical cpu. On a hit the entry
+ * is moved to the front of the bucket and its value returned; 0 means the key
+ * is not cached.
+ */
+static uint32_t cookiemap_exists(uint64_t key)
+{
+	unsigned long x, flags, retval = 0;
+	int cpu = get_physical_cpu();
+	uint32_t cookiecode = cookiemap_code(key);
+	uint64_t *keys = &(per_cpu(cookie_keys, cpu)[cookiecode]);
+	uint32_t *values = &(per_cpu(cookie_values, cpu)[cookiecode]);
+
+	// Can be called from interrupt handler or from work queue
+	local_irq_save(flags);
+	for (x = 0; x < MAX_COLLISIONS; x++) {
+		if (keys[x] == key) {
+			uint32_t value = values[x];
+			for (; x > 0; x--) {
+				keys[x] = keys[x - 1];
+				values[x] = values[x - 1];
+			}
+			keys[0] = key;
+			values[0] = value;
+			retval = value;
+			break;
+		}
+	}
+	local_irq_restore(flags);
+
+	return retval;
+}
+
+/*
+ * Add
+ *  Pre:  [0][1][2][3]..[n-1]
+ *  Post: [v][0][1][2]..[n-2]
+ *
+ * Inserts key/value at the front of its bucket on the current physical cpu,
+ * dropping the last entry of the bucket. Does no locking of its own; the
+ * caller in this file invokes it with interrupts disabled.
+ */
+static void cookiemap_add(uint64_t key, uint32_t value)
+{
+	int cpu = get_physical_cpu();
+	int cookiecode = cookiemap_code(key);
+	uint64_t *keys = &(per_cpu(cookie_keys, cpu)[cookiecode]);
+	uint32_t *values = &(per_cpu(cookie_values, cpu)[cookiecode]);
+	int x;
+
+	for (x = MAX_COLLISIONS - 1; x > 0; x--) {
+		keys[x] = keys[x - 1];
+		values[x] = values[x - 1];
+	}
+	keys[0] = key;
+	values[0] = value;
+}
+
+#ifndef CONFIG_PREEMPT_RT_FULL
+/*
+ * Queue a task/text pair on the translate ring buffer of cpu and take a
+ * reference on the task. The request is silently dropped when the ring is
+ * full.
+ */
+static void translate_buffer_write_args(int cpu, struct task_struct *task, const char *text)
+{
+	unsigned long flags;
+	int write;
+	int next_write;
+	struct cookie_args *args;
+
+	local_irq_save(flags);
+
+	write = per_cpu(translate_buffer_write, cpu);
+	next_write = (write + 1) & translate_buffer_mask;
+
+	// At least one entry must always remain available as when read == write, the queue is empty not full
+	if (next_write != per_cpu(translate_buffer_read, cpu)) {
+		args = &per_cpu(translate_buffer, cpu)[write];
+		args->task = task;
+		args->text = text;
+		get_task_struct(task);
+		per_cpu(translate_buffer_write, cpu) = next_write;
+	}
+
+	local_irq_restore(flags);
+}
+#endif
+
+/*
+ * Pop the oldest entry of the translate ring buffer of cpu into *args. The
+ * caller must have checked that the ring is not empty.
+ */
+static void translate_buffer_read_args(int cpu, struct cookie_args *args)
+{
+	unsigned long flags;
+	int read;
+
+	local_irq_save(flags);
+
+	read = per_cpu(translate_buffer_read, cpu);
+	*args = per_cpu(translate_buffer, cpu)[read];
+	per_cpu(translate_buffer_read, cpu) = (read + 1) & translate_buffer_mask;
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Work queue handler: while gator is started, drain the translate ring buffer
+ * of the current physical cpu, resolving each queued task to a cookie,
+ * marshalling the link and dropping the task reference taken when queueing.
+ */
+static void wq_cookie_handler(struct work_struct *unused)
+{
+	struct cookie_args args;
+	int cpu = get_physical_cpu(), cookie;
+
+	mutex_lock(&start_mutex);
+
+	if (gator_started != 0) {
+		while (per_cpu(translate_buffer_read, cpu) != per_cpu(translate_buffer_write, cpu)) {
+			translate_buffer_read_args(cpu, &args);
+			cookie = get_cookie(cpu, args.task, args.text, true);
+			marshal_link(cookie, args.task->tgid, args.task->pid);
+			put_task_struct(args.task);
+		}
+	}
+
+	mutex_unlock(&start_mutex);
+}
+
+static void app_process_wake_up_handler(unsigned long unused_data)
+{
+	// had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater
+	schedule_work(&cookie_work);
+}
+
+/*
+ * Retrieve full name from proc/pid/cmdline for java processes on Android
+ *
+ * When not called from the work queue the lookup is queued and 0 is returned.
+ * Otherwise up to TRANSLATE_TEXT_SIZE - 1 bytes of the argument area of task
+ * are copied into the per-cpu translate_text buffer, which is kept NUL
+ * terminated. Returns 1 and points *text at that buffer when a usable name was
+ * read, 0 otherwise (including the transient "zygote"/"<pre-initialized>"
+ * names).
+ */
+static int translate_app_process(const char **text, int cpu, struct task_struct *task, bool from_wq)
+{
+	void *maddr;
+	unsigned int len;
+	unsigned long addr;
+	struct mm_struct *mm;
+	struct page *page = NULL;
+	struct vm_area_struct *page_vma;
+	int bytes, offset, retval = 0;
+	char *buf = per_cpu(translate_text, cpu);
+
+#ifndef CONFIG_PREEMPT_RT_FULL
+	// Push work into a work queue if in atomic context as the kernel functions below might sleep
+	// Rely on the from_wq argument rather than in_irq() or in_interrupt() kernel functions, as the value of these functions seems
+	//   inconsistent during a context switch between android/linux versions
+	if (!from_wq) {
+		// Check if already in buffer
+		int pos = per_cpu(translate_buffer_read, cpu);
+		while (pos != per_cpu(translate_buffer_write, cpu)) {
+			if (per_cpu(translate_buffer, cpu)[pos].task == task)
+				goto out;
+			pos = (pos + 1) & translate_buffer_mask;
+		}
+
+		translate_buffer_write_args(cpu, task, *text);
+
+		// Not safe to call in RT-Preempt full in schedule switch context
+		mod_timer(&app_process_wake_up_timer, jiffies + 1);
+		goto out;
+	}
+#endif
+
+	mm = get_task_mm(task);
+	if (!mm)
+		goto out;
+	if (!mm->arg_end)
+		goto outmm;
+	addr = mm->arg_start;
+	len = mm->arg_end - mm->arg_start;
+
+	// Leave room for the terminating NUL written after every copied chunk
+	if (len > TRANSLATE_TEXT_SIZE - 1)
+		len = TRANSLATE_TEXT_SIZE - 1;
+
+	down_read(&mm->mmap_sem);
+	while (len) {
+		if (get_user_pages(task, mm, addr, 1, 0, 1, &page, &page_vma) <= 0)
+			goto outsem;
+
+		maddr = kmap(page);
+		offset = addr & (PAGE_SIZE - 1);
+		bytes = len;
+		if (bytes > PAGE_SIZE - offset)
+			bytes = PAGE_SIZE - offset;
+
+		copy_from_user_page(page_vma, page, addr, buf, maddr + offset, bytes);
+
+		kunmap(page);
+		// release page allocated by get_user_pages()
+		page_cache_release(page);
+
+		len -= bytes;
+		buf += bytes;
+		addr += bytes;
+
+		// The argument area may have been truncated or may lack a NUL; the
+		// text is compared and marshalled as a C string, so terminate it
+		*buf = '\0';
+
+		*text = per_cpu(translate_text, cpu);
+		retval = 1;
+	}
+
+	// On app_process startup, /proc/pid/cmdline is initially "zygote" then "<pre-initialized>" but changes after an initial startup period
+	if (strcmp(*text, "zygote") == 0 || strcmp(*text, "<pre-initialized>") == 0)
+		retval = 0;
+
+outsem:
+	up_read(&mm->mmap_sem);
+outmm:
+	mmput(mm);
+out:
+	return retval;
+}
+
+/*
+ * Return the cookie for text as seen by task. The lookup key combines the
+ * checksum of text with the tgid of task. A cached cookie is returned when
+ * present; otherwise a new one is allocated from the per-cpu counter, cached
+ * and marshalled. UNRESOLVED_COOKIE is returned when an "app_process" name
+ * cannot be translated yet or when marshal_cookie_header() refuses the text.
+ */
+static uint32_t get_cookie(int cpu, struct task_struct *task, const char *text, bool from_wq)
+{
+	unsigned long flags, cookie;
+	uint64_t key;
+
+	key = gator_chksum_crc32(text);
+	key = (key << 32) | (uint32_t)task->tgid;
+
+	cookie = cookiemap_exists(key);
+	if (cookie) {
+		return cookie;
+	}
+
+	if (strcmp(text, "app_process") == 0) {
+		if (!translate_app_process(&text, cpu, task, from_wq))
+			return UNRESOLVED_COOKIE;
+	}
+
+	// Can be called from interrupt handler or from work queue or from scheduler trace
+	local_irq_save(flags);
+
+	cookie = UNRESOLVED_COOKIE;
+	if (marshal_cookie_header(text)) {
+		// Each cpu starts at a different value and steps by nr_cpu_ids
+		cookie = per_cpu(cookie_next_key, cpu) += nr_cpu_ids;
+		cookiemap_add(key, cookie);
+		marshal_cookie(cookie, text);
+	}
+
+	local_irq_restore(flags);
+
+	return cookie;
+}
+
+/*
+ * Cookie for the executable of task: NO_COOKIE for tasks without an address
+ * space, UNRESOLVED_COOKIE when there is no task or no executable file.
+ */
+static int get_exec_cookie(int cpu, struct task_struct *task)
+{
+	struct mm_struct *mm;
+	const char *text;
+
+	// test task before it is dereferenced
+	if (!task)
+		return UNRESOLVED_COOKIE;
+
+	// kernel threads have no address space
+	mm = task->mm;
+	if (!mm)
+		return NO_COOKIE;
+
+	if (mm->exe_file) {
+		text = mm->exe_file->f_path.dentry->d_name.name;
+		return get_cookie(cpu, task, text, false);
+	}
+
+	return UNRESOLVED_COOKIE;
+}
+
+/*
+ * Cookie for the mapping of task that contains addr. For file backed mappings
+ * *offset receives the offset of addr within the file, for anonymous mappings
+ * addr itself; *offset is left untouched when no mapping contains addr.
+ * Returns NO_COOKIE for tasks without an address space and for anonymous
+ * mappings, UNRESOLVED_COOKIE when no mapping contains addr.
+ */
+static unsigned long get_address_cookie(int cpu, struct task_struct *task, unsigned long addr, off_t *offset)
+{
+	unsigned long cookie = NO_COOKIE;
+	struct mm_struct *mm = task->mm;
+	struct vm_area_struct *vma;
+	const char *text;
+
+	if (!mm)
+		return cookie;
+
+	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
+		if (addr < vma->vm_start || addr >= vma->vm_end)
+			continue;
+
+		if (vma->vm_file) {
+			text = vma->vm_file->f_path.dentry->d_name.name;
+			cookie = get_cookie(cpu, task, text, false);
+			*offset = (vma->vm_pgoff << PAGE_SHIFT) + addr - vma->vm_start;
+		} else {
+			/* must be an anonymous map */
+			*offset = addr;
+		}
+
+		break;
+	}
+
+	if (!vma)
+		cookie = UNRESOLVED_COOKIE;
+
+	return cookie;
+}
+
+/*
+ * Allocate the per-cpu cookie map, translate ring buffer and translate text
+ * buffer for every present cpu and build the checksum table. Returns 0 on
+ * success or -ENOMEM.
+ * NOTE(review): memory allocated before a failure is not freed here;
+ * presumably the caller runs cookies_release() - confirm.
+ */
+static int cookies_initialize(void)
+{
+	uint32_t crc, poly;
+	int i, j, cpu, size, err = 0;
+
+	// Set the timer up first so that cookies_release() can always
+	// del_timer_sync() it, even after a failed initialisation
+	setup_timer(&app_process_wake_up_timer, app_process_wake_up_handler, 0);
+
+	translate_buffer_mask = TRANSLATE_BUFFER_SIZE / sizeof(per_cpu(translate_buffer, 0)[0]) - 1;
+
+	for_each_present_cpu(cpu) {
+		per_cpu(cookie_next_key, cpu) = nr_cpu_ids + cpu;
+
+		size = COOKIEMAP_ENTRIES * MAX_COLLISIONS * sizeof(uint64_t);
+		per_cpu(cookie_keys, cpu) = kzalloc(size, GFP_KERNEL);
+		if (!per_cpu(cookie_keys, cpu)) {
+			err = -ENOMEM;
+			goto cookie_setup_error;
+		}
+
+		size = COOKIEMAP_ENTRIES * MAX_COLLISIONS * sizeof(uint32_t);
+		per_cpu(cookie_values, cpu) = kzalloc(size, GFP_KERNEL);
+		if (!per_cpu(cookie_values, cpu)) {
+			err = -ENOMEM;
+			goto cookie_setup_error;
+		}
+
+		per_cpu(translate_buffer, cpu) = kmalloc(TRANSLATE_BUFFER_SIZE, GFP_KERNEL);
+		if (!per_cpu(translate_buffer, cpu)) {
+			err = -ENOMEM;
+			goto cookie_setup_error;
+		}
+
+		per_cpu(translate_buffer_write, cpu) = 0;
+		per_cpu(translate_buffer_read, cpu) = 0;
+
+		per_cpu(translate_text, cpu) = kmalloc(TRANSLATE_TEXT_SIZE, GFP_KERNEL);
+		if (!per_cpu(translate_text, cpu)) {
+			err = -ENOMEM;
+			goto cookie_setup_error;
+		}
+	}
+
+	// build CRC32 table
+	poly = 0x04c11db7;
+	gator_crc32_table = kmalloc(256 * sizeof(uint32_t), GFP_KERNEL);
+	if (!gator_crc32_table) {
+		err = -ENOMEM;
+		goto cookie_setup_error;
+	}
+	for (i = 0; i < 256; i++) {
+		crc = i;
+		for (j = 8; j > 0; j--) {
+			if (crc & 1) {
+				crc = (crc >> 1) ^ poly;
+			} else {
+				crc >>= 1;
+			}
+		}
+		gator_crc32_table[i] = crc;
+	}
+
+cookie_setup_error:
+	return err;
+}
+
+/*
+ * Free everything cookies_initialize() allocated, reset the ring buffer
+ * indices and stop the wake up timer. Pointers are cleared so the function
+ * copes with a partially completed initialisation.
+ */
+static void cookies_release(void)
+{
+	int cpu;
+
+	for_each_present_cpu(cpu) {
+		kfree(per_cpu(cookie_keys, cpu));
+		per_cpu(cookie_keys, cpu) = NULL;
+
+		kfree(per_cpu(cookie_values, cpu));
+		per_cpu(cookie_values, cpu) = NULL;
+
+		kfree(per_cpu(translate_buffer, cpu));
+		per_cpu(translate_buffer, cpu) = NULL;
+		per_cpu(translate_buffer_read, cpu) = 0;
+		per_cpu(translate_buffer_write, cpu) = 0;
+
+		kfree(per_cpu(translate_text, cpu));
+		per_cpu(translate_text, cpu) = NULL;
+	}
+
+	del_timer_sync(&app_process_wake_up_timer);
+	kfree(gator_crc32_table);
+	gator_crc32_table = NULL;
+}
diff --git a/drivers/gator/gator_events_armv6.c b/drivers/gator/gator_events_armv6.c
new file mode 100644
index 000000000000..dd7974090b82
--- /dev/null
+++ b/drivers/gator/gator_events_armv6.c
@@ -0,0 +1,237 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "gator.h"
+
+// gator_events_perf_pmu.c is used if perf is supported
+#if GATOR_NO_PERF_SUPPORT
+
+static const char *pmnc_name;
+
+/*
+ * Per-CPU PMCR
+ */
+#define PMCR_E (1 << 0) /* Enable */
+#define PMCR_P (1 << 1) /* Count reset */
+#define PMCR_C (1 << 2) /* Cycle counter reset */
+#define PMCR_OFL_PMN0 (1 << 8) /* Count reg 0 overflow */
+#define PMCR_OFL_PMN1 (1 << 9) /* Count reg 1 overflow */
+#define PMCR_OFL_CCNT (1 << 10) /* Cycle counter overflow */
+
+#define PMN0 0
+#define PMN1 1
+#define CCNT 2
+#define CNTMAX (CCNT+1)
+
+static int pmnc_counters = 0;
+static unsigned long pmnc_enabled[CNTMAX];
+static unsigned long pmnc_event[CNTMAX];
+static unsigned long pmnc_key[CNTMAX];
+
+// key/value pairs handed back to the caller of online() and read()
+static DEFINE_PER_CPU(int[CNTMAX * 2], perfCnt);
+
+// Write the performance monitor control register
+static inline void armv6_pmnc_write(u32 val)
+{
+	/* upper 4bits and 7, 11 are write-as-0 */
+	val &= 0x0ffff77f;
+	asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r" (val));
+}
+
+// Read the performance monitor control register
+static inline u32 armv6_pmnc_read(void)
+{
+	u32 val;
+	asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r" (val));
+	return val;
+}
+
+// Write zero to counter cnt (CCNT, PMN0 or PMN1); other values are ignored
+static void armv6_pmnc_reset_counter(unsigned int cnt)
+{
+	u32 val = 0;
+	switch (cnt) {
+	case CCNT:
+		asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r" (val));
+		break;
+	case PMN0:
+		asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r" (val));
+		break;
+	case PMN1:
+		asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r" (val));
+		break;
+	}
+}
+
+/*
+ * Create one gatorfs directory per counter holding "enabled" and "key", plus
+ * "event" for the two event counters. Returns 0 on success, -1 when a
+ * directory cannot be created.
+ */
+int gator_events_armv6_create_files(struct super_block *sb, struct dentry *root)
+{
+	struct dentry *dir;
+	int i;
+
+	pmnc_counters = 3;
+
+	for (i = PMN0; i <= CCNT; i++) {
+		char buf[40];
+		if (i == CCNT) {
+			snprintf(buf, sizeof buf, "ARM_%s_ccnt", pmnc_name);
+		} else {
+			snprintf(buf, sizeof buf, "ARM_%s_cnt%d", pmnc_name, i);
+		}
+		dir = gatorfs_mkdir(sb, root, buf);
+		if (!dir) {
+			return -1;
+		}
+		gatorfs_create_ulong(sb, dir, "enabled", &pmnc_enabled[i]);
+		gatorfs_create_ro_ulong(sb, dir, "key", &pmnc_key[i]);
+		if (i != CCNT) {
+			gatorfs_create_ulong(sb, dir, "event", &pmnc_event[i]);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Program and start the counters of the current cpu. Returns the number of
+ * ints stored in the per-cpu result array (a key followed by a zero value for
+ * every enabled counter); *buffer, when given, is pointed at that array.
+ */
+static int gator_events_armv6_online(int **buffer, bool migrate)
+{
+	unsigned int cnt, len = 0, cpu = smp_processor_id();
+	u32 pmnc;
+
+	if (armv6_pmnc_read() & PMCR_E) {
+		armv6_pmnc_write(armv6_pmnc_read() & ~PMCR_E);
+	}
+
+	/* initialize PMNC, reset overflow, D bit, C bit and P bit. */
+	armv6_pmnc_write(PMCR_OFL_PMN0 | PMCR_OFL_PMN1 | PMCR_OFL_CCNT |
+			 PMCR_C | PMCR_P);
+
+	/* configure control register */
+	for (pmnc = 0, cnt = PMN0; cnt <= CCNT; cnt++) {
+		unsigned long event;
+
+		if (!pmnc_enabled[cnt])
+			continue;
+
+		event = pmnc_event[cnt] & 255;
+
+		// Set event (if destined for PMNx counters)
+		if (cnt == PMN0) {
+			pmnc |= event << 20;
+		} else if (cnt == PMN1) {
+			pmnc |= event << 12;
+		}
+
+		// Reset counter
+		armv6_pmnc_reset_counter(cnt);
+	}
+	armv6_pmnc_write(pmnc | PMCR_E);
+
+	// return zero values, no need to read as the counters were just reset
+	for (cnt = PMN0; cnt <= CCNT; cnt++) {
+		if (pmnc_enabled[cnt]) {
+			per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
+			per_cpu(perfCnt, cpu)[len++] = 0;
+		}
+	}
+
+	if (buffer)
+		*buffer = per_cpu(perfCnt, cpu);
+
+	return len;
+}
+
+// Stop the counters of the current cpu and zero them; reports no values
+static int gator_events_armv6_offline(int **buffer, bool migrate)
+{
+	unsigned int cnt;
+
+	armv6_pmnc_write(armv6_pmnc_read() & ~PMCR_E);
+	for (cnt = PMN0; cnt <= CCNT; cnt++) {
+		armv6_pmnc_reset_counter(cnt);
+	}
+
+	return 0;
+}
+
+// Forget the enabled flags and event selections of all counters
+static void gator_events_armv6_stop(void)
+{
+	unsigned int cnt;
+
+	for (cnt = PMN0; cnt <= CCNT; cnt++) {
+		pmnc_enabled[cnt] = 0;
+		pmnc_event[cnt] = 0;
+	}
+}
+
+/*
+ * Read and zero every enabled counter of the current cpu. Returns the number
+ * of ints stored in the per-cpu result array (key/value pairs); *buffer, when
+ * given, is pointed at that array. Returns 0 while the counters are disabled.
+ */
+static int gator_events_armv6_read(int **buffer)
+{
+	int cnt, len = 0;
+	int cpu = smp_processor_id();
+
+	// a context switch may occur before the online hotplug event, thus need to check that the pmu is enabled
+	if (!(armv6_pmnc_read() & PMCR_E)) {
+		return 0;
+	}
+
+	for (cnt = PMN0; cnt <= CCNT; cnt++) {
+		if (pmnc_enabled[cnt]) {
+			u32 value = 0;
+			switch (cnt) {
+			case CCNT:
+				asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r" (value));
+				break;
+			case PMN0:
+				asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r" (value));
+				break;
+			case PMN1:
+				asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r" (value));
+				break;
+			}
+			armv6_pmnc_reset_counter(cnt);
+
+			per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
+			per_cpu(perfCnt, cpu)[len++] = value;
+		}
+	}
+
+	if (buffer)
+		*buffer = per_cpu(perfCnt, cpu);
+
+	return len;
+}
+
+static struct gator_interface gator_events_armv6_interface = {
+	.create_files = gator_events_armv6_create_files,
+	.stop = gator_events_armv6_stop,
+	.online = gator_events_armv6_online,
+	.offline = gator_events_armv6_offline,
+	.read = gator_events_armv6_read,
+};
+
+/*
+ * Register the ARMv6 counters with gator when running on a supported core.
+ * Returns -1 for any other cpu, otherwise the result of
+ * gator_events_install().
+ */
+int gator_events_armv6_init(void)
+{
+	unsigned int cnt;
+
+	switch (gator_cpuid()) {
+	case ARM1136:
+	case ARM1156:
+	case ARM1176:
+		pmnc_name = "ARM11";
+		break;
+	case ARM11MPCORE:
+		pmnc_name = "ARM11MPCore";
+		break;
+	default:
+		return -1;
+	}
+
+	for (cnt = PMN0; cnt <= CCNT; cnt++) {
+		pmnc_enabled[cnt] = 0;
+		pmnc_event[cnt] = 0;
+		pmnc_key[cnt] = gator_events_get_key();
+	}
+
+	return gator_events_install(&gator_events_armv6_interface);
+}
+
+#endif
diff --git a/drivers/gator/gator_events_armv7.c b/drivers/gator/gator_events_armv7.c
new file mode 100644
index 000000000000..30881c8fd3fd
--- /dev/null
+++ b/drivers/gator/gator_events_armv7.c
@@ -0,0 +1,312 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Disabling interrupts
+ * Many of the functions below disable interrupts via local_irq_save(). This disabling of interrupts is done to prevent any race conditions
+ * between multiple entities (e.g. hrtimer interrupts and event based interrupts) calling the same functions. As accessing the pmu involves
+ * several steps (disable, select, read, enable), these steps must be performed atomically. Normal synchronization routines cannot be used
+ * as these functions are being called from interrupt context.
+ */
+
+#include "gator.h"
+
+// gator_events_perf_pmu.c is used if perf is supported
+#if GATOR_NO_PERF_SUPPORT
+
+// Per-CPU PMNC: config reg
+#define PMNC_E (1 << 0) /* Enable all counters */
+#define PMNC_P (1 << 1) /* Reset all counters */
+#define PMNC_C (1 << 2) /* Cycle counter reset */
+#define PMNC_MASK 0x3f /* Mask for writable bits */
+
+// ccnt reg; unsigned because shifting a signed 1 into bit 31 is undefined
+#define CCNT_REG (1U << 31)
+
+#define CCNT 0
+#define CNT0 1
+#define CNTMAX (6+1)
+
+static const char *pmnc_name;
+static int pmnc_counters;
+
+static unsigned long pmnc_enabled[CNTMAX];
+static unsigned long pmnc_event[CNTMAX];
+static unsigned long pmnc_key[CNTMAX];
+
+// key/value pairs handed back to the caller of online() and read()
+static DEFINE_PER_CPU(int[CNTMAX * 2], perfCnt);
+
+// Write the writable bits of the PMNC config register
+inline void armv7_pmnc_write(u32 val)
+{
+	val &= PMNC_MASK;
+	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (val));
+}
+
+// Read the PMNC config register
+inline u32 armv7_pmnc_read(void)
+{
+	u32 val;
+	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
+	return val;
+}
+
+/*
+ * Return the current cycle counter value and reload the counter with
+ * -reset_value. The counter is disabled for the duration and interrupts are
+ * masked so the sequence cannot be interleaved with another access.
+ */
+inline u32 armv7_ccnt_read(u32 reset_value)
+{
+	unsigned long flags;
+	u32 newval = -reset_value;
+	u32 den = CCNT_REG;
+	u32 val;
+
+	local_irq_save(flags);
+	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (den));	// disable
+	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));	// read
+	asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (newval));	// new value
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (den));	// enable
+	local_irq_restore(flags);
+
+	return val;
+}
+
+/*
+ * Return the current value of event counter cnt (CNT0 based) and reload it
+ * with -reset_value. The counter is disabled for the duration and interrupts
+ * are masked so the select/read/write sequence stays atomic.
+ */
+inline u32 armv7_cntn_read(unsigned int cnt, u32 reset_value)
+{
+	unsigned long flags;
+	u32 newval = -reset_value;
+	u32 sel = (cnt - CNT0);
+	u32 den = 1U << sel;
+	u32 oldval;
+
+	local_irq_save(flags);
+	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (den));	// disable
+	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (sel));	// select
+	asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (oldval));	// read
+	asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (newval));	// new value
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (den));	// enable
+	local_irq_restore(flags);
+
+	return oldval;
+}
+
+// Disable the overflow interrupt of counter cnt (CCNT or CNT0 based index)
+static inline void armv7_pmnc_disable_interrupt(unsigned int cnt)
+{
+	u32 val = cnt ? (1U << (cnt - CNT0)) : CCNT_REG;
+	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
+}
+
+inline u32 armv7_pmnc_reset_interrupt(void)
+{
+	// Get and reset overflow status flags
+	u32 flags;
+	asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (flags));
+	flags &= 0x8000003f;
+	asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (flags));
+	return flags;
+}
+
+// Enable counter cnt (CCNT or CNT0 based index); returns cnt
+static inline u32 armv7_pmnc_enable_counter(unsigned int cnt)
+{
+	u32 val = cnt ? (1U << (cnt - CNT0)) : CCNT_REG;
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
+	return cnt;
+}
+
+// Disable counter cnt (CCNT or CNT0 based index); returns cnt
+static inline u32 armv7_pmnc_disable_counter(unsigned int cnt)
+{
+	u32 val = cnt ? (1U << (cnt - CNT0)) : CCNT_REG;
+	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
+	return cnt;
+}
+
+// Select event counter cnt for the following indirect accesses; returns cnt
+static inline int armv7_pmnc_select_counter(unsigned int cnt)
+{
+	u32 val = (cnt - CNT0);
+	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
+	return cnt;
+}
+
+// Select event counter cnt and program it to count event val
+static inline void armv7_pmnc_write_evtsel(unsigned int cnt, u32 val)
+{
+	if (armv7_pmnc_select_counter(cnt) == cnt) {
+		asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
+	}
+}
+
+/*
+ * Create one gatorfs directory per counter holding "enabled" and "key", plus
+ * "event" for the event counters. Index 0 is the cycle counter. Returns 0 on
+ * success, -1 when a directory cannot be created.
+ */
+static int gator_events_armv7_create_files(struct super_block *sb, struct dentry *root)
+{
+	struct dentry *dir;
+	int i;
+
+	for (i = 0; i < pmnc_counters; i++) {
+		char buf[40];
+		if (i == 0) {
+			snprintf(buf, sizeof buf, "ARM_%s_ccnt", pmnc_name);
+		} else {
+			snprintf(buf, sizeof buf, "ARM_%s_cnt%d", pmnc_name, i - 1);
+		}
+		dir = gatorfs_mkdir(sb, root, buf);
+		if (!dir) {
+			return -1;
+		}
+		gatorfs_create_ulong(sb, dir, "enabled", &pmnc_enabled[i]);
+		gatorfs_create_ro_ulong(sb, dir, "key", &pmnc_key[i]);
+		if (i > 0) {
+			gatorfs_create_ulong(sb, dir, "event", &pmnc_event[i]);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Program and start the enabled counters of the current cpu. Returns the
+ * number of ints stored in the per-cpu result array (a key followed by a zero
+ * value for every enabled counter); *buffer, when given, is pointed at that
+ * array.
+ */
+static int gator_events_armv7_online(int **buffer, bool migrate)
+{
+	unsigned int cnt, len = 0, cpu = smp_processor_id();
+
+	if (armv7_pmnc_read() & PMNC_E) {
+		armv7_pmnc_write(armv7_pmnc_read() & ~PMNC_E);
+	}
+
+	// Initialize & Reset PMNC: C bit and P bit
+	armv7_pmnc_write(PMNC_P | PMNC_C);
+
+	// Reset overflow flags
+	armv7_pmnc_reset_interrupt();
+
+	for (cnt = CCNT; cnt < CNTMAX; cnt++) {
+		unsigned long event;
+
+		if (!pmnc_enabled[cnt])
+			continue;
+
+		// Disable counter
+		armv7_pmnc_disable_counter(cnt);
+
+		event = pmnc_event[cnt] & 255;
+
+		// Set event (if destined for PMNx counters), we don't need to set the event if it's a cycle count
+		if (cnt != CCNT)
+			armv7_pmnc_write_evtsel(cnt, event);
+
+		armv7_pmnc_disable_interrupt(cnt);
+
+		// Reset counter
+		if (cnt == CCNT)
+			armv7_ccnt_read(0);
+		else
+			armv7_cntn_read(cnt, 0);
+
+		// Enable counter
+		armv7_pmnc_enable_counter(cnt);
+	}
+
+	// enable
+	armv7_pmnc_write(armv7_pmnc_read() | PMNC_E);
+
+	// return zero values, no need to read as the counters were just reset
+	for (cnt = 0; cnt < pmnc_counters; cnt++) {
+		if (pmnc_enabled[cnt]) {
+			per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
+			per_cpu(perfCnt, cpu)[len++] = 0;
+		}
+	}
+
+	if (buffer)
+		*buffer = per_cpu(perfCnt, cpu);
+
+	return len;
+}
+
+// Stop the counters of the current cpu; reports no values
+static int gator_events_armv7_offline(int **buffer, bool migrate)
+{
+	// disable all counters, including PMCCNTR; overflow IRQs will not be signaled
+	armv7_pmnc_write(armv7_pmnc_read() & ~PMNC_E);
+
+	return 0;
+}
+
+// Forget the enabled flags and event selections of all counters
+static void gator_events_armv7_stop(void)
+{
+	unsigned int cnt;
+
+	for (cnt = CCNT; cnt < CNTMAX; cnt++) {
+		pmnc_enabled[cnt] = 0;
+		pmnc_event[cnt] = 0;
+	}
+}
+
+/*
+ * Read and zero every enabled counter of the current cpu. Returns the number
+ * of ints stored in the per-cpu result array (key/value pairs); *buffer, when
+ * given, is pointed at that array. Returns 0 while the counters are disabled.
+ */
+static int gator_events_armv7_read(int **buffer)
+{
+	int cnt, len = 0;
+	int cpu = smp_processor_id();
+
+	// a context switch may occur before the online hotplug event, thus need to check that the pmu is enabled
+	if (!(armv7_pmnc_read() & PMNC_E)) {
+		return 0;
+	}
+
+	for (cnt = 0; cnt < pmnc_counters; cnt++) {
+		if (pmnc_enabled[cnt]) {
+			int value;
+			if (cnt == CCNT) {
+				value = armv7_ccnt_read(0);
+			} else {
+				value = armv7_cntn_read(cnt, 0);
+			}
+			per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
+			per_cpu(perfCnt, cpu)[len++] = value;
+		}
+	}
+
+	if (buffer)
+		*buffer = per_cpu(perfCnt, cpu);
+
+	return len;
+}
+
+static struct gator_interface gator_events_armv7_interface = {
+	.create_files = gator_events_armv7_create_files,
+	.stop = gator_events_armv7_stop,
+	.online = gator_events_armv7_online,
+	.offline = gator_events_armv7_offline,
+	.read = gator_events_armv7_read,
+};
+
+/*
+ * Register the ARMv7 counters with gator when running on a supported core,
+ * recording the core name and its number of event counters. Returns -1 for
+ * any other cpu, otherwise the result of gator_events_install().
+ */
+int gator_events_armv7_init(void)
+{
+	unsigned int cnt;
+
+	switch (gator_cpuid()) {
+	case CORTEX_A5:
+		pmnc_name = "Cortex-A5";
+		pmnc_counters = 2;
+		break;
+	case CORTEX_A7:
+		pmnc_name = "Cortex-A7";
+		pmnc_counters = 4;
+		break;
+	case CORTEX_A8:
+		pmnc_name = "Cortex-A8";
+		pmnc_counters = 4;
+		break;
+	case CORTEX_A9:
+		pmnc_name = "Cortex-A9";
+		pmnc_counters = 6;
+		break;
+	case CORTEX_A15:
+		pmnc_name = "Cortex-A15";
+		pmnc_counters = 6;
+		break;
+	default:
+		return -1;
+	}
+
+	pmnc_counters++; // CNT[n] + CCNT
+
+	for (cnt = CCNT; cnt < CNTMAX; cnt++) {
+		pmnc_enabled[cnt] = 0;
+		pmnc_event[cnt] = 0;
+		pmnc_key[cnt] = gator_events_get_key();
+	}
+
+	return gator_events_install(&gator_events_armv7_interface);
+}
+
+#endif
diff --git a/drivers/gator/gator_events_block.c b/drivers/gator/gator_events_block.c
new file mode 100644
index 000000000000..691ef2574536
--- /dev/null
+++ b/drivers/gator/gator_events_block.c
@@ -0,0 +1,153 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+#include <trace/events/block.h>
+
+/* Indices into blockCnt[]: one accumulator per transfer direction */
+#define BLOCK_RQ_WR		0
+#define BLOCK_RQ_RD		1
+
+#define BLOCK_TOTAL		(BLOCK_RQ_RD+1)
+
+/* The request flag tested for "write" is REQ_RW before 2.6.36 and REQ_WRITE from then on */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
+#define EVENTWRITE REQ_RW
+#else
+#define EVENTWRITE REQ_WRITE
+#endif
+
+/* gatorfs-backed state: per-direction enable switch and reporting key */
+static ulong block_rq_wr_enabled;
+static ulong block_rq_rd_enabled;
+static ulong block_rq_wr_key;
+static ulong block_rq_rd_key;
+/* Amounts accumulated by the probe and drained by gator_events_block_read() */
+static atomic_t blockCnt[BLOCK_TOTAL];
+/* Output buffer: gator_events_block_read() emits four ints per direction */
+static int blockGet[BLOCK_TOTAL * 4];
+
+/*
+ * Tracepoint probe for block_rq_complete.
+ *
+ * Adds rq->resid_len to the write or read accumulator, chosen by the
+ * EVENTWRITE bit of rq->cmd_flags, provided that direction is enabled.
+ * A NULL request or a zero length is ignored.
+ */
+GATOR_DEFINE_PROBE(block_rq_complete, TP_PROTO(struct request_queue *q, struct request *rq))
+{
+	int write, size;
+
+	if (!rq)
+		return;
+
+	write = rq->cmd_flags & EVENTWRITE;
+	size = rq->resid_len;
+
+	if (!size)
+		return;
+
+	if (write) {
+		if (block_rq_wr_enabled) {
+			atomic_add(size, &blockCnt[BLOCK_RQ_WR]);
+		}
+	} else {
+		if (block_rq_rd_enabled) {
+			atomic_add(size, &blockCnt[BLOCK_RQ_RD]);
+		}
+	}
+}
+
+/*
+ * Create the gatorfs directories "Linux_block_rq_wr" and "Linux_block_rq_rd",
+ * each holding an "enabled" entry and a read-only "key" entry.
+ *
+ * Returns 0 on success, -1 if either directory cannot be created.  The
+ * results of the gatorfs_create_*() calls themselves are not checked.
+ */
+static int gator_events_block_create_files(struct super_block *sb, struct dentry *root)
+{
+	struct dentry *dir;
+
+	/* block_complete_wr */
+	dir = gatorfs_mkdir(sb, root, "Linux_block_rq_wr");
+	if (!dir) {
+		return -1;
+	}
+	gatorfs_create_ulong(sb, dir, "enabled", &block_rq_wr_enabled);
+	gatorfs_create_ro_ulong(sb, dir, "key", &block_rq_wr_key);
+
+	/* block_complete_rd */
+	dir = gatorfs_mkdir(sb, root, "Linux_block_rq_rd");
+	if (!dir) {
+		return -1;
+	}
+	gatorfs_create_ulong(sb, dir, "enabled", &block_rq_rd_enabled);
+	gatorfs_create_ro_ulong(sb, dir, "key", &block_rq_rd_key);
+
+	return 0;
+}
+
+/*
+ * Register the block_rq_complete probe when at least one direction is
+ * enabled.
+ *
+ * Returns 0 on success (including when nothing is enabled), -1 if the
+ * tracepoint registration fails.
+ */
+static int gator_events_block_start(void)
+{
+	/* register tracepoints */
+	if (block_rq_wr_enabled || block_rq_rd_enabled)
+		if (GATOR_REGISTER_TRACE(block_rq_complete))
+			goto fail_block_rq_exit;
+	pr_debug("gator: registered block event tracepoints\n");
+
+	return 0;
+
+	/* the only registration failed, so there is nothing to unwind: just report */
+fail_block_rq_exit:
+	pr_err("gator: block event tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
+
+	return -1;
+}
+
+/*
+ * Unregister the block_rq_complete probe if either direction was enabled,
+ * then clear both enable switches.
+ */
+static void gator_events_block_stop(void)
+{
+	if (block_rq_wr_enabled || block_rq_rd_enabled)
+		GATOR_UNREGISTER_TRACE(block_rq_complete);
+	pr_debug("gator: unregistered block event tracepoints\n");
+
+	block_rq_wr_enabled = 0;
+	block_rq_rd_enabled = 0;
+}
+
+/*
+ * Drain the accumulated write/read amounts into blockGet[].
+ *
+ * Only the primary core reports; every other core returns 0.  For each
+ * enabled direction whose accumulator is positive, the amount that was read
+ * is subtracted from the accumulator and four ints are emitted:
+ * key, 0, key, value.
+ *
+ * @buffer: if non-NULL, receives a pointer to blockGet[]
+ *
+ * Returns the number of ints written to blockGet[].
+ */
+static int gator_events_block_read(int **buffer)
+{
+	int len = 0, value;
+
+	if (!on_primary_core()) {
+		return 0;
+	}
+
+	if (block_rq_wr_enabled && (value = atomic_read(&blockCnt[BLOCK_RQ_WR])) > 0) {
+		/* subtract only what was read so that concurrent additions by the probe are kept */
+		atomic_sub(value, &blockCnt[BLOCK_RQ_WR]);
+		blockGet[len++] = block_rq_wr_key;
+		blockGet[len++] = 0;	// indicates to Streamline that value bytes were written now, not since the last message
+		blockGet[len++] = block_rq_wr_key;
+		blockGet[len++] = value;
+	}
+	if (block_rq_rd_enabled && (value = atomic_read(&blockCnt[BLOCK_RQ_RD])) > 0) {
+		atomic_sub(value, &blockCnt[BLOCK_RQ_RD]);
+		blockGet[len++] = block_rq_rd_key;
+		blockGet[len++] = 0;	// indicates to Streamline that value bytes were read now, not since the last message
+		blockGet[len++] = block_rq_rd_key;
+		blockGet[len++] = value;
+	}
+
+	if (buffer)
+		*buffer = blockGet;
+
+	return len;
+}
+
+/* Callbacks handed to gator_events_install() */
+static struct gator_interface gator_events_block_interface = {
+	.create_files = gator_events_block_create_files,
+	.start = gator_events_block_start,
+	.stop = gator_events_block_stop,
+	.read = gator_events_block_read,
+};
+
+/*
+ * Clear the enable switches, obtain one key per direction and install the
+ * interface.  Returns whatever gator_events_install() returns.
+ */
+int gator_events_block_init(void)
+{
+	block_rq_wr_enabled = 0;
+	block_rq_rd_enabled = 0;
+
+	block_rq_wr_key = gator_events_get_key();
+	block_rq_rd_key = gator_events_get_key();
+
+	return gator_events_install(&gator_events_block_interface);
+}
diff --git a/drivers/gator/gator_events_ccn-504.c b/drivers/gator/gator_events_ccn-504.c
new file mode 100644
index 000000000000..b89231967c75
--- /dev/null
+++ b/drivers/gator/gator_events_ccn-504.c
@@ -0,0 +1,346 @@
+/**
+ * Copyright (C) ARM Limited 2013. All rights reserved.
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/io.h> +#include <linux/module.h> + +#include "gator.h" + +#define NUM_REGIONS 256 +#define REGION_SIZE (64*1024) +#define REGION_DEBUG 1 +#define REGION_XP 64 +#define NUM_XPS 11 + +// DT (Debug) region +#define PMEVCNTSR0 0x0150 +#define PMCCNTRSR 0x0190 +#define PMCR 0x01A8 +#define PMSR 0x01B0 +#define PMSR_REQ 0x01B8 +#define PMSR_CLR 0x01C0 + +// XP region +#define DT_CONFIG 0x0300 +#define DT_CONTROL 0x0370 + +// Multiple +#define PMU_EVENT_SEL 0x0600 +#define OLY_ID 0xFF00 + +#define CCNT 4 +#define CNTMAX (CCNT + 1) + +#define get_pmu_event_id(event) (((event) >> 0) & 0xFF) +#define get_node_type(event) (((event) >> 8) & 0xFF) +#define get_region(event) (((event) >> 16) & 0xFF) + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) + +// From kernel/params.c +#define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn) \ + int param_set_##name(const char *val, struct kernel_param *kp) \ + { \ + tmptype l; \ + int ret; \ + \ + if (!val) return -EINVAL; \ + ret = strtolfn(val, 0, &l); \ + if (ret == -EINVAL || ((type)l != l)) \ + return -EINVAL; \ + *((type *)kp->arg) = l; \ + return 0; \ + } \ + int param_get_##name(char *buffer, struct kernel_param *kp) \ + { \ + return sprintf(buffer, format, *((type *)kp->arg)); \ + } + +#else + +// From kernel/params.c +#define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn) \ + int param_set_##name(const char *val, const struct kernel_param *kp) \ + { \ + tmptype l; \ + int ret; \ + \ + ret = strtolfn(val, 0, &l); \ + if (ret < 0 || ((type)l != l)) \ + return ret < 0 ? 
ret : -EINVAL; \ + *((type *)kp->arg) = l; \ + return 0; \ + } \ + int param_get_##name(char *buffer, const struct kernel_param *kp) \ + { \ + return scnprintf(buffer, PAGE_SIZE, format, \ + *((type *)kp->arg)); \ + } \ + struct kernel_param_ops param_ops_##name = { \ + .set = param_set_##name, \ + .get = param_get_##name, \ + }; \ + EXPORT_SYMBOL(param_set_##name); \ + EXPORT_SYMBOL(param_get_##name); \ + EXPORT_SYMBOL(param_ops_##name) + +#endif + +STANDARD_PARAM_DEF(u64, u64, "%llu", u64, strict_strtoull); + +// From include/linux/moduleparam.h +#define param_check_u64(name, p) __param_check(name, p, u64) + +MODULE_PARM_DESC(ccn504_addr, "CCN-504 physical base address"); +static u64 ccn504_addr = 0; +module_param(ccn504_addr, u64, 0444); + +static void __iomem *gator_events_ccn504_base; +static bool gator_events_ccn504_global_enabled; +static unsigned long gator_events_ccn504_enabled[CNTMAX]; +static unsigned long gator_events_ccn504_event[CNTMAX]; +static unsigned long gator_events_ccn504_key[CNTMAX]; +static int gator_events_ccn504_buffer[2*CNTMAX]; +static int gator_events_ccn504_prev[CNTMAX]; + +static void gator_events_ccn504_create_shutdown(void) +{ + if (gator_events_ccn504_base != NULL) { + iounmap(gator_events_ccn504_base); + } +} + +static int gator_events_ccn504_create_files(struct super_block *sb, struct dentry *root) +{ + struct dentry *dir; + int i; + char buf[32]; + + for (i = 0; i < CNTMAX; ++i) { + if (i == CCNT) { + snprintf(buf, sizeof(buf), "CCN-504_ccnt"); + } else { + snprintf(buf, sizeof(buf), "CCN-504_cnt%i", i); + } + dir = gatorfs_mkdir(sb, root, buf); + if (!dir) { + return -1; + } + + gatorfs_create_ulong(sb, dir, "enabled", &gator_events_ccn504_enabled[i]); + if (i != CCNT) { + gatorfs_create_ulong(sb, dir, "event", &gator_events_ccn504_event[i]); + } + gatorfs_create_ro_ulong(sb, dir, "key", &gator_events_ccn504_key[i]); + } + + return 0; +} + +static void gator_events_ccn504_set_dt_config(int xp_node_id, int event_num, int value) 
+{ + u32 dt_config; + + dt_config = readl(gator_events_ccn504_base + (REGION_XP + xp_node_id)*REGION_SIZE + DT_CONFIG); + dt_config |= (value + event_num) << (4*event_num); + writel(dt_config, gator_events_ccn504_base + (REGION_XP + xp_node_id)*REGION_SIZE + DT_CONFIG); +} + +static int gator_events_ccn504_start(void) +{ + int i; + + gator_events_ccn504_global_enabled = 0; + for (i = 0; i < CNTMAX; ++i) { + if (gator_events_ccn504_enabled[i]) { + gator_events_ccn504_global_enabled = 1; + break; + } + } + + if (!gator_events_ccn504_global_enabled) { + return 0; + } + + memset(&gator_events_ccn504_prev, 0x80, sizeof(gator_events_ccn504_prev)); + + // Disable INTREQ on overflow + // [6] ovfl_intr_en = 0 + // perhaps set to 1? + // [5] cntr_rst = 0 + // No register paring + // [4:1] cntcfg = 0 + // Enable PMU features + // [0] pmu_en = 1 + writel(0x1, gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMCR); + + // Configure the XPs + for (i = 0; i < NUM_XPS; ++i) { + int dt_control; + + // Pass on all events + writel(0, gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONFIG); + + // Enable PMU capability + // [0] dt_enable = 1 + dt_control = readl(gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONTROL); + dt_control |= 0x1; + writel(dt_control, gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONTROL); + } + + // Assume no other pmu_event_sel registers are set + + // cycle counter does not need to be enabled + for (i = 0; i < CCNT; ++i) { + int pmu_event_id; + int node_type; + int region; + u32 pmu_event_sel; + u32 oly_id_whole; + u32 oly_id; + u32 node_id; + + if (!gator_events_ccn504_enabled[i]) { + continue; + } + + pmu_event_id = get_pmu_event_id(gator_events_ccn504_event[i]); + node_type = get_node_type(gator_events_ccn504_event[i]); + region = get_region(gator_events_ccn504_event[i]); + + // Verify the node_type + oly_id_whole = readl(gator_events_ccn504_base + region*REGION_SIZE + OLY_ID); + oly_id = oly_id_whole & 0x1F; + 
node_id = (oly_id_whole >> 8) & 0x7F; + if ((oly_id != node_type) || + ((node_type == 0x16) && ((oly_id != 0x14) && (oly_id != 0x15) && (oly_id != 0x16) && (oly_id != 0x18) && (oly_id != 0x19) && (oly_id != 0x1A)))) { + printk(KERN_ERR "gator: oly_id is 0x%x expected 0x%x\n", oly_id, node_type); + return -1; + } + + // Set the control register + pmu_event_sel = readl(gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL); + switch (node_type) { + case 0x08: // XP + pmu_event_sel |= pmu_event_id << (7*i); + gator_events_ccn504_set_dt_config(node_id, i, 0x4); + break; + case 0x04: // HN-F + case 0x16: // RN-I + case 0x10: // SBAS + pmu_event_sel |= pmu_event_id << (4*i); + gator_events_ccn504_set_dt_config(node_id/2, i, (node_id & 1) == 0 ? 0x8 : 0xC); + break; + } + writel(pmu_event_sel, gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL); + } + + return 0; +} + +static void gator_events_ccn504_stop(void) +{ + int i; + + if (!gator_events_ccn504_global_enabled) { + return; + } + + // cycle counter does not need to be disabled + for (i = 0; i < CCNT; ++i) { + int region; + + if (!gator_events_ccn504_enabled[i]) { + continue; + } + + region = get_region(gator_events_ccn504_event[i]); + + writel(0, gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL); + } + + // Clear dt_config + for (i = 0; i < NUM_XPS; ++i) { + writel(0, gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONFIG); + } +} + +static int gator_events_ccn504_read(int **buffer) +{ + int i; + int len = 0; + int value; + + if (!on_primary_core() || !gator_events_ccn504_global_enabled) { + return 0; + } + + // Verify the pmsr register is zero + while (readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR) != 0); + + // Request a PMU snapshot + writel(1, gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR_REQ); + + // Wait for the snapshot + while (readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR) == 0); + + // Read the shadow registers + 
for (i = 0; i < CNTMAX; ++i) { + if (!gator_events_ccn504_enabled[i]) { + continue; + } + + value = readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + (i == CCNT ? PMCCNTRSR : PMEVCNTSR0 + 8*i)); + if (gator_events_ccn504_prev[i] != 0x80808080) { + gator_events_ccn504_buffer[len++] = gator_events_ccn504_key[i]; + gator_events_ccn504_buffer[len++] = value - gator_events_ccn504_prev[i]; + } + gator_events_ccn504_prev[i] = value; + + // Are the counters registers cleared when read? Is that what the cntr_rst bit on the pmcr register does? + } + + // Clear the PMU snapshot status + writel(1, gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR_CLR); + + if (buffer) + *buffer = gator_events_ccn504_buffer; + + return len; +} + +static struct gator_interface gator_events_ccn504_interface = { + .shutdown = gator_events_ccn504_create_shutdown, + .create_files = gator_events_ccn504_create_files, + .start = gator_events_ccn504_start, + .stop = gator_events_ccn504_stop, + .read = gator_events_ccn504_read, +}; + +int gator_events_ccn504_init(void) +{ + int i; + + if (ccn504_addr == 0) { + return -1; + } + + gator_events_ccn504_base = ioremap(ccn504_addr, NUM_REGIONS*REGION_SIZE); + if (gator_events_ccn504_base == NULL) { + printk(KERN_ERR "gator: ioremap returned NULL\n"); + return -1; + } + + for (i = 0; i < CNTMAX; ++i) { + gator_events_ccn504_enabled[i] = 0; + gator_events_ccn504_event[i] = 0; + gator_events_ccn504_key[i] = gator_events_get_key(); + } + + return gator_events_install(&gator_events_ccn504_interface); +} diff --git a/drivers/gator/gator_events_irq.c b/drivers/gator/gator_events_irq.c new file mode 100644 index 000000000000..b11879a248f8 --- /dev/null +++ b/drivers/gator/gator_events_irq.c @@ -0,0 +1,165 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include "gator.h" +#include <trace/events/irq.h> + +#define HARDIRQ 0 +#define SOFTIRQ 1 +#define TOTALIRQ (SOFTIRQ+1) + +static ulong hardirq_enabled; +static ulong softirq_enabled; +static ulong hardirq_key; +static ulong softirq_key; +static DEFINE_PER_CPU(atomic_t[TOTALIRQ], irqCnt); +static DEFINE_PER_CPU(int[TOTALIRQ * 2], irqGet); + +GATOR_DEFINE_PROBE(irq_handler_exit, + TP_PROTO(int irq, struct irqaction *action, int ret)) +{ + atomic_inc(&per_cpu(irqCnt, get_physical_cpu())[HARDIRQ]); +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37) +GATOR_DEFINE_PROBE(softirq_exit, TP_PROTO(struct softirq_action *h, struct softirq_action *vec)) +#else +GATOR_DEFINE_PROBE(softirq_exit, TP_PROTO(unsigned int vec_nr)) +#endif +{ + atomic_inc(&per_cpu(irqCnt, get_physical_cpu())[SOFTIRQ]); +} + +static int gator_events_irq_create_files(struct super_block *sb, struct dentry *root) +{ + struct dentry *dir; + + /* irq */ + dir = gatorfs_mkdir(sb, root, "Linux_irq_irq"); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &hardirq_enabled); + gatorfs_create_ro_ulong(sb, dir, "key", &hardirq_key); + + /* soft irq */ + dir = gatorfs_mkdir(sb, root, "Linux_irq_softirq"); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &softirq_enabled); + gatorfs_create_ro_ulong(sb, dir, "key", &softirq_key); + + return 0; +} + +static int gator_events_irq_online(int **buffer, bool migrate) +{ + int len = 0, cpu = get_physical_cpu(); + + // synchronization with the irq_exit functions is not necessary as the values are being reset + if (hardirq_enabled) { + atomic_set(&per_cpu(irqCnt, cpu)[HARDIRQ], 0); + per_cpu(irqGet, cpu)[len++] = hardirq_key; + per_cpu(irqGet, cpu)[len++] = 0; + } + + if (softirq_enabled) { + atomic_set(&per_cpu(irqCnt, 
cpu)[SOFTIRQ], 0); + per_cpu(irqGet, cpu)[len++] = softirq_key; + per_cpu(irqGet, cpu)[len++] = 0; + } + + if (buffer) + *buffer = per_cpu(irqGet, cpu); + + return len; +} + +static int gator_events_irq_start(void) +{ + // register tracepoints + if (hardirq_enabled) + if (GATOR_REGISTER_TRACE(irq_handler_exit)) + goto fail_hardirq_exit; + if (softirq_enabled) + if (GATOR_REGISTER_TRACE(softirq_exit)) + goto fail_softirq_exit; + pr_debug("gator: registered irq tracepoints\n"); + + return 0; + + // unregister tracepoints on error +fail_softirq_exit: + if (hardirq_enabled) + GATOR_UNREGISTER_TRACE(irq_handler_exit); +fail_hardirq_exit: + pr_err("gator: irq tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n"); + + return -1; +} + +static void gator_events_irq_stop(void) +{ + if (hardirq_enabled) + GATOR_UNREGISTER_TRACE(irq_handler_exit); + if (softirq_enabled) + GATOR_UNREGISTER_TRACE(softirq_exit); + pr_debug("gator: unregistered irq tracepoints\n"); + + hardirq_enabled = 0; + softirq_enabled = 0; +} + +static int gator_events_irq_read(int **buffer) +{ + int len, value; + int cpu = get_physical_cpu(); + + len = 0; + if (hardirq_enabled) { + value = atomic_read(&per_cpu(irqCnt, cpu)[HARDIRQ]); + atomic_sub(value, &per_cpu(irqCnt, cpu)[HARDIRQ]); + + per_cpu(irqGet, cpu)[len++] = hardirq_key; + per_cpu(irqGet, cpu)[len++] = value; + } + + if (softirq_enabled) { + value = atomic_read(&per_cpu(irqCnt, cpu)[SOFTIRQ]); + atomic_sub(value, &per_cpu(irqCnt, cpu)[SOFTIRQ]); + + per_cpu(irqGet, cpu)[len++] = softirq_key; + per_cpu(irqGet, cpu)[len++] = value; + } + + if (buffer) + *buffer = per_cpu(irqGet, cpu); + + return len; +} + +static struct gator_interface gator_events_irq_interface = { + .create_files = gator_events_irq_create_files, + .online = gator_events_irq_online, + .start = gator_events_irq_start, + .stop = gator_events_irq_stop, + .read = gator_events_irq_read, +}; + +int gator_events_irq_init(void) +{ + hardirq_key 
= gator_events_get_key(); + softirq_key = gator_events_get_key(); + + hardirq_enabled = 0; + softirq_enabled = 0; + + return gator_events_install(&gator_events_irq_interface); +} diff --git a/drivers/gator/gator_events_l2c-310.c b/drivers/gator/gator_events_l2c-310.c new file mode 100644 index 000000000000..ee521af22517 --- /dev/null +++ b/drivers/gator/gator_events_l2c-310.c @@ -0,0 +1,208 @@ +/** + * l2c310 (L2 Cache Controller) event counters for gator + * + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/io.h> +#include <linux/module.h> +#if defined(CONFIG_OF) +#include <linux/of.h> +#include <linux/of_address.h> +#endif +#include <asm/hardware/cache-l2x0.h> + +#include "gator.h" + +#define L2C310_COUNTERS_NUM 2 + +static struct { + unsigned long enabled; + unsigned long event; + unsigned long key; +} l2c310_counters[L2C310_COUNTERS_NUM]; + +static int l2c310_buffer[L2C310_COUNTERS_NUM * 2]; + +static void __iomem *l2c310_base; + +static void gator_events_l2c310_reset_counters(void) +{ + u32 val = readl(l2c310_base + L2X0_EVENT_CNT_CTRL); + + val |= ((1 << L2C310_COUNTERS_NUM) - 1) << 1; + + writel(val, l2c310_base + L2X0_EVENT_CNT_CTRL); +} + +static int gator_events_l2c310_create_files(struct super_block *sb, + struct dentry *root) +{ + int i; + + for (i = 0; i < L2C310_COUNTERS_NUM; i++) { + char buf[16]; + struct dentry *dir; + + snprintf(buf, sizeof(buf), "L2C-310_cnt%d", i); + dir = gatorfs_mkdir(sb, root, buf); + if (WARN_ON(!dir)) + return -1; + gatorfs_create_ulong(sb, dir, "enabled", + &l2c310_counters[i].enabled); + gatorfs_create_ulong(sb, dir, "event", + &l2c310_counters[i].event); + gatorfs_create_ro_ulong(sb, dir, "key", + &l2c310_counters[i].key); + } + + return 0; +} + +static int 
gator_events_l2c310_start(void) +{ + static const unsigned long l2x0_event_cntx_cfg[L2C310_COUNTERS_NUM] = { + L2X0_EVENT_CNT0_CFG, + L2X0_EVENT_CNT1_CFG, + }; + int i; + + /* Counter event sources */ + for (i = 0; i < L2C310_COUNTERS_NUM; i++) + writel((l2c310_counters[i].event & 0xf) << 2, + l2c310_base + l2x0_event_cntx_cfg[i]); + + gator_events_l2c310_reset_counters(); + + /* Event counter enable */ + writel(1, l2c310_base + L2X0_EVENT_CNT_CTRL); + + return 0; +} + +static void gator_events_l2c310_stop(void) +{ + /* Event counter disable */ + writel(0, l2c310_base + L2X0_EVENT_CNT_CTRL); +} + +static int gator_events_l2c310_read(int **buffer) +{ + static const unsigned long l2x0_event_cntx_val[L2C310_COUNTERS_NUM] = { + L2X0_EVENT_CNT0_VAL, + L2X0_EVENT_CNT1_VAL, + }; + int i; + int len = 0; + + if (!on_primary_core()) + return 0; + + for (i = 0; i < L2C310_COUNTERS_NUM; i++) { + if (l2c310_counters[i].enabled) { + l2c310_buffer[len++] = l2c310_counters[i].key; + l2c310_buffer[len++] = readl(l2c310_base + + l2x0_event_cntx_val[i]); + } + } + + /* l2c310 counters are saturating, not wrapping in case of overflow */ + gator_events_l2c310_reset_counters(); + + if (buffer) + *buffer = l2c310_buffer; + + return len; +} + +static struct gator_interface gator_events_l2c310_interface = { + .create_files = gator_events_l2c310_create_files, + .start = gator_events_l2c310_start, + .stop = gator_events_l2c310_stop, + .read = gator_events_l2c310_read, +}; + +#define L2C310_ADDR_PROBE (~0) + +MODULE_PARM_DESC(l2c310_addr, "L2C310 physical base address (0 to disable)"); +static unsigned long l2c310_addr = L2C310_ADDR_PROBE; +module_param(l2c310_addr, ulong, 0444); + +static void __iomem *gator_events_l2c310_probe(void) +{ + phys_addr_t variants[] = { +#if defined(CONFIG_ARCH_EXYNOS4) || defined(CONFIG_ARCH_S5PV310) + 0x10502000, +#endif +#if defined(CONFIG_ARCH_OMAP4) + 0x48242000, +#endif +#if defined(CONFIG_ARCH_TEGRA) + 0x50043000, +#endif +#if defined(CONFIG_ARCH_U8500) + 
0xa0412000, +#endif +#if defined(CONFIG_ARCH_VEXPRESS) + 0x1e00a000, // A9x4 core tile (HBI-0191) + 0x2c0f0000, // New memory map tiles +#endif + }; + int i; + void __iomem *base; +#if defined(CONFIG_OF) + struct device_node *node = of_find_all_nodes(NULL); + + if (node) { + of_node_put(node); + + node = of_find_compatible_node(NULL, NULL, "arm,pl310-cache"); + base = of_iomap(node, 0); + of_node_put(node); + + return base; + } +#endif + + for (i = 0; i < ARRAY_SIZE(variants); i++) { + base = ioremap(variants[i], SZ_4K); + if (base) { + u32 cache_id = readl(base + L2X0_CACHE_ID); + + if ((cache_id & 0xff0003c0) == 0x410000c0) + return base; + + iounmap(base); + } + } + + return NULL; +} + +int gator_events_l2c310_init(void) +{ + int i; + + if (gator_cpuid() != CORTEX_A5 && gator_cpuid() != CORTEX_A9) + return -1; + + if (l2c310_addr == L2C310_ADDR_PROBE) + l2c310_base = gator_events_l2c310_probe(); + else if (l2c310_addr) + l2c310_base = ioremap(l2c310_addr, SZ_4K); + + if (!l2c310_base) + return -1; + + for (i = 0; i < L2C310_COUNTERS_NUM; i++) { + l2c310_counters[i].enabled = 0; + l2c310_counters[i].key = gator_events_get_key(); + } + + return gator_events_install(&gator_events_l2c310_interface); +} diff --git a/drivers/gator/gator_events_mali_4xx.c b/drivers/gator/gator_events_mali_4xx.c new file mode 100644 index 000000000000..6719c1ec73a2 --- /dev/null +++ b/drivers/gator/gator_events_mali_4xx.c @@ -0,0 +1,723 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */
+
+#include "gator.h"
+
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+
+#include "linux/mali_linux_trace.h"
+
+#include "gator_events_mali_common.h"
+#include "gator_events_mali_4xx.h"
+
+/*
+ * There are (currently) four different variants of the comms between gator and Mali:
+ * 1 (deprecated): No software counter support
+ * 2 (deprecated): Tracepoint called for each separate s/w counter value as it appears
+ * 3 (default): Single tracepoint for all s/w counters in a bundle.
+ * 4: As style 3, but for the Utgard (Mali-450) driver.
+ * Interface style 3 is the default if no other is specified. Styles 1 and 2 will be
+ * eliminated when existing Mali DDKs are upgraded.
+ */
+
+/* Fall back to style 3 unless the build already chose a style */
+#if !defined(GATOR_MALI_INTERFACE_STYLE)
+#define GATOR_MALI_INTERFACE_STYLE (3)
+#endif
+
+/* Styles below 4 use the Mjollnir profiling API header; style 4 and above use the Utgard one */
+#if GATOR_MALI_INTERFACE_STYLE < 4
+#include "mali/mali_mjollnir_profiling_gator_api.h"
+#else
+#include "mali/mali_utgard_profiling_gator_api.h"
+#endif
+
+/*
+ * Check that the MALI_SUPPORT define is set to one of the allowable device codes.
+ */
+#if (MALI_SUPPORT != MALI_4xx)
+#error MALI_SUPPORT set to an invalid device code: expecting MALI_4xx
+#endif
+
+/* gatorfs variables for counter enable state,
+ * the event the counter should count and the
+ * 'key' (a unique id set by gatord and returned
+ * by gator.ko)
+ */
+static unsigned long counter_enabled[NUMBER_OF_EVENTS];
+static unsigned long counter_event[NUMBER_OF_EVENTS];
+static unsigned long counter_key[NUMBER_OF_EVENTS];
+
+/* The data we have recorded */
+static u32 counter_data[NUMBER_OF_EVENTS];
+/* The address to sample (or 0 if samples are sent to us) */
+static u32 *counter_address[NUMBER_OF_EVENTS];
+
+/* An array used to return the data we recorded
+ * as key,value pairs hence the *2
+ */
+static unsigned long counter_dump[NUMBER_OF_EVENTS * 2];
+static unsigned long counter_prev[NUMBER_OF_EVENTS];
+
+/* Note whether tracepoints have been registered */
+static int trace_registered;
+
+/*
+ * These numbers define the actual numbers of each block type that exist in the system. Initially
+ * these are set to the MAX_NUM_* maxima; if the driver is capable of being queried (newer
+ * drivers only) then the values may be revised.
+ */
+static unsigned int n_vp_cores = MAX_NUM_VP_CORES;
+static unsigned int n_l2_cores = MAX_NUM_L2_CACHE_CORES;
+static unsigned int n_fp_cores = MAX_NUM_FP_CORES;
+
+/**
+ * Calculate the difference between two samples of a wrapping 32-bit counter.
+ *
+ * @param start The value to subtract from.
+ * @param end   The value to subtract.
+ *
+ * @return start - end, reduced modulo 2^32.
+ */
+static u32 get_difference(u32 start, u32 end)
+{
+	/*
+	 * Mali counters are unsigned 32 bit values that wrap.  Unsigned
+	 * subtraction is itself performed modulo 2^32, so it already yields
+	 * the wrapped distance.  The former "start - end >= 0" test was always
+	 * true for u32, which made its fallback branch unreachable (and that
+	 * branch was off by one).
+	 */
+	return start - end;
+}
+
+/**
+ * Returns non-zero if the given counter ID is an activity counter.
+ */
+static inline int is_activity_counter(unsigned int event_id)
+{
+	return (event_id >= FIRST_ACTIVITY_EVENT &&
+		event_id <= LAST_ACTIVITY_EVENT);
+}
+
+/**
+ * Returns non-zero if the given counter ID is a hardware counter.
+ */
+static inline int is_hw_counter(unsigned int event_id)
+{
+	return (event_id >= FIRST_HW_COUNTER && event_id <= LAST_HW_COUNTER);
+}
+
+/*
+ * These are provided for utgard compatibility: function types for the
+ * profiling entry points of the Mali driver.
+ */
+typedef void _mali_profiling_get_mali_version_type(struct _mali_profiling_mali_version *values);
+typedef u32 _mali_profiling_get_l2_counters_type(_mali_profiling_l2_counter_values *values);
+
+#if GATOR_MALI_INTERFACE_STYLE == 2
+/**
+ * Returns non-zero if the given counter ID is a software counter.
+ */
+static inline int is_sw_counter(unsigned int event_id)
+{
+	return (event_id >= FIRST_SW_COUNTER && event_id <= LAST_SW_COUNTER);
+}
+#endif
+
+#if GATOR_MALI_INTERFACE_STYLE == 2
+/*
+ * The Mali DDK uses s64 types to contain software counter values, but gator
+ * can only use a maximum of 32 bits. This function scales a software counter
+ * to an appropriate range: the two GLES upload-time counters are divided by
+ * 1000000, every other counter is simply truncated to 32 bits.
+ */
+static u32 scale_sw_counter_value(unsigned int event_id, signed long long value)
+{
+	u32 scaled_value;
+
+	switch (event_id) {
+	case COUNTER_GLES_UPLOAD_TEXTURE_TIME:
+	case COUNTER_GLES_UPLOAD_VBO_TIME:
+		scaled_value = (u32)div_s64(value, 1000000);
+		break;
+	default:
+		scaled_value = (u32)value;
+		break;
+	}
+
+	return scaled_value;
+}
+#endif
+
+/* Probe for continuously sampled counter (compiled out) */
+#if 0 //WE_DONT_CURRENTLY_USE_THIS_SO_SUPPRESS_WARNING
+GATOR_DEFINE_PROBE(mali_sample_address, TP_PROTO(unsigned int event_id, u32 *addr))
+{
+	/* Turning on too many pr_debug statements in frequently called functions
+	 * can cause stability and/or performance problems
+	 */
+	//pr_debug("gator: mali_sample_address %d %d\n", event_id, addr);
+	if (event_id >= ACTIVITY_VP && event_id <= COUNTER_FP3_C1) {
+		counter_address[event_id] = addr;
+	}
+}
+#endif
+
+/*
+ * Probe for hardware counter events: stores the reported value in
+ * counter_data[] when event_id lies in the hardware counter range, and
+ * ignores it otherwise.
+ */
+GATOR_DEFINE_PROBE(mali_hw_counter, TP_PROTO(unsigned int event_id, unsigned int value))
+{
+	/* Turning on too many pr_debug statements in frequently called functions
+	 * can cause stability and/or performance problems
+	 */
+	//pr_debug("gator: mali_hw_counter %d %d\n", event_id, value);
+	if (is_hw_counter(event_id)) {
+		counter_data[event_id] = value;
+	}
+}
+
+#if GATOR_MALI_INTERFACE_STYLE == 2
+/*
+ * Style 2 probe: one call per software counter.  The 64-bit value is scaled
+ * to 32 bits before it is stored; IDs outside the software range are ignored.
+ */
+GATOR_DEFINE_PROBE(mali_sw_counter, TP_PROTO(unsigned int event_id, signed long long value))
+{
+	if (is_sw_counter(event_id)) {
+		counter_data[event_id] = scale_sw_counter_value(event_id, value);
+	}
+}
+#endif /* GATOR_MALI_INTERFACE_STYLE == 2 */
+
+#if GATOR_MALI_INTERFACE_STYLE >= 3
+/*
+ * Style 3+ probe: all software counters arrive in one array, where
+ * counters[0] corresponds to event FIRST_SW_COUNTER.  pid, tid and
+ * surface_id are not used here.
+ */
+GATOR_DEFINE_PROBE(mali_sw_counters, TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters))
+{
+	u32 i;
+
+	/* Copy over the values for those counters which are enabled. */
+	for (i = FIRST_SW_COUNTER; i <= LAST_SW_COUNTER; i++) {
+		if (counter_enabled[i]) {
+			counter_data[i] = (u32)(counters[i - FIRST_SW_COUNTER]);
+		}
+	}
+}
+#endif /* GATOR_MALI_INTERFACE_STYLE >= 3 */
+
+/**
+ * Create a single filesystem entry for a specified event.
+ * @param sb the superblock
+ * @param root Filesystem root
+ * @param name The name of the entry to create
+ * @param event The ID of the event
+ * @param create_event_item boolean indicating whether to create an 'event' filesystem entry. True to create.
+ *
+ * @return 0 if ok, non-zero if the create failed.
+ */ +static int create_fs_entry(struct super_block *sb, struct dentry *root, const char *name, int event, int create_event_item) +{ + struct dentry *dir; + + dir = gatorfs_mkdir(sb, root, name); + + if (!dir) { + return -1; + } + + if (create_event_item) { + gatorfs_create_ulong(sb, dir, "event", &counter_event[event]); + } + + gatorfs_create_ulong(sb, dir, "enabled", &counter_enabled[event]); + gatorfs_create_ro_ulong(sb, dir, "key", &counter_key[event]); + + return 0; +} + +#if GATOR_MALI_INTERFACE_STYLE > 3 +/* + * Read the version info structure if available + */ +static void initialise_version_info(void) +{ + _mali_profiling_get_mali_version_type *mali_profiling_get_mali_version_symbol; + + mali_profiling_get_mali_version_symbol = symbol_get(_mali_profiling_get_mali_version); + + if (mali_profiling_get_mali_version_symbol) { + struct _mali_profiling_mali_version version_info; + + pr_debug("gator: mali online _mali_profiling_get_mali_version symbol @ %p\n", + mali_profiling_get_mali_version_symbol); + + /* + * Revise the number of each different core type using information derived from the DDK. + */ + mali_profiling_get_mali_version_symbol(&version_info); + + n_fp_cores = version_info.num_of_fp_cores; + n_vp_cores = version_info.num_of_vp_cores; + n_l2_cores = version_info.num_of_l2_cores; + + /* Release the function - we're done with it. */ + symbol_put(_mali_profiling_get_mali_version); + } else { + printk("gator: mali online _mali_profiling_get_mali_version symbol not found\n"); + } +} +#endif + +static int create_files(struct super_block *sb, struct dentry *root) +{ + int event; + const char *mali_name = gator_mali_get_mali_name(); + + char buf[40]; + int core_id; + int counter_number; + + pr_debug("gator: Initialising counters with style = %d\n", GATOR_MALI_INTERFACE_STYLE); + +#if GATOR_MALI_INTERFACE_STYLE > 3 + /* + * Initialise first: this sets up the number of cores available (on compatible DDK versions). 
+ * Ideally this would not need guarding but other parts of the code depend on the interface style being set + * correctly; if it is not then the system can enter an inconsistent state. + */ + initialise_version_info(); +#endif + + /* Vertex processor counters */ + for (core_id = 0; core_id < n_vp_cores; core_id++) { + int activity_counter_id = ACTIVITY_VP_0; + snprintf(buf, sizeof buf, "ARM_%s_VP_%d_active", mali_name, core_id); + if (create_fs_entry(sb, root, buf, activity_counter_id, 0) != 0) { + return -1; + } + + for (counter_number = 0; counter_number < 2; counter_number++) { + int counter_id = COUNTER_VP_0_C0 + (2 * core_id) + counter_number; + + snprintf(buf, sizeof buf, "ARM_%s_VP_%d_cnt%d", mali_name, core_id, counter_number); + if (create_fs_entry(sb, root, buf, counter_id, 1) != 0) { + return -1; + } + } + } + + /* Fragment processors' counters */ + for (core_id = 0; core_id < n_fp_cores; core_id++) { + int activity_counter_id = ACTIVITY_FP_0 + core_id; + + snprintf(buf, sizeof buf, "ARM_%s_FP_%d_active", mali_name, core_id); + if (create_fs_entry(sb, root, buf, activity_counter_id, 0) != 0) { + return -1; + } + + for (counter_number = 0; counter_number < 2; counter_number++) { + int counter_id = COUNTER_FP_0_C0 + (2 * core_id) + counter_number; + + snprintf(buf, sizeof buf, "ARM_%s_FP_%d_cnt%d", mali_name, core_id, counter_number); + if (create_fs_entry(sb, root, buf, counter_id, 1) != 0) { + return -1; + } + } + } + + /* L2 Cache counters */ + for (core_id = 0; core_id < n_l2_cores; core_id++) { + for (counter_number = 0; counter_number < 2; counter_number++) { + int counter_id = COUNTER_L2_0_C0 + (2 * core_id) + counter_number; + + snprintf(buf, sizeof buf, "ARM_%s_L2_%d_cnt%d", mali_name, core_id, counter_number); + if (create_fs_entry(sb, root, buf, counter_id, 1) != 0) { + return -1; + } + } + } + + /* Now set up the software counter entries */ + for (event = FIRST_SW_COUNTER; event <= LAST_SW_COUNTER; event++) { + snprintf(buf, sizeof(buf), 
"ARM_%s_SW_%d", mali_name, event - FIRST_SW_COUNTER); + + if (create_fs_entry(sb, root, buf, event, 0) != 0) { + return -1; + } + } + + /* Now set up the special counter entries */ + snprintf(buf, sizeof(buf), "ARM_%s_Filmstrip_cnt0", mali_name); + if (create_fs_entry(sb, root, buf, COUNTER_FILMSTRIP, 1) != 0) { + return -1; + } + +#ifdef DVFS_REPORTED_BY_DDK + snprintf(buf, sizeof(buf), "ARM_%s_Frequency", mali_name); + if (create_fs_entry(sb, root, buf, COUNTER_FREQUENCY, 1) != 0) { + return -1; + } + + snprintf(buf, sizeof(buf), "ARM_%s_Voltage", mali_name); + if (create_fs_entry(sb, root, buf, COUNTER_VOLTAGE, 1) != 0) { + return -1; + } +#endif + + return 0; +} + +/* + * Local store for the get_counters entry point into the DDK. + * This is stored here since it is used very regularly. + */ +static mali_profiling_get_counters_type *mali_get_counters = NULL; +static _mali_profiling_get_l2_counters_type *mali_get_l2_counters = NULL; + +/* + * Examine list of counters between two index limits and determine if any one is enabled. + * Returns 1 if any counter is enabled, 0 if none is. 
+ */ +static int is_any_counter_enabled(unsigned int first_counter, unsigned int last_counter) +{ + unsigned int i; + + for (i = first_counter; i <= last_counter; i++) { + if (counter_enabled[i]) { + return 1; /* At least one counter is enabled */ + } + } + + return 0; /* No s/w counters enabled */ +} + +static void init_counters(unsigned int from_counter, unsigned int to_counter) +{ + unsigned int counter_id; + + /* If a Mali driver is present and exporting the appropriate symbol + * then we can request the HW counters (of which there are only 2) + * be configured to count the desired events + */ + mali_profiling_set_event_type *mali_set_hw_event; + + mali_set_hw_event = symbol_get(_mali_profiling_set_event); + + if (mali_set_hw_event) { + pr_debug("gator: mali online _mali_profiling_set_event symbol @ %p\n", mali_set_hw_event); + + for (counter_id = from_counter; counter_id <= to_counter; counter_id++) { + if (counter_enabled[counter_id]) { + mali_set_hw_event(counter_id, counter_event[counter_id]); + } else { + mali_set_hw_event(counter_id, 0xFFFFFFFF); + } + } + + symbol_put(_mali_profiling_set_event); + } else { + printk("gator: mali online _mali_profiling_set_event symbol not found\n"); + } +} + +static void mali_counter_initialize(void) +{ + int i; + int core_id; + + mali_profiling_control_type *mali_control; + + init_counters(COUNTER_L2_0_C0, COUNTER_L2_0_C0 + (2 * n_l2_cores) - 1); + init_counters(COUNTER_VP_0_C0, COUNTER_VP_0_C0 + (2 * n_vp_cores) - 1); + init_counters(COUNTER_FP_0_C0, COUNTER_FP_0_C0 + (2 * n_fp_cores) - 1); + + /* Generic control interface for Mali DDK. */ + mali_control = symbol_get(_mali_profiling_control); + if (mali_control) { + /* The event attribute in the XML file keeps the actual frame rate. 
*/ + unsigned int rate = counter_event[COUNTER_FILMSTRIP] & 0xff; + unsigned int resize_factor = (counter_event[COUNTER_FILMSTRIP] >> 8) & 0xff; + + pr_debug("gator: mali online _mali_profiling_control symbol @ %p\n", mali_control); + + mali_control(SW_COUNTER_ENABLE, (is_any_counter_enabled(FIRST_SW_COUNTER, LAST_SW_COUNTER) ? 1 : 0)); + mali_control(FBDUMP_CONTROL_ENABLE, (counter_enabled[COUNTER_FILMSTRIP] ? 1 : 0)); + mali_control(FBDUMP_CONTROL_RATE, rate); + mali_control(FBDUMP_CONTROL_RESIZE_FACTOR, resize_factor); + + pr_debug("gator: sent mali_control enabled=%d, rate=%d\n", (counter_enabled[COUNTER_FILMSTRIP] ? 1 : 0), rate); + + symbol_put(_mali_profiling_control); + } else { + printk("gator: mali online _mali_profiling_control symbol not found\n"); + } + + mali_get_counters = symbol_get(_mali_profiling_get_counters); + if (mali_get_counters) { + pr_debug("gator: mali online _mali_profiling_get_counters symbol @ %p\n", mali_get_counters); + + } else { + pr_debug("gator WARNING: mali _mali_profiling_get_counters symbol not defined"); + } + + mali_get_l2_counters = symbol_get(_mali_profiling_get_l2_counters); + if (mali_get_l2_counters) { + pr_debug("gator: mali online _mali_profiling_get_l2_counters symbol @ %p\n", mali_get_l2_counters); + + } else { + pr_debug("gator WARNING: mali _mali_profiling_get_l2_counters symbol not defined"); + } + + if (!mali_get_counters && !mali_get_l2_counters) { + pr_debug("gator: WARNING: no L2 counters available"); + n_l2_cores = 0; + } + + for (core_id = 0; core_id < n_l2_cores; core_id++) { + int counter_id = COUNTER_L2_0_C0 + (2 * core_id); + counter_prev[counter_id] = 0; + counter_prev[counter_id + 1] = 0; + } + + /* Clear counters in the start */ + for (i = 0; i < NUMBER_OF_EVENTS; i++) { + counter_data[i] = 0; + } +} + +static void mali_counter_deinitialize(void) +{ + mali_profiling_set_event_type *mali_set_hw_event; + mali_profiling_control_type *mali_control; + + mali_set_hw_event = 
symbol_get(_mali_profiling_set_event); + + if (mali_set_hw_event) { + int i; + + pr_debug("gator: mali offline _mali_profiling_set_event symbol @ %p\n", mali_set_hw_event); + for (i = FIRST_HW_COUNTER; i <= LAST_HW_COUNTER; i++) { + mali_set_hw_event(i, 0xFFFFFFFF); + } + + symbol_put(_mali_profiling_set_event); + } else { + printk("gator: mali offline _mali_profiling_set_event symbol not found\n"); + } + + /* Generic control interface for Mali DDK. */ + mali_control = symbol_get(_mali_profiling_control); + + if (mali_control) { + pr_debug("gator: mali offline _mali_profiling_control symbol @ %p\n", mali_control); + + /* Reset the DDK state - disable counter collection */ + mali_control(SW_COUNTER_ENABLE, 0); + + mali_control(FBDUMP_CONTROL_ENABLE, 0); + + symbol_put(_mali_profiling_control); + } else { + printk("gator: mali offline _mali_profiling_control symbol not found\n"); + } + + if (mali_get_counters) { + symbol_put(_mali_profiling_get_counters); + } + + if (mali_get_l2_counters) { + symbol_put(_mali_profiling_get_l2_counters); + } +} + +static int start(void) +{ + // register tracepoints + if (GATOR_REGISTER_TRACE(mali_hw_counter)) { + printk("gator: mali_hw_counter tracepoint failed to activate\n"); + return -1; + } + +#if GATOR_MALI_INTERFACE_STYLE == 1 + /* None. */ +#elif GATOR_MALI_INTERFACE_STYLE == 2 + /* For patched Mali driver. */ + if (GATOR_REGISTER_TRACE(mali_sw_counter)) { + printk("gator: mali_sw_counter tracepoint failed to activate\n"); + return -1; + } +#elif GATOR_MALI_INTERFACE_STYLE >= 3 + /* For Mali drivers with built-in support. */ + if (GATOR_REGISTER_TRACE(mali_sw_counters)) { + printk("gator: mali_sw_counters tracepoint failed to activate\n"); + return -1; + } +#else +#error Unknown GATOR_MALI_INTERFACE_STYLE option. 
+#endif + + trace_registered = 1; + + mali_counter_initialize(); + return 0; +} + +static void stop(void) +{ + unsigned int cnt; + + pr_debug("gator: mali stop\n"); + + if (trace_registered) { + GATOR_UNREGISTER_TRACE(mali_hw_counter); + +#if GATOR_MALI_INTERFACE_STYLE == 1 + /* None. */ +#elif GATOR_MALI_INTERFACE_STYLE == 2 + /* For patched Mali driver. */ + GATOR_UNREGISTER_TRACE(mali_sw_counter); +#elif GATOR_MALI_INTERFACE_STYLE >= 3 + /* For Mali drivers with built-in support. */ + GATOR_UNREGISTER_TRACE(mali_sw_counters); +#else +#error Unknown GATOR_MALI_INTERFACE_STYLE option. +#endif + + pr_debug("gator: mali timeline tracepoint deactivated\n"); + + trace_registered = 0; + } + + for (cnt = 0; cnt < NUMBER_OF_EVENTS; cnt++) { + counter_enabled[cnt] = 0; + counter_event[cnt] = 0; + counter_address[cnt] = NULL; + } + + mali_counter_deinitialize(); +} + +static void dump_counters(unsigned int from_counter, unsigned int to_counter, unsigned int *len) +{ + unsigned int counter_id; + + for (counter_id = from_counter; counter_id <= to_counter; counter_id++) { + if (counter_enabled[counter_id]) { + counter_dump[(*len)++] = counter_key[counter_id]; + counter_dump[(*len)++] = counter_data[counter_id]; + + counter_data[counter_id] = 0; + } + } +} + +static int read(int **buffer) +{ + int len = 0; + + if (!on_primary_core()) + return 0; + + // Read the L2 C0 and C1 here. 
+ if (n_l2_cores > 0 && is_any_counter_enabled(COUNTER_L2_0_C0, COUNTER_L2_0_C0 + (2 * n_l2_cores))) { + unsigned int unavailable_l2_caches = 0; + _mali_profiling_l2_counter_values cache_values; + unsigned int cache_id; + struct _mali_profiling_core_counters *per_core; + + /* Poke the driver to get the counter values - older style; only one L2 cache */ + if (mali_get_l2_counters) { + unavailable_l2_caches = mali_get_l2_counters(&cache_values); + } else if (mali_get_counters) { + per_core = &cache_values.cores[0]; + mali_get_counters(&per_core->source0, &per_core->value0, &per_core->source1, &per_core->value1); + } else { + /* This should never happen, as n_l2_caches is only set > 0 if one of the above functions is found. */ + } + + /* Fill in the two cache counter values for each cache block. */ + for (cache_id = 0; cache_id < n_l2_cores; cache_id++) { + unsigned int counter_id_0 = COUNTER_L2_0_C0 + (2 * cache_id); + unsigned int counter_id_1 = counter_id_0 + 1; + + if ((1 << cache_id) & unavailable_l2_caches) { + continue; /* This cache is unavailable (powered-off, possibly). */ + } + + per_core = &cache_values.cores[cache_id]; + + if (counter_enabled[counter_id_0]) { + // Calculate and save src0's counter val0 + counter_dump[len++] = counter_key[counter_id_0]; + counter_dump[len++] = get_difference(per_core->value0, counter_prev[counter_id_0]); + } + + if (counter_enabled[counter_id_1]) { + // Calculate and save src1's counter val1 + counter_dump[len++] = counter_key[counter_id_1]; + counter_dump[len++] = get_difference(per_core->value1, counter_prev[counter_id_1]); + } + + // Save the previous values for the counters. + counter_prev[counter_id_0] = per_core->value0; + counter_prev[counter_id_1] = per_core->value1; + } + } + + /* Process other (non-timeline) counters. 
*/ + dump_counters(COUNTER_VP_0_C0, COUNTER_VP_0_C0 + (2 * n_vp_cores) - 1, &len); + dump_counters(COUNTER_FP_0_C0, COUNTER_FP_0_C0 + (2 * n_fp_cores) - 1, &len); + + dump_counters(FIRST_SW_COUNTER, LAST_SW_COUNTER, &len); + +#ifdef DVFS_REPORTED_BY_DDK + { + int cnt; + /* + * Add in the voltage and frequency counters if enabled. Note that, since these are + * actually passed as events, the counter value should not be cleared. + */ + cnt = COUNTER_FREQUENCY; + if (counter_enabled[cnt]) { + counter_dump[len++] = counter_key[cnt]; + counter_dump[len++] = counter_data[cnt]; + } + + cnt = COUNTER_VOLTAGE; + if (counter_enabled[cnt]) { + counter_dump[len++] = counter_key[cnt]; + counter_dump[len++] = counter_data[cnt]; + } + } +#endif + + if (buffer) { + *buffer = (int *)counter_dump; + } + + return len; +} + +static struct gator_interface gator_events_mali_interface = { + .create_files = create_files, + .start = start, + .stop = stop, + .read = read, +}; + +extern void gator_events_mali_log_dvfs_event(unsigned int frequency_mhz, unsigned int voltage_mv) +{ +#ifdef DVFS_REPORTED_BY_DDK + counter_data[COUNTER_FREQUENCY] = frequency_mhz; + counter_data[COUNTER_VOLTAGE] = voltage_mv; +#endif +} + +int gator_events_mali_init(void) +{ + unsigned int cnt; + + pr_debug("gator: mali init\n"); + + for (cnt = 0; cnt < NUMBER_OF_EVENTS; cnt++) { + counter_enabled[cnt] = 0; + counter_event[cnt] = 0; + counter_key[cnt] = gator_events_get_key(); + counter_address[cnt] = NULL; + counter_data[cnt] = 0; + } + + trace_registered = 0; + + return gator_events_install(&gator_events_mali_interface); +} diff --git a/drivers/gator/gator_events_mali_4xx.h b/drivers/gator/gator_events_mali_4xx.h new file mode 100644 index 000000000000..413ad0ffe794 --- /dev/null +++ b/drivers/gator/gator_events_mali_4xx.h @@ -0,0 +1,18 @@ +/** + * Copyright (C) ARM Limited 2011-2013. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +/* + * Header contains common definitions for the Mali-4xx processors. + */ +#if !defined(GATOR_EVENTS_MALI_4xx_H) +#define GATOR_EVENTS_MALI_4xx_H + +extern void gator_events_mali_log_dvfs_event(unsigned int d0, unsigned int d1); + +#endif /* GATOR_EVENTS_MALI_4xx_H */ diff --git a/drivers/gator/gator_events_mali_common.c b/drivers/gator/gator_events_mali_common.c new file mode 100644 index 000000000000..466ca1683c7e --- /dev/null +++ b/drivers/gator/gator_events_mali_common.c @@ -0,0 +1,81 @@ +/** + * Copyright (C) ARM Limited 2012-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include "gator_events_mali_common.h" + +static u32 gator_mali_get_id(void) +{ + return MALI_SUPPORT; +} + +extern const char *gator_mali_get_mali_name(void) +{ + u32 id = gator_mali_get_id(); + + switch (id) { + case MALI_T6xx: + return "Mali-T6xx"; + case MALI_4xx: + return "Mali-4xx"; + default: + pr_debug("gator: Mali-T6xx: unknown Mali ID (%d)\n", id); + return "Mali-Unknown"; + } +} + +extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, struct dentry *root, mali_counter *counter, unsigned long *event) +{ + int err; + char buf[255]; + struct dentry *dir; + + /* If the counter name is empty ignore it */ + if (strlen(event_name) != 0) { + /* Set up the filesystem entry for this event. 
*/ + snprintf(buf, sizeof(buf), "ARM_%s_%s", mali_name, event_name); + + dir = gatorfs_mkdir(sb, root, buf); + + if (dir == NULL) { + pr_debug("gator: Mali-T6xx: error creating file system for: %s (%s)", event_name, buf); + return -1; + } + + err = gatorfs_create_ulong(sb, dir, "enabled", &counter->enabled); + if (err != 0) { + pr_debug("gator: Mali-T6xx: error calling gatorfs_create_ulong for: %s (%s)", event_name, buf); + return -1; + } + err = gatorfs_create_ro_ulong(sb, dir, "key", &counter->key); + if (err != 0) { + pr_debug("gator: Mali-T6xx: error calling gatorfs_create_ro_ulong for: %s (%s)", event_name, buf); + return -1; + } + if (event != NULL) { + err = gatorfs_create_ulong(sb, dir, "event", event); + if (err != 0) { + pr_debug("gator: Mali-T6xx: error calling gatorfs_create_ro_ulong for: %s (%s)", event_name, buf); + return -1; + } + } + } + + return 0; +} + +extern void gator_mali_initialise_counters(mali_counter counters[], unsigned int n_counters) +{ + unsigned int cnt; + + for (cnt = 0; cnt < n_counters; cnt++) { + mali_counter *counter = &counters[cnt]; + + counter->key = gator_events_get_key(); + counter->enabled = 0; + } +} diff --git a/drivers/gator/gator_events_mali_common.h b/drivers/gator/gator_events_mali_common.h new file mode 100644 index 000000000000..509f9b61884a --- /dev/null +++ b/drivers/gator/gator_events_mali_common.h @@ -0,0 +1,86 @@ +/** + * Copyright (C) ARM Limited 2012-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
/**
 * Creates a filesystem entry under /dev/gator relating to the specified event name, and
 * associates the counter's key/enable values with this entry point.
 *
 * @param mali_name A name related to the type of GPU, obtained from a call to gator_mali_get_mali_name()
 * @param event_name The name of the event. An empty name is ignored: no entry is created and 0 is returned.
 * @param sb Linux super block
 * @param root Directory under which the entry will be created.
 * @param counter Ptr to the counter whose 'key' and 'enabled' fields are exposed through the entry.
 * @param event Ptr to location which will be exposed as the entry's 'event' file, or NULL if no 'event' file is wanted.
 *
 * @return 0 if entry point was created, non-zero if not.
 */
extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, struct dentry *root, mali_counter *counter, unsigned long *event);
/*
 * Accumulators: names of the counters that, unlike the software counters,
 * are not zeroed after being passed to Streamline. The order must match
 * the accumulator enum that follows.
 */
static const char *accumulators_names[] = {
	"TOTAL_ALLOC_PAGES"
};
/**
 * Computes how many microseconds elapsed from one timestamp to another.
 *
 * The nanosecond difference is reduced to microseconds with integer
 * division (truncating towards zero) and added to the seconds difference.
 *
 * @param start Ptr to the start timestamp
 * @param end Ptr to the end timestamp
 *
 * @return Number of microseconds between the two timestamps (can be negative if start follows end).
 */
static inline long get_duration_us(const struct timespec *start, const struct timespec *end)
{
	const long sub_second_us = (end->tv_nsec - start->tv_nsec) / 1000;

	return sub_second_us + (end->tv_sec - start->tv_sec) * 1000000;
}
*/ + getnstimeofday(&event_timestamp); + + /* Accumulate the duration in us */ + timeline_data[timeline_index] += get_duration_us(event_start, &event_timestamp); + + /* Reset the start time to indicate the activity is stopped. */ + event_start->tv_sec = 0; + } + break; + + default: + /* Other activity events are ignored. */ + break; + } +} + +/* + * Documentation about the following tracepoints is in mali_linux_trace.h + */ + +GATOR_DEFINE_PROBE(mali_pm_status, TP_PROTO(unsigned int event_id, unsigned long long value)) +{ +#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ +#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ +#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ +#define BIT_AT(value, pos) ((value >> pos) & 1) + + static unsigned long long previous_shader_bitmask = 0; + static unsigned long long previous_tiler_bitmask = 0; + static unsigned long long previous_l2_bitmask = 0; + + switch (event_id) { + case SHADER_PRESENT_LO: + { + unsigned long long changed_bitmask = previous_shader_bitmask ^ value; + int pos; + + for (pos = 0; pos < NUM_PM_SHADER; ++pos) { + if (BIT_AT(changed_bitmask, pos)) { + record_timeline_event(PM_SHADER_0 + pos, BIT_AT(value, pos) ? ACTIVITY_START : ACTIVITY_STOP); + } + } + + previous_shader_bitmask = value; + break; + } + + case TILER_PRESENT_LO: + { + unsigned long long changed = previous_tiler_bitmask ^ value; + + if (BIT_AT(changed, 0)) { + record_timeline_event(PM_TILER_0, BIT_AT(value, 0) ? ACTIVITY_START : ACTIVITY_STOP); + } + + previous_tiler_bitmask = value; + break; + } + + case L2_PRESENT_LO: + { + unsigned long long changed = previous_l2_bitmask ^ value; + + if (BIT_AT(changed, 0)) { + record_timeline_event(PM_L2_0, BIT_AT(value, 0) ? ACTIVITY_START : ACTIVITY_STOP); + } + if (BIT_AT(changed, 4)) { + record_timeline_event(PM_L2_1, BIT_AT(value, 4) ? 
ACTIVITY_START : ACTIVITY_STOP); + } + + previous_l2_bitmask = value; + break; + } + + default: + /* No other blocks are supported at present */ + break; + } + +#undef SHADER_PRESENT_LO +#undef TILER_PRESENT_LO +#undef L2_PRESENT_LO +#undef BIT_AT +} + +GATOR_DEFINE_PROBE(mali_page_fault_insert_pages, TP_PROTO(int event_id, unsigned long value)) +{ + /* We add to the previous since we may receive many tracepoints in one sample period */ + sw_counter_data[MMU_PAGE_FAULT_0 + event_id] += value; +} + +GATOR_DEFINE_PROBE(mali_mmu_as_in_use, TP_PROTO(int event_id)) +{ + record_timeline_event(MMU_AS_0 + event_id, ACTIVITY_START); +} + +GATOR_DEFINE_PROBE(mali_mmu_as_released, TP_PROTO(int event_id)) +{ + record_timeline_event(MMU_AS_0 + event_id, ACTIVITY_STOP); +} + +GATOR_DEFINE_PROBE(mali_total_alloc_pages_change, TP_PROTO(long long int event_id)) +{ + accumulators_data[TOTAL_ALLOC_PAGES] = event_id; +} + +static int create_files(struct super_block *sb, struct dentry *root) +{ + int event; + /* + * Create the filesystem for all events + */ + int counter_index = 0; + const char *mali_name = gator_mali_get_mali_name(); + mali_profiling_control_type *mali_control; + + for (event = FIRST_TIMELINE_EVENT; event < FIRST_TIMELINE_EVENT + NUMBER_OF_TIMELINE_EVENTS; event++) { + if (gator_mali_create_file_system(mali_name, timeline_event_names[counter_index], sb, root, &counters[event], NULL) != 0) { + return -1; + } + counter_index++; + } + counter_index = 0; + for (event = FIRST_SOFTWARE_COUNTER; event < FIRST_SOFTWARE_COUNTER + NUMBER_OF_SOFTWARE_COUNTERS; event++) { + if (gator_mali_create_file_system(mali_name, software_counter_names[counter_index], sb, root, &counters[event], NULL) != 0) { + return -1; + } + counter_index++; + } + counter_index = 0; + for (event = FIRST_ACCUMULATOR; event < FIRST_ACCUMULATOR + NUMBER_OF_ACCUMULATORS; event++) { + if (gator_mali_create_file_system(mali_name, accumulators_names[counter_index], sb, root, &counters[event], NULL) != 0) { + 
return -1; + } + counter_index++; + } + + mali_control = symbol_get(_mali_profiling_control); + if (mali_control) { + if (gator_mali_create_file_system(mali_name, "Filmstrip_cnt0", sb, root, &counters[FILMSTRIP], &filmstrip_event) != 0) { + return -1; + } + symbol_put(_mali_profiling_control); + } + + return 0; +} + +static int register_tracepoints(void) +{ + if (GATOR_REGISTER_TRACE(mali_pm_status)) { + pr_debug("gator: Mali-T6xx: mali_pm_status tracepoint failed to activate\n"); + return 0; + } + + if (GATOR_REGISTER_TRACE(mali_page_fault_insert_pages)) { + pr_debug("gator: Mali-T6xx: mali_page_fault_insert_pages tracepoint failed to activate\n"); + return 0; + } + + if (GATOR_REGISTER_TRACE(mali_mmu_as_in_use)) { + pr_debug("gator: Mali-T6xx: mali_mmu_as_in_use tracepoint failed to activate\n"); + return 0; + } + + if (GATOR_REGISTER_TRACE(mali_mmu_as_released)) { + pr_debug("gator: Mali-T6xx: mali_mmu_as_released tracepoint failed to activate\n"); + return 0; + } + + if (GATOR_REGISTER_TRACE(mali_total_alloc_pages_change)) { + pr_debug("gator: Mali-T6xx: mali_total_alloc_pages_change tracepoint failed to activate\n"); + return 0; + } + + pr_debug("gator: Mali-T6xx: start\n"); + pr_debug("gator: Mali-T6xx: mali_pm_status probe is at %p\n", &probe_mali_pm_status); + pr_debug("gator: Mali-T6xx: mali_page_fault_insert_pages probe is at %p\n", &probe_mali_page_fault_insert_pages); + pr_debug("gator: Mali-T6xx: mali_mmu_as_in_use probe is at %p\n", &probe_mali_mmu_as_in_use); + pr_debug("gator: Mali-T6xx: mali_mmu_as_released probe is at %p\n", &probe_mali_mmu_as_released); + pr_debug("gator: Mali-T6xx: mali_total_alloc_pages_change probe is at %p\n", &probe_mali_total_alloc_pages_change); + + return 1; +} + +static int start(void) +{ + unsigned int cnt; + mali_profiling_control_type *mali_control; + + /* Clean all data for the next capture */ + for (cnt = 0; cnt < NUMBER_OF_TIMELINE_EVENTS; cnt++) { + timeline_event_starttime[cnt].tv_sec = 
timeline_event_starttime[cnt].tv_nsec = 0; + timeline_data[cnt] = 0; + } + + for (cnt = 0; cnt < NUMBER_OF_SOFTWARE_COUNTERS; cnt++) { + sw_counter_data[cnt] = 0; + } + + for (cnt = 0; cnt < NUMBER_OF_ACCUMULATORS; cnt++) { + accumulators_data[cnt] = 0; + } + + /* Register tracepoints */ + if (register_tracepoints() == 0) { + return -1; + } + + /* Generic control interface for Mali DDK. */ + mali_control = symbol_get(_mali_profiling_control); + if (mali_control) { + /* The event attribute in the XML file keeps the actual frame rate. */ + unsigned int enabled = counters[FILMSTRIP].enabled ? 1 : 0; + unsigned int rate = filmstrip_event & 0xff; + unsigned int resize_factor = (filmstrip_event >> 8) & 0xff; + + pr_debug("gator: mali online _mali_profiling_control symbol @ %p\n", mali_control); + +#define FBDUMP_CONTROL_ENABLE (1) +#define FBDUMP_CONTROL_RATE (2) +#define FBDUMP_CONTROL_RESIZE_FACTOR (4) + mali_control(FBDUMP_CONTROL_ENABLE, enabled); + mali_control(FBDUMP_CONTROL_RATE, rate); + mali_control(FBDUMP_CONTROL_RESIZE_FACTOR, resize_factor); + + pr_debug("gator: sent mali_control enabled=%d, rate=%d, resize_factor=%d\n", enabled, rate, resize_factor); + + symbol_put(_mali_profiling_control); + } else { + printk("gator: mali online _mali_profiling_control symbol not found\n"); + } + + /* + * Set the first timestamp for calculating the sample interval. The first interval could be quite long, + * since it will be the time between 'start' and the first 'read'. + * This means that timeline values will be divided by a big number for the first sample. + */ + getnstimeofday(&prev_timestamp); + + return 0; +} + +static void stop(void) +{ + mali_profiling_control_type *mali_control; + + pr_debug("gator: Mali-T6xx: stop\n"); + + /* + * It is safe to unregister traces even if they were not successfully + * registered, so no need to check. 
+ */ + GATOR_UNREGISTER_TRACE(mali_pm_status); + pr_debug("gator: Mali-T6xx: mali_pm_status tracepoint deactivated\n"); + + GATOR_UNREGISTER_TRACE(mali_page_fault_insert_pages); + pr_debug("gator: Mali-T6xx: mali_page_fault_insert_pages tracepoint deactivated\n"); + + GATOR_UNREGISTER_TRACE(mali_mmu_as_in_use); + pr_debug("gator: Mali-T6xx: mali_mmu_as_in_use tracepoint deactivated\n"); + + GATOR_UNREGISTER_TRACE(mali_mmu_as_released); + pr_debug("gator: Mali-T6xx: mali_mmu_as_released tracepoint deactivated\n"); + + GATOR_UNREGISTER_TRACE(mali_total_alloc_pages_change); + pr_debug("gator: Mali-T6xx: mali_total_alloc_pages_change tracepoint deactivated\n"); + + /* Generic control interface for Mali DDK. */ + mali_control = symbol_get(_mali_profiling_control); + if (mali_control) { + pr_debug("gator: mali offline _mali_profiling_control symbol @ %p\n", mali_control); + + mali_control(FBDUMP_CONTROL_ENABLE, 0); + + symbol_put(_mali_profiling_control); + } else { + printk("gator: mali offline _mali_profiling_control symbol not found\n"); + } +} + +static int read(int **buffer) +{ + int cnt; + int len = 0; + long sample_interval_us = 0; + struct timespec read_timestamp; + + if (!on_primary_core()) { + return 0; + } + + /* Get the start of this sample period. */ + getnstimeofday(&read_timestamp); + + /* + * Calculate the sample interval if the previous sample time is valid. + * We use tv_sec since it will not be 0. + */ + if (prev_timestamp.tv_sec != 0) { + sample_interval_us = get_duration_us(&prev_timestamp, &read_timestamp); + } + + /* Structure copy. Update the previous timestamp. 
*/ + prev_timestamp = read_timestamp; + + /* + * Report the timeline counters (ACTIVITY_START/STOP) + */ + for (cnt = FIRST_TIMELINE_EVENT; cnt < (FIRST_TIMELINE_EVENT + NUMBER_OF_TIMELINE_EVENTS); cnt++) { + mali_counter *counter = &counters[cnt]; + if (counter->enabled) { + const int index = cnt - FIRST_TIMELINE_EVENT; + unsigned int value; + + /* If the activity is still running, reset its start time to the start of this sample period + * to correct the count. Add the time up to the end of the sample onto the count. */ + if (timeline_event_starttime[index].tv_sec != 0) { + const long event_duration = get_duration_us(&timeline_event_starttime[index], &read_timestamp); + timeline_data[index] += event_duration; + timeline_event_starttime[index] = read_timestamp; /* Activity is still running. */ + } + + if (sample_interval_us != 0) { + /* Convert the counter to a percent-of-sample value */ + value = (timeline_data[index] * 100) / sample_interval_us; + } else { + pr_debug("gator: Mali-T6xx: setting value to zero\n"); + value = 0; + } + + /* Clear the counter value ready for the next sample. 
*/ + timeline_data[index] = 0; + + counter_dump[len++] = counter->key; + counter_dump[len++] = value; + } + } + + /* Report the software counters */ + for (cnt = FIRST_SOFTWARE_COUNTER; cnt < (FIRST_SOFTWARE_COUNTER + NUMBER_OF_SOFTWARE_COUNTERS); cnt++) { + const mali_counter *counter = &counters[cnt]; + if (counter->enabled) { + const int index = cnt - FIRST_SOFTWARE_COUNTER; + counter_dump[len++] = counter->key; + counter_dump[len++] = sw_counter_data[index]; + /* Set the value to zero for the next time */ + sw_counter_data[index] = 0; + } + } + + /* Report the accumulators */ + for (cnt = FIRST_ACCUMULATOR; cnt < (FIRST_ACCUMULATOR + NUMBER_OF_ACCUMULATORS); cnt++) { + const mali_counter *counter = &counters[cnt]; + if (counter->enabled) { + const int index = cnt - FIRST_ACCUMULATOR; + counter_dump[len++] = counter->key; + counter_dump[len++] = accumulators_data[index]; + /* Do not zero the accumulator */ + } + } + + /* Update the buffer */ + if (buffer) { + *buffer = (int *)counter_dump; + } + + return len; +} + +static struct gator_interface gator_events_mali_t6xx_interface = { + .create_files = create_files, + .start = start, + .stop = stop, + .read = read +}; + +extern int gator_events_mali_t6xx_init(void) +{ + pr_debug("gator: Mali-T6xx: sw_counters init\n"); + + gator_mali_initialise_counters(counters, NUMBER_OF_EVENTS); + + return gator_events_install(&gator_events_mali_t6xx_interface); +} diff --git a/drivers/gator/gator_events_mali_t6xx_hw.c b/drivers/gator/gator_events_mali_t6xx_hw.c new file mode 100644 index 000000000000..e406991398d9 --- /dev/null +++ b/drivers/gator/gator_events_mali_t6xx_hw.c @@ -0,0 +1,784 @@ +/** + * Copyright (C) ARM Limited 2012-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include "gator.h" + +#include <linux/module.h> +#include <linux/time.h> +#include <linux/math64.h> +#include <linux/slab.h> +#include <asm/io.h> + +/* Mali T6xx DDK includes */ +#include "linux/mali_linux_trace.h" +#include "kbase/src/common/mali_kbase.h" +#include "kbase/src/linux/mali_kbase_mem_linux.h" + +#include "gator_events_mali_common.h" + +/* If API version is not specified then assume API version 1. */ +#ifndef MALI_DDK_GATOR_API_VERSION +#define MALI_DDK_GATOR_API_VERSION 1 +#endif + +#if (MALI_DDK_GATOR_API_VERSION != 1) && (MALI_DDK_GATOR_API_VERSION != 2) +#error MALI_DDK_GATOR_API_VERSION is invalid (must be 1 for r1/r2 DDK, or 2 for r3 DDK). +#endif + +/* + * Mali-T6xx + */ +typedef struct kbase_device *kbase_find_device_type(int); +typedef kbase_context *kbase_create_context_type(kbase_device *); +typedef void kbase_destroy_context_type(kbase_context *); + +#if MALI_DDK_GATOR_API_VERSION == 1 +typedef void *kbase_va_alloc_type(kbase_context *, u32); +typedef void kbase_va_free_type(kbase_context *, void *); +#elif MALI_DDK_GATOR_API_VERSION == 2 +typedef void *kbase_va_alloc_type(kbase_context *, u32, kbase_hwc_dma_mapping * handle); +typedef void kbase_va_free_type(kbase_context *, kbase_hwc_dma_mapping * handle); +#endif + +typedef mali_error kbase_instr_hwcnt_enable_type(kbase_context *, kbase_uk_hwcnt_setup *); +typedef mali_error kbase_instr_hwcnt_disable_type(kbase_context *); +typedef mali_error kbase_instr_hwcnt_clear_type(kbase_context *); +typedef mali_error kbase_instr_hwcnt_dump_irq_type(kbase_context *); +typedef mali_bool kbase_instr_hwcnt_dump_complete_type(kbase_context *, mali_bool *); + +static kbase_find_device_type *kbase_find_device_symbol; +static kbase_create_context_type *kbase_create_context_symbol; +static kbase_va_alloc_type *kbase_va_alloc_symbol; +static kbase_instr_hwcnt_enable_type *kbase_instr_hwcnt_enable_symbol; +static kbase_instr_hwcnt_clear_type *kbase_instr_hwcnt_clear_symbol; +static 
kbase_instr_hwcnt_dump_irq_type *kbase_instr_hwcnt_dump_irq_symbol; +static kbase_instr_hwcnt_dump_complete_type *kbase_instr_hwcnt_dump_complete_symbol; +static kbase_instr_hwcnt_disable_type *kbase_instr_hwcnt_disable_symbol; +static kbase_va_free_type *kbase_va_free_symbol; +static kbase_destroy_context_type *kbase_destroy_context_symbol; + +static long shader_present_low = 0; + +/** The interval between reads, in ns. + * + * Earlier we introduced + * a 'hold off for 1ms after last read' to resolve MIDBASE-2178 and MALINE-724. + * However, the 1ms hold off is too long if no context switches occur as there is a race + * between this value and the tick of the read clock in gator which is also 1ms. If we 'miss' the + * current read, the counter values are effectively 'spread' over 2ms and the values seen are half + * what they should be (since Streamline averages over sample time). In the presence of context switches + * this spread can vary and markedly affect the counters. Currently there is no 'proper' solution to + * this, but empirically we have found that reducing the minimum read interval to 950us causes the + * counts to be much more stable. + */ +static const int READ_INTERVAL_NSEC = 950000; + +#if GATOR_TEST +#include "gator_events_mali_t6xx_hw_test.c" +#endif + +/* Blocks for HW counters */ +enum { + JM_BLOCK = 0, + TILER_BLOCK, + SHADER_BLOCK, + MMU_BLOCK +}; + +/* Counters for Mali-T6xx: + * + * - HW counters, 4 blocks + * For HW counters we need strings to create /dev/gator/events files. + * Enums are not needed because the position of the HW name in the array is the same + * of the corresponding value in the received block of memory. + * HW counters are requested by calculating a bitmask, passed then to the driver. + * Every millisecond a HW counters dump is requested, and if the previous has been completed they are read. 
+ */ + +/* Hardware Counters */ +static const char *const hardware_counter_names[] = { + /* Job Manager */ + "", + "", + "", + "", + "MESSAGES_SENT", + "MESSAGES_RECEIVED", + "GPU_ACTIVE", /* 6 */ + "IRQ_ACTIVE", + "JS0_JOBS", + "JS0_TASKS", + "JS0_ACTIVE", + "", + "JS0_WAIT_READ", + "JS0_WAIT_ISSUE", + "JS0_WAIT_DEPEND", + "JS0_WAIT_FINISH", + "JS1_JOBS", + "JS1_TASKS", + "JS1_ACTIVE", + "", + "JS1_WAIT_READ", + "JS1_WAIT_ISSUE", + "JS1_WAIT_DEPEND", + "JS1_WAIT_FINISH", + "JS2_JOBS", + "JS2_TASKS", + "JS2_ACTIVE", + "", + "JS2_WAIT_READ", + "JS2_WAIT_ISSUE", + "JS2_WAIT_DEPEND", + "JS2_WAIT_FINISH", + "JS3_JOBS", + "JS3_TASKS", + "JS3_ACTIVE", + "", + "JS3_WAIT_READ", + "JS3_WAIT_ISSUE", + "JS3_WAIT_DEPEND", + "JS3_WAIT_FINISH", + "JS4_JOBS", + "JS4_TASKS", + "JS4_ACTIVE", + "", + "JS4_WAIT_READ", + "JS4_WAIT_ISSUE", + "JS4_WAIT_DEPEND", + "JS4_WAIT_FINISH", + "JS5_JOBS", + "JS5_TASKS", + "JS5_ACTIVE", + "", + "JS5_WAIT_READ", + "JS5_WAIT_ISSUE", + "JS5_WAIT_DEPEND", + "JS5_WAIT_FINISH", + "JS6_JOBS", + "JS6_TASKS", + "JS6_ACTIVE", + "", + "JS6_WAIT_READ", + "JS6_WAIT_ISSUE", + "JS6_WAIT_DEPEND", + "JS6_WAIT_FINISH", + + /*Tiler */ + "", + "", + "", + "JOBS_PROCESSED", + "TRIANGLES", + "QUADS", + "POLYGONS", + "POINTS", + "LINES", + "VCACHE_HIT", + "VCACHE_MISS", + "FRONT_FACING", + "BACK_FACING", + "PRIM_VISIBLE", + "PRIM_CULLED", + "PRIM_CLIPPED", + "LEVEL0", + "LEVEL1", + "LEVEL2", + "LEVEL3", + "LEVEL4", + "LEVEL5", + "LEVEL6", + "LEVEL7", + "COMMAND_1", + "COMMAND_2", + "COMMAND_3", + "COMMAND_4", + "COMMAND_4_7", + "COMMAND_8_15", + "COMMAND_16_63", + "COMMAND_64", + "COMPRESS_IN", + "COMPRESS_OUT", + "COMPRESS_FLUSH", + "TIMESTAMPS", + "PCACHE_HIT", + "PCACHE_MISS", + "PCACHE_LINE", + "PCACHE_STALL", + "WRBUF_HIT", + "WRBUF_MISS", + "WRBUF_LINE", + "WRBUF_PARTIAL", + "WRBUF_STALL", + "ACTIVE", + "LOADING_DESC", + "INDEX_WAIT", + "INDEX_RANGE_WAIT", + "VERTEX_WAIT", + "PCACHE_WAIT", + "WRBUF_WAIT", + "BUS_READ", + "BUS_WRITE", + "", + "", + "", + "", + "", 
+ "UTLB_STALL", + "UTLB_REPLAY_MISS", + "UTLB_REPLAY_FULL", + "UTLB_NEW_MISS", + "UTLB_HIT", + + /* Shader Core */ + "", + "", + "", + "SHADER_CORE_ACTIVE", + "FRAG_ACTIVE", + "FRAG_PRIMATIVES", + "FRAG_PRIMATIVES_DROPPED", + "FRAG_CYCLE_DESC", + "FRAG_CYCLES_PLR", + "FRAG_CYCLES_VERT", + "FRAG_CYCLES_TRISETUP", + "FRAG_CYCLES_RAST", + "FRAG_THREADS", + "FRAG_DUMMY_THREADS", + "FRAG_QUADS_RAST", + "FRAG_QUADS_EZS_TEST", + "FRAG_QUADS_EZS_KILLED", + "FRAG_QUADS_LZS_TEST", + "FRAG_QUADS_LZS_KILLED", + "FRAG_CYCLE_NO_TILE", + "FRAG_NUM_TILES", + "FRAG_TRANS_ELIM", + "COMPUTE_ACTIVE", + "COMPUTE_TASKS", + "COMPUTE_THREADS", + "COMPUTE_CYCLES_DESC", + "TRIPIPE_ACTIVE", + "ARITH_WORDS", + "ARITH_CYCLES_REG", + "ARITH_CYCLES_L0", + "ARITH_FRAG_DEPEND", + "LS_WORDS", + "LS_ISSUES", + "LS_RESTARTS", + "LS_REISSUES_MISS", + "LS_REISSUES_VD", + "LS_REISSUE_ATTRIB_MISS", + "LS_NO_WB", + "TEX_WORDS", + "TEX_BUBBLES", + "TEX_WORDS_L0", + "TEX_WORDS_DESC", + "TEX_THREADS", + "TEX_RECIRC_FMISS", + "TEX_RECIRC_DESC", + "TEX_RECIRC_MULTI", + "TEX_RECIRC_PMISS", + "TEX_RECIRC_CONF", + "LSC_READ_HITS", + "LSC_READ_MISSES", + "LSC_WRITE_HITS", + "LSC_WRITE_MISSES", + "LSC_ATOMIC_HITS", + "LSC_ATOMIC_MISSES", + "LSC_LINE_FETCHES", + "LSC_DIRTY_LINE", + "LSC_SNOOPS", + "AXI_TLB_STALL", + "AXI_TLB_MIESS", + "AXI_TLB_TRANSACTION", + "LS_TLB_MISS", + "LS_TLB_HIT", + "AXI_BEATS_READ", + "AXI_BEATS_WRITTEN", + + /*L2 and MMU */ + "", + "", + "", + "", + "MMU_HIT", + "MMU_NEW_MISS", + "MMU_REPLAY_FULL", + "MMU_REPLAY_MISS", + "MMU_TABLE_WALK", + "", + "", + "", + "", + "", + "", + "", + "UTLB_HIT", + "UTLB_NEW_MISS", + "UTLB_REPLAY_FULL", + "UTLB_REPLAY_MISS", + "UTLB_STALL", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "L2_WRITE_BEATS", + "L2_READ_BEATS", + "L2_ANY_LOOKUP", + "L2_READ_LOOKUP", + "L2_SREAD_LOOKUP", + "L2_READ_REPLAY", + "L2_READ_SNOOP", + "L2_READ_HIT", + "L2_CLEAN_MISS", + "L2_WRITE_LOOKUP", + "L2_SWRITE_LOOKUP", + "L2_WRITE_REPLAY", + "L2_WRITE_SNOOP", + 
"L2_WRITE_HIT", + "L2_EXT_READ_FULL", + "L2_EXT_READ_HALF", + "L2_EXT_WRITE_FULL", + "L2_EXT_WRITE_HALF", + "L2_EXT_READ", + "L2_EXT_READ_LINE", + "L2_EXT_WRITE", + "L2_EXT_WRITE_LINE", + "L2_EXT_WRITE_SMALL", + "L2_EXT_BARRIER", + "L2_EXT_AR_STALL", + "L2_EXT_R_BUF_FULL", + "L2_EXT_RD_BUF_FULL", + "L2_EXT_R_RAW", + "L2_EXT_W_STALL", + "L2_EXT_W_BUF_FULL", + "L2_EXT_R_W_HAZARD", + "L2_TAG_HAZARD", + "L2_SNOOP_FULL", + "L2_REPLAY_FULL" +}; + +#define NUMBER_OF_HARDWARE_COUNTERS (sizeof(hardware_counter_names) / sizeof(hardware_counter_names[0])) + +#define GET_HW_BLOCK(c) (((c) >> 6) & 0x3) +#define GET_COUNTER_OFFSET(c) ((c) & 0x3f) + +/* Memory to dump hardware counters into */ +static void *kernel_dump_buffer; + +#if MALI_DDK_GATOR_API_VERSION == 2 +/* DMA state used to manage lifetime of the buffer */ +kbase_hwc_dma_mapping kernel_dump_buffer_handle; +#endif + +/* kbase context and device */ +static kbase_context *kbcontext = NULL; +static struct kbase_device *kbdevice = NULL; + +/* + * The following function has no external prototype in older DDK revisions. When the DDK + * is updated then this should be removed. + */ +struct kbase_device *kbase_find_device(int minor); + +static volatile bool kbase_device_busy = false; +static unsigned int num_hardware_counters_enabled; + +/* + * gatorfs variables for counter enable state + */ +static mali_counter counters[NUMBER_OF_HARDWARE_COUNTERS]; + +/* An array used to return the data we recorded + * as key,value pairs hence the *2 + */ +static unsigned long counter_dump[NUMBER_OF_HARDWARE_COUNTERS * 2]; + +#define SYMBOL_GET(FUNCTION, ERROR_COUNT) \ + if(FUNCTION ## _symbol) \ + { \ + printk("gator: mali " #FUNCTION " symbol was already registered\n"); \ + (ERROR_COUNT)++; \ + } \ + else \ + { \ + FUNCTION ## _symbol = symbol_get(FUNCTION); \ + if(! 
FUNCTION ## _symbol) \ + { \ + printk("gator: mali online " #FUNCTION " symbol not found\n"); \ + (ERROR_COUNT)++; \ + } \ + } + +#define SYMBOL_CLEANUP(FUNCTION) \ + if(FUNCTION ## _symbol) \ + { \ + symbol_put(FUNCTION); \ + FUNCTION ## _symbol = NULL; \ + } + +/** + * Execute symbol_get for all the Mali symbols and check for success. + * @return the number of symbols not loaded. + */ +static int init_symbols(void) +{ + int error_count = 0; + SYMBOL_GET(kbase_find_device, error_count); + SYMBOL_GET(kbase_create_context, error_count); + SYMBOL_GET(kbase_va_alloc, error_count); + SYMBOL_GET(kbase_instr_hwcnt_enable, error_count); + SYMBOL_GET(kbase_instr_hwcnt_clear, error_count); + SYMBOL_GET(kbase_instr_hwcnt_dump_irq, error_count); + SYMBOL_GET(kbase_instr_hwcnt_dump_complete, error_count); + SYMBOL_GET(kbase_instr_hwcnt_disable, error_count); + SYMBOL_GET(kbase_va_free, error_count); + SYMBOL_GET(kbase_destroy_context, error_count); + + return error_count; +} + +/** + * Execute symbol_put for all the registered Mali symbols. + */ +static void clean_symbols(void) +{ + SYMBOL_CLEANUP(kbase_find_device); + SYMBOL_CLEANUP(kbase_create_context); + SYMBOL_CLEANUP(kbase_va_alloc); + SYMBOL_CLEANUP(kbase_instr_hwcnt_enable); + SYMBOL_CLEANUP(kbase_instr_hwcnt_clear); + SYMBOL_CLEANUP(kbase_instr_hwcnt_dump_irq); + SYMBOL_CLEANUP(kbase_instr_hwcnt_dump_complete); + SYMBOL_CLEANUP(kbase_instr_hwcnt_disable); + SYMBOL_CLEANUP(kbase_va_free); + SYMBOL_CLEANUP(kbase_destroy_context); +} + +/** + * Determines whether a read should take place + * @param current_time The current time, obtained from getnstimeofday() + * @param prev_time_s The number of seconds at the previous read attempt. + * @param next_read_time_ns The time (in ns) when the next read should be allowed. + * + * Note that this function has been separated out here to allow it to be tested. 
+ */ +static int is_read_scheduled(const struct timespec *current_time, u32 *prev_time_s, s32 *next_read_time_ns) +{ + /* If the current ns count rolls over a second, roll the next read time too. */ + if (current_time->tv_sec != *prev_time_s) { + *next_read_time_ns = *next_read_time_ns - NSEC_PER_SEC; + } + + /* Abort the read if the next read time has not arrived. */ + if (current_time->tv_nsec < *next_read_time_ns) { + return 0; + } + + /* Set the next read some fixed time after this one, and update the read timestamp. */ + *next_read_time_ns = current_time->tv_nsec + READ_INTERVAL_NSEC; + + *prev_time_s = current_time->tv_sec; + return 1; +} + +static int start(void) +{ + kbase_uk_hwcnt_setup setup; + mali_error err; + int cnt; + u16 bitmask[] = { 0, 0, 0, 0 }; + unsigned long long shadersPresent = 0; + + /* Setup HW counters */ + num_hardware_counters_enabled = 0; + + if (NUMBER_OF_HARDWARE_COUNTERS != 256) { + pr_debug("Unexpected number of hardware counters defined: expecting 256, got %d\n", NUMBER_OF_HARDWARE_COUNTERS); + } + + /* Calculate enable bitmasks based on counters_enabled array */ + for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++) { + const mali_counter *counter = &counters[cnt]; + if (counter->enabled) { + int block = GET_HW_BLOCK(cnt); + int enable_bit = GET_COUNTER_OFFSET(cnt) / 4; + bitmask[block] |= (1 << enable_bit); + pr_debug("gator: Mali-T6xx: hardware counter %s selected [%d]\n", hardware_counter_names[cnt], cnt); + num_hardware_counters_enabled++; + } + } + + /* Create a kbase context for HW counters */ + if (num_hardware_counters_enabled > 0) { + if (init_symbols() > 0) { + clean_symbols(); + /* No Mali driver code entrypoints found - not a fault. 
*/ + return 0; + } + + kbdevice = kbase_find_device_symbol(-1); + + /* If we already got a context, fail */ + if (kbcontext) { + pr_debug("gator: Mali-T6xx: error context already present\n"); + goto out; + } + + /* kbcontext will only be valid after all the Mali symbols are loaded successfully */ + kbcontext = kbase_create_context_symbol(kbdevice); + if (!kbcontext) { + pr_debug("gator: Mali-T6xx: error creating kbase context\n"); + goto out; + } + + + /* See if we can get the number of shader cores */ + shadersPresent = kbdevice->shader_present_bitmap; + shader_present_low = (unsigned long)shadersPresent; + + /* + * The amount of memory needed to store the dump (bytes) + * DUMP_SIZE = number of core groups + * * number of blocks (always 8 for midgard) + * * number of counters per block (always 64 for midgard) + * * number of bytes per counter (always 4 in midgard) + * For a Mali-T6xx with a single core group = 1 * 8 * 64 * 4 = 2048 + * For a Mali-T6xx with a dual core group = 2 * 8 * 64 * 4 = 4096 + */ +#if MALI_DDK_GATOR_API_VERSION == 1 + kernel_dump_buffer = kbase_va_alloc_symbol(kbcontext, 4096); +#elif MALI_DDK_GATOR_API_VERSION == 2 + kernel_dump_buffer = kbase_va_alloc_symbol(kbcontext, 4096, &kernel_dump_buffer_handle); +#endif + if (!kernel_dump_buffer) { + pr_debug("gator: Mali-T6xx: error trying to allocate va\n"); + goto destroy_context; + } + + setup.dump_buffer = (uintptr_t)kernel_dump_buffer; + setup.jm_bm = bitmask[JM_BLOCK]; + setup.tiler_bm = bitmask[TILER_BLOCK]; + setup.shader_bm = bitmask[SHADER_BLOCK]; + setup.mmu_l2_bm = bitmask[MMU_BLOCK]; + /* These counters do not exist on Mali-T60x */ + setup.l3_cache_bm = 0; + + /* Use kbase API to enable hardware counters and provide dump buffer */ + err = kbase_instr_hwcnt_enable_symbol(kbcontext, &setup); + if (err != MALI_ERROR_NONE) { + pr_debug("gator: Mali-T6xx: can't setup hardware counters\n"); + goto free_buffer; + } + pr_debug("gator: Mali-T6xx: hardware counters enabled\n"); + 
kbase_instr_hwcnt_clear_symbol(kbcontext); + pr_debug("gator: Mali-T6xx: hardware counters cleared \n"); + + kbase_device_busy = false; + } + + return 0; + +free_buffer: +#if MALI_DDK_GATOR_API_VERSION == 1 + kbase_va_free_symbol(kbcontext, kernel_dump_buffer); +#elif MALI_DDK_GATOR_API_VERSION == 2 + kbase_va_free_symbol(kbcontext, &kernel_dump_buffer_handle); +#endif + +destroy_context: + kbase_destroy_context_symbol(kbcontext); + +out: + clean_symbols(); + return -1; +} + +static void stop(void) +{ + unsigned int cnt; + kbase_context *temp_kbcontext; + + pr_debug("gator: Mali-T6xx: stop\n"); + + /* Set all counters as disabled */ + for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++) { + counters[cnt].enabled = 0; + } + + /* Destroy the context for HW counters */ + if (num_hardware_counters_enabled > 0 && kbcontext != NULL) { + /* + * Set the global variable to NULL before destroying it, because + * other function will check this before using it. + */ + temp_kbcontext = kbcontext; + kbcontext = NULL; + + kbase_instr_hwcnt_disable_symbol(temp_kbcontext); + +#if MALI_DDK_GATOR_API_VERSION == 1 + kbase_va_free_symbol(temp_kbcontext, kernel_dump_buffer); +#elif MALI_DDK_GATOR_API_VERSION == 2 + kbase_va_free_symbol(temp_kbcontext, &kernel_dump_buffer_handle); +#endif + + kbase_destroy_context_symbol(temp_kbcontext); + + pr_debug("gator: Mali-T6xx: hardware counters stopped\n"); + + clean_symbols(); + } +} + +static int read(int **buffer) +{ + int cnt; + int len = 0; + u32 value = 0; + mali_bool success; + + struct timespec current_time; + static u32 prev_time_s = 0; + static s32 next_read_time_ns = 0; + + if (!on_primary_core()) { + return 0; + } + + getnstimeofday(¤t_time); + + /* + * Discard reads unless a respectable time has passed. This reduces the load on the GPU without sacrificing + * accuracy on the Streamline display. 
+ */ + if (!is_read_scheduled(¤t_time, &prev_time_s, &next_read_time_ns)) { + return 0; + } + + /* + * Report the HW counters + * Only process hardware counters if at least one of the hardware counters is enabled. + */ + if (num_hardware_counters_enabled > 0) { + const unsigned int vithar_blocks[] = { + 0x700, /* VITHAR_JOB_MANAGER, Block 0 */ + 0x400, /* VITHAR_TILER, Block 1 */ + 0x000, /* VITHAR_SHADER_CORE, Block 2 */ + 0x500 /* VITHAR_MEMORY_SYSTEM, Block 3 */ + }; + + if (!kbcontext) { + return -1; + } + + /* Mali symbols can be called safely since a kbcontext is valid */ + if (kbase_instr_hwcnt_dump_complete_symbol(kbcontext, &success) == MALI_TRUE) { + kbase_device_busy = false; + + if (success == MALI_TRUE) { + /* Cycle through hardware counters and accumulate totals */ + for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++) { + const mali_counter *counter = &counters[cnt]; + if (counter->enabled) { + const int block = GET_HW_BLOCK(cnt); + const int counter_offset = GET_COUNTER_OFFSET(cnt); + + const char* block_base_address = (char*)kernel_dump_buffer + vithar_blocks[block]; + + /* If counter belongs to shader block need to take into account all cores */ + if (block == SHADER_BLOCK) { + int i = 0; + int shader_core_count = 0; + value = 0; + + for (i = 0; i < 4; i++) { + if ((shader_present_low >> i) & 1) { + value += *((u32*) (block_base_address + (0x100 * i)) + counter_offset); + shader_core_count++; + } + } + + for (i = 0; i < 4; i++) { + if((shader_present_low >> (i+4)) & 1) { + value += *((u32*)(block_base_address + (0x100 * i) + 0x800) + counter_offset); + shader_core_count++; + } + } + + /* Need to total by number of cores to produce an average */ + if (shader_core_count != 0) { + value /= shader_core_count; + } + } else { + value = *((u32*)block_base_address + counter_offset); + } + + counter_dump[len++] = counter->key; + counter_dump[len++] = value; + } + } + } + } + + if (!kbase_device_busy) { + kbase_device_busy = true; + 
kbase_instr_hwcnt_dump_irq_symbol(kbcontext); + } + } + + /* Update the buffer */ + if (buffer) { + *buffer = (int *)counter_dump; + } + + return len; +} + +static int create_files(struct super_block *sb, struct dentry *root) +{ + unsigned int event; + /* + * Create the filesystem for all events + */ + int counter_index = 0; + const char *mali_name = gator_mali_get_mali_name(); + + for (event = 0; event < NUMBER_OF_HARDWARE_COUNTERS; event++) { + if (gator_mali_create_file_system(mali_name, hardware_counter_names[counter_index], sb, root, &counters[event], NULL) != 0) + return -1; + counter_index++; + } + + return 0; +} + +static struct gator_interface gator_events_mali_t6xx_interface = { + .create_files = create_files, + .start = start, + .stop = stop, + .read = read +}; + +int gator_events_mali_t6xx_hw_init(void) +{ + pr_debug("gator: Mali-T6xx: sw_counters init\n"); + +#if GATOR_TEST + test_all_is_read_scheduled(); +#endif + + gator_mali_initialise_counters(counters, NUMBER_OF_HARDWARE_COUNTERS); + + return gator_events_install(&gator_events_mali_t6xx_interface); +} diff --git a/drivers/gator/gator_events_mali_t6xx_hw_test.c b/drivers/gator/gator_events_mali_t6xx_hw_test.c new file mode 100644 index 000000000000..efb32ddf5483 --- /dev/null +++ b/drivers/gator/gator_events_mali_t6xx_hw_test.c @@ -0,0 +1,55 @@ +/** + * Copyright (C) ARM Limited 2012-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +/** + * Test functions for mali_t600_hw code. 
+ */ + +static int is_read_scheduled(const struct timespec *current_time, u32 *prev_time_s, s32 *next_read_time_ns); + +static int test_is_read_scheduled(u32 s, u32 ns, u32 prev_s, s32 next_ns, int expected_result, s32 expected_next_ns) +{ + struct timespec current_time; + u32 prev_time_s = prev_s; + s32 next_read_time_ns = next_ns; + + current_time.tv_sec = s; + current_time.tv_nsec = ns; + + if (is_read_scheduled(¤t_time, &prev_time_s, &next_read_time_ns) != expected_result) { + printk("Failed do_read(%u, %u, %u, %d): expected %d\n", s, ns, prev_s, next_ns, expected_result); + return 0; + } + + if (next_read_time_ns != expected_next_ns) { + printk("Failed: next_read_ns expected=%d, actual=%d\n", expected_next_ns, next_read_time_ns); + return 0; + } + + return 1; +} + +static void test_all_is_read_scheduled(void) +{ + const int HIGHEST_NS = 999999999; + int n_tests_passed = 0; + + printk("gator: running tests on %s\n", __FILE__); + + n_tests_passed += test_is_read_scheduled(0, 0, 0, 0, 1, READ_INTERVAL_NSEC); /* Null time */ + n_tests_passed += test_is_read_scheduled(100, 1000, 0, 0, 1, READ_INTERVAL_NSEC + 1000); /* Initial values */ + + n_tests_passed += test_is_read_scheduled(100, HIGHEST_NS, 100, HIGHEST_NS + 500, 0, HIGHEST_NS + 500); + n_tests_passed += test_is_read_scheduled(101, 0001, 100, HIGHEST_NS + 500, 0, HIGHEST_NS + 500 - NSEC_PER_SEC); + n_tests_passed += test_is_read_scheduled(101, 600, 100, HIGHEST_NS + 500 - NSEC_PER_SEC, 1, 600 + READ_INTERVAL_NSEC); + + n_tests_passed += test_is_read_scheduled(101, 600, 100, HIGHEST_NS + 500, 1, 600 + READ_INTERVAL_NSEC); + + printk("gator: %d tests passed\n", n_tests_passed); +} diff --git a/drivers/gator/gator_events_meminfo.c b/drivers/gator/gator_events_meminfo.c new file mode 100644 index 000000000000..451290d9af17 --- /dev/null +++ b/drivers/gator/gator_events_meminfo.c @@ -0,0 +1,387 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+
+#include <linux/hardirq.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/workqueue.h>
+#include <trace/events/kmem.h>
+
+// Indices of the system wide counters; MEMINFO_TOTAL is the number of them
+enum {
+	MEMINFO_MEMFREE,
+	MEMINFO_MEMUSED,
+	MEMINFO_BUFFERRAM,
+	MEMINFO_TOTAL,
+};
+
+// Indices of the per-process counters; PROC_COUNT is the number of them
+enum {
+	PROC_SIZE,
+	PROC_SHARE,
+	PROC_TEXT,
+	PROC_DATA,
+	PROC_COUNT,
+};
+
+// gatorfs directory names, indexed by the MEMINFO_* values
+static const char * const meminfo_names[] = {
+	"Linux_meminfo_memfree",
+	"Linux_meminfo_memused",
+	"Linux_meminfo_bufferram",
+};
+
+// gatorfs directory names, indexed by the PROC_* values
+static const char * const proc_names[] = {
+	"Linux_proc_statm_size",
+	"Linux_proc_statm_share",
+	"Linux_proc_statm_text",
+	"Linux_proc_statm_data",
+};
+
+static bool meminfo_global_enabled;
+static ulong meminfo_enabled[MEMINFO_TOTAL];
+static ulong meminfo_keys[MEMINFO_TOTAL];
+// Key/value pairs filled by gator_meminfo_func: MEMUSED emits three pairs, the other counters one pair each
+static long long meminfo_buffer[2 * (MEMINFO_TOTAL + 2)];
+static int meminfo_length = 0;
+static bool new_data_avail;
+
+static bool proc_global_enabled;
+static ulong proc_enabled[PROC_COUNT];
+static ulong proc_keys[PROC_COUNT];
+// Per cpu key/value pairs filled by gator_events_meminfo_read_proc: pid, PROC_COUNT counters, memused and the pid clear
+static DEFINE_PER_CPU(long long, proc_buffer[2 * (PROC_COUNT + 3)]);
+
+static int gator_meminfo_func(void *data);
+static bool gator_meminfo_run;
+// Initialize semaphore unlocked to initialize memory values
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
+static DECLARE_MUTEX(gator_meminfo_sem);
+#else
+static DEFINE_SEMAPHORE(gator_meminfo_sem);
+#endif
+
+// Each of the three page tracepoint probes below only signals gator_meminfo_sem;
+// the values themselves are collected by gator_meminfo_func
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+GATOR_DEFINE_PROBE(mm_page_free_direct, TP_PROTO(struct page *page, unsigned int order))
+#else
+GATOR_DEFINE_PROBE(mm_page_free, TP_PROTO(struct page *page, unsigned int order))
+#endif
+{
+	up(&gator_meminfo_sem);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+GATOR_DEFINE_PROBE(mm_pagevec_free, TP_PROTO(struct page *page, int cold))
+#else
+GATOR_DEFINE_PROBE(mm_page_free_batched, TP_PROTO(struct page *page, int cold))
+#endif
+{
+	up(&gator_meminfo_sem);
+}
+
+GATOR_DEFINE_PROBE(mm_page_alloc, TP_PROTO(struct page *page, unsigned int order, gfp_t gfp_flags, int migratetype))
+{
+	up(&gator_meminfo_sem);
+}
+
+// Creates one gatorfs directory per counter, each holding a writable "enabled"
+// file and a read only "key" file. Returns 0 on success, -1 if a directory
+// could not be created.
+static int gator_events_meminfo_create_files(struct super_block *sb, struct dentry *root)
+{
+	struct dentry *dir;
+	int i;
+
+	for (i = 0; i < MEMINFO_TOTAL; i++) {
+		dir = gatorfs_mkdir(sb, root, meminfo_names[i]);
+		if (!dir) {
+			return -1;
+		}
+		gatorfs_create_ulong(sb, dir, "enabled", &meminfo_enabled[i]);
+		gatorfs_create_ro_ulong(sb, dir, "key", &meminfo_keys[i]);
+	}
+
+	for (i = 0; i < PROC_COUNT; ++i) {
+		dir = gatorfs_mkdir(sb, root, proc_names[i]);
+		if (!dir) {
+			return -1;
+		}
+		gatorfs_create_ulong(sb, dir, "enabled", &proc_enabled[i]);
+		gatorfs_create_ro_ulong(sb, dir, "key", &proc_keys[i]);
+	}
+
+	return 0;
+}
+
+// Works out which counter groups are enabled and, when a system wide counter
+// is enabled, registers the page tracepoints and starts the worker thread.
+// Returns 0 on success and -1 on failure, in which case every tracepoint
+// registered so far has been unregistered again.
+static int gator_events_meminfo_start(void)
+{
+	int i;
+
+	new_data_avail = false;
+	meminfo_global_enabled = 0;
+	for (i = 0; i < MEMINFO_TOTAL; i++) {
+		if (meminfo_enabled[i]) {
+			meminfo_global_enabled = 1;
+			break;
+		}
+	}
+
+	proc_global_enabled = 0;
+	for (i = 0; i < PROC_COUNT; ++i) {
+		if (proc_enabled[i]) {
+			proc_global_enabled = 1;
+			break;
+		}
+	}
+	// memused is also emitted per process by gator_events_meminfo_read_proc
+	if (meminfo_enabled[MEMINFO_MEMUSED]) {
+		proc_global_enabled = 1;
+	}
+
+	// The tracepoints and the worker thread only serve the system wide counters
+	if (meminfo_global_enabled == 0)
+		return 0;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+	if (GATOR_REGISTER_TRACE(mm_page_free_direct))
+#else
+	if (GATOR_REGISTER_TRACE(mm_page_free))
+#endif
+		goto mm_page_free_exit;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+	if (GATOR_REGISTER_TRACE(mm_pagevec_free))
+#else
+	if (GATOR_REGISTER_TRACE(mm_page_free_batched))
+#endif
+		goto mm_page_free_batched_exit;
+	if (GATOR_REGISTER_TRACE(mm_page_alloc))
+		goto mm_page_alloc_exit;
+
+	// Start worker thread
+	gator_meminfo_run = true;
+	// Since the mutex starts unlocked, memory values will be initialized
+	if (IS_ERR(kthread_run(gator_meminfo_func, NULL, "gator_meminfo")))
+		goto kthread_run_exit;
+
+	return 0;
+
+kthread_run_exit:
+	// No worker thread exists, so do not leave the run flag set
+	gator_meminfo_run = false;
+	GATOR_UNREGISTER_TRACE(mm_page_alloc);
+mm_page_alloc_exit:
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+	GATOR_UNREGISTER_TRACE(mm_pagevec_free);
+#else
+	GATOR_UNREGISTER_TRACE(mm_page_free_batched);
+#endif
+mm_page_free_batched_exit:
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+	GATOR_UNREGISTER_TRACE(mm_page_free_direct);
+#else
+	GATOR_UNREGISTER_TRACE(mm_page_free);
+#endif
+mm_page_free_exit:
+	return -1;
+}
+
+// Unregisters the page tracepoints and asks the worker thread to exit; does
+// nothing when no system wide counter was enabled by the last start
+static void gator_events_meminfo_stop(void)
+{
+	if (meminfo_global_enabled) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+		GATOR_UNREGISTER_TRACE(mm_page_free_direct);
+		GATOR_UNREGISTER_TRACE(mm_pagevec_free);
+#else
+		GATOR_UNREGISTER_TRACE(mm_page_free);
+		GATOR_UNREGISTER_TRACE(mm_page_free_batched);
+#endif
+		GATOR_UNREGISTER_TRACE(mm_page_alloc);
+
+		// Stop worker thread
+		gator_meminfo_run = false;
+		// Wake the thread so that it notices the cleared run flag
+		up(&gator_meminfo_sem);
+	}
+}
+
+// Worker thread body: waits on gator_meminfo_sem, then rebuilds meminfo_buffer
+// from si_meminfo() and flags the new data for gator_events_meminfo_read.
+// Must be run in process context as the kernel function si_meminfo() can sleep
+static int gator_meminfo_func(void *data)
+{
+	struct sysinfo info;
+	int i, len;
+	unsigned long long value;
+
+	for (;;) {
+		if (down_killable(&gator_meminfo_sem)) {
+			break;
+		}
+
+		// Eat up any pending events
+		while (!down_trylock(&gator_meminfo_sem))
+			;
+
+		if (!gator_meminfo_run) {
+			break;
+		}
+
+		meminfo_length = len = 0;
+
+		si_meminfo(&info);
+		for (i = 0; i < MEMINFO_TOTAL; i++) {
+			if (meminfo_enabled[i]) {
+				switch (i) {
+				case MEMINFO_MEMFREE:
+					value = info.freeram * PAGE_SIZE;
+					break;
+				case MEMINFO_MEMUSED:
+					// pid -1 means system wide
+					meminfo_buffer[len++] = 1;
+					meminfo_buffer[len++] = -1;
+					// Emit value
+					meminfo_buffer[len++] = meminfo_keys[MEMINFO_MEMUSED];
+					meminfo_buffer[len++] = (info.totalram - info.freeram) * PAGE_SIZE;
+					// Clear pid
+					meminfo_buffer[len++] = 1;
+					meminfo_buffer[len++] = 0;
+					// All three pairs are already in the buffer, skip the common emit below
+					continue;
+				case MEMINFO_BUFFERRAM:
+					value = info.bufferram * PAGE_SIZE;
+					break;
+				default:
+					value = 0;
+					break;
+				}
+				meminfo_buffer[len++] = meminfo_keys[i];
+				meminfo_buffer[len++] = value;
+			}
+		}
+
+		meminfo_length = len;
+		new_data_avail = true;
+	}
+
+	return 0;
+}
+
+// Hands out the snapshot last built by gator_meminfo_func. Returns the number
+// of entries in *buffer, or 0 when not on the primary core, when no system
+// wide counter is enabled or when there is nothing new since the last read.
+static int gator_events_meminfo_read(long long **buffer)
+{
+	if (!on_primary_core() || !meminfo_global_enabled)
+		return 0;
+
+	if (!new_data_avail)
+		return 0;
+
+	new_data_avail = false;
+
+	if (buffer)
+		*buffer = meminfo_buffer;
+
+	return meminfo_length;
+}
+
+// Builds the memory statistics of task's thread group leader in this cpu's
+// proc_buffer, bracketed by a pid entry and a pid clear entry. Values are
+// counted in pages and emitted multiplied by PAGE_SIZE. Returns the number of
+// entries in *buffer, or 0 when nothing is enabled or the task has no mm.
+static int gator_events_meminfo_read_proc(long long **buffer, struct task_struct *task)
+{
+	struct mm_struct *mm;
+	u64 share = 0;
+	int i;
+	long long value;
+	int len = 0;
+	int cpu = get_physical_cpu();
+	long long *buf = per_cpu(proc_buffer, cpu);
+
+	if (!proc_global_enabled) {
+		return 0;
+	}
+
+	// Collect the memory stats of the process instead of the thread
+	if (task->group_leader != NULL) {
+		task = task->group_leader;
+	}
+
+	// get_task_mm/mmput is not needed in this context because the task and its mm are required as part of the sched_switch
+	mm = task->mm;
+	if (mm == NULL) {
+		return 0;
+	}
+
+	// Derived from task_statm in fs/proc/task_mmu.c
+	if (meminfo_enabled[MEMINFO_MEMUSED] || proc_enabled[PROC_SHARE]) {
+		share = get_mm_counter(mm,
+#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32)
+							   file_rss
+#else
+							   MM_FILEPAGES
+#endif
+							   );
+	}
+
+	// key of 1 indicates a pid
+	buf[len++] = 1;
+	buf[len++] = task->pid;
+
+	for (i = 0; i < PROC_COUNT; ++i) {
+		if (proc_enabled[i]) {
+			switch (i) {
+			case PROC_SIZE:
+				value = mm->total_vm;
+				break;
+			case PROC_SHARE:
+				value = share;
+				break;
+			case PROC_TEXT:
+				value = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> PAGE_SHIFT;
+				break;
+			case PROC_DATA:
+				value = mm->total_vm - mm->shared_vm;
+				break;
+			default:
+				// Not reachable while i < PROC_COUNT, but keeps value defined on every path
+				value = 0;
+				break;
+			}
+
+			buf[len++] = proc_keys[i];
+			buf[len++] = value * PAGE_SIZE;
+		}
+	}
+
+	if (meminfo_enabled[MEMINFO_MEMUSED]) {
+		value = share + get_mm_counter(mm,
+#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32)
+									   anon_rss
+#else
+									   MM_ANONPAGES
+#endif
+									   );
+		// Send resident for this pid
+		buf[len++] = meminfo_keys[MEMINFO_MEMUSED];
+		buf[len++] = value * PAGE_SIZE;
+	}
+
+	// Clear pid
+	buf[len++] = 1;
+	buf[len++] = 0;
+
+	if (buffer)
+		*buffer = buf;
+
+	return len;
+}
+
+static struct gator_interface gator_events_meminfo_interface = {
+	.create_files = gator_events_meminfo_create_files,
+	.start = gator_events_meminfo_start,
+	.stop = gator_events_meminfo_stop,
+	.read64 = gator_events_meminfo_read,
+	.read_proc = gator_events_meminfo_read_proc,
+};
+
+// Disables every counter, gives each one a key from gator_events_get_key()
+// and installs the interface; returns the result of gator_events_install()
+int gator_events_meminfo_init(void)
+{
+	int i;
+
+	meminfo_global_enabled = 0;
+	for (i = 0; i < MEMINFO_TOTAL; i++) {
+		meminfo_enabled[i] = 0;
+		meminfo_keys[i] = gator_events_get_key();
+	}
+
+	proc_global_enabled = 0;
+	for (i = 0; i < PROC_COUNT; ++i) {
+		proc_enabled[i] = 0;
+		proc_keys[i] = gator_events_get_key();
+	}
+
+	return gator_events_install(&gator_events_meminfo_interface);
+}
diff --git a/drivers/gator/gator_events_mmapped.c b/drivers/gator/gator_events_mmapped.c
new file mode 100644
index 000000000000..f055e48d317a
--- /dev/null
+++ b/drivers/gator/gator_events_mmapped.c
@@ -0,0 +1,209 @@
+/*
+ * Example events provider
+ *
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Similar entries to those below must be present in the events.xml file.
+ * To add them to the events.xml, create an events-mmap.xml with the
+ * following contents and rebuild gatord:
+ *
+ * <counter_set name="mmapped_cnt" count="3"/>
+ * <category name="mmapped" counter_set="mmapped_cnt" per_cpu="no">
+ *   <event event="0x0" title="Simulated1" name="Sine" display="maximum" average_selection="yes" description="Sort-of-sine"/>
+ *   <event event="0x1" title="Simulated2" name="Triangle" display="maximum" average_selection="yes" description="Triangular wave"/>
+ *   <event event="0x2" title="Simulated3" name="PWM" display="maximum" average_selection="yes" description="PWM Signal"/>
+ * </category>
+ *
+ * When adding custom events, be sure to do the following:
+ * - add any needed .c files to the gator driver Makefile
+ * - call gator_events_install in the events init function
+ * - add the init function to GATOR_EVENTS_LIST in gator_main.c
+ * - add a new events-*.xml file to the gator daemon and rebuild
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/math64.h>
+#include <linux/ratelimit.h>
+
+#include "gator.h"
+
+#define MMAPPED_COUNTERS_NUM 3
+
+static int mmapped_global_enabled;
+
+/* One entry per simulated counter; "event" selects the waveform handed to
+ * mmapped_simulate() */
+static struct {
+	unsigned long enabled;
+	unsigned long event;
+	unsigned long key;
+} mmapped_counters[MMAPPED_COUNTERS_NUM];
+
+/* One key/value pair per counter */
+static int mmapped_buffer[MMAPPED_COUNTERS_NUM * 2];
+
+/* Timestamp, in nanoseconds, of the previous start or read */
+static s64 prev_time;
+
+/* Adds mmapped_cntX directories and enabled, event, and key files to /dev/gator/events */
+static int gator_events_mmapped_create_files(struct super_block *sb,
+					     struct dentry *root)
+{
+	int i;
+
+	for (i = 0; i < MMAPPED_COUNTERS_NUM; i++) {
+		char buf[16];
+		struct dentry *dir;
+
+		snprintf(buf, sizeof(buf), "mmapped_cnt%d", i);
+		dir = gatorfs_mkdir(sb, root, buf);
+		if (WARN_ON(!dir))
+			return -1;
+		gatorfs_create_ulong(sb, dir, "enabled",
+				     &mmapped_counters[i].enabled);
+		gatorfs_create_ulong(sb, dir, "event",
+				     &mmapped_counters[i].event);
+		gatorfs_create_ro_ulong(sb, dir, "key",
+					&mmapped_counters[i].key);
+	}
+
+	return 0;
+}
+
+/* Records the start time and caches whether any counter is enabled.
+ * Always returns 0. */
+static int gator_events_mmapped_start(void)
+{
+	int i;
+	struct timespec ts;
+
+	getnstimeofday(&ts);
+	prev_time = timespec_to_ns(&ts);
+
+	mmapped_global_enabled = 0;
+	for (i = 0; i < MMAPPED_COUNTERS_NUM; i++) {
+		if (mmapped_counters[i].enabled) {
+			mmapped_global_enabled = 1;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/* Nothing to release: start only records a timestamp and a flag */
+static void gator_events_mmapped_stop(void)
+{
+}
+
+/* This function "simulates" counters, generating values of fancy
+ * functions like sine or triangle...
+ *
+ * counter selects the waveform (0, 1 or 2; anything else yields 0) and
+ * delta_in_us is the time since the previous call. Each waveform keeps its
+ * phase in function-local statics, so the result depends on earlier calls. */
+static int mmapped_simulate(int counter, int delta_in_us)
+{
+	int result = 0;
+
+	switch (counter) {
+	case 0:		/* sort-of-sine */
+		{
+			static int t = 0;
+			int x;
+
+			t += delta_in_us;
+			if (t > 2048000)
+				t = 0;
+
+			if (t % 1024000 < 512000)
+				x = 512000 - (t % 512000);
+			else
+				x = t % 512000;
+
+			result = 32 * x / 512000;
+			result = result * result;
+
+			if (t < 1024000)
+				result = 1922 - result;
+		}
+		break;
+	case 1:		/* triangle */
+		{
+			static int v, d = 1;
+
+			v = v + d * delta_in_us;
+			if (v < 0) {
+				v = 0;
+				d = 1;
+			} else if (v > 1000000) {
+				v = 1000000;
+				d = -1;
+			}
+
+			result = v;
+		}
+		break;
+	case 2:		/* PWM signal */
+		{
+			static int dc, x, t = 0;
+
+			t += delta_in_us;
+			if (t > 1000000)
+				t = 0;
+			if (x / 1000000 != (x + delta_in_us) / 1000000)
+				dc = (dc + 100000) % 1000000;
+			x += delta_in_us;
+
+			result = t < dc ? 0 : 10;
+		}
+		break;
+	}
+
+	return result;
+}
+
+/* Fills mmapped_buffer with a key/value pair for every enabled counter and
+ * returns the number of ints written; returns 0 when not on the primary core
+ * or when no counter is enabled. */
+static int gator_events_mmapped_read(int **buffer)
+{
+	int i;
+	int len = 0;
+	int delta_in_us;
+	struct timespec ts;
+	s64 time;
+
+	/* System wide counters - read from one core only */
+	if (!on_primary_core() || !mmapped_global_enabled)
+		return 0;
+
+	getnstimeofday(&ts);
+	time = timespec_to_ns(&ts);
+	/* Divide the 64-bit nanosecond delta before narrowing it: casting to
+	 * int first wraps once more than about 2.1 s passed between reads */
+	delta_in_us = (int)div_s64(time - prev_time, 1000);
+	prev_time = time;
+
+	for (i = 0; i < MMAPPED_COUNTERS_NUM; i++) {
+		if (mmapped_counters[i].enabled) {
+			mmapped_buffer[len++] = mmapped_counters[i].key;
+			mmapped_buffer[len++] =
+			    mmapped_simulate(mmapped_counters[i].event,
+					     delta_in_us);
+		}
+	}
+
+	if (buffer)
+		*buffer = mmapped_buffer;
+
+	return len;
+}
+
+static struct gator_interface gator_events_mmapped_interface = {
+	.create_files = gator_events_mmapped_create_files,
+	.start = gator_events_mmapped_start,
+	.stop = gator_events_mmapped_stop,
+	.read = gator_events_mmapped_read,
+};
+
+/* Disables every counter, assigns its key and installs the interface.
+ * Must not be static! */
+int __init gator_events_mmapped_init(void)
+{
+	int i;
+
+	for (i = 0; i < MMAPPED_COUNTERS_NUM; i++) {
+		mmapped_counters[i].enabled = 0;
+		mmapped_counters[i].key = gator_events_get_key();
+	}
+
+	return gator_events_install(&gator_events_mmapped_interface);
+}
diff --git a/drivers/gator/gator_events_net.c b/drivers/gator/gator_events_net.c
new file mode 100644
index 000000000000..9c8d3a43eaeb
--- /dev/null
+++ b/drivers/gator/gator_events_net.c
@@ -0,0 +1,172 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+#include <linux/netdevice.h>
+#include <linux/hardirq.h>
+
+#define NETRX		0
+#define NETTX		1
+#define TOTALNET	2
+
+static ulong netrx_enabled;
+static ulong nettx_enabled;
+static ulong netrx_key;
+static ulong nettx_key;
+// Byte totals over all devices, refreshed by get_network_stats
+static int rx_total, tx_total;
+// Totals already reported, indexed by NETRX/NETTX
+static ulong netPrev[TOTALNET];
+// Up to two key/value pairs for each of rx and tx
+static int netGet[TOTALNET * 4];
+
+static struct timer_list net_wake_up_timer;
+
+// Sums rx_bytes and tx_bytes over every device in init_net into rx_total and tx_total.
+// Must be run in process context as the kernel function dev_get_stats() can sleep
+static void get_network_stats(struct work_struct *wsptr)
+{
+	int rx = 0, tx = 0;
+	struct net_device *dev;
+
+	for_each_netdev(&init_net, dev) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
+		const struct net_device_stats *stats = dev_get_stats(dev);
+#else
+		struct rtnl_link_stats64 temp;
+		const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
+#endif
+		rx += stats->rx_bytes;
+		tx += stats->tx_bytes;
+	}
+	rx_total = rx;
+	tx_total = tx;
+}
+
+DECLARE_WORK(wq_get_stats, get_network_stats);
+
+// Timer callback armed by gator_events_net_read
+static void net_wake_up_handler(unsigned long unused_data)
+{
+	// had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater
+	schedule_work(&wq_get_stats);
+}
+
+// Stores in *rx and *tx the growth of the totals since the previous call,
+// reporting a negative difference as 0, and advances netPrev by what was reported
+static void calculate_delta(int *rx, int *tx)
+{
+	int rx_calc, tx_calc;
+
+	rx_calc = (int)(rx_total - netPrev[NETRX]);
+	if (rx_calc < 0)
+		rx_calc = 0;
+	netPrev[NETRX] += rx_calc;
+
+	tx_calc = (int)(tx_total - netPrev[NETTX]);
+	if (tx_calc < 0)
+		tx_calc = 0;
+	netPrev[NETTX] += tx_calc;
+
+	*rx = rx_calc;
+	*tx = tx_calc;
+}
+
+// Creates the Linux_net_rx and Linux_net_tx gatorfs directories with their
+// "enabled" and "key" files. Returns 0 on success, -1 if a directory could
+// not be created.
+static int gator_events_net_create_files(struct super_block *sb, struct dentry *root)
+{
+	// Network counters are not currently supported in RT-Preempt full because mod_timer is used
+#ifndef CONFIG_PREEMPT_RT_FULL
+	struct dentry *dir;
+
+	dir = gatorfs_mkdir(sb, root, "Linux_net_rx");
+	if (!dir) {
+		return -1;
+	}
+	gatorfs_create_ulong(sb, dir, "enabled", &netrx_enabled);
+	gatorfs_create_ro_ulong(sb, dir, "key", &netrx_key);
+
+	dir = gatorfs_mkdir(sb, root, "Linux_net_tx");
+	if (!dir) {
+		return -1;
+	}
+	gatorfs_create_ulong(sb, dir, "enabled", &nettx_enabled);
+	gatorfs_create_ro_ulong(sb, dir, "key", &nettx_key);
+#endif
+
+	return 0;
+}
+
+// Takes the current totals as the baseline for the deltas and prepares the
+// wake up timer. Always returns 0.
+static int gator_events_net_start(void)
+{
+	// Called directly rather than through the workqueue, so there is no work item to pass
+	get_network_stats(NULL);
+	netPrev[NETRX] = rx_total;
+	netPrev[NETTX] = tx_total;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
+	setup_timer(&net_wake_up_timer, net_wake_up_handler, 0);
+#else
+	setup_deferrable_timer_on_stack(&net_wake_up_timer, net_wake_up_handler, 0);
+#endif
+	return 0;
+}
+
+// Stops the wake up timer, waits for any stats work it queued and disables both counters
+static void gator_events_net_stop(void)
+{
+	del_timer_sync(&net_wake_up_timer);
+	// The timer handler may already have queued wq_get_stats; deleting the
+	// timer does not cancel that work, so wait for it here.
+	// NOTE(review): cancel_work_sync() can sleep - this assumes stop handlers
+	// run in process context; confirm against the caller in gator_main.c
+	cancel_work_sync(&wq_get_stats);
+	netrx_enabled = 0;
+	nettx_enabled = 0;
+}
+
+// Re-arms the wake up timer and fills netGet with the rx/tx deltas that
+// changed since the previous read. Returns the number of ints written, or 0
+// when not on the primary core or when both counters are disabled.
+static int gator_events_net_read(int **buffer)
+{
+	int len, rx_delta, tx_delta;
+	static int last_rx_delta = 0, last_tx_delta = 0;
+
+	if (!on_primary_core())
+		return 0;
+
+	if (!netrx_enabled && !nettx_enabled)
+		return 0;
+
+	// Refresh the totals on the next jiffy, see net_wake_up_handler
+	mod_timer(&net_wake_up_timer, jiffies + 1);
+
+	calculate_delta(&rx_delta, &tx_delta);
+
+	len = 0;
+	if (netrx_enabled && last_rx_delta != rx_delta) {
+		last_rx_delta = rx_delta;
+		netGet[len++] = netrx_key;
+		netGet[len++] = 0;	// indicates to Streamline that rx_delta bytes were transmitted now, not since the last message
+		netGet[len++] = netrx_key;
+		netGet[len++] = rx_delta;
+	}
+
+	if (nettx_enabled && last_tx_delta != tx_delta) {
+		last_tx_delta = tx_delta;
+		netGet[len++] = nettx_key;
+		netGet[len++] = 0;	// indicates to Streamline that tx_delta bytes were transmitted now, not since the last message
+		netGet[len++] = nettx_key;
+		netGet[len++] = tx_delta;
+	}
+
+	if (buffer)
+		*buffer = netGet;
+
+	return len;
+}
+
+static struct gator_interface gator_events_net_interface = {
+	.create_files = gator_events_net_create_files,
+	.start = gator_events_net_start,
+	.stop = gator_events_net_stop,
+	.read = gator_events_net_read,
+};
+
+// Assigns the rx and tx keys, disables both counters and installs the
+// interface; returns the result of gator_events_install()
+int gator_events_net_init(void)
+{
+	netrx_key = gator_events_get_key();
+	nettx_key = gator_events_get_key();
+
+	netrx_enabled = 0;
+	nettx_enabled = 0;
+
+	return gator_events_install(&gator_events_net_interface);
+}
diff --git a/drivers/gator/gator_events_perf_pmu.c b/drivers/gator/gator_events_perf_pmu.c
new file mode 100644
index 000000000000..d472df918ab0
--- /dev/null
+++ b/drivers/gator/gator_events_perf_pmu.c
@@ -0,0 +1,587 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "gator.h"
+
+// gator_events_armvX.c is used for Linux 2.6.x
+#if GATOR_PERF_PMU_SUPPORT
+
+#include <linux/io.h>
+#ifdef CONFIG_OF
+#include <linux/of_address.h>
+#endif
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+
+extern bool event_based_sampling;
+
+// Maximum number of per-core counters - currently reserves enough space for two full hardware PMUs for big.LITTLE
+#define CNTMAX 16
+#define CCI_400 4
+// Maximum number of uncore counters
+// + 1 for the cci-400 cycles counter
+#define UCCNT (CCI_400 + 1)
+
+// Default to 0 if unable to probe the revision which was the previous behavior
+#define DEFAULT_CCI_REVISION 0
+
+// A gator_attr is needed for every counter
+struct gator_attr {
+	// Set once in gator_events_perf_pmu_*_init - the name of the event in the gatorfs
+	char name[40];
+	// Exposed in gatorfs - set by gatord to enable this counter
+	unsigned long enabled;
+	// Set once in gator_events_perf_pmu_*_init - the perf type to use, see perf_type_id in the perf_event.h header file.
+	unsigned long type;
+	// Exposed in gatorfs - set by gatord to select the event to collect
+	unsigned long event;
+	// Exposed in gatorfs - set by gatord with the sample period to use and enable EBS for this counter
+	unsigned long count;
+	// Exposed as read only in gatorfs - set once in __attr_init as the key to use in the APC data
+	unsigned long key;
+};
+
+// Per-core counter attributes
+static struct gator_attr attrs[CNTMAX];
+// Number of initialized per-core counters
+static int attr_count;
+// Uncore counter attributes
+static struct gator_attr uc_attrs[UCCNT];
+// Number of initialized uncore counters
+static int uc_attr_count;
+
+// Run time state of one counter
+struct gator_event {
+	// Counter value at the latest and at the previous read
+	int curr;
+	int prev;
+	// Delta emitted by the previous read
+	int prev_delta;
+	// Set by __online_dispatch; makes the next __read emit a zero baseline
+	bool zero;
+	// Kernel perf counter, NULL while the counter is not online
+	struct perf_event *pevent;
+	// Allocated in __start, freed in __event_stop
+	struct perf_event_attr *pevent_attr;
+};
+
+static DEFINE_PER_CPU(struct gator_event[CNTMAX], events);
+static struct gator_event uc_events[UCCNT];
+// Per cpu output buffer: one key/value pair per counter
+static DEFINE_PER_CPU(int[(CNTMAX + UCCNT)*2], perf_cnt);
+
+static void gator_events_perf_pmu_stop(void);
+
+// Creates the gatorfs directory and files of one counter; attributes without
+// a name are skipped. Returns 0 on success, -1 if the directory could not be created.
+static int __create_files(struct super_block *sb, struct dentry *root, struct gator_attr *const attr)
+{
+	struct dentry *dir;
+
+	if (attr->name[0] == '\0') {
+		return 0;
+	}
+	dir = gatorfs_mkdir(sb, root, attr->name);
+	if (!dir) {
+		return -1;
+	}
+	gatorfs_create_ulong(sb, dir, "enabled", &attr->enabled);
+	gatorfs_create_ulong(sb, dir, "count", &attr->count);
+	gatorfs_create_ro_ulong(sb, dir, "key", &attr->key);
+	gatorfs_create_ulong(sb, dir, "event", &attr->event);
+
+	return 0;
+}
+
+// Creates the gatorfs entries of every initialized per-core and uncore counter
+static int gator_events_perf_pmu_create_files(struct super_block *sb, struct dentry *root)
+{
+	int cnt;
+
+	for (cnt = 0; cnt < attr_count; cnt++) {
+		if (__create_files(sb, root, &attrs[cnt]) != 0) {
+			return -1;
+		}
+	}
+
+	for (cnt = 0; cnt < uc_attr_count; cnt++) {
+		if (__create_files(sb, root, &uc_attrs[cnt]) != 0) {
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+// Overflow handler of a counter with a sample period: takes a backtrace
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
+static void ebs_overflow_handler(struct perf_event *event, int unused, struct perf_sample_data *data, struct pt_regs *regs)
+#else
+static void ebs_overflow_handler(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs)
+#endif
+{
+	gator_backtrace_handler(regs);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
+static void dummy_handler(struct perf_event *event, int unused, struct perf_sample_data *data, struct pt_regs *regs)
+#else
+static void dummy_handler(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs)
+#endif
+{
+// Required as perf_event_create_kernel_counter() requires an overflow handler, even though all we do is poll
+}
+
+static int gator_events_perf_pmu_read(int **buffer);
+
+// Online is a plain read; __read emits the zero baseline, see the comment there
+static int gator_events_perf_pmu_online(int **buffer, bool migrate)
+{
+	return gator_events_perf_pmu_read(buffer);
+}
+
+// Creates the kernel perf counter of one event on cpu. Nothing is created when
+// the counter already exists, was not prepared by __start, or on a migrate.
+// On failure event->pevent is left NULL.
+static void __online_dispatch(int cpu, bool migrate, struct gator_attr *const attr, struct gator_event *const event)
+{
+	perf_overflow_handler_t handler;
+
+	event->zero = true;
+
+	if (event->pevent != NULL || event->pevent_attr == NULL || migrate) {
+		return;
+	}
+
+	// Only a counter with a sample period needs a real overflow handler
+	if (attr->count > 0) {
+		handler = ebs_overflow_handler;
+	} else {
+		handler = dummy_handler;
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
+	event->pevent = perf_event_create_kernel_counter(event->pevent_attr, cpu, 0, handler);
+#else
+	event->pevent = perf_event_create_kernel_counter(event->pevent_attr, cpu, 0, handler, 0);
+#endif
+	if (IS_ERR(event->pevent)) {
+		pr_debug("gator: unable to online a counter on cpu %d\n", cpu);
+		event->pevent = NULL;
+		return;
+	}
+
+	if (event->pevent->state != PERF_EVENT_STATE_ACTIVE) {
+		pr_debug("gator: inactive counter on cpu %d\n", cpu);
+		perf_event_release_kernel(event->pevent);
+		event->pevent = NULL;
+		return;
+	}
+}
+
+// Onlines the per-core counters of cpu and, on cpu 0, the uncore counters
+static void gator_events_perf_pmu_online_dispatch(int cpu, bool migrate)
+{
+	int cnt;
+
+	cpu = pcpu_to_lcpu(cpu);
+
+	for (cnt = 0; cnt < attr_count; cnt++) {
+		__online_dispatch(cpu, migrate, &attrs[cnt], &per_cpu(events, cpu)[cnt]);
+	}
+
+	if (cpu == 0) {
+		for (cnt = 0; cnt < uc_attr_count; cnt++) {
+			__online_dispatch(cpu, migrate, &uc_attrs[cnt], &uc_events[cnt]);
+		}
+	}
+}
+
+// Releases the kernel perf counter of one event, if it has one
+static void __offline_dispatch(int cpu, struct gator_event *const event)
+{
+	struct perf_event *pe = NULL;
+
+	// Clear the shared pointer before the counter is released
+	if (event->pevent) {
+		pe = event->pevent;
+		event->pevent = NULL;
+	}
+
+	if (pe) {
+		perf_event_release_kernel(pe);
+	}
+}
+
+// Offlines the per-core counters of cpu and, on cpu 0, the uncore counters;
+// nothing is released on a migrate
+static void gator_events_perf_pmu_offline_dispatch(int cpu, bool migrate)
+{
+	int cnt;
+
+	if (migrate) {
+		return;
+	}
+	cpu = pcpu_to_lcpu(cpu);
+
+	for (cnt = 0; cnt < attr_count; cnt++) {
+		__offline_dispatch(cpu, &per_cpu(events, cpu)[cnt]);
+	}
+
+	if (cpu == 0) {
+		for (cnt = 0; cnt < uc_attr_count; cnt++) {
+			__offline_dispatch(cpu, &uc_events[cnt]);
+		}
+	}
+}
+
+// Claims event based sampling for a counter that has a sample period.
+// Returns -1 if another counter already claimed it, 0 otherwise.
+static int __check_ebs(struct gator_attr *const attr)
+{
+	if (attr->count > 0) {
+		if (!event_based_sampling) {
+			event_based_sampling = true;
+		} else {
+			printk(KERN_WARNING "gator: Only one ebs counter is allowed\n");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+// Resets one event and, when its counter is enabled, allocates and fills the
+// perf_event_attr that __online_dispatch uses later. Returns 0 on success; on
+// allocation failure everything is stopped and -1 is returned.
+static int __start(struct gator_attr *const attr, struct gator_event *const event)
+{
+	u32 size = sizeof(struct perf_event_attr);
+
+	event->pevent = NULL;
+	if (!attr->enabled) {	// Skip disabled counters
+		return 0;
+	}
+
+	event->prev = 0;
+	event->curr = 0;
+	event->prev_delta = 0;
+	// Zeroed allocation, only the fields set below differ from 0
+	event->pevent_attr = kzalloc(size, GFP_KERNEL);
+	if (!event->pevent_attr) {
+		gator_events_perf_pmu_stop();
+		return -1;
+	}
+
+	event->pevent_attr->type = attr->type;
+	event->pevent_attr->size = size;
+	event->pevent_attr->config = attr->event;
+	event->pevent_attr->sample_period = attr->count;
+	event->pevent_attr->pinned = 1;
+
+	return 0;
+}
+
+// Validates the event based sampling request and prepares every enabled
+// counter, per present cpu for the per-core ones. Returns 0 on success, -1 on failure.
+static int gator_events_perf_pmu_start(void)
+{
+	int cnt, cpu;
+
+	event_based_sampling = false;
+	for (cnt = 0; cnt < attr_count; cnt++) {
+		if (__check_ebs(&attrs[cnt]) != 0) {
+			return -1;
+		}
+	}
+
+	for (cnt = 0; cnt < uc_attr_count; cnt++) {
+		if (__check_ebs(&uc_attrs[cnt]) != 0) {
+			return -1;
+		}
+	}
+
+	for_each_present_cpu(cpu) {
+		for (cnt = 0; cnt < attr_count; cnt++) {
+			if (__start(&attrs[cnt], &per_cpu(events, cpu)[cnt]) != 0) {
+				return -1;
+			}
+		}
+	}
+
+	for (cnt = 0; cnt < uc_attr_count; cnt++) {
+		if (__start(&uc_attrs[cnt], &uc_events[cnt]) != 0) {
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+// Frees the perf_event_attr allocated by __start
+static void __event_stop(struct gator_event *const event)
+{
+	// kfree(NULL) is a no-op, so no guard is needed
+	kfree(event->pevent_attr);
+	event->pevent_attr = NULL;
+}
+
+// Clears the settings gatord made for one counter
+static void __attr_stop(struct gator_attr *const attr)
+{
+	attr->enabled = 0;
+	attr->event = 0;
+	attr->count = 0;
+}
+
+// Frees every perf_event_attr and clears the gatord settings of every counter
+static void gator_events_perf_pmu_stop(void)
+{
+	int cnt, cpu;
+
+	for_each_present_cpu(cpu) {
+		for (cnt = 0; cnt < attr_count; cnt++) {
+			__event_stop(&per_cpu(events, cpu)[cnt]);
+		}
+	}
+
+	for (cnt = 0; cnt < uc_attr_count; cnt++) {
+		__event_stop(&uc_events[cnt]);
+	}
+
+	for (cnt = 0; cnt < attr_count; cnt++) {
+		__attr_stop(&attrs[cnt]);
+	}
+
+	for (cnt = 0; cnt < uc_attr_count; cnt++) {
+		__attr_stop(&uc_attrs[cnt]);
+	}
+}
+
+// Reads one active counter and appends a key/value pair to the perf_cnt buffer
+// of cpu, advancing *len by two for every pair written
+static void __read(int *const len, int cpu, struct gator_attr *const attr, struct gator_event *const event)
+{
+	int delta;
+
+	struct perf_event *const ev = event->pevent;
+	if (ev != NULL && ev->state == PERF_EVENT_STATE_ACTIVE) {
+		/* After creating the perf counter in __online_dispatch, there
+		 * is a race condition between gator_events_perf_pmu_online and
+		 * gator_events_perf_pmu_read. So have
+		 * gator_events_perf_pmu_online call gator_events_perf_pmu_read
+		 * and in __read check to see if it's the first call after
+		 * __online_dispatch and if so, run the online code.
+		 */
+		if (event->zero) {
+			ev->pmu->read(ev);
+			event->prev = event->curr = local64_read(&ev->count);
+			event->prev_delta = 0;
+			per_cpu(perf_cnt, cpu)[(*len)++] = attr->key;
+			per_cpu(perf_cnt, cpu)[(*len)++] = 0;
+			event->zero = false;
+		} else {
+			ev->pmu->read(ev);
+			event->curr = local64_read(&ev->count);
+			delta = event->curr - event->prev;
+			// Emit when the counter moved, and once more when it stops moving
+			if (delta != 0 || delta != event->prev_delta) {
+				event->prev_delta = delta;
+				event->prev = event->curr;
+				per_cpu(perf_cnt, cpu)[(*len)++] = attr->key;
+				if (delta < 0) {
+					delta *= -1;
+				}
+				per_cpu(perf_cnt, cpu)[(*len)++] = delta;
+			}
+		}
+	}
+}
+
+// Reads the per-core counters of the current cpu and, on cpu 0, the uncore
+// counters. Returns the number of ints written to this cpu's perf_cnt buffer.
+static int gator_events_perf_pmu_read(int **buffer)
+{
+	int cnt, len = 0;
+	const int cpu = get_logical_cpu();
+
+	for (cnt = 0; cnt < attr_count; cnt++) {
+		__read(&len, cpu, &attrs[cnt], &per_cpu(events, cpu)[cnt]);
+	}
+
+	if (cpu == 0) {
+		for (cnt = 0; cnt < uc_attr_count; cnt++) {
+			__read(&len, cpu, &uc_attrs[cnt], &uc_events[cnt]);
+		}
+	}
+
+	if (buffer) {
+		*buffer = per_cpu(perf_cnt, cpu);
+	}
+
+	return len;
+}
+
+static struct gator_interface gator_events_perf_pmu_interface = {
+	.create_files = gator_events_perf_pmu_create_files,
+	.start = gator_events_perf_pmu_start,
+	.stop = gator_events_perf_pmu_stop,
+	.online = gator_events_perf_pmu_online,
+	.online_dispatch = gator_events_perf_pmu_online_dispatch,
+	.offline_dispatch = gator_events_perf_pmu_offline_dispatch,
+	.read = gator_events_perf_pmu_read,
+};
+
+// Puts one attribute into its unnamed, disabled state and assigns its key
+static void __attr_init(struct gator_attr *const attr)
+{
+	attr->name[0] = '\0';
+	attr->enabled = 0;
+	attr->type = 0;
+	attr->event = 0;
+	attr->count = 0;
+	attr->key = gator_events_get_key();
+}
+
+#ifdef CONFIG_OF
+
+static const struct of_device_id arm_cci_matches[] = {
+	{.compatible = "arm,cci-400" },
+	{},
+};
+
+// Reads the revision field of the cci-400 found in the device tree and maps it
+// to 0 (field <= 4) or 1 (field 5 or 6). Returns DEFAULT_CCI_REVISION when the
+// node is missing, its registers cannot be located or mapped, or the field is
+// above 6.
+static int probe_cci_revision(void)
+{
+	struct device_node *np;
+	struct resource res;
+	void __iomem *cci_ctrl_base;
+	int rev;
+	int ret = DEFAULT_CCI_REVISION;
+
+	np = of_find_matching_node(NULL, arm_cci_matches);
+	if (!np) {
+		return ret;
+	}
+
+	if (of_address_to_resource(np, 0, &res)) {
+		goto node_put;
+	}
+
+	cci_ctrl_base = ioremap(res.start, resource_size(&res));
+	// The mapping can fail; fall back to the default instead of reading through NULL
+	if (!cci_ctrl_base) {
+		goto node_put;
+	}
+
+	rev = (readl_relaxed(cci_ctrl_base + 0xfe8) >> 4) & 0xf;
+
+	if (rev <= 4) {
+		ret = 0;
+	} else if (rev <= 6) {
+		ret = 1;
+	}
+
+	iounmap(cci_ctrl_base);
+
+ node_put:
+	of_node_put(np);
+
+	return ret;
+}
+
+#else
+
+static int probe_cci_revision(void)
+{
+	return DEFAULT_CCI_REVISION;
+}
+
+#endif
+
+// Names the cycle counter and the CCI_400 event counters of a cci-400 PMU in
+// uc_attrs. Returns 0 on success, also when the revision is not recognized
+// and nothing is added, and -1 when the counters do not fit in uc_attrs.
+static int gator_events_perf_pmu_cci_init(const int type)
+{
+	int cnt;
+	const char *cci_name;
+
+	switch (probe_cci_revision()) {
+	case 0:
+		cci_name = "cci-400";
+		break;
+	case 1:
+		cci_name = "cci-400-r1";
+		break;
+	default:
+		pr_debug("gator: unrecognized cci-400 revision\n");
+		return 0;
+	}
+
+	// Check before writing: the cycle counter and CCI_400 counters must all fit
+	if (uc_attr_count + 1 + CCI_400 > UCCNT) {
+		printk(KERN_ERR "gator: Too many perf uncore counters\n");
+		return -1;
+	}
+
+	snprintf(uc_attrs[uc_attr_count].name, sizeof(uc_attrs[uc_attr_count].name), "%s_ccnt", cci_name);
+	uc_attrs[uc_attr_count].type = type;
+	++uc_attr_count;
+
+	for (cnt = 0; cnt < CCI_400; ++cnt, ++uc_attr_count) {
+		struct gator_attr *const attr = &uc_attrs[uc_attr_count];
+		snprintf(attr->name, sizeof(attr->name), "%s_cnt%d", cci_name, cnt);
+		attr->type = type;
+	}
+
+	return 0;
+}
+
+// Names the cycle counter and the pmnc_counters event counters of a cpu PMU in
+// attrs. Returns 0 on success and -1 when the counters do not fit in attrs.
+static int gator_events_perf_pmu_cpu_init(const struct gator_cpu *const gator_cpu, const int type)
+{
+	int cnt;
+
+	// Check before writing: the cycle counter and every event counter must fit
+	if (attr_count + 1 + gator_cpu->pmnc_counters > CNTMAX) {
+		printk(KERN_ERR "gator: Too many perf counters\n");
+		return -1;
+	}
+
+	snprintf(attrs[attr_count].name, sizeof(attrs[attr_count].name), "%s_ccnt", gator_cpu->pmnc_name);
+	attrs[attr_count].type = type;
+	++attr_count;
+
+	for (cnt = 0; cnt < gator_cpu->pmnc_counters; ++cnt, ++attr_count) {
+		struct gator_attr *const attr = &attrs[attr_count];
+		snprintf(attr->name, sizeof(attr->name), "%s_cnt%d", gator_cpu->pmnc_name, cnt);
+		attr->type = type;
+	}
+
+	return 0;
+}
+
+// Probes the dynamic perf PMU types for cci-400 and known cpu PMUs, falls back
+// to PERF_TYPE_RAW for the current cpu when no cpu PMU was found, and installs
+// the interface. Returns -1 when no cpu can be identified or the counters do
+// not fit, otherwise the result of gator_events_install().
+int gator_events_perf_pmu_init(void)
+{
+	struct perf_event_attr pea;
+	struct perf_event *pe;
+	const struct gator_cpu *gator_cpu;
+	int type;
+	int cpu;
+	int cnt;
+	int err;
+	bool found_cpu = false;
+
+	for (cnt = 0; cnt < CNTMAX; cnt++) {
+		__attr_init(&attrs[cnt]);
+	}
+	for (cnt = 0; cnt < UCCNT; cnt++) {
+		__attr_init(&uc_attrs[cnt]);
+	}
+
+	memset(&pea, 0, sizeof(pea));
+	pea.size = sizeof(pea);
+	pea.config = 0xFF;
+	attr_count = 0;
+	uc_attr_count = 0;
+	for (type = PERF_TYPE_MAX; type < 0x20; ++type) {
+		pea.type = type;
+
+		// A particular PMU may work on some but not all cores, so try on each core
+		pe = NULL;
+		for_each_present_cpu(cpu) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
+			pe = perf_event_create_kernel_counter(&pea, cpu, 0, dummy_handler);
+#else
+			pe = perf_event_create_kernel_counter(&pea, cpu, 0, dummy_handler, 0);
+#endif
+			if (!IS_ERR(pe)) {
+				break;
+			}
+		}
+		// Assume that valid PMUs are contiguous
+		if (IS_ERR(pe)) {
+			break;
+		}
+
+		err = 0;
+		if (pe->pmu != NULL && type == pe->pmu->type) {
+			if (strcmp("CCI", pe->pmu->name) == 0 || strcmp("CCI_400", pe->pmu->name) == 0) {
+				err = gator_events_perf_pmu_cci_init(type);
+			} else if ((gator_cpu = gator_find_cpu_by_pmu_name(pe->pmu->name)) != NULL) {
+				found_cpu = true;
+				err = gator_events_perf_pmu_cpu_init(gator_cpu, type);
+			}
+			// Initialize gator_attrs for dynamic PMUs here
+		}
+
+		// The probe counter is released on the error path too
+		perf_event_release_kernel(pe);
+		if (err != 0) {
+			return -1;
+		}
+	}
+
+	if (!found_cpu) {
+		gator_cpu = gator_find_cpu_by_cpuid(gator_cpuid());
+		if (gator_cpu == NULL) {
+			return -1;
+		}
+		if (gator_events_perf_pmu_cpu_init(gator_cpu, PERF_TYPE_RAW) != 0) {
+			return -1;
+		}
+	}
+
+	// Initialize gator_attrs for non-dynamic PMUs here
+
+	// Capacity is checked in the *_init helpers before anything is written to
+	// attrs or uc_attrs, so no check is needed at this point
+
+	return gator_events_install(&gator_events_perf_pmu_interface);
+}
+
+#endif
diff --git a/drivers/gator/gator_events_sched.c b/drivers/gator/gator_events_sched.c
new file mode 100644
index 000000000000..29f4e39e261c
--- /dev/null
+++ b/drivers/gator/gator_events_sched.c
@@ -0,0 +1,113 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+#include <trace/events/sched.h>
+
+#define SCHED_SWITCH	0
+#define SCHED_TOTAL		(SCHED_SWITCH+1)
+
+static ulong sched_switch_enabled;
+static ulong sched_switch_key;
+// Per cpu number of context switches seen since the last read
+static DEFINE_PER_CPU(int[SCHED_TOTAL], schedCnt);
+// Per cpu output buffer: one key/value pair per counter
+static DEFINE_PER_CPU(int[SCHED_TOTAL * 2], schedGet);
+
+// sched_switch tracepoint probe: counts one context switch on the current cpu
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next))
+#else
+GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_struct *next))
+#endif
+{
+	unsigned long irq_state;
+
+	// interrupts are off while counting so that gator_events_sched_read()
+	// cannot run in between; the buffers are percpu, so no spinlock is needed
+	local_irq_save(irq_state);
+	per_cpu(schedCnt, get_physical_cpu())[SCHED_SWITCH] += 1;
+	local_irq_restore(irq_state);
+}
+
+// Creates the Linux_sched_switch gatorfs directory with its "enabled" and
+// "key" files. Returns 0 on success, -1 if the directory could not be created.
+static int gator_events_sched_create_files(struct super_block *sb, struct dentry *root)
+{
+	/* switch */
+	struct dentry *const switch_dir = gatorfs_mkdir(sb, root, "Linux_sched_switch");
+
+	if (switch_dir == NULL) {
+		return -1;
+	}
+
+	gatorfs_create_ulong(sb, switch_dir, "enabled", &sched_switch_enabled);
+	gatorfs_create_ro_ulong(sb, switch_dir, "key", &sched_switch_key);
+
+	return 0;
+}
+
+// Registers the sched_switch tracepoint when the counter is enabled.
+// Returns 0 on success, -1 if the registration failed.
+static int gator_events_sched_start(void)
+{
+	// the tracepoint is only hooked when the counter was requested
+	if (sched_switch_enabled && GATOR_REGISTER_TRACE(sched_switch)) {
+		pr_err("gator: scheduler event tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
+		return -1;
+	}
+
+	pr_debug("gator: registered scheduler event tracepoints\n");
+	return 0;
+}
+
+// Unregisters the tracepoint if the counter was enabled, then disables the counter
+static void gator_events_sched_stop(void)
+{
+	if (sched_switch_enabled) {
+		GATOR_UNREGISTER_TRACE(sched_switch);
+	}
+	pr_debug("gator: unregistered scheduler event tracepoints\n");
+
+	sched_switch_enabled = 0;
+}
+
+// Moves the current cpu's switch count into its output buffer and resets the
+// count. Returns the number of ints written: 2 when the counter is enabled, else 0.
+static int gator_events_sched_read(int **buffer)
+{
+	const int cpu = get_physical_cpu();
+	int *const out = per_cpu(schedGet, cpu);
+	int *const counts = per_cpu(schedCnt, cpu);
+	unsigned long irq_state;
+	int switches;
+	int len = 0;
+
+	if (sched_switch_enabled) {
+		// fetch and reset with interrupts off, see the probe above
+		local_irq_save(irq_state);
+		switches = counts[SCHED_SWITCH];
+		counts[SCHED_SWITCH] = 0;
+		local_irq_restore(irq_state);
+
+		out[len++] = sched_switch_key;
+		out[len++] = switches;
+	}
+
+	if (buffer) {
+		*buffer = out;
+	}
+
+	return len;
+}
+
+static struct gator_interface gator_events_sched_interface = {
+	.create_files = gator_events_sched_create_files,
+	.start = gator_events_sched_start,
+	.stop = gator_events_sched_stop,
+	.read = gator_events_sched_read,
+};
+
+// Disables the counter, assigns its key and installs the interface; returns
+// the result of gator_events_install()
+int gator_events_sched_init(void)
+{
+	sched_switch_key = gator_events_get_key();
+	sched_switch_enabled = 0;
+
+	return gator_events_install(&gator_events_sched_interface);
+}
diff --git a/drivers/gator/gator_events_scorpion.c b/drivers/gator/gator_events_scorpion.c
new file mode 100644
index 000000000000..c91db1219d08
--- /dev/null
+++ b/drivers/gator/gator_events_scorpion.c
@@ -0,0 +1,669 @@
+/**
+ * Copyright (C) ARM Limited 2011-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */ + +#include "gator.h" + +// gator_events_perf_pmu.c is used if perf is supported +#if GATOR_NO_PERF_SUPPORT + +static const char *pmnc_name; +static int pmnc_counters; + +// Per-CPU PMNC: config reg +#define PMNC_E (1 << 0) /* Enable all counters */ +#define PMNC_P (1 << 1) /* Reset all counters */ +#define PMNC_C (1 << 2) /* Cycle counter reset */ +#define PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ +#define PMNC_X (1 << 4) /* Export to ETM */ +#define PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug */ +#define PMNC_MASK 0x3f /* Mask for writable bits */ + +// ccnt reg +#define CCNT_REG (1 << 31) + +#define CCNT 0 +#define CNT0 1 +#define CNTMAX (4+1) + +static unsigned long pmnc_enabled[CNTMAX]; +static unsigned long pmnc_event[CNTMAX]; +static unsigned long pmnc_key[CNTMAX]; + +static DEFINE_PER_CPU(int[CNTMAX * 2], perfCnt); + +enum scorpion_perf_types { + SCORPION_ICACHE_EXPL_INV = 0x4c, + SCORPION_ICACHE_MISS = 0x4d, + SCORPION_ICACHE_ACCESS = 0x4e, + SCORPION_ICACHE_CACHEREQ_L2 = 0x4f, + SCORPION_ICACHE_NOCACHE_L2 = 0x50, + SCORPION_HIQUP_NOPED = 0x51, + SCORPION_DATA_ABORT = 0x52, + SCORPION_IRQ = 0x53, + SCORPION_FIQ = 0x54, + SCORPION_ALL_EXCPT = 0x55, + SCORPION_UNDEF = 0x56, + SCORPION_SVC = 0x57, + SCORPION_SMC = 0x58, + SCORPION_PREFETCH_ABORT = 0x59, + SCORPION_INDEX_CHECK = 0x5a, + SCORPION_NULL_CHECK = 0x5b, + SCORPION_EXPL_ICIALLU = 0x5c, + SCORPION_IMPL_ICIALLU = 0x5d, + SCORPION_NONICIALLU_BTAC_INV = 0x5e, + SCORPION_ICIMVAU_IMPL_ICIALLU = 0x5f, + SCORPION_SPIPE_ONLY_CYCLES = 0x60, + SCORPION_XPIPE_ONLY_CYCLES = 0x61, + SCORPION_DUAL_CYCLES = 0x62, + SCORPION_DISPATCH_ANY_CYCLES = 0x63, + SCORPION_FIFO_FULLBLK_CMT = 0x64, + SCORPION_FAIL_COND_INST = 0x65, + SCORPION_PASS_COND_INST = 0x66, + SCORPION_ALLOW_VU_CLK = 0x67, + SCORPION_VU_IDLE = 0x68, + SCORPION_ALLOW_L2_CLK = 0x69, + SCORPION_L2_IDLE = 0x6a, + SCORPION_DTLB_IMPL_INV_SCTLR_DACR = 0x6b, + SCORPION_DTLB_EXPL_INV = 0x6c, + SCORPION_DTLB_MISS = 0x6d, + 
SCORPION_DTLB_ACCESS = 0x6e, + SCORPION_ITLB_MISS = 0x6f, + SCORPION_ITLB_IMPL_INV = 0x70, + SCORPION_ITLB_EXPL_INV = 0x71, + SCORPION_UTLB_D_MISS = 0x72, + SCORPION_UTLB_D_ACCESS = 0x73, + SCORPION_UTLB_I_MISS = 0x74, + SCORPION_UTLB_I_ACCESS = 0x75, + SCORPION_UTLB_INV_ASID = 0x76, + SCORPION_UTLB_INV_MVA = 0x77, + SCORPION_UTLB_INV_ALL = 0x78, + SCORPION_S2_HOLD_RDQ_UNAVAIL = 0x79, + SCORPION_S2_HOLD = 0x7a, + SCORPION_S2_HOLD_DEV_OP = 0x7b, + SCORPION_S2_HOLD_ORDER = 0x7c, + SCORPION_S2_HOLD_BARRIER = 0x7d, + SCORPION_VIU_DUAL_CYCLE = 0x7e, + SCORPION_VIU_SINGLE_CYCLE = 0x7f, + SCORPION_VX_PIPE_WAR_STALL_CYCLES = 0x80, + SCORPION_VX_PIPE_WAW_STALL_CYCLES = 0x81, + SCORPION_VX_PIPE_RAW_STALL_CYCLES = 0x82, + SCORPION_VX_PIPE_LOAD_USE_STALL = 0x83, + SCORPION_VS_PIPE_WAR_STALL_CYCLES = 0x84, + SCORPION_VS_PIPE_WAW_STALL_CYCLES = 0x85, + SCORPION_VS_PIPE_RAW_STALL_CYCLES = 0x86, + SCORPION_EXCEPTIONS_INV_OPERATION = 0x87, + SCORPION_EXCEPTIONS_DIV_BY_ZERO = 0x88, + SCORPION_COND_INST_FAIL_VX_PIPE = 0x89, + SCORPION_COND_INST_FAIL_VS_PIPE = 0x8a, + SCORPION_EXCEPTIONS_OVERFLOW = 0x8b, + SCORPION_EXCEPTIONS_UNDERFLOW = 0x8c, + SCORPION_EXCEPTIONS_DENORM = 0x8d, +#ifdef CONFIG_ARCH_MSM_SCORPIONMP + SCORPIONMP_NUM_BARRIERS = 0x8e, + SCORPIONMP_BARRIER_CYCLES = 0x8f, +#else + SCORPION_BANK_AB_HIT = 0x8e, + SCORPION_BANK_AB_ACCESS = 0x8f, + SCORPION_BANK_CD_HIT = 0x90, + SCORPION_BANK_CD_ACCESS = 0x91, + SCORPION_BANK_AB_DSIDE_HIT = 0x92, + SCORPION_BANK_AB_DSIDE_ACCESS = 0x93, + SCORPION_BANK_CD_DSIDE_HIT = 0x94, + SCORPION_BANK_CD_DSIDE_ACCESS = 0x95, + SCORPION_BANK_AB_ISIDE_HIT = 0x96, + SCORPION_BANK_AB_ISIDE_ACCESS = 0x97, + SCORPION_BANK_CD_ISIDE_HIT = 0x98, + SCORPION_BANK_CD_ISIDE_ACCESS = 0x99, + SCORPION_ISIDE_RD_WAIT = 0x9a, + SCORPION_DSIDE_RD_WAIT = 0x9b, + SCORPION_BANK_BYPASS_WRITE = 0x9c, + SCORPION_BANK_AB_NON_CASTOUT = 0x9d, + SCORPION_BANK_AB_L2_CASTOUT = 0x9e, + SCORPION_BANK_CD_NON_CASTOUT = 0x9f, + SCORPION_BANK_CD_L2_CASTOUT = 0xa0, +#endif + 
MSM_MAX_EVT +}; + +struct scorp_evt { + u32 evt_type; + u32 val; + u8 grp; + u32 evt_type_act; +}; + +static const struct scorp_evt sc_evt[] = { + {SCORPION_ICACHE_EXPL_INV, 0x80000500, 0, 0x4d}, + {SCORPION_ICACHE_MISS, 0x80050000, 0, 0x4e}, + {SCORPION_ICACHE_ACCESS, 0x85000000, 0, 0x4f}, + {SCORPION_ICACHE_CACHEREQ_L2, 0x86000000, 0, 0x4f}, + {SCORPION_ICACHE_NOCACHE_L2, 0x87000000, 0, 0x4f}, + {SCORPION_HIQUP_NOPED, 0x80080000, 0, 0x4e}, + {SCORPION_DATA_ABORT, 0x8000000a, 0, 0x4c}, + {SCORPION_IRQ, 0x80000a00, 0, 0x4d}, + {SCORPION_FIQ, 0x800a0000, 0, 0x4e}, + {SCORPION_ALL_EXCPT, 0x8a000000, 0, 0x4f}, + {SCORPION_UNDEF, 0x8000000b, 0, 0x4c}, + {SCORPION_SVC, 0x80000b00, 0, 0x4d}, + {SCORPION_SMC, 0x800b0000, 0, 0x4e}, + {SCORPION_PREFETCH_ABORT, 0x8b000000, 0, 0x4f}, + {SCORPION_INDEX_CHECK, 0x8000000c, 0, 0x4c}, + {SCORPION_NULL_CHECK, 0x80000c00, 0, 0x4d}, + {SCORPION_EXPL_ICIALLU, 0x8000000d, 0, 0x4c}, + {SCORPION_IMPL_ICIALLU, 0x80000d00, 0, 0x4d}, + {SCORPION_NONICIALLU_BTAC_INV, 0x800d0000, 0, 0x4e}, + {SCORPION_ICIMVAU_IMPL_ICIALLU, 0x8d000000, 0, 0x4f}, + + {SCORPION_SPIPE_ONLY_CYCLES, 0x80000600, 1, 0x51}, + {SCORPION_XPIPE_ONLY_CYCLES, 0x80060000, 1, 0x52}, + {SCORPION_DUAL_CYCLES, 0x86000000, 1, 0x53}, + {SCORPION_DISPATCH_ANY_CYCLES, 0x89000000, 1, 0x53}, + {SCORPION_FIFO_FULLBLK_CMT, 0x8000000d, 1, 0x50}, + {SCORPION_FAIL_COND_INST, 0x800d0000, 1, 0x52}, + {SCORPION_PASS_COND_INST, 0x8d000000, 1, 0x53}, + {SCORPION_ALLOW_VU_CLK, 0x8000000e, 1, 0x50}, + {SCORPION_VU_IDLE, 0x80000e00, 1, 0x51}, + {SCORPION_ALLOW_L2_CLK, 0x800e0000, 1, 0x52}, + {SCORPION_L2_IDLE, 0x8e000000, 1, 0x53}, + + {SCORPION_DTLB_IMPL_INV_SCTLR_DACR, 0x80000001, 2, 0x54}, + {SCORPION_DTLB_EXPL_INV, 0x80000100, 2, 0x55}, + {SCORPION_DTLB_MISS, 0x80010000, 2, 0x56}, + {SCORPION_DTLB_ACCESS, 0x81000000, 2, 0x57}, + {SCORPION_ITLB_MISS, 0x80000200, 2, 0x55}, + {SCORPION_ITLB_IMPL_INV, 0x80020000, 2, 0x56}, + {SCORPION_ITLB_EXPL_INV, 0x82000000, 2, 0x57}, + {SCORPION_UTLB_D_MISS, 
0x80000003, 2, 0x54}, + {SCORPION_UTLB_D_ACCESS, 0x80000300, 2, 0x55}, + {SCORPION_UTLB_I_MISS, 0x80030000, 2, 0x56}, + {SCORPION_UTLB_I_ACCESS, 0x83000000, 2, 0x57}, + {SCORPION_UTLB_INV_ASID, 0x80000400, 2, 0x55}, + {SCORPION_UTLB_INV_MVA, 0x80040000, 2, 0x56}, + {SCORPION_UTLB_INV_ALL, 0x84000000, 2, 0x57}, + {SCORPION_S2_HOLD_RDQ_UNAVAIL, 0x80000800, 2, 0x55}, + {SCORPION_S2_HOLD, 0x88000000, 2, 0x57}, + {SCORPION_S2_HOLD_DEV_OP, 0x80000900, 2, 0x55}, + {SCORPION_S2_HOLD_ORDER, 0x80090000, 2, 0x56}, + {SCORPION_S2_HOLD_BARRIER, 0x89000000, 2, 0x57}, + + {SCORPION_VIU_DUAL_CYCLE, 0x80000001, 4, 0x5c}, + {SCORPION_VIU_SINGLE_CYCLE, 0x80000100, 4, 0x5d}, + {SCORPION_VX_PIPE_WAR_STALL_CYCLES, 0x80000005, 4, 0x5c}, + {SCORPION_VX_PIPE_WAW_STALL_CYCLES, 0x80000500, 4, 0x5d}, + {SCORPION_VX_PIPE_RAW_STALL_CYCLES, 0x80050000, 4, 0x5e}, + {SCORPION_VX_PIPE_LOAD_USE_STALL, 0x80000007, 4, 0x5c}, + {SCORPION_VS_PIPE_WAR_STALL_CYCLES, 0x80000008, 4, 0x5c}, + {SCORPION_VS_PIPE_WAW_STALL_CYCLES, 0x80000800, 4, 0x5d}, + {SCORPION_VS_PIPE_RAW_STALL_CYCLES, 0x80080000, 4, 0x5e}, + {SCORPION_EXCEPTIONS_INV_OPERATION, 0x8000000b, 4, 0x5c}, + {SCORPION_EXCEPTIONS_DIV_BY_ZERO, 0x80000b00, 4, 0x5d}, + {SCORPION_COND_INST_FAIL_VX_PIPE, 0x800b0000, 4, 0x5e}, + {SCORPION_COND_INST_FAIL_VS_PIPE, 0x8b000000, 4, 0x5f}, + {SCORPION_EXCEPTIONS_OVERFLOW, 0x8000000c, 4, 0x5c}, + {SCORPION_EXCEPTIONS_UNDERFLOW, 0x80000c00, 4, 0x5d}, + {SCORPION_EXCEPTIONS_DENORM, 0x8c000000, 4, 0x5f}, + +#ifdef CONFIG_ARCH_MSM_SCORPIONMP + {SCORPIONMP_NUM_BARRIERS, 0x80000e00, 3, 0x59}, + {SCORPIONMP_BARRIER_CYCLES, 0x800e0000, 3, 0x5a}, +#else + {SCORPION_BANK_AB_HIT, 0x80000001, 3, 0x58}, + {SCORPION_BANK_AB_ACCESS, 0x80000100, 3, 0x59}, + {SCORPION_BANK_CD_HIT, 0x80010000, 3, 0x5a}, + {SCORPION_BANK_CD_ACCESS, 0x81000000, 3, 0x5b}, + {SCORPION_BANK_AB_DSIDE_HIT, 0x80000002, 3, 0x58}, + {SCORPION_BANK_AB_DSIDE_ACCESS, 0x80000200, 3, 0x59}, + {SCORPION_BANK_CD_DSIDE_HIT, 0x80020000, 3, 0x5a}, + 
{SCORPION_BANK_CD_DSIDE_ACCESS, 0x82000000, 3, 0x5b}, + {SCORPION_BANK_AB_ISIDE_HIT, 0x80000003, 3, 0x58}, + {SCORPION_BANK_AB_ISIDE_ACCESS, 0x80000300, 3, 0x59}, + {SCORPION_BANK_CD_ISIDE_HIT, 0x80030000, 3, 0x5a}, + {SCORPION_BANK_CD_ISIDE_ACCESS, 0x83000000, 3, 0x5b}, + {SCORPION_ISIDE_RD_WAIT, 0x80000009, 3, 0x58}, + {SCORPION_DSIDE_RD_WAIT, 0x80090000, 3, 0x5a}, + {SCORPION_BANK_BYPASS_WRITE, 0x8000000a, 3, 0x58}, + {SCORPION_BANK_AB_NON_CASTOUT, 0x8000000c, 3, 0x58}, + {SCORPION_BANK_AB_L2_CASTOUT, 0x80000c00, 3, 0x59}, + {SCORPION_BANK_CD_NON_CASTOUT, 0x800c0000, 3, 0x5a}, + {SCORPION_BANK_CD_L2_CASTOUT, 0x8c000000, 3, 0x5b}, +#endif +}; + +static inline void scorpion_pmnc_write(u32 val) +{ + val &= PMNC_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (val)); +} + +static inline u32 scorpion_pmnc_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); + return val; +} + +static inline u32 scorpion_ccnt_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); + return val; +} + +static inline u32 scorpion_cntn_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); + return val; +} + +static inline u32 scorpion_pmnc_enable_counter(unsigned int cnt) +{ + u32 val; + + if (cnt >= CNTMAX) { + pr_err("gator: CPU%u enabling wrong PMNC counter %d\n", smp_processor_id(), cnt); + return -1; + } + + if (cnt == CCNT) + val = CCNT_REG; + else + val = (1 << (cnt - CNT0)); + + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); + + return cnt; +} + +static inline u32 scorpion_pmnc_disable_counter(unsigned int cnt) +{ + u32 val; + + if (cnt >= CNTMAX) { + pr_err("gator: CPU%u disabling wrong PMNC counter %d\n", smp_processor_id(), cnt); + return -1; + } + + if (cnt == CCNT) + val = CCNT_REG; + else + val = (1 << (cnt - CNT0)); + + asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); + + return cnt; +} + +static inline int scorpion_pmnc_select_counter(unsigned int cnt) +{ 
+ u32 val; + + if ((cnt == CCNT) || (cnt >= CNTMAX)) { + pr_err("gator: CPU%u selecting wrong PMNC counter %d\n", smp_processor_id(), cnt); + return -1; + } + + val = (cnt - CNT0); + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); + + return cnt; +} + +static u32 scorpion_read_lpm0(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val)); + return val; +} + +static void scorpion_write_lpm0(u32 val) +{ + asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val)); +} + +static u32 scorpion_read_lpm1(void) +{ + u32 val; + asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val)); + return val; +} + +static void scorpion_write_lpm1(u32 val) +{ + asm volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val)); +} + +static u32 scorpion_read_lpm2(void) +{ + u32 val; + asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val)); + return val; +} + +static void scorpion_write_lpm2(u32 val) +{ + asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val)); +} + +static u32 scorpion_read_l2lpm(void) +{ + u32 val; + asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val)); + return val; +} + +static void scorpion_write_l2lpm(u32 val) +{ + asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val)); +} + +static u32 scorpion_read_vlpm(void) +{ + u32 val; + asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val)); + return val; +} + +static void scorpion_write_vlpm(u32 val) +{ + asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val)); +} + +struct scorpion_access_funcs { + u32(*read)(void); + void (*write)(u32); +}; + +struct scorpion_access_funcs scor_func[] = { + {scorpion_read_lpm0, scorpion_write_lpm0}, + {scorpion_read_lpm1, scorpion_write_lpm1}, + {scorpion_read_lpm2, scorpion_write_lpm2}, + {scorpion_read_l2lpm, scorpion_write_l2lpm}, + {scorpion_read_vlpm, scorpion_write_vlpm}, +}; + +u32 venum_orig_val; +u32 fp_orig_val; + +static void scorpion_pre_vlpm(void) +{ + u32 venum_new_val; + u32 fp_new_val; + + /* CPACR Enable CP10 access */ + asm volatile("mrc p15, 
0, %0, c1, c0, 2" : "=r" (venum_orig_val)); + venum_new_val = venum_orig_val | 0x00300000; + asm volatile("mcr p15, 0, %0, c1, c0, 2" : : "r" (venum_new_val)); + /* Enable FPEXC */ + asm volatile("mrc p10, 7, %0, c8, c0, 0" : "=r" (fp_orig_val)); + fp_new_val = fp_orig_val | 0x40000000; + asm volatile("mcr p10, 7, %0, c8, c0, 0" : : "r" (fp_new_val)); +} + +static void scorpion_post_vlpm(void) +{ + /* Restore FPEXC */ + asm volatile("mcr p10, 7, %0, c8, c0, 0" : : "r" (fp_orig_val)); + /* Restore CPACR */ + asm volatile("mcr p15, 0, %0, c1, c0, 2" : : "r" (venum_orig_val)); +} + +#define COLMN0MASK 0x000000ff +#define COLMN1MASK 0x0000ff00 +#define COLMN2MASK 0x00ff0000 +static u32 scorpion_get_columnmask(u32 setval) +{ + if (setval & COLMN0MASK) + return 0xffffff00; + else if (setval & COLMN1MASK) + return 0xffff00ff; + else if (setval & COLMN2MASK) + return 0xff00ffff; + else + return 0x80ffffff; +} + +static void scorpion_evt_setup(u32 gr, u32 setval) +{ + u32 val; + if (gr == 4) + scorpion_pre_vlpm(); + val = scorpion_get_columnmask(setval) & scor_func[gr].read(); + val = val | setval; + scor_func[gr].write(val); + if (gr == 4) + scorpion_post_vlpm(); +} + +static int get_scorpion_evtinfo(unsigned int evt_type, struct scorp_evt *evtinfo) +{ + u32 idx; + if ((evt_type < 0x4c) || (evt_type >= MSM_MAX_EVT)) + return 0; + idx = evt_type - 0x4c; + if (sc_evt[idx].evt_type == evt_type) { + evtinfo->val = sc_evt[idx].val; + evtinfo->grp = sc_evt[idx].grp; + evtinfo->evt_type_act = sc_evt[idx].evt_type_act; + return 1; + } + return 0; +} + +static inline void scorpion_pmnc_write_evtsel(unsigned int cnt, u32 val) +{ + if (scorpion_pmnc_select_counter(cnt) == cnt) { + if (val < 0x40) { + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); + } else { + u32 zero = 0; + struct scorp_evt evtinfo; + // extract evtinfo.grp and evtinfo.tevt_type_act from val + if (get_scorpion_evtinfo(val, &evtinfo) == 0) + return; + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" 
(evtinfo.evt_type_act)); + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (zero)); + scorpion_evt_setup(evtinfo.grp, val); + } + } +} + +static void scorpion_pmnc_reset_counter(unsigned int cnt) +{ + u32 val = 0; + + if (cnt == CCNT) { + scorpion_pmnc_disable_counter(cnt); + + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (val)); + + if (pmnc_enabled[cnt] != 0) + scorpion_pmnc_enable_counter(cnt); + + } else if (cnt >= CNTMAX) { + pr_err("gator: CPU%u resetting wrong PMNC counter %d\n", smp_processor_id(), cnt); + } else { + scorpion_pmnc_disable_counter(cnt); + + if (scorpion_pmnc_select_counter(cnt) == cnt) + asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (val)); + + if (pmnc_enabled[cnt] != 0) + scorpion_pmnc_enable_counter(cnt); + } +} + +static int gator_events_scorpion_create_files(struct super_block *sb, struct dentry *root) +{ + struct dentry *dir; + int i; + + for (i = 0; i < pmnc_counters; i++) { + char buf[40]; + if (i == 0) { + snprintf(buf, sizeof buf, "%s_ccnt", pmnc_name); + } else { + snprintf(buf, sizeof buf, "%s_cnt%d", pmnc_name, i - 1); + } + dir = gatorfs_mkdir(sb, root, buf); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &pmnc_enabled[i]); + gatorfs_create_ro_ulong(sb, dir, "key", &pmnc_key[i]); + if (i > 0) { + gatorfs_create_ulong(sb, dir, "event", &pmnc_event[i]); + } + } + + return 0; +} + +static int gator_events_scorpion_online(int **buffer, bool migrate) +{ + unsigned int cnt, len = 0, cpu = smp_processor_id(); + + if (scorpion_pmnc_read() & PMNC_E) { + scorpion_pmnc_write(scorpion_pmnc_read() & ~PMNC_E); + } + + /* Initialize & Reset PMNC: C bit and P bit */ + scorpion_pmnc_write(PMNC_P | PMNC_C); + + for (cnt = CCNT; cnt < CNTMAX; cnt++) { + unsigned long event; + + if (!pmnc_enabled[cnt]) + continue; + + // disable counter + scorpion_pmnc_disable_counter(cnt); + + event = pmnc_event[cnt] & 255; + + // Set event (if destined for PMNx counters), We don't need to set the event if it's a cycle count 
+ if (cnt != CCNT) + scorpion_pmnc_write_evtsel(cnt, event); + + // reset counter + scorpion_pmnc_reset_counter(cnt); + + // Enable counter, do not enable interrupt for this counter + scorpion_pmnc_enable_counter(cnt); + } + + // enable + scorpion_pmnc_write(scorpion_pmnc_read() | PMNC_E); + + // read the counters and toss the invalid data, return zero instead + for (cnt = 0; cnt < pmnc_counters; cnt++) { + if (pmnc_enabled[cnt]) { + if (cnt == CCNT) { + scorpion_ccnt_read(); + } else if (scorpion_pmnc_select_counter(cnt) == cnt) { + scorpion_cntn_read(); + } + scorpion_pmnc_reset_counter(cnt); + + per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt]; + per_cpu(perfCnt, cpu)[len++] = 0; + } + } + + if (buffer) + *buffer = per_cpu(perfCnt, cpu); + + return len; +} + +static int gator_events_scorpion_offline(int **buffer, bool migrate) +{ + scorpion_pmnc_write(scorpion_pmnc_read() & ~PMNC_E); + return 0; +} + +static void gator_events_scorpion_stop(void) +{ + unsigned int cnt; + + for (cnt = CCNT; cnt < CNTMAX; cnt++) { + pmnc_enabled[cnt] = 0; + pmnc_event[cnt] = 0; + } +} + +static int gator_events_scorpion_read(int **buffer) +{ + int cnt, len = 0; + int cpu = smp_processor_id(); + + // a context switch may occur before the online hotplug event, thus need to check that the pmu is enabled + if (!(scorpion_pmnc_read() & PMNC_E)) { + return 0; + } + + for (cnt = 0; cnt < pmnc_counters; cnt++) { + if (pmnc_enabled[cnt]) { + int value; + if (cnt == CCNT) { + value = scorpion_ccnt_read(); + } else if (scorpion_pmnc_select_counter(cnt) == cnt) { + value = scorpion_cntn_read(); + } else { + value = 0; + } + scorpion_pmnc_reset_counter(cnt); + + per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt]; + per_cpu(perfCnt, cpu)[len++] = value; + } + } + + if (buffer) + *buffer = per_cpu(perfCnt, cpu); + + return len; +} + +static struct gator_interface gator_events_scorpion_interface = { + .create_files = gator_events_scorpion_create_files, + .stop = gator_events_scorpion_stop, + .online = 
gator_events_scorpion_online, + .offline = gator_events_scorpion_offline, + .read = gator_events_scorpion_read, +}; + +int gator_events_scorpion_init(void) +{ + unsigned int cnt; + + switch (gator_cpuid()) { + case SCORPION: + pmnc_name = "Scorpion"; + pmnc_counters = 4; + break; + case SCORPIONMP: + pmnc_name = "ScorpionMP"; + pmnc_counters = 4; + break; + default: + return -1; + } + + pmnc_counters++; // CNT[n] + CCNT + + for (cnt = CCNT; cnt < CNTMAX; cnt++) { + pmnc_enabled[cnt] = 0; + pmnc_event[cnt] = 0; + pmnc_key[cnt] = gator_events_get_key(); + } + + return gator_events_install(&gator_events_scorpion_interface); +} + +#endif diff --git a/drivers/gator/gator_fs.c b/drivers/gator/gator_fs.c new file mode 100644 index 000000000000..fe6f83d547e9 --- /dev/null +++ b/drivers/gator/gator_fs.c @@ -0,0 +1,382 @@ +/** + * @file gatorfs.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * + * A simple filesystem for configuration and + * access of oprofile. 
+ */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/pagemap.h> +#include <asm/uaccess.h> + +#define gatorfs_MAGIC 0x24051020 +#define TMPBUFSIZE 50 +DEFINE_SPINLOCK(gatorfs_lock); + +static struct inode *gatorfs_get_inode(struct super_block *sb, int mode) +{ + struct inode *inode = new_inode(sb); + + if (inode) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37) + inode->i_ino = get_next_ino(); +#endif + inode->i_mode = mode; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + } + return inode; +} + +static const struct super_operations s_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, +}; + +ssize_t gatorfs_str_to_user(char const *str, char __user *buf, size_t count, loff_t *offset) +{ + return simple_read_from_buffer(buf, count, offset, str, strlen(str)); +} + +ssize_t gatorfs_ulong_to_user(unsigned long val, char __user *buf, size_t count, loff_t *offset) +{ + char tmpbuf[TMPBUFSIZE]; + size_t maxlen = snprintf(tmpbuf, TMPBUFSIZE, "%lu\n", val); + if (maxlen > TMPBUFSIZE) + maxlen = TMPBUFSIZE; + return simple_read_from_buffer(buf, count, offset, tmpbuf, maxlen); +} + +ssize_t gatorfs_u64_to_user(u64 val, char __user *buf, size_t count, loff_t *offset) +{ + char tmpbuf[TMPBUFSIZE]; + size_t maxlen = snprintf(tmpbuf, TMPBUFSIZE, "%llu\n", val); + if (maxlen > TMPBUFSIZE) + maxlen = TMPBUFSIZE; + return simple_read_from_buffer(buf, count, offset, tmpbuf, maxlen); +} + +int gatorfs_ulong_from_user(unsigned long *val, char const __user *buf, size_t count) +{ + char tmpbuf[TMPBUFSIZE]; + unsigned long flags; + + if (!count) + return 0; + + if (count > TMPBUFSIZE - 1) + return -EINVAL; + + memset(tmpbuf, 0x0, TMPBUFSIZE); + + if (copy_from_user(tmpbuf, buf, count)) + return -EFAULT; + + spin_lock_irqsave(&gatorfs_lock, flags); + *val = simple_strtoul(tmpbuf, NULL, 0); + spin_unlock_irqrestore(&gatorfs_lock, flags); + return 0; +} + +int gatorfs_u64_from_user(u64 *val, char const 
__user *buf, size_t count) +{ + char tmpbuf[TMPBUFSIZE]; + unsigned long flags; + + if (!count) + return 0; + + if (count > TMPBUFSIZE - 1) + return -EINVAL; + + memset(tmpbuf, 0x0, TMPBUFSIZE); + + if (copy_from_user(tmpbuf, buf, count)) + return -EFAULT; + + spin_lock_irqsave(&gatorfs_lock, flags); + *val = simple_strtoull(tmpbuf, NULL, 0); + spin_unlock_irqrestore(&gatorfs_lock, flags); + return 0; +} + +static ssize_t ulong_read_file(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + unsigned long *val = file->private_data; + return gatorfs_ulong_to_user(*val, buf, count, offset); +} + +static ssize_t u64_read_file(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + u64 *val = file->private_data; + return gatorfs_u64_to_user(*val, buf, count, offset); +} + +static ssize_t ulong_write_file(struct file *file, char const __user *buf, size_t count, loff_t *offset) +{ + unsigned long *value = file->private_data; + int retval; + + if (*offset) + return -EINVAL; + + retval = gatorfs_ulong_from_user(value, buf, count); + + if (retval) + return retval; + return count; +} + +static ssize_t u64_write_file(struct file *file, char const __user *buf, size_t count, loff_t *offset) +{ + u64 *value = file->private_data; + int retval; + + if (*offset) + return -EINVAL; + + retval = gatorfs_u64_from_user(value, buf, count); + + if (retval) + return retval; + return count; +} + +static int default_open(struct inode *inode, struct file *filp) +{ + if (inode->i_private) + filp->private_data = inode->i_private; + return 0; +} + +static const struct file_operations ulong_fops = { + .read = ulong_read_file, + .write = ulong_write_file, + .open = default_open, +}; + +static const struct file_operations u64_fops = { + .read = u64_read_file, + .write = u64_write_file, + .open = default_open, +}; + +static const struct file_operations ulong_ro_fops = { + .read = ulong_read_file, + .open = default_open, +}; + +static const struct file_operations 
u64_ro_fops = { + .read = u64_read_file, + .open = default_open, +}; + +static struct dentry *__gatorfs_create_file(struct super_block *sb, + struct dentry *root, + char const *name, + const struct file_operations *fops, + int perm) +{ + struct dentry *dentry; + struct inode *inode; + + dentry = d_alloc_name(root, name); + if (!dentry) + return NULL; + inode = gatorfs_get_inode(sb, S_IFREG | perm); + if (!inode) { + dput(dentry); + return NULL; + } + inode->i_fop = fops; + d_add(dentry, inode); + return dentry; +} + +int gatorfs_create_ulong(struct super_block *sb, struct dentry *root, + char const *name, unsigned long *val) +{ + struct dentry *d = __gatorfs_create_file(sb, root, name, + &ulong_fops, 0644); + if (!d) + return -EFAULT; + + d->d_inode->i_private = val; + return 0; +} + +int gatorfs_create_u64(struct super_block *sb, struct dentry *root, + char const *name, u64 *val) +{ + struct dentry *d = __gatorfs_create_file(sb, root, name, + &u64_fops, 0644); + if (!d) + return -EFAULT; + + d->d_inode->i_private = val; + return 0; +} + +int gatorfs_create_ro_ulong(struct super_block *sb, struct dentry *root, + char const *name, unsigned long *val) +{ + struct dentry *d = __gatorfs_create_file(sb, root, name, + &ulong_ro_fops, 0444); + if (!d) + return -EFAULT; + + d->d_inode->i_private = val; + return 0; +} + +int gatorfs_create_ro_u64(struct super_block *sb, struct dentry *root, + char const *name, u64 * val) +{ + struct dentry *d = + __gatorfs_create_file(sb, root, name, &u64_ro_fops, 0444); + if (!d) + return -EFAULT; + + d->d_inode->i_private = val; + return 0; +} + +static ssize_t atomic_read_file(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + atomic_t *val = file->private_data; + return gatorfs_ulong_to_user(atomic_read(val), buf, count, offset); +} + +static const struct file_operations atomic_ro_fops = { + .read = atomic_read_file, + .open = default_open, +}; + +int gatorfs_create_ro_atomic(struct super_block *sb, struct dentry 
*root, + char const *name, atomic_t *val) +{ + struct dentry *d = __gatorfs_create_file(sb, root, name, + &atomic_ro_fops, 0444); + if (!d) + return -EFAULT; + + d->d_inode->i_private = val; + return 0; +} + +int gatorfs_create_file(struct super_block *sb, struct dentry *root, + char const *name, const struct file_operations *fops) +{ + if (!__gatorfs_create_file(sb, root, name, fops, 0644)) + return -EFAULT; + return 0; +} + +int gatorfs_create_file_perm(struct super_block *sb, struct dentry *root, + char const *name, + const struct file_operations *fops, int perm) +{ + if (!__gatorfs_create_file(sb, root, name, fops, perm)) + return -EFAULT; + return 0; +} + +struct dentry *gatorfs_mkdir(struct super_block *sb, + struct dentry *root, char const *name) +{ + struct dentry *dentry; + struct inode *inode; + + dentry = d_alloc_name(root, name); + if (!dentry) + return NULL; + inode = gatorfs_get_inode(sb, S_IFDIR | 0755); + if (!inode) { + dput(dentry); + return NULL; + } + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + d_add(dentry, inode); + return dentry; +} + +static int gatorfs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct inode *root_inode; + struct dentry *root_dentry; + + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = gatorfs_MAGIC; + sb->s_op = &s_ops; + sb->s_time_gran = 1; + + root_inode = gatorfs_get_inode(sb, S_IFDIR | 0755); + if (!root_inode) + return -ENOMEM; + root_inode->i_op = &simple_dir_inode_operations; + root_inode->i_fop = &simple_dir_operations; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0) + root_dentry = d_alloc_root(root_inode); +#else + root_dentry = d_make_root(root_inode); +#endif + + if (!root_dentry) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0) + iput(root_inode); +#endif + return -ENOMEM; + } + + sb->s_root = root_dentry; + + gator_op_create_files(sb, root_dentry); + + return 0; +} + +#if LINUX_VERSION_CODE < 
KERNEL_VERSION(2, 6, 39) +static int gatorfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + struct vfsmount *mnt) +{ + return get_sb_single(fs_type, flags, data, gatorfs_fill_super, mnt); +} +#else +static struct dentry *gatorfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return mount_nodev(fs_type, flags, data, gatorfs_fill_super); +} +#endif + +static struct file_system_type gatorfs_type = { + .owner = THIS_MODULE, + .name = "gatorfs", +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) + .get_sb = gatorfs_get_sb, +#else + .mount = gatorfs_mount, +#endif + + .kill_sb = kill_litter_super, +}; + +int __init gatorfs_register(void) +{ + return register_filesystem(&gatorfs_type); +} + +void gatorfs_unregister(void) +{ + unregister_filesystem(&gatorfs_type); +} diff --git a/drivers/gator/gator_hrtimer_gator.c b/drivers/gator/gator_hrtimer_gator.c new file mode 100644 index 000000000000..b0c947afe1e1 --- /dev/null +++ b/drivers/gator/gator_hrtimer_gator.c @@ -0,0 +1,86 @@ +/** + * Copyright (C) ARM Limited 2011-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +// gator_hrtimer_perf.c is used if perf is supported +// update, gator_hrtimer_gator.c always used until issues resolved with perf hrtimers +#if 1 + +void (*callback)(void); +DEFINE_PER_CPU(struct hrtimer, percpu_hrtimer); +DEFINE_PER_CPU(ktime_t, hrtimer_expire); +DEFINE_PER_CPU(int, hrtimer_is_active); +static ktime_t profiling_interval; +static void gator_hrtimer_online(void); +static void gator_hrtimer_offline(void); + +static enum hrtimer_restart gator_hrtimer_notify(struct hrtimer *hrtimer) +{ + int cpu = get_logical_cpu(); + hrtimer_forward(hrtimer, per_cpu(hrtimer_expire, cpu), profiling_interval); + per_cpu(hrtimer_expire, cpu) = ktime_add(per_cpu(hrtimer_expire, cpu), profiling_interval); + (*callback)(); + return HRTIMER_RESTART; +} + +static void gator_hrtimer_online(void) +{ + int cpu = get_logical_cpu(); + struct hrtimer *hrtimer = &per_cpu(percpu_hrtimer, cpu); + + if (per_cpu(hrtimer_is_active, cpu) || profiling_interval.tv64 == 0) + return; + + per_cpu(hrtimer_is_active, cpu) = 1; + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer->function = gator_hrtimer_notify; +#ifdef CONFIG_PREEMPT_RT_BASE + hrtimer->irqsafe = 1; +#endif + per_cpu(hrtimer_expire, cpu) = ktime_add(hrtimer->base->get_time(), profiling_interval); + hrtimer_start(hrtimer, per_cpu(hrtimer_expire, cpu), HRTIMER_MODE_ABS_PINNED); +} + +static void gator_hrtimer_offline(void) +{ + int cpu = get_logical_cpu(); + struct hrtimer *hrtimer = &per_cpu(percpu_hrtimer, cpu); + + if (!per_cpu(hrtimer_is_active, cpu)) + return; + + per_cpu(hrtimer_is_active, cpu) = 0; + hrtimer_cancel(hrtimer); +} + +static int gator_hrtimer_init(int interval, void (*func)(void)) +{ + int cpu; + + (callback) = (func); + + for_each_present_cpu(cpu) { + per_cpu(hrtimer_is_active, cpu) = 0; + } + + // calculate profiling interval + if (interval > 0) { + profiling_interval = ns_to_ktime(1000000000UL / interval); + } else { + profiling_interval.tv64 = 0; + } + + return 0; +} + +static 
void gator_hrtimer_shutdown(void) +{ + /* empty */ +} + +#endif diff --git a/drivers/gator/gator_hrtimer_perf.c b/drivers/gator/gator_hrtimer_perf.c new file mode 100644 index 000000000000..7b95399478e4 --- /dev/null +++ b/drivers/gator/gator_hrtimer_perf.c @@ -0,0 +1,113 @@ +/** + * Copyright (C) ARM Limited 2011-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +// gator_hrtimer_gator.c is used if perf is not supported +// update, gator_hrtimer_gator.c always used until issues resolved with perf hrtimers +#if 0 + +// Note: perf Cortex support added in 2.6.35 and PERF_COUNT_SW_CPU_CLOCK/hrtimer broken on 2.6.35 and 2.6.36 +// not relevant as this code is not active until 3.0.0, but wanted to document the issue + +void (*callback)(void); +static int profiling_interval; +static DEFINE_PER_CPU(struct perf_event *, perf_hrtimer); +static DEFINE_PER_CPU(struct perf_event_attr *, perf_hrtimer_attr); + +static void gator_hrtimer_shutdown(void); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0) +static void hrtimer_overflow_handler(struct perf_event *event, int unused, struct perf_sample_data *data, struct pt_regs *regs) +#else +static void hrtimer_overflow_handler(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) +#endif +{ + (*callback)(); +} + +static int gator_online_single_hrtimer(int cpu) +{ + if (per_cpu(perf_hrtimer, cpu) != 0 || per_cpu(perf_hrtimer_attr, cpu) == 0) + return 0; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0) + per_cpu(perf_hrtimer, cpu) = perf_event_create_kernel_counter(per_cpu(perf_hrtimer_attr, cpu), cpu, 0, hrtimer_overflow_handler); +#else + per_cpu(perf_hrtimer, cpu) = perf_event_create_kernel_counter(per_cpu(perf_hrtimer_attr, cpu), cpu, 0, hrtimer_overflow_handler, 0); +#endif + if (IS_ERR(per_cpu(perf_hrtimer, cpu))) { + 
per_cpu(perf_hrtimer, cpu) = NULL; + return -1; + } + + if (per_cpu(perf_hrtimer, cpu)->state != PERF_EVENT_STATE_ACTIVE) { + perf_event_release_kernel(per_cpu(perf_hrtimer, cpu)); + per_cpu(perf_hrtimer, cpu) = NULL; + return -1; + } + + return 0; +} + +static void gator_hrtimer_online(int cpu) +{ + if (gator_online_single_hrtimer(cpu) < 0) { + pr_debug("gator: unable to online the hrtimer on cpu%d\n", cpu); + } +} + +static void gator_hrtimer_offline(int cpu) +{ + if (per_cpu(perf_hrtimer, cpu)) { + perf_event_release_kernel(per_cpu(perf_hrtimer, cpu)); + per_cpu(perf_hrtimer, cpu) = NULL; + } +} + +static int gator_hrtimer_init(int interval, void (*func)(void)) +{ + u32 size = sizeof(struct perf_event_attr); + int cpu; + + callback = func; + + // calculate profiling interval + profiling_interval = 1000000000 / interval; + + for_each_present_cpu(cpu) { + per_cpu(perf_hrtimer, cpu) = 0; + per_cpu(perf_hrtimer_attr, cpu) = kmalloc(size, GFP_KERNEL); + if (per_cpu(perf_hrtimer_attr, cpu) == 0) { + gator_hrtimer_shutdown(); + return -1; + } + + memset(per_cpu(perf_hrtimer_attr, cpu), 0, size); + per_cpu(perf_hrtimer_attr, cpu)->type = PERF_TYPE_SOFTWARE; + per_cpu(perf_hrtimer_attr, cpu)->size = size; + per_cpu(perf_hrtimer_attr, cpu)->config = PERF_COUNT_SW_CPU_CLOCK; + per_cpu(perf_hrtimer_attr, cpu)->sample_period = profiling_interval; + per_cpu(perf_hrtimer_attr, cpu)->pinned = 1; + } + + return 0; +} + +static void gator_hrtimer_shutdown(void) +{ + int cpu; + + for_each_present_cpu(cpu) { + if (per_cpu(perf_hrtimer_attr, cpu)) { + kfree(per_cpu(perf_hrtimer_attr, cpu)); + per_cpu(perf_hrtimer_attr, cpu) = NULL; + } + } +} + +#endif diff --git a/drivers/gator/gator_iks.c b/drivers/gator/gator_iks.c new file mode 100644 index 000000000000..0a90bdd1904e --- /dev/null +++ b/drivers/gator/gator_iks.c @@ -0,0 +1,197 @@ +/** + * Copyright (C) ARM Limited 2013. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#if GATOR_IKS_SUPPORT + +#include <linux/of.h> +#include <asm/bL_switcher.h> +#include <asm/smp_plat.h> +#include <trace/events/power_cpu_migrate.h> + +static bool map_cpuids; +static int mpidr_cpuids[NR_CPUS]; +static const struct gator_cpu * mpidr_cpus[NR_CPUS]; +static int __lcpu_to_pcpu[NR_CPUS]; + +static const struct gator_cpu *gator_find_cpu_by_dt_name(const char *const name) +{ + int i; + + for (i = 0; gator_cpus[i].cpuid != 0; ++i) { + const struct gator_cpu *const gator_cpu = &gator_cpus[i]; + if (gator_cpu->dt_name != NULL && strcmp(gator_cpu->dt_name, name) == 0) { + return gator_cpu; + } + } + + return NULL; +} + +static void calc_first_cluster_size(void) +{ + int len; + const u32 *val; + const char *compatible; + struct device_node *cn = NULL; + int mpidr_cpuids_count = 0; + + // Zero is a valid cpuid, so initialize the array to 0xff's + memset(&mpidr_cpuids, 0xff, sizeof(mpidr_cpuids)); + memset(&mpidr_cpus, 0, sizeof(mpidr_cpus)); + + while ((cn = of_find_node_by_type(cn, "cpu"))) { + BUG_ON(mpidr_cpuids_count >= NR_CPUS); + + val = of_get_property(cn, "reg", &len); + if (!val || len != 4) { + pr_err("%s missing reg property\n", cn->full_name); + continue; + } + compatible = of_get_property(cn, "compatible", NULL); + if (compatible == NULL) { + pr_err("%s missing compatible property\n", cn->full_name); + continue; + } + + mpidr_cpuids[mpidr_cpuids_count] = be32_to_cpup(val); + mpidr_cpus[mpidr_cpuids_count] = gator_find_cpu_by_dt_name(compatible); + ++mpidr_cpuids_count; + } + + map_cpuids = (mpidr_cpuids_count == nr_cpu_ids); +} + +static int linearize_mpidr(int mpidr) +{ + int i; + for (i = 0; i < nr_cpu_ids; ++i) { + if (mpidr_cpuids[i] == mpidr) { + return i; + } + } + + BUG(); +} + +int lcpu_to_pcpu(const int lcpu) +{ + int pcpu; + + if 
(!map_cpuids) + return lcpu; + + BUG_ON(lcpu >= nr_cpu_ids || lcpu < 0); + pcpu = __lcpu_to_pcpu[lcpu]; + BUG_ON(pcpu >= nr_cpu_ids || pcpu < 0); + return pcpu; +} + +int pcpu_to_lcpu(const int pcpu) +{ + int lcpu; + + if (!map_cpuids) + return pcpu; + + BUG_ON(pcpu >= nr_cpu_ids || pcpu < 0); + for (lcpu = 0; lcpu < nr_cpu_ids; ++lcpu) { + if (__lcpu_to_pcpu[lcpu] == pcpu) { + BUG_ON(lcpu >= nr_cpu_ids || lcpu < 0); + return lcpu; + } + } + BUG(); +} + +static void gator_update_cpu_mapping(u32 cpu_hwid) +{ + int lcpu = smp_processor_id(); + int pcpu = linearize_mpidr(cpu_hwid & MPIDR_HWID_BITMASK); + BUG_ON(lcpu >= nr_cpu_ids || lcpu < 0); + BUG_ON(pcpu >= nr_cpu_ids || pcpu < 0); + __lcpu_to_pcpu[lcpu] = pcpu; +} + +GATOR_DEFINE_PROBE(cpu_migrate_begin, TP_PROTO(u64 timestamp, u32 cpu_hwid)) +{ + const int cpu = get_physical_cpu(); + + gator_timer_offline((void *)1); + gator_timer_offline_dispatch(cpu, true); +} + +GATOR_DEFINE_PROBE(cpu_migrate_finish, TP_PROTO(u64 timestamp, u32 cpu_hwid)) +{ + int cpu; + + gator_update_cpu_mapping(cpu_hwid); + + // get_physical_cpu must be called after gator_update_cpu_mapping + cpu = get_physical_cpu(); + gator_timer_online_dispatch(cpu, true); + gator_timer_online((void *)1); +} + +GATOR_DEFINE_PROBE(cpu_migrate_current, TP_PROTO(u64 timestamp, u32 cpu_hwid)) +{ + gator_update_cpu_mapping(cpu_hwid); +} + +static void gator_send_iks_core_names(void) +{ + int cpu; + // Send the cpu names + preempt_disable(); + for (cpu = 0; cpu < nr_cpu_ids; ++cpu) { + if (mpidr_cpus[cpu] != NULL) { + gator_send_core_name(cpu, mpidr_cpus[cpu]->cpuid, mpidr_cpus[cpu]); + } + } + preempt_enable(); +} + +static int gator_migrate_start(void) +{ + int retval = 0; + + if (!map_cpuids) + return retval; + + if (retval == 0) + retval = GATOR_REGISTER_TRACE(cpu_migrate_begin); + if (retval == 0) + retval = GATOR_REGISTER_TRACE(cpu_migrate_finish); + if (retval == 0) + retval = GATOR_REGISTER_TRACE(cpu_migrate_current); + if (retval == 0) { + // 
Initialize the logical to physical cpu mapping + memset(&__lcpu_to_pcpu, 0xff, sizeof(__lcpu_to_pcpu)); + bL_switcher_trace_trigger(); + } + return retval; +} + +static void gator_migrate_stop(void) +{ + if (!map_cpuids) + return; + + GATOR_UNREGISTER_TRACE(cpu_migrate_current); + GATOR_UNREGISTER_TRACE(cpu_migrate_finish); + GATOR_UNREGISTER_TRACE(cpu_migrate_begin); +} + +#else + +#define calc_first_cluster_size() +#define gator_send_iks_core_names() +#define gator_migrate_start() 0 +#define gator_migrate_stop() + +#endif diff --git a/drivers/gator/gator_main.c b/drivers/gator/gator_main.c new file mode 100644 index 000000000000..19f51c7cd8ee --- /dev/null +++ b/drivers/gator/gator_main.c @@ -0,0 +1,1532 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +// This version must match the gator daemon version +#define PROTOCOL_VERSION 17 +static unsigned long gator_protocol_version = PROTOCOL_VERSION; + +#include <linux/slab.h> +#include <linux/cpu.h> +#include <linux/sched.h> +#include <linux/irq.h> +#include <linux/vmalloc.h> +#include <linux/hardirq.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> +#include <linux/suspend.h> +#include <linux/module.h> +#include <linux/perf_event.h> +#include <linux/utsname.h> +#include <linux/kthread.h> +#include <asm/stacktrace.h> +#include <asm/uaccess.h> + +#include "gator.h" + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32) +#error kernels prior to 2.6.32 are not supported +#endif + +#if defined(MODULE) && !defined(CONFIG_MODULES) +#error Cannot build a module against a kernel that does not support modules. To resolve, either rebuild the kernel to support modules or build gator as part of the kernel. 
+#endif + +#if !defined(CONFIG_GENERIC_TRACER) && !defined(CONFIG_TRACING) +#error gator requires the kernel to have CONFIG_GENERIC_TRACER or CONFIG_TRACING defined +#endif + +#ifndef CONFIG_PROFILING +#error gator requires the kernel to have CONFIG_PROFILING defined +#endif + +#ifndef CONFIG_HIGH_RES_TIMERS +#error gator requires the kernel to have CONFIG_HIGH_RES_TIMERS defined to support PC sampling +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && defined(__arm__) && defined(CONFIG_SMP) && !defined(CONFIG_LOCAL_TIMERS) +#error gator requires the kernel to have CONFIG_LOCAL_TIMERS defined on SMP systems +#endif + +#if (GATOR_PERF_SUPPORT) && (!(GATOR_PERF_PMU_SUPPORT)) +#ifndef CONFIG_PERF_EVENTS +#warning gator requires the kernel to have CONFIG_PERF_EVENTS defined to support pmu hardware counters +#elif !defined CONFIG_HW_PERF_EVENTS +#warning gator requires the kernel to have CONFIG_HW_PERF_EVENTS defined to support pmu hardware counters +#endif +#endif + +/****************************************************************************** + * DEFINES + ******************************************************************************/ +#define SUMMARY_BUFFER_SIZE (1*1024) +#define BACKTRACE_BUFFER_SIZE (128*1024) +#define NAME_BUFFER_SIZE (64*1024) +#define COUNTER_BUFFER_SIZE (64*1024) // counters have the core as part of the data and the core value in the frame header may be discarded +#define BLOCK_COUNTER_BUFFER_SIZE (128*1024) +#define ANNOTATE_BUFFER_SIZE (128*1024) // annotate counters have the core as part of the data and the core value in the frame header may be discarded +#define SCHED_TRACE_BUFFER_SIZE (128*1024) +#define GPU_TRACE_BUFFER_SIZE (64*1024) // gpu trace counters have the core as part of the data and the core value in the frame header may be discarded +#define IDLE_BUFFER_SIZE (32*1024) // idle counters have the core as part of the data and the core value in the frame header may be discarded + +#define NO_COOKIE 0U +#define 
UNRESOLVED_COOKIE ~0U + +#define FRAME_SUMMARY 1 +#define FRAME_BACKTRACE 2 +#define FRAME_NAME 3 +#define FRAME_COUNTER 4 +#define FRAME_BLOCK_COUNTER 5 +#define FRAME_ANNOTATE 6 +#define FRAME_SCHED_TRACE 7 +#define FRAME_GPU_TRACE 8 +#define FRAME_IDLE 9 + +#define MESSAGE_END_BACKTRACE 1 + +#define MESSAGE_COOKIE 1 +#define MESSAGE_THREAD_NAME 2 +#define HRTIMER_CORE_NAME 3 +#define MESSAGE_LINK 4 + +#define MESSAGE_GPU_START 1 +#define MESSAGE_GPU_STOP 2 + +#define MESSAGE_SCHED_SWITCH 1 +#define MESSAGE_SCHED_EXIT 2 +#define MESSAGE_SCHED_START 3 + +#define MESSAGE_IDLE_ENTER 1 +#define MESSAGE_IDLE_EXIT 2 + +#define MAXSIZE_PACK32 5 +#define MAXSIZE_PACK64 10 + +#define FRAME_HEADER_SIZE 3 + +#if defined(__arm__) +#define PC_REG regs->ARM_pc +#elif defined(__aarch64__) +#define PC_REG regs->pc +#else +#define PC_REG regs->ip +#endif + +enum { + SUMMARY_BUF, + BACKTRACE_BUF, + NAME_BUF, + COUNTER_BUF, + BLOCK_COUNTER_BUF, + ANNOTATE_BUF, + SCHED_TRACE_BUF, + GPU_TRACE_BUF, + IDLE_BUF, + NUM_GATOR_BUFS +}; + +/****************************************************************************** + * Globals + ******************************************************************************/ +static unsigned long gator_cpu_cores; +// Size of the largest buffer. 
Effectively constant, set in gator_op_create_files +static unsigned long userspace_buffer_size; +static unsigned long gator_backtrace_depth; +// How often to commit the buffers for live in nanoseconds +static u64 gator_live_rate; + +static unsigned long gator_started; +static u64 gator_monotonic_started; +static u64 gator_hibernate_time; +static unsigned long gator_buffer_opened; +static unsigned long gator_timer_count; +static unsigned long gator_response_type; +static DEFINE_MUTEX(start_mutex); +static DEFINE_MUTEX(gator_buffer_mutex); + +bool event_based_sampling; + +static DECLARE_WAIT_QUEUE_HEAD(gator_buffer_wait); +static DECLARE_WAIT_QUEUE_HEAD(gator_annotate_wait); +static struct timer_list gator_buffer_wake_up_timer; +static bool gator_buffer_wake_stop; +static struct task_struct *gator_buffer_wake_thread; +static LIST_HEAD(gator_events); + +static DEFINE_PER_CPU(u64, last_timestamp); + +static bool printed_monotonic_warning; + +static bool sent_core_name[NR_CPUS]; + +/****************************************************************************** + * Prototypes + ******************************************************************************/ +static void buffer_check(int cpu, int buftype, u64 time); +static void gator_commit_buffer(int cpu, int buftype, u64 time); +static int buffer_bytes_available(int cpu, int buftype); +static bool buffer_check_space(int cpu, int buftype, int bytes); +static int contiguous_space_available(int cpu, int bufytpe); +static void gator_buffer_write_packed_int(int cpu, int buftype, int x); +static void gator_buffer_write_packed_int64(int cpu, int buftype, long long x); +static void gator_buffer_write_bytes(int cpu, int buftype, const char *x, int len); +static void gator_buffer_write_string(int cpu, int buftype, const char *x); +static void gator_add_trace(int cpu, unsigned long address); +static void gator_add_sample(int cpu, struct pt_regs *const regs, u64 time); +static u64 gator_get_time(void); + +// Size of the buffer, 
must be a power of 2. Effectively constant, set in gator_op_setup. +static uint32_t gator_buffer_size[NUM_GATOR_BUFS]; +// gator_buffer_size - 1, bitwise and with pos to get offset into the array. Effectively constant, set in gator_op_setup. +static uint32_t gator_buffer_mask[NUM_GATOR_BUFS]; +// Read position in the buffer. Initialized to zero in gator_op_setup and incremented after bytes are read by userspace in userspace_buffer_read +static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], gator_buffer_read); +// Write position in the buffer. Initialized to zero in gator_op_setup and incremented after bytes are written to the buffer +static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], gator_buffer_write); +// Commit position in the buffer. Initialized to zero in gator_op_setup and incremented after a frame is ready to be read by userspace +static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], gator_buffer_commit); +// If set to false, decreases the number of bytes returned by buffer_bytes_available. Set in buffer_check_space if no space is remaining. Initialized to true in gator_op_setup +// This means that if we run out of space, continue to report that no space is available until bytes are read by userspace +static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], buffer_space_available); +// The buffer. 
Allocated in gator_op_setup +static DEFINE_PER_CPU(char *[NUM_GATOR_BUFS], gator_buffer); +// The time after which the buffer should be committed for live display +static DEFINE_PER_CPU(u64, gator_buffer_commit_time); + +// List of all gator events - new events must be added to this list +#define GATOR_EVENTS_LIST \ + GATOR_EVENT(gator_events_armv6_init) \ + GATOR_EVENT(gator_events_armv7_init) \ + GATOR_EVENT(gator_events_block_init) \ + GATOR_EVENT(gator_events_ccn504_init) \ + GATOR_EVENT(gator_events_irq_init) \ + GATOR_EVENT(gator_events_l2c310_init) \ + GATOR_EVENT(gator_events_mali_init) \ + GATOR_EVENT(gator_events_mali_t6xx_hw_init) \ + GATOR_EVENT(gator_events_mali_t6xx_init) \ + GATOR_EVENT(gator_events_meminfo_init) \ + GATOR_EVENT(gator_events_mmapped_init) \ + GATOR_EVENT(gator_events_net_init) \ + GATOR_EVENT(gator_events_perf_pmu_init) \ + GATOR_EVENT(gator_events_sched_init) \ + GATOR_EVENT(gator_events_scorpion_init) \ + +#define GATOR_EVENT(EVENT_INIT) __weak int EVENT_INIT(void); +GATOR_EVENTS_LIST +#undef GATOR_EVENT + +static int (*gator_events_list[])(void) = { +#define GATOR_EVENT(EVENT_INIT) EVENT_INIT, +GATOR_EVENTS_LIST +#undef GATOR_EVENT +}; + +/****************************************************************************** + * Application Includes + ******************************************************************************/ +#include "gator_marshaling.c" +#include "gator_hrtimer_perf.c" +#include "gator_hrtimer_gator.c" +#include "gator_cookies.c" +#include "gator_annotate.c" +#include "gator_trace_sched.c" +#include "gator_trace_power.c" +#include "gator_trace_gpu.c" +#include "gator_backtrace.c" +#include "gator_fs.c" +#include "gator_pack.c" + +/****************************************************************************** + * Misc + ******************************************************************************/ + +const struct gator_cpu gator_cpus[] = { + { + .cpuid = ARM1136, + .core_name = "ARM1136", + .pmnc_name = 
"ARM_ARM11", + .dt_name = "arm,arm1136", + .pmnc_counters = 3, + }, + { + .cpuid = ARM1156, + .core_name = "ARM1156", + .pmnc_name = "ARM_ARM11", + .dt_name = "arm,arm1156", + .pmnc_counters = 3, + }, + { + .cpuid = ARM1176, + .core_name = "ARM1176", + .pmnc_name = "ARM_ARM11", + .dt_name = "arm,arm1176", + .pmnc_counters = 3, + }, + { + .cpuid = ARM11MPCORE, + .core_name = "ARM11MPCore", + .pmnc_name = "ARM_ARM11MPCore", + .dt_name = "arm,arm11mpcore", + .pmnc_counters = 3, + }, + { + .cpuid = CORTEX_A5, + .core_name = "Cortex-A5", + .pmu_name = "ARMv7_Cortex_A5", + .pmnc_name = "ARM_Cortex-A5", + .dt_name = "arm,cortex-a5", + .pmnc_counters = 2, + }, + { + .cpuid = CORTEX_A7, + .core_name = "Cortex-A7", + .pmu_name = "ARMv7_Cortex_A7", + .pmnc_name = "ARM_Cortex-A7", + .dt_name = "arm,cortex-a7", + .pmnc_counters = 4, + }, + { + .cpuid = CORTEX_A8, + .core_name = "Cortex-A8", + .pmu_name = "ARMv7_Cortex_A8", + .pmnc_name = "ARM_Cortex-A8", + .dt_name = "arm,cortex-a8", + .pmnc_counters = 4, + }, + { + .cpuid = CORTEX_A9, + .core_name = "Cortex-A9", + .pmu_name = "ARMv7_Cortex_A9", + .pmnc_name = "ARM_Cortex-A9", + .dt_name = "arm,cortex-a9", + .pmnc_counters = 6, + }, + { + .cpuid = CORTEX_A12, + .core_name = "Cortex-A12", + .pmu_name = "ARMv7_Cortex_A12", + .pmnc_name = "ARM_Cortex-A12", + .dt_name = "arm,cortex-a12", + .pmnc_counters = 6, + }, + { + .cpuid = CORTEX_A15, + .core_name = "Cortex-A15", + .pmu_name = "ARMv7_Cortex_A15", + .pmnc_name = "ARM_Cortex-A15", + .dt_name = "arm,cortex-a15", + .pmnc_counters = 6, + }, + { + .cpuid = SCORPION, + .core_name = "Scorpion", + .pmnc_name = "Scorpion", + .pmnc_counters = 4, + }, + { + .cpuid = SCORPIONMP, + .core_name = "ScorpionMP", + .pmnc_name = "ScorpionMP", + .pmnc_counters = 4, + }, + { + .cpuid = KRAITSIM, + .core_name = "KraitSIM", + .pmnc_name = "Krait", + .pmnc_counters = 4, + }, + { + .cpuid = KRAIT, + .core_name = "Krait", + .pmnc_name = "Krait", + .pmnc_counters = 4, + }, + { + .cpuid = KRAIT_S4_PRO, + 
.core_name = "Krait S4 Pro", + .pmnc_name = "Krait", + .pmnc_counters = 4, + }, + { + .cpuid = CORTEX_A53, + .core_name = "Cortex-A53", + .pmnc_name = "ARM_Cortex-A53", + .dt_name = "arm,cortex-a53", + .pmnc_counters = 6, + }, + { + .cpuid = CORTEX_A57, + .core_name = "Cortex-A57", + .pmnc_name = "ARM_Cortex-A57", + .dt_name = "arm,cortex-a57", + .pmnc_counters = 6, + }, + { + .cpuid = AARCH64, + .core_name = "AArch64", + .pmnc_name = "ARM_AArch64", + .pmnc_counters = 6, + }, + { + .cpuid = OTHER, + .core_name = "Other", + .pmnc_name = "Other", + .pmnc_counters = 6, + }, + {} +}; + +const struct gator_cpu *gator_find_cpu_by_cpuid(const u32 cpuid) +{ + int i; + + for (i = 0; gator_cpus[i].cpuid != 0; ++i) { + const struct gator_cpu *const gator_cpu = &gator_cpus[i]; + if (gator_cpu->cpuid == cpuid) { + return gator_cpu; + } + } + + return NULL; +} + +const struct gator_cpu *gator_find_cpu_by_pmu_name(const char *const name) +{ + int i; + + for (i = 0; gator_cpus[i].cpuid != 0; ++i) { + const struct gator_cpu *const gator_cpu = &gator_cpus[i]; + if (gator_cpu->pmu_name != NULL && strcmp(gator_cpu->pmu_name, name) == 0) { + return gator_cpu; + } + } + + return NULL; +} + +u32 gator_cpuid(void) +{ +#if defined(__arm__) || defined(__aarch64__) + u32 val; +#if !defined(__aarch64__) + asm volatile("mrc p15, 0, %0, c0, c0, 0" : "=r" (val)); +#else + asm volatile("mrs %0, midr_el1" : "=r" (val)); +#endif + return (val >> 4) & 0xfff; +#else + return OTHER; +#endif +} + +static void gator_buffer_wake_up(unsigned long data) +{ + wake_up(&gator_buffer_wait); +} + +static int gator_buffer_wake_func(void *data) +{ + while (!gator_buffer_wake_stop) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + if (gator_buffer_wake_stop) { + break; + } + + gator_buffer_wake_up(0); + } + + return 0; +} + +/****************************************************************************** + * Commit interface + ******************************************************************************/ 
+static bool buffer_commit_ready(int *cpu, int *buftype) +{ + int cpu_x, x; + for_each_present_cpu(cpu_x) { + for (x = 0; x < NUM_GATOR_BUFS; x++) + if (per_cpu(gator_buffer_commit, cpu_x)[x] != per_cpu(gator_buffer_read, cpu_x)[x]) { + *cpu = cpu_x; + *buftype = x; + return true; + } + } + *cpu = -1; + *buftype = -1; + return false; +} + +/****************************************************************************** + * Buffer management + ******************************************************************************/ +static int buffer_bytes_available(int cpu, int buftype) +{ + int remaining, filled; + + filled = per_cpu(gator_buffer_write, cpu)[buftype] - per_cpu(gator_buffer_read, cpu)[buftype]; + if (filled < 0) { + filled += gator_buffer_size[buftype]; + } + + remaining = gator_buffer_size[buftype] - filled; + + if (per_cpu(buffer_space_available, cpu)[buftype]) { + // Give some extra room; also allows space to insert the overflow error packet + remaining -= 200; + } else { + // Hysteresis, prevents multiple overflow messages + remaining -= 2000; + } + + return remaining; +} + +static int contiguous_space_available(int cpu, int buftype) +{ + int remaining = buffer_bytes_available(cpu, buftype); + int contiguous = gator_buffer_size[buftype] - per_cpu(gator_buffer_write, cpu)[buftype]; + if (remaining < contiguous) + return remaining; + else + return contiguous; +} + +static bool buffer_check_space(int cpu, int buftype, int bytes) +{ + int remaining = buffer_bytes_available(cpu, buftype); + + if (remaining < bytes) { + per_cpu(buffer_space_available, cpu)[buftype] = false; + } else { + per_cpu(buffer_space_available, cpu)[buftype] = true; + } + + return per_cpu(buffer_space_available, cpu)[buftype]; +} + +static void gator_buffer_write_bytes(int cpu, int buftype, const char *x, int len) +{ + int i; + u32 write = per_cpu(gator_buffer_write, cpu)[buftype]; + u32 mask = gator_buffer_mask[buftype]; + char *buffer = per_cpu(gator_buffer, cpu)[buftype]; + + for (i = 
0; i < len; i++) { + buffer[write] = x[i]; + write = (write + 1) & mask; + } + + per_cpu(gator_buffer_write, cpu)[buftype] = write; +} + +static void gator_buffer_write_string(int cpu, int buftype, const char *x) +{ + int len = strlen(x); + gator_buffer_write_packed_int(cpu, buftype, len); + gator_buffer_write_bytes(cpu, buftype, x, len); +} + +static void gator_commit_buffer(int cpu, int buftype, u64 time) +{ + int type_length, commit, length, byte; + + if (!per_cpu(gator_buffer, cpu)[buftype]) + return; + + // post-populate the length, which does not include the response type length nor the length itself, i.e. only the length of the payload + type_length = gator_response_type ? 1 : 0; + commit = per_cpu(gator_buffer_commit, cpu)[buftype]; + length = per_cpu(gator_buffer_write, cpu)[buftype] - commit; + if (length < 0) { + length += gator_buffer_size[buftype]; + } + length = length - type_length - sizeof(s32); + + if (length <= FRAME_HEADER_SIZE) { + // Nothing to write, only the frame header is present + return; + } + + for (byte = 0; byte < sizeof(s32); byte++) { + per_cpu(gator_buffer, cpu)[buftype][(commit + type_length + byte) & gator_buffer_mask[buftype]] = (length >> byte * 8) & 0xFF; + } + + per_cpu(gator_buffer_commit, cpu)[buftype] = per_cpu(gator_buffer_write, cpu)[buftype]; + + if (gator_live_rate > 0) { + while (time > per_cpu(gator_buffer_commit_time, cpu)) { + per_cpu(gator_buffer_commit_time, cpu) += gator_live_rate; + } + } + + marshal_frame(cpu, buftype); + + // had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater + if (per_cpu(in_scheduler_context, cpu)) { +#ifndef CONFIG_PREEMPT_RT_FULL + // mod_timer can not be used in interrupt context in RT-Preempt full + mod_timer(&gator_buffer_wake_up_timer, jiffies + 1); +#endif + } else { + wake_up_process(gator_buffer_wake_thread); + } +} + +static void buffer_check(int cpu, int buftype, u64 time) +{ + int filled = 
per_cpu(gator_buffer_write, cpu)[buftype] - per_cpu(gator_buffer_commit, cpu)[buftype]; + if (filled < 0) { + filled += gator_buffer_size[buftype]; + } + if (filled >= ((gator_buffer_size[buftype] * 3) / 4)) { + gator_commit_buffer(cpu, buftype, time); + } +} + +static void gator_add_trace(int cpu, unsigned long address) +{ + off_t offset = 0; + unsigned long cookie = get_address_cookie(cpu, current, address & ~1, &offset); + + if (cookie == NO_COOKIE || cookie == UNRESOLVED_COOKIE) { + offset = address; + } + + marshal_backtrace(offset & ~1, cookie); +} + +static void gator_add_sample(int cpu, struct pt_regs *const regs, u64 time) +{ + bool inKernel; + unsigned long exec_cookie; + + if (!regs) + return; + + inKernel = !user_mode(regs); + exec_cookie = get_exec_cookie(cpu, current); + + if (!marshal_backtrace_header(exec_cookie, current->tgid, current->pid, inKernel, time)) + return; + + if (inKernel) { + kernel_backtrace(cpu, regs); + } else { + // Cookie+PC + gator_add_trace(cpu, PC_REG); + + // Backtrace + if (gator_backtrace_depth) + arm_backtrace_eabi(cpu, regs, gator_backtrace_depth); + } + + marshal_backtrace_footer(time); +} + +/****************************************************************************** + * hrtimer interrupt processing + ******************************************************************************/ +static void gator_timer_interrupt(void) +{ + struct pt_regs *const regs = get_irq_regs(); + gator_backtrace_handler(regs); +} + +void gator_backtrace_handler(struct pt_regs *const regs) +{ + u64 time = gator_get_time(); + int cpu = get_physical_cpu(); + + // Output backtrace + gator_add_sample(cpu, regs, time); + + // Collect counters + if (!per_cpu(collecting, cpu)) { + collect_counters(time, NULL); + } + + // No buffer flushing occurs during sched switch for RT-Preempt full. 
The block counter frame will be flushed by collect_counters, but the sched buffer needs to be explicitly flushed +#ifdef CONFIG_PREEMPT_RT_FULL + buffer_check(cpu, SCHED_TRACE_BUF, time); +#endif +} + +static int gator_running; + +// This function runs in interrupt context and on the appropriate core +static void gator_timer_offline(void *migrate) +{ + struct gator_interface *gi; + int i, len, cpu = get_physical_cpu(); + int *buffer; + u64 time; + + gator_trace_sched_offline(); + gator_trace_power_offline(); + + if (!migrate) { + gator_hrtimer_offline(); + } + + // Offline any events and output counters + time = gator_get_time(); + if (marshal_event_header(time)) { + list_for_each_entry(gi, &gator_events, list) { + if (gi->offline) { + len = gi->offline(&buffer, migrate); + marshal_event(len, buffer); + } + } + // Only check after writing all counters so that time and corresponding counters appear in the same frame + buffer_check(cpu, BLOCK_COUNTER_BUF, time); + } + + // Flush all buffers on this core + for (i = 0; i < NUM_GATOR_BUFS; i++) + gator_commit_buffer(cpu, i, time); +} + +// This function runs in interrupt context and may be running on a core other than core 'cpu' +static void gator_timer_offline_dispatch(int cpu, bool migrate) +{ + struct gator_interface *gi; + + list_for_each_entry(gi, &gator_events, list) { + if (gi->offline_dispatch) { + gi->offline_dispatch(cpu, migrate); + } + } +} + +static void gator_timer_stop(void) +{ + int cpu; + + if (gator_running) { + on_each_cpu(gator_timer_offline, NULL, 1); + for_each_online_cpu(cpu) { + gator_timer_offline_dispatch(lcpu_to_pcpu(cpu), false); + } + + gator_running = 0; + gator_hrtimer_shutdown(); + } +} + +#if defined(__arm__) || defined(__aarch64__) +static void gator_send_core_name(int cpu, const u32 cpuid, const struct gator_cpu *const gator_cpu) { + const char *core_name = NULL; + char core_name_buf[32]; + + if (!sent_core_name[cpu]) { + if (gator_cpu != NULL) { + core_name = gator_cpu->core_name; + } 
else { + snprintf(core_name_buf, sizeof(core_name_buf), "Unknown (0x%.3x)", cpuid); + core_name = core_name_buf; + } + + marshal_core_name(cpu, cpuid, core_name); + sent_core_name[cpu] = true; + } +} +#endif + +// This function runs in interrupt context and on the appropriate core +static void gator_timer_online(void *migrate) +{ + struct gator_interface *gi; + int len, cpu = get_physical_cpu(); + int *buffer; + u64 time; + + gator_trace_power_online(); + + // online any events and output counters + time = gator_get_time(); + if (marshal_event_header(time)) { + list_for_each_entry(gi, &gator_events, list) { + if (gi->online) { + len = gi->online(&buffer, migrate); + marshal_event(len, buffer); + } + } + // Only check after writing all counters so that time and corresponding counters appear in the same frame + buffer_check(cpu, BLOCK_COUNTER_BUF, time); + } + + if (!migrate) { + gator_hrtimer_online(); + } + +#if defined(__arm__) || defined(__aarch64__) + if (!sent_core_name[cpu]) { + const u32 cpuid = gator_cpuid(); + gator_send_core_name(cpu, cpuid, gator_find_cpu_by_cpuid(cpuid)); + } +#endif +} + +// This function runs in interrupt context and may be running on a core other than core 'cpu' +static void gator_timer_online_dispatch(int cpu, bool migrate) +{ + struct gator_interface *gi; + + list_for_each_entry(gi, &gator_events, list) { + if (gi->online_dispatch) { + gi->online_dispatch(cpu, migrate); + } + } +} + +#include "gator_iks.c" + +int gator_timer_start(unsigned long sample_rate) +{ + int cpu; + + if (gator_running) { + pr_notice("gator: already running\n"); + return 0; + } + + gator_running = 1; + + // event based sampling trumps hr timer based sampling + if (event_based_sampling) { + sample_rate = 0; + } + + if (gator_hrtimer_init(sample_rate, gator_timer_interrupt) == -1) + return -1; + + gator_send_iks_core_names(); + for_each_online_cpu(cpu) { + gator_timer_online_dispatch(lcpu_to_pcpu(cpu), false); + } + on_each_cpu(gator_timer_online, NULL, 1); + + 
return 0; +} + +static u64 gator_get_time(void) +{ + struct timespec ts; + u64 timestamp; + u64 prev_timestamp; + u64 delta; + int cpu = smp_processor_id(); + + // Match clock_gettime(CLOCK_MONOTONIC_RAW, &ts) from userspace + getrawmonotonic(&ts); + timestamp = timespec_to_ns(&ts); + + // getrawmonotonic is not monotonic on all systems. Detect and attempt to correct these cases. + // up to 0.5ms delta has been seen on some systems, which can skew Streamline data when viewing at high resolution. + // This doesn't work well with interrupts, but that it's OK - the real concern is to catch big jumps in time + prev_timestamp = per_cpu(last_timestamp, cpu); + if (prev_timestamp <= timestamp) { + per_cpu(last_timestamp, cpu) = timestamp; + } else { + delta = prev_timestamp - timestamp; + // Log the error once + if (!printed_monotonic_warning && delta > 500000) { + printk(KERN_ERR "%s: getrawmonotonic is not monotonic cpu: %i delta: %lli\nSkew in Streamline data may be present at the fine zoom levels\n", __FUNCTION__, cpu, delta); + printed_monotonic_warning = true; + } + timestamp = prev_timestamp; + } + + return timestamp - gator_monotonic_started; +} + +/****************************************************************************** + * cpu hotplug and pm notifiers + ******************************************************************************/ +static int __cpuinit gator_hotcpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) +{ + int cpu = lcpu_to_pcpu((long)hcpu); + + switch (action) { + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + smp_call_function_single(cpu, gator_timer_offline, NULL, 1); + gator_timer_offline_dispatch(cpu, false); + break; + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + gator_timer_online_dispatch(cpu, false); + smp_call_function_single(cpu, gator_timer_online, NULL, 1); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block __refdata gator_hotcpu_notifier = { + .notifier_call = gator_hotcpu_notify, 
+}; + +// n.b. calling "on_each_cpu" only runs on those that are online +// Registered linux events are not disabled, so their counters will continue to collect +static int gator_pm_notify(struct notifier_block *nb, unsigned long event, void *dummy) +{ + int cpu; + struct timespec ts; + + switch (event) { + case PM_HIBERNATION_PREPARE: + case PM_SUSPEND_PREPARE: + unregister_hotcpu_notifier(&gator_hotcpu_notifier); + unregister_scheduler_tracepoints(); + on_each_cpu(gator_timer_offline, NULL, 1); + for_each_online_cpu(cpu) { + gator_timer_offline_dispatch(lcpu_to_pcpu(cpu), false); + } + + // Record the wallclock hibernate time + getnstimeofday(&ts); + gator_hibernate_time = timespec_to_ns(&ts) - gator_get_time(); + break; + case PM_POST_HIBERNATION: + case PM_POST_SUSPEND: + // Adjust gator_monotonic_started for the time spent sleeping, as gator_get_time does not account for it + if (gator_hibernate_time > 0) { + getnstimeofday(&ts); + gator_monotonic_started += gator_hibernate_time + gator_get_time() - timespec_to_ns(&ts); + gator_hibernate_time = 0; + } + + for_each_online_cpu(cpu) { + gator_timer_online_dispatch(lcpu_to_pcpu(cpu), false); + } + on_each_cpu(gator_timer_online, NULL, 1); + register_scheduler_tracepoints(); + register_hotcpu_notifier(&gator_hotcpu_notifier); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block gator_pm_notifier = { + .notifier_call = gator_pm_notify, +}; + +static int gator_notifier_start(void) +{ + int retval; + retval = register_hotcpu_notifier(&gator_hotcpu_notifier); + if (retval == 0) + retval = register_pm_notifier(&gator_pm_notifier); + return retval; +} + +static void gator_notifier_stop(void) +{ + unregister_pm_notifier(&gator_pm_notifier); + unregister_hotcpu_notifier(&gator_hotcpu_notifier); +} + +/****************************************************************************** + * Main + ******************************************************************************/ +static void gator_summary(void) +{ + 
u64 timestamp, uptime; + struct timespec ts; + char uname_buf[512]; + void (*m2b)(struct timespec *ts); + unsigned long flags; + + snprintf(uname_buf, sizeof(uname_buf), "%s %s %s %s %s GNU/Linux", utsname()->sysname, utsname()->nodename, utsname()->release, utsname()->version, utsname()->machine); + + getnstimeofday(&ts); + timestamp = timespec_to_ns(&ts); + + do_posix_clock_monotonic_gettime(&ts); + // monotonic_to_bootbased is not defined for some versions of Android + m2b = symbol_get(monotonic_to_bootbased); + if (m2b) { + m2b(&ts); + } + uptime = timespec_to_ns(&ts); + + // Disable interrupts as gator_get_time calls smp_processor_id to verify time is monotonic + local_irq_save(flags); + // Set monotonic_started to zero as gator_get_time is uptime minus monotonic_started + gator_monotonic_started = 0; + gator_monotonic_started = gator_get_time(); + local_irq_restore(flags); + + marshal_summary(timestamp, uptime, gator_monotonic_started, uname_buf); +} + +int gator_events_install(struct gator_interface *interface) +{ + list_add_tail(&interface->list, &gator_events); + + return 0; +} + +int gator_events_get_key(void) +{ + // key 0 is reserved as a timestamp + // key 1 is reserved as the marker for thread specific counters + // Odd keys are assigned by the driver, even keys by the daemon + static int key = 3; + + const int ret = key; + key += 2; + return ret; +} + +static int gator_init(void) +{ + int i; + + calc_first_cluster_size(); + + // events sources + for (i = 0; i < ARRAY_SIZE(gator_events_list); i++) + if (gator_events_list[i]) + gator_events_list[i](); + + gator_trace_sched_init(); + gator_trace_power_init(); + + return 0; +} + +static void gator_exit(void) +{ + struct gator_interface *gi; + + list_for_each_entry(gi, &gator_events, list) + if (gi->shutdown) + gi->shutdown(); +} + +static int gator_start(void) +{ + unsigned long cpu, i; + struct gator_interface *gi; + + gator_buffer_wake_stop = false; + if (IS_ERR(gator_buffer_wake_thread = 
kthread_run(gator_buffer_wake_func, NULL, "gator_bwake"))) { + goto bwake_failure; + } + + if (gator_migrate_start()) + goto migrate_failure; + + // Initialize the buffer with the frame type and core + for_each_present_cpu(cpu) { + for (i = 0; i < NUM_GATOR_BUFS; i++) { + marshal_frame(cpu, i); + } + per_cpu(last_timestamp, cpu) = 0; + } + printed_monotonic_warning = false; + + // Capture the start time + gator_summary(); + + // start all events + list_for_each_entry(gi, &gator_events, list) { + if (gi->start && gi->start() != 0) { + struct list_head *ptr = gi->list.prev; + + while (ptr != &gator_events) { + gi = list_entry(ptr, struct gator_interface, list); + + if (gi->stop) + gi->stop(); + + ptr = ptr->prev; + } + goto events_failure; + } + } + + // cookies shall be initialized before trace_sched_start() and gator_timer_start() + if (cookies_initialize()) + goto cookies_failure; + if (gator_annotate_start()) + goto annotate_failure; + if (gator_trace_sched_start()) + goto sched_failure; + if (gator_trace_power_start()) + goto power_failure; + if (gator_trace_gpu_start()) + goto gpu_failure; + if (gator_timer_start(gator_timer_count)) + goto timer_failure; + if (gator_notifier_start()) + goto notifier_failure; + + return 0; + +notifier_failure: + gator_timer_stop(); +timer_failure: + gator_trace_gpu_stop(); +gpu_failure: + gator_trace_power_stop(); +power_failure: + gator_trace_sched_stop(); +sched_failure: + gator_annotate_stop(); +annotate_failure: + cookies_release(); +cookies_failure: + // stop all events + list_for_each_entry(gi, &gator_events, list) + if (gi->stop) + gi->stop(); +events_failure: + gator_migrate_stop(); +migrate_failure: + gator_buffer_wake_stop = true; + wake_up_process(gator_buffer_wake_thread); +bwake_failure: + + return -1; +} + +static void gator_stop(void) +{ + struct gator_interface *gi; + + gator_annotate_stop(); + gator_trace_sched_stop(); + gator_trace_power_stop(); + gator_trace_gpu_stop(); + + // stop all interrupt callback reads 
before tearing down other interfaces + gator_notifier_stop(); // should be called before gator_timer_stop to avoid re-enabling the hrtimer after it has been offlined + gator_timer_stop(); + + // stop all events + list_for_each_entry(gi, &gator_events, list) + if (gi->stop) + gi->stop(); + + gator_migrate_stop(); + + gator_buffer_wake_stop = true; + wake_up_process(gator_buffer_wake_thread); +} + +/****************************************************************************** + * Filesystem + ******************************************************************************/ +/* fopen("buffer") */ +static int gator_op_setup(void) +{ + int err = 0; + int cpu, i; + + mutex_lock(&start_mutex); + + gator_buffer_size[SUMMARY_BUF] = SUMMARY_BUFFER_SIZE; + gator_buffer_mask[SUMMARY_BUF] = SUMMARY_BUFFER_SIZE - 1; + + gator_buffer_size[BACKTRACE_BUF] = BACKTRACE_BUFFER_SIZE; + gator_buffer_mask[BACKTRACE_BUF] = BACKTRACE_BUFFER_SIZE - 1; + + gator_buffer_size[NAME_BUF] = NAME_BUFFER_SIZE; + gator_buffer_mask[NAME_BUF] = NAME_BUFFER_SIZE - 1; + + gator_buffer_size[COUNTER_BUF] = COUNTER_BUFFER_SIZE; + gator_buffer_mask[COUNTER_BUF] = COUNTER_BUFFER_SIZE - 1; + + gator_buffer_size[BLOCK_COUNTER_BUF] = BLOCK_COUNTER_BUFFER_SIZE; + gator_buffer_mask[BLOCK_COUNTER_BUF] = BLOCK_COUNTER_BUFFER_SIZE - 1; + + gator_buffer_size[ANNOTATE_BUF] = ANNOTATE_BUFFER_SIZE; + gator_buffer_mask[ANNOTATE_BUF] = ANNOTATE_BUFFER_SIZE - 1; + + gator_buffer_size[SCHED_TRACE_BUF] = SCHED_TRACE_BUFFER_SIZE; + gator_buffer_mask[SCHED_TRACE_BUF] = SCHED_TRACE_BUFFER_SIZE - 1; + + gator_buffer_size[GPU_TRACE_BUF] = GPU_TRACE_BUFFER_SIZE; + gator_buffer_mask[GPU_TRACE_BUF] = GPU_TRACE_BUFFER_SIZE - 1; + + gator_buffer_size[IDLE_BUF] = IDLE_BUFFER_SIZE; + gator_buffer_mask[IDLE_BUF] = IDLE_BUFFER_SIZE - 1; + + // Initialize percpu per buffer variables + for (i = 0; i < NUM_GATOR_BUFS; i++) { + // Verify buffers are a power of 2 + if (gator_buffer_size[i] & (gator_buffer_size[i] - 1)) { + err = -ENOEXEC; + 
goto setup_error; + } + + for_each_present_cpu(cpu) { + per_cpu(gator_buffer_read, cpu)[i] = 0; + per_cpu(gator_buffer_write, cpu)[i] = 0; + per_cpu(gator_buffer_commit, cpu)[i] = 0; + per_cpu(buffer_space_available, cpu)[i] = true; + per_cpu(gator_buffer_commit_time, cpu) = gator_live_rate; + + // Annotation is a special case that only uses a single buffer + if (cpu > 0 && i == ANNOTATE_BUF) { + per_cpu(gator_buffer, cpu)[i] = NULL; + continue; + } + + per_cpu(gator_buffer, cpu)[i] = vmalloc(gator_buffer_size[i]); + if (!per_cpu(gator_buffer, cpu)[i]) { + err = -ENOMEM; + goto setup_error; + } + } + } + +setup_error: + mutex_unlock(&start_mutex); + return err; +} + +/* Actually start profiling (echo 1>/dev/gator/enable) */ +static int gator_op_start(void) +{ + int err = 0; + + mutex_lock(&start_mutex); + + if (gator_started || gator_start()) + err = -EINVAL; + else + gator_started = 1; + + mutex_unlock(&start_mutex); + + return err; +} + +/* echo 0>/dev/gator/enable */ +static void gator_op_stop(void) +{ + mutex_lock(&start_mutex); + + if (gator_started) { + gator_stop(); + + mutex_lock(&gator_buffer_mutex); + + gator_started = 0; + gator_monotonic_started = 0; + cookies_release(); + wake_up(&gator_buffer_wait); + + mutex_unlock(&gator_buffer_mutex); + } + + mutex_unlock(&start_mutex); +} + +static void gator_shutdown(void) +{ + int cpu, i; + + mutex_lock(&start_mutex); + + for_each_present_cpu(cpu) { + mutex_lock(&gator_buffer_mutex); + for (i = 0; i < NUM_GATOR_BUFS; i++) { + vfree(per_cpu(gator_buffer, cpu)[i]); + per_cpu(gator_buffer, cpu)[i] = NULL; + per_cpu(gator_buffer_read, cpu)[i] = 0; + per_cpu(gator_buffer_write, cpu)[i] = 0; + per_cpu(gator_buffer_commit, cpu)[i] = 0; + per_cpu(buffer_space_available, cpu)[i] = true; + per_cpu(gator_buffer_commit_time, cpu) = 0; + } + mutex_unlock(&gator_buffer_mutex); + } + + memset(&sent_core_name, 0, sizeof(sent_core_name)); + + mutex_unlock(&start_mutex); +} + +static int gator_set_backtrace(unsigned long val) +{ 
+ int err = 0; + + mutex_lock(&start_mutex); + + if (gator_started) + err = -EBUSY; + else + gator_backtrace_depth = val; + + mutex_unlock(&start_mutex); + + return err; +} + +static ssize_t enable_read(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + return gatorfs_ulong_to_user(gator_started, buf, count, offset); +} + +static ssize_t enable_write(struct file *file, char const __user *buf, size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = gatorfs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val) + retval = gator_op_start(); + else + gator_op_stop(); + + if (retval) + return retval; + return count; +} + +static const struct file_operations enable_fops = { + .read = enable_read, + .write = enable_write, +}; + +static int userspace_buffer_open(struct inode *inode, struct file *file) +{ + int err = -EPERM; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (test_and_set_bit_lock(0, &gator_buffer_opened)) + return -EBUSY; + + if ((err = gator_op_setup())) + goto fail; + + /* NB: the actual start happens from userspace + * echo 1 >/dev/gator/enable + */ + + return 0; + +fail: + __clear_bit_unlock(0, &gator_buffer_opened); + return err; +} + +static int userspace_buffer_release(struct inode *inode, struct file *file) +{ + gator_op_stop(); + gator_shutdown(); + __clear_bit_unlock(0, &gator_buffer_opened); + return 0; +} + +static ssize_t userspace_buffer_read(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + int commit, length1, length2, read; + char *buffer1; + char *buffer2; + int cpu, buftype; + int written = 0; + + // ensure there is enough space for a whole frame + if (count < userspace_buffer_size || *offset) { + return -EINVAL; + } + + // sleep until the condition is true or a signal is received + // the condition is checked each time gator_buffer_wait is woken up + wait_event_interruptible(gator_buffer_wait, 
buffer_commit_ready(&cpu, &buftype) || !gator_started); + + if (signal_pending(current)) { + return -EINTR; + } + + if (buftype == -1 || cpu == -1) { + return 0; + } + + mutex_lock(&gator_buffer_mutex); + + do { + read = per_cpu(gator_buffer_read, cpu)[buftype]; + commit = per_cpu(gator_buffer_commit, cpu)[buftype]; + + // May happen if the buffer is freed during pending reads. + if (!per_cpu(gator_buffer, cpu)[buftype]) { + break; + } + + // determine the size of two halves + length1 = commit - read; + length2 = 0; + buffer1 = &(per_cpu(gator_buffer, cpu)[buftype][read]); + buffer2 = &(per_cpu(gator_buffer, cpu)[buftype][0]); + if (length1 < 0) { + length1 = gator_buffer_size[buftype] - read; + length2 = commit; + } + + if (length1 + length2 > count - written) { + break; + } + + // start, middle or end + if (length1 > 0 && copy_to_user(&buf[written], buffer1, length1)) { + break; + } + + // possible wrap around + if (length2 > 0 && copy_to_user(&buf[written + length1], buffer2, length2)) { + break; + } + + per_cpu(gator_buffer_read, cpu)[buftype] = commit; + written += length1 + length2; + + // Wake up annotate_write if more space is available + if (buftype == ANNOTATE_BUF) { + wake_up(&gator_annotate_wait); + } + } while (buffer_commit_ready(&cpu, &buftype)); + + mutex_unlock(&gator_buffer_mutex); + + // kick just in case we've lost an SMP event + wake_up(&gator_buffer_wait); + + return written > 0 ? 
written : -EFAULT; +} + +const struct file_operations gator_event_buffer_fops = { + .open = userspace_buffer_open, + .release = userspace_buffer_release, + .read = userspace_buffer_read, +}; + +static ssize_t depth_read(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + return gatorfs_ulong_to_user(gator_backtrace_depth, buf, count, offset); +} + +static ssize_t depth_write(struct file *file, char const __user *buf, size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = gatorfs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + retval = gator_set_backtrace(val); + + if (retval) + return retval; + return count; +} + +static const struct file_operations depth_fops = { + .read = depth_read, + .write = depth_write +}; + +void gator_op_create_files(struct super_block *sb, struct dentry *root) +{ + struct dentry *dir; + struct gator_interface *gi; + int cpu; + + /* reinitialize default values */ + gator_cpu_cores = 0; + for_each_present_cpu(cpu) { + gator_cpu_cores++; + } + userspace_buffer_size = BACKTRACE_BUFFER_SIZE; + gator_response_type = 1; + gator_live_rate = 0; + + gatorfs_create_file(sb, root, "enable", &enable_fops); + gatorfs_create_file(sb, root, "buffer", &gator_event_buffer_fops); + gatorfs_create_file(sb, root, "backtrace_depth", &depth_fops); + gatorfs_create_ro_ulong(sb, root, "cpu_cores", &gator_cpu_cores); + gatorfs_create_ro_ulong(sb, root, "buffer_size", &userspace_buffer_size); + gatorfs_create_ulong(sb, root, "tick", &gator_timer_count); + gatorfs_create_ulong(sb, root, "response_type", &gator_response_type); + gatorfs_create_ro_ulong(sb, root, "version", &gator_protocol_version); + gatorfs_create_ro_u64(sb, root, "started", &gator_monotonic_started); + gatorfs_create_u64(sb, root, "live_rate", &gator_live_rate); + + // Annotate interface + gator_annotate_create_files(sb, root); + + // Linux Events + dir = gatorfs_mkdir(sb, root, "events"); + 
list_for_each_entry(gi, &gator_events, list) + if (gi->create_files) + gi->create_files(sb, dir); + + // Sched Events + sched_trace_create_files(sb, dir); + + // Power interface + gator_trace_power_create_files(sb, dir); +} + +/****************************************************************************** + * Module + ******************************************************************************/ +static int __init gator_module_init(void) +{ + if (gatorfs_register()) { + return -1; + } + + if (gator_init()) { + gatorfs_unregister(); + return -1; + } + + setup_timer(&gator_buffer_wake_up_timer, gator_buffer_wake_up, 0); + + return 0; +} + +static void __exit gator_module_exit(void) +{ + del_timer_sync(&gator_buffer_wake_up_timer); + tracepoint_synchronize_unregister(); + gator_exit(); + gatorfs_unregister(); +} + +module_init(gator_module_init); +module_exit(gator_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("ARM Ltd"); +MODULE_DESCRIPTION("Gator system profiler"); +#define STRIFY2(ARG) #ARG +#define STRIFY(ARG) STRIFY2(ARG) +MODULE_VERSION(STRIFY(PROTOCOL_VERSION)); diff --git a/drivers/gator/gator_marshaling.c b/drivers/gator/gator_marshaling.c new file mode 100644 index 000000000000..af80ff62e712 --- /dev/null +++ b/drivers/gator/gator_marshaling.c @@ -0,0 +1,432 @@ +/** + * Copyright (C) ARM Limited 2012-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#define NEWLINE_CANARY \ + /* Unix */ \ + "1\n" \ + /* Windows */ \ + "2\r\n" \ + /* Mac OS */ \ + "3\r" \ + /* RISC OS */ \ + "4\n\r" \ + /* Add another character so the length isn't 0x0a bytes */ \ + "5" + +#ifdef MALI_SUPPORT +#include "gator_events_mali_common.h" +#endif + +static void marshal_summary(long long timestamp, long long uptime, long long monotonic_delta, const char * uname) +{ + unsigned long flags; + int cpu = 0; + + local_irq_save(flags); + gator_buffer_write_string(cpu, SUMMARY_BUF, NEWLINE_CANARY); + gator_buffer_write_packed_int64(cpu, SUMMARY_BUF, timestamp); + gator_buffer_write_packed_int64(cpu, SUMMARY_BUF, uptime); + gator_buffer_write_packed_int64(cpu, SUMMARY_BUF, monotonic_delta); + gator_buffer_write_string(cpu, SUMMARY_BUF, "uname"); + gator_buffer_write_string(cpu, SUMMARY_BUF, uname); +#if GATOR_IKS_SUPPORT + gator_buffer_write_string(cpu, SUMMARY_BUF, "iks"); + gator_buffer_write_string(cpu, SUMMARY_BUF, ""); +#endif + // Let Streamline know which GPU is used so that it can label the GPU Activity appropriately. This is a temporary fix, to be improved in a future release. 
+#ifdef MALI_SUPPORT + gator_buffer_write_string(cpu, SUMMARY_BUF, "mali_type"); +#if (MALI_SUPPORT == MALI_4xx) + gator_buffer_write_string(cpu, SUMMARY_BUF, "4xx"); +#elif (MALI_SUPPORT == MALI_T6xx) + gator_buffer_write_string(cpu, SUMMARY_BUF, "6xx"); +#else + gator_buffer_write_string(cpu, SUMMARY_BUF, "unknown"); +#endif +#endif + gator_buffer_write_string(cpu, SUMMARY_BUF, ""); + // Commit the buffer now so it can be one of the first frames read by Streamline + gator_commit_buffer(cpu, SUMMARY_BUF, gator_get_time()); + local_irq_restore(flags); +} + +static bool marshal_cookie_header(const char *text) +{ + int cpu = get_physical_cpu(); + return buffer_check_space(cpu, NAME_BUF, strlen(text) + 3 * MAXSIZE_PACK32); +} + +static void marshal_cookie(int cookie, const char *text) +{ + int cpu = get_physical_cpu(); + // buffer_check_space already called by marshal_cookie_header + gator_buffer_write_packed_int(cpu, NAME_BUF, MESSAGE_COOKIE); + gator_buffer_write_packed_int(cpu, NAME_BUF, cookie); + gator_buffer_write_string(cpu, NAME_BUF, text); + buffer_check(cpu, NAME_BUF, gator_get_time()); +} + +static void marshal_thread_name(int pid, char *name) +{ + unsigned long flags, cpu; + u64 time; + local_irq_save(flags); + cpu = get_physical_cpu(); + time = gator_get_time(); + if (buffer_check_space(cpu, NAME_BUF, TASK_COMM_LEN + 3 * MAXSIZE_PACK32 + MAXSIZE_PACK64)) { + gator_buffer_write_packed_int(cpu, NAME_BUF, MESSAGE_THREAD_NAME); + gator_buffer_write_packed_int64(cpu, NAME_BUF, time); + gator_buffer_write_packed_int(cpu, NAME_BUF, pid); + gator_buffer_write_string(cpu, NAME_BUF, name); + } + buffer_check(cpu, NAME_BUF, time); + local_irq_restore(flags); +} + +static void marshal_link(int cookie, int tgid, int pid) +{ + unsigned long cpu = get_physical_cpu(), flags; + u64 time; + + local_irq_save(flags); + time = gator_get_time(); + if (buffer_check_space(cpu, NAME_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) { + gator_buffer_write_packed_int(cpu, NAME_BUF, 
MESSAGE_LINK); + gator_buffer_write_packed_int64(cpu, NAME_BUF, time); + gator_buffer_write_packed_int(cpu, NAME_BUF, cookie); + gator_buffer_write_packed_int(cpu, NAME_BUF, tgid); + gator_buffer_write_packed_int(cpu, NAME_BUF, pid); + } + // Check and commit; commit is set to occur once buffer is 3/4 full + buffer_check(cpu, NAME_BUF, time); + local_irq_restore(flags); +} + +static bool marshal_backtrace_header(int exec_cookie, int tgid, int pid, int inKernel, u64 time) +{ + int cpu = get_physical_cpu(); + if (!buffer_check_space(cpu, BACKTRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32 + gator_backtrace_depth * 2 * MAXSIZE_PACK32)) { + // Check and commit; commit is set to occur once buffer is 3/4 full + buffer_check(cpu, BACKTRACE_BUF, time); + + return false; + } + + gator_buffer_write_packed_int64(cpu, BACKTRACE_BUF, time); + gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, exec_cookie); + gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, tgid); + gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, pid); + gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, inKernel); + + return true; +} + +static void marshal_backtrace(unsigned long address, int cookie) +{ + int cpu = get_physical_cpu(); + gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, cookie); + gator_buffer_write_packed_int64(cpu, BACKTRACE_BUF, address); +} + +static void marshal_backtrace_footer(u64 time) +{ + int cpu = get_physical_cpu(); + gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, MESSAGE_END_BACKTRACE); + + // Check and commit; commit is set to occur once buffer is 3/4 full + buffer_check(cpu, BACKTRACE_BUF, time); +} + +static bool marshal_event_header(u64 time) +{ + unsigned long flags, cpu = get_physical_cpu(); + bool retval = false; + + local_irq_save(flags); + if (buffer_check_space(cpu, BLOCK_COUNTER_BUF, MAXSIZE_PACK32 + MAXSIZE_PACK64)) { + gator_buffer_write_packed_int(cpu, BLOCK_COUNTER_BUF, 0); // key of zero indicates a timestamp + gator_buffer_write_packed_int64(cpu, 
BLOCK_COUNTER_BUF, time); + retval = true; + } + local_irq_restore(flags); + + return retval; +} + +static void marshal_event(int len, int *buffer) +{ + unsigned long i, flags, cpu = get_physical_cpu(); + + if (len <= 0) + return; + + // length must be even since all data is a (key, value) pair + if (len & 0x1) { + pr_err("gator: invalid counter data detected and discarded"); + return; + } + + // events must be written in key,value pairs + local_irq_save(flags); + for (i = 0; i < len; i += 2) { + if (!buffer_check_space(cpu, BLOCK_COUNTER_BUF, 2 * MAXSIZE_PACK32)) { + break; + } + gator_buffer_write_packed_int(cpu, BLOCK_COUNTER_BUF, buffer[i]); + gator_buffer_write_packed_int(cpu, BLOCK_COUNTER_BUF, buffer[i + 1]); + } + local_irq_restore(flags); +} + +static void marshal_event64(int len, long long *buffer64) +{ + unsigned long i, flags, cpu = get_physical_cpu(); + + if (len <= 0) + return; + + // length must be even since all data is a (key, value) pair + if (len & 0x1) { + pr_err("gator: invalid counter data detected and discarded"); + return; + } + + // events must be written in key,value pairs + local_irq_save(flags); + for (i = 0; i < len; i += 2) { + if (!buffer_check_space(cpu, BLOCK_COUNTER_BUF, 2 * MAXSIZE_PACK64)) { + break; + } + gator_buffer_write_packed_int64(cpu, BLOCK_COUNTER_BUF, buffer64[i]); + gator_buffer_write_packed_int64(cpu, BLOCK_COUNTER_BUF, buffer64[i + 1]); + } + local_irq_restore(flags); +} + +#if GATOR_CPU_FREQ_SUPPORT +static void marshal_event_single(int core, int key, int value) +{ + unsigned long flags, cpu; + u64 time; + + local_irq_save(flags); + cpu = get_physical_cpu(); + time = gator_get_time(); + if (buffer_check_space(cpu, COUNTER_BUF, MAXSIZE_PACK64 + 3 * MAXSIZE_PACK32)) { + gator_buffer_write_packed_int64(cpu, COUNTER_BUF, time); + gator_buffer_write_packed_int(cpu, COUNTER_BUF, core); + gator_buffer_write_packed_int(cpu, COUNTER_BUF, key); + gator_buffer_write_packed_int(cpu, COUNTER_BUF, value); + } + // Check and 
commit; commit is set to occur once buffer is 3/4 full + buffer_check(cpu, COUNTER_BUF, time); + local_irq_restore(flags); +} +#endif + +static void marshal_sched_gpu_start(int unit, int core, int tgid, int pid) +{ + unsigned long cpu = get_physical_cpu(), flags; + u64 time; + + if (!per_cpu(gator_buffer, cpu)[GPU_TRACE_BUF]) + return; + + local_irq_save(flags); + time = gator_get_time(); + if (buffer_check_space(cpu, GPU_TRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) { + gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, MESSAGE_GPU_START); + gator_buffer_write_packed_int64(cpu, GPU_TRACE_BUF, time); + gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, unit); + gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, core); + gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, tgid); + gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, pid); + } + // Check and commit; commit is set to occur once buffer is 3/4 full + buffer_check(cpu, GPU_TRACE_BUF, time); + local_irq_restore(flags); +} + +static void marshal_sched_gpu_stop(int unit, int core) +{ + unsigned long cpu = get_physical_cpu(), flags; + u64 time; + + if (!per_cpu(gator_buffer, cpu)[GPU_TRACE_BUF]) + return; + + local_irq_save(flags); + time = gator_get_time(); + if (buffer_check_space(cpu, GPU_TRACE_BUF, MAXSIZE_PACK64 + 3 * MAXSIZE_PACK32)) { + gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, MESSAGE_GPU_STOP); + gator_buffer_write_packed_int64(cpu, GPU_TRACE_BUF, time); + gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, unit); + gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, core); + } + // Check and commit; commit is set to occur once buffer is 3/4 full + buffer_check(cpu, GPU_TRACE_BUF, time); + local_irq_restore(flags); +} + +static void marshal_sched_trace_start(int tgid, int pid, int cookie) +{ + unsigned long cpu = get_physical_cpu(), flags; + u64 time; + + if (!per_cpu(gator_buffer, cpu)[SCHED_TRACE_BUF]) + return; + + local_irq_save(flags); + time = gator_get_time(); + if (buffer_check_space(cpu, 
SCHED_TRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) { + gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, MESSAGE_SCHED_START); + gator_buffer_write_packed_int64(cpu, SCHED_TRACE_BUF, time); + gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, tgid); + gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, pid); + gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, cookie); + } + // Check and commit; commit is set to occur once buffer is 3/4 full + buffer_check(cpu, SCHED_TRACE_BUF, time); + local_irq_restore(flags); +} + +static void marshal_sched_trace_switch(int tgid, int pid, int cookie, int state) +{ + unsigned long cpu = get_physical_cpu(), flags; + u64 time; + + if (!per_cpu(gator_buffer, cpu)[SCHED_TRACE_BUF]) + return; + + local_irq_save(flags); + time = gator_get_time(); + if (buffer_check_space(cpu, SCHED_TRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) { + gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, MESSAGE_SCHED_SWITCH); + gator_buffer_write_packed_int64(cpu, SCHED_TRACE_BUF, time); + gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, tgid); + gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, pid); + gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, cookie); + gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, state); + } + // Check and commit; commit is set to occur once buffer is 3/4 full + buffer_check(cpu, SCHED_TRACE_BUF, time); + local_irq_restore(flags); +} + +static void marshal_sched_trace_exit(int tgid, int pid) +{ + unsigned long cpu = get_physical_cpu(), flags; + u64 time; + + if (!per_cpu(gator_buffer, cpu)[SCHED_TRACE_BUF]) + return; + + local_irq_save(flags); + time = gator_get_time(); + if (buffer_check_space(cpu, SCHED_TRACE_BUF, MAXSIZE_PACK64 + 2 * MAXSIZE_PACK32)) { + gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, MESSAGE_SCHED_EXIT); + gator_buffer_write_packed_int64(cpu, SCHED_TRACE_BUF, time); + gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, pid); + } + // Check and commit; commit is set to occur once 
buffer is 3/4 full + buffer_check(cpu, SCHED_TRACE_BUF, time); + local_irq_restore(flags); +} + +#if GATOR_CPU_FREQ_SUPPORT +static void marshal_idle(int core, int state) +{ + unsigned long flags, cpu; + u64 time; + + local_irq_save(flags); + cpu = get_physical_cpu(); + time = gator_get_time(); + if (buffer_check_space(cpu, IDLE_BUF, MAXSIZE_PACK64 + 2 * MAXSIZE_PACK32)) { + gator_buffer_write_packed_int(cpu, IDLE_BUF, state); + gator_buffer_write_packed_int64(cpu, IDLE_BUF, time); + gator_buffer_write_packed_int(cpu, IDLE_BUF, core); + } + // Check and commit; commit is set to occur once buffer is 3/4 full + buffer_check(cpu, IDLE_BUF, time); + local_irq_restore(flags); +} +#endif + +static void marshal_frame(int cpu, int buftype) +{ + int frame; + + if (!per_cpu(gator_buffer, cpu)[buftype]) { + return; + } + + switch (buftype) { + case SUMMARY_BUF: + frame = FRAME_SUMMARY; + break; + case BACKTRACE_BUF: + frame = FRAME_BACKTRACE; + break; + case NAME_BUF: + frame = FRAME_NAME; + break; + case COUNTER_BUF: + frame = FRAME_COUNTER; + break; + case BLOCK_COUNTER_BUF: + frame = FRAME_BLOCK_COUNTER; + break; + case ANNOTATE_BUF: + frame = FRAME_ANNOTATE; + break; + case SCHED_TRACE_BUF: + frame = FRAME_SCHED_TRACE; + break; + case GPU_TRACE_BUF: + frame = FRAME_GPU_TRACE; + break; + case IDLE_BUF: + frame = FRAME_IDLE; + break; + default: + frame = -1; + break; + } + + // add response type + if (gator_response_type > 0) { + gator_buffer_write_packed_int(cpu, buftype, gator_response_type); + } + + // leave space for 4-byte unpacked length + per_cpu(gator_buffer_write, cpu)[buftype] = (per_cpu(gator_buffer_write, cpu)[buftype] + sizeof(s32)) & gator_buffer_mask[buftype]; + + // add frame type and core number + gator_buffer_write_packed_int(cpu, buftype, frame); + gator_buffer_write_packed_int(cpu, buftype, cpu); +} + +#if defined(__arm__) || defined(__aarch64__) +static void marshal_core_name(const int core, const int cpuid, const char *name) +{ + int cpu = 
get_physical_cpu(); + unsigned long flags; + local_irq_save(flags); + if (buffer_check_space(cpu, NAME_BUF, MAXSIZE_PACK32 + MAXSIZE_CORE_NAME)) { + gator_buffer_write_packed_int(cpu, NAME_BUF, HRTIMER_CORE_NAME); + gator_buffer_write_packed_int(cpu, NAME_BUF, core); + gator_buffer_write_packed_int(cpu, NAME_BUF, cpuid); + gator_buffer_write_string(cpu, NAME_BUF, name); + } + // Commit core names now so that they can show up in live + gator_commit_buffer(cpu, NAME_BUF, gator_get_time()); + local_irq_restore(flags); +} +#endif diff --git a/drivers/gator/gator_pack.c b/drivers/gator/gator_pack.c new file mode 100644 index 000000000000..2c082f283adc --- /dev/null +++ b/drivers/gator/gator_pack.c @@ -0,0 +1,58 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +static void gator_buffer_write_packed_int(int cpu, int buftype, int x) +{ + uint32_t write = per_cpu(gator_buffer_write, cpu)[buftype]; + uint32_t mask = gator_buffer_mask[buftype]; + char *buffer = per_cpu(gator_buffer, cpu)[buftype]; + int packedBytes = 0; + int more = true; + while (more) { + // low order 7 bits of x + char b = x & 0x7f; + x >>= 7; + + if ((x == 0 && (b & 0x40) == 0) || (x == -1 && (b & 0x40) != 0)) { + more = false; + } else { + b |= 0x80; + } + + buffer[(write + packedBytes) & mask] = b; + packedBytes++; + } + + per_cpu(gator_buffer_write, cpu)[buftype] = (write + packedBytes) & mask; +} + +static void gator_buffer_write_packed_int64(int cpu, int buftype, long long x) +{ + uint32_t write = per_cpu(gator_buffer_write, cpu)[buftype]; + uint32_t mask = gator_buffer_mask[buftype]; + char *buffer = per_cpu(gator_buffer, cpu)[buftype]; + int packedBytes = 0; + int more = true; + while (more) { + // low order 7 bits of x + char b = x & 0x7f; + x >>= 7; + + if ((x == 0 && (b & 
0x40) == 0) || (x == -1 && (b & 0x40) != 0)) { + more = false; + } else { + b |= 0x80; + } + + buffer[(write + packedBytes) & mask] = b; + packedBytes++; + } + + per_cpu(gator_buffer_write, cpu)[buftype] = (write + packedBytes) & mask; +} diff --git a/drivers/gator/gator_trace_gpu.c b/drivers/gator/gator_trace_gpu.c new file mode 100644 index 000000000000..be135b4aac56 --- /dev/null +++ b/drivers/gator/gator_trace_gpu.c @@ -0,0 +1,294 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "gator.h" + +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/time.h> +#include <linux/math64.h> + +#ifdef MALI_SUPPORT +#include "linux/mali_linux_trace.h" +#endif +#include "gator_trace_gpu.h" + +/* + * Taken from MALI_PROFILING_EVENT_TYPE_* items in Mali DDK. 
+ */ +#define EVENT_TYPE_SINGLE 0 +#define EVENT_TYPE_START 1 +#define EVENT_TYPE_STOP 2 +#define EVENT_TYPE_SUSPEND 3 +#define EVENT_TYPE_RESUME 4 + +/* Note whether tracepoints have been registered */ +static int mali_timeline_trace_registered; +static int mali_job_slots_trace_registered; +static int gpu_trace_registered; + +enum { + GPU_UNIT_NONE = 0, + GPU_UNIT_VP, + GPU_UNIT_FP, + GPU_UNIT_CL, + NUMBER_OF_GPU_UNITS +}; + +#define MALI_4xx (0x0b07) +#define MALI_T6xx (0x0056) + +struct mali_gpu_job { + int count; + int last_tgid; + int last_pid; + int last_job_id; +}; + +#define NUMBER_OF_GPU_CORES 16 +static struct mali_gpu_job mali_gpu_jobs[NUMBER_OF_GPU_UNITS][NUMBER_OF_GPU_CORES]; +static DEFINE_SPINLOCK(mali_gpu_jobs_lock); + +/* Only one event should be running on a unit and core at a time (ie, a start + * event can only be followed by a stop and vice versa), but because the kernel + * only knows when a job is enqueued and not started, it is possible for a + * start1, start2, stop1, stop2. Change it back into start1, stop1, start2, + * stop2 by queueing up start2 and releasing it when stop1 is received. 
+ */ +static void mali_gpu_enqueue(int unit, int core, int tgid, int pid, int job_id) +{ + int count; + + spin_lock(&mali_gpu_jobs_lock); + count = mali_gpu_jobs[unit][core].count; + BUG_ON(count < 0); + ++mali_gpu_jobs[unit][core].count; + if (count) { + mali_gpu_jobs[unit][core].last_tgid = tgid; + mali_gpu_jobs[unit][core].last_pid = pid; + mali_gpu_jobs[unit][core].last_job_id = job_id; + } + spin_unlock(&mali_gpu_jobs_lock); + + if (!count) { + marshal_sched_gpu_start(unit, core, tgid, pid/*, job_id*/); + } +} + +static void mali_gpu_stop(int unit, int core) +{ + int count; + int last_tgid = 0; + int last_pid = 0; + //int last_job_id = 0; + + spin_lock(&mali_gpu_jobs_lock); + if (mali_gpu_jobs[unit][core].count == 0) { + spin_unlock(&mali_gpu_jobs_lock); + return; + } + --mali_gpu_jobs[unit][core].count; + count = mali_gpu_jobs[unit][core].count; + if (count) { + last_tgid = mali_gpu_jobs[unit][core].last_tgid; + last_pid = mali_gpu_jobs[unit][core].last_pid; + //last_job_id = mali_gpu_jobs[unit][core].last_job_id; + } + spin_unlock(&mali_gpu_jobs_lock); + + marshal_sched_gpu_stop(unit, core); + if (count) { + marshal_sched_gpu_start(unit, core, last_tgid, last_pid/*, last_job_id*/); + } +} + +#if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_T6xx) +#include "gator_events_mali_4xx.h" + +/* + * Taken from MALI_PROFILING_EVENT_CHANNEL_* in Mali DDK. 
+ */ +enum { + EVENT_CHANNEL_SOFTWARE = 0, + EVENT_CHANNEL_VP0 = 1, + EVENT_CHANNEL_FP0 = 5, + EVENT_CHANNEL_FP1, + EVENT_CHANNEL_FP2, + EVENT_CHANNEL_FP3, + EVENT_CHANNEL_FP4, + EVENT_CHANNEL_FP5, + EVENT_CHANNEL_FP6, + EVENT_CHANNEL_FP7, + EVENT_CHANNEL_GPU = 21 +}; + +/** + * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from the GPU channel + */ +enum { + EVENT_REASON_SINGLE_GPU_NONE = 0, + EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE = 1, +}; + +GATOR_DEFINE_PROBE(mali_timeline_event, TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1, unsigned int d2, unsigned int d3, unsigned int d4)) +{ + unsigned int component, state; + + // do as much work as possible before disabling interrupts + component = (event_id >> 16) & 0xFF; // component is an 8-bit field + state = (event_id >> 24) & 0xF; // state is a 4-bit field + + switch (state) { + case EVENT_TYPE_START: + if (component == EVENT_CHANNEL_VP0) { + /* tgid = d0; pid = d1; */ + mali_gpu_enqueue(GPU_UNIT_VP, 0, d0, d1, 0); + } else if (component >= EVENT_CHANNEL_FP0 && component <= EVENT_CHANNEL_FP7) { + /* tgid = d0; pid = d1; */ + mali_gpu_enqueue(GPU_UNIT_FP, component - EVENT_CHANNEL_FP0, d0, d1, 0); + } + break; + + case EVENT_TYPE_STOP: + if (component == EVENT_CHANNEL_VP0) { + mali_gpu_stop(GPU_UNIT_VP, 0); + } else if (component >= EVENT_CHANNEL_FP0 && component <= EVENT_CHANNEL_FP7) { + mali_gpu_stop(GPU_UNIT_FP, component - EVENT_CHANNEL_FP0); + } + break; + + case EVENT_TYPE_SINGLE: + if (component == EVENT_CHANNEL_GPU) { + unsigned int reason = (event_id & 0xffff); + + if (reason == EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE) { + gator_events_mali_log_dvfs_event(d0, d1); + } + } + break; + + default: + break; + } +} +#endif + +#if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_T6xx) +#if defined(MALI_JOB_SLOTS_EVENT_CHANGED) +GATOR_DEFINE_PROBE(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid, unsigned char job_id)) 
+#else +GATOR_DEFINE_PROBE(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid)) +#endif +{ + unsigned int component, state, unit; +#if !defined(MALI_JOB_SLOTS_EVENT_CHANGED) + unsigned char job_id = 0; +#endif + + component = (event_id >> 16) & 0xFF; // component is an 8-bit field + state = (event_id >> 24) & 0xF; // state is a 4-bit field + + switch (component) { + case 0: + unit = GPU_UNIT_FP; + break; + case 1: + unit = GPU_UNIT_VP; + break; + case 2: + unit = GPU_UNIT_CL; + break; + default: + unit = GPU_UNIT_NONE; + } + + if (unit != GPU_UNIT_NONE) { + switch (state) { + case EVENT_TYPE_START: + mali_gpu_enqueue(unit, 0, tgid, (pid != 0 ? pid : tgid), job_id); + break; + case EVENT_TYPE_STOP: + mali_gpu_stop(unit, 0); + break; + default: + /* + * Some jobs can be soft-stopped, so ensure that this terminates the activity trace. + */ + mali_gpu_stop(unit, 0); + } + } +} +#endif + +GATOR_DEFINE_PROBE(gpu_activity_start, TP_PROTO(int gpu_unit, int gpu_core, struct task_struct *p)) +{ + mali_gpu_enqueue(gpu_unit, gpu_core, (int)p->tgid, (int)p->pid, 0); +} + +GATOR_DEFINE_PROBE(gpu_activity_stop, TP_PROTO(int gpu_unit, int gpu_core)) +{ + mali_gpu_stop(gpu_unit, gpu_core); +} + +int gator_trace_gpu_start(void) +{ + /* + * Returns nonzero for installation failed + * Absence of gpu trace points is not an error + */ + + memset(&mali_gpu_jobs, 0, sizeof(mali_gpu_jobs)); + gpu_trace_registered = mali_timeline_trace_registered = mali_job_slots_trace_registered = 0; + +#if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_T6xx) + if (!GATOR_REGISTER_TRACE(mali_timeline_event)) { + mali_timeline_trace_registered = 1; + } +#endif + +#if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_T6xx) + if (!GATOR_REGISTER_TRACE(mali_job_slots_event)) { + mali_job_slots_trace_registered = 1; + } +#endif + + if (!mali_timeline_trace_registered) { + if (GATOR_REGISTER_TRACE(gpu_activity_start)) { + return 0; + } + if 
(GATOR_REGISTER_TRACE(gpu_activity_stop)) { + GATOR_UNREGISTER_TRACE(gpu_activity_start); + return 0; + } + gpu_trace_registered = 1; + } + + return 0; +} + +void gator_trace_gpu_stop(void) +{ +#if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_T6xx) + if (mali_timeline_trace_registered) { + GATOR_UNREGISTER_TRACE(mali_timeline_event); + } +#endif + +#if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_T6xx) + if (mali_job_slots_trace_registered) { + GATOR_UNREGISTER_TRACE(mali_job_slots_event); + } +#endif + + if (gpu_trace_registered) { + GATOR_UNREGISTER_TRACE(gpu_activity_stop); + GATOR_UNREGISTER_TRACE(gpu_activity_start); + } + + gpu_trace_registered = mali_timeline_trace_registered = mali_job_slots_trace_registered = 0; +} diff --git a/drivers/gator/gator_trace_gpu.h b/drivers/gator/gator_trace_gpu.h new file mode 100644 index 000000000000..bb0f42d290da --- /dev/null +++ b/drivers/gator/gator_trace_gpu.h @@ -0,0 +1,79 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#undef TRACE_GPU +#define TRACE_GPU gpu + +#if !defined(_TRACE_GPU_H) +#define _TRACE_GPU_H + +#include <linux/tracepoint.h> + +/* + * UNIT - the GPU processor type + * 1 = Vertex Processor + * 2 = Fragment Processor + * + * CORE - the GPU processor core number + * this is not the CPU core number + */ + +/* + * Tracepoint for calling GPU unit start activity on core + */ +TRACE_EVENT(gpu_activity_start, + + TP_PROTO(int gpu_unit, int gpu_core, struct task_struct *p), + + TP_ARGS(gpu_unit, gpu_core, p), + + TP_STRUCT__entry( + __field(int, gpu_unit) + __field(int, gpu_core) + __array(char, comm, TASK_COMM_LEN) + __field(pid_t, pid) + ), + + TP_fast_assign( + __entry->gpu_unit = gpu_unit; + __entry->gpu_core = gpu_core; + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + ), + + TP_printk("unit=%d core=%d comm=%s pid=%d", + __entry->gpu_unit, __entry->gpu_core, __entry->comm, + __entry->pid) + ); + +/* + * Tracepoint for calling GPU unit stop activity on core + */ +TRACE_EVENT(gpu_activity_stop, + + TP_PROTO(int gpu_unit, int gpu_core), + + TP_ARGS(gpu_unit, gpu_core), + + TP_STRUCT__entry( + __field(int, gpu_unit) + __field(int, gpu_core) + ), + + TP_fast_assign( + __entry->gpu_unit = gpu_unit; + __entry->gpu_core = gpu_core; + ), + + TP_printk("unit=%d core=%d", __entry->gpu_unit, __entry->gpu_core) + ); + +#endif /* _TRACE_GPU_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/drivers/gator/gator_trace_power.c b/drivers/gator/gator_trace_power.c new file mode 100644 index 000000000000..272e05684ee8 --- /dev/null +++ b/drivers/gator/gator_trace_power.c @@ -0,0 +1,203 @@ +/** + * Copyright (C) ARM Limited 2011-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include <linux/cpufreq.h> +#include <trace/events/power.h> + +#if defined(__arm__) + +#include <asm/mach-types.h> + +#define implements_wfi() (!machine_is_omap3_beagle()) + +#else + +#define implements_wfi() false + +#endif + +// cpu_frequency and cpu_idle trace points were introduced in Linux kernel v2.6.38 +// the now deprecated power_frequency trace point was available prior to 2.6.38, but only for x86 +#if GATOR_CPU_FREQ_SUPPORT +enum { + POWER_CPU_FREQ, + POWER_CPU_IDLE, + POWER_TOTAL +}; + +static DEFINE_PER_CPU(ulong, idle_prev_state); +static ulong power_cpu_enabled[POWER_TOTAL]; +static ulong power_cpu_key[POWER_TOTAL]; + +static int gator_trace_power_create_files(struct super_block *sb, struct dentry *root) +{ + struct dentry *dir; + int cpu; + bool found_nonzero_freq = false; + + // Even if CONFIG_CPU_FREQ is defined, it still may not be used. Check + // for non-zero values from cpufreq_quick_get + for_each_online_cpu(cpu) { + if (cpufreq_quick_get(cpu) > 0) { + found_nonzero_freq = true; + break; + } + } + + if (found_nonzero_freq) { + // cpu_frequency + dir = gatorfs_mkdir(sb, root, "Linux_power_cpu_freq"); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &power_cpu_enabled[POWER_CPU_FREQ]); + gatorfs_create_ro_ulong(sb, dir, "key", &power_cpu_key[POWER_CPU_FREQ]); + } + + // cpu_idle + dir = gatorfs_mkdir(sb, root, "Linux_power_cpu_idle"); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &power_cpu_enabled[POWER_CPU_IDLE]); + gatorfs_create_ro_ulong(sb, dir, "key", &power_cpu_key[POWER_CPU_IDLE]); + + return 0; +} + +// 'cpu' may not equal smp_processor_id(), i.e. 
may not be running on the core that is having the freq/idle state change +GATOR_DEFINE_PROBE(cpu_frequency, TP_PROTO(unsigned int frequency, unsigned int cpu)) +{ + cpu = lcpu_to_pcpu(cpu); + marshal_event_single(cpu, power_cpu_key[POWER_CPU_FREQ], frequency * 1000); +} + +GATOR_DEFINE_PROBE(cpu_idle, TP_PROTO(unsigned int state, unsigned int cpu)) +{ + cpu = lcpu_to_pcpu(cpu); + + if (state == per_cpu(idle_prev_state, cpu)) { + return; + } + + if (implements_wfi()) { + if (state == PWR_EVENT_EXIT) { + // transition from wfi to non-wfi + marshal_idle(cpu, MESSAGE_IDLE_EXIT); + } else { + // transition from non-wfi to wfi + marshal_idle(cpu, MESSAGE_IDLE_ENTER); + } + } + + per_cpu(idle_prev_state, cpu) = state; + + if (power_cpu_enabled[POWER_CPU_IDLE]) { + // Increment state so that no negative numbers are sent + marshal_event_single(cpu, power_cpu_key[POWER_CPU_IDLE], state + 1); + } +} + +static void gator_trace_power_online(void) +{ + int pcpu = get_physical_cpu(); + int lcpu = get_logical_cpu(); + if (power_cpu_enabled[POWER_CPU_FREQ]) { + marshal_event_single(pcpu, power_cpu_key[POWER_CPU_FREQ], cpufreq_quick_get(lcpu) * 1000); + } +} + +static void gator_trace_power_offline(void) +{ + // Set frequency to zero on an offline + int cpu = get_physical_cpu(); + if (power_cpu_enabled[POWER_CPU_FREQ]) { + marshal_event_single(cpu, power_cpu_key[POWER_CPU_FREQ], 0); + } +} + +static int gator_trace_power_start(void) +{ + int cpu; + + // register tracepoints + if (power_cpu_enabled[POWER_CPU_FREQ]) + if (GATOR_REGISTER_TRACE(cpu_frequency)) + goto fail_cpu_frequency_exit; + + // Always register for cpu:idle for detecting WFI, independent of power_cpu_enabled[POWER_CPU_IDLE] + if (GATOR_REGISTER_TRACE(cpu_idle)) + goto fail_cpu_idle_exit; + pr_debug("gator: registered power event tracepoints\n"); + + for_each_present_cpu(cpu) { + per_cpu(idle_prev_state, cpu) = 0; + } + + return 0; + + // unregister tracepoints on error +fail_cpu_idle_exit: + if 
(power_cpu_enabled[POWER_CPU_FREQ]) + GATOR_UNREGISTER_TRACE(cpu_frequency); +fail_cpu_frequency_exit: + pr_err("gator: power event tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n"); + + return -1; +} + +static void gator_trace_power_stop(void) +{ + int i; + + if (power_cpu_enabled[POWER_CPU_FREQ]) + GATOR_UNREGISTER_TRACE(cpu_frequency); + GATOR_UNREGISTER_TRACE(cpu_idle); + pr_debug("gator: unregistered power event tracepoints\n"); + + for (i = 0; i < POWER_TOTAL; i++) { + power_cpu_enabled[i] = 0; + } +} + +void gator_trace_power_init(void) +{ + int i; + for (i = 0; i < POWER_TOTAL; i++) { + power_cpu_enabled[i] = 0; + power_cpu_key[i] = gator_events_get_key(); + } +} +#else +static int gator_trace_power_create_files(struct super_block *sb, struct dentry *root) +{ + return 0; +} + +static void gator_trace_power_online(void) +{ +} + +static void gator_trace_power_offline(void) +{ +} + +static int gator_trace_power_start(void) +{ + return 0; +} + +static void gator_trace_power_stop(void) +{ +} + +void gator_trace_power_init(void) +{ +} +#endif diff --git a/drivers/gator/gator_trace_sched.c b/drivers/gator/gator_trace_sched.c new file mode 100644 index 000000000000..332b3f6ba965 --- /dev/null +++ b/drivers/gator/gator_trace_sched.c @@ -0,0 +1,270 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include <trace/events/sched.h> +#include "gator.h" + +#define TASK_MAP_ENTRIES 1024 /* must be power of 2 */ +#define TASK_MAX_COLLISIONS 2 + +enum { + STATE_WAIT_ON_OTHER = 0, + STATE_CONTENTION, + STATE_WAIT_ON_IO, + CPU_WAIT_TOTAL +}; + +static DEFINE_PER_CPU(uint64_t *, taskname_keys); +static DEFINE_PER_CPU(int, collecting); +static DEFINE_PER_CPU(bool, in_scheduler_context); + +// this array is never read as the cpu wait charts are derived counters +// the files are needed, nonetheless, to show that these counters are available +static ulong cpu_wait_enabled[CPU_WAIT_TOTAL]; +static ulong sched_cpu_key[CPU_WAIT_TOTAL]; + +static int sched_trace_create_files(struct super_block *sb, struct dentry *root) +{ + struct dentry *dir; + + // CPU Wait - Contention + dir = gatorfs_mkdir(sb, root, "Linux_cpu_wait_contention"); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &cpu_wait_enabled[STATE_CONTENTION]); + gatorfs_create_ro_ulong(sb, dir, "key", &sched_cpu_key[STATE_CONTENTION]); + + // CPU Wait - I/O + dir = gatorfs_mkdir(sb, root, "Linux_cpu_wait_io"); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &cpu_wait_enabled[STATE_WAIT_ON_IO]); + gatorfs_create_ro_ulong(sb, dir, "key", &sched_cpu_key[STATE_WAIT_ON_IO]); + + return 0; +} + +void emit_pid_name(struct task_struct *task) +{ + bool found = false; + char taskcomm[TASK_COMM_LEN + 3]; + unsigned long x, cpu = get_physical_cpu(); + uint64_t *keys = &(per_cpu(taskname_keys, cpu)[(task->pid & 0xFF) * TASK_MAX_COLLISIONS]); + uint64_t value; + + value = gator_chksum_crc32(task->comm); + value = (value << 32) | (uint32_t)task->pid; + + // determine if the thread name was emitted already + for (x = 0; x < TASK_MAX_COLLISIONS; x++) { + if (keys[x] == value) { + found = true; + break; + } + } + + if (!found) { + // shift values, new value always in front + uint64_t oldv, newv = value; + for (x = 0; x < TASK_MAX_COLLISIONS; x++) { + oldv = keys[x]; + 
keys[x] = newv; + newv = oldv; + } + + // emit pid names, cannot use get_task_comm, as it's not exported on all kernel versions + if (strlcpy(taskcomm, task->comm, TASK_COMM_LEN) == TASK_COMM_LEN - 1) { + // append ellipses if task->comm has length of TASK_COMM_LEN - 1 + strcat(taskcomm, "..."); + } + + marshal_thread_name(task->pid, taskcomm); + } +} + +static void collect_counters(u64 time, struct task_struct *task) +{ + int *buffer, len, cpu = get_physical_cpu(); + long long *buffer64; + struct gator_interface *gi; + + if (marshal_event_header(time)) { + list_for_each_entry(gi, &gator_events, list) { + if (gi->read) { + len = gi->read(&buffer); + marshal_event(len, buffer); + } else if (gi->read64) { + len = gi->read64(&buffer64); + marshal_event64(len, buffer64); + } + if (gi->read_proc && task != NULL) { + len = gi->read_proc(&buffer64, task); + marshal_event64(len, buffer64); + } + } + // Only check after writing all counters so that time and corresponding counters appear in the same frame + buffer_check(cpu, BLOCK_COUNTER_BUF, time); + + // Commit buffers on timeout + if (gator_live_rate > 0 && time >= per_cpu(gator_buffer_commit_time, cpu)) { + static const int buftypes[] = { NAME_BUF, COUNTER_BUF, BLOCK_COUNTER_BUF, SCHED_TRACE_BUF }; + unsigned long flags; + int i; + + local_irq_save(flags); + for (i = 0; i < ARRAY_SIZE(buftypes); ++i) { + gator_commit_buffer(cpu, buftypes[i], time); + } + local_irq_restore(flags); + + // Try to preemptively flush the annotate buffer to reduce the chance of the buffer being full + if (on_primary_core() && spin_trylock(&annotate_lock)) { + gator_commit_buffer(0, ANNOTATE_BUF, time); + spin_unlock(&annotate_lock); + } + } + } +} + +// special case used during a suspend of the system +static void trace_sched_insert_idle(void) +{ + marshal_sched_trace_switch(0, 0, 0, 0); +} + +GATOR_DEFINE_PROBE(sched_process_fork, TP_PROTO(struct task_struct *parent, struct task_struct *child)) +{ + int cookie; + int cpu = 
get_physical_cpu(); + + cookie = get_exec_cookie(cpu, child); + emit_pid_name(child); + + marshal_sched_trace_start(child->tgid, child->pid, cookie); +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) +GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next)) +#else +GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_struct *next)) +#endif +{ + int cookie; + int state; + int cpu = get_physical_cpu(); + + per_cpu(in_scheduler_context, cpu) = true; + + // do as much work as possible before disabling interrupts + cookie = get_exec_cookie(cpu, next); + emit_pid_name(next); + if (prev->state == TASK_RUNNING) { + state = STATE_CONTENTION; + } else if (prev->in_iowait) { + state = STATE_WAIT_ON_IO; + } else { + state = STATE_WAIT_ON_OTHER; + } + + per_cpu(collecting, cpu) = 1; + collect_counters(gator_get_time(), prev); + per_cpu(collecting, cpu) = 0; + + marshal_sched_trace_switch(next->tgid, next->pid, cookie, state); + + per_cpu(in_scheduler_context, cpu) = false; +} + +GATOR_DEFINE_PROBE(sched_process_free, TP_PROTO(struct task_struct *p)) +{ + marshal_sched_trace_exit(p->tgid, p->pid); +} + +static void do_nothing(void *info) +{ + // Intentionally do nothing + (void)info; +} + +static int register_scheduler_tracepoints(void) +{ + // register tracepoints + if (GATOR_REGISTER_TRACE(sched_process_fork)) + goto fail_sched_process_fork; + if (GATOR_REGISTER_TRACE(sched_switch)) + goto fail_sched_switch; + if (GATOR_REGISTER_TRACE(sched_process_free)) + goto fail_sched_process_free; + pr_debug("gator: registered tracepoints\n"); + + // Now that the scheduler tracepoint is registered, force a context switch + // on all cpus to capture what is currently running. 
+ on_each_cpu(do_nothing, NULL, 0); + + return 0; + + // unregister tracepoints on error +fail_sched_process_free: + GATOR_UNREGISTER_TRACE(sched_switch); +fail_sched_switch: + GATOR_UNREGISTER_TRACE(sched_process_fork); +fail_sched_process_fork: + pr_err("gator: tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n"); + + return -1; +} + +int gator_trace_sched_start(void) +{ + int cpu, size; + + for_each_present_cpu(cpu) { + size = TASK_MAP_ENTRIES * TASK_MAX_COLLISIONS * sizeof(uint64_t); + per_cpu(taskname_keys, cpu) = (uint64_t *)kmalloc(size, GFP_KERNEL); + if (!per_cpu(taskname_keys, cpu)) + return -1; + memset(per_cpu(taskname_keys, cpu), 0, size); + } + + return register_scheduler_tracepoints(); +} + +void gator_trace_sched_offline(void) +{ + trace_sched_insert_idle(); +} + +static void unregister_scheduler_tracepoints(void) +{ + GATOR_UNREGISTER_TRACE(sched_process_fork); + GATOR_UNREGISTER_TRACE(sched_switch); + GATOR_UNREGISTER_TRACE(sched_process_free); + pr_debug("gator: unregistered tracepoints\n"); +} + +void gator_trace_sched_stop(void) +{ + int cpu; + unregister_scheduler_tracepoints(); + + for_each_present_cpu(cpu) { + kfree(per_cpu(taskname_keys, cpu)); + } +} + +void gator_trace_sched_init(void) +{ + int i; + for (i = 0; i < CPU_WAIT_TOTAL; i++) { + cpu_wait_enabled[i] = 0; + sched_cpu_key[i] = gator_events_get_key(); + } +} diff --git a/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h b/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h new file mode 100644 index 000000000000..347a4fe404bc --- /dev/null +++ b/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h @@ -0,0 +1,163 @@ +/** + * Copyright (C) ARM Limited 2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#ifndef __MALI_MJOLLNIR_PROFILING_GATOR_API_H__ +#define __MALI_MJOLLNIR_PROFILING_GATOR_API_H__ + +#ifdef __cplusplus +extern "C" +{ +#endif + + +/* + * The number of processor cores. Update to suit your hardware implementation. + */ +#define MAX_NUM_FP_CORES (4) +#define MAX_NUM_VP_CORES (1) +#define MAX_NUM_L2_CACHE_CORES (1) + +enum counters +{ + /* Timeline activity */ + ACTIVITY_VP_0 = 0, + ACTIVITY_FP_0, + ACTIVITY_FP_1, + ACTIVITY_FP_2, + ACTIVITY_FP_3, + + /* L2 cache counters */ + COUNTER_L2_0_C0, + COUNTER_L2_0_C1, + + /* Vertex processor counters */ + COUNTER_VP_0_C0, + COUNTER_VP_0_C1, + + /* Fragment processor counters */ + COUNTER_FP_0_C0, + COUNTER_FP_0_C1, + COUNTER_FP_1_C0, + COUNTER_FP_1_C1, + COUNTER_FP_2_C0, + COUNTER_FP_2_C1, + COUNTER_FP_3_C0, + COUNTER_FP_3_C1, + + /* EGL Software Counters */ + COUNTER_EGL_BLIT_TIME, + + /* GLES Software Counters */ + COUNTER_GLES_DRAW_ELEMENTS_CALLS, + COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES, + COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED, + COUNTER_GLES_DRAW_ARRAYS_CALLS, + COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED, + COUNTER_GLES_DRAW_POINTS, + COUNTER_GLES_DRAW_LINES, + COUNTER_GLES_DRAW_LINE_LOOP, + COUNTER_GLES_DRAW_LINE_STRIP, + COUNTER_GLES_DRAW_TRIANGLES, + COUNTER_GLES_DRAW_TRIANGLE_STRIP, + COUNTER_GLES_DRAW_TRIANGLE_FAN, + COUNTER_GLES_NON_VBO_DATA_COPY_TIME, + COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI, + COUNTER_GLES_UPLOAD_TEXTURE_TIME, + COUNTER_GLES_UPLOAD_VBO_TIME, + COUNTER_GLES_NUM_FLUSHES, + COUNTER_GLES_NUM_VSHADERS_GENERATED, + COUNTER_GLES_NUM_FSHADERS_GENERATED, + COUNTER_GLES_VSHADER_GEN_TIME, + COUNTER_GLES_FSHADER_GEN_TIME, + COUNTER_GLES_INPUT_TRIANGLES, + COUNTER_GLES_VXCACHE_HIT, + COUNTER_GLES_VXCACHE_MISS, + COUNTER_GLES_VXCACHE_COLLISION, + COUNTER_GLES_CULLED_TRIANGLES, + COUNTER_GLES_CULLED_LINES, + COUNTER_GLES_BACKFACE_TRIANGLES, + COUNTER_GLES_GBCLIP_TRIANGLES, + COUNTER_GLES_GBCLIP_LINES, + COUNTER_GLES_TRIANGLES_DRAWN, + COUNTER_GLES_DRAWCALL_TIME, + 
COUNTER_GLES_TRIANGLES_COUNT, + COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT, + COUNTER_GLES_STRIP_TRIANGLES_COUNT, + COUNTER_GLES_FAN_TRIANGLES_COUNT, + COUNTER_GLES_LINES_COUNT, + COUNTER_GLES_INDEPENDENT_LINES_COUNT, + COUNTER_GLES_STRIP_LINES_COUNT, + COUNTER_GLES_LOOP_LINES_COUNT, + + COUNTER_FILMSTRIP, + COUNTER_FREQUENCY, + COUNTER_VOLTAGE, + + NUMBER_OF_EVENTS +}; + +#define FIRST_ACTIVITY_EVENT ACTIVITY_VP_0 +#define LAST_ACTIVITY_EVENT ACTIVITY_FP_3 + +#define FIRST_HW_COUNTER COUNTER_L2_0_C0 +#define LAST_HW_COUNTER COUNTER_FP_3_C1 + +#define FIRST_SW_COUNTER COUNTER_EGL_BLIT_TIME +#define LAST_SW_COUNTER COUNTER_GLES_LOOP_LINES_COUNT + +/* Signifies that the system is able to report voltage and frequency numbers. */ +#define DVFS_REPORTED_BY_DDK 1 + +/** + * Structure to pass performance counter data of a Mali core + */ +typedef struct _mali_profiling_core_counters +{ + u32 source0; + u32 value0; + u32 source1; + u32 value1; +} _mali_profiling_core_counters; + +/* + * For compatibility with utgard. + */ +typedef struct _mali_profiling_l2_counter_values +{ + struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES]; +} _mali_profiling_l2_counter_values; + +typedef struct _mali_profiling_mali_version +{ + u32 mali_product_id; + u32 mali_version_major; + u32 mali_version_minor; + u32 num_of_l2_cores; + u32 num_of_fp_cores; + u32 num_of_vp_cores; +} _mali_profiling_mali_version; + +extern void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values); +extern u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values); + +/* + * List of possible actions allowing DDK to be controlled by Streamline. + * The following numbers are used by DDK to control the frame buffer dumping. 
+ */ +#define FBDUMP_CONTROL_ENABLE (1) +#define FBDUMP_CONTROL_RATE (2) +#define SW_COUNTER_ENABLE (3) +#define FBDUMP_CONTROL_RESIZE_FACTOR (4) + + +#ifdef __cplusplus +} +#endif + +#endif /* __MALI_MJOLLNIR_PROFILING_GATOR_API_H__ */ diff --git a/drivers/gator/mali/mali_utgard_profiling_gator_api.h b/drivers/gator/mali/mali_utgard_profiling_gator_api.h new file mode 100644 index 000000000000..559647a76d29 --- /dev/null +++ b/drivers/gator/mali/mali_utgard_profiling_gator_api.h @@ -0,0 +1,201 @@ +/** + * Copyright (C) ARM Limited 2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef __MALI_UTGARD_PROFILING_GATOR_API_H__ +#define __MALI_UTGARD_PROFILING_GATOR_API_H__ + +#ifdef __cplusplus +extern "C" +{ +#endif + +#define MALI_PROFILING_API_VERSION 4 + +#define MAX_NUM_L2_CACHE_CORES 3 +#define MAX_NUM_FP_CORES 8 +#define MAX_NUM_VP_CORES 1 + +/** The list of events supported by the Mali DDK. 
*/ +typedef enum +{ + /* Vertex processor activity */ + ACTIVITY_VP_0 = 0, + + /* Fragment processor activity */ + ACTIVITY_FP_0, /* 1 */ + ACTIVITY_FP_1, + ACTIVITY_FP_2, + ACTIVITY_FP_3, + ACTIVITY_FP_4, + ACTIVITY_FP_5, + ACTIVITY_FP_6, + ACTIVITY_FP_7, + + /* L2 cache counters */ + COUNTER_L2_0_C0, + COUNTER_L2_0_C1, + COUNTER_L2_1_C0, + COUNTER_L2_1_C1, + COUNTER_L2_2_C0, + COUNTER_L2_2_C1, + + /* Vertex processor counters */ + COUNTER_VP_0_C0, /*15*/ + COUNTER_VP_0_C1, + + /* Fragment processor counters */ + COUNTER_FP_0_C0, + COUNTER_FP_0_C1, + COUNTER_FP_1_C0, + COUNTER_FP_1_C1, + COUNTER_FP_2_C0, + COUNTER_FP_2_C1, + COUNTER_FP_3_C0, + COUNTER_FP_3_C1, + COUNTER_FP_4_C0, + COUNTER_FP_4_C1, + COUNTER_FP_5_C0, + COUNTER_FP_5_C1, + COUNTER_FP_6_C0, + COUNTER_FP_6_C1, + COUNTER_FP_7_C0, + COUNTER_FP_7_C1, /* 32 */ + + /* + * If more hardware counters are added, the _mali_osk_hw_counter_table + * below should also be updated. + */ + + /* EGL software counters */ + COUNTER_EGL_BLIT_TIME, + + /* GLES software counters */ + COUNTER_GLES_DRAW_ELEMENTS_CALLS, + COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES, + COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED, + COUNTER_GLES_DRAW_ARRAYS_CALLS, + COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED, + COUNTER_GLES_DRAW_POINTS, + COUNTER_GLES_DRAW_LINES, + COUNTER_GLES_DRAW_LINE_LOOP, + COUNTER_GLES_DRAW_LINE_STRIP, + COUNTER_GLES_DRAW_TRIANGLES, + COUNTER_GLES_DRAW_TRIANGLE_STRIP, + COUNTER_GLES_DRAW_TRIANGLE_FAN, + COUNTER_GLES_NON_VBO_DATA_COPY_TIME, + COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI, + COUNTER_GLES_UPLOAD_TEXTURE_TIME, + COUNTER_GLES_UPLOAD_VBO_TIME, + COUNTER_GLES_NUM_FLUSHES, + COUNTER_GLES_NUM_VSHADERS_GENERATED, + COUNTER_GLES_NUM_FSHADERS_GENERATED, + COUNTER_GLES_VSHADER_GEN_TIME, + COUNTER_GLES_FSHADER_GEN_TIME, + COUNTER_GLES_INPUT_TRIANGLES, + COUNTER_GLES_VXCACHE_HIT, + COUNTER_GLES_VXCACHE_MISS, + COUNTER_GLES_VXCACHE_COLLISION, + COUNTER_GLES_CULLED_TRIANGLES, + COUNTER_GLES_CULLED_LINES, + 
COUNTER_GLES_BACKFACE_TRIANGLES, + COUNTER_GLES_GBCLIP_TRIANGLES, + COUNTER_GLES_GBCLIP_LINES, + COUNTER_GLES_TRIANGLES_DRAWN, + COUNTER_GLES_DRAWCALL_TIME, + COUNTER_GLES_TRIANGLES_COUNT, + COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT, + COUNTER_GLES_STRIP_TRIANGLES_COUNT, + COUNTER_GLES_FAN_TRIANGLES_COUNT, + COUNTER_GLES_LINES_COUNT, + COUNTER_GLES_INDEPENDENT_LINES_COUNT, + COUNTER_GLES_STRIP_LINES_COUNT, + COUNTER_GLES_LOOP_LINES_COUNT, + + /* Framebuffer capture pseudo-counter */ + COUNTER_FILMSTRIP, + + NUMBER_OF_EVENTS +} _mali_osk_counter_id; + +#define FIRST_ACTIVITY_EVENT ACTIVITY_VP_0 +#define LAST_ACTIVITY_EVENT ACTIVITY_FP_7 + +#define FIRST_HW_COUNTER COUNTER_L2_0_C0 +#define LAST_HW_COUNTER COUNTER_FP_7_C1 + +#define FIRST_SW_COUNTER COUNTER_EGL_BLIT_TIME +#define LAST_SW_COUNTER COUNTER_GLES_LOOP_LINES_COUNT + +#define FIRST_SPECIAL_COUNTER COUNTER_FILMSTRIP +#define LAST_SPECIAL_COUNTER COUNTER_FILMSTRIP + +/** + * Structure to pass performance counter data of a Mali core + */ +typedef struct _mali_profiling_core_counters +{ + u32 source0; + u32 value0; + u32 source1; + u32 value1; +} _mali_profiling_core_counters; + +/** + * Structure to pass performance counter data of Mali L2 cache cores + */ +typedef struct _mali_profiling_l2_counter_values +{ + struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES]; +} _mali_profiling_l2_counter_values; + +/** + * Structure to pass data defining Mali instance in use: + * + * mali_product_id - Mali product id + * mali_version_major - Mali version major number + * mali_version_minor - Mali version minor number + * num_of_l2_cores - number of L2 cache cores + * num_of_fp_cores - number of fragment processor cores + * num_of_vp_cores - number of vertex processor cores + */ +typedef struct _mali_profiling_mali_version +{ + u32 mali_product_id; + u32 mali_version_major; + u32 mali_version_minor; + u32 num_of_l2_cores; + u32 num_of_fp_cores; + u32 num_of_vp_cores; +} _mali_profiling_mali_version; + +/* + * 
List of possible actions to be controlled by Streamline. + * The following numbers are used by gator to control the frame buffer dumping and s/w counter reporting. + * We cannot use the enums in mali_uk_types.h because they are unknown inside gator. + */ +#define FBDUMP_CONTROL_ENABLE (1) +#define FBDUMP_CONTROL_RATE (2) +#define SW_COUNTER_ENABLE (3) +#define FBDUMP_CONTROL_RESIZE_FACTOR (4) + +void _mali_profiling_control(u32 action, u32 value); + +u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values); + +int _mali_profiling_set_event(u32 counter_id, s32 event_id); + +u32 _mali_profiling_get_api_version(void); + +void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values); + +#ifdef __cplusplus +} +#endif + +#endif /* __MALI_UTGARD_PROFILING_GATOR_API_H__ */ diff --git a/drivers/gator/mali_t6xx.mk b/drivers/gator/mali_t6xx.mk new file mode 100644 index 000000000000..1a98c1c6a73f --- /dev/null +++ b/drivers/gator/mali_t6xx.mk @@ -0,0 +1,26 @@ +# Defines for Mali-T6xx driver +EXTRA_CFLAGS += -DMALI_USE_UMP=1 \ + -DMALI_LICENSE_IS_GPL=1 \ + -DMALI_BASE_TRACK_MEMLEAK=0 \ + -DMALI_DEBUG=0 \ + -DMALI_ERROR_INJECT_ON=0 \ + -DMALI_CUSTOMER_RELEASE=1 \ + -DMALI_UNIT_TEST=0 \ + -DMALI_BACKEND_KERNEL=1 \ + -DMALI_NO_MALI=0 + +DDK_DIR ?= . +KBASE_DIR = $(DDK_DIR)/drivers/gpu/arm/t6xx/kbase +OSK_DIR = $(DDK_DIR)/drivers/gpu/arm/t6xx/kbase/osk +UMP_DIR = $(DDK_DIR)/include/linux + +# Include directories in the DDK +EXTRA_CFLAGS += -I$(KBASE_DIR)/ \ + -I$(KBASE_DIR)/.. \ + -I$(OSK_DIR)/.. \ + -I$(UMP_DIR)/.. 
\ + -I$(DDK_DIR)/include \ + -I$(KBASE_DIR)/osk/src/linux/include \ + -I$(KBASE_DIR)/platform_dummy \ + -I$(KBASE_DIR)/src + diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 19ceaa60e0f4..65bc83747f66 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -41,6 +41,7 @@ #include <linux/slab.h> #include <linux/irqchip/chained_irq.h> #include <linux/irqchip/arm-gic.h> +#include <trace/events/arm-ipi.h> #include <asm/irq.h> #include <asm/exception.h> @@ -253,10 +254,9 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids) return -EINVAL; + raw_spin_lock(&irq_controller_lock); mask = 0xff << shift; bit = gic_cpu_map[cpu] << shift; - - raw_spin_lock(&irq_controller_lock); val = readl_relaxed(reg) & ~mask; writel_relaxed(val | bit, reg); raw_spin_unlock(&irq_controller_lock); @@ -453,6 +453,12 @@ static void __cpuinit gic_cpu_init(struct gic_chip_data *gic) writel_relaxed(1, base + GIC_CPU_CTRL); } +void gic_cpu_if_down(void) +{ + void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]); + writel_relaxed(0, cpu_base + GIC_CPU_CTRL); +} + #ifdef CONFIG_CPU_PM /* * Saves the GIC distributor registers during suspend or idle. Must be called @@ -646,11 +652,15 @@ static void __init gic_pm_init(struct gic_chip_data *gic) void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) { int cpu; - unsigned long map = 0; + unsigned long flags, map = 0; + + raw_spin_lock_irqsave(&irq_controller_lock, flags); /* Convert our logical CPU mask into a physical one. 
*/ - for_each_cpu(cpu, mask) + for_each_cpu(cpu, mask) { + trace_arm_ipi_send(irq, cpu); map |= gic_cpu_map[cpu]; + } /* * Ensure that stores to Normal memory are visible to the @@ -660,9 +670,145 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) /* this always happens on GIC0 */ writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT); + + raw_spin_unlock_irqrestore(&irq_controller_lock, flags); } #endif +#ifdef CONFIG_BL_SWITCHER +/* + * gic_send_sgi - send a SGI directly to given CPU interface number + * + * cpu_id: the ID for the destination CPU interface + * irq: the IPI number to send a SGI for + */ +void gic_send_sgi(unsigned int cpu_id, unsigned int irq) +{ + BUG_ON(cpu_id >= NR_GIC_CPU_IF); + cpu_id = 1 << cpu_id; + /* this always happens on GIC0 */ + writel_relaxed((cpu_id << 16) | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT); +} + +/* + * gic_get_cpu_id - get the CPU interface ID for the specified CPU + * + * @cpu: the logical CPU number to get the GIC ID for. + * + * Return the CPU interface ID for the given logical CPU number, + * or -1 if the CPU number is too large or the interface ID is + * unknown (more than one bit set). + */ +int gic_get_cpu_id(unsigned int cpu) +{ + unsigned int cpu_bit; + + if (cpu >= NR_GIC_CPU_IF) + return -1; + cpu_bit = gic_cpu_map[cpu]; + if (cpu_bit & (cpu_bit - 1)) + return -1; + return __ffs(cpu_bit); +} + +/* + * gic_migrate_target - migrate IRQs to another PU interface + * + * @new_cpu_id: the CPU target ID to migrate IRQs to + * + * Migrate all peripheral interrupts with a target matching the current CPU + * to the interface corresponding to @new_cpu_id. The CPU interface mapping + * is also updated. Targets to other CPU interfaces are unchanged. + * This must be called with IRQs locally disabled. 
+ */ +void gic_migrate_target(unsigned int new_cpu_id) +{ + unsigned int old_cpu_id, gic_irqs, gic_nr = 0; + void __iomem *dist_base; + int i, ror_val, cpu = smp_processor_id(); + u32 val, old_mask, active_mask; + + if (gic_nr >= MAX_GIC_NR) + BUG(); + + dist_base = gic_data_dist_base(&gic_data[gic_nr]); + if (!dist_base) + return; + gic_irqs = gic_data[gic_nr].gic_irqs; + + old_cpu_id = __ffs(gic_cpu_map[cpu]); + old_mask = 0x01010101 << old_cpu_id; + ror_val = (old_cpu_id - new_cpu_id) & 31; + + raw_spin_lock(&irq_controller_lock); + + gic_cpu_map[cpu] = 1 << new_cpu_id; + + for (i = 8; i < DIV_ROUND_UP(gic_irqs, 4); i++) { + val = readl_relaxed(dist_base + GIC_DIST_TARGET + i * 4); + active_mask = val & old_mask; + if (active_mask) { + val &= ~active_mask; + val |= ror32(active_mask, ror_val); + writel_relaxed(val, dist_base + GIC_DIST_TARGET + i * 4); + } + } + + raw_spin_unlock(&irq_controller_lock); + + /* + * Now let's migrate and clear any potential SGIs that might be + * pending for us (old_cpu_id). Since GIC_DIST_SGI_PENDING_SET + * is a banked register, we can only forward the SGI using + * GIC_DIST_SOFTINT. The original SGI source is lost but Linux + * doesn't use that information anyway. + * + * For the same reason we do not adjust SGI source information + * for previously sent SGIs by us to other CPUs either. + */ + for (i = 0; i < 16; i += 4) { + int j; + val = readl_relaxed(dist_base + GIC_DIST_SGI_PENDING_SET + i); + if (!val) + continue; + writel_relaxed(val, dist_base + GIC_DIST_SGI_PENDING_CLEAR + i); + for (j = i; j < i + 4; j++) { + if (val & 0xff) + writel_relaxed((1 << (new_cpu_id + 16)) | j, + dist_base + GIC_DIST_SOFTINT); + val >>= 8; + } + } +} + +/* + * gic_get_sgir_physaddr - get the physical address for the SGI register + * + * Return the physical address of the SGI register to be used + * by some early assembly code when the kernel is not yet available.
+ */ +static unsigned long gic_dist_physaddr; + +unsigned long gic_get_sgir_physaddr(void) +{ + if (!gic_dist_physaddr) + return 0; + return gic_dist_physaddr + GIC_DIST_SOFTINT; +} + +void __init gic_init_physaddr(struct device_node *node) +{ + struct resource res; + if (of_address_to_resource(node, 0, &res) == 0) { + gic_dist_physaddr = res.start; + pr_info("GIC physical location is %#lx\n", gic_dist_physaddr); + } +} + +#else +#define gic_init_physaddr(node) do { } while(0) +#endif + static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { @@ -844,6 +990,8 @@ int __init gic_of_init(struct device_node *node, struct device_node *parent) percpu_offset = 0; gic_init_bases(gic_cnt, -1, dist_base, cpu_base, percpu_offset, node); + if (!gic_cnt) + gic_init_physaddr(node); if (parent) { irq = irq_of_parse_and_map(node, 0); diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index d54e985748b7..a5e54f0d6a73 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1144,7 +1144,15 @@ config MCP_UCB1200_TS endmenu config VEXPRESS_CONFIG - bool + bool "ARM Versatile Express platform infrastructure" + depends on ARM || ARM64 help Platform configuration infrastructure for the ARM Ltd. Versatile Express. + +config VEXPRESS_SPC + bool "Versatile Express SPC driver support" + depends on ARM + depends on VEXPRESS_CONFIG + help + Serial Power Controller driver for ARM Ltd. test chips. 
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 718e94a2a9a7..3a0120315aa3 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -153,5 +153,6 @@ obj-$(CONFIG_MFD_SEC_CORE) += sec-core.o sec-irq.o obj-$(CONFIG_MFD_SYSCON) += syscon.o obj-$(CONFIG_MFD_LM3533) += lm3533-core.o lm3533-ctrlbank.o obj-$(CONFIG_VEXPRESS_CONFIG) += vexpress-config.o vexpress-sysreg.o +obj-$(CONFIG_VEXPRESS_SPC) += vexpress-spc.o obj-$(CONFIG_MFD_RETU) += retu-mfd.o obj-$(CONFIG_MFD_AS3711) += as3711.o diff --git a/drivers/mfd/vexpress-config.c b/drivers/mfd/vexpress-config.c index 84ce6b9daa3d..1af2b0e0182f 100644 --- a/drivers/mfd/vexpress-config.c +++ b/drivers/mfd/vexpress-config.c @@ -86,29 +86,13 @@ void vexpress_config_bridge_unregister(struct vexpress_config_bridge *bridge) } EXPORT_SYMBOL(vexpress_config_bridge_unregister); - -struct vexpress_config_func { - struct vexpress_config_bridge *bridge; - void *func; -}; - -struct vexpress_config_func *__vexpress_config_func_get(struct device *dev, - struct device_node *node) +static struct vexpress_config_bridge * + vexpress_config_bridge_find(struct device_node *node) { - struct device_node *bridge_node; - struct vexpress_config_func *func; int i; + struct vexpress_config_bridge *res = NULL; + struct device_node *bridge_node = of_node_get(node); - if (WARN_ON(dev && node && dev->of_node != node)) - return NULL; - if (dev && !node) - node = dev->of_node; - - func = kzalloc(sizeof(*func), GFP_KERNEL); - if (!func) - return NULL; - - bridge_node = of_node_get(node); while (bridge_node) { const __be32 *prop = of_get_property(bridge_node, "arm,vexpress,config-bridge", NULL); @@ -129,13 +113,46 @@ struct vexpress_config_func *__vexpress_config_func_get(struct device *dev, if (test_bit(i, vexpress_config_bridges_map) && bridge->node == bridge_node) { - func->bridge = bridge; - func->func = bridge->info->func_get(dev, node); + res = bridge; break; } } mutex_unlock(&vexpress_config_bridges_mutex); + return res; +} + + 
+struct vexpress_config_func { + struct vexpress_config_bridge *bridge; + void *func; +}; + +struct vexpress_config_func *__vexpress_config_func_get( + struct vexpress_config_bridge *bridge, + struct device *dev, + struct device_node *node, + const char *id) +{ + struct vexpress_config_func *func; + + if (WARN_ON(dev && node && dev->of_node != node)) + return NULL; + if (dev && !node) + node = dev->of_node; + + if (!bridge) + bridge = vexpress_config_bridge_find(node); + if (!bridge) + return NULL; + + func = kzalloc(sizeof(*func), GFP_KERNEL); + if (!func) + return NULL; + + func->bridge = bridge; + func->func = bridge->info->func_get(dev, node, id); + if (!func->func) { of_node_put(node); kfree(func); diff --git a/drivers/mfd/vexpress-spc.c b/drivers/mfd/vexpress-spc.c new file mode 100644 index 000000000000..0c6718abf1ba --- /dev/null +++ b/drivers/mfd/vexpress-spc.c @@ -0,0 +1,633 @@ +/* + * Versatile Express Serial Power Controller (SPC) support + * + * Copyright (C) 2013 ARM Ltd. + * + * Authors: Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com> + * Achin Gupta <achin.gupta@arm.com> + * Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <linux/device.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/slab.h> +#include <linux/vexpress.h> + +#include <asm/cacheflush.h> + +#define SCC_CFGREG19 0x120 +#define SCC_CFGREG20 0x124 +#define A15_CONF 0x400 +#define A7_CONF 0x500 +#define SYS_INFO 0x700 +#define PERF_LVL_A15 0xB00 +#define PERF_REQ_A15 0xB04 +#define PERF_LVL_A7 0xB08 +#define PERF_REQ_A7 0xB0c +#define SYS_CFGCTRL 0xB10 +#define SYS_CFGCTRL_REQ 0xB14 +#define PWC_STATUS 0xB18 +#define PWC_FLAG 0xB1c +#define WAKE_INT_MASK 0xB24 +#define WAKE_INT_RAW 0xB28 +#define WAKE_INT_STAT 0xB2c +#define A15_PWRDN_EN 0xB30 +#define A7_PWRDN_EN 0xB34 +#define A7_PWRDNACK 0xB54 +#define A15_BX_ADDR0 0xB68 +#define SYS_CFG_WDATA 0xB70 +#define SYS_CFG_RDATA 0xB74 +#define A7_BX_ADDR0 0xB78 + +#define GBL_WAKEUP_INT_MSK (0x3 << 10) + +#define CLKF_SHIFT 16 +#define CLKF_MASK 0x1FFF +#define CLKR_SHIFT 0 +#define CLKR_MASK 0x3F +#define CLKOD_SHIFT 8 +#define CLKOD_MASK 0xF + +#define OPP_FUNCTION 6 +#define OPP_BASE_DEVICE 0x300 +#define OPP_A15_OFFSET 0x4 +#define OPP_A7_OFFSET 0xc + +#define SYS_CFGCTRL_START (1 << 31) +#define SYS_CFGCTRL_WRITE (1 << 30) +#define SYS_CFGCTRL_FUNC(n) (((n) & 0x3f) << 20) +#define SYS_CFGCTRL_DEVICE(n) (((n) & 0xfff) << 0) + +#define MAX_OPPS 8 +#define MAX_CLUSTERS 2 + +enum { + A15_OPP_TYPE = 0, + A7_OPP_TYPE = 1, + SYS_CFGCTRL_TYPE = 2, + INVALID_TYPE +}; + +#define STAT_COMPLETE(type) ((1 << 0) << (type << 2)) +#define STAT_ERR(type) ((1 << 1) << (type << 2)) +#define RESPONSE_MASK(type) (STAT_COMPLETE(type) | STAT_ERR(type)) + +struct vexpress_spc_drvdata { + void __iomem *baseaddr; + u32 a15_clusid; + int irq; + u32 cur_req_type; + u32 freqs[MAX_CLUSTERS][MAX_OPPS]; + int freqs_cnt[MAX_CLUSTERS]; +}; + +enum spc_func_type { + CONFIG_FUNC = 0, + PERF_FUNC = 1, +}; + +struct vexpress_spc_func { + enum spc_func_type type; 
+ u32 function; + u32 device; +}; + +static struct vexpress_spc_drvdata *info; +static u32 *vexpress_spc_config_data; +static struct vexpress_config_bridge *vexpress_spc_config_bridge; +static struct vexpress_config_func *opp_func, *perf_func; + +static int vexpress_spc_load_result = -EAGAIN; + +static bool vexpress_spc_initialized(void) +{ + return vexpress_spc_load_result == 0; +} + +/** + * vexpress_spc_write_resume_reg() - set the jump address used for warm boot + * + * @cluster: mpidr[15:8] bitfield describing cluster affinity level + * @cpu: mpidr[7:0] bitfield describing cpu affinity level + * @addr: physical resume address + */ +void vexpress_spc_write_resume_reg(u32 cluster, u32 cpu, u32 addr) +{ + void __iomem *baseaddr; + + if (WARN_ON_ONCE(cluster >= MAX_CLUSTERS)) + return; + + if (cluster != info->a15_clusid) + baseaddr = info->baseaddr + A7_BX_ADDR0 + (cpu << 2); + else + baseaddr = info->baseaddr + A15_BX_ADDR0 + (cpu << 2); + + writel_relaxed(addr, baseaddr); +} + +/** + * vexpress_spc_get_nb_cpus() - get number of cpus in a cluster + * + * @cluster: mpidr[15:8] bitfield describing cluster affinity level + * + * Return: number of cpus in the cluster + * -EINVAL if cluster number invalid + */ +int vexpress_spc_get_nb_cpus(u32 cluster) +{ + u32 val; + + if (WARN_ON_ONCE(cluster >= MAX_CLUSTERS)) + return -EINVAL; + + val = readl_relaxed(info->baseaddr + SYS_INFO); + val = (cluster != info->a15_clusid) ? 
(val >> 20) : (val >> 16); + return val & 0xf; +} +EXPORT_SYMBOL_GPL(vexpress_spc_get_nb_cpus); + +/** + * vexpress_spc_get_performance - get current performance level of cluster + * @cluster: mpidr[15:8] bitfield describing cluster affinity level + * @freq: pointer to the performance level to be assigned + * + * Return: 0 on success + * < 0 on read error + */ +int vexpress_spc_get_performance(u32 cluster, u32 *freq) +{ + u32 perf_cfg_reg; + int perf, ret; + + if (!vexpress_spc_initialized() || (cluster >= MAX_CLUSTERS)) + return -EINVAL; + + perf_cfg_reg = cluster != info->a15_clusid ? PERF_LVL_A7 : PERF_LVL_A15; + ret = vexpress_config_read(perf_func, perf_cfg_reg, &perf); + + if (!ret) + *freq = info->freqs[cluster][perf]; + + return ret; +} +EXPORT_SYMBOL_GPL(vexpress_spc_get_performance); + +/** + * vexpress_spc_find_perf_index - get performance level corresponding to + * a frequency + * @cluster: mpidr[15:8] bitfield describing cluster affinity level + * @freq: frequency to be looked-up + * + * Return: perf level index on success + * -EINVAL on error + */ +static int vexpress_spc_find_perf_index(u32 cluster, u32 freq) +{ + int idx; + + for (idx = 0; idx < info->freqs_cnt[cluster]; idx++) + if (info->freqs[cluster][idx] == freq) + break; + return (idx == info->freqs_cnt[cluster]) ? -EINVAL : idx; +} + +/** + * vexpress_spc_set_performance - set current performance level of cluster + * + * @cluster: mpidr[15:8] bitfield describing cluster affinity level + * @freq: performance level to be programmed + * + * Returns: 0 on success + * < 0 on write error + */ +int vexpress_spc_set_performance(u32 cluster, u32 freq) +{ + int ret, perf, offset; + + if (!vexpress_spc_initialized() || (cluster >= MAX_CLUSTERS)) + return -EINVAL; + + offset = (cluster != info->a15_clusid) ?
PERF_LVL_A7 : PERF_LVL_A15; + + perf = vexpress_spc_find_perf_index(cluster, freq); + + if (perf < 0 || perf >= MAX_OPPS) + return -EINVAL; + + ret = vexpress_config_write(perf_func, offset, perf); + + return ret; +} +EXPORT_SYMBOL_GPL(vexpress_spc_set_performance); + +static void vexpress_spc_set_wake_intr(u32 mask) +{ + writel_relaxed(mask & VEXPRESS_SPC_WAKE_INTR_MASK, + info->baseaddr + WAKE_INT_MASK); +} + +static inline void reg_bitmask(u32 *reg, u32 mask, bool set) +{ + if (set) + *reg |= mask; + else + *reg &= ~mask; +} + +/** + * vexpress_spc_set_global_wakeup_intr() + * + * Function to set/clear global wakeup IRQs. Not protected by locking since + * it might be used in code paths where normal cacheable locks are not + * working. Locking must be provided by the caller to ensure atomicity. + * + * @set: if true, global wake-up IRQs are set, if false they are cleared + */ +void vexpress_spc_set_global_wakeup_intr(bool set) +{ + u32 wake_int_mask_reg = 0; + + wake_int_mask_reg = readl_relaxed(info->baseaddr + WAKE_INT_MASK); + reg_bitmask(&wake_int_mask_reg, GBL_WAKEUP_INT_MSK, set); + vexpress_spc_set_wake_intr(wake_int_mask_reg); +} + +/** + * vexpress_spc_set_cpu_wakeup_irq() + * + * Function to set/clear per-CPU wake-up IRQs. Not protected by locking since + * it might be used in code paths where normal cacheable locks are not + * working. Locking must be provided by the caller to ensure atomicity. 
+ * + * @cpu: mpidr[7:0] bitfield describing cpu affinity level + * @cluster: mpidr[15:8] bitfield describing cluster affinity level + * @set: if true, wake-up IRQs are set, if false they are cleared + */ +void vexpress_spc_set_cpu_wakeup_irq(u32 cpu, u32 cluster, bool set) +{ + u32 mask = 0; + u32 wake_int_mask_reg = 0; + + mask = 1 << cpu; + if (info->a15_clusid != cluster) + mask <<= 4; + + wake_int_mask_reg = readl_relaxed(info->baseaddr + WAKE_INT_MASK); + reg_bitmask(&wake_int_mask_reg, mask, set); + vexpress_spc_set_wake_intr(wake_int_mask_reg); +} + +/** + * vexpress_spc_powerdown_enable() + * + * Function to enable/disable cluster powerdown. Not protected by locking + * since it might be used in code paths where normal cacheable locks are not + * working. Locking must be provided by the caller to ensure atomicity. + * + * @cluster: mpidr[15:8] bitfield describing cluster affinity level + * @enable: if true enables powerdown, if false disables it + */ +void vexpress_spc_powerdown_enable(u32 cluster, bool enable) +{ + u32 pwdrn_reg = 0; + + if (cluster >= MAX_CLUSTERS) + return; + pwdrn_reg = cluster != info->a15_clusid ? A7_PWRDN_EN : A15_PWRDN_EN; + writel_relaxed(enable, info->baseaddr + pwdrn_reg); +} + +irqreturn_t vexpress_spc_irq_handler(int irq, void *data) +{ + int ret; + u32 status = readl_relaxed(info->baseaddr + PWC_STATUS); + + if (!(status & RESPONSE_MASK(info->cur_req_type))) + return IRQ_NONE; + + if ((status == STAT_COMPLETE(SYS_CFGCTRL_TYPE)) + && vexpress_spc_config_data) { + *vexpress_spc_config_data = + readl_relaxed(info->baseaddr + SYS_CFG_RDATA); + vexpress_spc_config_data = NULL; + } + + ret = STAT_COMPLETE(info->cur_req_type) ? 
0 : -EIO; + info->cur_req_type = INVALID_TYPE; + vexpress_config_complete(vexpress_spc_config_bridge, ret); + return IRQ_HANDLED; +} + +/** + * Based on the firmware documentation, this is always fixed to 20 + * All the 4 OSC: A15 PLL0/1, A7 PLL0/1 must be programmed same + * values for both control and value registers. + * This function uses A15 PLL 0 registers to compute multiple factor + * F out = F in * (CLKF + 1) / ((CLKOD + 1) * (CLKR + 1)) + */ +static inline int __get_mult_factor(void) +{ + int i_div, o_div, f_div; + u32 tmp; + + tmp = readl(info->baseaddr + SCC_CFGREG19); + f_div = (tmp >> CLKF_SHIFT) & CLKF_MASK; + + tmp = readl(info->baseaddr + SCC_CFGREG20); + o_div = (tmp >> CLKOD_SHIFT) & CLKOD_MASK; + i_div = (tmp >> CLKR_SHIFT) & CLKR_MASK; + + return (f_div + 1) / ((o_div + 1) * (i_div + 1)); +} + +/** + * vexpress_spc_populate_opps() - initialize opp tables from microcontroller + * + * @cluster: mpidr[15:8] bitfield describing cluster affinity level + * + * Return: 0 on success + * < 0 on error + */ +static int vexpress_spc_populate_opps(u32 cluster) +{ + u32 data = 0, ret, i, offset; + int mult_fact = __get_mult_factor(); + + if (WARN_ON_ONCE(cluster >= MAX_CLUSTERS)) + return -EINVAL; + + offset = cluster != info->a15_clusid ? 
OPP_A7_OFFSET : OPP_A15_OFFSET; + for (i = 0; i < MAX_OPPS; i++) { + ret = vexpress_config_read(opp_func, i + offset, &data); + if (!ret) + info->freqs[cluster][i] = (data & 0xFFFFF) * mult_fact; + else + break; + } + + info->freqs_cnt[cluster] = i; + return ret; +} + +/** + * vexpress_spc_get_freq_table() - Retrieve a pointer to the frequency + * table for a given cluster + * + * @cluster: mpidr[15:8] bitfield describing cluster affinity level + * @fptr: pointer to be initialized + * Return: operating points count on success + * -EINVAL on pointer error + */ +int vexpress_spc_get_freq_table(u32 cluster, u32 **fptr) +{ + if (WARN_ON_ONCE(!fptr || cluster >= MAX_CLUSTERS)) + return -EINVAL; + *fptr = info->freqs[cluster]; + return info->freqs_cnt[cluster]; +} +EXPORT_SYMBOL_GPL(vexpress_spc_get_freq_table); + +static void *vexpress_spc_func_get(struct device *dev, + struct device_node *node, const char *id) +{ + struct vexpress_spc_func *spc_func; + u32 func_device[2]; + int err = 0; + + spc_func = kzalloc(sizeof(*spc_func), GFP_KERNEL); + if (!spc_func) + return NULL; + + if (strcmp(id, "opp") == 0) { + spc_func->type = CONFIG_FUNC; + spc_func->function = OPP_FUNCTION; + spc_func->device = OPP_BASE_DEVICE; + } else if (strcmp(id, "perf") == 0) { + spc_func->type = PERF_FUNC; + } else if (node) { + of_node_get(node); + err = of_property_read_u32_array(node, + "arm,vexpress-sysreg,func", func_device, + ARRAY_SIZE(func_device)); + of_node_put(node); + spc_func->type = CONFIG_FUNC; + spc_func->function = func_device[0]; + spc_func->device = func_device[1]; + } + + if (WARN_ON(err)) { + kfree(spc_func); + return NULL; + } + + pr_debug("func 0x%p = 0x%x, %d %d\n", spc_func, + spc_func->function, + spc_func->device, + spc_func->type); + + return spc_func; +} + +static void vexpress_spc_func_put(void *func) +{ + kfree(func); +} + +static int vexpress_spc_func_exec(void *func, int offset, bool write, + u32 *data) +{ + struct vexpress_spc_func *spc_func = func; + u32 
command; + + if (!data) + return -EINVAL; + /* + * Setting and retrieval of operating points is not part of + * DCC config interface. It was made to go through the same + * code path so that requests to the M3 can be serialized + * properly with config reads/writes through the common + * vexpress config interface + */ + switch (spc_func->type) { + case PERF_FUNC: + if (write) { + info->cur_req_type = (offset == PERF_LVL_A15) ? + A15_OPP_TYPE : A7_OPP_TYPE; + writel_relaxed(*data, info->baseaddr + offset); + return VEXPRESS_CONFIG_STATUS_WAIT; + } else { + *data = readl_relaxed(info->baseaddr + offset); + return VEXPRESS_CONFIG_STATUS_DONE; + } + case CONFIG_FUNC: + info->cur_req_type = SYS_CFGCTRL_TYPE; + + command = SYS_CFGCTRL_START; + command |= write ? SYS_CFGCTRL_WRITE : 0; + command |= SYS_CFGCTRL_FUNC(spc_func->function); + command |= SYS_CFGCTRL_DEVICE(spc_func->device + offset); + + pr_debug("command %x\n", command); + + if (!write) + vexpress_spc_config_data = data; + else + writel_relaxed(*data, info->baseaddr + SYS_CFG_WDATA); + writel_relaxed(command, info->baseaddr + SYS_CFGCTRL); + + return VEXPRESS_CONFIG_STATUS_WAIT; + default: + return -EINVAL; + } +} + +struct vexpress_config_bridge_info vexpress_spc_config_bridge_info = { + .name = "vexpress-spc", + .func_get = vexpress_spc_func_get, + .func_put = vexpress_spc_func_put, + .func_exec = vexpress_spc_func_exec, +}; + +static const struct of_device_id vexpress_spc_ids[] __initconst = { + { .compatible = "arm,vexpress-spc,v2p-ca15_a7" }, + { .compatible = "arm,vexpress-spc" }, + {}, +}; + +static int __init vexpress_spc_init(void) +{ + int ret; + struct device_node *node = of_find_matching_node(NULL, + vexpress_spc_ids); + + if (!node) + return -ENODEV; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + pr_err("%s: unable to allocate mem\n", __func__); + return -ENOMEM; + } + info->cur_req_type = INVALID_TYPE; + + info->baseaddr = of_iomap(node, 0); + if (WARN_ON(!info->baseaddr)) { + 
ret = -ENXIO; + goto mem_free; + } + + info->irq = irq_of_parse_and_map(node, 0); + + if (WARN_ON(!info->irq)) { + ret = -ENXIO; + goto unmap; + } + + readl_relaxed(info->baseaddr + PWC_STATUS); + + ret = request_irq(info->irq, vexpress_spc_irq_handler, + IRQF_DISABLED | IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + "arm-spc", info); + + if (ret) { + pr_err("IRQ %d request failed\n", info->irq); + ret = -ENODEV; + goto unmap; + } + + info->a15_clusid = readl_relaxed(info->baseaddr + A15_CONF) & 0xf; + + vexpress_spc_config_bridge = vexpress_config_bridge_register( + node, &vexpress_spc_config_bridge_info); + + if (WARN_ON(!vexpress_spc_config_bridge)) { + ret = -ENODEV; + goto unmap; + } + + opp_func = vexpress_config_func_get(vexpress_spc_config_bridge, "opp"); + perf_func = + vexpress_config_func_get(vexpress_spc_config_bridge, "perf"); + + if (!opp_func || !perf_func) { + ret = -ENODEV; + goto unmap; + } + + if (vexpress_spc_populate_opps(0) || vexpress_spc_populate_opps(1)) { + if (info->irq) + free_irq(info->irq, info); + pr_err("failed to build OPP table\n"); + ret = -ENODEV; + goto unmap; + } + /* + * Multi-cluster systems may need this data when non-coherent, during + * cluster power-up/power-down. 
Make sure it reaches main memory: + */ + sync_cache_w(info); + sync_cache_w(&info); + pr_info("vexpress-spc loaded at %p\n", info->baseaddr); + return 0; + +unmap: + iounmap(info->baseaddr); + +mem_free: + kfree(info); + return ret; +} + +static bool __init __vexpress_spc_check_loaded(void); +/* + * Pointer spc_check_loaded is swapped after init hence it is safe + * to initialize it to a function in the __init section + */ +static bool (*spc_check_loaded)(void) __refdata = &__vexpress_spc_check_loaded; + +static bool __init __vexpress_spc_check_loaded(void) +{ + if (vexpress_spc_load_result == -EAGAIN) + vexpress_spc_load_result = vexpress_spc_init(); + spc_check_loaded = &vexpress_spc_initialized; + return vexpress_spc_initialized(); +} + +/* + * Function exported to manage early_initcall ordering. + * SPC code is needed very early in the boot process + * to bring CPUs out of reset and initialize power + * management back-end. After boot swap pointers to + * make the functionality check available to loadable + * modules, when early boot init functions have been + * already freed from kernel address space. 
+ */ +bool vexpress_spc_check_loaded(void) +{ + return spc_check_loaded(); +} +EXPORT_SYMBOL_GPL(vexpress_spc_check_loaded); + +static int __init vexpress_spc_early_init(void) +{ + __vexpress_spc_check_loaded(); + return vexpress_spc_load_result; +} +early_initcall(vexpress_spc_early_init); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Serial Power Controller (SPC) support"); diff --git a/drivers/mfd/vexpress-sysreg.c b/drivers/mfd/vexpress-sysreg.c index 96a020b1dcd1..7f429afce112 100644 --- a/drivers/mfd/vexpress-sysreg.c +++ b/drivers/mfd/vexpress-sysreg.c @@ -165,7 +165,7 @@ static u32 *vexpress_sysreg_config_data; static int vexpress_sysreg_config_tries; static void *vexpress_sysreg_config_func_get(struct device *dev, - struct device_node *node) + struct device_node *node, const char *id) { struct vexpress_sysreg_config_func *config_func; u32 site; @@ -351,6 +351,8 @@ void __init vexpress_sysreg_of_early_init(void) } +#ifdef CONFIG_GPIOLIB + #define VEXPRESS_SYSREG_GPIO(_name, _reg, _value) \ [VEXPRESS_GPIO_##_name] = { \ .reg = _reg, \ @@ -445,6 +447,8 @@ struct gpio_led_platform_data vexpress_sysreg_leds_pdata = { .leds = vexpress_sysreg_leds, }; +#endif + static ssize_t vexpress_sysreg_sys_id_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -480,6 +484,9 @@ static int vexpress_sysreg_probe(struct platform_device *pdev) setup_timer(&vexpress_sysreg_config_timer, vexpress_sysreg_config_complete, 0); + vexpress_sysreg_dev = &pdev->dev; + +#ifdef CONFIG_GPIOLIB vexpress_sysreg_gpio_chip.dev = &pdev->dev; err = gpiochip_add(&vexpress_sysreg_gpio_chip); if (err) { @@ -490,11 +497,10 @@ static int vexpress_sysreg_probe(struct platform_device *pdev) return err; } - vexpress_sysreg_dev = &pdev->dev; - platform_device_register_data(vexpress_sysreg_dev, "leds-gpio", PLATFORM_DEVID_AUTO, &vexpress_sysreg_leds_pdata, sizeof(vexpress_sysreg_leds_pdata)); +#endif device_create_file(vexpress_sysreg_dev, &dev_attr_sys_id); diff --git 
a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index dfbf978315df..bdd703c6bf16 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -1896,7 +1896,7 @@ static int smc_probe(struct net_device *dev, void __iomem *ioaddr, SMC_SELECT_BANK(lp, 1); val = SMC_GET_BASE(lp); val = ((val & 0x1F00) >> 3) << SMC_IO_SHIFT; - if (((unsigned int)ioaddr & (0x3e0 << SMC_IO_SHIFT)) != val) { + if (((unsigned long)ioaddr & (0x3e0 << SMC_IO_SHIFT)) != val) { printk("%s: IOADDR %p doesn't match configuration (%x).\n", CARDNAME, ioaddr, val); } diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 38f0b312ff85..663d2d0448b7 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -439,7 +439,7 @@ void phy_start_machine(struct phy_device *phydev, { phydev->adjust_state = handler; - schedule_delayed_work(&phydev->state_queue, HZ); + queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, HZ); } /** @@ -500,7 +500,7 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) disable_irq_nosync(irq); atomic_inc(&phydev->irq_disable); - schedule_work(&phydev->phy_queue); + queue_work(system_power_efficient_wq, &phydev->phy_queue); return IRQ_HANDLED; } @@ -655,7 +655,7 @@ static void phy_change(struct work_struct *work) /* reschedule state queue work to run as soon as possible */ cancel_delayed_work_sync(&phydev->state_queue); - schedule_delayed_work(&phydev->state_queue, 0); + queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, 0); return; @@ -918,7 +918,8 @@ void phy_state_machine(struct work_struct *work) if (err < 0) phy_error(phydev); - schedule_delayed_work(&phydev->state_queue, PHY_STATE_TIME * HZ); + queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, + PHY_STATE_TIME * HZ); } static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad, diff --git a/drivers/of/base.c b/drivers/of/base.c index 1d10b4ec6814..f24dca92ea43 100644 --- 
a/drivers/of/base.c +++ b/drivers/of/base.c @@ -18,6 +18,7 @@ * 2 of the License, or (at your option) any later version. */ #include <linux/ctype.h> +#include <linux/cpu.h> #include <linux/module.h> #include <linux/of.h> #include <linux/spinlock.h> @@ -230,6 +231,100 @@ const void *of_get_property(const struct device_node *np, const char *name, } EXPORT_SYMBOL(of_get_property); +/* + * arch_match_cpu_phys_id - Match the given logical CPU and physical id + * + * @cpu: logical cpu index of a core/thread + * @phys_id: physical identifier of a core/thread + * + * CPU logical to physical index mapping is architecture specific. + * However this __weak function provides a default match of physical + * id to logical cpu index. phys_id provided here is usually values read + * from the device tree which must match the hardware internal registers. + * + * Returns true if the physical identifier and the logical cpu index + * correspond to the same core/thread, false otherwise. + */ +bool __weak arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return (u32)phys_id == cpu; +} + +/** + * Checks if the given "prop_name" property holds the physical id of the + * core/thread corresponding to the logical cpu 'cpu'. If 'thread' is not + * NULL, local thread number within the core is returned in it. 
+ */ +static bool __of_find_n_match_cpu_property(struct device_node *cpun, + const char *prop_name, int cpu, unsigned int *thread) +{ + const __be32 *cell; + int ac, prop_len, tid; + u64 hwid; + + ac = of_n_addr_cells(cpun); + cell = of_get_property(cpun, prop_name, &prop_len); + if (!cell) + return false; + prop_len /= sizeof(*cell); + for (tid = 0; tid < prop_len; tid++) { + hwid = of_read_number(cell, ac); + if (arch_match_cpu_phys_id(cpu, hwid)) { + if (thread) + *thread = tid; + return true; + } + cell += ac; + } + return false; +} + +/** + * of_get_cpu_node - Get device node associated with the given logical CPU + * + * @cpu: CPU number(logical index) for which device node is required + * @thread: if not NULL, local thread number within the physical core is + * returned + * + * The main purpose of this function is to retrieve the device node for the + * given logical CPU index. It should be used to initialize the of_node in + * cpu device. Once of_node in cpu device is populated, all the further + * references can use that instead. + * + * CPU logical to physical index mapping is architecture specific and is built + * before booting secondary cores. This function uses arch_match_cpu_phys_id + * which can be overridden by architecture specific implementation. + * + * Returns a node pointer for the logical cpu if found, else NULL. + */ +struct device_node *of_get_cpu_node(int cpu, unsigned int *thread) +{ + struct device_node *cpun, *cpus; + + cpus = of_find_node_by_path("/cpus"); + if (!cpus) { + pr_warn("Missing cpus node, bailing out\n"); + return NULL; + } + + for_each_child_of_node(cpus, cpun) { + if (of_node_cmp(cpun->type, "cpu")) + continue; + /* Check for non-standard "ibm,ppc-interrupt-server#s" property + * for thread ids on PowerPC. If it doesn't exist fallback to + * standard "reg" property. 
+ */ + if (IS_ENABLED(CONFIG_PPC) && + __of_find_n_match_cpu_property(cpun, + "ibm,ppc-interrupt-server#s", cpu, thread)) + return cpun; + if (__of_find_n_match_cpu_property(cpun, "reg", cpu, thread)) + return cpun; + } + return NULL; +} +EXPORT_SYMBOL(of_get_cpu_node); + /** Checks if the given "compat" string matches one of the strings in * the device's "compatible" property */ diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig index 349e9ae8090a..ee039dcead04 100644 --- a/drivers/power/reset/Kconfig +++ b/drivers/power/reset/Kconfig @@ -32,7 +32,8 @@ config POWER_RESET_RESTART user presses a key. u-boot then boots into Linux. config POWER_RESET_VEXPRESS - bool + bool "ARM Versatile Express power-off and reset driver" + depends on ARM || ARM64 depends on POWER_RESET help Power off and reset support for the ARM Ltd. Versatile diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 815d6df8bd5f..89deb736b9ea 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1890,8 +1890,9 @@ int regulator_disable_deferred(struct regulator *regulator, int ms) rdev->deferred_disables++; mutex_unlock(&rdev->mutex); - ret = schedule_delayed_work(&rdev->disable_work, - msecs_to_jiffies(ms)); + ret = queue_delayed_work(system_power_efficient_wq, + &rdev->disable_work, + msecs_to_jiffies(ms)); if (ret < 0) return ret; else diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index 2e937bdace6f..29a5121ce7fd 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -39,6 +39,11 @@ config VIDEOMODE_HELPERS config HDMI bool +config VEXPRESS_DVI_CONTROL + bool "Versatile Express DVI control" + depends on FB && VEXPRESS_CONFIG + default y + menuconfig FB tristate "Support for frame buffer devices" ---help--- @@ -312,7 +317,8 @@ config FB_PM2_FIFO_DISCONNECT config FB_ARMCLCD tristate "ARM PrimeCell PL110 support" - depends on FB && ARM && ARM_AMBA + depends on ARM || ARM64 || COMPILE_TEST + depends on FB && ARM_AMBA select 
FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT @@ -326,6 +332,21 @@ config FB_ARMCLCD here and read <file:Documentation/kbuild/modules.txt>. The module will be called amba-clcd. +config FB_ARMHDLCD + tristate "ARM High Definition LCD support" + depends on FB && ARM + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This framebuffer device driver is for the ARM High Definition + Colour LCD controller. + + If you want to compile this as a module (=code which can be + inserted into and removed from the running kernel), say M + here and read <file:Documentation/kbuild/modules.txt>. The module + will be called arm-hdlcd. + config FB_ACORN bool "Acorn VIDC support" depends on (FB = y) && ARM && ARCH_ACORN diff --git a/drivers/video/Makefile b/drivers/video/Makefile index e8bae8dd4804..33869eea4981 100644 --- a/drivers/video/Makefile +++ b/drivers/video/Makefile @@ -99,6 +99,7 @@ obj-$(CONFIG_FB_ATMEL) += atmel_lcdfb.o obj-$(CONFIG_FB_PVR2) += pvr2fb.o obj-$(CONFIG_FB_VOODOO1) += sstfb.o obj-$(CONFIG_FB_ARMCLCD) += amba-clcd.o +obj-$(CONFIG_FB_ARMHDLCD) += arm-hdlcd.o obj-$(CONFIG_FB_GOLDFISH) += goldfishfb.o obj-$(CONFIG_FB_68328) += 68328fb.o obj-$(CONFIG_FB_GBE) += gbefb.o @@ -177,3 +178,6 @@ obj-$(CONFIG_VIDEOMODE_HELPERS) += display_timing.o videomode.o ifeq ($(CONFIG_OF),y) obj-$(CONFIG_VIDEOMODE_HELPERS) += of_display_timing.o of_videomode.o endif + +# platform specific output drivers +obj-$(CONFIG_VEXPRESS_DVI_CONTROL) += vexpress-dvi.o diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c index 0a2cce7285be..94a1998338da 100644 --- a/drivers/video/amba-clcd.c +++ b/drivers/video/amba-clcd.c @@ -16,7 +16,10 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/memblock.h> #include <linux/mm.h> +#include <linux/of.h> #include <linux/fb.h> #include <linux/init.h> #include <linux/ioport.h> @@ -30,6 +33,16 @@ #define to_clcd(info) 
container_of(info, struct clcd_fb, fb) +#ifdef CONFIG_ARM +#define clcdfb_dma_alloc dma_alloc_writecombine +#define clcdfb_dma_free dma_free_writecombine +#define clcdfb_dma_mmap dma_mmap_writecombine +#else +#define clcdfb_dma_alloc dma_alloc_coherent +#define clcdfb_dma_free dma_free_coherent +#define clcdfb_dma_mmap dma_mmap_coherent +#endif + /* This is limited to 16 characters when displayed by X startup */ static const char *clcd_name = "CLCD FB"; @@ -392,6 +405,44 @@ static int clcdfb_blank(int blank_mode, struct fb_info *info) return 0; } +int clcdfb_mmap_dma(struct clcd_fb *fb, struct vm_area_struct *vma) +{ + return clcdfb_dma_mmap(&fb->dev->dev, vma, + fb->fb.screen_base, + fb->fb.fix.smem_start, + fb->fb.fix.smem_len); +} + +int clcdfb_mmap_io(struct clcd_fb *fb, struct vm_area_struct *vma) +{ + unsigned long user_count, count, pfn, off; + + user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + count = PAGE_ALIGN(fb->fb.fix.smem_len) >> PAGE_SHIFT; + pfn = fb->fb.fix.smem_start >> PAGE_SHIFT; + off = vma->vm_pgoff; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (off < count && user_count <= (count - off)) + return remap_pfn_range(vma, vma->vm_start, pfn + off, + user_count << PAGE_SHIFT, + vma->vm_page_prot); + + return -ENXIO; +} + +void clcdfb_remove_dma(struct clcd_fb *fb) +{ + clcdfb_dma_free(&fb->dev->dev, fb->fb.fix.smem_len, + fb->fb.screen_base, fb->fb.fix.smem_start); +} + +void clcdfb_remove_io(struct clcd_fb *fb) +{ + iounmap(fb->fb.screen_base); +} + static int clcdfb_mmap(struct fb_info *info, struct vm_area_struct *vma) { @@ -542,14 +593,239 @@ static int clcdfb_register(struct clcd_fb *fb) return ret; } +struct string_lookup { + const char *string; + const u32 val; +}; + +static struct string_lookup vmode_lookups[] = { + { "FB_VMODE_NONINTERLACED", FB_VMODE_NONINTERLACED}, + { "FB_VMODE_INTERLACED", FB_VMODE_INTERLACED}, + { "FB_VMODE_DOUBLE", FB_VMODE_DOUBLE}, + { "FB_VMODE_ODD_FLD_FIRST", 
FB_VMODE_ODD_FLD_FIRST}, + { NULL, 0 }, +}; + +static struct string_lookup tim2_lookups[] = { + { "TIM2_CLKSEL", TIM2_CLKSEL}, + { "TIM2_IVS", TIM2_IVS}, + { "TIM2_IHS", TIM2_IHS}, + { "TIM2_IPC", TIM2_IPC}, + { "TIM2_IOE", TIM2_IOE}, + { "TIM2_BCD", TIM2_BCD}, + { NULL, 0}, +}; +static struct string_lookup cntl_lookups[] = { + {"CNTL_LCDEN", CNTL_LCDEN}, + {"CNTL_LCDBPP1", CNTL_LCDBPP1}, + {"CNTL_LCDBPP2", CNTL_LCDBPP2}, + {"CNTL_LCDBPP4", CNTL_LCDBPP4}, + {"CNTL_LCDBPP8", CNTL_LCDBPP8}, + {"CNTL_LCDBPP16", CNTL_LCDBPP16}, + {"CNTL_LCDBPP16_565", CNTL_LCDBPP16_565}, + {"CNTL_LCDBPP16_444", CNTL_LCDBPP16_444}, + {"CNTL_LCDBPP24", CNTL_LCDBPP24}, + {"CNTL_LCDBW", CNTL_LCDBW}, + {"CNTL_LCDTFT", CNTL_LCDTFT}, + {"CNTL_LCDMONO8", CNTL_LCDMONO8}, + {"CNTL_LCDDUAL", CNTL_LCDDUAL}, + {"CNTL_BGR", CNTL_BGR}, + {"CNTL_BEBO", CNTL_BEBO}, + {"CNTL_BEPO", CNTL_BEPO}, + {"CNTL_LCDPWR", CNTL_LCDPWR}, + {"CNTL_LCDVCOMP(1)", CNTL_LCDVCOMP(1)}, + {"CNTL_LCDVCOMP(2)", CNTL_LCDVCOMP(2)}, + {"CNTL_LCDVCOMP(3)", CNTL_LCDVCOMP(3)}, + {"CNTL_LCDVCOMP(4)", CNTL_LCDVCOMP(4)}, + {"CNTL_LCDVCOMP(5)", CNTL_LCDVCOMP(5)}, + {"CNTL_LCDVCOMP(6)", CNTL_LCDVCOMP(6)}, + {"CNTL_LCDVCOMP(7)", CNTL_LCDVCOMP(7)}, + {"CNTL_LDMAFIFOTIME", CNTL_LDMAFIFOTIME}, + {"CNTL_WATERMARK", CNTL_WATERMARK}, + { NULL, 0}, +}; +static struct string_lookup caps_lookups[] = { + {"CLCD_CAP_RGB444", CLCD_CAP_RGB444}, + {"CLCD_CAP_RGB5551", CLCD_CAP_RGB5551}, + {"CLCD_CAP_RGB565", CLCD_CAP_RGB565}, + {"CLCD_CAP_RGB888", CLCD_CAP_RGB888}, + {"CLCD_CAP_BGR444", CLCD_CAP_BGR444}, + {"CLCD_CAP_BGR5551", CLCD_CAP_BGR5551}, + {"CLCD_CAP_BGR565", CLCD_CAP_BGR565}, + {"CLCD_CAP_BGR888", CLCD_CAP_BGR888}, + {"CLCD_CAP_444", CLCD_CAP_444}, + {"CLCD_CAP_5551", CLCD_CAP_5551}, + {"CLCD_CAP_565", CLCD_CAP_565}, + {"CLCD_CAP_888", CLCD_CAP_888}, + {"CLCD_CAP_RGB", CLCD_CAP_RGB}, + {"CLCD_CAP_BGR", CLCD_CAP_BGR}, + {"CLCD_CAP_ALL", CLCD_CAP_ALL}, + { NULL, 0}, +}; + +u32 parse_setting(struct string_lookup *lookup, const char *name) +{ + 
int i = 0; + while (lookup[i].string != NULL) { + if (strcmp(lookup[i].string, name) == 0) + return lookup[i].val; + ++i; + } + return -EINVAL; +} + +u32 get_string_lookup(struct device_node *node, const char *name, + struct string_lookup *lookup) +{ + const char *string; + int count, i, ret = 0; + + count = of_property_count_strings(node, name); + if (count >= 0) + for (i = 0; i < count; i++) + if (of_property_read_string_index(node, name, i, + &string) == 0) + ret |= parse_setting(lookup, string); + return ret; +} + +int get_val(struct device_node *node, const char *string) +{ + u32 ret = 0; + + if (of_property_read_u32(node, string, &ret)) + ret = -1; + return ret; +} + +struct clcd_panel *getPanel(struct device_node *node) +{ + static struct clcd_panel panel; + + panel.mode.refresh = get_val(node, "refresh"); + panel.mode.xres = get_val(node, "xres"); + panel.mode.yres = get_val(node, "yres"); + panel.mode.pixclock = get_val(node, "pixclock"); + panel.mode.left_margin = get_val(node, "left_margin"); + panel.mode.right_margin = get_val(node, "right_margin"); + panel.mode.upper_margin = get_val(node, "upper_margin"); + panel.mode.lower_margin = get_val(node, "lower_margin"); + panel.mode.hsync_len = get_val(node, "hsync_len"); + panel.mode.vsync_len = get_val(node, "vsync_len"); + panel.mode.sync = get_val(node, "sync"); + panel.bpp = get_val(node, "bpp"); + panel.width = (signed short) get_val(node, "width"); + panel.height = (signed short) get_val(node, "height"); + + panel.mode.vmode = get_string_lookup(node, "vmode", vmode_lookups); + panel.tim2 = get_string_lookup(node, "tim2", tim2_lookups); + panel.cntl = get_string_lookup(node, "cntl", cntl_lookups); + panel.caps = get_string_lookup(node, "caps", caps_lookups); + + return &panel; +} + +struct clcd_panel *clcdfb_get_panel(const char *name) +{ + struct device_node *node = NULL; + const char *mode; + struct clcd_panel *panel = NULL; + + do { + node = of_find_compatible_node(node, NULL, "panel"); + if (node) 
+ if (of_property_read_string(node, "mode", &mode) == 0) + if (strcmp(mode, name) == 0) { + panel = getPanel(node); + panel->mode.name = name; + } + } while (node != NULL); + + return panel; +} + +#ifdef CONFIG_OF +static int clcdfb_dt_init(struct clcd_fb *fb) +{ + int err = 0; + struct device_node *node; + const char *mode; + dma_addr_t dma; + u32 use_dma; + const __be32 *prop; + int len, na, ns; + phys_addr_t fb_base, fb_size; + + node = fb->dev->dev.of_node; + if (!node) + return -ENODEV; + + na = of_n_addr_cells(node); + ns = of_n_size_cells(node); + + if (WARN_ON(of_property_read_string(node, "mode", &mode))) + return -ENODEV; + + fb->panel = clcdfb_get_panel(mode); + if (!fb->panel) + return -EINVAL; + fb->fb.fix.smem_len = fb->panel->mode.xres * fb->panel->mode.yres * 2; + + fb->board->name = "Device Tree CLCD PL111"; + fb->board->caps = CLCD_CAP_5551 | CLCD_CAP_565; + fb->board->check = clcdfb_check; + fb->board->decode = clcdfb_decode; + + if (of_property_read_u32(node, "use_dma", &use_dma)) + use_dma = 0; + + if (use_dma) { + fb->fb.screen_base = clcdfb_dma_alloc(&fb->dev->dev, + fb->fb.fix.smem_len, + &dma, GFP_KERNEL); + if (!fb->fb.screen_base) { + pr_err("CLCD: unable to map framebuffer\n"); + return -ENOMEM; + } + + fb->fb.fix.smem_start = dma; + fb->board->mmap = clcdfb_mmap_dma; + fb->board->remove = clcdfb_remove_dma; + } else { + prop = of_get_property(node, "framebuffer", &len); + if (WARN_ON(!prop || len < (na + ns) * sizeof(*prop))) + return -EINVAL; + + fb_base = of_read_number(prop, na); + fb_size = of_read_number(prop + na, ns); + + fb->fb.fix.smem_start = fb_base; + fb->fb.screen_base = ioremap_wc(fb_base, fb_size); + fb->board->mmap = clcdfb_mmap_io; + fb->board->remove = clcdfb_remove_io; + } + + return err; +} +#endif /* CONFIG_OF */ + static int clcdfb_probe(struct amba_device *dev, const struct amba_id *id) { struct clcd_board *board = dev->dev.platform_data; struct clcd_fb *fb; int ret; - if (!board) - return -EINVAL; + if (!board) { 
+#ifdef CONFIG_OF + if (dev->dev.of_node) { + board = kzalloc(sizeof(struct clcd_board), GFP_KERNEL); + if (!board) + return -ENOMEM; + board->setup = clcdfb_dt_init; + } else +#endif + return -EINVAL; + } ret = amba_request_regions(dev, NULL); if (ret) { diff --git a/drivers/video/arm-hdlcd.c b/drivers/video/arm-hdlcd.c new file mode 100644 index 000000000000..cfd631e3dc52 --- /dev/null +++ b/drivers/video/arm-hdlcd.c @@ -0,0 +1,844 @@ +/* + * drivers/video/arm-hdlcd.c + * + * Copyright (C) 2011 ARM Limited + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + * + * ARM HDLCD Controller + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/mm.h> +#include <linux/delay.h> +#include <linux/of.h> +#include <linux/fb.h> +#include <linux/clk.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/dma-mapping.h> +#include <linux/platform_device.h> +#include <linux/memblock.h> +#include <linux/arm-hdlcd.h> +#ifdef HDLCD_COUNT_BUFFERUNDERRUNS +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#endif + +#include "edid.h" + +#ifdef CONFIG_SERIAL_AMBA_PCU_UART +int get_edid(u8 *msgbuf); +#else +#endif + +#define to_hdlcd_device(info) container_of(info, struct hdlcd_device, fb) + +static struct of_device_id hdlcd_of_matches[] = { + { .compatible = "arm,hdlcd" }, + {}, +}; + +/* Framebuffer size. 
*/ +static unsigned long framebuffer_size; + +#ifdef HDLCD_COUNT_BUFFERUNDERRUNS +static unsigned long buffer_underrun_events; +static DEFINE_SPINLOCK(hdlcd_underrun_lock); + +static void hdlcd_underrun_set(unsigned long val) +{ + spin_lock(&hdlcd_underrun_lock); + buffer_underrun_events = val; + spin_unlock(&hdlcd_underrun_lock); +} + +static unsigned long hdlcd_underrun_get(void) +{ + unsigned long val; + spin_lock(&hdlcd_underrun_lock); + val = buffer_underrun_events; + spin_unlock(&hdlcd_underrun_lock); + return val; +} + +#ifdef CONFIG_PROC_FS +static int hdlcd_underrun_show(struct seq_file *m, void *v) +{ + unsigned char underrun_string[32]; + snprintf(underrun_string, 32, "%lu\n", hdlcd_underrun_get()); + seq_puts(m, underrun_string); + return 0; +} + +static int proc_hdlcd_underrun_open(struct inode *inode, struct file *file) +{ + return single_open(file, hdlcd_underrun_show, NULL); +} + +static const struct file_operations proc_hdlcd_underrun_operations = { + .open = proc_hdlcd_underrun_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int hdlcd_underrun_init(void) +{ + hdlcd_underrun_set(0); + proc_create("hdlcd_underrun", 0, NULL, &proc_hdlcd_underrun_operations); + return 0; +} +static void hdlcd_underrun_close(void) +{ + remove_proc_entry("hdlcd_underrun", NULL); +} +#else +static int hdlcd_underrun_init(void) { return 0; } +static void hdlcd_underrun_close(void) { } +#endif +#endif + +static char *fb_mode = "1680x1050-32@60\0\0\0\0\0"; + +static struct fb_var_screeninfo cached_var_screeninfo; + +static struct fb_videomode hdlcd_default_mode = { + .refresh = 60, + .xres = 1680, + .yres = 1050, + .pixclock = 8403, + .left_margin = 80, + .right_margin = 48, + .upper_margin = 21, + .lower_margin = 3, + .hsync_len = 32, + .vsync_len = 6, + .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, + .vmode = FB_VMODE_NONINTERLACED +}; + +static inline void hdlcd_enable(struct hdlcd_device *hdlcd) +{ + 
dev_dbg(hdlcd->dev, "HDLCD: output enabled\n"); + writel(1, hdlcd->base + HDLCD_REG_COMMAND); +} + +static inline void hdlcd_disable(struct hdlcd_device *hdlcd) +{ + dev_dbg(hdlcd->dev, "HDLCD: output disabled\n"); + writel(0, hdlcd->base + HDLCD_REG_COMMAND); +} + +static int hdlcd_set_bitfields(struct hdlcd_device *hdlcd, + struct fb_var_screeninfo *var) +{ + int ret = 0; + + memset(&var->transp, 0, sizeof(var->transp)); + var->red.msb_right = 0; + var->green.msb_right = 0; + var->blue.msb_right = 0; + var->blue.offset = 0; + + switch (var->bits_per_pixel) { + case 8: + /* pseudocolor */ + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + break; + case 16: + /* 565 format */ + var->red.length = 5; + var->green.length = 6; + var->blue.length = 5; + break; + case 32: + var->transp.length = 8; + case 24: + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + break; + default: + ret = -EINVAL; + break; + } + + if (!ret) { + if(var->bits_per_pixel != 32) + { + var->green.offset = var->blue.length; + var->red.offset = var->green.offset + var->green.length; + } + else + { + /* Previously, the byte ordering for 32-bit color was + * (msb)<alpha><red><green><blue>(lsb) + * but this does not match what android expects and + * the colors are odd. Instead, use + * <alpha><blue><green><red> + * Since we tell fb what we are doing, console + * , X and directfb access should work fine. 
+ */ + var->green.offset = var->red.length; + var->blue.offset = var->green.offset + var->green.length; + var->transp.offset = var->blue.offset + var->blue.length; + } + } + + return ret; +} + +static int hdlcd_check_var(struct fb_var_screeninfo *var, struct fb_info *info) +{ + struct hdlcd_device *hdlcd = to_hdlcd_device(info); + int bytes_per_pixel = var->bits_per_pixel / 8; + +#ifdef HDLCD_NO_VIRTUAL_SCREEN + var->yres_virtual = var->yres; +#else + var->yres_virtual = 2 * var->yres; +#endif + + if ((var->xres_virtual * bytes_per_pixel * var->yres_virtual) > hdlcd->fb.fix.smem_len) + return -ENOMEM; + + if (var->xres > HDLCD_MAX_XRES || var->yres > HDLCD_MAX_YRES) + return -EINVAL; + + /* make sure the bitfields are set appropriately */ + return hdlcd_set_bitfields(hdlcd, var); +} + +/* prototype */ +static int hdlcd_pan_display(struct fb_var_screeninfo *var, + struct fb_info *info); + +#define WRITE_HDLCD_REG(reg, value) writel((value), hdlcd->base + (reg)) +#define READ_HDLCD_REG(reg) readl(hdlcd->base + (reg)) + +static int hdlcd_set_par(struct fb_info *info) +{ + struct hdlcd_device *hdlcd = to_hdlcd_device(info); + int bytes_per_pixel = hdlcd->fb.var.bits_per_pixel / 8; + int polarities; + int old_yoffset; + + /* check for shortcuts */ + old_yoffset = cached_var_screeninfo.yoffset; + cached_var_screeninfo.yoffset = info->var.yoffset; + if (!memcmp(&info->var, &cached_var_screeninfo, + sizeof(struct fb_var_screeninfo))) { + if(old_yoffset != info->var.yoffset) { + /* we only changed yoffset, and we already + * already recorded it a couple lines up + */ + hdlcd_pan_display(&info->var, info); + } + /* or no change */ + return 0; + } + + hdlcd->fb.fix.line_length = hdlcd->fb.var.xres * bytes_per_pixel; + + if (hdlcd->fb.var.bits_per_pixel >= 16) + hdlcd->fb.fix.visual = FB_VISUAL_TRUECOLOR; + else + hdlcd->fb.fix.visual = FB_VISUAL_PSEUDOCOLOR; + + memcpy(&cached_var_screeninfo, &info->var, sizeof(struct fb_var_screeninfo)); + + polarities = 
HDLCD_POLARITY_DATAEN | +#ifndef CONFIG_ARCH_TUSCAN + HDLCD_POLARITY_PIXELCLK | +#endif + HDLCD_POLARITY_DATA; + polarities |= (hdlcd->fb.var.sync & FB_SYNC_HOR_HIGH_ACT) ? HDLCD_POLARITY_HSYNC : 0; + polarities |= (hdlcd->fb.var.sync & FB_SYNC_VERT_HIGH_ACT) ? HDLCD_POLARITY_VSYNC : 0; + + hdlcd_disable(hdlcd); + + WRITE_HDLCD_REG(HDLCD_REG_FB_LINE_LENGTH, hdlcd->fb.var.xres * bytes_per_pixel); + WRITE_HDLCD_REG(HDLCD_REG_FB_LINE_PITCH, hdlcd->fb.var.xres * bytes_per_pixel); + WRITE_HDLCD_REG(HDLCD_REG_FB_LINE_COUNT, hdlcd->fb.var.yres - 1); + WRITE_HDLCD_REG(HDLCD_REG_V_SYNC, hdlcd->fb.var.vsync_len - 1); + WRITE_HDLCD_REG(HDLCD_REG_V_BACK_PORCH, hdlcd->fb.var.upper_margin - 1); + WRITE_HDLCD_REG(HDLCD_REG_V_DATA, hdlcd->fb.var.yres - 1); + WRITE_HDLCD_REG(HDLCD_REG_V_FRONT_PORCH, hdlcd->fb.var.lower_margin - 1); + WRITE_HDLCD_REG(HDLCD_REG_H_SYNC, hdlcd->fb.var.hsync_len - 1); + WRITE_HDLCD_REG(HDLCD_REG_H_BACK_PORCH, hdlcd->fb.var.left_margin - 1); + WRITE_HDLCD_REG(HDLCD_REG_H_DATA, hdlcd->fb.var.xres - 1); + WRITE_HDLCD_REG(HDLCD_REG_H_FRONT_PORCH, hdlcd->fb.var.right_margin - 1); + WRITE_HDLCD_REG(HDLCD_REG_POLARITIES, polarities); + WRITE_HDLCD_REG(HDLCD_REG_PIXEL_FORMAT, (bytes_per_pixel - 1) << 3); +#ifdef HDLCD_RED_DEFAULT_COLOUR + WRITE_HDLCD_REG(HDLCD_REG_RED_SELECT, (0x00ff0000 | (hdlcd->fb.var.red.length & 0xf) << 8) \ + | hdlcd->fb.var.red.offset); +#else + WRITE_HDLCD_REG(HDLCD_REG_RED_SELECT, ((hdlcd->fb.var.red.length & 0xf) << 8) | hdlcd->fb.var.red.offset); +#endif + WRITE_HDLCD_REG(HDLCD_REG_GREEN_SELECT, ((hdlcd->fb.var.green.length & 0xf) << 8) | hdlcd->fb.var.green.offset); + WRITE_HDLCD_REG(HDLCD_REG_BLUE_SELECT, ((hdlcd->fb.var.blue.length & 0xf) << 8) | hdlcd->fb.var.blue.offset); + + clk_set_rate(hdlcd->clk, (1000000000 / hdlcd->fb.var.pixclock) * 1000); + clk_enable(hdlcd->clk); + + hdlcd_enable(hdlcd); + + return 0; +} + +static int hdlcd_setcolreg(unsigned int regno, unsigned int red, unsigned int green, + unsigned int blue, unsigned 
int transp, struct fb_info *info) +{ + if (regno < 16) { + u32 *pal = info->pseudo_palette; + + pal[regno] = ((red >> 8) << info->var.red.offset) | + ((green >> 8) << info->var.green.offset) | + ((blue >> 8) << info->var.blue.offset); + } + + return 0; +} + +static irqreturn_t hdlcd_irq(int irq, void *data) +{ + struct hdlcd_device *hdlcd = data; + unsigned long irq_mask, irq_status; + + irq_mask = READ_HDLCD_REG(HDLCD_REG_INT_MASK); + irq_status = READ_HDLCD_REG(HDLCD_REG_INT_STATUS); + + /* acknowledge interrupt(s) */ + WRITE_HDLCD_REG(HDLCD_REG_INT_CLEAR, irq_status); +#ifdef HDLCD_COUNT_BUFFERUNDERRUNS + if (irq_status & HDLCD_INTERRUPT_UNDERRUN) { + /* increment the count */ + hdlcd_underrun_set(hdlcd_underrun_get() + 1); + } +#endif + if (irq_status & HDLCD_INTERRUPT_VSYNC) { + /* disable future VSYNC interrupts */ + WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, irq_mask & ~HDLCD_INTERRUPT_VSYNC); + + complete(&hdlcd->vsync_completion); + } + + return IRQ_HANDLED; +} + +static int hdlcd_wait_for_vsync(struct fb_info *info) +{ + struct hdlcd_device *hdlcd = to_hdlcd_device(info); + unsigned long irq_mask; + int err; + + /* enable VSYNC interrupt */ + irq_mask = READ_HDLCD_REG(HDLCD_REG_INT_MASK); + WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, irq_mask | HDLCD_INTERRUPT_VSYNC); + + err = wait_for_completion_interruptible_timeout(&hdlcd->vsync_completion, + msecs_to_jiffies(100)); + + if (!err) + return -ETIMEDOUT; + + return 0; +} + +static int hdlcd_blank(int blank_mode, struct fb_info *info) +{ + struct hdlcd_device *hdlcd = to_hdlcd_device(info); + + switch (blank_mode) { + case FB_BLANK_POWERDOWN: + clk_disable(hdlcd->clk); + case FB_BLANK_NORMAL: + hdlcd_disable(hdlcd); + break; + case FB_BLANK_UNBLANK: + clk_enable(hdlcd->clk); + hdlcd_enable(hdlcd); + break; + case FB_BLANK_VSYNC_SUSPEND: + case FB_BLANK_HSYNC_SUSPEND: + default: + return 1; + } + + return 0; +} + +static void hdlcd_mmap_open(struct vm_area_struct *vma) +{ +} + +static void hdlcd_mmap_close(struct 
vm_area_struct *vma) +{ +} + +static struct vm_operations_struct hdlcd_mmap_ops = { + .open = hdlcd_mmap_open, + .close = hdlcd_mmap_close, +}; + +static int hdlcd_mmap(struct fb_info *info, struct vm_area_struct *vma) +{ + struct hdlcd_device *hdlcd = to_hdlcd_device(info); + unsigned long off; + unsigned long start; + unsigned long len = hdlcd->fb.fix.smem_len; + + if (vma->vm_end - vma->vm_start == 0) + return 0; + if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT)) + return -EINVAL; + + off = vma->vm_pgoff << PAGE_SHIFT; + if ((off >= len) || (vma->vm_end - vma->vm_start + off) > len) + return -EINVAL; + + start = hdlcd->fb.fix.smem_start; + off += start; + + vma->vm_pgoff = off >> PAGE_SHIFT; + vma->vm_flags |= VM_IO; + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + vma->vm_ops = &hdlcd_mmap_ops; + if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT, + vma->vm_end - vma->vm_start, + vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +static int hdlcd_pan_display(struct fb_var_screeninfo *var, struct fb_info *info) +{ + struct hdlcd_device *hdlcd = to_hdlcd_device(info); + + hdlcd->fb.var.yoffset = var->yoffset; + WRITE_HDLCD_REG(HDLCD_REG_FB_BASE, hdlcd->fb.fix.smem_start + + (var->yoffset * hdlcd->fb.fix.line_length)); + + hdlcd_wait_for_vsync(info); + + return 0; +} + +static int hdlcd_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg) +{ + int err; + + switch (cmd) { + case FBIO_WAITFORVSYNC: + err = hdlcd_wait_for_vsync(info); + break; + default: + err = -ENOIOCTLCMD; + break; + } + + return err; +} + +static struct fb_ops hdlcd_ops = { + .owner = THIS_MODULE, + .fb_check_var = hdlcd_check_var, + .fb_set_par = hdlcd_set_par, + .fb_setcolreg = hdlcd_setcolreg, + .fb_blank = hdlcd_blank, + .fb_fillrect = cfb_fillrect, + .fb_copyarea = cfb_copyarea, + .fb_imageblit = cfb_imageblit, + .fb_mmap = hdlcd_mmap, + .fb_pan_display = hdlcd_pan_display, + .fb_ioctl = hdlcd_ioctl, + .fb_compat_ioctl = hdlcd_ioctl +}; + +static int 
hdlcd_setup(struct hdlcd_device *hdlcd)
+{
+	u32 version;
+	int err = -EFAULT;
+
+	hdlcd->fb.device = hdlcd->dev;
+
+	hdlcd->clk = clk_get(hdlcd->dev, NULL);
+	if (IS_ERR(hdlcd->clk)) {
+		dev_err(hdlcd->dev, "HDLCD: unable to find clock data\n");
+		return PTR_ERR(hdlcd->clk);
+	}
+
+	err = clk_prepare(hdlcd->clk);
+	if (err)
+		goto clk_prepare_err;
+
+	hdlcd->base = ioremap_nocache(hdlcd->fb.fix.mmio_start, hdlcd->fb.fix.mmio_len);
+	if (!hdlcd->base) {
+		dev_err(hdlcd->dev, "HDLCD: unable to map registers\n");
+		/*
+		 * NOTE(review): err is 0 here (clk_prepare() succeeded), so
+		 * this failure is reported to the caller as success - confirm
+		 * and set an error code.
+		 */
+		goto remap_err;
+	}
+
+	hdlcd->fb.pseudo_palette = kmalloc(sizeof(u32) * 16, GFP_KERNEL);
+	if (!hdlcd->fb.pseudo_palette) {
+		dev_err(hdlcd->dev, "HDLCD: unable to allocate pseudo_palette memory\n");
+		err = -ENOMEM;
+		goto kmalloc_err;
+	}
+
+	version = readl(hdlcd->base + HDLCD_REG_VERSION);
+	if ((version & HDLCD_PRODUCT_MASK) != HDLCD_PRODUCT_ID) {
+		dev_err(hdlcd->dev, "HDLCD: unknown product id: 0x%x\n", version);
+		err = -EINVAL;
+		goto kmalloc_err;
+	}
+	dev_info(hdlcd->dev, "HDLCD: found ARM HDLCD version r%dp%d\n",
+		(version & HDLCD_VERSION_MAJOR_MASK) >> 8,
+		version & HDLCD_VERSION_MINOR_MASK);
+
+	strcpy(hdlcd->fb.fix.id, "hdlcd");
+	hdlcd->fb.fbops = &hdlcd_ops;
+	hdlcd->fb.flags = FBINFO_FLAG_DEFAULT/* | FBINFO_VIRTFB*/;
+
+	hdlcd->fb.fix.type = FB_TYPE_PACKED_PIXELS;
+	hdlcd->fb.fix.type_aux = 0;
+	hdlcd->fb.fix.xpanstep = 0;
+	hdlcd->fb.fix.ypanstep = 1;
+	hdlcd->fb.fix.ywrapstep = 0;
+	hdlcd->fb.fix.accel = FB_ACCEL_NONE;
+
+	hdlcd->fb.var.nonstd = 0;
+	hdlcd->fb.var.activate = FB_ACTIVATE_NOW;
+	hdlcd->fb.var.height = -1;
+	hdlcd->fb.var.width = -1;
+	hdlcd->fb.var.accel_flags = 0;
+
+	init_completion(&hdlcd->vsync_completion);
+
+	if (hdlcd->edid) {
+		/* build modedb from EDID */
+		fb_edid_to_monspecs(hdlcd->edid, &hdlcd->fb.monspecs);
+		fb_videomode_to_modelist(hdlcd->fb.monspecs.modedb,
+					hdlcd->fb.monspecs.modedb_len,
+					&hdlcd->fb.modelist);
+		fb_find_mode(&hdlcd->fb.var, &hdlcd->fb, fb_mode,
+			hdlcd->fb.monspecs.modedb,
+			hdlcd->fb.monspecs.modedb_len,
+			&hdlcd_default_mode, 32);
+	} else {
+		hdlcd->fb.monspecs.hfmin = 0;
+		hdlcd->fb.monspecs.hfmax = 100000;
+		hdlcd->fb.monspecs.vfmin = 0;
+		hdlcd->fb.monspecs.vfmax = 400;
+		hdlcd->fb.monspecs.dclkmin = 1000000;
+		hdlcd->fb.monspecs.dclkmax = 100000000;
+		fb_find_mode(&hdlcd->fb.var, &hdlcd->fb, fb_mode, NULL, 0, &hdlcd_default_mode, 32);
+	}
+
+	dev_info(hdlcd->dev, "using %dx%d-%d@%d mode\n", hdlcd->fb.var.xres,
+		hdlcd->fb.var.yres, hdlcd->fb.var.bits_per_pixel,
+		hdlcd->fb.mode ? hdlcd->fb.mode->refresh : 60);
+	hdlcd->fb.var.xres_virtual = hdlcd->fb.var.xres;
+#ifdef HDLCD_NO_VIRTUAL_SCREEN
+	hdlcd->fb.var.yres_virtual = hdlcd->fb.var.yres;
+#else
+	hdlcd->fb.var.yres_virtual = hdlcd->fb.var.yres * 2;
+#endif
+
+	/* initialise and set the palette */
+	if (fb_alloc_cmap(&hdlcd->fb.cmap, NR_PALETTE, 0)) {
+		dev_err(hdlcd->dev, "failed to allocate cmap memory\n");
+		err = -ENOMEM;
+		goto setup_err;
+	}
+	fb_set_cmap(&hdlcd->fb.cmap, &hdlcd->fb);
+
+	/* Allow max number of outstanding requests with the largest beat burst */
+	WRITE_HDLCD_REG(HDLCD_REG_BUS_OPTIONS, HDLCD_BUS_MAX_OUTSTAND | HDLCD_BUS_BURST_16);
+	/* Set the framebuffer base to start of allocated memory */
+	WRITE_HDLCD_REG(HDLCD_REG_FB_BASE, hdlcd->fb.fix.smem_start);
+#ifdef HDLCD_COUNT_BUFFERUNDERRUNS
+	/* turn on underrun interrupt for counting */
+	WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, HDLCD_INTERRUPT_UNDERRUN);
+#else
+	/* Ensure interrupts are disabled */
+	WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, 0);
+#endif
+	fb_set_var(&hdlcd->fb, &hdlcd->fb.var);
+
+	if (!register_framebuffer(&hdlcd->fb)) {
+		return 0;
+	}
+
+	dev_err(hdlcd->dev, "HDLCD: cannot register framebuffer\n");
+
+	/*
+	 * NOTE(review): the register_framebuffer() result is not stored, so
+	 * err still holds 0 from clk_prepare() on this path. Also, jumps to
+	 * kmalloc_err skip the iounmap() under setup_err even though the
+	 * registers are mapped by then - confirm and reorder the labels.
+	 */
+	fb_dealloc_cmap(&hdlcd->fb.cmap);
+setup_err:
+	iounmap(hdlcd->base);
+kmalloc_err:
+	kfree(hdlcd->fb.pseudo_palette);
+remap_err:
+	clk_unprepare(hdlcd->clk);
+clk_prepare_err:
+	clk_put(hdlcd->clk);
+	return err;
+}
+
+/*
+ * Convert one ASCII hexadecimal digit to its numeric value (0..15).
+ * Characters that are not hexadecimal digits yield 0.
+ */
+static inline unsigned char atohex(u8 data)
+{
+	if (!isxdigit(data))
+		return 0;
+	/* truncate the upper nibble and add 9 to non-digit values */
+	return (data > 0x39) ? ((data & 0xf) + 9) : (data & 0xf);
+}
+
+/*
+ * EDID data is passed from devicetree in a literal string that can contain
+ * spaces and the hexadecimal dump of the data.
+ *
+ * Two hexadecimal digits are packed into each EDID byte (high nibble first);
+ * whitespace between digits is ignored and parsing stops at the string's NUL
+ * terminator. On success hdlcd->edid points to a newly allocated buffer of
+ * EDID_LENGTH bytes that the caller owns.
+ *
+ * Returns 0 on success, -EINVAL if edid_data is NULL or holds fewer than
+ * EDID_LENGTH bytes worth of digits (hdlcd->edid is then left NULL), or
+ * -ENOMEM if the buffer cannot be allocated.
+ */
+static int parse_edid_data(struct hdlcd_device *hdlcd, const u8 *edid_data, int data_len)
+{
+	int i, j, nibbles;
+	u8 byte = 0;
+
+	if (!edid_data)
+		return -EINVAL;
+
+	hdlcd->edid = kzalloc(EDID_LENGTH, GFP_KERNEL);
+	if (!hdlcd->edid)
+		return -ENOMEM;
+
+	for (i = 0, j = 0, nibbles = 0; i < data_len && j < EDID_LENGTH; i++) {
+		if (isspace(edid_data[i]))
+			continue;
+		/* the property length includes the terminator; it is not data */
+		if (!edid_data[i])
+			break;
+		/*
+		 * Storing every digit as a byte of its own could never yield
+		 * a value above 0x0f, so assemble one byte from two digits.
+		 */
+		byte = (byte << 4) | atohex(edid_data[i]);
+		if (++nibbles == 2) {
+			hdlcd->edid[j++] = byte;
+			byte = 0;
+			nibbles = 0;
+		}
+	}
+
+	if (j < EDID_LENGTH) {
+		kfree(hdlcd->edid);
+		hdlcd->edid = NULL;
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Platform driver probe: allocate the device state, read the optional
+ * "mode", "framebuffer" and "edid" devicetree properties, claim the IRQ and
+ * the register window, map the frame buffer and hand over to hdlcd_setup().
+ *
+ * A failure to obtain EDID data is only logged; every other failure releases
+ * what was acquired so far and returns a negative errno.
+ */
+static int hdlcd_probe(struct platform_device *pdev)
+{
+	int err = 0, i;
+	struct hdlcd_device *hdlcd;
+	struct resource *mem;
+#ifdef CONFIG_OF
+	struct device_node *of_node;
+#endif
+
+	memset(&cached_var_screeninfo, 0, sizeof(struct fb_var_screeninfo));
+
+	dev_dbg(&pdev->dev, "HDLCD: probing\n");
+
+	hdlcd = kzalloc(sizeof(*hdlcd), GFP_KERNEL);
+	if (!hdlcd)
+		return -ENOMEM;
+
+#ifdef CONFIG_OF
+	of_node = pdev->dev.of_node;
+	if (of_node) {
+		int len;
+		const u8 *edid;
+		const __be32 *prop = of_get_property(of_node, "mode", &len);
+		/*
+		 * NOTE(review): len comes from the devicetree and is not
+		 * checked against the capacity of fb_mode (defined outside
+		 * this hunk) - confirm the buffer is large enough.
+		 */
+		if (prop)
+			strncpy(fb_mode, (char *)prop, len);
+		prop = of_get_property(of_node, "framebuffer", &len);
+		if (prop) {
+			hdlcd->fb.fix.smem_start = of_read_ulong(prop,
+					of_n_addr_cells(of_node));
+			prop += of_n_addr_cells(of_node);
+			framebuffer_size = of_read_ulong(prop,
+					of_n_size_cells(of_node));
+			if (framebuffer_size > HDLCD_MAX_FRAMEBUFFER_SIZE)
+				framebuffer_size = HDLCD_MAX_FRAMEBUFFER_SIZE;
+			dev_dbg(&pdev->dev, "HDLCD: phys_addr = 0x%lx, size = 0x%lx\n",
+				hdlcd->fb.fix.smem_start, framebuffer_size);
+		}
+		edid = of_get_property(of_node, "edid", &len);
+		if (edid) {
+			err = parse_edid_data(hdlcd, edid, len);
+#ifdef CONFIG_SERIAL_AMBA_PCU_UART
+		} else {
+			/* ask the firmware to fetch the EDID */
+			dev_dbg(&pdev->dev, "HDLCD: Requesting EDID data\n");
+			hdlcd->edid = kzalloc(EDID_LENGTH, GFP_KERNEL);
+			if (!hdlcd->edid) {
+				/* unwind through the common path so hdlcd is freed */
+				err = -ENOMEM;
+				goto resource_err;
+			}
+			err = get_edid(hdlcd->edid);
+			if (err) {
+				/* do not let hdlcd_setup() parse an unfilled buffer */
+				kfree(hdlcd->edid);
+				hdlcd->edid = NULL;
+			}
+#endif /* CONFIG_SERIAL_AMBA_PCU_UART */
+		}
+		if (err)
+			dev_info(&pdev->dev, "HDLCD: Failed to parse EDID data\n");
+	}
+#endif /* CONFIG_OF */
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!mem) {
+		dev_err(&pdev->dev, "HDLCD: cannot get platform resources\n");
+		err = -EINVAL;
+		goto resource_err;
+	}
+
+	i = platform_get_irq(pdev, 0);
+	if (i < 0) {
+		dev_err(&pdev->dev, "HDLCD: no irq defined for vsync\n");
+		err = -ENOENT;
+		goto resource_err;
+	} else {
+		err = request_irq(i, hdlcd_irq, 0, dev_name(&pdev->dev), hdlcd);
+		if (err) {
+			dev_err(&pdev->dev, "HDLCD: unable to request irq\n");
+			goto resource_err;
+		}
+		hdlcd->irq = i;
+	}
+
+	if (!request_mem_region(mem->start, resource_size(mem), dev_name(&pdev->dev))) {
+		err = -ENXIO;
+		goto request_err;
+	}
+
+	if (!hdlcd->fb.fix.smem_start) {
+		dev_err(&pdev->dev, "platform did not allocate frame buffer memory\n");
+		err = -ENOMEM;
+		goto memalloc_err;
+	}
+	hdlcd->fb.screen_base = ioremap_wc(hdlcd->fb.fix.smem_start, framebuffer_size);
+	if (!hdlcd->fb.screen_base) {
+		dev_err(&pdev->dev, "unable to ioremap framebuffer\n");
+		err = -ENOMEM;
+		/* nothing is mapped yet, so skip the iounmap() in probe_err */
+		goto remap_err;
+	}
+
+	hdlcd->fb.screen_size = framebuffer_size;
+	hdlcd->fb.fix.smem_len = framebuffer_size;
+	hdlcd->fb.fix.mmio_start = mem->start;
+	hdlcd->fb.fix.mmio_len = resource_size(mem);
+
+	/* Clear the framebuffer */
+	memset(hdlcd->fb.screen_base, 0, framebuffer_size);
+
+	hdlcd->dev = &pdev->dev;
+
+	dev_dbg(&pdev->dev, "HDLCD: framebuffer virt base %p, phys base 0x%lX\n",
+		hdlcd->fb.screen_base, (unsigned long)hdlcd->fb.fix.smem_start);
+
+	err = hdlcd_setup(hdlcd);
+
+	if (err)
+		goto probe_err;
+
+	platform_set_drvdata(pdev, hdlcd);
+	return 0;
+
+probe_err:
+	iounmap(hdlcd->fb.screen_base);
+
+remap_err:
+	/* the second argument is the region size, not its start address */
+	memblock_free(hdlcd->fb.fix.smem_start, framebuffer_size);
+
+memalloc_err:
+	release_mem_region(mem->start, resource_size(mem));
+
+request_err:
+	free_irq(hdlcd->irq, hdlcd);
+
+resource_err:
+	kfree(hdlcd->edid);
+	kfree(hdlcd);
+
+	return err;
+}
+
+/*
+ * Platform driver remove: undo hdlcd_probe() and hdlcd_setup().
+ *
+ * The framebuffer is unregistered and the interrupt released first, so
+ * nothing can reach the device state or registers while they are torn down.
+ */
+static int hdlcd_remove(struct platform_device *pdev)
+{
+	struct hdlcd_device *hdlcd = platform_get_drvdata(pdev);
+
+	/* hdlcd->fb is embedded in hdlcd and must not stay registered */
+	unregister_framebuffer(&hdlcd->fb);
+
+	/* dev_id has to match the cookie that was passed to request_irq() */
+	free_irq(hdlcd->irq, hdlcd);
+
+	clk_disable(hdlcd->clk);
+	clk_unprepare(hdlcd->clk);
+	clk_put(hdlcd->clk);
+
+	/* unmap memory */
+	iounmap(hdlcd->fb.screen_base);
+	iounmap(hdlcd->base);
+
+	/* deallocate fb memory */
+	fb_dealloc_cmap(&hdlcd->fb.cmap);
+	kfree(hdlcd->fb.pseudo_palette);
+	memblock_free(hdlcd->fb.fix.smem_start, hdlcd->fb.fix.smem_len);
+	release_mem_region(hdlcd->fb.fix.mmio_start, hdlcd->fb.fix.mmio_len);
+
+	kfree(hdlcd->edid);
+	kfree(hdlcd);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int hdlcd_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	/* not implemented yet */
+	return 0;
+}
+
+static int hdlcd_resume(struct platform_device *pdev)
+{
+	/* not implemented yet */
+	return 0;
+}
+#else
+#define hdlcd_suspend	NULL
+#define hdlcd_resume	NULL
+#endif
+
+static struct platform_driver hdlcd_driver = {
+	.probe		= hdlcd_probe,
+	.remove		= hdlcd_remove,
+	.suspend	= hdlcd_suspend,
+	.resume		= hdlcd_resume,
+	.driver	= {
+		.name		= "hdlcd",
+		.owner		= THIS_MODULE,
+		.of_match_table	= hdlcd_of_matches,
+	},
+};
+
+/* Register the platform driver; set up underrun counting when enabled. */
+static int __init hdlcd_init(void)
+{
+#ifdef HDLCD_COUNT_BUFFERUNDERRUNS
+	int err = platform_driver_register(&hdlcd_driver);
+	if (!err)
+		hdlcd_underrun_init();
+	return err;
+#else
+	return platform_driver_register(&hdlcd_driver);
+#endif
+}
+
+/* Mirror of hdlcd_init(): stop underrun counting, then drop the driver. */
+void __exit hdlcd_exit(void)
+{
+#ifdef HDLCD_COUNT_BUFFERUNDERRUNS
+	hdlcd_underrun_close();
+#endif
+	platform_driver_unregister(&hdlcd_driver);
+}
+
+module_init(hdlcd_init);
+module_exit(hdlcd_exit);
+
+MODULE_AUTHOR("Liviu Dudau");
+MODULE_DESCRIPTION("ARM HDLCD core driver");
+MODULE_LICENSE("GPL v2");
diff --git
a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index a92783e480e6..0d8f98c79a6c 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -404,7 +404,7 @@ static void cursor_timer_handler(unsigned long dev_addr)
 	struct fb_info *info = (struct fb_info *) dev_addr;
 	struct fbcon_ops *ops = info->fbcon_par;
 
-	schedule_work(&info->queue);
+	queue_work(system_power_efficient_wq, &info->queue);
 	mod_timer(&ops->cursor_timer, jiffies + HZ/5);
 }
 
diff --git a/drivers/video/vexpress-dvi.c b/drivers/video/vexpress-dvi.c
new file mode 100644
index 000000000000..f08753450ee4
--- /dev/null
+++ b/drivers/video/vexpress-dvi.c
@@ -0,0 +1,220 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2012 ARM Limited
+ */
+
+#define pr_fmt(fmt) "vexpress-dvi: " fmt
+
+#include <linux/fb.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/vexpress.h>
+
+
+static struct vexpress_config_func *vexpress_dvimode_func;
+
+/* Resolutions that have a DVI mode value to write to the DVIMODE function */
+static struct {
+	u32 xres, yres, mode;
+} vexpress_dvi_dvimodes[] = {
+	{ 640, 480, 0 }, /* VGA */
+	{ 800, 600, 1 }, /* SVGA */
+	{ 1024, 768, 2 }, /* XGA */
+	{ 1280, 1024, 3 }, /* SXGA */
+	{ 1600, 1200, 4 }, /* UXGA */
+	{ 1920, 1080, 5 }, /* HD1080 */
+};
+
+/*
+ * Write the mode value matching xres x yres to the DVIMODE config function.
+ * Does nothing when that function has not been probed; warns when the
+ * resolution is not in the table or the write fails.
+ */
+static void vexpress_dvi_mode_set(struct fb_info *info, u32 xres, u32 yres)
+{
+	int err = -ENOENT;
+	int i;
+
+	if (!vexpress_dvimode_func)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(vexpress_dvi_dvimodes); i++) {
+		if (vexpress_dvi_dvimodes[i].xres == xres &&
+				vexpress_dvi_dvimodes[i].yres == yres) {
+			pr_debug("mode: %ux%u = %d\n", xres, yres,
+					vexpress_dvi_dvimodes[i].mode);
+			err = vexpress_config_write(vexpress_dvimode_func, 0,
+					vexpress_dvi_dvimodes[i].mode);
+			break;
+		}
+	}
+
+	if (err)
+		pr_warn("Failed to set %ux%u mode! (%d)\n", xres, yres, err);
+}
+
+
+static struct vexpress_config_func *vexpress_muxfpga_func;
+/* node number of the framebuffer currently routed to DVI, -1 if none yet */
+static int vexpress_dvi_fb = -1;
+
+/*
+ * Route the site of info's device through the MUXFPGA and, on success,
+ * record info as the selected framebuffer and program its current mode.
+ * Returns -ENXIO when the MUXFPGA function has not been probed, otherwise
+ * the result of the config write.
+ */
+static int vexpress_dvi_mux_set(struct fb_info *info)
+{
+	int err;
+	u32 site = vexpress_get_site_by_dev(info->device);
+
+	if (!vexpress_muxfpga_func)
+		return -ENXIO;
+
+	err = vexpress_config_write(vexpress_muxfpga_func, 0, site);
+	if (!err) {
+		pr_debug("Selected MUXFPGA input %d (fb%d)\n", site,
+				info->node);
+		vexpress_dvi_fb = info->node;
+		vexpress_dvi_mode_set(info, info->var.xres,
+				info->var.yres);
+	} else {
+		pr_warn("Failed to select MUXFPGA input %d (fb%d)! (%d)\n",
+				site, info->node, err);
+	}
+
+	return err;
+}
+
+/*
+ * Select registered framebuffer number fb as the DVI source. A negative fb
+ * means "use fb0". Returns -ENODEV when fb is out of range, not registered
+ * or cannot be locked, otherwise the result of vexpress_dvi_mux_set().
+ */
+static int vexpress_dvi_fb_select(int fb)
+{
+	int err;
+	struct fb_info *info;
+
+	/* fb0 is the default */
+	if (fb < 0)
+		fb = 0;
+
+	/* fb can come straight from sysfs; never index past registered_fb[] */
+	if (fb >= FB_MAX)
+		return -ENODEV;
+
+	info = registered_fb[fb];
+	if (!info || !lock_fb_info(info))
+		return -ENODEV;
+
+	err = vexpress_dvi_mux_set(info);
+
+	unlock_fb_info(info);
+
+	return err;
+}
+
+/* sysfs "fb" read: print the node number of the selected framebuffer */
+static ssize_t vexpress_dvi_fb_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", vexpress_dvi_fb);
+}
+
+/* sysfs "fb" write: parse a framebuffer number and select it */
+static ssize_t vexpress_dvi_fb_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	int value;
+	/* kstrtoint() rejects values that would be truncated in an int */
+	int err = kstrtoint(buf, 0, &value);
+
+	if (!err)
+		err = vexpress_dvi_fb_select(value);
+
+	return err ? err : count;
+}
+
+/* only referenced in this file, so keep dev_attr_fb out of the global namespace */
+static DEVICE_ATTR(fb, S_IRUGO | S_IWUSR, vexpress_dvi_fb_show,
+		vexpress_dvi_fb_store);
+
+
+/*
+ * Framebuffer notifier: route the first framebuffer that registers while
+ * nothing is selected, and follow mode changes of the selected one.
+ */
+static int vexpress_dvi_fb_event_notify(struct notifier_block *self,
+		unsigned long action, void *data)
+{
+	struct fb_event *event = data;
+	struct fb_info *info = event->info;
+	struct fb_videomode *mode = event->data;
+
+	switch (action) {
+	case FB_EVENT_FB_REGISTERED:
+		if (vexpress_dvi_fb < 0)
+			vexpress_dvi_mux_set(info);
+		break;
+	case FB_EVENT_MODE_CHANGE:
+	case FB_EVENT_MODE_CHANGE_ALL:
+		if (info->node == vexpress_dvi_fb)
+			vexpress_dvi_mode_set(info, mode->xres, mode->yres);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block vexpress_dvi_fb_notifier = {
+	.notifier_call = vexpress_dvi_fb_event_notify,
+};
+static bool vexpress_dvi_fb_notifier_registered;
+
+
+enum vexpress_dvi_func { FUNC_MUXFPGA, FUNC_DVIMODE };
+
+static struct of_device_id vexpress_dvi_of_match[] = {
+	{
+		.compatible = "arm,vexpress-muxfpga",
+		.data = (void *)FUNC_MUXFPGA,
+	}, {
+		.compatible = "arm,vexpress-dvimode",
+		.data = (void *)FUNC_DVIMODE,
+	},
+	{}
+};
+
+/*
+ * Probe either the MUXFPGA or the DVIMODE device: remember its config
+ * function, register the framebuffer notifier once, and try to select the
+ * current (or default) framebuffer. The selection is best effort and the
+ * probe itself always succeeds.
+ */
+static int vexpress_dvi_probe(struct platform_device *pdev)
+{
+	enum vexpress_dvi_func func;
+	const struct of_device_id *match =
+			of_match_device(vexpress_dvi_of_match, &pdev->dev);
+	int err;
+
+	/* .data carries the enum value; go through an integer of pointer width */
+	if (match)
+		func = (enum vexpress_dvi_func)(unsigned long)match->data;
+	else
+		func = pdev->id_entry->driver_data;
+
+	switch (func) {
+	case FUNC_MUXFPGA:
+		vexpress_muxfpga_func =
+				vexpress_config_func_get_by_dev(&pdev->dev);
+		/* the attribute is optional, so only report a failure */
+		err = device_create_file(&pdev->dev, &dev_attr_fb);
+		if (err)
+			pr_warn("Failed to create fb attribute! (%d)\n", err);
+		break;
+	case FUNC_DVIMODE:
+		vexpress_dvimode_func =
+				vexpress_config_func_get_by_dev(&pdev->dev);
+		break;
+	}
+
+	if (!vexpress_dvi_fb_notifier_registered) {
+		fb_register_client(&vexpress_dvi_fb_notifier);
+		vexpress_dvi_fb_notifier_registered = true;
+	}
+
+	vexpress_dvi_fb_select(vexpress_dvi_fb);
+
+	return 0;
+}
+
+static const struct platform_device_id vexpress_dvi_id_table[] = {
+	{ .name = "vexpress-muxfpga", .driver_data = FUNC_MUXFPGA, },
+	{ .name = "vexpress-dvimode", .driver_data = FUNC_DVIMODE, },
+	{}
+};
+
+static struct platform_driver vexpress_dvi_driver = {
+	.probe = vexpress_dvi_probe,
+	.driver = {
+		.name = "vexpress-dvi",
+		.of_match_table = vexpress_dvi_of_match,
+	},
+	.id_table = vexpress_dvi_id_table,
+};
+
+static int __init vexpress_dvi_init(void)
+{
+	return platform_driver_register(&vexpress_dvi_driver);
+}
+device_initcall(vexpress_dvi_init); |