79 files changed, 15621 insertions, 109 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 9953a42809ec..d27feb5460f3 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -166,4 +166,6 @@ source "drivers/ipack/Kconfig"
 
 source "drivers/reset/Kconfig"
 
+source "drivers/gator/Kconfig"
+
 endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 130abc1dfd65..092a62e79688 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -152,3 +152,5 @@ obj-$(CONFIG_IIO)		+= iio/
 obj-$(CONFIG_VME_BUS)		+= vme/
 obj-$(CONFIG_IPACK_BUS)		+= ipack/
 obj-$(CONFIG_NTB)		+= ntb/
+
+obj-$(CONFIG_GATOR)		+= gator/
diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index b05ecab915c4..5286e2d333b0 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -26,4 +26,11 @@ config OMAP_INTERCONNECT
 
 	help
 	  Driver to enable OMAP interconnect error handling driver.
+
+config ARM_CCI
+	bool "ARM CCI driver support"
+	depends on ARM
+	help
+	  Driver supporting the CCI cache coherent interconnect for ARM
+	  platforms.
 endmenu
diff --git a/drivers/bus/Makefile b/drivers/bus/Makefile
index 3c7b53c12091..670cea443802 100644
--- a/drivers/bus/Makefile
+++ b/drivers/bus/Makefile
@@ -7,3 +7,5 @@ obj-$(CONFIG_OMAP_OCP2SCP)	+= omap-ocp2scp.o
 
 # Interconnect bus driver for OMAP SoCs.
 obj-$(CONFIG_OMAP_INTERCONNECT)	+= omap_l3_smx.o omap_l3_noc.o
+# CCI cache coherent interconnect for ARM platforms
+obj-$(CONFIG_ARM_CCI)		+= arm-cci.o
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
new file mode 100644
index 000000000000..2d1387bf1724
--- /dev/null
+++ b/drivers/bus/arm-cci.c
@@ -0,0 +1,945 @@
+/*
+ * CCI cache coherent interconnect driver
+ *
+ * Copyright (C) 2013 ARM Ltd.
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/arm-cci.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+
+#include <asm/cacheflush.h>
+#include <asm/irq_regs.h>
+#include <asm/pmu.h>
+#include <asm/smp_plat.h>
+
+#define DRIVER_NAME		"CCI"
+
+#define CCI_PORT_CTRL		0x0
+#define CCI_CTRL_STATUS		0xc
+
+#define CCI_ENABLE_SNOOP_REQ	0x1
+#define CCI_ENABLE_DVM_REQ	0x2
+#define CCI_ENABLE_REQ		(CCI_ENABLE_SNOOP_REQ | CCI_ENABLE_DVM_REQ)
+
+struct cci_nb_ports {
+	unsigned int nb_ace;
+	unsigned int nb_ace_lite;
+};
+
+enum cci_ace_port_type {
+	ACE_INVALID_PORT = 0x0,
+	ACE_PORT,
+	ACE_LITE_PORT,
+};
+
+struct cci_ace_port {
+	void __iomem *base;
+	unsigned long phys;
+	enum cci_ace_port_type type;
+	struct device_node *dn;
+};
+
+static struct cci_ace_port *ports;
+static unsigned int nb_cci_ports;
+
+static void __iomem *cci_ctrl_base;
+static unsigned long cci_ctrl_phys;
+
+#ifdef CONFIG_HW_PERF_EVENTS
+
+static void __iomem *cci_pmu_base;
+
+#define CCI400_PMCR		0x0100
+
+#define CCI400_PMU_CYCLE_CNTR_BASE    0x0000
+#define CCI400_PMU_CNTR_BASE(idx)     (CCI400_PMU_CYCLE_CNTR_BASE + (idx) * 0x1000)
+
+#define CCI400_PMCR_CEN          0x00000001
+#define CCI400_PMCR_RST          0x00000002
+#define CCI400_PMCR_CCR          0x00000004
+#define CCI400_PMCR_CCD          0x00000008
+#define CCI400_PMCR_EX           0x00000010
+#define CCI400_PMCR_DP           0x00000020
+#define CCI400_PMCR_NCNT_MASK    0x0000F800
+#define CCI400_PMCR_NCNT_SHIFT   11
+
+#define CCI400_PMU_EVT_SEL       0x000
+#define CCI400_PMU_CNTR          0x004
+#define CCI400_PMU_CNTR_CTRL     0x008
+#define CCI400_PMU_OVERFLOW      0x00C
+
+#define CCI400_PMU_OVERFLOW_FLAG 1
+
+enum cci400_perf_events {
+	CCI400_PMU_CYCLES = 0xFF
+};
+
+#define CCI400_PMU_EVENT_MASK   0xff
+#define CCI400_PMU_EVENT_SOURCE(event) ((event >> 5) & 0x7)
+#define CCI400_PMU_EVENT_CODE(event) (event & 0x1f)
+
+#define CCI400_PMU_EVENT_SOURCE_S0 0
+#define CCI400_PMU_EVENT_SOURCE_S4 4
+#define CCI400_PMU_EVENT_SOURCE_M0 5
+#define CCI400_PMU_EVENT_SOURCE_M2 7
+
+#define CCI400_PMU_EVENT_SLAVE_MIN 0x0
+#define CCI400_PMU_EVENT_SLAVE_MAX 0x13
+
+#define CCI400_PMU_EVENT_MASTER_MIN 0x14
+#define CCI400_PMU_EVENT_MASTER_MAX 0x1A
+
+#define CCI400_PMU_MAX_HW_EVENTS 5   /* CCI PMU has 4 counters + 1 cycle counter */
+
+#define CCI400_PMU_CYCLE_COUNTER_IDX 0
+#define CCI400_PMU_COUNTER0_IDX      1
+#define CCI400_PMU_COUNTER_LAST(cci_pmu) (CCI400_PMU_CYCLE_COUNTER_IDX + cci_pmu->num_events - 1)
+
+
+static struct perf_event *events[CCI400_PMU_MAX_HW_EVENTS];
+static unsigned long used_mask[BITS_TO_LONGS(CCI400_PMU_MAX_HW_EVENTS)];
+static struct pmu_hw_events cci_hw_events = {
+	.events    = events,
+	.used_mask = used_mask,
+};
+
+static int cci_pmu_validate_hw_event(u8 hw_event)
+{
+	u8 src = CCI400_PMU_EVENT_SOURCE(hw_event);
+	u8 code = CCI400_PMU_EVENT_CODE(hw_event);
+
+	/* Slave sources (S0..S4) accept codes up to SLAVE_MAX only. */
+	if (src <= CCI400_PMU_EVENT_SOURCE_S4)
+		return code <= CCI400_PMU_EVENT_SLAVE_MAX ? hw_event : -EINVAL;
+	/* Master sources (M0..M2) accept codes MASTER_MIN..MASTER_MAX only. */
+	if (src >= CCI400_PMU_EVENT_SOURCE_M0 &&
+	    src <= CCI400_PMU_EVENT_SOURCE_M2 &&
+	    code >= CCI400_PMU_EVENT_MASTER_MIN &&
+	    code <= CCI400_PMU_EVENT_MASTER_MAX)
+		return hw_event;
+	return -EINVAL;
+}
+
+static inline int cci_pmu_counter_is_valid(struct arm_pmu *cci_pmu, int idx)
+{
+	return CCI400_PMU_CYCLE_COUNTER_IDX <= idx &&
+		idx <= CCI400_PMU_COUNTER_LAST(cci_pmu);
+}
+
+static inline u32 cci_pmu_read_register(int idx, unsigned int offset)
+{
+	return readl_relaxed(cci_pmu_base + CCI400_PMU_CNTR_BASE(idx) + offset);
+}
+
+static inline void cci_pmu_write_register(u32 value, int idx, unsigned int offset)
+{
+	writel_relaxed(value, cci_pmu_base + CCI400_PMU_CNTR_BASE(idx) + offset);
+}
+
+static inline void cci_pmu_disable_counter(int idx)
+{
+	cci_pmu_write_register(0, idx, CCI400_PMU_CNTR_CTRL);
+}
+
+static inline void cci_pmu_enable_counter(int idx)
+{
+	cci_pmu_write_register(1, idx, CCI400_PMU_CNTR_CTRL);
+}
+
+static inline void cci_pmu_select_event(int idx, unsigned long event)
+{
+	event &= CCI400_PMU_EVENT_MASK;
+	cci_pmu_write_register(event, idx, CCI400_PMU_EVT_SEL);
+}
+
+static u32 cci_pmu_get_max_counters(void)
+{
+	u32 n_cnts = (readl_relaxed(cci_ctrl_base + CCI400_PMCR) &
+		      CCI400_PMCR_NCNT_MASK) >> CCI400_PMCR_NCNT_SHIFT;
+
+	/* add 1 for cycle counter, capped to the size of events[]/used_mask */
+	return min_t(u32, n_cnts + 1, CCI400_PMU_MAX_HW_EVENTS);
+}
+
+static struct pmu_hw_events *cci_pmu_get_hw_events(void)
+{
+	return &cci_hw_events;
+}
+
+static int cci_pmu_get_event_idx(struct pmu_hw_events *hw, struct perf_event *event)
+{
+	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+	unsigned long cci_event = event->hw.config_base & CCI400_PMU_EVENT_MASK;
+	int idx;
+
+	/* The cycles event may only use the dedicated cycle counter. */
+	if (cci_event == CCI400_PMU_CYCLES) {
+		if (test_and_set_bit(CCI400_PMU_CYCLE_COUNTER_IDX, hw->used_mask))
+			return -EAGAIN;
+		return CCI400_PMU_CYCLE_COUNTER_IDX;
+	}
+
+	/* Any other event claims the first unused event counter. */
+	for (idx = CCI400_PMU_COUNTER0_IDX; idx <= CCI400_PMU_COUNTER_LAST(cci_pmu); ++idx) {
+		if (!test_and_set_bit(idx, hw->used_mask))
+			return idx;
+	}
+
+	/* No counters available */
+	return -EAGAIN;
+}
+
+/* Translate @event's config into a CCI event number, or a negative errno. */
+static int cci_pmu_map_event(struct perf_event *event)
+{
+	u8 config = event->attr.config & CCI400_PMU_EVENT_MASK;
+
+	/* Event types below PERF_TYPE_MAX are refused with -ENOENT. */
+	if (event->attr.type < PERF_TYPE_MAX)
+		return -ENOENT;
+
+	/* 0xff is used to represent CCI Cycles */
+	if (config == CCI400_PMU_CYCLES)
+		return config;
+
+	/* Anything else has to be a valid source/code pair. */
+	return cci_pmu_validate_hw_event(config);
+}
+
+static int cci_pmu_request_irq(struct arm_pmu *cci_pmu, irq_handler_t handler)
+{
+	int irq, err, i = 0;
+	struct platform_device *pmu_device = cci_pmu->plat_device;
+
+	if (unlikely(!pmu_device))
+		return -ENODEV;
+
+	/*
+	 * CCI exports 6 interrupts - 1 nERRORIRQ + 5 nEVNTCNTOVERFLOW (PMU).
+	 * nERRORIRQ will be handled by secure firmware on TC2, so we assume
+	 * that all CCI interrupts listed in the Linux device tree are PMU
+	 * interrupts and request each of them with the same handler; this
+	 * copes with different routings of the CCI PMU interrupts.
+	 * NOTE(review): IRQs already requested are not released on failure.
+	 */
+	while ((irq = platform_get_irq(pmu_device, i)) > 0) {
+		err = request_irq(irq, handler, 0, "arm-cci-pmu", cci_pmu);
+		if (err) {
+			dev_err(&pmu_device->dev, "unable to request IRQ%d for ARM CCI PMU counters\n",
+				irq);
+			return err;
+		}
+		i++;
+	}
+
+	return 0;
+}
+
+static irqreturn_t cci_pmu_handle_irq(int irq_num, void *dev)
+{
+	struct arm_pmu *cci_pmu = dev;
+	struct pmu_hw_events *hw_events = cci_pmu->get_hw_events();
+	struct pt_regs *regs = get_irq_regs();
+	struct perf_sample_data data;
+	int idx;
+
+	/*
+	 * Iterate over counters and update the corresponding perf events.
+	 * This should work regardless of whether we have per-counter overflow
+	 * interrupt or a combined overflow interrupt.
+	 */
+	for (idx = CCI400_PMU_CYCLE_COUNTER_IDX; idx <= CCI400_PMU_COUNTER_LAST(cci_pmu); idx++) {
+		struct perf_event *event = hw_events->events[idx];
+		u32 overflow;
+
+		/* No event is attached to this counter. */
+		if (!event)
+			continue;
+
+		/* Did this counter overflow? */
+		overflow = cci_pmu_read_register(idx, CCI400_PMU_OVERFLOW);
+		if (!(overflow & CCI400_PMU_OVERFLOW_FLAG))
+			continue;
+		cci_pmu_write_register(CCI400_PMU_OVERFLOW_FLAG, idx, CCI400_PMU_OVERFLOW);
+
+		armpmu_event_update(event);
+		perf_sample_data_init(&data, 0, event->hw.last_period);
+		if (!armpmu_event_set_period(event))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			cci_pmu->disable(event);
+	}
+
+	irq_work_run();
+	return IRQ_HANDLED;
+}
+
+/* Free every IRQ listed for the PMU platform device. */
+static void cci_pmu_free_irq(struct arm_pmu *cci_pmu)
+{
+	struct platform_device *pmu_device = cci_pmu->plat_device;
+	int irq, i;
+
+	/* Stop at the first index for which platform_get_irq() is not positive. */
+	for (i = 0; (irq = platform_get_irq(pmu_device, i)) > 0; i++)
+		free_irq(irq, cci_pmu);
+}
+
+/* Program (for non-cycle counters) and enable the counter backing @event. */
+static void cci_pmu_enable_event(struct perf_event *event)
+{
+	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = cci_pmu->get_hw_events();
+	int idx = event->hw.idx;
+	unsigned long flags;
+
+	if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) {
+		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+		return;
+	}
+
+	raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
+
+	/* Configure the event to count, unless you are counting cycles */
+	if (idx != CCI400_PMU_CYCLE_COUNTER_IDX)
+		cci_pmu_select_event(idx, event->hw.config_base);
+
+	cci_pmu_enable_counter(idx);
+
+	raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
+}
+
+/* Disable the counter backing @event; an invalid index is only logged. */
+static void cci_pmu_disable_event(struct perf_event *event)
+{
+	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = cci_pmu->get_hw_events();
+	int idx = event->hw.idx;
+	unsigned long flags;
+
+	if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) {
+		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+		return;
+	}
+
+	raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
+
+	cci_pmu_disable_counter(idx);
+
+	raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
+}
+
+/* Set CCI400_PMCR_CEN in the PMCR register while holding pmu_lock. */
+static void cci_pmu_start(struct arm_pmu *cci_pmu)
+{
+	struct pmu_hw_events *hw_events = cci_pmu->get_hw_events();
+	unsigned long flags;
+	u32 val;
+
+	raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
+
+	/* Enable all the PMU counters. */
+	val = readl(cci_ctrl_base + CCI400_PMCR);
+	writel(val | CCI400_PMCR_CEN, cci_ctrl_base + CCI400_PMCR);
+	raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
+}
+
+/* Clear CCI400_PMCR_CEN in the PMCR register while holding pmu_lock. */
+static void cci_pmu_stop(struct arm_pmu *cci_pmu)
+{
+	struct pmu_hw_events *hw_events = cci_pmu->get_hw_events();
+	unsigned long flags;
+	u32 val;
+
+	raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
+
+	/* Disable all the PMU counters. */
+	val = readl(cci_ctrl_base + CCI400_PMCR);
+	writel(val & ~CCI400_PMCR_CEN, cci_ctrl_base + CCI400_PMCR);
+	raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
+}
+
+/*
+ * Read the counter backing @event; an out-of-range index reads as 0.
+ */
+static u32 cci_pmu_read_counter(struct perf_event *event)
+{
+	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+	int idx = event->hw.idx;
+
+	if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) {
+		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+		return 0;
+	}
+
+	return cci_pmu_read_register(idx, CCI400_PMU_CNTR);
+}
+
+/* Load @value into the counter backing @event; a bad index is only logged. */
+static void cci_pmu_write_counter(struct perf_event *event, u32 value)
+{
+	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+	int idx = event->hw.idx;
+
+	if (likely(cci_pmu_counter_is_valid(cci_pmu, idx)))
+		cci_pmu_write_register(value, idx, CCI400_PMU_CNTR);
+	else
+		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+}
+
+static struct arm_pmu cci_pmu = {
+	.name             = DRIVER_NAME,
+	.max_period       = (1LLU << 32) - 1,
+	.get_hw_events    = cci_pmu_get_hw_events,
+	.get_event_idx    = cci_pmu_get_event_idx,
+	.map_event        = cci_pmu_map_event,
+	.request_irq      = cci_pmu_request_irq,
+	.handle_irq       = cci_pmu_handle_irq,
+	.free_irq         = cci_pmu_free_irq,
+	.enable           = cci_pmu_enable_event,
+	.disable          = cci_pmu_disable_event,
+	.start            = cci_pmu_start,
+	.stop             = cci_pmu_stop,
+	.read_counter     = cci_pmu_read_counter,
+	.write_counter    = cci_pmu_write_counter,
+};
+
+/* Map the PMU registers, read the counter count and register the arm_pmu. */
+static int cci_pmu_probe(struct platform_device *pdev)
+{
+	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	cci_pmu_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(cci_pmu_base))
+		return PTR_ERR(cci_pmu_base);
+
+	cci_pmu.plat_device = pdev;
+	cci_pmu.num_events = cci_pmu_get_max_counters();
+	raw_spin_lock_init(&cci_hw_events.pmu_lock);
+	cpumask_setall(&cci_pmu.valid_cpus);
+
+	return armpmu_register(&cci_pmu, -1);
+}
+
+static const struct of_device_id arm_cci_pmu_matches[] = {
+	{.compatible = "arm,cci-400-pmu"},
+	{},
+};
+
+static struct platform_driver cci_pmu_platform_driver = {
+	.driver = {
+		   .name = DRIVER_NAME,
+		   .of_match_table = arm_cci_pmu_matches,
+		  },
+	.probe = cci_pmu_probe,
+};
+
+static int __init cci_pmu_init(void)
+{
+	if (platform_driver_register(&cci_pmu_platform_driver))
+		WARN(1, "unable to register CCI platform driver\n");
+	return 0;
+}
+
+#else
+
+static int __init cci_pmu_init(void)
+{
+	return 0;
+}
+
+#endif /* CONFIG_HW_PERF_EVENTS */
+
+struct cpu_port {
+	u64 mpidr;
+	u32 port;
+};
+
+/*
+ * Use the port MSB as valid flag, shift can be made dynamic
+ * by computing number of bits required for port indexes.
+ * Code disabling CCI cpu ports runs with D-cache invalidated
+ * and SCTLR bit clear so data accesses must be kept to a minimum
+ * to improve performance; for now shift is left static to
+ * avoid one more data access while disabling the CCI port.
+ */
+#define PORT_VALID_SHIFT	31
+#define PORT_VALID		(0x1 << PORT_VALID_SHIFT)
+
+static inline void init_cpu_port(struct cpu_port *port, u32 index, u64 mpidr)
+{
+	port->port = PORT_VALID | index;
+	port->mpidr = mpidr;
+}
+
+static inline bool cpu_port_is_valid(struct cpu_port *port)
+{
+	return !!(port->port & PORT_VALID);
+}
+
+static inline bool cpu_port_match(struct cpu_port *port, u64 mpidr)
+{
+	return port->mpidr == (mpidr & MPIDR_HWID_BITMASK);
+}
+
+static struct cpu_port cpu_port[NR_CPUS];
+
+/**
+ * __cci_ace_get_port - Function to retrieve the port index connected to
+ *			a cpu or device.
+ *
+ * @dn: device node of the device to look-up
+ * @type: port type
+ *
+ * Return value:
+ *	- CCI port index if success
+ *	- -ENODEV if failure
+ */
+static int __cci_ace_get_port(struct device_node *dn, int type)
+{
+	struct device_node *cci_portn;
+	int i;
+
+	/* NOTE(review): the reference taken by of_parse_phandle() is never put. */
+	cci_portn = of_parse_phandle(dn, "cci-control-port", 0);
+	for (i = 0; i < nb_cci_ports; i++) {
+		/* A hit needs both the requested type and the same node. */
+		if (ports[i].type == type && cci_portn == ports[i].dn)
+			return i;
+	}
+	return -ENODEV;
+}
+
+int cci_ace_get_port(struct device_node *dn)
+{
+	return __cci_ace_get_port(dn, ACE_LITE_PORT);
+}
+EXPORT_SYMBOL_GPL(cci_ace_get_port);
+
+/* Build cpu_port[]: map each possible CPU under /cpus to its ACE port index. */
+static void __init cci_ace_init_ports(void)
+{
+	int port, ac, cpu;
+	u64 hwid;
+	const u32 *cell;
+	struct device_node *cpun, *cpus;
+
+	cpus = of_find_node_by_path("/cpus");
+	if (WARN(!cpus, "Missing cpus node, bailing out\n"))
+		return;
+
+	if (WARN_ON(of_property_read_u32(cpus, "#address-cells", &ac)))
+		ac = of_n_addr_cells(cpus);
+
+	/*
+	 * Port index look-up speeds up the function disabling ports by CPU,
+	 * since the logical to port index mapping is done once and does not
+	 * change after system boot. The stashed index array is initialized
+	 * for all possible CPUs at probe time.
+	 */
+	for_each_child_of_node(cpus, cpun) {
+		if (of_node_cmp(cpun->type, "cpu"))
+			continue;
+		cell = of_get_property(cpun, "reg", NULL);
+		if (WARN(!cell, "%s: missing reg property\n", cpun->full_name))
+			continue;
+
+		hwid = of_read_number(cell, ac);
+		cpu = get_logical_index(hwid & MPIDR_HWID_BITMASK);
+		if (cpu < 0 || !cpu_possible(cpu))
+			continue;
+		port = __cci_ace_get_port(cpun, ACE_PORT);
+		if (port < 0)
+			continue;
+
+		init_cpu_port(&cpu_port[cpu], port, cpu_logical_map(cpu));
+	}
+	of_node_put(cpus);	/* pairs with of_find_node_by_path() above */
+
+	for_each_possible_cpu(cpu) {
+		WARN(!cpu_port_is_valid(&cpu_port[cpu]),
+			"CPU %u does not have an associated CCI port\n",
+			cpu);
+	}
+}
+/*
+ * Functions to enable/disable a CCI interconnect slave port
+ *
+ * They are called by low-level power management code to disable slave
+ * interfaces snoops and DVM broadcast.
+ * Since they may execute with cache data allocation disabled and
+ * after the caches have been cleaned and invalidated the functions provide
+ * no explicit locking since they may run with D-cache disabled, so normal
+ * cacheable kernel locks based on ldrex/strex may not work.
+ * Locking has to be provided by BSP implementations to ensure proper
+ * operations.
+ */
+
+/**
+ * cci_port_control() - function to control a CCI port
+ *
+ * @port: index of the port to setup
+ * @enable: if true enables the port, if false disables it
+ */
+static void notrace cci_port_control(unsigned int port, bool enable)
+{
+	void __iomem *base = ports[port].base;
+
+	writel_relaxed(enable ? CCI_ENABLE_REQ : 0, base + CCI_PORT_CTRL);
+	/*
+	 * This function is called from power down procedures
+	 * and must not execute any instruction that might
+	 * cause the processor to be put in a quiescent state
+	 * (eg wfi). Hence, cpu_relax() can not be added to this
+	 * read loop to optimize power, since it might hide possibly
+	 * disruptive operations.
+	 */
+	while (readl_relaxed(cci_ctrl_base + CCI_CTRL_STATUS) & 0x1)
+			;
+}
+
+/**
+ * cci_disable_port_by_cpu() - function to disable a CCI port by CPU
+ *			       reference
+ *
+ * @mpidr: mpidr of the CPU whose CCI port should be disabled
+ *
+ * Disabling a CCI port for a CPU implies disabling the CCI port
+ * controlling that CPU cluster. Code disabling CPU CCI ports
+ * must make sure that the CPU running the code is the last active CPU
+ * in the cluster ie all other CPUs are quiescent in a low power state.
+ *
+ * Return:
+ *	0 on success
+ *	-ENODEV on port look-up failure
+ */
+int notrace cci_disable_port_by_cpu(u64 mpidr)
+{
+	struct cpu_port *entry;
+
+	for (entry = cpu_port; entry < cpu_port + nr_cpu_ids; entry++) {
+		if (!cpu_port_is_valid(entry) || !cpu_port_match(entry, mpidr))
+			continue;
+		/* ->port has PORT_VALID in its MSB: strip it to index ports[] */
+		cci_port_control(entry->port & ~PORT_VALID, false);
+		return 0;
+	}
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(cci_disable_port_by_cpu);
+
+/**
+ * cci_enable_port_for_self() - enable a CCI port for calling CPU
+ *
+ * Enabling a CCI port for the calling CPU implies enabling the CCI
+ * port controlling that CPU's cluster. Caller must make sure that the
+ * CPU running the code is the first active CPU in the cluster and all
+ * other CPUs are quiescent in a low power state  or waiting for this CPU
+ * to complete the CCI initialization.
+ *
+ * Because this is called when the MMU is still off and with no stack,
+ * the code must be position independent and ideally rely on callee
+ * clobbered registers only.  To achieve this we must code this function
+ * entirely in assembler.
+ *
+ * On success this returns with the proper CCI port enabled.  In case of
+ * any failure this never returns as the inability to enable the CCI is
+ * fatal and there is no possible recovery at this stage.
+ */
+asmlinkage void __naked cci_enable_port_for_self(void)
+{
+	asm volatile ("\n"
+
+"	mrc	p15, 0, r0, c0, c0, 5	@ get MPIDR value \n"
+"	and	r0, r0, #"__stringify(MPIDR_HWID_BITMASK)" \n"
+"	adr	r1, 5f \n"
+"	ldr	r2, [r1] \n"
+"	add	r1, r1, r2		@ &cpu_port \n"
+"	add	ip, r1, %[sizeof_cpu_port] \n"
+
+	/* Loop over the cpu_port array looking for a matching MPIDR */
+"1:	ldr	r2, [r1, %[offsetof_cpu_port_mpidr_lsb]] \n"
+"	cmp	r2, r0 			@ compare MPIDR \n"
+"	bne	2f \n"
+
+	/* Found a match, now test port validity */
+"	ldr	r3, [r1, %[offsetof_cpu_port_port]] \n"
+"	tst	r3, #"__stringify(PORT_VALID)" \n"
+"	bne	3f \n"
+
+	/* no match, loop with the next cpu_port entry */
+"2:	add	r1, r1, %[sizeof_struct_cpu_port] \n"
+"	cmp	r1, ip			@ done? \n"
+"	blo	1b \n"
+
+	/* CCI port not found -- cheaply try to stall this CPU */
+"cci_port_not_found: \n"
+"	wfi \n"
+"	wfe \n"
+"	b	cci_port_not_found \n"
+
+	/* Use matched port index to look up the corresponding ports entry */
+"3:	bic	r3, r3, #"__stringify(PORT_VALID)" \n"
+"	adr	r0, 6f \n"
+"	ldmia	r0, {r1, r2} \n"
+"	sub	r1, r1, r0 		@ virt - phys \n"
+"	ldr	r0, [r0, r2] 		@ *(&ports) \n"
+"	mov	r2, %[sizeof_struct_ace_port] \n"
+"	mla	r0, r2, r3, r0		@ &ports[index] \n"
+"	sub	r0, r0, r1		@ virt_to_phys() \n"
+
+	/* Enable the CCI port */
+"	ldr	r0, [r0, %[offsetof_port_phys]] \n"
+"	mov	r3, #"__stringify(CCI_ENABLE_REQ)" \n"
+"	str	r3, [r0, #"__stringify(CCI_PORT_CTRL)"] \n"
+
+	/* poll the status reg for completion */
+"	adr	r1, 7f \n"
+"	ldr	r0, [r1] \n"
+"	ldr	r0, [r0, r1]		@ cci_ctrl_base \n"
+"4:	ldr	r1, [r0, #"__stringify(CCI_CTRL_STATUS)"] \n"
+"	tst	r1, #1 \n"
+"	bne	4b \n"
+
+"	mov	r0, #0 \n"
+"	bx	lr \n"
+
+"	.align	2 \n"
+"5:	.word	cpu_port - . \n"
+"6:	.word	. \n"
+"	.word	ports - 6b \n"
+"7:	.word	cci_ctrl_phys - . \n"
+	: :
+	[sizeof_cpu_port] "i" (sizeof(cpu_port)),
+#ifndef __ARMEB__
+	[offsetof_cpu_port_mpidr_lsb] "i" (offsetof(struct cpu_port, mpidr)),
+#else
+	[offsetof_cpu_port_mpidr_lsb] "i" (offsetof(struct cpu_port, mpidr)+4),
+#endif
+	[offsetof_cpu_port_port] "i" (offsetof(struct cpu_port, port)),
+	[sizeof_struct_cpu_port] "i" (sizeof(struct cpu_port)),
+	[sizeof_struct_ace_port] "i" (sizeof(struct cci_ace_port)),
+	[offsetof_port_phys] "i" (offsetof(struct cci_ace_port, phys)) );
+
+	unreachable();
+}
+
+/**
+ * __cci_control_port_by_device() - function to control a CCI port by device
+ *				    reference
+ *
+ * @dn: device node pointer of the device whose CCI port should be
+ *      controlled
+ * @enable: if true enables the port, if false disables it
+ *
+ * Return:
+ *	0 on success
+ *	-ENODEV on port look-up failure
+ */
+int notrace __cci_control_port_by_device(struct device_node *dn, bool enable)
+{
+	int port;
+
+	if (!dn)
+		return -ENODEV;
+
+	port = __cci_ace_get_port(dn, ACE_LITE_PORT);
+	if (WARN_ONCE(port < 0, "node %s ACE lite port look-up failure\n",
+				dn->full_name))
+		return -ENODEV;
+	cci_port_control(port, enable);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__cci_control_port_by_device);
+
+/**
+ * __cci_control_port_by_index() - function to control a CCI port by port index
+ *
+ * @port: port index previously retrieved with cci_ace_get_port()
+ * @enable: if true enables the port, if false disables it
+ *
+ * Return:
+ *	0 on success
+ *	-ENODEV on port index out of range
+ *	-EPERM if operation carried out on an ACE PORT
+ */
+int notrace __cci_control_port_by_index(u32 port, bool enable)
+{
+	if (port >= nb_cci_ports || ports[port].type == ACE_INVALID_PORT)
+		return -ENODEV;
+	/*
+	 * CCI control for ports connected to CPUS is extremely fragile
+	 * and must be made to go through a specific and controlled
+	 * interface (ie cci_disable_port_by_cpu(); control by general purpose
+	 * indexing is therefore disabled for ACE ports.
+	 */
+	if (ports[port].type == ACE_PORT)
+		return -EPERM;
+
+	cci_port_control(port, enable);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__cci_control_port_by_index);
+
+static const struct cci_nb_ports cci400_ports = {
+	.nb_ace = 2,
+	.nb_ace_lite = 3
+};
+
+static const struct of_device_id arm_cci_matches[] = {
+	{.compatible = "arm,cci-400", .data = &cci400_ports },
+	{},
+};
+
+static const struct of_device_id arm_cci_ctrl_if_matches[] = {
+	{.compatible = "arm,cci-400-ctrl-if", },
+	{},
+};
+
+/* Probe the CCI node: map its control registers and record each port interface. */
+static int __init cci_probe(void)
+{
+	struct cci_nb_ports const *cci_config;
+	int ret, i, nb_ports, nb_ace = 0, nb_ace_lite = 0;
+	struct device_node *np, *cp;
+	struct resource res;
+	const char *match_str;
+	bool is_ace;
+
+	np = of_find_matching_node(NULL, arm_cci_matches);
+	if (!np)
+		return -ENODEV;
+
+	cci_config = of_match_node(arm_cci_matches, np)->data;
+	if (!cci_config)
+		return -ENODEV;
+
+	nb_ports = cci_config->nb_ace + cci_config->nb_ace_lite;
+	ports = kcalloc(nb_ports, sizeof(*ports), GFP_KERNEL);
+	if (!ports)
+		return -ENOMEM;
+
+	ret = of_address_to_resource(np, 0, &res);
+	if (!ret) {
+		cci_ctrl_base = ioremap(res.start, resource_size(&res));
+		cci_ctrl_phys =	res.start;
+	}
+	if (ret || !cci_ctrl_base) {
+		WARN(1, "unable to ioremap CCI ctrl\n");
+		ret = -ENXIO;
+		goto memalloc_err;
+	}
+
+	for_each_child_of_node(np, cp) {
+		if (!of_match_node(arm_cci_ctrl_if_matches, cp))
+			continue;
+
+		i = nb_ace + nb_ace_lite;
+		if (i >= nb_ports)
+			break;
+
+		if (of_property_read_string(cp, "interface-type",
+					&match_str)) {
+			WARN(1, "node %s missing interface-type property\n",
+				  cp->full_name);
+			continue;
+		}
+		is_ace = strcmp(match_str, "ace") == 0;
+		if (!is_ace && strcmp(match_str, "ace-lite")) {
+			WARN(1, "node %s containing invalid interface-type property, skipping it\n",
+					cp->full_name);
+			continue;
+		}
+
+		ret = of_address_to_resource(cp, 0, &res);
+		if (!ret) {
+			ports[i].base = ioremap(res.start, resource_size(&res));
+			ports[i].phys = res.start;
+		}
+		if (ret || !ports[i].base) {
+			WARN(1, "unable to ioremap CCI port %d\n", i);
+			continue;
+		}
+
+		if (is_ace) {
+			if (WARN_ON(nb_ace >= cci_config->nb_ace))
+				continue;
+			ports[i].type = ACE_PORT;
+			++nb_ace;
+		} else {
+			if (WARN_ON(nb_ace_lite >= cci_config->nb_ace_lite))
+				continue;
+			ports[i].type = ACE_LITE_PORT;
+			++nb_ace_lite;
+		}
+		ports[i].dn = cp;
+	}
+
+	/* publish the port count, then stash the ACE ports to speed-up look-up */
+	nb_cci_ports = nb_ports;
+	cci_ace_init_ports();
+
+	/*
+	 * Multi-cluster systems may need this data when non-coherent, during
+	 * cluster power-up/power-down. Make sure it reaches main memory.
+	 */
+	sync_cache_w(&cci_ctrl_base);
+	sync_cache_w(&cci_ctrl_phys);
+	sync_cache_w(&ports);
+	sync_cache_w(&cpu_port);
+	__sync_cache_range_w(ports, sizeof(*ports) * nb_cci_ports);
+	pr_info("ARM CCI driver probed\n");
+	return 0;
+
+memalloc_err:
+	kfree(ports);
+	ports = NULL;	/* never leave the global pointing at freed memory */
+	return ret;
+}
+
+static int cci_init_status = -EAGAIN;
+static DEFINE_MUTEX(cci_probing);
+
+static int __init cci_init(void)
+{
+	if (cci_init_status != -EAGAIN)
+		return cci_init_status;
+
+	mutex_lock(&cci_probing);
+	if (cci_init_status == -EAGAIN)
+		cci_init_status = cci_probe();
+	mutex_unlock(&cci_probing);
+	return cci_init_status;
+}
+
+/*
+ * To sort out early init calls ordering a helper function is provided to
+ * check if the CCI driver has been initialized. The function checks whether
+ * the driver has been initialized; if not, it calls the init function that
+ * probes the driver and updates the return value.
+ */
+bool __init cci_probed(void)
+{
+	return cci_init() == 0;
+}
+EXPORT_SYMBOL_GPL(cci_probed);
+
+early_initcall(cci_init);
+core_initcall(cci_pmu_init);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ARM CCI support");
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index 0357ac44638b..d0d9b2124752 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -42,7 +42,7 @@ config COMMON_CLK_WM831X
 
 config COMMON_CLK_VERSATILE
 	bool "Clock driver for ARM Reference designs"
-	depends on ARCH_INTEGRATOR || ARCH_REALVIEW || ARCH_VEXPRESS
+	depends on ARCH_INTEGRATOR || ARCH_REALVIEW || ARCH_VEXPRESS || ARM64
 	---help---
           Supports clocking on ARM Reference designs:
 	  - Integrator/AP and Integrator/CP
diff --git a/drivers/clk/versatile/Makefile b/drivers/clk/versatile/Makefile
index c16ca787170a..6e76bf87ca87 100644
--- a/drivers/clk/versatile/Makefile
+++ b/drivers/clk/versatile/Makefile
@@ -4,4 +4,4 @@ obj-$(CONFIG_ARCH_INTEGRATOR)	+= clk-integrator.o
 obj-$(CONFIG_INTEGRATOR_IMPD1)	+= clk-impd1.o
 obj-$(CONFIG_ARCH_REALVIEW)	+= clk-realview.o
 obj-$(CONFIG_ARCH_VEXPRESS)	+= clk-vexpress.o clk-sp810.o
-obj-$(CONFIG_VEXPRESS_CONFIG)	+= clk-vexpress-osc.o
+obj-$(CONFIG_VEXPRESS_CONFIG)	+= clk-vexpress-osc.o clk-vexpress-spc.o
diff --git a/drivers/clk/versatile/clk-vexpress-osc.c b/drivers/clk/versatile/clk-vexpress-osc.c
index 256c8be74df8..2dc8b41a339d 100644
--- a/drivers/clk/versatile/clk-vexpress-osc.c
+++ b/drivers/clk/versatile/clk-vexpress-osc.c
@@ -107,7 +107,7 @@ void __init vexpress_osc_of_setup(struct device_node *node)
 	osc->func = vexpress_config_func_get_by_node(node);
 	if (!osc->func) {
 		pr_err("Failed to obtain config func for node '%s'!\n",
-				node->name);
+				node->full_name);
 		goto error;
 	}
 
@@ -119,7 +119,7 @@ void __init vexpress_osc_of_setup(struct device_node *node)
 
 	of_property_read_string(node, "clock-output-names", &init.name);
 	if (!init.name)
-		init.name = node->name;
+		init.name = node->full_name;
 
 	init.ops = &vexpress_osc_ops;
 	init.flags = CLK_IS_ROOT;
diff --git a/drivers/clk/versatile/clk-vexpress-spc.c b/drivers/clk/versatile/clk-vexpress-spc.c
new file mode 100644
index 000000000000..bb566e244b0c
--- /dev/null
+++ b/drivers/clk/versatile/clk-vexpress-spc.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2012 ARM Limited
+ * Copyright (C) 2012 Linaro
+ *
+ * Author: Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+/* SPC clock programming interface for Vexpress cpus */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/vexpress.h>
+
+struct clk_spc {
+	struct clk_hw hw;	/* embedded handle; to_clk_spc() maps it back */
+	spinlock_t *lock;	/* NOTE(review): not referenced by the code visible here */
+	int cluster;		/* cluster id handed to the vexpress_spc_*() calls */
+};
+
+#define to_clk_spc(spc) container_of(spc, struct clk_spc, hw)
+
+/* recalc_rate op: report the rate the SPC returns for this clock's cluster. */
+static unsigned long spc_recalc_rate(struct clk_hw *hw,
+		unsigned long parent_rate)
+{
+	struct clk_spc *spc = to_clk_spc(hw);
+	u32 freq;
+
+	if (vexpress_spc_get_performance(spc->cluster, &freq)) {
+		pr_err("%s: Failed\n", __func__);
+		return 0;	/* unsigned return: -EIO would look like a rate */
+	}
+	return freq * 1000;	/* set_rate divides by 1000; undo that here */
+}
+
+static long spc_round_rate(struct clk_hw *hw, unsigned long drate,
+		unsigned long *parent_rate)
+{
+	return drate;
+}
+
+static int spc_set_rate(struct clk_hw *hw, unsigned long rate,
+		unsigned long parent_rate)
+{
+	struct clk_spc *spc = to_clk_spc(hw);
+
+	return vexpress_spc_set_performance(spc->cluster, rate / 1000);
+}
+
+static struct clk_ops clk_spc_ops = {
+	.recalc_rate = spc_recalc_rate,
+	.round_rate = spc_round_rate,
+	.set_rate = spc_set_rate,
+};
+
+/* Register root clock @name for SPC cluster @cluster_id; ERR_PTR() on error. */
+struct clk *vexpress_clk_register_spc(const char *name, int cluster_id)
+{
+	struct clk_init_data init = {
+		.name = name,
+		.ops = &clk_spc_ops,
+		.flags = CLK_IS_ROOT | CLK_GET_RATE_NOCACHE,
+		.num_parents = 0,
+	};
+	struct clk_spc *spc;
+	struct clk *clk;
+
+	if (!name) {
+		pr_err("Invalid name passed\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	spc = kzalloc(sizeof(*spc), GFP_KERNEL);
+	if (!spc) {
+		pr_err("could not allocate spc clk\n");
+		return ERR_PTR(-ENOMEM);
+	}
+	spc->hw.init = &init;
+	spc->cluster = cluster_id;
+
+	clk = clk_register(NULL, &spc->hw);
+	if (!IS_ERR_OR_NULL(clk))
+		return clk;
+
+	pr_err("clk register failed\n");
+	kfree(spc);
+	/* Other failure paths return ERR_PTR(); never hand back NULL. */
+	return clk ? clk : ERR_PTR(-ENOMEM);
+}
+
+#if defined(CONFIG_OF)
+void __init vexpress_clk_of_register_spc(void)
+{
+	char name[14] = "cpu-cluster.X";	/* 'X' (index 12) gets the cluster digit */
+	struct device_node *node;
+	struct clk *clk;
+	const u32 *val;
+	int cluster_id = 0, len;
+
+	node = of_find_compatible_node(NULL, NULL, "arm,vexpress-spc");
+	if (!node) {
+		pr_debug("%s: No SPC found, Exiting!!\n", __func__);
+		return;
+	}
+	of_node_put(node);	/* presence check only; drop the reference */
+	for (node = NULL; (node = of_find_node_by_name(node, "cluster")); ) {
+		val = of_get_property(node, "reg", &len);
+		if (val && len == 4)
+			cluster_id = be32_to_cpup(val);
+		name[12] = cluster_id + '0';
+		clk = vexpress_clk_register_spc(name, cluster_id);
+		if (IS_ERR_OR_NULL(clk))	/* helper may also return NULL */
+			break;
+		pr_debug("Registered clock '%s'\n", name);
+		clk_register_clkdev(clk, NULL, name);
+	}
+	of_node_put(node);	/* non-NULL only when the loop broke early */
+}
+CLK_OF_DECLARE(spc, "arm,vexpress-spc", vexpress_clk_of_register_spc);
+#endif
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 534fcb825153..a9c1324843eb 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -201,7 +201,7 @@ source "drivers/cpufreq/Kconfig.x86"
 endmenu
 
 menu "ARM CPU frequency scaling drivers"
-depends on ARM
+depends on ARM || ARM64
 source "drivers/cpufreq/Kconfig.arm"
 endmenu
 
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 6e57543fe0b9..8327444b76cb 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -4,7 +4,7 @@
 
 config ARM_BIG_LITTLE_CPUFREQ
 	tristate "Generic ARM big LITTLE CPUfreq driver"
-	depends on ARM_CPU_TOPOLOGY && PM_OPP && HAVE_CLK
+	depends on ARM_CPU_TOPOLOGY && PM_OPP && HAVE_CLK && BIG_LITTLE
 	help
 	  This enables the Generic CPUfreq driver for ARM big.LITTLE platforms.
 
@@ -15,6 +15,14 @@ config ARM_DT_BL_CPUFREQ
 	  This enables probing via DT for Generic CPUfreq driver for ARM
 	  big.LITTLE platform. This gets frequency tables from DT.
 
+config ARM_VEXPRESS_BL_CPUFREQ
+	tristate "ARM Vexpress big LITTLE CPUfreq driver"
+	select ARM_BIG_LITTLE_CPUFREQ
+	depends on VEXPRESS_SPC
+	help
+	  This enables the CPUfreq driver for ARM Vexpress big.LITTLE platform.
+	  If in doubt, say N.
+
 config ARM_EXYNOS_CPUFREQ
 	bool "SAMSUNG EXYNOS SoCs"
 	depends on ARCH_EXYNOS
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 315b9231feb1..1db9b4929cfa 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY)	+= amd_freq_sensitivity.o
 obj-$(CONFIG_ARM_BIG_LITTLE_CPUFREQ)	+= arm_big_little.o
 # big LITTLE per platform glues. Keep DT_BL_CPUFREQ as the last entry in all big
 # LITTLE drivers, so that it is probed last.
+obj-$(CONFIG_ARM_VEXPRESS_BL_CPUFREQ)	+= vexpress_big_little.o
 obj-$(CONFIG_ARM_DT_BL_CPUFREQ)		+= arm_big_little_dt.o
 
 obj-$(CONFIG_ARCH_DAVINCI_DA850)	+= davinci-cpufreq.o
diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index 5d7f53fcd6f5..076f25a59e00 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -24,27 +24,148 @@
 #include <linux/cpufreq.h>
 #include <linux/cpumask.h>
 #include <linux/export.h>
+#include <linux/mutex.h>
 #include <linux/of_platform.h>
 #include <linux/opp.h>
 #include <linux/slab.h>
 #include <linux/topology.h>
 #include <linux/types.h>
+#include <asm/bL_switcher.h>
 
 #include "arm_big_little.h"
 
-/* Currently we support only two clusters */
-#define MAX_CLUSTERS	2
+#ifdef CONFIG_BL_SWITCHER
+bool bL_switching_enabled;
+#endif
+
+#define ACTUAL_FREQ(cluster, freq)	((cluster == A7_CLUSTER) ? freq << 1 : freq)
+#define VIRT_FREQ(cluster, freq)	((cluster == A7_CLUSTER) ? freq >> 1 : freq)
 
 static struct cpufreq_arm_bL_ops *arm_bL_ops;
 static struct clk *clk[MAX_CLUSTERS];
-static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS];
-static atomic_t cluster_usage[MAX_CLUSTERS] = {ATOMIC_INIT(0), ATOMIC_INIT(0)};
+static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1];
+static atomic_t cluster_usage[MAX_CLUSTERS + 1] = {ATOMIC_INIT(0),
+	ATOMIC_INIT(0)};
+
+static unsigned int clk_big_min;	/* Minimum clock frequency (Big) */
+static unsigned int clk_little_max;	/* Maximum clock frequency (Little) */
+
+static DEFINE_PER_CPU(unsigned int, physical_cluster);
+static DEFINE_PER_CPU(unsigned int, cpu_last_req_freq);
+
+static struct mutex cluster_lock[MAX_CLUSTERS];
+
+static unsigned int find_cluster_maxfreq(int cluster)
+{
+	int j;
+	u32 max_freq = 0, cpu_freq;
+
+	for_each_online_cpu(j) {
+		cpu_freq = per_cpu(cpu_last_req_freq, j);
+
+		if ((cluster == per_cpu(physical_cluster, j)) &&
+				(max_freq < cpu_freq))
+			max_freq = cpu_freq;
+	}
 
-static unsigned int bL_cpufreq_get(unsigned int cpu)
+	pr_debug("%s: cluster: %d, max freq: %d\n", __func__, cluster,
+			max_freq);
+
+	return max_freq;
+}
+
+static unsigned int clk_get_cpu_rate(unsigned int cpu)
 {
-	u32 cur_cluster = cpu_to_cluster(cpu);
+	u32 cur_cluster = per_cpu(physical_cluster, cpu);
+	u32 rate = clk_get_rate(clk[cur_cluster]) / 1000;
+
+	/* For switcher we use virtual A15 clock rates */
+	if (is_bL_switching_enabled())
+		rate = VIRT_FREQ(cur_cluster, rate);
+
+	pr_debug("%s: cpu: %d, cluster: %d, freq: %u\n", __func__, cpu,
+			cur_cluster, rate);
 
-	return clk_get_rate(clk[cur_cluster]) / 1000;
+	return rate;
+}
+
+static unsigned int bL_cpufreq_get_rate(unsigned int cpu)
+{
+	if (is_bL_switching_enabled()) {
+		pr_debug("%s: freq: %d\n", __func__, per_cpu(cpu_last_req_freq,
+					cpu));
+
+		return per_cpu(cpu_last_req_freq, cpu);
+	} else {
+		return clk_get_cpu_rate(cpu);
+	}
+}
+
+/*
+ * Set @cpu's requested @rate on @new_cluster and, when the CPU is moving off
+ * @old_cluster, switch it over and re-evaluate the old cluster's rate.
+ * Returns 0, or the clk_set_rate() error for @new_cluster.
+ */
+static int
+bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate)
+{
+	u32 new_rate = rate, prev_rate = 0;
+	int ret;
+	bool bLs = is_bL_switching_enabled();
+
+	mutex_lock(&cluster_lock[new_cluster]);
+
+	if (bLs) {
+		prev_rate = per_cpu(cpu_last_req_freq, cpu);
+		per_cpu(cpu_last_req_freq, cpu) = rate;
+		per_cpu(physical_cluster, cpu) = new_cluster;
+
+		new_rate = find_cluster_maxfreq(new_cluster);
+		new_rate = ACTUAL_FREQ(new_cluster, new_rate);
+	}
+
+	pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d, freq: %d\n",
+			__func__, cpu, old_cluster, new_cluster, new_rate);
+
+	ret = clk_set_rate(clk[new_cluster], new_rate * 1000);
+	if (WARN_ON(ret)) {
+		pr_err("clk_set_rate failed: %d, new cluster: %d\n", ret,
+				new_cluster);
+		if (bLs) {
+			per_cpu(cpu_last_req_freq, cpu) = prev_rate;
+			per_cpu(physical_cluster, cpu) = old_cluster;
+		}
+		mutex_unlock(&cluster_lock[new_cluster]);
+		return ret;
+	}
+
+	mutex_unlock(&cluster_lock[new_cluster]);
+
+	/* Recalc freq for old cluster when switching clusters */
+	if (old_cluster != new_cluster) {
+		pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d\n",
+				__func__, cpu, old_cluster, new_cluster);
+
+		bL_switch_request(cpu, new_cluster);	/* Switch cluster */
+		mutex_lock(&cluster_lock[old_cluster]);
+
+		/* Set freq of old cluster if there are cpus left on it */
+		new_rate = find_cluster_maxfreq(old_cluster);
+		new_rate = ACTUAL_FREQ(old_cluster, new_rate);
+
+		if (new_rate) {
+			pr_debug("%s: Updating rate of old cluster: %d, to freq: %d\n",
+					__func__, old_cluster, new_rate);
+			/* Best effort: log the real error, still return 0 */
+			ret = clk_set_rate(clk[old_cluster], new_rate * 1000);
+			if (ret)
+				pr_err("%s: clk_set_rate failed: %d, old cluster: %d\n",
+						__func__, ret, old_cluster);
+		}
+		mutex_unlock(&cluster_lock[old_cluster]);
+	}
+
+	return 0;
 }
 
 /* Validate policy frequency range */
@@ -60,12 +181,14 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy,
 		unsigned int target_freq, unsigned int relation)
 {
 	struct cpufreq_freqs freqs;
-	u32 cpu = policy->cpu, freq_tab_idx, cur_cluster;
+	u32 cpu = policy->cpu, freq_tab_idx, cur_cluster, new_cluster,
+	    actual_cluster;
 	int ret = 0;
 
-	cur_cluster = cpu_to_cluster(policy->cpu);
+	cur_cluster = cpu_to_cluster(cpu);
+	new_cluster = actual_cluster = per_cpu(physical_cluster, cpu);
 
-	freqs.old = bL_cpufreq_get(policy->cpu);
+	freqs.old = bL_cpufreq_get_rate(cpu);
 
 	/* Determine valid target frequency using freq_table */
 	cpufreq_frequency_table_target(policy, freq_table[cur_cluster],
@@ -79,13 +202,21 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy,
 	if (freqs.old == freqs.new)
 		return 0;
 
+	if (is_bL_switching_enabled()) {
+		if ((actual_cluster == A15_CLUSTER) &&
+				(freqs.new < clk_big_min)) {
+			new_cluster = A7_CLUSTER;
+		} else if ((actual_cluster == A7_CLUSTER) &&
+				(freqs.new > clk_little_max)) {
+			new_cluster = A15_CLUSTER;
+		}
+	}
+
 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
 
-	ret = clk_set_rate(clk[cur_cluster], freqs.new * 1000);
-	if (ret) {
-		pr_err("clk_set_rate failed: %d\n", ret);
+	ret = bL_cpufreq_set_rate(cpu, actual_cluster, new_cluster, freqs.new);
+	if (ret)
 		return ret;
-	}
 
 	policy->cur = freqs.new;
 
@@ -94,7 +225,73 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy,
 	return ret;
 }
 
-static void put_cluster_clk_and_freq_table(struct device *cpu_dev)
+/* Number of entries in @table that precede the CPUFREQ_TABLE_END marker. */
+static inline u32 get_table_count(struct cpufreq_frequency_table *table)
+{
+	int n = 0;
+
+	while (table[n].frequency != CPUFREQ_TABLE_END)
+		n++;
+	return n;
+}
+
+/* Smallest frequency in @table; ~0 when the table has no entries. */
+static inline u32 get_table_min(struct cpufreq_frequency_table *table)
+{
+	uint32_t lowest = ~0;
+
+	for (; table->frequency != CPUFREQ_TABLE_END; table++)
+		if (table->frequency < lowest)
+			lowest = table->frequency;
+	return lowest;
+}
+
+/* Largest frequency in @table; 0 when the table has no entries. */
+static inline u32 get_table_max(struct cpufreq_frequency_table *table)
+{
+	uint32_t highest = 0;
+
+	for (; table->frequency != CPUFREQ_TABLE_END; table++)
+		if (table->frequency > highest)
+			highest = table->frequency;
+	return highest;
+}
+
+static int merge_cluster_tables(void)
+{
+	int i, j, k = 0, count = 1;	/* count starts at 1 for the END entry */
+	struct cpufreq_frequency_table *table;
+
+	for (i = 0; i < MAX_CLUSTERS; i++)
+		count += get_table_count(freq_table[i]);
+
+	table = kzalloc(sizeof(*table) * count, GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
+
+	freq_table[MAX_CLUSTERS] = table;	/* extra slot after the per-cluster tables */
+
+	/* Add in reverse order to get freqs in increasing order */
+	for (i = MAX_CLUSTERS - 1; i >= 0; i--) {
+		for (j = 0; freq_table[i][j].frequency != CPUFREQ_TABLE_END;
+				j++) {
+			table[k].frequency = VIRT_FREQ(i,
+					freq_table[i][j].frequency);	/* halved for A7_CLUSTER */
+			pr_debug("%s: index: %d, freq: %d\n", __func__, k,
+					table[k].frequency);
+			k++;
+		}
+	}
+
+	table[k].index = k;	/* only the terminator gets an explicit index */
+	table[k].frequency = CPUFREQ_TABLE_END;
+
+	pr_debug("%s: End, table: %p, count: %d\n", __func__, table, k);
+
+	return 0;	/* -ENOMEM above is the only failure */
+}
+
+static void _put_cluster_clk_and_freq_table(struct device *cpu_dev)
 {
 	u32 cluster = cpu_to_cluster(cpu_dev->id);
 
@@ -105,10 +302,35 @@ static void put_cluster_clk_and_freq_table(struct device *cpu_dev)
 	}
 }
 
-static int get_cluster_clk_and_freq_table(struct device *cpu_dev)
+static void put_cluster_clk_and_freq_table(struct device *cpu_dev)
 {
 	u32 cluster = cpu_to_cluster(cpu_dev->id);
-	char name[14] = "cpu-cluster.";
+	int i;
+
+	if (cluster < MAX_CLUSTERS)
+		return _put_cluster_clk_and_freq_table(cpu_dev);
+
+	if (atomic_dec_return(&cluster_usage[MAX_CLUSTERS]))
+		return;	/* virtual cluster still has users */
+
+	for (i = 0; i < MAX_CLUSTERS; i++) {
+		struct device *cdev = get_cpu_device(i);
+		if (!cdev) {
+			pr_err("%s: failed to get cpu%d device\n", __func__, i);
+			continue;	/* still release the rest and the table */
+		}
+		_put_cluster_clk_and_freq_table(cdev);
+	}
+
+	/* free virtual table */
+	kfree(freq_table[MAX_CLUSTERS]);
+	freq_table[MAX_CLUSTERS] = NULL;	/* no dangling pointer after free */
+}
+
+static int _get_cluster_clk_and_freq_table(struct device *cpu_dev)
+{
+	u32 cluster = cpu_to_cluster(cpu_dev->id);
+	char name[14] = "cpu-cluster.X";
 	int ret;
 
 	if (atomic_inc_return(&cluster_usage[cluster]) != 1)
@@ -149,6 +371,62 @@ atomic_dec:
 	return ret;
 }
 
+/* Get clk + freq table for @cpu_dev's cluster (all clusters when switching). */
+static int get_cluster_clk_and_freq_table(struct device *cpu_dev)
+{
+	u32 cluster = cpu_to_cluster(cpu_dev->id);
+	int i, ret;
+
+	if (cluster < MAX_CLUSTERS)
+		return _get_cluster_clk_and_freq_table(cpu_dev);
+
+	if (atomic_inc_return(&cluster_usage[MAX_CLUSTERS]) != 1)
+		return 0;
+
+	/* Get data for all clusters, then merge both into the virtual cluster */
+	for (i = 0; i < MAX_CLUSTERS; i++) {
+		struct device *cdev = get_cpu_device(i);
+		if (!cdev) {
+			pr_err("%s: failed to get cpu%d device\n", __func__, i);
+			ret = -ENODEV;
+			goto put_clusters;
+		}
+
+		ret = _get_cluster_clk_and_freq_table(cdev);
+		if (ret)
+			goto put_clusters;
+	}
+
+	ret = merge_cluster_tables();
+	if (ret)
+		goto put_clusters;
+
+	/* Assuming 2 cluster, set clk_big_min and clk_little_max */
+	clk_big_min = get_table_min(freq_table[0]);
+	clk_little_max = VIRT_FREQ(1, get_table_max(freq_table[1]));
+
+	pr_debug("%s: cluster: %d, clk_big_min: %d, clk_little_max: %d\n",
+			__func__, cluster, clk_big_min, clk_little_max);
+
+	return 0;
+
+put_clusters:
+	/* Unwind clusters 0..i-1; always drop the virtual-cluster count too */
+	while (i--) {
+		struct device *cdev = get_cpu_device(i);
+		if (!cdev) {
+			pr_err("%s: failed to get cpu%d device\n", __func__, i);
+			continue;
+		}
+
+		_put_cluster_clk_and_freq_table(cdev);
+	}
+
+	atomic_dec(&cluster_usage[MAX_CLUSTERS]);
+
+	return ret;
+}
+
 /* Per-CPU initialization */
 static int bL_cpufreq_init(struct cpufreq_policy *policy)
 {
@@ -177,37 +455,30 @@ static int bL_cpufreq_init(struct cpufreq_policy *policy)
 
 	cpufreq_frequency_table_get_attr(freq_table[cur_cluster], policy->cpu);
 
+	if (cur_cluster < MAX_CLUSTERS) {
+		cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu));
+
+		per_cpu(physical_cluster, policy->cpu) = cur_cluster;
+	} else {
+		/* Assumption: during init, we are always running on A15 */
+		per_cpu(physical_cluster, policy->cpu) = A15_CLUSTER;
+	}
+
 	if (arm_bL_ops->get_transition_latency)
 		policy->cpuinfo.transition_latency =
 			arm_bL_ops->get_transition_latency(cpu_dev);
 	else
 		policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
 
-	policy->cur = bL_cpufreq_get(policy->cpu);
+	policy->cur = clk_get_cpu_rate(policy->cpu);
 
-	cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu));
+	if (is_bL_switching_enabled())
+		per_cpu(cpu_last_req_freq, policy->cpu) = policy->cur;
 
 	dev_info(cpu_dev, "%s: CPU %d initialized\n", __func__, policy->cpu);
 	return 0;
 }
 
-static int bL_cpufreq_exit(struct cpufreq_policy *policy)
-{
-	struct device *cpu_dev;
-
-	cpu_dev = get_cpu_device(policy->cpu);
-	if (!cpu_dev) {
-		pr_err("%s: failed to get cpu%d device\n", __func__,
-				policy->cpu);
-		return -ENODEV;
-	}
-
-	put_cluster_clk_and_freq_table(cpu_dev);
-	dev_dbg(cpu_dev, "%s: Exited, cpu: %d\n", __func__, policy->cpu);
-
-	return 0;
-}
-
 /* Export freq_table to sysfs */
 static struct freq_attr *bL_cpufreq_attr[] = {
 	&cpufreq_freq_attr_scaling_available_freqs,
@@ -219,16 +490,47 @@ static struct cpufreq_driver bL_cpufreq_driver = {
 	.flags			= CPUFREQ_STICKY,
 	.verify			= bL_cpufreq_verify_policy,
 	.target			= bL_cpufreq_set_target,
-	.get			= bL_cpufreq_get,
+	.get			= bL_cpufreq_get_rate,
 	.init			= bL_cpufreq_init,
-	.exit			= bL_cpufreq_exit,
 	.have_governor_per_policy = true,
 	.attr			= bL_cpufreq_attr,
 };
 
+static int bL_cpufreq_switcher_notifier(struct notifier_block *nfb,
+					unsigned long action, void *_arg)
+{
+	pr_debug("%s: action: %ld\n", __func__, action);
+
+	switch (action) {
+	case BL_NOTIFY_PRE_ENABLE:
+	case BL_NOTIFY_PRE_DISABLE:
+		cpufreq_unregister_driver(&bL_cpufreq_driver);
+		break;
+
+	case BL_NOTIFY_POST_ENABLE:
+		set_switching_enabled(true);
+		cpufreq_register_driver(&bL_cpufreq_driver);
+		break;
+
+	case BL_NOTIFY_POST_DISABLE:
+		set_switching_enabled(false);
+		cpufreq_register_driver(&bL_cpufreq_driver);
+		break;
+
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block bL_switcher_notifier = {
+	.notifier_call = bL_cpufreq_switcher_notifier,
+};
+
 int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops)
 {
-	int ret;
+	int ret, i;
 
 	if (arm_bL_ops) {
 		pr_debug("%s: Already registered: %s, exiting\n", __func__,
@@ -243,16 +545,29 @@ int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops)
 
 	arm_bL_ops = ops;
 
+	ret = bL_switcher_get_enabled();
+	set_switching_enabled(ret);
+
+	for (i = 0; i < MAX_CLUSTERS; i++)
+		mutex_init(&cluster_lock[i]);
+
 	ret = cpufreq_register_driver(&bL_cpufreq_driver);
 	if (ret) {
 		pr_info("%s: Failed registering platform driver: %s, err: %d\n",
 				__func__, ops->name, ret);
 		arm_bL_ops = NULL;
 	} else {
-		pr_info("%s: Registered platform driver: %s\n", __func__,
-				ops->name);
+		ret = bL_switcher_register_notifier(&bL_switcher_notifier);
+		if (ret) {
+			cpufreq_unregister_driver(&bL_cpufreq_driver);
+			arm_bL_ops = NULL;
+		} else {
+			pr_info("%s: Registered platform driver: %s\n",
+					__func__, ops->name);
+		}
 	}
 
+	bL_switcher_put_enabled();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(bL_cpufreq_register);
@@ -265,9 +580,31 @@ void bL_cpufreq_unregister(struct cpufreq_arm_bL_ops *ops)
 		return;
 	}
 
+	bL_switcher_get_enabled();
+	bL_switcher_unregister_notifier(&bL_switcher_notifier);
 	cpufreq_unregister_driver(&bL_cpufreq_driver);
+	bL_switcher_put_enabled();
 	pr_info("%s: Un-registered platform driver: %s\n", __func__,
 			arm_bL_ops->name);
+
+	/* For saving table get/put on every cpu in/out */
+	if (is_bL_switching_enabled()) {
+		put_cluster_clk_and_freq_table(get_cpu_device(0));
+	} else {
+		int i;
+
+		for (i = 0; i < MAX_CLUSTERS; i++) {
+			struct device *cdev = get_cpu_device(i);
+			if (!cdev) {
+				pr_err("%s: failed to get cpu%d device\n",
+						__func__, i);
+				return;
+			}
+
+			put_cluster_clk_and_freq_table(cdev);
+		}
+	}
+
 	arm_bL_ops = NULL;
 }
 EXPORT_SYMBOL_GPL(bL_cpufreq_unregister);
diff --git a/drivers/cpufreq/arm_big_little.h b/drivers/cpufreq/arm_big_little.h
index 79b2ce17884d..4f5a03d3aef6 100644
--- a/drivers/cpufreq/arm_big_little.h
+++ b/drivers/cpufreq/arm_big_little.h
@@ -23,6 +23,20 @@
 #include <linux/device.h>
 #include <linux/types.h>
 
+/* Currently we support only two clusters */
+#define A15_CLUSTER	0
+#define A7_CLUSTER	1
+#define MAX_CLUSTERS	2
+
+#ifdef CONFIG_BL_SWITCHER
+extern bool bL_switching_enabled;
+#define is_bL_switching_enabled()		bL_switching_enabled
+#define set_switching_enabled(x) 		(bL_switching_enabled = (x))
+#else
+#define is_bL_switching_enabled()		false
+#define set_switching_enabled(x) 		do { } while (0)
+#endif
+
 struct cpufreq_arm_bL_ops {
 	char name[CPUFREQ_NAME_LEN];
 	int (*get_transition_latency)(struct device *cpu_dev);
@@ -36,7 +50,8 @@ struct cpufreq_arm_bL_ops {
 
 static inline int cpu_to_cluster(int cpu)
 {
-	return topology_physical_package_id(cpu);
+	return is_bL_switching_enabled() ? MAX_CLUSTERS:
+		topology_physical_package_id(cpu);
 }
 
 int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops);
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index bfd6273fd873..66733f1d55d4 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -21,6 +21,9 @@
 #include <linux/spinlock.h>
 #include <linux/notifier.h>
 #include <asm/cputime.h>
+#ifdef CONFIG_BL_SWITCHER
+#include <asm/bL_switcher.h>
+#endif
 
 static spinlock_t cpufreq_stats_lock;
 
@@ -378,7 +381,7 @@ static struct notifier_block notifier_trans_block = {
 	.notifier_call = cpufreq_stat_notifier_trans
 };
 
-static int __init cpufreq_stats_init(void)
+static int cpufreq_stats_setup(void)
 {
 	int ret;
 	unsigned int cpu;
@@ -406,7 +409,8 @@ static int __init cpufreq_stats_init(void)
 
 	return 0;
 }
-static void __exit cpufreq_stats_exit(void)
+
+static void cpufreq_stats_cleanup(void)
 {
 	unsigned int cpu;
 
@@ -421,6 +425,54 @@ static void __exit cpufreq_stats_exit(void)
 	}
 }
 
+#ifdef CONFIG_BL_SWITCHER
+static int cpufreq_stats_switcher_notifier(struct notifier_block *nfb,
+					unsigned long action, void *_arg)
+{
+	switch (action) {
+	case BL_NOTIFY_PRE_ENABLE:
+	case BL_NOTIFY_PRE_DISABLE:
+		cpufreq_stats_cleanup();
+		break;
+
+	case BL_NOTIFY_POST_ENABLE:
+	case BL_NOTIFY_POST_DISABLE:
+		cpufreq_stats_setup();
+		break;
+
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block switcher_notifier = {
+	.notifier_call = cpufreq_stats_switcher_notifier,
+};
+#endif
+
+static int __init cpufreq_stats_init(void)
+{
+	int ret;
+	spin_lock_init(&cpufreq_stats_lock);
+
+	ret = cpufreq_stats_setup();
+#ifdef CONFIG_BL_SWITCHER
+	if (!ret)
+		bL_switcher_register_notifier(&switcher_notifier);
+#endif
+	return ret;
+}
+
+static void __exit cpufreq_stats_exit(void)
+{
+#ifdef CONFIG_BL_SWITCHER
+	bL_switcher_unregister_notifier(&switcher_notifier);
+#endif
+	cpufreq_stats_cleanup();
+}
+
 MODULE_AUTHOR("Zou Nan hai <nanhai.zou@intel.com>");
 MODULE_DESCRIPTION("'cpufreq_stats' - A driver to export cpufreq stats "
 				"through sysfs filesystem");
diff --git a/drivers/cpufreq/vexpress_big_little.c b/drivers/cpufreq/vexpress_big_little.c
new file mode 100644
index 000000000000..1abb883c051b
--- /dev/null
+++ b/drivers/cpufreq/vexpress_big_little.c
@@ -0,0 +1,86 @@
+/*
+ * Vexpress big.LITTLE CPUFreq Interface driver
+ *
+ * It provides necessary ops to arm_big_little cpufreq driver and gets
+ * Frequency information from Device Tree. Freq table in DT must be in KHz.
+ *
+ * Copyright (C) 2013 Linaro.
+ * Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cpufreq.h>
+#include <linux/export.h>
+#include <linux/opp.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/vexpress.h>
+#include "arm_big_little.h"
+
+/* ->init_opp_table: add one OPP per frequency the SPC lists for the cluster. */
+static int vexpress_init_opp_table(struct device *cpu_dev)
+{
+	int i, count, ret, cluster = cpu_to_cluster(cpu_dev->id);
+	u32 *table = NULL;	/* stays NULL if the SPC call leaves it unset */
+
+	count = vexpress_spc_get_freq_table(cluster, &table);
+	if (!table || count <= 0) {
+		pr_err("SPC controller returned invalid freq table\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < count; i++) {
+		/* FIXME: Voltage value */
+		ret = opp_add(cpu_dev, table[i] * 1000, 900000);
+		if (ret) {
+			dev_warn(cpu_dev, "%s: Failed to add OPP %u, err: %d\n",
+				 __func__, table[i] * 1000, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int vexpress_get_transition_latency(struct device *cpu_dev)
+{
+	/* 1 ms */
+	return 1000000;
+}
+
+static struct cpufreq_arm_bL_ops vexpress_bL_ops = {
+	.name	= "vexpress-bL",
+	.get_transition_latency = vexpress_get_transition_latency,
+	.init_opp_table = vexpress_init_opp_table,
+};
+
+/* Module init: register the ops once vexpress_spc_check_loaded() succeeds. */
+static int __init vexpress_bL_init(void)
+{
+	if (!vexpress_spc_check_loaded()) {
+		pr_info("%s: No SPC found\n", __func__);
+		return -ENOENT;
+	}
+	return bL_cpufreq_register(&vexpress_bL_ops);
+}
+module_init(vexpress_bL_init);
+
+static void __exit vexpress_bL_exit(void)	/* module exit: drop the ops */
+{
+	bL_cpufreq_unregister(&vexpress_bL_ops);
+}
+module_exit(vexpress_bL_exit);
+
+MODULE_AUTHOR("Viresh Kumar <viresh.kumar@linaro.org>");
+MODULE_DESCRIPTION("ARM Vexpress big LITTLE cpufreq driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
index 0d8bd55e776f..7d8256a5ea97 100644
--- a/drivers/cpuidle/Makefile
+++ b/drivers/cpuidle/Makefile
@@ -4,6 +4,6 @@
 
 obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
 obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o
-
+obj-$(CONFIG_BIG_LITTLE) += arm_big_little.o
 obj-$(CONFIG_CPU_IDLE_CALXEDA) += cpuidle-calxeda.o
 obj-$(CONFIG_ARCH_KIRKWOOD) += cpuidle-kirkwood.o
diff --git a/drivers/cpuidle/arm_big_little.c b/drivers/cpuidle/arm_big_little.c
new file mode 100644
index 000000000000..e5378896a8cb
--- /dev/null
+++ b/drivers/cpuidle/arm_big_little.c
@@ -0,0 +1,183 @@
+/*
+ * big.LITTLE CPU idle driver.
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/arm-cci.h>
+#include <linux/bitmap.h>
+#include <linux/cpuidle.h>
+#include <linux/cpu_pm.h>
+#include <linux/clockchips.h>
+#include <linux/debugfs.h>
+#include <linux/hrtimer.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/tick.h>
+#include <linux/vexpress.h>
+#include <asm/mcpm.h>
+#include <asm/cpuidle.h>
+#include <asm/cputype.h>
+#include <asm/idmap.h>
+#include <asm/proc-fns.h>
+#include <asm/suspend.h>
+#include <linux/of.h>
+
+static int bl_cpuidle_simple_enter(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int index)
+{
+	ktime_t time_start, time_end;
+	s64 diff;
+
+	time_start = ktime_get();
+
+	cpu_do_idle();
+
+	time_end = ktime_get();
+
+	local_irq_enable();
+
+	diff = ktime_to_us(ktime_sub(time_end, time_start));
+	if (diff > INT_MAX)
+		diff = INT_MAX;
+
+	dev->last_residency = (int) diff;
+
+	return index;
+}
+
+static int bl_enter_powerdown(struct cpuidle_device *dev,
+				struct cpuidle_driver *drv, int idx);
+
+static struct cpuidle_state bl_cpuidle_set[] __initdata = {
+	[0] = {
+		.enter                  = bl_cpuidle_simple_enter,
+		.exit_latency           = 1,
+		.target_residency       = 1,
+		.power_usage		= UINT_MAX,
+		.flags                  = CPUIDLE_FLAG_TIME_VALID,
+		.name                   = "WFI",
+		.desc                   = "ARM WFI",
+	},
+	[1] = {
+		.enter			= bl_enter_powerdown,
+		.exit_latency		= 300,
+		.target_residency	= 1000,
+		.flags			= CPUIDLE_FLAG_TIME_VALID,
+		.name			= "C1",
+		.desc			= "ARM power down",
+	},
+};
+
+struct cpuidle_driver bl_idle_driver = {
+	.name = "bl_idle",
+	.owner = THIS_MODULE,
+	.safe_state_index = 0
+};
+
+static DEFINE_PER_CPU(struct cpuidle_device, bl_idle_dev);
+
+static int notrace bl_powerdown_finisher(unsigned long arg)	/* passed to cpu_suspend() */
+{
+	unsigned int mpidr = read_cpuid_mpidr();
+	unsigned int cluster = (mpidr >> 8) & 0xf;	/* MPIDR bits [11:8] */
+	unsigned int cpu = mpidr & 0xf;			/* MPIDR bits [3:0] */
+
+	mcpm_set_entry_vector(cpu, cluster, cpu_resume);	/* re-enter via cpu_resume */
+	mcpm_cpu_suspend(0);  /* 0 should be replaced with better value here */
+	return 1;	/* NOTE(review): presumably reached only if the suspend returns */
+}
+
+/*
+ * bl_enter_powerdown - Programs CPU to enter the specified state
+ * @dev: cpuidle device
+ * @drv: The target state to be programmed
+ * @idx: state index
+ *
+ * Called from the CPUidle framework to program the device to the
+ * specified target state selected by the governor.
+ */
+static int bl_enter_powerdown(struct cpuidle_device *dev,
+				struct cpuidle_driver *drv, int idx)
+{
+	struct timespec ts_preidle, ts_postidle, ts_idle;
+	int ret;
+
+	/* Used to keep track of the total time in idle */
+	getnstimeofday(&ts_preidle);
+
+	BUG_ON(!irqs_disabled());
+
+	cpu_pm_enter();
+
+	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+
+	ret = cpu_suspend((unsigned long) dev, bl_powerdown_finisher);
+	if (ret)
+		BUG();
+
+	mcpm_cpu_powered_up();
+
+	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+
+	cpu_pm_exit();
+
+	getnstimeofday(&ts_postidle);
+	local_irq_enable();
+	ts_idle = timespec_sub(ts_postidle, ts_preidle);
+
+	dev->last_residency = ts_idle.tv_nsec / NSEC_PER_USEC +
+					ts_idle.tv_sec * USEC_PER_SEC;
+	return idx;
+}
+
+/*
+ * bl_idle_init
+ *
+ * Registers the bl specific cpuidle driver with the cpuidle
+ * framework with the valid set of states.
+ */
+int __init bl_idle_init(void)
+{
+	struct cpuidle_driver *drv = &bl_idle_driver;
+	struct cpuidle_device *dev;
+	struct device_node *np;
+	int cpu_id, ret;
+
+	np = of_find_compatible_node(NULL, NULL, "arm,generic");
+	if (!np) {
+		pr_info("%s: No compatible node found\n", __func__);
+		return -ENODEV;
+	}
+	of_node_put(np);	/* presence check only; drop the reference */
+
+	/* bl_cpuidle_set is __initdata, so copy it into the driver now */
+	drv->state_count = ARRAY_SIZE(bl_cpuidle_set);
+	memcpy(drv->states, bl_cpuidle_set, sizeof(bl_cpuidle_set));
+
+	ret = cpuidle_register_driver(drv);
+	if (ret) {
+		pr_err("%s: Cpuidle register driver failed\n", __func__);
+		return ret;
+	}
+
+	for_each_cpu(cpu_id, cpu_online_mask) {
+		dev = &per_cpu(bl_idle_dev, cpu_id);
+		dev->cpu = cpu_id;
+		dev->state_count = drv->state_count;
+		if (cpuidle_register_device(dev)) {
+			printk(KERN_ERR "%s: Cpuidle register device failed\n",
+			       __func__);
+			return -EIO;
+		}
+		pr_info("CPUidle for CPU%d registered\n", cpu_id);
+	}
+	return 0;
+}
+
+device_initcall(bl_idle_init);
diff --git a/drivers/cpuidle/cpuidle-calxeda.c b/drivers/cpuidle/cpuidle-calxeda.c
index 223379169cb0..0e6e408c0a63 100644
--- a/drivers/cpuidle/cpuidle-calxeda.c
+++ b/drivers/cpuidle/cpuidle-calxeda.c
@@ -37,20 +37,6 @@
 extern void highbank_set_cpu_jump(int cpu, void *jump_addr);
 extern void *scu_base_addr;
 
-static inline unsigned int get_auxcr(void)
-{
-	unsigned int val;
-	asm("mrc p15, 0, %0, c1, c0, 1	@ get AUXCR" : "=r" (val) : : "cc");
-	return val;
-}
-
-static inline void set_auxcr(unsigned int val)
-{
-	asm volatile("mcr p15, 0, %0, c1, c0, 1	@ set AUXCR"
-	  : : "r" (val) : "cc");
-	isb();
-}
-
 static noinline void calxeda_idle_restore(void)
 {
 	set_cr(get_cr() | CR_C);
diff --git a/drivers/extcon/extcon-adc-jack.c b/drivers/extcon/extcon-adc-jack.c
index d0233cd18ffa..5985807e52c9 100644
--- a/drivers/extcon/extcon-adc-jack.c
+++ b/drivers/extcon/extcon-adc-jack.c
@@ -87,7 +87,8 @@ static irqreturn_t adc_jack_irq_thread(int irq, void *_data)
 {
 	struct adc_jack_data *data = _data;
 
-	schedule_delayed_work(&data->handler, data->handling_delay);
+	queue_delayed_work(system_power_efficient_wq,
+			   &data->handler, data->handling_delay);
 	return IRQ_HANDLED;
 }
 
diff --git a/drivers/extcon/extcon-gpio.c b/drivers/extcon/extcon-gpio.c
index 02bec32adde4..f874c30ddbff 100644
--- a/drivers/extcon/extcon-gpio.c
+++ b/drivers/extcon/extcon-gpio.c
@@ -56,7 +56,7 @@ static irqreturn_t gpio_irq_handler(int irq, void *dev_id)
 {
 	struct gpio_extcon_data *extcon_data = dev_id;
 
-	schedule_delayed_work(&extcon_data->work,
+	queue_delayed_work(system_power_efficient_wq, &extcon_data->work,
 			      extcon_data->debounce_jiffies);
 	return IRQ_HANDLED;
 }
diff --git a/drivers/gator/Kconfig b/drivers/gator/Kconfig
new file mode 100644
index 000000000000..7ea0fcc3d01d
--- /dev/null
+++ b/drivers/gator/Kconfig
@@ -0,0 +1,33 @@
+config GATOR
+	tristate "Gator module for ARM's Streamline Performance Analyzer"
+	default m if (ARM || ARM64)
+	depends on PROFILING
+	depends on HIGH_RES_TIMERS
+	depends on LOCAL_TIMERS || !(ARM && SMP)
+	select TRACING
+
+config GATOR_WITH_MALI_SUPPORT
+	bool
+
+choice
+	prompt "Enable Mali GPU support in Gator"
+	depends on GATOR
+	optional
+
+config GATOR_MALI_400MP
+	bool "Mali-400MP"
+	select GATOR_WITH_MALI_SUPPORT
+
+config GATOR_MALI_T6XX
+	bool "Mali-T604 or Mali-T658"
+	select GATOR_WITH_MALI_SUPPORT
+
+endchoice
+
+config GATOR_MALI_PATH
+	string "Path to Mali driver"
+	depends on GATOR_WITH_MALI_SUPPORT
+	default "drivers/gpu/arm/mali400mp"
+	help
+	  The gator code adds this to its include path so it can get the Mali
+	  trace headers with: #include "linux/mali_linux_trace.h"
diff --git a/drivers/gator/LICENSE b/drivers/gator/LICENSE
new file mode 100644
index 000000000000..d159169d1050
--- /dev/null
+++ b/drivers/gator/LICENSE
@@ -0,0 +1,339 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/drivers/gator/Makefile b/drivers/gator/Makefile
new file mode 100644
index 000000000000..3dc9d059a4b4
--- /dev/null
+++ b/drivers/gator/Makefile
@@ -0,0 +1,76 @@
+ifneq ($(KERNELRELEASE),)
+
+# Uncomment the following line to enable kernel stack unwinding within gator, or update gator_backtrace.c
+# EXTRA_CFLAGS +=	-DGATOR_KERNEL_STACK_UNWINDING
+
+CONFIG_GATOR ?= m
+obj-$(CONFIG_GATOR) := gator.o
+
+gator-y :=	gator_main.o \
+		gator_events_irq.o \
+		gator_events_sched.o \
+		gator_events_net.o \
+		gator_events_block.o \
+		gator_events_meminfo.o \
+		gator_events_perf_pmu.o \
+		gator_events_mmapped.o \
+
+# Convert the old GATOR_WITH_MALI_SUPPORT to the new kernel flags
+ifneq ($(GATOR_WITH_MALI_SUPPORT),)
+  CONFIG_GATOR_WITH_MALI_SUPPORT := y
+  ifeq ($(GATOR_WITH_MALI_SUPPORT),MALI_T6xx)
+    CONFIG_GATOR_MALI_4XXMP := n
+    CONFIG_GATOR_MALI_T6XX := y
+  else
+    CONFIG_GATOR_MALI_4XXMP := y
+    CONFIG_GATOR_MALI_T6XX := n
+  endif
+  EXTRA_CFLAGS += -DMALI_SUPPORT=$(GATOR_WITH_MALI_SUPPORT)
+  ifneq ($(GATOR_MALI_INTERFACE_STYLE),)
+    EXTRA_CFLAGS += -DGATOR_MALI_INTERFACE_STYLE=$(GATOR_MALI_INTERFACE_STYLE)
+  endif
+endif
+
+ifeq ($(CONFIG_GATOR_WITH_MALI_SUPPORT),y)
+  # Select the event sources and the matching MALI_SUPPORT value together so
+  # they cannot disagree (Kconfig names the 4xx option GATOR_MALI_400MP).
+  ifeq ($(CONFIG_GATOR_MALI_T6XX),y)
+    gator-y +=	gator_events_mali_t6xx.o gator_events_mali_t6xx_hw.o
+    include $(src)/mali_t6xx.mk
+    ccflags-y += -DMALI_SUPPORT=MALI_T6xx
+  else
+    gator-y +=	gator_events_mali_4xx.o
+    ccflags-y += -DMALI_SUPPORT=MALI_4xx
+  endif
+  gator-y +=	gator_events_mali_common.o
+  ifneq ($(CONFIG_GATOR_MALI_PATH),)
+    ccflags-y += -I$(CONFIG_GATOR_MALI_PATH)
+  endif
+endif
+
+# GATOR_TEST controls whether to include (=1) or exclude (=0) test code. 
+GATOR_TEST ?= 0
+EXTRA_CFLAGS +=	-DGATOR_TEST=$(GATOR_TEST)
+
+gator-$(CONFIG_ARM) +=	gator_events_armv6.o \
+			gator_events_armv7.o \
+			gator_events_ccn-504.o \
+			gator_events_l2c-310.o \
+			gator_events_scorpion.o
+
+gator-$(CONFIG_ARM64) +=	gator_events_ccn-504.o
+
+else
+
+all:
+	@echo
+	@echo "usage:"
+	@echo "      make -C <kernel_build_dir> M=\`pwd\` ARCH=arm CROSS_COMPILE=<...> modules"
+	@echo
+	$(error)
+
+clean:
+	rm -f *.o .*.cmd modules.order Module.symvers gator.ko gator.mod.c
+	rm -rf .tmp_versions
+
+endif
diff --git a/drivers/gator/gator.h b/drivers/gator/gator.h
new file mode 100644
index 000000000000..d8981ed85a6a
--- /dev/null
+++ b/drivers/gator/gator.h
@@ -0,0 +1,142 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef GATOR_H_
+#define GATOR_H_
+
+#include <linux/version.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+
+#define GATOR_PERF_SUPPORT		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0))	// each test is fully parenthesized so it composes safely with ! and && in #if
+#define GATOR_PERF_PMU_SUPPORT  (GATOR_PERF_SUPPORT && defined(CONFIG_PERF_EVENTS) && (!(defined(__arm__) || defined(__aarch64__)) || defined(CONFIG_HW_PERF_EVENTS)))
+#define GATOR_NO_PERF_SUPPORT   (!(GATOR_PERF_SUPPORT))
+#define GATOR_CPU_FREQ_SUPPORT  ((LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)) && defined(CONFIG_CPU_FREQ))
+#define GATOR_IKS_SUPPORT       (defined(CONFIG_BL_SWITCHER))	// only meaningful inside #if: expands to a defined() test
+
+// cpu ids
+#define ARM1136     0xb36
+#define ARM1156     0xb56
+#define ARM1176     0xb76
+#define ARM11MPCORE 0xb02
+#define CORTEX_A5   0xc05
+#define CORTEX_A7   0xc07
+#define CORTEX_A8   0xc08
+#define CORTEX_A9   0xc09
+#define CORTEX_A12  0xc0d
+#define CORTEX_A15  0xc0f
+#define SCORPION    0x00f
+#define SCORPIONMP  0x02d
+#define KRAITSIM    0x049
+#define KRAIT       0x04d
+#define KRAIT_S4_PRO 0x06f
+#define CORTEX_A53  0xd03
+#define CORTEX_A57  0xd07
+#define AARCH64     0xd0f
+#define OTHER       0xfff
+
+#define MAXSIZE_CORE_NAME 32
+
+struct gator_cpu {	// Description of one CPU type, as returned by the gator_find_cpu_by_*() lookups
+	const int cpuid;	// presumably one of the "cpu ids" constants above - confirm
+	// Human readable name
+	const char core_name[MAXSIZE_CORE_NAME];
+	// Perf PMU name
+	const char * const pmu_name;
+	// gatorfs event name
+	const char * const pmnc_name;
+	// compatible from Documentation/devicetree/bindings/arm/cpus.txt
+	const char * const dt_name;
+	const int pmnc_counters;	// presumably the number of PMU event counters - confirm
+};
+
+const struct gator_cpu *gator_find_cpu_by_cpuid(const u32 cpuid);
+const struct gator_cpu *gator_find_cpu_by_pmu_name(const char *const name);
+
+/******************************************************************************
+ * Filesystem
+ ******************************************************************************/
+int gatorfs_create_file_perm(struct super_block *sb, struct dentry *root,
+			     char const *name,
+			     const struct file_operations *fops, int perm);
+
+struct dentry *gatorfs_mkdir(struct super_block *sb, struct dentry *root,
+			     char const *name);
+
+int gatorfs_create_ulong(struct super_block *sb, struct dentry *root,
+			 char const *name, unsigned long *val);
+
+int gatorfs_create_ro_ulong(struct super_block *sb, struct dentry *root,
+			    char const *name, unsigned long *val);
+
+void gator_op_create_files(struct super_block *sb, struct dentry *root);
+
+/******************************************************************************
+ * Tracepoints
+ ******************************************************************************/
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
+#	error Kernels prior to 2.6.32 not supported
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+#	define GATOR_DEFINE_PROBE(probe_name, proto) \
+		static void probe_##probe_name(PARAMS(proto))
+#	define GATOR_REGISTER_TRACE(probe_name) \
+		register_trace_##probe_name(probe_##probe_name)
+#	define GATOR_UNREGISTER_TRACE(probe_name) \
+		unregister_trace_##probe_name(probe_##probe_name)
+#else
+#	define GATOR_DEFINE_PROBE(probe_name, proto) \
+		static void probe_##probe_name(void *data, PARAMS(proto))
+#	define GATOR_REGISTER_TRACE(probe_name) \
+		register_trace_##probe_name(probe_##probe_name, NULL)
+#	define GATOR_UNREGISTER_TRACE(probe_name) \
+		unregister_trace_##probe_name(probe_##probe_name, NULL)
+#endif
+
+/******************************************************************************
+ * Events
+ ******************************************************************************/
+struct gator_interface {	// Callback table handed to gator_events_install()
+	void (*shutdown)(void);	// Complementary function to init
+	int (*create_files)(struct super_block *sb, struct dentry *root);
+	int (*start)(void);
+	void (*stop)(void);		// Complementary function to start
+	int (*online)(int **buffer, bool migrate);
+	int (*offline)(int **buffer, bool migrate);
+	void (*online_dispatch)(int cpu, bool migrate);	// called in process context but may not be running on core 'cpu'
+	void (*offline_dispatch)(int cpu, bool migrate);	// called in process context but may not be running on core 'cpu'
+	int (*read)(int **buffer);
+	int (*read64)(long long **buffer);
+	int (*read_proc)(long long **buffer, struct task_struct *);
+	struct list_head list;	// list linkage; presumably chained by gator_events_install() - confirm
+};
+
+int gator_events_install(struct gator_interface *interface);
+int gator_events_get_key(void);
+u32 gator_cpuid(void);
+
+void gator_backtrace_handler(struct pt_regs *const regs);
+
+#if !GATOR_IKS_SUPPORT
+// Without CONFIG_BL_SWITCHER the logical and physical cpu numbers are identical
+#define get_physical_cpu() smp_processor_id()
+#define lcpu_to_pcpu(lcpu) (lcpu)
+#define pcpu_to_lcpu(pcpu) (pcpu)
+
+#else
+// With CONFIG_BL_SWITCHER the translation is done by functions defined outside this header
+#define get_physical_cpu() lcpu_to_pcpu(get_logical_cpu())
+int lcpu_to_pcpu(const int lcpu);
+int pcpu_to_lcpu(const int pcpu);
+
+#endif
+
+#define get_logical_cpu() smp_processor_id()
+#define on_primary_core() (get_logical_cpu() == 0)
+
+#endif // GATOR_H_
diff --git a/drivers/gator/gator_annotate.c b/drivers/gator/gator_annotate.c
new file mode 100644
index 000000000000..5b9399bea230
--- /dev/null
+++ b/drivers/gator/gator_annotate.c
@@ -0,0 +1,186 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <asm/uaccess.h>
+#include <asm/current.h>
+#include <linux/spinlock.h>
+
+static DEFINE_SPINLOCK(annotate_lock);
+static bool collect_annotations = false;
+
+static int annotate_copy(struct file *file, char const __user *buf, size_t count)
+{
+	int cpu = 0;
+	int pos = per_cpu(gator_buffer_write, cpu)[ANNOTATE_BUF];
+	void *dst = &per_cpu(gator_buffer, cpu)[ANNOTATE_BUF][pos];
+
+	// file == NULL marks an in-kernel caller whose buf is a kernel pointer.
+	// NOTE(review): copy_from_user can fault and sleep, yet annotate_write calls this with annotate_lock held - confirm.
+	if (file == NULL)
+		memcpy(dst, buf, count);
+	else if (copy_from_user(dst, buf, count) != 0)
+		return -1;
+
+	// advance the write index past the copied bytes, wrapped by the buffer mask
+	per_cpu(gator_buffer_write, cpu)[ANNOTATE_BUF] = (pos + count) & gator_buffer_mask[ANNOTATE_BUF];
+	return 0;
+}
+
+static ssize_t annotate_write(struct file *file, char const __user *buf, size_t count_orig, loff_t *offset)
+{	// Appends one annotation record (header plus up to count payload bytes) to the cpu 0 ANNOTATE_BUF; file == NULL is how kannotate_write passes a kernel buffer
+	int pid, cpu, header_size, available, contiguous, length1, length2, size, count = count_orig & 0x7fffffff;
+	bool interrupt_context;
+
+	if (*offset) {
+		return -EINVAL;
+	}
+
+	interrupt_context = in_interrupt();
+	// Annotations are not supported in interrupt context, but may work if you comment out the next four lines of code.
+	//   By doing so, annotations in interrupt context can result in deadlocks and lost data.
+	if (interrupt_context) {
+		printk(KERN_WARNING "gator: Annotations are not supported in interrupt context. Edit gator_annotate.c in the gator driver to enable annotations in interrupt context.\n");
+		return -EINVAL;
+	}
+
+ retry:
+	// synchronize between cores and with collect_annotations
+	spin_lock(&annotate_lock);
+
+	if (!collect_annotations) {
+		// Not collecting annotations, tell the caller everything was written
+		size = count_orig;
+		goto annotate_write_out;
+	}
+
+	// Annotation only uses a single per-cpu buffer as the data must be in order to the engine
+	cpu = 0;
+
+	if (current == NULL) {
+		pid = 0;
+	} else {
+		pid = current->pid;
+	}
+
+	// reserve room for the record header (three packed ints plus one packed int64), then clamp the payload to the space left
+	header_size = MAXSIZE_PACK32 * 3 + MAXSIZE_PACK64;
+	available = buffer_bytes_available(cpu, ANNOTATE_BUF) - header_size;
+	size = count < available ? count : available;
+
+	if (size <= 0) {
+		// Buffer is full, wait until space is available
+		spin_unlock(&annotate_lock);
+
+		// Drop the annotation as blocking is not allowed in interrupt context (unreachable while the early return above is in place)
+		if (interrupt_context) {
+			return -EINVAL;
+		}
+
+		wait_event_interruptible(gator_annotate_wait, buffer_bytes_available(cpu, ANNOTATE_BUF) > header_size || !collect_annotations);
+
+		// Check to see if a signal is pending
+		if (signal_pending(current)) {
+			return -EINTR;
+		}
+
+		goto retry;
+	}
+
+	// only emit the record when the annotate buffer pointer is non-NULL; otherwise size is returned without storing anything
+	if (per_cpu(gator_buffer, cpu)[ANNOTATE_BUF]) {
+		u64 time = gator_get_time();
+		gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, get_physical_cpu());
+		gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, pid);
+		gator_buffer_write_packed_int64(cpu, ANNOTATE_BUF, time);
+		gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, size);
+
+		// determine the sizes to capture, length1 + length2 will equal size; a second copy is needed when size does not fit in the contiguous space
+		contiguous = contiguous_space_available(cpu, ANNOTATE_BUF);
+		if (size < contiguous) {
+			length1 = size;
+			length2 = 0;
+		} else {
+			length1 = contiguous;
+			length2 = size - contiguous;
+		}
+
+		if (annotate_copy(file, buf, length1) != 0) {
+			size = -EINVAL;
+			goto annotate_write_out;
+		}
+
+		if (length2 > 0 && annotate_copy(file, &buf[length1], length2) != 0) {
+			size = -EINVAL;
+			goto annotate_write_out;
+		}
+
+		// Check and commit; commit is set to occur once buffer is 3/4 full
+		buffer_check(cpu, ANNOTATE_BUF, time);
+	}
+
+annotate_write_out:
+	spin_unlock(&annotate_lock);
+
+	// return the number of payload bytes accepted (may be fewer than requested), or -EINVAL if a copy failed
+	return size;
+}
+
+#include "gator_annotate_kernel.c"
+
+static int annotate_release(struct inode *inode, struct file *file)
+{	// .release handler: writes a record with time 0 and size 0 for the closing task's pid, if the buffer exists and has room
+	int cpu = 0;
+
+	// synchronize between cores; annotate_write takes the same lock
+	spin_lock(&annotate_lock);
+
+	if (per_cpu(gator_buffer, cpu)[ANNOTATE_BUF] && buffer_check_space(cpu, ANNOTATE_BUF, MAXSIZE_PACK64 + 3 * MAXSIZE_PACK32)) {
+		uint32_t pid = current->pid;
+		gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, get_physical_cpu());
+		gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, pid);
+		gator_buffer_write_packed_int64(cpu, ANNOTATE_BUF, 0);	// time
+		gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, 0);	// size
+	}
+
+	// Check and commit; commit is set to occur once buffer is 3/4 full
+	buffer_check(cpu, ANNOTATE_BUF, gator_get_time());
+
+	spin_unlock(&annotate_lock);
+
+	return 0;
+}
+
+static const struct file_operations annotate_fops = {
+	.write = annotate_write,
+	.release = annotate_release
+};
+
+static int gator_annotate_create_files(struct super_block *sb, struct dentry *root)
+{	// creates the "annotate" file under root, backed by annotate_fops, with mode 0666
+	return gatorfs_create_file_perm(sb, root, "annotate", &annotate_fops, 0666);
+}
+
+static int gator_annotate_start(void)
+{
+	collect_annotations = true;	// annotate_write stores data from now on; set here without taking annotate_lock
+	return 0;
+}
+
+static void gator_annotate_stop(void)
+{
+	// the spinlock here will ensure that when this function exits, we are not in the middle of an annotation
+	spin_lock(&annotate_lock);
+	collect_annotations = false;
+	wake_up(&gator_annotate_wait);	// writers sleeping in annotate_write wake up and re-test collect_annotations
+	spin_unlock(&annotate_lock);
+}
diff --git a/drivers/gator/gator_annotate_kernel.c b/drivers/gator/gator_annotate_kernel.c
new file mode 100644
index 000000000000..a406e4882974
--- /dev/null
+++ b/drivers/gator/gator_annotate_kernel.c
@@ -0,0 +1,200 @@
+/**
+ * Copyright (C) ARM Limited 2012-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+// First byte of every annotation header built in this file
+#define ESCAPE_CODE 0x1c
+// Annotation type codes, stored in the second header byte
+#define STRING_ANNOTATION 0x06
+#define NAME_CHANNEL_ANNOTATION 0x07
+#define NAME_GROUP_ANNOTATION 0x08
+#define VISUAL_ANNOTATION 0x04
+#define MARKER_ANNOTATION 0x05
+
+/*
+ * Write size bytes starting at ptr into the annotation stream.
+ *
+ * annotate_write() is called repeatedly with the not-yet-written remainder
+ * until all bytes have been consumed.  A warning is logged and the rest of
+ * the data is dropped if a call reports an error or makes no progress.
+ */
+static void kannotate_write(const char *ptr, unsigned int size)
+{
+	// unsigned like size, so the loop condition compares values of the same signedness
+	unsigned int pos = 0;
+	loff_t offset = 0;
+
+	while (pos < size) {
+		int retval = annotate_write(NULL, &ptr[pos], size - pos, &offset);
+
+		// a zero return would leave pos unchanged and spin forever, so treat it like an error
+		if (retval <= 0) {
+			printk(KERN_WARNING "gator: kannotate_write failed with return value %d\n", retval);
+			return;
+		}
+		pos += retval;
+	}
+}
+
+/* Store val into buf[0..1], least significant byte first. */
+static void marshal_u16(char *buf, u16 val)
+{
+	unsigned int i;
+
+	for (i = 0; i < sizeof(val); i++)
+		buf[i] = (val >> (8 * i)) & 0xff;
+}
+
+/* Store val into buf[0..3], least significant byte first. */
+static void marshal_u32(char *buf, u32 val)
+{
+	unsigned int i;
+
+	for (i = 0; i < sizeof(val); i++)
+		buf[i] = (val >> (8 * i)) & 0xff;
+}
+
+/*
+ * Emit a string annotation on the given channel.
+ *
+ * Header (8 bytes): escape code, STRING_ANNOTATION, 32-bit channel, 16-bit
+ * string length.  The string bytes follow without their terminator.  Only the
+ * low 16 bits of strlen(str) are used as the length.
+ */
+void gator_annotate_channel(int channel, const char *str)
+{
+	char header[8] = { ESCAPE_CODE, STRING_ANNOTATION };
+	const u16 len = strlen(str) & 0xffff;
+
+	marshal_u32(&header[2], channel);
+	marshal_u16(&header[6], len);
+
+	kannotate_write(header, sizeof(header));
+	kannotate_write(str, len);
+}
+
+EXPORT_SYMBOL(gator_annotate_channel);
+
+/* Emit a string annotation on channel 0. */
+void gator_annotate(const char *str)
+{
+	gator_annotate_channel(0, str);
+}
+
+EXPORT_SYMBOL(gator_annotate);
+
+/*
+ * Emit a colored string annotation on the given channel.
+ *
+ * Header (12 bytes): escape code, STRING_ANNOTATION, 32-bit channel, 16-bit
+ * payload size, 32-bit color.  The payload size counts the 4 color bytes plus
+ * the string bytes, which follow the header without their terminator.
+ *
+ * The string length is taken from the low 16 bits of strlen(str) and is then
+ * limited so that length + 4 still fits the 16-bit size field.
+ */
+void gator_annotate_channel_color(int channel, int color, const char *str)
+{
+	u16 text_len = strlen(str) & 0xffff;
+	char header[12];
+
+	// Without this limit the size field wraps below 4 and "size - 4" turns into a huge unsigned byte count
+	if (text_len > 0xffff - 4)
+		text_len = 0xffff - 4;
+
+	header[0] = ESCAPE_CODE;
+	header[1] = STRING_ANNOTATION;
+	marshal_u32(header + 2, channel);
+	marshal_u16(header + 6, text_len + 4);
+	marshal_u32(header + 8, color);
+	kannotate_write(header, sizeof(header));
+	kannotate_write(str, text_len);
+}
+
+EXPORT_SYMBOL(gator_annotate_channel_color);
+
+/* Emit a colored string annotation on channel 0. */
+void gator_annotate_color(int color, const char *str)
+{
+	gator_annotate_channel_color(0, color, str);
+}
+
+EXPORT_SYMBOL(gator_annotate_color);
+
+/*
+ * Emit a string annotation with a zero length on the given channel.
+ *
+ * Header (8 bytes): escape code, STRING_ANNOTATION, 32-bit channel, 16-bit
+ * length of 0.  No payload follows.
+ */
+void gator_annotate_channel_end(int channel)
+{
+	char header[8] = { ESCAPE_CODE, STRING_ANNOTATION };
+
+	marshal_u32(&header[2], channel);
+	marshal_u16(&header[6], 0);
+
+	kannotate_write(header, sizeof(header));
+}
+
+EXPORT_SYMBOL(gator_annotate_channel_end);
+
+/* Emit a zero-length string annotation on channel 0. */
+void gator_annotate_end(void)
+{
+	gator_annotate_channel_end(0);
+}
+
+EXPORT_SYMBOL(gator_annotate_end);
+
+/*
+ * Emit a name-channel annotation carrying a channel, a group and a name.
+ *
+ * Header (12 bytes): escape code, NAME_CHANNEL_ANNOTATION, 32-bit channel,
+ * 32-bit group, 16-bit name length.  The name bytes follow without their
+ * terminator.  Only the low 16 bits of strlen(str) are used as the length.
+ */
+void gator_annotate_name_channel(int channel, int group, const char *str)
+{
+	char header[12] = { ESCAPE_CODE, NAME_CHANNEL_ANNOTATION };
+	const u16 len = strlen(str) & 0xffff;
+
+	marshal_u32(&header[2], channel);
+	marshal_u32(&header[6], group);
+	marshal_u16(&header[10], len);
+
+	kannotate_write(header, sizeof(header));
+	kannotate_write(str, len);
+}
+
+EXPORT_SYMBOL(gator_annotate_name_channel);
+
+/*
+ * Emit a name-group annotation carrying a group and a name.
+ *
+ * Header (8 bytes): escape code, NAME_GROUP_ANNOTATION, 32-bit group, 16-bit
+ * name length.  The name bytes follow without their terminator.  Only the low
+ * 16 bits of strlen(str) are used as the length.
+ */
+void gator_annotate_name_group(int group, const char *str)
+{
+	char header[8] = { ESCAPE_CODE, NAME_GROUP_ANNOTATION };
+	const u16 len = strlen(str) & 0xffff;
+
+	marshal_u32(&header[2], group);
+	marshal_u16(&header[6], len);
+
+	kannotate_write(header, sizeof(header));
+	kannotate_write(str, len);
+}
+
+EXPORT_SYMBOL(gator_annotate_name_group);
+
+/*
+ * Emit a visual annotation: a string followed by a block of data.
+ *
+ * Written in order: a 4-byte header (escape code, VISUAL_ANNOTATION, 16-bit
+ * string length), the string bytes without their terminator, a 32-bit data
+ * length, then length bytes from data.  Only the low 16 bits of strlen(str)
+ * are used as the string length.
+ */
+void gator_annotate_visual(const char *data, unsigned int length, const char *str)
+{
+	char header[4] = { ESCAPE_CODE, VISUAL_ANNOTATION };
+	char data_len[4];
+	const u16 name_len = strlen(str) & 0xffff;
+
+	marshal_u16(&header[2], name_len);
+	marshal_u32(data_len, length);
+
+	kannotate_write(header, sizeof(header));
+	kannotate_write(str, name_len);
+	kannotate_write(data_len, sizeof(data_len));
+	kannotate_write(data, length);
+}
+
+EXPORT_SYMBOL(gator_annotate_visual);
+
+/*
+ * Emit a marker annotation with no payload.
+ *
+ * Header (4 bytes): escape code, MARKER_ANNOTATION, 16-bit size of 0.
+ */
+void gator_annotate_marker(void)
+{
+	char header[4] = { ESCAPE_CODE, MARKER_ANNOTATION };
+
+	marshal_u16(&header[2], 0);
+
+	kannotate_write(header, sizeof(header));
+}
+
+EXPORT_SYMBOL(gator_annotate_marker);
+
+/*
+ * Emit a marker annotation carrying a string.
+ *
+ * Header (4 bytes): escape code, MARKER_ANNOTATION, 16-bit string length.
+ * The string bytes follow without their terminator.  Only the low 16 bits of
+ * strlen(str) are used as the length.
+ */
+void gator_annotate_marker_str(const char *str)
+{
+	char header[4] = { ESCAPE_CODE, MARKER_ANNOTATION };
+	const u16 len = strlen(str) & 0xffff;
+
+	marshal_u16(&header[2], len);
+
+	kannotate_write(header, sizeof(header));
+	kannotate_write(str, len);
+}
+
+EXPORT_SYMBOL(gator_annotate_marker_str);
+
+/*
+ * Emit a marker annotation carrying only a color.
+ *
+ * Written as 8 bytes: escape code, MARKER_ANNOTATION, 16-bit size of 4, then
+ * the 32-bit color.
+ */
+void gator_annotate_marker_color(int color)
+{
+	char header[8] = { ESCAPE_CODE, MARKER_ANNOTATION };
+
+	marshal_u16(&header[2], 4);
+	marshal_u32(&header[4], color);
+
+	kannotate_write(header, sizeof(header));
+}
+
+EXPORT_SYMBOL(gator_annotate_marker_color);
+
+/*
+ * Emit a marker annotation carrying a color and a string.
+ *
+ * Written in order: escape code, MARKER_ANNOTATION, 16-bit payload size,
+ * 32-bit color, then the string bytes without their terminator.  The payload
+ * size counts the 4 color bytes plus the string bytes.
+ *
+ * The string length is taken from the low 16 bits of strlen(str) and is then
+ * limited so that length + 4 still fits the 16-bit size field.
+ */
+void gator_annotate_marker_color_str(int color, const char *str)
+{
+	u16 text_len = strlen(str) & 0xffff;
+	char header[8];
+
+	// Without this limit the size field wraps below 4 and "size - 4" turns into a huge unsigned byte count
+	if (text_len > 0xffff - 4)
+		text_len = 0xffff - 4;
+
+	header[0] = ESCAPE_CODE;
+	header[1] = MARKER_ANNOTATION;
+	marshal_u16(header + 2, text_len + 4);
+	marshal_u32(header + 4, color);
+	kannotate_write(header, sizeof(header));
+	kannotate_write(str, text_len);
+}
+
+EXPORT_SYMBOL(gator_annotate_marker_color_str);
diff --git a/drivers/gator/gator_backtrace.c b/drivers/gator/gator_backtrace.c
new file mode 100644
index 000000000000..ffacb490194c
--- /dev/null
+++ b/drivers/gator/gator_backtrace.c
@@ -0,0 +1,168 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/*
+ * EABI backtrace stores {fp,lr} on the stack.
+ */
+// One stack frame as copied from user memory; both views of the union overlay the same bytes
+struct stack_frame_eabi {
+	union {
+		// Native view: fields have the kernel's unsigned long width
+		struct {
+			unsigned long fp;
+			// May be the fp in the case of a leaf function or clang
+			unsigned long lr;
+			// If lr is really the fp, lr2 is the corresponding lr
+			unsigned long lr2;
+		};
+		// Used to read 32 bit fp/lr from a 64 bit kernel
+		struct {
+			u32 fp_32;
+			// same as lr above
+			u32 lr_32;
+			// same as lr2 above
+			u32 lr2_32;
+		};
+	};
+};
+
+/*
+ * Walk the frame-pointer chain of a user-mode task and report each return
+ * address through gator_add_trace().
+ *
+ * @cpu:   passed through to gator_add_trace()
+ * @regs:  register state the walk starts from
+ * @depth: maximum number of stack frames read from user memory
+ *
+ * Nothing is reported when regs is not user mode, and the whole body is
+ * compiled out unless building for arm or aarch64.  The walk stops as soon as
+ * a frame cannot be read or the next frame pointer fails validation.
+ */
+static void arm_backtrace_eabi(int cpu, struct pt_regs *const regs, unsigned int depth)
+{
+#if defined(__arm__) || defined(__aarch64__)
+	struct stack_frame_eabi *curr;
+	struct stack_frame_eabi bufcurr;
+#if defined(__arm__)
+	const bool is_compat = false;
+	unsigned long fp = regs->ARM_fp;
+	unsigned long sp = regs->ARM_sp;
+	unsigned long lr = regs->ARM_lr;
+	// distance subtracted from a frame pointer to get the address the frame is read from
+	const int gcc_frame_offset = sizeof(unsigned long);
+#else
+	// Is userspace aarch32 (32 bit)
+	const bool is_compat = compat_user_mode(regs);
+	unsigned long fp = (is_compat ? regs->regs[11] : regs->regs[29]);
+	unsigned long sp = (is_compat ? regs->compat_sp : regs->sp);
+	unsigned long lr = (is_compat ? regs->compat_lr : regs->regs[30]);
+	const int gcc_frame_offset = (is_compat ? sizeof(u32) : 0);
+#endif
+	// clang frame offset is always zero
+	int is_user_mode = user_mode(regs);
+
+	// pc (current function) has already been added
+
+	if (!is_user_mode) {
+		return;
+	}
+
+	// Add the lr (parent function)
+	// entry preamble may not have executed
+	gator_add_trace(cpu, lr);
+
+	// check fp is valid
+	if (fp == 0 || fp < sp) {
+		return;
+	}
+
+	// Get the current stack frame
+	curr = (struct stack_frame_eabi *)(fp - gcc_frame_offset);
+	// frames must be 4-byte aligned
+	if ((unsigned long)curr & 3) {
+		return;
+	}
+
+	while (depth-- && curr) {
+		// copy the frame out of user memory without sleeping; give up if it is not readable
+		if (!access_ok(VERIFY_READ, curr, sizeof(struct stack_frame_eabi)) ||
+				__copy_from_user_inatomic(&bufcurr, curr, sizeof(struct stack_frame_eabi))) {
+			return;
+		}
+
+		fp = (is_compat ? bufcurr.fp_32 : bufcurr.fp);
+		lr = (is_compat ? bufcurr.lr_32 : bufcurr.lr);
+
+		// Address a frame is read from, given its frame pointer value
+#define calc_next(reg) ((reg) - gcc_frame_offset)
+		// Returns true if reg is a valid fp
+		// (non-zero, 4-byte aligned after the offset, and at a higher address than the current frame)
+#define validate_next(reg, curr) \
+		((reg) != 0 && (calc_next(reg) & 3) == 0 && (unsigned long)(curr) < calc_next(reg))
+
+		// Try lr from the stack as the fp because gcc leaf functions do not push lr
+		// If gcc_frame_offset is non-zero, the lr will also be the clang fp
+		// This assumes code is at a lower address than the stack
+		if (validate_next(lr, curr)) {
+			fp = lr;
+			lr = (is_compat ? bufcurr.lr2_32 : bufcurr.lr2);
+		}
+
+		gator_add_trace(cpu, lr);
+
+		if (!validate_next(fp, curr)) {
+			return;
+		}
+
+		// Move to the next stack frame
+		curr = (struct stack_frame_eabi *)calc_next(fp);
+	}
+#endif
+}
+
+#if defined(__arm__) || defined(__aarch64__)
+/*
+ * walk_stackframe() callback: record one kernel frame.
+ *
+ * @frame: frame being visited; its pc is the address recorded
+ * @d:     points at an unsigned int holding the number of frames still wanted
+ *
+ * While frames are still wanted, the pc (with bit 0 cleared) is handed to
+ * marshal_backtrace() and the counter is decremented.  When built as a module
+ * and the pc lies inside a module, the address is made relative to that
+ * module's core and tagged with the module's cookie.
+ *
+ * Returns non-zero once the counter has reached zero, zero otherwise.
+ */
+static int report_trace(struct stackframe *frame, void *d)
+{
+	unsigned int *remaining = d;
+	unsigned int cookie = NO_COOKIE;
+	unsigned long addr = frame->pc;
+
+	if (*remaining == 0)
+		return 1;
+
+#if defined(MODULE)
+	{
+		unsigned int cpu = get_physical_cpu();
+		struct module *mod = __module_address(addr);
+
+		if (mod) {
+			cookie = get_cookie(cpu, current, mod->name, false);
+			addr = addr - (unsigned long)mod->module_core;
+		}
+	}
+#endif
+	marshal_backtrace(addr & ~1, cookie);
+	(*remaining)--;
+
+	return *remaining == 0;
+}
+#endif
+
+// Uncomment the following line to enable kernel stack unwinding within gator, note it can also be defined from the Makefile
+// #define GATOR_KERNEL_STACK_UNWINDING
+
+// The runtime switch only exists when unwinding has not been forced on at build time
+#if (defined(__arm__) || defined(__aarch64__)) && !defined(GATOR_KERNEL_STACK_UNWINDING)
+// Disabled by default
+MODULE_PARM_DESC(kernel_stack_unwinding, "Allow kernel stack unwinding.");
+// Read by kernel_backtrace() to choose between the full backtrace depth and a single frame
+bool kernel_stack_unwinding = 0;
+module_param(kernel_stack_unwinding, bool, 0644);
+#endif
+
+/*
+ * Record a backtrace for a sample taken in kernel mode.
+ *
+ * On arm/aarch64 the stack is walked with walk_stackframe(), using
+ * report_trace() as the per-frame callback.  The frame limit is
+ * gator_backtrace_depth when kernel unwinding is enabled (at build time or
+ * through the kernel_stack_unwinding parameter) and 1 otherwise; a limit of 0
+ * is raised to 1.  On other architectures only PC_REG is recorded.
+ *
+ * @cpu:  not referenced directly in this function
+ * @regs: register state the walk starts from
+ */
+static void kernel_backtrace(int cpu, struct pt_regs *const regs)
+{
+#if defined(__arm__) || defined(__aarch64__)
+#ifdef GATOR_KERNEL_STACK_UNWINDING
+	int depth = gator_backtrace_depth;
+#else
+	int depth = (kernel_stack_unwinding ? gator_backtrace_depth : 1);
+#endif
+	struct stackframe frame;
+	// always report at least one frame
+	if (depth == 0)
+		depth = 1;
+#if defined(__arm__)
+	frame.fp = regs->ARM_fp;
+	frame.sp = regs->ARM_sp;
+	frame.lr = regs->ARM_lr;
+	frame.pc = regs->ARM_pc;
+#else
+	frame.fp = regs->regs[29];
+	frame.sp = regs->sp;
+	frame.pc = regs->pc;
+#endif
+	// report_trace() decrements depth through the pointer and stops the walk when it reaches zero
+	walk_stackframe(&frame, report_trace, &depth);
+#else
+	marshal_backtrace(PC_REG & ~1, NO_COOKIE);
+#endif
+}
diff --git a/drivers/gator/gator_cookies.c b/drivers/gator/gator_cookies.c
new file mode 100644
index 000000000000..eb9b946170c1
--- /dev/null
+++ b/drivers/gator/gator_cookies.c
@@ -0,0 +1,433 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+// Number of buckets in each per-cpu cookie map; used as a bit mask by cookiemap_code()
+#define COOKIEMAP_ENTRIES	1024	/* must be power of 2 */
+#define TRANSLATE_BUFFER_SIZE 512  // bytes per cpu, must be a power of 2; holds 512 / sizeof(struct cookie_args) entries
+// Size in bytes of each per-cpu translate_text buffer
+#define TRANSLATE_TEXT_SIZE		256
+// Number of key/value slots in each cookie map bucket
+#define MAX_COLLISIONS		2
+
+// Lookup table used by gator_chksum_crc32(); built in cookies_initialize()
+static uint32_t *gator_crc32_table;
+// Index mask for the translate_buffer ring (entry count - 1)
+static unsigned int translate_buffer_mask;
+
+// One deferred request to resolve the name of a task
+struct cookie_args {
+	struct task_struct *task;
+	const char *text;
+};
+
+// Per-cpu scratch buffer that receives the command line read by translate_app_process()
+static DEFINE_PER_CPU(char *, translate_text);
+// Per-cpu source of new cookie values; advanced by nr_cpu_ids for each new cookie
+static DEFINE_PER_CPU(uint32_t, cookie_next_key);
+// Per-cpu cookie map: parallel arrays of COOKIEMAP_ENTRIES * MAX_COLLISIONS keys and values
+static DEFINE_PER_CPU(uint64_t *, cookie_keys);
+static DEFINE_PER_CPU(uint32_t *, cookie_values);
+// Per-cpu ring of deferred cookie_args requests and its read/write indices
+static DEFINE_PER_CPU(int, translate_buffer_read);
+static DEFINE_PER_CPU(int, translate_buffer_write);
+static DEFINE_PER_CPU(struct cookie_args *, translate_buffer);
+
+static uint32_t get_cookie(int cpu, struct task_struct *task, const char *text, bool from_wq);
+static void wq_cookie_handler(struct work_struct *unused);
+// Work item that drains the translate_buffer ring; scheduled from the timer below
+DECLARE_WORK(cookie_work, wq_cookie_handler);
+static struct timer_list app_process_wake_up_timer;
+static void app_process_wake_up_handler(unsigned long unused_data);
+
+/*
+ * Map a 64-bit key to the index of the first slot of its bucket.
+ *
+ * The two 32-bit halves of the key are added together, the four bytes of the
+ * sum are combined most significant byte first with a multiply-by-31 hash,
+ * and the result is reduced to COOKIEMAP_ENTRIES buckets.  The return value
+ * is the bucket number multiplied by MAX_COLLISIONS.
+ */
+static uint32_t cookiemap_code(uint64_t value64)
+{
+	const uint32_t folded = (uint32_t)((value64 >> 32) + value64);
+	uint32_t hash = 0;
+	int shift;
+
+	for (shift = 24; shift >= 0; shift -= 8)
+		hash = hash * 31 + ((folded >> shift) & 0xff);
+
+	return (hash & (COOKIEMAP_ENTRIES - 1)) * MAX_COLLISIONS;
+}
+
+/*
+ * Compute a 32-bit checksum of the NUL-terminated string data.
+ *
+ * Table-driven, one byte per step, using gator_crc32_table.  The running
+ * value starts at all ones and is inverted before being returned.
+ */
+static uint32_t gator_chksum_crc32(const char *data)
+{
+	const unsigned char *byte = (const unsigned char *)data;
+	uint32_t crc = 0xFFFFFFFF;
+
+	for (; *byte; byte++)
+		crc = (crc >> 8) ^ gator_crc32_table[(crc ^ *byte) & 0xFF];
+
+	return crc ^ 0xFFFFFFFF;
+}
+
+/*
+ * Exists
+ *  Pre:  [0][1][v][3]..[n-1]
+ *  Post: [v][0][1][3]..[n-1]
+ *
+ * Look key up in the bucket it hashes to on the current physical cpu.  On a
+ * hit the entry is moved to the front of the bucket and its value returned;
+ * on a miss 0 is returned and the bucket is left untouched.
+ */
+static uint32_t cookiemap_exists(uint64_t key)
+{
+	const int cpu = get_physical_cpu();
+	const uint32_t bucket = cookiemap_code(key);
+	uint64_t *keys = &per_cpu(cookie_keys, cpu)[bucket];
+	uint32_t *values = &per_cpu(cookie_values, cpu)[bucket];
+	unsigned long flags;
+	uint32_t found = 0;
+	int hit, slot;
+
+	// Can be called from interrupt handler or from work queue
+	local_irq_save(flags);
+	for (hit = 0; hit < MAX_COLLISIONS; hit++) {
+		if (keys[hit] != key)
+			continue;
+
+		found = values[hit];
+		// slide the entries in front of the hit back by one slot
+		for (slot = hit; slot > 0; slot--) {
+			keys[slot] = keys[slot - 1];
+			values[slot] = values[slot - 1];
+		}
+		keys[0] = key;
+		values[0] = found;
+		break;
+	}
+	local_irq_restore(flags);
+
+	return found;
+}
+
+/*
+ * Add
+ *  Pre:  [0][1][2][3]..[n-1]
+ *  Post: [v][0][1][2]..[n-2]
+ *
+ * Insert key/value at the front of the bucket key hashes to on the current
+ * physical cpu.  Existing entries move back by one slot and the entry that
+ * was in the last slot is dropped.  Interrupts are not disabled here; the
+ * caller in this file (get_cookie) calls in with local interrupts disabled.
+ */
+static void cookiemap_add(uint64_t key, uint32_t value)
+{
+	const int cpu = get_physical_cpu();
+	const int bucket = cookiemap_code(key);
+	uint64_t *keys = &per_cpu(cookie_keys, cpu)[bucket];
+	uint32_t *values = &per_cpu(cookie_values, cpu)[bucket];
+	int slot = MAX_COLLISIONS - 1;
+
+	while (slot > 0) {
+		keys[slot] = keys[slot - 1];
+		values[slot] = values[slot - 1];
+		slot--;
+	}
+
+	keys[0] = key;
+	values[0] = value;
+}
+
+#ifndef CONFIG_PREEMPT_RT_FULL
+/*
+ * Queue a (task, text) request on the translate_buffer ring of cpu.
+ *
+ * Runs with local interrupts disabled.  The request is silently dropped when
+ * the ring is full.  A reference on task is taken for every queued request;
+ * wq_cookie_handler() drops it after processing the entry.
+ */
+static void translate_buffer_write_args(int cpu, struct task_struct *task, const char *text)
+{
+	unsigned long flags;
+	int tail, new_tail;
+
+	local_irq_save(flags);
+
+	tail = per_cpu(translate_buffer_write, cpu);
+	new_tail = (tail + 1) & translate_buffer_mask;
+
+	// At least one entry must always remain available as when read == write, the queue is empty not full
+	if (new_tail != per_cpu(translate_buffer_read, cpu)) {
+		struct cookie_args *entry = &per_cpu(translate_buffer, cpu)[tail];
+
+		entry->task = task;
+		entry->text = text;
+		get_task_struct(task);
+		// publish the entry only after it has been filled in
+		per_cpu(translate_buffer_write, cpu) = new_tail;
+	}
+
+	local_irq_restore(flags);
+}
+#endif
+
+/*
+ * Pop the oldest request from the translate_buffer ring of cpu into *args.
+ *
+ * Runs with local interrupts disabled.  The ring is not checked for
+ * emptiness here; the caller in this file only calls when the read and write
+ * indices differ.
+ */
+static void translate_buffer_read_args(int cpu, struct cookie_args *args)
+{
+	unsigned long flags;
+	int head;
+
+	local_irq_save(flags);
+
+	head = per_cpu(translate_buffer_read, cpu);
+	*args = *(per_cpu(translate_buffer, cpu) + head);
+	per_cpu(translate_buffer_read, cpu) = (head + 1) & translate_buffer_mask;
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Work queue handler: resolve the requests queued on this cpu's
+ * translate_buffer ring.
+ *
+ * Under start_mutex, and only while gator_started is non-zero, every queued
+ * request is popped, its cookie is looked up with from_wq set, a link record
+ * is emitted through marshal_link(), and the task reference taken when the
+ * request was queued is dropped.
+ */
+static void wq_cookie_handler(struct work_struct *unused)
+{
+	const int cpu = get_physical_cpu();
+	struct cookie_args pending;
+
+	mutex_lock(&start_mutex);
+
+	if (gator_started == 0)
+		goto unlock;
+
+	while (per_cpu(translate_buffer_read, cpu) != per_cpu(translate_buffer_write, cpu)) {
+		int cookie;
+
+		translate_buffer_read_args(cpu, &pending);
+		cookie = get_cookie(cpu, pending.task, pending.text, true);
+		marshal_link(cookie, pending.task->tgid, pending.task->pid);
+		put_task_struct(pending.task);
+	}
+
+unlock:
+	mutex_unlock(&start_mutex);
+}
+
+// Timer callback for app_process_wake_up_timer: hands processing over to cookie_work; the argument is unused
+static void app_process_wake_up_handler(unsigned long unused_data)
+{
+	// had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater
+	schedule_work(&cookie_work);
+}
+
+/*
+ * Retrieve full name from proc/pid/cmdline for java processes on Android
+ *
+ * @text:    in/out; once any argument bytes have been copied it is repointed
+ *           at this cpu's translate_text buffer
+ * @cpu:     cpu whose translate_text buffer and translate_buffer ring are used
+ * @task:    task whose argument area is read
+ * @from_wq: false when the caller may be in atomic context; the request is
+ *           then queued for the work queue instead of being resolved here
+ *
+ * At most TRANSLATE_TEXT_SIZE - 1 bytes of the argument area are copied and
+ * the copy is always NUL-terminated.
+ *
+ * Returns 1 when *text now points at a copied name, 0 otherwise: the request
+ * was queued or was already queued, the task has no mm or no arguments, the
+ * first page could not be obtained, or the name is still "zygote" or
+ * "<pre-initialized>".  If a later page cannot be obtained the text copied so
+ * far is kept and 1 is returned.
+ */
+static int translate_app_process(const char **text, int cpu, struct task_struct *task, bool from_wq)
+{
+	void *maddr;
+	unsigned int len;
+	unsigned long addr;
+	struct mm_struct *mm;
+	struct page *page = NULL;
+	struct vm_area_struct *page_vma;
+	int bytes, offset, retval = 0;
+	char *buf = per_cpu(translate_text, cpu);
+
+#ifndef CONFIG_PREEMPT_RT_FULL
+	// Push work into a work queue if in atomic context as the kernel functions below might sleep
+	// Rely on the caller-supplied from_wq flag rather than in_irq() or in_interrupt() kernel functions, as the value of these functions seems
+	//   inconsistent during a context switch between android/linux versions
+	if (!from_wq) {
+		// Check if already in buffer
+		int pos = per_cpu(translate_buffer_read, cpu);
+		while (pos != per_cpu(translate_buffer_write, cpu)) {
+			if (per_cpu(translate_buffer, cpu)[pos].task == task)
+				goto out;
+			pos = (pos + 1) & translate_buffer_mask;
+		}
+
+		translate_buffer_write_args(cpu, task, *text);
+
+		// Not safe to call in RT-Preempt full in schedule switch context
+		mod_timer(&app_process_wake_up_timer, jiffies + 1);
+		goto out;
+	}
+#endif
+
+	mm = get_task_mm(task);
+	if (!mm)
+		goto out;
+	if (!mm->arg_end)
+		goto outmm;
+	addr = mm->arg_start;
+	len = mm->arg_end - mm->arg_start;
+
+	// Leave room for the terminator written in the loop; the buffer holds TRANSLATE_TEXT_SIZE bytes
+	if (len > TRANSLATE_TEXT_SIZE - 1)
+		len = TRANSLATE_TEXT_SIZE - 1;
+
+	down_read(&mm->mmap_sem);
+	while (len) {
+		if (get_user_pages(task, mm, addr, 1, 0, 1, &page, &page_vma) <= 0)
+			goto outsem;
+
+		maddr = kmap(page);
+		offset = addr & (PAGE_SIZE - 1);
+		// copy no further than the end of the current page
+		bytes = len;
+		if (bytes > PAGE_SIZE - offset)
+			bytes = PAGE_SIZE - offset;
+
+		copy_from_user_page(page_vma, page, addr, buf, maddr + offset, bytes);
+
+		kunmap(page);
+		page_cache_release(page);	// release page allocated by get_user_pages()
+
+		len -= bytes;
+		buf += bytes;
+		addr += bytes;
+
+		// Keep the copied text NUL-terminated so the strcmp() calls below stay inside the buffer
+		*buf = '\0';
+
+		*text = per_cpu(translate_text, cpu);
+		retval = 1;
+	}
+
+	// On app_process startup, /proc/pid/cmdline is initially "zygote" then "<pre-initialized>" but changes after an initial startup period
+	if (strcmp(*text, "zygote") == 0 || strcmp(*text, "<pre-initialized>") == 0)
+		retval = 0;
+
+outsem:
+	up_read(&mm->mmap_sem);
+outmm:
+	mmput(mm);
+out:
+	return retval;
+}
+
+/*
+ * Return the cookie that identifies (text, task->tgid), creating it if needed.
+ *
+ * @cpu:     cpu whose cookie counter is advanced for a new cookie
+ * @task:    task the name belongs to; its tgid forms the low half of the key
+ * @text:    name to look up; its checksum forms the high half of the key
+ * @from_wq: passed on to translate_app_process()
+ *
+ * A cached cookie is returned straight away.  Otherwise, for the name
+ * "app_process", translate_app_process() is asked for the real name first and
+ * UNRESOLVED_COOKIE is returned if it has none yet.  A new cookie is only
+ * allocated, cached and emitted when marshal_cookie_header() accepts the
+ * name; if it does not, UNRESOLVED_COOKIE is returned.
+ */
+static uint32_t get_cookie(int cpu, struct task_struct *task, const char *text, bool from_wq)
+{
+	unsigned long flags, cookie;
+	// the key is built from the name as passed in, before any app_process translation
+	uint64_t key = gator_chksum_crc32(text);
+
+	key = (key << 32) | (uint32_t)task->tgid;
+
+	cookie = cookiemap_exists(key);
+	if (cookie)
+		return cookie;
+
+	if (strcmp(text, "app_process") == 0 && !translate_app_process(&text, cpu, task, from_wq))
+		return UNRESOLVED_COOKIE;
+
+	// Can be called from interrupt handler or from work queue or from scheduler trace
+	local_irq_save(flags);
+
+	if (marshal_cookie_header(text)) {
+		per_cpu(cookie_next_key, cpu) += nr_cpu_ids;
+		cookie = per_cpu(cookie_next_key, cpu);
+		cookiemap_add(key, cookie);
+		marshal_cookie(cookie, text);
+	} else {
+		cookie = UNRESOLVED_COOKIE;
+	}
+
+	local_irq_restore(flags);
+
+	return cookie;
+}
+
+/*
+ * Return the cookie for the executable that task is running.
+ *
+ * @cpu:  passed through to get_cookie()
+ * @task: task to identify
+ *
+ * Returns NO_COOKIE for a task without an address space, UNRESOLVED_COOKIE
+ * when task is NULL or its mm has no exe_file, and otherwise the cookie
+ * get_cookie() returns for the exe_file's dentry name.
+ */
+static int get_exec_cookie(int cpu, struct task_struct *task)
+{
+	struct mm_struct *mm;
+	const char *text;
+
+	// test task before it is dereferenced
+	if (!task)
+		return UNRESOLVED_COOKIE;
+
+	// kernel threads have no address space
+	mm = task->mm;
+	if (!mm)
+		return NO_COOKIE;
+
+	if (!mm->exe_file)
+		return UNRESOLVED_COOKIE;
+
+	text = mm->exe_file->f_path.dentry->d_name.name;
+	return get_cookie(cpu, task, text, false);
+}
+
+/*
+ * Find the mapping of task that contains addr and return its cookie.
+ *
+ * @cpu:    passed through to get_cookie()
+ * @task:   task whose mappings are searched
+ * @addr:   address to look up
+ * @offset: out; for a file-backed mapping, the position of addr within the
+ *          file (vm_pgoff in bytes plus the distance from vm_start); for an
+ *          anonymous mapping, addr itself; not written when nothing is found
+ *
+ * Returns NO_COOKIE when task has no mm or the mapping is anonymous,
+ * UNRESOLVED_COOKIE when no mapping contains addr, and otherwise the cookie
+ * get_cookie() returns for the mapped file's dentry name.
+ */
+static unsigned long get_address_cookie(int cpu, struct task_struct *task, unsigned long addr, off_t *offset)
+{
+	struct mm_struct *mm = task->mm;
+	struct vm_area_struct *vma;
+	unsigned long cookie = NO_COOKIE;
+
+	if (!mm)
+		return cookie;
+
+	// advance to the first vma that actually contains addr
+	vma = find_vma(mm, addr);
+	while (vma && (addr < vma->vm_start || addr >= vma->vm_end))
+		vma = vma->vm_next;
+
+	if (!vma)
+		return UNRESOLVED_COOKIE;
+
+	if (vma->vm_file) {
+		const char *text = vma->vm_file->f_path.dentry->d_name.name;
+
+		cookie = get_cookie(cpu, task, text, false);
+		*offset = (vma->vm_pgoff << PAGE_SHIFT) + addr - vma->vm_start;
+	} else {
+		/* must be an anonymous map */
+		*offset = addr;
+	}
+
+	return cookie;
+}
+
+/*
+ * Allocate and initialise all cookie state.
+ *
+ * For every present cpu: seeds cookie_next_key, allocates zeroed key and
+ * value arrays for the cookie map, allocates the translate_buffer ring with
+ * its indices reset, and allocates the translate_text buffer.  Then builds
+ * the checksum lookup table and sets up app_process_wake_up_timer.
+ *
+ * Returns 0 on success or -ENOMEM as soon as an allocation fails.  Nothing
+ * already allocated is freed on failure.
+ * NOTE(review): presumably the caller runs cookies_release() after a failure
+ * - confirm.
+ */
+static int cookies_initialize(void)
+{
+	const uint32_t poly = 0x04c11db7;
+	const size_t nslots = COOKIEMAP_ENTRIES * MAX_COLLISIONS;
+	uint32_t crc;
+	int cpu, i, bit;
+
+	translate_buffer_mask = TRANSLATE_BUFFER_SIZE / sizeof(per_cpu(translate_buffer, 0)[0]) - 1;
+
+	for_each_present_cpu(cpu) {
+		per_cpu(cookie_next_key, cpu) = nr_cpu_ids + cpu;
+
+		per_cpu(cookie_keys, cpu) = kzalloc(nslots * sizeof(uint64_t), GFP_KERNEL);
+		if (!per_cpu(cookie_keys, cpu))
+			return -ENOMEM;
+
+		per_cpu(cookie_values, cpu) = kzalloc(nslots * sizeof(uint32_t), GFP_KERNEL);
+		if (!per_cpu(cookie_values, cpu))
+			return -ENOMEM;
+
+		per_cpu(translate_buffer, cpu) = kmalloc(TRANSLATE_BUFFER_SIZE, GFP_KERNEL);
+		if (!per_cpu(translate_buffer, cpu))
+			return -ENOMEM;
+
+		per_cpu(translate_buffer_write, cpu) = 0;
+		per_cpu(translate_buffer_read, cpu) = 0;
+
+		per_cpu(translate_text, cpu) = kmalloc(TRANSLATE_TEXT_SIZE, GFP_KERNEL);
+		if (!per_cpu(translate_text, cpu))
+			return -ENOMEM;
+	}
+
+	// build CRC32 table
+	gator_crc32_table = kmalloc(256 * sizeof(uint32_t), GFP_KERNEL);
+	if (!gator_crc32_table)
+		return -ENOMEM;
+
+	for (i = 0; i < 256; i++) {
+		crc = i;
+		for (bit = 0; bit < 8; bit++)
+			crc = (crc & 1) ? (crc >> 1) ^ poly : crc >> 1;
+		gator_crc32_table[i] = crc;
+	}
+
+	setup_timer(&app_process_wake_up_timer, app_process_wake_up_handler, 0);
+
+	return 0;
+}
+
+/*
+ * Free everything cookies_initialize() allocated.
+ *
+ * For every present cpu the cookie map arrays, the translate_buffer ring and
+ * the translate_text buffer are freed, their pointers cleared and the ring
+ * indices reset.  The wake-up timer is then deleted and the checksum table
+ * freed.
+ */
+static void cookies_release(void)
+{
+	int cpu;
+
+	for_each_present_cpu(cpu) {
+		kfree(per_cpu(cookie_keys, cpu));
+		kfree(per_cpu(cookie_values, cpu));
+		kfree(per_cpu(translate_buffer, cpu));
+		kfree(per_cpu(translate_text, cpu));
+
+		per_cpu(cookie_keys, cpu) = NULL;
+		per_cpu(cookie_values, cpu) = NULL;
+		per_cpu(translate_buffer, cpu) = NULL;
+		per_cpu(translate_text, cpu) = NULL;
+
+		per_cpu(translate_buffer_read, cpu) = 0;
+		per_cpu(translate_buffer_write, cpu) = 0;
+	}
+
+	del_timer_sync(&app_process_wake_up_timer);
+
+	kfree(gator_crc32_table);
+	gator_crc32_table = NULL;
+}
diff --git a/drivers/gator/gator_events_armv6.c b/drivers/gator/gator_events_armv6.c
new file mode 100644
index 000000000000..dd7974090b82
--- /dev/null
+++ b/drivers/gator/gator_events_armv6.c
@@ -0,0 +1,237 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "gator.h"
+
+// gator_events_perf_pmu.c is used if perf is supported
+#if GATOR_NO_PERF_SUPPORT
+
+// Core name used in the counter directory names; set by gator_events_armv6_init()
+static const char *pmnc_name;
+
+/*
+ * Per-CPU PMCR
+ */
+#define PMCR_E			(1 << 0)	/* Enable */
+#define PMCR_P			(1 << 1)	/* Count reset */
+#define PMCR_C			(1 << 2)	/* Cycle counter reset */
+#define PMCR_OFL_PMN0	(1 << 8)	/* Count reg 0 overflow */
+#define PMCR_OFL_PMN1	(1 << 9)	/* Count reg 1 overflow */
+#define PMCR_OFL_CCNT	(1 << 10)	/* Cycle counter overflow */
+
+// Counter indices used for the arrays below: two event counters and the cycle counter
+#define PMN0 0
+#define PMN1 1
+#define CCNT 2
+#define CNTMAX	(CCNT+1)
+
+static int pmnc_counters = 0;
+// Per-counter settings; enabled and event are exposed as writable files, key as a read-only file
+static unsigned long pmnc_enabled[CNTMAX];
+static unsigned long pmnc_event[CNTMAX];
+static unsigned long pmnc_key[CNTMAX];
+
+// Per-cpu output buffer holding one (key, value) pair per counter
+static DEFINE_PER_CPU(int[CNTMAX * 2], perfCnt);
+
+/* Write val to the control register accessed as p15, c15, c12, 0, after clearing the bits that must be written as zero. */
+static inline void armv6_pmnc_write(u32 val)
+{
+	/* upper 4bits and 7, 11 are write-as-0 */
+	val &= 0x0ffff77f;
+	asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r" (val));
+}
+
+/* Read and return the control register accessed as p15, c15, c12, 0. */
+static inline u32 armv6_pmnc_read(void)
+{
+	u32 val;
+	asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r" (val));
+	return val;
+}
+
+/*
+ * Write zero to the counter register selected by cnt (CCNT, PMN0 or PMN1).
+ * Any other value of cnt is ignored.
+ */
+static void armv6_pmnc_reset_counter(unsigned int cnt)
+{
+	const u32 zero = 0;
+
+	switch (cnt) {
+	case PMN0:
+		asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r" (zero));
+		break;
+	case PMN1:
+		asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r" (zero));
+		break;
+	case CCNT:
+		asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r" (zero));
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Create one directory per counter under root, named "ARM_<name>_cnt<N>" for
+ * the event counters and "ARM_<name>_ccnt" for the cycle counter.
+ *
+ * Each directory gets a writable "enabled" file and a read-only "key" file;
+ * the event counters additionally get a writable "event" file.
+ *
+ * Returns 0 on success or -1 as soon as a directory cannot be created.
+ */
+int gator_events_armv6_create_files(struct super_block *sb, struct dentry *root)
+{
+	int cnt;
+
+	pmnc_counters = 3;
+
+	for (cnt = PMN0; cnt <= CCNT; cnt++) {
+		const bool is_ccnt = (cnt == CCNT);
+		struct dentry *dir;
+		char name[40];
+
+		if (is_ccnt)
+			snprintf(name, sizeof(name), "ARM_%s_ccnt", pmnc_name);
+		else
+			snprintf(name, sizeof(name), "ARM_%s_cnt%d", pmnc_name, cnt);
+
+		dir = gatorfs_mkdir(sb, root, name);
+		if (!dir)
+			return -1;
+
+		gatorfs_create_ulong(sb, dir, "enabled", &pmnc_enabled[cnt]);
+		gatorfs_create_ro_ulong(sb, dir, "key", &pmnc_key[cnt]);
+		if (!is_ccnt)
+			gatorfs_create_ulong(sb, dir, "event", &pmnc_event[cnt]);
+	}
+
+	return 0;
+}
+
+/*
+ * Program and start the counters on the current cpu.
+ *
+ * @buffer:  if non-NULL, receives a pointer to this cpu's perfCnt array
+ * @migrate: not used in this function
+ *
+ * The counters are disabled if currently enabled, the control register is
+ * initialised, the event numbers of the enabled event counters are written
+ * and every enabled counter is reset before counting is enabled.
+ *
+ * Returns the number of ints stored in perfCnt: a (key, 0) pair for each
+ * enabled counter.
+ */
+static int gator_events_armv6_online(int **buffer, bool migrate)
+{
+	unsigned int cnt, len = 0, cpu = smp_processor_id();
+	u32 pmnc;
+
+	// stop counting before reprogramming
+	if (armv6_pmnc_read() & PMCR_E) {
+		armv6_pmnc_write(armv6_pmnc_read() & ~PMCR_E);
+	}
+
+	/* initialize PMNC, reset overflow, D bit, C bit and P bit. */
+	armv6_pmnc_write(PMCR_OFL_PMN0 | PMCR_OFL_PMN1 | PMCR_OFL_CCNT |
+			 PMCR_C | PMCR_P);
+
+	/* configure control register */
+	for (pmnc = 0, cnt = PMN0; cnt <= CCNT; cnt++) {
+		unsigned long event;
+
+		if (!pmnc_enabled[cnt])
+			continue;
+
+		// only the low 8 bits of the requested event number are used
+		event = pmnc_event[cnt] & 255;
+
+		// Set event (if destined for PMNx counters)
+		if (cnt == PMN0) {
+			pmnc |= event << 20;
+		} else if (cnt == PMN1) {
+			pmnc |= event << 12;
+		}
+
+		// Reset counter
+		armv6_pmnc_reset_counter(cnt);
+	}
+	// write the event selection and start counting
+	armv6_pmnc_write(pmnc | PMCR_E);
+
+	// return zero values, no need to read as the counters were just reset
+	for (cnt = PMN0; cnt <= CCNT; cnt++) {
+		if (pmnc_enabled[cnt]) {
+			per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
+			per_cpu(perfCnt, cpu)[len++] = 0;
+		}
+	}
+
+	if (buffer)
+		*buffer = per_cpu(perfCnt, cpu);
+
+	return len;
+}
+
+/*
+ * Stop counting on the current cpu and reset all three counters.
+ * Neither argument is used; always returns 0.
+ */
+static int gator_events_armv6_offline(int **buffer, bool migrate)
+{
+	unsigned int counter = PMN0;
+
+	armv6_pmnc_write(armv6_pmnc_read() & ~PMCR_E);
+
+	while (counter <= CCNT) {
+		armv6_pmnc_reset_counter(counter);
+		counter++;
+	}
+
+	return 0;
+}
+
+/* Clear the enabled flag and the event number of every counter. */
+static void gator_events_armv6_stop(void)
+{
+	unsigned int counter = PMN0;
+
+	while (counter <= CCNT) {
+		pmnc_enabled[counter] = 0;
+		pmnc_event[counter] = 0;
+		counter++;
+	}
+}
+
+/*
+ * Read and reset every enabled counter on the current cpu.
+ *
+ * @buffer: if non-NULL, receives a pointer to this cpu's perfCnt array
+ *
+ * Returns the number of ints stored in perfCnt: a (key, value) pair for each
+ * enabled counter, or 0 without touching buffer when counting is not enabled.
+ */
+static int gator_events_armv6_read(int **buffer)
+{
+	const int cpu = smp_processor_id();
+	int *out = per_cpu(perfCnt, cpu);
+	int counter, len = 0;
+
+	// a context switch may occur before the online hotplug event, thus need to check that the pmu is enabled
+	if (!(armv6_pmnc_read() & PMCR_E))
+		return 0;
+
+	for (counter = PMN0; counter <= CCNT; counter++) {
+		u32 value = 0;
+
+		if (!pmnc_enabled[counter])
+			continue;
+
+		switch (counter) {
+		case PMN0:
+			asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r" (value));
+			break;
+		case PMN1:
+			asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r" (value));
+			break;
+		case CCNT:
+			asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r" (value));
+			break;
+		}
+		// each read reports the count since the previous read
+		armv6_pmnc_reset_counter(counter);
+
+		out[len++] = pmnc_key[counter];
+		out[len++] = value;
+	}
+
+	if (buffer)
+		*buffer = out;
+
+	return len;
+}
+
+// Callbacks handed to gator_events_install() by gator_events_armv6_init(); no start callback is provided
+static struct gator_interface gator_events_armv6_interface = {
+	.create_files = gator_events_armv6_create_files,
+	.stop = gator_events_armv6_stop,
+	.online = gator_events_armv6_online,
+	.offline = gator_events_armv6_offline,
+	.read = gator_events_armv6_read,
+};
+
+/*
+ * Register the ARMv6 counter interface if the cpu is a supported ARM11 part.
+ *
+ * Picks the name used in the counter directory names from gator_cpuid(),
+ * clears the per-counter settings, assigns each counter a fresh key and
+ * installs the interface.
+ *
+ * Returns -1 for an unsupported cpu, otherwise the result of
+ * gator_events_install().
+ */
+int gator_events_armv6_init(void)
+{
+	unsigned int counter;
+
+	switch (gator_cpuid()) {
+	case ARM11MPCORE:
+		pmnc_name = "ARM11MPCore";
+		break;
+	case ARM1136:
+	case ARM1156:
+	case ARM1176:
+		pmnc_name = "ARM11";
+		break;
+	default:
+		return -1;
+	}
+
+	counter = PMN0;
+	while (counter <= CCNT) {
+		pmnc_enabled[counter] = 0;
+		pmnc_event[counter] = 0;
+		pmnc_key[counter] = gator_events_get_key();
+		counter++;
+	}
+
+	return gator_events_install(&gator_events_armv6_interface);
+}
+
+#endif
diff --git a/drivers/gator/gator_events_armv7.c b/drivers/gator/gator_events_armv7.c
new file mode 100644
index 000000000000..30881c8fd3fd
--- /dev/null
+++ b/drivers/gator/gator_events_armv7.c
@@ -0,0 +1,312 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*  Disabling interrupts
+ *    Many of the functions below disable interrupts via local_irq_save(). This disabling of interrupts is done to prevent any race conditions
+ *    between multiple entities (e.g. hrtimer interrupts and event based interrupts) calling the same functions. As accessing the pmu involves
+ *    several steps (disable, select, read, enable), these steps must be performed atomically. Normal synchronization routines cannot be used
+ *    as these functions are being called from interrupt context.
+ */
+
+#include "gator.h"
+
+// gator_events_perf_pmu.c is used if perf is supported
+#if GATOR_NO_PERF_SUPPORT
+
+// Per-CPU PMNC: config reg
+#define PMNC_E		(1 << 0)	/* Enable all counters */
+#define PMNC_P		(1 << 1)	/* Reset all counters */
+#define PMNC_C		(1 << 2)	/* Cycle counter reset */
+#define	PMNC_MASK	0x3f	/* Mask for writable bits */
+
+// ccnt reg
+#define CCNT_REG	(1 << 31)
+
+#define CCNT 		0
+#define CNT0		1
+#define CNTMAX 		(6+1)
+
+static const char *pmnc_name;
+static int pmnc_counters;
+
+static unsigned long pmnc_enabled[CNTMAX];
+static unsigned long pmnc_event[CNTMAX];
+static unsigned long pmnc_key[CNTMAX];
+
+static DEFINE_PER_CPU(int[CNTMAX * 2], perfCnt);
+
+inline void armv7_pmnc_write(u32 val)
+{
+	// only the bits inside PMNC_MASK (the writable ones) are passed on to the config register
+	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (val & PMNC_MASK));
+}
+
+inline u32 armv7_pmnc_read(void)
+{
+	u32 pmnc;	// receives the current PMNC config register contents
+	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (pmnc));
+	return pmnc;
+}
+
+// With interrupts off, read the cycle counter and reload it with -reset_value; returns the old count
+inline u32 armv7_ccnt_read(u32 reset_value)
+{
+	const u32 ccnt_bit = CCNT_REG, reload = -reset_value;
+	unsigned long irq_state;
+	u32 count;
+
+	local_irq_save(irq_state);
+	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (ccnt_bit));	// stop the cycle counter
+	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (count));	// fetch its value
+	asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (reload));	// load the new start value
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (ccnt_bit));	// let it run again
+	local_irq_restore(irq_state);
+
+	return count;
+}
+
+// With interrupts off, read event counter cnt and reload it with -reset_value; returns the old count
+inline u32 armv7_cntn_read(unsigned int cnt, u32 reset_value)
+{
+	const u32 index = cnt - CNT0, reload = -reset_value;
+	const u32 enable_bit = 1 << index;
+	unsigned long irq_state;
+	u32 count;
+
+	local_irq_save(irq_state);
+	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (enable_bit));	// stop the counter
+	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (index));	// select it
+	asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (count));	// fetch its value
+	asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (reload));	// load the new start value
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (enable_bit));	// let it run again
+	local_irq_restore(irq_state);
+
+	return count;
+}
+
+static inline void armv7_pmnc_disable_interrupt(unsigned int cnt)
+{
+	const u32 mask = (cnt == CCNT) ? CCNT_REG : (1 << (cnt - CNT0));	// interrupt bit of this counter
+	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (mask));
+}
+
+inline u32 armv7_pmnc_reset_interrupt(void)
+{
+	u32 overflow;
+	// fetch the overflow status flags, then write them back to reset them
+	asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (overflow));
+	overflow &= 0x8000003f;	// keep bit 31 and bits 5:0 only
+	asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (overflow));
+	return overflow;
+}
+
+static inline u32 armv7_pmnc_enable_counter(unsigned int cnt)
+{
+	const u32 bit = (cnt == CCNT) ? CCNT_REG : (1 << (cnt - CNT0));	// CCNT uses bit 31, CNTn bit n
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (bit));
+	return cnt;
+}
+
+static inline u32 armv7_pmnc_disable_counter(unsigned int cnt)
+{
+	const u32 bit = (cnt == CCNT) ? CCNT_REG : (1 << (cnt - CNT0));	// CCNT uses bit 31, CNTn bit n
+	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (bit));
+	return cnt;
+}
+
+static inline int armv7_pmnc_select_counter(unsigned int cnt)
+{
+	const u32 index = cnt - CNT0;	// the selection value is relative to the first event counter
+	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (index));
+	return cnt;
+}
+
+static inline void armv7_pmnc_write_evtsel(unsigned int cnt, u32 val)
+{
+	if (armv7_pmnc_select_counter(cnt) != cnt)
+		return;	// the requested counter was not the one selected
+	asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));	// event for the selected counter
+}
+
+// Create one gatorfs directory per counter holding "enabled" and "key", plus "event" for every
+// counter except the cycle counter (index CCNT); returns -1 as soon as a directory cannot be made
+static int gator_events_armv7_create_files(struct super_block *sb, struct dentry *root)
+{
+	int idx;
+
+	for (idx = 0; idx < pmnc_counters; idx++) {
+		struct dentry *entry;
+		char name[40];
+
+		if (idx == CCNT)
+			snprintf(name, sizeof name, "ARM_%s_ccnt", pmnc_name);
+		else
+			snprintf(name, sizeof name, "ARM_%s_cnt%d", pmnc_name, idx - CNT0);
+		entry = gatorfs_mkdir(sb, root, name);
+		if (!entry)
+			return -1;
+		gatorfs_create_ulong(sb, entry, "enabled", &pmnc_enabled[idx]);
+		gatorfs_create_ro_ulong(sb, entry, "key", &pmnc_key[idx]);
+		if (idx != CCNT)
+			gatorfs_create_ulong(sb, entry, "event", &pmnc_event[idx]);
+	}
+
+	return 0;
+}
+
+// Program and start the PMU of the current cpu; reports a zero sample for every enabled counter
+static int gator_events_armv7_online(int **buffer, bool migrate)
+{
+	unsigned int idx, used = 0;
+	int *out = per_cpu(perfCnt, smp_processor_id());
+
+	// make sure the PMU is stopped before it is reprogrammed
+	if (armv7_pmnc_read() & PMNC_E)
+		armv7_pmnc_write(armv7_pmnc_read() & ~PMNC_E);
+
+	// reset all counters (P bit) and the cycle counter (C bit)
+	armv7_pmnc_write(PMNC_P | PMNC_C);
+
+	// clear any overflow flags that are still set
+	armv7_pmnc_reset_interrupt();
+
+	for (idx = CCNT; idx < CNTMAX; idx++) {
+		if (!pmnc_enabled[idx])
+			continue;
+
+		// keep the counter off while it is being set up
+		armv7_pmnc_disable_counter(idx);
+
+		// only the PMNx counters take an event (its low 8 bits); the cycle counter needs none
+		if (idx != CCNT)
+			armv7_pmnc_write_evtsel(idx, pmnc_event[idx] & 255);
+		armv7_pmnc_disable_interrupt(idx);
+
+		// reload the counter with zero, discarding whatever it held
+		if (idx == CCNT)
+			armv7_ccnt_read(0);
+		else
+			armv7_cntn_read(idx, 0);
+
+		// the counter is set up and may run
+		armv7_pmnc_enable_counter(idx);
+	}
+
+	// start the PMU
+	armv7_pmnc_write(armv7_pmnc_read() | PMNC_E);
+
+	// the counters were reset a moment ago, so report zero for each enabled one instead of reading it
+	for (idx = 0; idx < pmnc_counters; idx++) {
+		if (!pmnc_enabled[idx])
+			continue;
+		out[used++] = pmnc_key[idx];
+		out[used++] = 0;
+	}
+
+	if (buffer)
+		*buffer = out;
+
+	return used;
+}
+
+static int gator_events_armv7_offline(int **buffer, bool migrate)
+{
+	u32 pmnc = armv7_pmnc_read();
+	// clearing the E bit disables all counters, including PMCCNTR; overflow IRQs will not be signaled
+	armv7_pmnc_write(pmnc & ~PMNC_E);
+	return 0;	// nothing is reported through buffer
+}
+
+static void gator_events_armv7_stop(void)
+{
+	unsigned int idx = CCNT;
+
+	while (idx < CNTMAX) {	// all CNTMAX slots, not just the pmnc_counters in use
+		pmnc_event[idx] = 0;
+		pmnc_enabled[idx++] = 0;
+	}
+}
+
+// Read every enabled counter of the current cpu into the per-cpu buffer as (key, value) pairs;
+// returns the number of ints written, or 0 while the PMU is not enabled
+static int gator_events_armv7_read(int **buffer)
+{
+	int idx, used = 0;
+	int *out = per_cpu(perfCnt, smp_processor_id());
+
+	// a context switch may occur before the online hotplug event, so check that the pmu is enabled
+	if (!(armv7_pmnc_read() & PMNC_E))
+		return 0;
+
+	for (idx = 0; idx < pmnc_counters; idx++) {
+		int sample;
+
+		if (!pmnc_enabled[idx])
+			continue;
+
+		// the read helpers also reload the counter, here with a reset_value of 0
+		sample = (idx == CCNT) ? armv7_ccnt_read(0) : armv7_cntn_read(idx, 0);
+		out[used++] = pmnc_key[idx];
+		out[used++] = sample;
+	}
+
+	if (buffer)
+		*buffer = out;
+
+	return used;
+}
+
+static struct gator_interface gator_events_armv7_interface = {	// handed to gator_events_install() by gator_events_armv7_init()
+	.create_files = gator_events_armv7_create_files,	// one gatorfs directory per counter
+	.stop = gator_events_armv7_stop,	// clears every enable flag and event
+	.online = gator_events_armv7_online,	// programs and starts the PMU of the current cpu
+	.offline = gator_events_armv7_offline,	// clears the PMNC E bit
+	.read = gator_events_armv7_read,	// samples the enabled counters of the current cpu
+};
+
+// Identify the Cortex-A core and register the ARMv7 counters; returns -1 for any other cpu id
+int gator_events_armv7_init(void)
+{
+	unsigned int idx;
+
+	switch (gator_cpuid()) {
+	case CORTEX_A5:
+		pmnc_name = "Cortex-A5";
+		pmnc_counters = 2;	// event counters only; CCNT is added after the switch
+		break;
+	case CORTEX_A7:
+		pmnc_name = "Cortex-A7";
+		pmnc_counters = 4;
+		break;
+	case CORTEX_A8:
+		pmnc_name = "Cortex-A8";
+		pmnc_counters = 4;
+		break;
+	case CORTEX_A9:
+		pmnc_name = "Cortex-A9";
+		pmnc_counters = 6;
+		break;
+	case CORTEX_A15:
+		pmnc_name = "Cortex-A15";
+		pmnc_counters = 6;
+		break;
+	default:
+		return -1;
+	}
+
+	pmnc_counters++;	// CNT[n] + CCNT
+
+	for (idx = CCNT; idx < CNTMAX; idx++) {
+		pmnc_key[idx] = gator_events_get_key();
+		pmnc_enabled[idx] = pmnc_event[idx] = 0;
+	}
+
+	return gator_events_install(&gator_events_armv7_interface);
+}
+
+#endif
diff --git a/drivers/gator/gator_events_block.c b/drivers/gator/gator_events_block.c
new file mode 100644
index 000000000000..691ef2574536
--- /dev/null
+++ b/drivers/gator/gator_events_block.c
@@ -0,0 +1,153 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+#include <trace/events/block.h>
+
+#define BLOCK_RQ_WR		0
+#define BLOCK_RQ_RD		1
+
+#define BLOCK_TOTAL		(BLOCK_RQ_RD+1)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
+#define EVENTWRITE REQ_RW
+#else
+#define EVENTWRITE REQ_WRITE
+#endif
+
+static ulong block_rq_wr_enabled;
+static ulong block_rq_rd_enabled;
+static ulong block_rq_wr_key;
+static ulong block_rq_rd_key;
+static atomic_t blockCnt[BLOCK_TOTAL];
+static int blockGet[BLOCK_TOTAL * 4];
+
+// Tracepoint probe: adds rq->resid_len of a completed request to the write or the read total,
+// depending on the request's direction flag, provided the counter for that direction is enabled
+GATOR_DEFINE_PROBE(block_rq_complete, TP_PROTO(struct request_queue *q, struct request *rq))
+{
+	int is_write, bytes;
+
+	if (!rq)
+		return;
+
+	// a request with a zero resid_len contributes nothing
+	bytes = rq->resid_len;
+	if (!bytes)
+		return;
+
+	// EVENTWRITE is whichever name the running kernel version gives the write flag
+	is_write = rq->cmd_flags & EVENTWRITE;
+
+	// account the size only when the counter for that direction is enabled
+	if (is_write && block_rq_wr_enabled)
+		atomic_add(bytes, &blockCnt[BLOCK_RQ_WR]);
+	else if (!is_write && block_rq_rd_enabled)
+		atomic_add(bytes, &blockCnt[BLOCK_RQ_RD]);
+}
+
+// Publish the block counters in gatorfs: one directory per direction, each holding an "enabled"
+// file and a read-only "key" file; returns -1 if a directory cannot be created
+static int gator_events_block_create_files(struct super_block *sb, struct dentry *root)
+{
+	struct dentry *wr_dir, *rd_dir;
+
+	/* block_complete_wr */
+	wr_dir = gatorfs_mkdir(sb, root, "Linux_block_rq_wr");
+	if (!wr_dir)
+		return -1;
+	gatorfs_create_ulong(sb, wr_dir, "enabled", &block_rq_wr_enabled);
+	gatorfs_create_ro_ulong(sb, wr_dir, "key", &block_rq_wr_key);
+
+	/* block_complete_rd */
+	rd_dir = gatorfs_mkdir(sb, root, "Linux_block_rq_rd");
+	if (!rd_dir)
+		return -1;
+	gatorfs_create_ulong(sb, rd_dir, "enabled", &block_rq_rd_enabled);
+	gatorfs_create_ro_ulong(sb, rd_dir, "key", &block_rq_rd_key);
+
+	return 0;
+}
+
+// Attach the block_rq_complete probe when at least one of the two counters is enabled;
+// returns 0 on success (or when there is nothing to attach) and -1 if registration fails
+static int gator_events_block_start(void)
+{
+	const bool wanted = block_rq_wr_enabled || block_rq_rd_enabled;
+
+	// register tracepoints
+	if (wanted && GATOR_REGISTER_TRACE(block_rq_complete)) {
+		pr_err("gator: block event tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
+		return -1;
+	}
+
+	pr_debug("gator: registered block event tracepoints\n");
+
+	return 0;
+}
+
+static void gator_events_block_stop(void)
+{
+	if (block_rq_wr_enabled || block_rq_rd_enabled)	// same condition under which start registered the probe
+		GATOR_UNREGISTER_TRACE(block_rq_complete);
+	pr_debug("gator: unregistered block event tracepoints\n");
+
+	block_rq_rd_enabled = 0;
+	block_rq_wr_enabled = 0;
+}
+
+// Drain the accumulated write/read byte totals into blockGet and hand it back through *buffer.
+// Only the primary core reports; each non-zero total is emitted as a (key, 0) pair followed
+// by a (key, value) pair. Returns the number of ints stored in blockGet.
+static int gator_events_block_read(int **buffer)
+{
+	int len = 0, value;
+
+	if (!on_primary_core())
+		return 0;
+
+	// subtract exactly what was read, so bytes added by the probe in the meantime stay accounted
+	if (block_rq_wr_enabled && (value = atomic_read(&blockCnt[BLOCK_RQ_WR])) > 0) {
+		atomic_sub(value, &blockCnt[BLOCK_RQ_WR]);
+		blockGet[len++] = block_rq_wr_key;
+		blockGet[len++] = 0;	// indicates to Streamline that value bytes were written now, not since the last message
+		blockGet[len++] = block_rq_wr_key;
+		blockGet[len++] = value;
+	}
+	if (block_rq_rd_enabled && (value = atomic_read(&blockCnt[BLOCK_RQ_RD])) > 0) {
+		atomic_sub(value, &blockCnt[BLOCK_RQ_RD]);
+		blockGet[len++] = block_rq_rd_key;
+		blockGet[len++] = 0;	// indicates to Streamline that value bytes were read now, not since the last message
+		blockGet[len++] = block_rq_rd_key;
+		blockGet[len++] = value;
+	}
+
+	if (buffer)
+		*buffer = blockGet;
+
+	return len;
+}
+
+static struct gator_interface gator_events_block_interface = {	// handed to gator_events_install() by gator_events_block_init()
+	.create_files = gator_events_block_create_files,
+	.start = gator_events_block_start,	// registers the block_rq_complete probe
+	.stop = gator_events_block_stop,	// unregisters it and clears both enable flags
+	.read = gator_events_block_read,	// reports the byte totals on the primary core
+};
+
+int gator_events_block_init(void)
+{
+	block_rq_wr_key = gator_events_get_key();	// write key is drawn first, then the read key
+	block_rq_rd_key = gator_events_get_key();
+
+	block_rq_rd_enabled = 0;	// both counters start out disabled
+	block_rq_wr_enabled = 0;
+
+	return gator_events_install(&gator_events_block_interface);
+}
diff --git a/drivers/gator/gator_events_ccn-504.c b/drivers/gator/gator_events_ccn-504.c
new file mode 100644
index 000000000000..b89231967c75
--- /dev/null
+++ b/drivers/gator/gator_events_ccn-504.c
@@ -0,0 +1,346 @@
+/**
+ * Copyright (C) ARM Limited 2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/io.h>
+#include <linux/module.h>
+
+#include "gator.h"
+
+#define NUM_REGIONS 256
+#define REGION_SIZE (64*1024)
+#define REGION_DEBUG 1
+#define REGION_XP 64
+#define NUM_XPS 11
+
+// DT (Debug) region
+#define PMEVCNTSR0    0x0150
+#define PMCCNTRSR     0x0190
+#define PMCR          0x01A8
+#define PMSR          0x01B0
+#define PMSR_REQ      0x01B8
+#define PMSR_CLR      0x01C0
+
+// XP region
+#define DT_CONFIG     0x0300
+#define DT_CONTROL    0x0370
+
+// Multiple
+#define PMU_EVENT_SEL 0x0600
+#define OLY_ID        0xFF00
+
+#define CCNT 4
+#define CNTMAX (CCNT + 1)
+
+#define get_pmu_event_id(event) (((event) >> 0) & 0xFF)
+#define get_node_type(event) (((event) >> 8) & 0xFF)
+#define get_region(event) (((event) >> 16) & 0xFF)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
+
+// From kernel/params.c: pre-2.6.36 form, defining param_set_<name>/param_get_<name> on a non-const kernel_param
+#define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn)      	\
+	int param_set_##name(const char *val, struct kernel_param *kp)	\
+	{								\
+		tmptype l;						\
+		int ret;						\
+									\
+		if (!val) return -EINVAL;				\
+		ret = strtolfn(val, 0, &l);				\
+		if (ret == -EINVAL || ((type)l != l))			\
+			return -EINVAL;					\
+		*((type *)kp->arg) = l;					\
+		return 0;						\
+	}								\
+	int param_get_##name(char *buffer, struct kernel_param *kp)	\
+	{								\
+		return sprintf(buffer, format, *((type *)kp->arg));	\
+	}
+
+#else
+
+// From kernel/params.c: 2.6.36+ form, which also defines and exports a kernel_param_ops named param_ops_<name>
+#define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn)      	\
+	int param_set_##name(const char *val, const struct kernel_param *kp) \
+	{								\
+		tmptype l;						\
+		int ret;						\
+									\
+		ret = strtolfn(val, 0, &l);				\
+		if (ret < 0 || ((type)l != l))				\
+			return ret < 0 ? ret : -EINVAL;			\
+		*((type *)kp->arg) = l;					\
+		return 0;						\
+	}								\
+	int param_get_##name(char *buffer, const struct kernel_param *kp) \
+	{								\
+		return scnprintf(buffer, PAGE_SIZE, format,		\
+				*((type *)kp->arg));			\
+	}								\
+	struct kernel_param_ops param_ops_##name = {			\
+		.set = param_set_##name,				\
+		.get = param_get_##name,				\
+	};								\
+	EXPORT_SYMBOL(param_set_##name);				\
+	EXPORT_SYMBOL(param_get_##name);				\
+	EXPORT_SYMBOL(param_ops_##name)
+
+#endif
+
+STANDARD_PARAM_DEF(u64, u64, "%llu", u64, strict_strtoull);
+
+// From include/linux/moduleparam.h
+#define param_check_u64(name, p) __param_check(name, p, u64)
+
+MODULE_PARM_DESC(ccn504_addr, "CCN-504 physical base address");
+static u64 ccn504_addr = 0;
+module_param(ccn504_addr, u64, 0444);
+
+static void __iomem *gator_events_ccn504_base;
+static bool gator_events_ccn504_global_enabled;
+static unsigned long gator_events_ccn504_enabled[CNTMAX];
+static unsigned long gator_events_ccn504_event[CNTMAX];
+static unsigned long gator_events_ccn504_key[CNTMAX];
+static int gator_events_ccn504_buffer[2*CNTMAX];
+static int gator_events_ccn504_prev[CNTMAX];
+
+static void gator_events_ccn504_create_shutdown(void)
+{
+	if (gator_events_ccn504_base == NULL)
+		return;	// nothing was mapped by gator_events_ccn504_init()
+	iounmap(gator_events_ccn504_base);
+}
+
+// Create one gatorfs directory per counter holding "enabled" and "key", plus "event" for every
+// counter except the cycle counter (index CCNT); returns -1 as soon as a directory cannot be made
+static int gator_events_ccn504_create_files(struct super_block *sb, struct dentry *root)
+{
+	int idx;
+
+	for (idx = 0; idx < CNTMAX; ++idx) {
+		struct dentry *entry;
+		char name[32];
+
+		if (idx == CCNT)
+			snprintf(name, sizeof(name), "CCN-504_ccnt");
+		else
+			snprintf(name, sizeof(name), "CCN-504_cnt%i", idx);
+		entry = gatorfs_mkdir(sb, root, name);
+		if (!entry)
+			return -1;
+
+		gatorfs_create_ulong(sb, entry, "enabled", &gator_events_ccn504_enabled[idx]);
+		if (idx != CCNT)
+			gatorfs_create_ulong(sb, entry, "event", &gator_events_ccn504_event[idx]);
+		gatorfs_create_ro_ulong(sb, entry, "key", &gator_events_ccn504_key[idx]);
+	}
+
+	return 0;
+}
+
+static void gator_events_ccn504_set_dt_config(int xp_node_id, int event_num, int value)
+{
+	void __iomem *reg = gator_events_ccn504_base + (REGION_XP + xp_node_id)*REGION_SIZE + DT_CONFIG;
+	u32 cfg = readl(reg);
+
+	cfg |= (value + event_num) << (4*event_num);	// OR-ed into the 4-bit slot of event_num; other bits are kept
+	writel(cfg, reg);
+}
+
+// Start CCN-504 counting: enable the PMU and all XPs, then program the event of each enabled counter.
+// Returns 0 on success or when no counter is enabled, -1 if an event names the wrong node type.
+static int gator_events_ccn504_start(void)
+{
+	int i;
+
+	gator_events_ccn504_global_enabled = 0;
+	for (i = 0; i < CNTMAX; ++i) {
+		if (gator_events_ccn504_enabled[i]) {
+			gator_events_ccn504_global_enabled = 1;
+			break;
+		}
+	}
+
+	if (!gator_events_ccn504_global_enabled)
+		return 0;
+
+	// the 0x80 fill leaves 0x80808080 in every slot, the "no previous sample" marker the read callback tests
+	memset(&gator_events_ccn504_prev, 0x80, sizeof(gator_events_ccn504_prev));
+
+	// Disable INTREQ on overflow
+	// [6] ovfl_intr_en = 0
+	// perhaps set to 1?
+	// [5] cntr_rst = 0
+	// No register pairing
+	// [4:1] cntcfg = 0
+	// Enable PMU features
+	// [0] pmu_en = 1
+	writel(0x1, gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMCR);
+
+	// Configure the XPs
+	for (i = 0; i < NUM_XPS; ++i) {
+		int dt_control;
+
+		// Pass on all events
+		writel(0, gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONFIG);
+
+		// Enable PMU capability
+		// [0] dt_enable = 1
+		dt_control = readl(gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONTROL);
+		dt_control |= 0x1;
+		writel(dt_control, gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONTROL);
+	}
+
+	// Assume no other pmu_event_sel registers are set
+
+	// cycle counter does not need to be enabled
+	for (i = 0; i < CCNT; ++i) {
+		int pmu_event_id, node_type, region;
+		u32 pmu_event_sel;
+		u32 oly_id_whole;
+		u32 oly_id;
+		u32 node_id;
+
+		if (!gator_events_ccn504_enabled[i])
+			continue;
+
+		pmu_event_id = get_pmu_event_id(gator_events_ccn504_event[i]);
+		node_type = get_node_type(gator_events_ccn504_event[i]);
+		region = get_region(gator_events_ccn504_event[i]);
+
+		// Verify the node_type: the region must report the type the event names. RN-I events (0x16)
+		// accept any of the oly_id values listed below (presumably the RN-I variants).
+		oly_id_whole = readl(gator_events_ccn504_base + region*REGION_SIZE + OLY_ID);
+		oly_id = oly_id_whole & 0x1F;
+		node_id = (oly_id_whole >> 8) & 0x7F;
+		if (((node_type != 0x16) && (oly_id != node_type)) ||
+				((node_type == 0x16) && ((oly_id != 0x14) && (oly_id != 0x15) && (oly_id != 0x16) && (oly_id != 0x18) && (oly_id != 0x19) && (oly_id != 0x1A)))) {
+			printk(KERN_ERR "gator: oly_id is 0x%x expected 0x%x\n", oly_id, node_type);
+			return -1;
+		}
+
+		// Set the control register
+		pmu_event_sel = readl(gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL);
+		switch (node_type) {
+		case 0x08: // XP
+			pmu_event_sel |= pmu_event_id << (7*i);
+			gator_events_ccn504_set_dt_config(node_id, i, 0x4);
+			break;
+		case 0x04: // HN-F
+		case 0x16: // RN-I
+		case 0x10: // SBAS
+			pmu_event_sel |= pmu_event_id << (4*i);
+			gator_events_ccn504_set_dt_config(node_id/2, i, (node_id & 1) == 0 ? 0x8 : 0xC);
+			break;
+		}
+		writel(pmu_event_sel, gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL);
+	}
+
+	return 0;
+}
+
+// Undo gator_events_ccn504_start(): zero PMU_EVENT_SEL in the region of every enabled event
+// counter and dt_config in every XP; does nothing when start found no counter enabled
+static void gator_events_ccn504_stop(void)
+{
+	int idx;
+
+	if (!gator_events_ccn504_global_enabled)
+		return;
+
+	// cycle counter does not need to be disabled
+	for (idx = 0; idx < CCNT; ++idx) {
+		int region;
+
+		if (!gator_events_ccn504_enabled[idx])
+			continue;
+
+		// the whole PMU_EVENT_SEL register of the counter's region is written as zero
+		region = get_region(gator_events_ccn504_event[idx]);
+		writel(0, gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL);
+	}
+
+	// Clear dt_config
+	for (idx = 0; idx < NUM_XPS; ++idx) {
+		writel(0, gator_events_ccn504_base + (REGION_XP + idx)*REGION_SIZE + DT_CONFIG);
+	}
+}
+
+// Snapshot the counters and report each enabled one's delta since the previous snapshot (primary core only)
+static int gator_events_ccn504_read(int **buffer)
+{
+	void __iomem *debug = gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE;
+	int idx, used = 0;
+
+	if (!on_primary_core() || !gator_events_ccn504_global_enabled)
+		return 0;
+
+	// Spin until the pmsr register reads zero
+	while (readl(debug + PMSR) != 0);
+
+	// Request a PMU snapshot
+	writel(1, debug + PMSR_REQ);
+
+	// Spin until pmsr becomes non-zero, i.e. the snapshot is there
+	while (readl(debug + PMSR) == 0);
+
+	// Read the shadow registers
+	for (idx = 0; idx < CNTMAX; ++idx) {
+		int sample;
+
+		if (!gator_events_ccn504_enabled[idx])
+			continue;
+
+		sample = readl(debug + (idx == CCNT ? PMCCNTRSR : PMEVCNTSR0 + 8*idx));
+		if (gator_events_ccn504_prev[idx] != 0x80808080) {	// marker set by start: no earlier sample to diff against
+			gator_events_ccn504_buffer[used++] = gator_events_ccn504_key[idx];
+			gator_events_ccn504_buffer[used++] = sample - gator_events_ccn504_prev[idx];
+		}
+		gator_events_ccn504_prev[idx] = sample;
+
+		// Open question: are the counter registers cleared when read? Is that what the cntr_rst bit of pmcr does?
+	}
+
+	// Clear the PMU snapshot status
+	writel(1, debug + PMSR_CLR);
+
+	if (buffer)
+		*buffer = gator_events_ccn504_buffer;
+
+	return used;
+}
+
+static struct gator_interface gator_events_ccn504_interface = {	// handed to gator_events_install() by gator_events_ccn504_init()
+	.shutdown = gator_events_ccn504_create_shutdown,	// unmaps the register window
+	.create_files = gator_events_ccn504_create_files,
+	.start = gator_events_ccn504_start,	// enables the PMU and programs the events
+	.stop = gator_events_ccn504_stop,	// clears the event selections and dt_config
+	.read = gator_events_ccn504_read,	// reports counter deltas on the primary core
+};
+
+// Map the CCN-504 at ccn504_addr and register its counters; -1 if no address was given or ioremap fails
+int gator_events_ccn504_init(void)
+{
+	int idx;
+
+	if (ccn504_addr == 0)
+		return -1;
+
+	gator_events_ccn504_base = ioremap(ccn504_addr, NUM_REGIONS*REGION_SIZE);
+	if (gator_events_ccn504_base == NULL) {
+		printk(KERN_ERR "gator: ioremap returned NULL\n");
+		return -1;
+	}
+
+	for (idx = 0; idx < CNTMAX; ++idx) {
+		gator_events_ccn504_key[idx] = gator_events_get_key();
+		gator_events_ccn504_enabled[idx] = 0;
+		gator_events_ccn504_event[idx] = 0;
+	}
+
+	return gator_events_install(&gator_events_ccn504_interface);
+}
diff --git a/drivers/gator/gator_events_irq.c b/drivers/gator/gator_events_irq.c
new file mode 100644
index 000000000000..b11879a248f8
--- /dev/null
+++ b/drivers/gator/gator_events_irq.c
@@ -0,0 +1,165 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+#include <trace/events/irq.h>
+
+#define HARDIRQ		0
+#define SOFTIRQ		1
+#define TOTALIRQ	(SOFTIRQ+1)
+
+static ulong hardirq_enabled;
+static ulong softirq_enabled;
+static ulong hardirq_key;
+static ulong softirq_key;
+static DEFINE_PER_CPU(atomic_t[TOTALIRQ], irqCnt);
+static DEFINE_PER_CPU(int[TOTALIRQ * 2], irqGet);
+
+// Tracepoint probe: bump the HARDIRQ tally of the current physical cpu
+GATOR_DEFINE_PROBE(irq_handler_exit, TP_PROTO(int irq, struct irqaction *action, int ret))
+{
+	atomic_inc(per_cpu(irqCnt, get_physical_cpu()) + HARDIRQ);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37)	/* the probe prototype depends on the kernel version */
+GATOR_DEFINE_PROBE(softirq_exit, TP_PROTO(struct softirq_action *h, struct softirq_action *vec))
+#else	/* 2.6.37 and later: the probe takes only a vector number */
+GATOR_DEFINE_PROBE(softirq_exit, TP_PROTO(unsigned int vec_nr))
+#endif
+{
+	atomic_inc(&per_cpu(irqCnt, get_physical_cpu())[SOFTIRQ]);	// bump the SOFTIRQ tally of the current physical cpu
+}
+
+// Publish the irq counters in gatorfs: one directory for hard irqs and one for soft irqs, each
+// holding an "enabled" file and a read-only "key" file; returns -1 if a directory cannot be created
+static int gator_events_irq_create_files(struct super_block *sb, struct dentry *root)
+{
+	struct dentry *hard_dir, *soft_dir;
+
+	/* irq */
+	hard_dir = gatorfs_mkdir(sb, root, "Linux_irq_irq");
+	if (!hard_dir)
+		return -1;
+	gatorfs_create_ulong(sb, hard_dir, "enabled", &hardirq_enabled);
+	gatorfs_create_ro_ulong(sb, hard_dir, "key", &hardirq_key);
+
+	/* soft irq */
+	soft_dir = gatorfs_mkdir(sb, root, "Linux_irq_softirq");
+	if (!soft_dir)
+		return -1;
+	gatorfs_create_ulong(sb, soft_dir, "enabled", &softirq_enabled);
+	gatorfs_create_ro_ulong(sb, soft_dir, "key", &softirq_key);
+
+	return 0;
+}
+
+static int gator_events_irq_online(int **buffer, bool migrate)
+{
+	int cpu = get_physical_cpu(), used = 0;
+	int *out = per_cpu(irqGet, cpu);
+
+	// the tallies are simply overwritten, so no synchronization with the irq exit probes is needed
+	if (hardirq_enabled) {
+		atomic_set(&per_cpu(irqCnt, cpu)[HARDIRQ], 0);
+		out[used++] = hardirq_key;
+		out[used++] = 0;
+	}
+	if (softirq_enabled) {
+		atomic_set(&per_cpu(irqCnt, cpu)[SOFTIRQ], 0);
+		out[used++] = softirq_key;
+		out[used++] = 0;
+	}
+
+	if (buffer)
+		*buffer = out;
+
+	return used;
+}
+
+// Register the irq exit probes whose counter is enabled; on failure undo and return -1
+static int gator_events_irq_start(void)
+{
+	// register tracepoints
+	if (hardirq_enabled && GATOR_REGISTER_TRACE(irq_handler_exit))
+		goto fail_hardirq_exit;
+	if (softirq_enabled && GATOR_REGISTER_TRACE(softirq_exit))
+		goto fail_softirq_exit;
+
+	pr_debug("gator: registered irq tracepoints\n");
+
+	return 0;
+
+	// when the softirq probe fails, the hardirq probe is the only one that may need undoing
+fail_softirq_exit:
+	if (hardirq_enabled)
+		GATOR_UNREGISTER_TRACE(irq_handler_exit);
+fail_hardirq_exit:
+	pr_err("gator: irq tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
+
+	return -1;
+}
+
+static void gator_events_irq_stop(void)
+{
+	if (hardirq_enabled)	// same conditions under which gator_events_irq_start() registered the probes
+		GATOR_UNREGISTER_TRACE(irq_handler_exit);
+	if (softirq_enabled)
+		GATOR_UNREGISTER_TRACE(softirq_exit);
+	pr_debug("gator: unregistered irq tracepoints\n");
+
+	softirq_enabled = 0;
+	hardirq_enabled = 0;
+}
+
+// Report and consume the irq tallies of the current physical cpu as (key, value) pairs
+static int gator_events_irq_read(int **buffer)
+{
+	int cpu = get_physical_cpu(), used = 0, taken;
+	atomic_t *counts = per_cpu(irqCnt, cpu);
+	int *out = per_cpu(irqGet, cpu);
+
+	if (hardirq_enabled) {
+		// subtract exactly what was read, so increments arriving in between are kept
+		taken = atomic_read(&counts[HARDIRQ]);
+		atomic_sub(taken, &counts[HARDIRQ]);
+		out[used++] = hardirq_key;
+		out[used++] = taken;
+	}
+
+	if (softirq_enabled) {
+		taken = atomic_read(&counts[SOFTIRQ]);
+		atomic_sub(taken, &counts[SOFTIRQ]);
+		out[used++] = softirq_key;
+		out[used++] = taken;
+	}
+
+	if (buffer)
+		*buffer = out;
+
+	return used;
+}
+
+static struct gator_interface gator_events_irq_interface = {	// handed to gator_events_install() by gator_events_irq_init()
+	.create_files = gator_events_irq_create_files,
+	.online = gator_events_irq_online,	// zeroes the tallies of the current physical cpu
+	.start = gator_events_irq_start,	// registers the irq exit probes
+	.stop = gator_events_irq_stop,	// unregisters them and clears both enable flags
+	.read = gator_events_irq_read,	// reports and consumes the tallies
+};
+
+int gator_events_irq_init(void)
+{
+	hardirq_enabled = 0;	// both counters start out disabled
+	softirq_enabled = 0;
+
+	hardirq_key = gator_events_get_key();	// hardirq key is drawn first, then the softirq key
+	softirq_key = gator_events_get_key();
+
+	return gator_events_install(&gator_events_irq_interface);
+}
diff --git a/drivers/gator/gator_events_l2c-310.c b/drivers/gator/gator_events_l2c-310.c
new file mode 100644
index 000000000000..ee521af22517
--- /dev/null
+++ b/drivers/gator/gator_events_l2c-310.c
@@ -0,0 +1,208 @@
+/**
+ * l2c310 (L2 Cache Controller) event counters for gator
+ *
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#if defined(CONFIG_OF)
+#include <linux/of.h>
+#include <linux/of_address.h>
+#endif
+#include <asm/hardware/cache-l2x0.h>
+
+#include "gator.h"
+
+#define L2C310_COUNTERS_NUM 2
+
+static struct {
+	unsigned long enabled;
+	unsigned long event;
+	unsigned long key;
+} l2c310_counters[L2C310_COUNTERS_NUM];
+
+static int l2c310_buffer[L2C310_COUNTERS_NUM * 2];
+
+static void __iomem *l2c310_base;
+
+static void gator_events_l2c310_reset_counters(void)
+{
+	/* one bit per counter, starting at bit 1 of the event counter control register */
+	const u32 reset_bits = ((1 << L2C310_COUNTERS_NUM) - 1) << 1;
+	u32 ctrl = readl(l2c310_base + L2X0_EVENT_CNT_CTRL);
+
+	writel(ctrl | reset_bits, l2c310_base + L2X0_EVENT_CNT_CTRL);
+}
+
+/* Create an "L2C-310_cnt<n>" gatorfs directory holding "enabled", "event" and a read-only "key"
+ * for each counter; returns -1 (after a WARN) as soon as a directory cannot be created */
+static int gator_events_l2c310_create_files(struct super_block *sb,
+					    struct dentry *root)
+{
+	int idx;
+
+	for (idx = 0; idx < L2C310_COUNTERS_NUM; idx++) {
+		struct dentry *entry;
+		char name[16];
+
+		snprintf(name, sizeof(name), "L2C-310_cnt%d", idx);
+		entry = gatorfs_mkdir(sb, root, name);
+		if (WARN_ON(!entry))
+			return -1;
+
+		gatorfs_create_ulong(sb, entry, "enabled", &l2c310_counters[idx].enabled);
+		gatorfs_create_ulong(sb, entry, "event", &l2c310_counters[idx].event);
+		gatorfs_create_ro_ulong(sb, entry, "key", &l2c310_counters[idx].key);
+	}
+
+	return 0;
+}
+
+/* Program each counter's event source, reset the counters and switch event counting on */
+static int gator_events_l2c310_start(void)
+{
+	static const unsigned long cfg_reg[L2C310_COUNTERS_NUM] = {
+		L2X0_EVENT_CNT0_CFG,
+		L2X0_EVENT_CNT1_CFG,
+	};
+	int idx;
+
+	/* Counter event sources: the low 4 bits of the event go into bits 5:2 */
+	for (idx = 0; idx < L2C310_COUNTERS_NUM; idx++)
+		writel((l2c310_counters[idx].event & 0xf) << 2, l2c310_base + cfg_reg[idx]);
+
+	gator_events_l2c310_reset_counters();
+
+	/* Event counter enable */
+	writel(1, l2c310_base + L2X0_EVENT_CNT_CTRL);
+
+	return 0;
+}
+
+static void gator_events_l2c310_stop(void)
+{
+	/* Event counter disable: writes 0 where gator_events_l2c310_start() wrote 1 */
+	writel(0, l2c310_base + L2X0_EVENT_CNT_CTRL);
+}
+
+/* Report the value of each enabled counter as a (key, value) pair, then reset all counters */
+static int gator_events_l2c310_read(int **buffer)
+{
+	static const unsigned long val_reg[L2C310_COUNTERS_NUM] = {
+		L2X0_EVENT_CNT0_VAL,
+		L2X0_EVENT_CNT1_VAL,
+	};
+	int idx, used = 0;
+
+	if (!on_primary_core())
+		return 0;
+
+	for (idx = 0; idx < L2C310_COUNTERS_NUM; idx++) {
+		if (!l2c310_counters[idx].enabled)
+			continue;
+
+		l2c310_buffer[used++] = l2c310_counters[idx].key;
+		l2c310_buffer[used++] = readl(l2c310_base + val_reg[idx]);
+	}
+
+	/* the counters saturate rather than wrap on overflow, so they are reset after every read */
+	gator_events_l2c310_reset_counters();
+
+	if (buffer)
+		*buffer = l2c310_buffer;
+
+	return used;
+}
+
+static struct gator_interface gator_events_l2c310_interface = {	/* handed to gator_events_install() by gator_events_l2c310_init() */
+	.create_files = gator_events_l2c310_create_files,
+	.start = gator_events_l2c310_start,	/* programs the event sources and enables counting */
+	.stop = gator_events_l2c310_stop,	/* disables counting */
+	.read = gator_events_l2c310_read,	/* reports and resets the counters on the primary core */
+};
+
+#define L2C310_ADDR_PROBE (~0)
+
+MODULE_PARM_DESC(l2c310_addr, "L2C310 physical base address (0 to disable)");
+static unsigned long l2c310_addr = L2C310_ADDR_PROBE;
+module_param(l2c310_addr, ulong, 0444);
+
+static void __iomem *gator_events_l2c310_probe(void)
+{
+	phys_addr_t variants[] = {	/* candidate base addresses, one group per platform config */
+#if defined(CONFIG_ARCH_EXYNOS4) || defined(CONFIG_ARCH_S5PV310)
+		0x10502000,
+#endif
+#if defined(CONFIG_ARCH_OMAP4)
+		0x48242000,
+#endif
+#if defined(CONFIG_ARCH_TEGRA)
+		0x50043000,
+#endif
+#if defined(CONFIG_ARCH_U8500)
+		0xa0412000,
+#endif
+#if defined(CONFIG_ARCH_VEXPRESS)
+		0x1e00a000, // A9x4 core tile (HBI-0191)
+		0x2c0f0000, // New memory map tiles
+#endif
+	};
+	int i;
+	void __iomem *base;
+#if defined(CONFIG_OF)
+	struct device_node *node = of_find_all_nodes(NULL);	/* presumably non-NULL when a device tree is present; TODO confirm */
+
+	if (node) {
+		of_node_put(node);
+
+		node = of_find_compatible_node(NULL, NULL, "arm,pl310-cache");
+		base = of_iomap(node, 0);	/* NOTE(review): node is not checked for NULL before this call */
+		of_node_put(node);
+
+		return base;	/* on this path the address table below is never tried */
+	}
+#endif
+
+	for (i = 0; i < ARRAY_SIZE(variants); i++) {
+		base = ioremap(variants[i], SZ_4K);
+		if (base) {
+			u32 cache_id = readl(base + L2X0_CACHE_ID);
+
+			if ((cache_id & 0xff0003c0) == 0x410000c0)	/* keep the mapping only if the masked cache ID matches */
+				return base;
+
+			iounmap(base);
+		}
+	}
+
+	return NULL;	/* no candidate address matched */
+}
+
+int gator_events_l2c310_init(void)
+{
+	int idx;
+
+	if (!(gator_cpuid() == CORTEX_A5 || gator_cpuid() == CORTEX_A9))
+		return -1;	/* the controller is only looked for on Cortex-A5 and Cortex-A9 */
+
+	if (l2c310_addr == L2C310_ADDR_PROBE)
+		l2c310_base = gator_events_l2c310_probe();
+	else if (l2c310_addr)
+		l2c310_base = ioremap(l2c310_addr, SZ_4K);
+
+	if (!l2c310_base)
+		return -1;	/* disabled with l2c310_addr=0, not found by the probe, or ioremap failed */
+
+	for (idx = 0; idx < L2C310_COUNTERS_NUM; idx++) {
+		l2c310_counters[idx].key = gator_events_get_key();
+		l2c310_counters[idx].enabled = 0;
+	}
+
+	return gator_events_install(&gator_events_l2c310_interface);
+}
diff --git a/drivers/gator/gator_events_mali_4xx.c b/drivers/gator/gator_events_mali_4xx.c
new file mode 100644
index 000000000000..6719c1ec73a2
--- /dev/null
+++ b/drivers/gator/gator_events_mali_4xx.c
@@ -0,0 +1,723 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "gator.h"
+
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+
+#include "linux/mali_linux_trace.h"
+
+#include "gator_events_mali_common.h"
+#include "gator_events_mali_4xx.h"
+
+/*
+ * There are (currently) four different variants of the comms between gator and Mali:
+ * 1 (deprecated): No software counter support
+ * 2 (deprecated): Tracepoint called for each separate s/w counter value as it appears
+ * 3 (default): Single tracepoint for all s/w counters in a bundle.
+ * Interface style 3 is the default if no other is specified.  1 and 2 will be eliminated when
+ * existing Mali DDKs are upgraded.
+ * 4: As interface style 3, but for the Utgard (Mali-450) driver.
+ */
+
+#if !defined(GATOR_MALI_INTERFACE_STYLE)
+#define GATOR_MALI_INTERFACE_STYLE (3)
+#endif
+
+#if GATOR_MALI_INTERFACE_STYLE < 4
+#include "mali/mali_mjollnir_profiling_gator_api.h"
+#else
+#include "mali/mali_utgard_profiling_gator_api.h"
+#endif
+
+/*
+ * Check that the MALI_SUPPORT define is set to one of the allowable device codes.
+ */
+#if (MALI_SUPPORT != MALI_4xx)
+#error MALI_SUPPORT set to an invalid device code: expecting MALI_4xx
+#endif
+
+/* gatorfs variables for counter enable state,
+ * the event the counter should count and the
+ * 'key' (a unique id set by gatord and returned
+ * by gator.ko)
+ */
+static unsigned long counter_enabled[NUMBER_OF_EVENTS];
+static unsigned long counter_event[NUMBER_OF_EVENTS];
+static unsigned long counter_key[NUMBER_OF_EVENTS];
+
+/* The data we have recorded */
+static u32 counter_data[NUMBER_OF_EVENTS];
+/* The address to sample (or 0 if samples are sent to us) */
+static u32 *counter_address[NUMBER_OF_EVENTS];
+
+/* An array used to return the data we recorded
+ * as key,value pairs hence the *2
+ */
+static unsigned long counter_dump[NUMBER_OF_EVENTS * 2];
+static unsigned long counter_prev[NUMBER_OF_EVENTS];
+
+/* Note whether tracepoints have been registered */
+static int trace_registered;
+
+/*
+ * These numbers define the actual numbers of each block type that exist in the system. Initially
+ * these are set to the maxima defined above; if the driver is capable of being queried (newer
+ * drivers only) then the values may be revised.
+ */
+static unsigned int n_vp_cores = MAX_NUM_VP_CORES;
+static unsigned int n_l2_cores = MAX_NUM_L2_CACHE_CORES;
+static unsigned int n_fp_cores = MAX_NUM_FP_CORES;
+
+/**
+ * Returns the number of counts between two samples of a wrapping 32-bit
+ * Mali counter: 'start' is the newer sample, 'end' the older one.
+ */
+static u32 get_difference(u32 start, u32 end)
+{
+	/*
+	 * u32 subtraction is modulo 2^32, so the result is already correct
+	 * across a counter wrap; no separate overflow path is needed.
+	 */
+	return start - end;
+}
+
+/**
+ * Tests whether event_id lies in the inclusive activity-counter ID range.
+ */
+static inline int is_activity_counter(unsigned int event_id)
+{
+	return !(event_id < FIRST_ACTIVITY_EVENT ||
+		 event_id > LAST_ACTIVITY_EVENT);
+}
+
+/**
+ * Tests whether event_id lies in the inclusive hardware-counter ID range.
+ */
+static inline int is_hw_counter(unsigned int event_id)
+{
+	return !(event_id < FIRST_HW_COUNTER || event_id > LAST_HW_COUNTER);
+}
+
+/*
+ * These are provided for utgard compatibility.
+ */
+typedef void _mali_profiling_get_mali_version_type(struct _mali_profiling_mali_version *values);
+typedef u32 _mali_profiling_get_l2_counters_type(_mali_profiling_l2_counter_values *values);
+
+#if GATOR_MALI_INTERFACE_STYLE == 2
+/**
+ * Tests whether event_id lies in the inclusive software-counter ID range.
+ */
+static inline int is_sw_counter(unsigned int event_id)
+{
+	return !(event_id < FIRST_SW_COUNTER || event_id > LAST_SW_COUNTER);
+}
+#endif
+
+#if GATOR_MALI_INTERFACE_STYLE == 2
+/*
+ * Narrow a software counter from the DDK's s64 representation to the 32 bits
+ * gator reports.
+ *
+ * @param event_id the software counter being reported
+ * @param value    the raw 64-bit counter value
+ *
+ * @return value / 1000000 for the two GLES upload-time counters, otherwise
+ *         value itself; either result is then truncated to u32.
+ */
+static u32 scale_sw_counter_value(unsigned int event_id, signed long long value)
+{
+	const int is_upload_time =
+		event_id == COUNTER_GLES_UPLOAD_TEXTURE_TIME ||
+		event_id == COUNTER_GLES_UPLOAD_VBO_TIME;
+
+	if (is_upload_time)
+		return (u32)div_s64(value, 1000000);
+
+	return (u32)value;
+}
+#endif
+
+/* Probe for continuously sampled counter */
+#if 0				//WE_DONT_CURRENTLY_USE_THIS_SO_SUPPRESS_WARNING
+GATOR_DEFINE_PROBE(mali_sample_address, TP_PROTO(unsigned int event_id, u32 *addr))
+{
+	/* Turning on too many pr_debug statements in frequently called functions
+	 * can cause stability and/or performance problems
+	 */
+	//pr_debug("gator: mali_sample_address %d %d\n", event_id, addr);
+	if (event_id >= ACTIVITY_VP && event_id <= COUNTER_FP3_C1) {
+		counter_address[event_id] = addr;
+	}
+}
+#endif
+
+/* Probe for hardware counter events */
+GATOR_DEFINE_PROBE(mali_hw_counter, TP_PROTO(unsigned int event_id, unsigned int value))
+{
+	/*
+	 * Deliberately no pr_debug here: this probe is called frequently and
+	 * debug output on such paths can cause stability and/or performance
+	 * problems.
+	 */
+	if (is_hw_counter(event_id))
+		counter_data[event_id] = value;
+}
+
+#if GATOR_MALI_INTERFACE_STYLE == 2
+GATOR_DEFINE_PROBE(mali_sw_counter, TP_PROTO(unsigned int event_id, signed long long value))
+{
+	/* Values for IDs outside the software-counter range are dropped. */
+	if (is_sw_counter(event_id))
+		counter_data[event_id] = scale_sw_counter_value(event_id, value);
+}
+#endif /* GATOR_MALI_INTERFACE_STYLE == 2 */
+
+#if GATOR_MALI_INTERFACE_STYLE >= 3
+GATOR_DEFINE_PROBE(mali_sw_counters, TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters))
+{
+	u32 event_id;
+
+	/* counters[] holds one value per software counter, starting at
+	 * FIRST_SW_COUNTER; only currently enabled counters are recorded.
+	 */
+	for (event_id = FIRST_SW_COUNTER; event_id <= LAST_SW_COUNTER; event_id++)
+		if (counter_enabled[event_id])
+			counter_data[event_id] = (u32)(counters[event_id - FIRST_SW_COUNTER]);
+}
+#endif /* GATOR_MALI_INTERFACE_STYLE >= 3 */
+
+/**
+ * Create a single filesystem entry for a specified event.
+ * @param sb the superblock
+ * @param root Filesystem root
+ * @param name The name of the entry to create
+ * @param event The ID of the event
+ * @param create_event_item boolean indicating whether to create an 'event' filesystem entry. True to create.
+ *
+ * @return 0 if ok, non-zero if the create failed.
+ */
+static int create_fs_entry(struct super_block *sb, struct dentry *root, const char *name, int event, int create_event_item)
+{
+	struct dentry *dir = gatorfs_mkdir(sb, root, name);
+
+	if (!dir)
+		return -1;
+
+	/* Propagate a failure to create any file, so that 0 really means "entry fully created". */
+	if (create_event_item &&
+	    gatorfs_create_ulong(sb, dir, "event", &counter_event[event]) != 0)
+		return -1;
+
+	if (gatorfs_create_ulong(sb, dir, "enabled", &counter_enabled[event]) != 0)
+		return -1;
+	if (gatorfs_create_ro_ulong(sb, dir, "key", &counter_key[event]) != 0)
+		return -1;
+
+	return 0;
+}
+
+#if GATOR_MALI_INTERFACE_STYLE > 3
+/*
+ * Read the version info structure if available
+ */
+static void initialise_version_info(void)
+{
+	struct _mali_profiling_mali_version version_info;
+	_mali_profiling_get_mali_version_type *get_version;
+
+	/*
+	 * Look the entry point up at run time; if the Mali driver does not
+	 * export it, the core counts keep their current values.
+	 */
+	get_version = symbol_get(_mali_profiling_get_mali_version);
+	if (!get_version) {
+		printk("gator: mali online _mali_profiling_get_mali_version symbol not found\n");
+		return;
+	}
+
+	pr_debug("gator: mali online _mali_profiling_get_mali_version symbol @ %p\n",
+			get_version);
+
+	/* Revise the number of each core type using what the DDK reports. */
+	get_version(&version_info);
+	n_fp_cores = version_info.num_of_fp_cores;
+	n_vp_cores = version_info.num_of_vp_cores;
+	n_l2_cores = version_info.num_of_l2_cores;
+
+	/* The function is not needed again, so drop the reference taken by symbol_get(). */
+	symbol_put(_mali_profiling_get_mali_version);
+}
+#endif
+
+static int create_files(struct super_block *sb, struct dentry *root)
+{
+	int event;
+	const char *mali_name = gator_mali_get_mali_name();
+	/* Creates one gatorfs directory per activity, HW, software and special counter; returns -1 at the first failure, else 0. */
+	char buf[40];	/* entry names are built here; snprintf truncates anything longer */
+	int core_id;
+	int counter_number;
+
+	pr_debug("gator: Initialising counters with style = %d\n", GATOR_MALI_INTERFACE_STYLE);
+
+#if GATOR_MALI_INTERFACE_STYLE > 3
+	/*
+	 * Initialise first: this sets up the number of cores available (on compatible DDK versions).
+	 * Ideally this would not need guarding but other parts of the code depend on the interface style being set
+	 * correctly; if it is not then the system can enter an inconsistent state.
+	 */
+	initialise_version_info();
+#endif
+
+	/* Vertex processor counters: an "active" entry plus cnt0/cnt1 entries for each core */
+	for (core_id = 0; core_id < n_vp_cores; core_id++) {
+		int activity_counter_id = ACTIVITY_VP_0;	/* NOTE(review): unlike the FP loop, core_id is not added - presumably only one VP core is expected; confirm */
+		snprintf(buf, sizeof buf, "ARM_%s_VP_%d_active", mali_name, core_id);
+		if (create_fs_entry(sb, root, buf, activity_counter_id, 0) != 0) {
+			return -1;
+		}
+
+		for (counter_number = 0; counter_number < 2; counter_number++) {
+			int counter_id = COUNTER_VP_0_C0 + (2 * core_id) + counter_number;
+
+			snprintf(buf, sizeof buf, "ARM_%s_VP_%d_cnt%d", mali_name, core_id, counter_number);
+			if (create_fs_entry(sb, root, buf, counter_id, 1) != 0) {
+				return -1;
+			}
+		}
+	}
+
+	/* Fragment processors' counters: an "active" entry plus cnt0/cnt1 entries for each core */
+	for (core_id = 0; core_id < n_fp_cores; core_id++) {
+		int activity_counter_id = ACTIVITY_FP_0 + core_id;
+
+		snprintf(buf, sizeof buf, "ARM_%s_FP_%d_active", mali_name, core_id);
+		if (create_fs_entry(sb, root, buf, activity_counter_id, 0) != 0) {
+			return -1;
+		}
+
+		for (counter_number = 0; counter_number < 2; counter_number++) {
+			int counter_id = COUNTER_FP_0_C0 + (2 * core_id) + counter_number;
+
+			snprintf(buf, sizeof buf, "ARM_%s_FP_%d_cnt%d", mali_name, core_id, counter_number);
+			if (create_fs_entry(sb, root, buf, counter_id, 1) != 0) {
+				return -1;
+			}
+		}
+	}
+
+	/* L2 Cache counters: cnt0/cnt1 entries only, no "active" entry */
+	for (core_id = 0; core_id < n_l2_cores; core_id++) {
+		for (counter_number = 0; counter_number < 2; counter_number++) {
+			int counter_id = COUNTER_L2_0_C0 + (2 * core_id) + counter_number;
+
+			snprintf(buf, sizeof buf, "ARM_%s_L2_%d_cnt%d", mali_name, core_id, counter_number);
+			if (create_fs_entry(sb, root, buf, counter_id, 1) != 0) {
+				return -1;
+			}
+		}
+	}
+
+	/* Now set up the software counter entries (numbered from 0, created without an 'event' file) */
+	for (event = FIRST_SW_COUNTER; event <= LAST_SW_COUNTER; event++) {
+		snprintf(buf, sizeof(buf), "ARM_%s_SW_%d", mali_name, event - FIRST_SW_COUNTER);
+
+		if (create_fs_entry(sb, root, buf, event, 0) != 0) {
+			return -1;
+		}
+	}
+
+	/* Now set up the special counter entries */
+	snprintf(buf, sizeof(buf), "ARM_%s_Filmstrip_cnt0", mali_name);
+	if (create_fs_entry(sb, root, buf, COUNTER_FILMSTRIP, 1) != 0) {
+		return -1;
+	}
+
+#ifdef DVFS_REPORTED_BY_DDK
+	snprintf(buf, sizeof(buf), "ARM_%s_Frequency", mali_name);
+	if (create_fs_entry(sb, root, buf, COUNTER_FREQUENCY, 1) != 0) {
+		return -1;
+	}
+
+	snprintf(buf, sizeof(buf), "ARM_%s_Voltage", mali_name);
+	if (create_fs_entry(sb, root, buf, COUNTER_VOLTAGE, 1) != 0) {
+		return -1;
+	}
+#endif
+
+	return 0;
+}
+
+/*
+ * Local store for the get_counters entry point into the DDK.
+ * This is stored here since it is used very regularly.
+ */
+static mali_profiling_get_counters_type *mali_get_counters = NULL;
+static _mali_profiling_get_l2_counters_type *mali_get_l2_counters = NULL;
+
+/*
+ * Examine list of counters between two index limits and determine if any one is enabled.
+ * Returns 1 if any counter is enabled, 0 if none is.
+ */
+static int is_any_counter_enabled(unsigned int first_counter, unsigned int last_counter)
+{
+	unsigned int id = first_counter;
+
+	/* Both limits are inclusive; stop at the first enabled counter found. */
+	while (id <= last_counter) {
+		if (counter_enabled[id])
+			return 1;
+		id++;
+	}
+	return 0;	/* nothing in the range is enabled */
+}
+
+static void init_counters(unsigned int from_counter, unsigned int to_counter)
+{
+	unsigned int id;
+	mali_profiling_set_event_type *set_event;
+
+	/*
+	 * The Mali driver may or may not export _mali_profiling_set_event; when
+	 * it does, use it to program every HW counter in the inclusive range
+	 * [from_counter, to_counter].
+	 */
+	set_event = symbol_get(_mali_profiling_set_event);
+	if (!set_event) {
+		printk("gator: mali online _mali_profiling_set_event symbol not found\n");
+		return;
+	}
+
+	pr_debug("gator: mali online _mali_profiling_set_event symbol @ %p\n", set_event);
+
+	for (id = from_counter; id <= to_counter; id++) {
+		/* Enabled counters get their selected event; 0xFFFFFFFF is passed for the rest. */
+		if (counter_enabled[id])
+			set_event(id, counter_event[id]);
+		else
+			set_event(id, 0xFFFFFFFF);
+	}
+
+	symbol_put(_mali_profiling_set_event);
+}
+
+static void mali_counter_initialize(void)
+{
+	int i;
+	int core_id;
+	mali_profiling_control_type *mali_control;
+
+	/* Program the HW counters of every L2, VP and FP core (two counters per core). */
+	init_counters(COUNTER_L2_0_C0, COUNTER_L2_0_C0 + (2 * n_l2_cores) - 1);
+	init_counters(COUNTER_VP_0_C0, COUNTER_VP_0_C0 + (2 * n_vp_cores) - 1);
+	init_counters(COUNTER_FP_0_C0, COUNTER_FP_0_C0 + (2 * n_fp_cores) - 1);
+
+	/* Generic control interface for Mali DDK. */
+	mali_control = symbol_get(_mali_profiling_control);
+	if (mali_control) {
+		/* counter_event[COUNTER_FILMSTRIP] packs the rate in bits 0-7 and the resize factor in bits 8-15. */
+		unsigned int rate = counter_event[COUNTER_FILMSTRIP] & 0xff;
+		unsigned int resize_factor = (counter_event[COUNTER_FILMSTRIP] >> 8) & 0xff;
+
+		pr_debug("gator: mali online _mali_profiling_control symbol @ %p\n", mali_control);
+
+		mali_control(SW_COUNTER_ENABLE, (is_any_counter_enabled(FIRST_SW_COUNTER, LAST_SW_COUNTER) ? 1 : 0));
+		mali_control(FBDUMP_CONTROL_ENABLE, (counter_enabled[COUNTER_FILMSTRIP] ? 1 : 0));
+		mali_control(FBDUMP_CONTROL_RATE, rate);
+		mali_control(FBDUMP_CONTROL_RESIZE_FACTOR, resize_factor);
+
+		pr_debug("gator: sent mali_control enabled=%d, rate=%d\n", (counter_enabled[COUNTER_FILMSTRIP] ? 1 : 0), rate);
+
+		symbol_put(_mali_profiling_control);
+	} else {
+		printk("gator: mali online _mali_profiling_control symbol not found\n");
+	}
+
+	/* Cache the counter-read entry points for read(); the references taken
+	 * here are dropped again by mali_counter_deinitialize().
+	 */
+	mali_get_counters = symbol_get(_mali_profiling_get_counters);
+	if (mali_get_counters)
+		pr_debug("gator: mali online _mali_profiling_get_counters symbol @ %p\n", mali_get_counters);
+	else
+		pr_debug("gator WARNING: mali _mali_profiling_get_counters symbol not defined\n");
+
+	mali_get_l2_counters = symbol_get(_mali_profiling_get_l2_counters);
+	if (mali_get_l2_counters)
+		pr_debug("gator: mali online _mali_profiling_get_l2_counters symbol @ %p\n", mali_get_l2_counters);
+	else
+		pr_debug("gator WARNING: mali _mali_profiling_get_l2_counters symbol not defined\n");
+
+	/* Without either entry point the L2 counters cannot be read at all. */
+	if (!mali_get_counters && !mali_get_l2_counters) {
+		pr_debug("gator: WARNING: no L2 counters available\n");
+		n_l2_cores = 0;
+	}
+
+	/* Forget the previous L2 samples so the first delta is taken against zero. */
+	for (core_id = 0; core_id < n_l2_cores; core_id++) {
+		int counter_id = COUNTER_L2_0_C0 + (2 * core_id);
+		counter_prev[counter_id] = 0;
+		counter_prev[counter_id + 1] = 0;
+	}
+
+	/* Clear counters in the start */
+	for (i = 0; i < NUMBER_OF_EVENTS; i++)
+		counter_data[i] = 0;
+}
+
+static void mali_counter_deinitialize(void)
+{
+	mali_profiling_set_event_type *mali_set_hw_event;
+	mali_profiling_control_type *mali_control;
+
+	/* Select event 0xFFFFFFFF (the value init_counters() passes for disabled counters) on every HW counter. */
+	mali_set_hw_event = symbol_get(_mali_profiling_set_event);
+	if (mali_set_hw_event) {
+		int i;
+
+		pr_debug("gator: mali offline _mali_profiling_set_event symbol @ %p\n", mali_set_hw_event);
+		for (i = FIRST_HW_COUNTER; i <= LAST_HW_COUNTER; i++)
+			mali_set_hw_event(i, 0xFFFFFFFF);
+
+		symbol_put(_mali_profiling_set_event);
+	} else {
+		printk("gator: mali offline _mali_profiling_set_event symbol not found\n");
+	}
+
+	/* Generic control interface for Mali DDK. */
+	mali_control = symbol_get(_mali_profiling_control);
+	if (mali_control) {
+		pr_debug("gator: mali offline _mali_profiling_control symbol @ %p\n", mali_control);
+		/* Reset the DDK state - disable counter collection */
+		mali_control(SW_COUNTER_ENABLE, 0);
+		mali_control(FBDUMP_CONTROL_ENABLE, 0);
+
+		symbol_put(_mali_profiling_control);
+	} else {
+		printk("gator: mali offline _mali_profiling_control symbol not found\n");
+	}
+
+	/* Drop the references cached by mali_counter_initialize(), clearing each
+	 * pointer first so no stale entry point can be called or released again.
+	 */
+	if (mali_get_counters) {
+		mali_get_counters = NULL;
+		symbol_put(_mali_profiling_get_counters);
+	}
+	if (mali_get_l2_counters) {
+		mali_get_l2_counters = NULL;
+		symbol_put(_mali_profiling_get_l2_counters);
+	}
+}
+
+static int start(void)
+{
+	/* Register the tracepoints; on any failure leave none of them registered and return -1. */
+	if (GATOR_REGISTER_TRACE(mali_hw_counter)) {
+		printk("gator: mali_hw_counter tracepoint failed to activate\n");
+		return -1;
+	}
+#if GATOR_MALI_INTERFACE_STYLE == 1
+	/* None. */
+#elif GATOR_MALI_INTERFACE_STYLE == 2
+	/* For patched Mali driver. stop() skips unregistration while trace_registered is 0, so roll back here. */
+	if (GATOR_REGISTER_TRACE(mali_sw_counter)) {
+		printk("gator: mali_sw_counter tracepoint failed to activate\n");
+		GATOR_UNREGISTER_TRACE(mali_hw_counter);
+		return -1;
+	}
+#elif GATOR_MALI_INTERFACE_STYLE >= 3
+	/* For Mali drivers with built-in support. stop() skips unregistration while trace_registered is 0, so roll back here. */
+	if (GATOR_REGISTER_TRACE(mali_sw_counters)) {
+		printk("gator: mali_sw_counters tracepoint failed to activate\n");
+		GATOR_UNREGISTER_TRACE(mali_hw_counter);
+		return -1;
+	}
+#else
+#error Unknown GATOR_MALI_INTERFACE_STYLE option.
+#endif
+
+	trace_registered = 1;	/* tells stop() that the tracepoints above need unregistering */
+	mali_counter_initialize();
+	return 0;
+}
+
+static void stop(void)
+{
+	unsigned int cnt;
+
+	pr_debug("gator: mali stop\n");
+	/* Unregister the same tracepoints start() registers for this interface style, but only if it got that far. */
+	if (trace_registered) {
+		GATOR_UNREGISTER_TRACE(mali_hw_counter);
+
+#if GATOR_MALI_INTERFACE_STYLE == 1
+		/* None. */
+#elif GATOR_MALI_INTERFACE_STYLE == 2
+		/* For patched Mali driver. */
+		GATOR_UNREGISTER_TRACE(mali_sw_counter);
+#elif GATOR_MALI_INTERFACE_STYLE >= 3
+		/* For Mali drivers with built-in support. */
+		GATOR_UNREGISTER_TRACE(mali_sw_counters);
+#else
+#error Unknown GATOR_MALI_INTERFACE_STYLE option.
+#endif
+
+		pr_debug("gator: mali timeline tracepoint deactivated\n");
+
+		trace_registered = 0;
+	}
+	/* Disable every counter and forget its event selection and sample address. */
+	for (cnt = 0; cnt < NUMBER_OF_EVENTS; cnt++) {
+		counter_enabled[cnt] = 0;
+		counter_event[cnt] = 0;
+		counter_address[cnt] = NULL;
+	}
+	/* Finally reset the DDK-side counter state and release the cached entry points. */
+	mali_counter_deinitialize();
+}
+
+static void dump_counters(unsigned int from_counter, unsigned int to_counter, unsigned int *len)
+{
+	unsigned int id;
+
+	for (id = from_counter; id <= to_counter; id++) {
+		if (!counter_enabled[id])
+			continue;
+		/* Append a (key, value) pair at *len, then restart the accumulation from zero. */
+		counter_dump[(*len)++] = counter_key[id];
+		counter_dump[(*len)++] = counter_data[id];
+		counter_data[id] = 0;
+	}
+}
+
+static int read(int **buffer)
+{
+	unsigned int len = 0;	/* unsigned to match counter_dump indexing and dump_counters() */
+	/* Fills counter_dump with (key, value) pairs and returns the number of entries written; 0 unless on_primary_core(). */
+	if (!on_primary_core())
+		return 0;
+
+	/* Read the L2 C0 and C1 here; both limits of the enabled check are inclusive, hence the - 1. */
+	if (n_l2_cores > 0 && is_any_counter_enabled(COUNTER_L2_0_C0, COUNTER_L2_0_C0 + (2 * n_l2_cores) - 1)) {
+		unsigned int unavailable_l2_caches = 0;
+		_mali_profiling_l2_counter_values cache_values;
+		unsigned int cache_id;
+		struct _mali_profiling_core_counters *per_core;
+
+		/* Poke the driver to get the counter values. */
+		if (mali_get_l2_counters) {
+			unavailable_l2_caches = mali_get_l2_counters(&cache_values);
+		} else if (mali_get_counters) {
+			/* Older style: only cores[0] is filled in, so treat every other cache as unavailable. */
+			unavailable_l2_caches = ~1u;
+			per_core = &cache_values.cores[0];
+			mali_get_counters(&per_core->source0, &per_core->value0, &per_core->source1, &per_core->value1);
+		} else {
+			/* Should never happen (mali_counter_initialize() zeroes n_l2_cores); nothing was read, so report nothing. */
+			unavailable_l2_caches = ~0u;
+		}
+
+		/* Fill in the two cache counter values for each cache block. */
+		for (cache_id = 0; cache_id < n_l2_cores; cache_id++) {
+			unsigned int counter_id_0 = COUNTER_L2_0_C0 + (2 * cache_id);
+			unsigned int counter_id_1 = counter_id_0 + 1;
+
+			if ((1u << cache_id) & unavailable_l2_caches)
+				continue; /* This cache is unavailable (powered-off, possibly). */
+
+			per_core = &cache_values.cores[cache_id];
+
+			if (counter_enabled[counter_id_0]) {
+				// Calculate and save src0's counter val0
+				counter_dump[len++] = counter_key[counter_id_0];
+				counter_dump[len++] = get_difference(per_core->value0, counter_prev[counter_id_0]);
+			}
+
+			if (counter_enabled[counter_id_1]) {
+				// Calculate and save src1's counter val1
+				counter_dump[len++] = counter_key[counter_id_1];
+				counter_dump[len++] = get_difference(per_core->value1, counter_prev[counter_id_1]);
+			}
+
+			// Save the previous values for the counters.
+			counter_prev[counter_id_0] = per_core->value0;
+			counter_prev[counter_id_1] = per_core->value1;
+		}
+	}
+
+	/* Process other (non-timeline) counters. */
+	dump_counters(COUNTER_VP_0_C0, COUNTER_VP_0_C0 + (2 * n_vp_cores) - 1, &len);
+	dump_counters(COUNTER_FP_0_C0, COUNTER_FP_0_C0 + (2 * n_fp_cores) - 1, &len);
+	dump_counters(FIRST_SW_COUNTER, LAST_SW_COUNTER, &len);
+
+#ifdef DVFS_REPORTED_BY_DDK
+	{
+		int cnt;
+		/*
+		 * Add in the voltage and frequency counters if enabled.  Note that, since these are
+		 * actually passed as events, the counter value should not be cleared.
+		 */
+		cnt = COUNTER_FREQUENCY;
+		if (counter_enabled[cnt]) {
+			counter_dump[len++] = counter_key[cnt];
+			counter_dump[len++] = counter_data[cnt];
+		}
+
+		cnt = COUNTER_VOLTAGE;
+		if (counter_enabled[cnt]) {
+			counter_dump[len++] = counter_key[cnt];
+			counter_dump[len++] = counter_data[cnt];
+		}
+	}
+#endif
+	/* NOTE(review): counter_dump holds unsigned longs but is handed out as int *; presumably relies on both being 32 bits wide - confirm. */
+	if (buffer)
+		*buffer = (int *)counter_dump;
+
+	return len;
+}
+
+static struct gator_interface gator_events_mali_interface = {
+	.create_files = create_files,
+	.start = start,
+	.stop = stop,
+	.read = read,
+};
+
+extern void gator_events_mali_log_dvfs_event(unsigned int frequency_mhz, unsigned int voltage_mv)	/* does nothing unless DVFS_REPORTED_BY_DDK is defined */
+{
+#ifdef DVFS_REPORTED_BY_DDK
+	counter_data[COUNTER_FREQUENCY] = frequency_mhz;	/* overwritten, not accumulated: only the latest value is kept */
+	counter_data[COUNTER_VOLTAGE] = voltage_mv;
+#endif
+}
+
+int gator_events_mali_init(void)
+{
+	unsigned int event;
+
+	pr_debug("gator: mali init\n");
+
+	/* Reset every per-event slot, drawing one key per event in index order. */
+	for (event = 0; event < NUMBER_OF_EVENTS; event++) {
+		counter_key[event] = gator_events_get_key();
+		counter_enabled[event] = 0;
+		counter_event[event] = 0;
+		counter_data[event] = 0;
+		counter_address[event] = NULL;
+	}
+	trace_registered = 0;
+
+	return gator_events_install(&gator_events_mali_interface);
+}
diff --git a/drivers/gator/gator_events_mali_4xx.h b/drivers/gator/gator_events_mali_4xx.h
new file mode 100644
index 000000000000..413ad0ffe794
--- /dev/null
+++ b/drivers/gator/gator_events_mali_4xx.h
@@ -0,0 +1,18 @@
+/**
+ * Copyright (C) ARM Limited 2011-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/*
+ * Header contains common definitions for the Mali-4xx processors.
+ */
+#if !defined(GATOR_EVENTS_MALI_4xx_H)
+#define GATOR_EVENTS_MALI_4xx_H
+
+extern void gator_events_mali_log_dvfs_event(unsigned int frequency_mhz, unsigned int voltage_mv);	/* records the latest DVFS frequency and voltage */
+
+#endif /* GATOR_EVENTS_MALI_4xx_H */
diff --git a/drivers/gator/gator_events_mali_common.c b/drivers/gator/gator_events_mali_common.c
new file mode 100644
index 000000000000..466ca1683c7e
--- /dev/null
+++ b/drivers/gator/gator_events_mali_common.c
@@ -0,0 +1,81 @@
+/**
+ * Copyright (C) ARM Limited 2012-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include "gator_events_mali_common.h"
+
+static u32 gator_mali_get_id(void)	/* device code of the GPU this build was configured for */
+{
+	return MALI_SUPPORT;	/* preprocessor define; compared against MALI_4xx / MALI_T6xx by the caller */
+}
+
+extern const char *gator_mali_get_mali_name(void)
+{
+	const u32 id = gator_mali_get_id();
+
+	/* Map the device code to its display name. */
+	if (id == MALI_T6xx)
+		return "Mali-T6xx";
+	if (id == MALI_4xx)
+		return "Mali-4xx";
+
+	/* Any other device code is logged and given a placeholder name. */
+	pr_debug("gator: Mali-T6xx: unknown Mali ID (%d)\n", id);
+	return "Mali-Unknown";
+}
+
+extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, struct dentry *root, mali_counter *counter, unsigned long *event)
+{
+	char buf[255];
+	struct dentry *dir;
+
+	/*
+	 * Counters with an empty name get no filesystem entry; this is
+	 * reported as success.
+	 */
+	if (event_name[0] == '\0')
+		return 0;
+
+	/* The directory is named ARM_<mali_name>_<event_name>. */
+	snprintf(buf, sizeof(buf), "ARM_%s_%s", mali_name, event_name);
+
+	dir = gatorfs_mkdir(sb, root, buf);
+	if (dir == NULL) {
+		pr_debug("gator: Mali-T6xx: error creating file system for: %s (%s)\n", event_name, buf);
+		return -1;
+	}
+
+	if (gatorfs_create_ulong(sb, dir, "enabled", &counter->enabled) != 0) {
+		pr_debug("gator: Mali-T6xx: error calling gatorfs_create_ulong for: %s (%s)\n", event_name, buf);
+		return -1;
+	}
+
+	if (gatorfs_create_ro_ulong(sb, dir, "key", &counter->key) != 0) {
+		pr_debug("gator: Mali-T6xx: error calling gatorfs_create_ro_ulong for: %s (%s)\n", event_name, buf);
+		return -1;
+	}
+
+	/* The writable "event" file is created only when 'event' is non-NULL. */
+	if (event != NULL && gatorfs_create_ulong(sb, dir, "event", event) != 0) {
+		pr_debug("gator: Mali-T6xx: error calling gatorfs_create_ulong for: %s (%s)\n", event_name, buf);
+		return -1;
+	}
+
+	return 0;
+}
+
+extern void gator_mali_initialise_counters(mali_counter counters[], unsigned int n_counters)
+{
+	mali_counter *counter;
+	mali_counter *const end = counters + n_counters;
+
+	/* Walk the array in order, giving each counter a key and disabling it. */
+	for (counter = counters; counter != end; counter++) {
+		counter->key = gator_events_get_key();
+		counter->enabled = 0;
+	}
+}
diff --git a/drivers/gator/gator_events_mali_common.h b/drivers/gator/gator_events_mali_common.h
new file mode 100644
index 000000000000..509f9b61884a
--- /dev/null
+++ b/drivers/gator/gator_events_mali_common.h
@@ -0,0 +1,86 @@
+/**
+ * Copyright (C) ARM Limited 2012-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#if !defined(GATOR_EVENTS_MALI_COMMON_H)
+#define GATOR_EVENTS_MALI_COMMON_H
+
+#include "gator.h"
+
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+#include <linux/slab.h>
+#include <asm/io.h>
+
+/* Device codes for each known GPU */
+#define MALI_4xx     (0x0b07)
+#define MALI_T6xx    (0x0056)
+
+/* Ensure that MALI_SUPPORT has been defined to something. */
+#ifndef MALI_SUPPORT
+#error MALI_SUPPORT not defined!
+#endif
+
+/* Values for the supported activity event types */
+#define ACTIVITY_START  (1)
+#define ACTIVITY_STOP   (2)
+
+/*
+ * Runtime state information for a counter.
+ */
+typedef struct {
+	unsigned long key;	/* 'key' (a unique id set by gatord and returned by gator.ko) */
+	unsigned long enabled;	/* counter enable state */
+} mali_counter;
+
+/*
+ * Mali-4xx
+ */
+typedef int mali_profiling_set_event_type(unsigned int, int);
+typedef void mali_profiling_control_type(unsigned int, unsigned int);
+typedef void mali_profiling_get_counters_type(unsigned int *, unsigned int *, unsigned int *, unsigned int *);
+
+/*
+ * Driver entry points for functions called directly by gator.
+ */
+extern int _mali_profiling_set_event(unsigned int, int);
+extern void _mali_profiling_control(unsigned int, unsigned int);
+extern void _mali_profiling_get_counters(unsigned int *, unsigned int *, unsigned int *, unsigned int *);
+
+/**
+ * Returns a name which identifies the GPU type (eg Mali-4xx, Mali-T6xx).
+ *
+ * @return The name as a constant string.
+ */
+extern const char *gator_mali_get_mali_name(void);
+
+/**
+ * Creates a filesystem entry under /dev/gator relating to the specified event name and key, and
+ * associate the key/enable values with this entry point.
+ *
+ * @param mali_name A name related to the type of GPU, obtained from a call to gator_mali_get_mali_name()
+ * @param event_name The name of the event.
+ * @param sb Linux super block
+ * @param root Directory under which the entry will be created.
+ * @param counter Ptr to the counter whose 'key' and 'enabled' fields will be associated with the entry.
+ * @param event Optional ptr to the location associated with the entry's 'event' file; NULL to omit that file.
+ *
+ * @return 0 if entry point was created, non-zero if not.
+ */
+extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, struct dentry *root, mali_counter *counter, unsigned long *event);
+
+/**
+ * Initializes the counter array.
+ *
+ * @param counters The array of counters
+ * @param n_counters The number of entries in each of the arrays.
+ */
+extern void gator_mali_initialise_counters(mali_counter counters[], unsigned int n_counters);
+
+#endif /* GATOR_EVENTS_MALI_COMMON_H  */
diff --git a/drivers/gator/gator_events_mali_t6xx.c b/drivers/gator/gator_events_mali_t6xx.c
new file mode 100644
index 000000000000..7bf7d6a6dbf9
--- /dev/null
+++ b/drivers/gator/gator_events_mali_t6xx.c
@@ -0,0 +1,560 @@
+/**
+ * Copyright (C) ARM Limited 2011-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+#include <linux/slab.h>
+#include <asm/io.h>
+
+#include "linux/mali_linux_trace.h"
+
+#include "gator_events_mali_common.h"
+
+/*
+ * Check that the MALI_SUPPORT define is set to one of the allowable device codes.
+ */
+#if (MALI_SUPPORT != MALI_T6xx)
+#error MALI_SUPPORT set to an invalid device code: expecting MALI_T6xx
+#endif
+
+/* Counters for Mali-T6xx:
+ *
+ *  - Timeline events
+ *    They are tracepoints, but instead of reporting a number they report a START/STOP event.
+ *    They are reported in Streamline as number of microseconds while that particular counter was active.
+ *
+ *  - SW counters
+ *    They are tracepoints reporting a particular number.
+ *    They are accumulated in sw_counter_data array until they are passed to Streamline, then they are zeroed.
+ *
+ *  - Accumulators
+ *    They are the same as software counters but their value is not zeroed.
+ */
+
+/*
+ * Timeline (start/stop) activity names.
+ *
+ * The entries are in the same order as the enum that follows: the name at
+ * position i belongs to the timeline slot with enum value i.  Neither the
+ * strings nor the pointers are ever modified, so the table is fully const.
+ */
+static const char *const timeline_event_names[] = {
+	"PM_SHADER_0",
+	"PM_SHADER_1",
+	"PM_SHADER_2",
+	"PM_SHADER_3",
+	"PM_SHADER_4",
+	"PM_SHADER_5",
+	"PM_SHADER_6",
+	"PM_SHADER_7",
+	"PM_TILER_0",
+	"PM_L2_0",
+	"PM_L2_1",
+	"MMU_AS_0",
+	"MMU_AS_1",
+	"MMU_AS_2",
+	"MMU_AS_3"
+};
+
+/* Timeline slot indices, one per entry of timeline_event_names and in the same order. */
+enum {
+	PM_SHADER_0 = 0,
+	PM_SHADER_1,
+	PM_SHADER_2,
+	PM_SHADER_3,
+	PM_SHADER_4,
+	PM_SHADER_5,
+	PM_SHADER_6,
+	PM_SHADER_7,
+	PM_TILER_0,
+	PM_L2_0,
+	PM_L2_1,
+	MMU_AS_0,
+	MMU_AS_1,
+	MMU_AS_2,
+	MMU_AS_3
+};
+/* The number of shader blocks in the enum above */
+#define NUM_PM_SHADER (8)
+
+/*
+ * Software counter names, in the same order as the enum that follows.
+ * The table is never modified, so both the strings and the pointers are const.
+ */
+static const char *const software_counter_names[] = {
+	"MMU_PAGE_FAULT_0",
+	"MMU_PAGE_FAULT_1",
+	"MMU_PAGE_FAULT_2",
+	"MMU_PAGE_FAULT_3"
+};
+
+/* Software counter slot indices, one per entry of software_counter_names. */
+enum {
+	MMU_PAGE_FAULT_0 = 0,
+	MMU_PAGE_FAULT_1,
+	MMU_PAGE_FAULT_2,
+	MMU_PAGE_FAULT_3
+};
+
+/*
+ * Accumulator names, in the same order as the enum that follows.
+ * The table is never modified, so both the strings and the pointers are const.
+ */
+static const char *const accumulators_names[] = {
+	"TOTAL_ALLOC_PAGES"
+};
+
+/* Accumulator slot indices, one per entry of accumulators_names. */
+enum {
+	TOTAL_ALLOC_PAGES = 0
+};
+
+/*
+ * Layout of the counters[] array: the timeline events come first, followed by
+ * the software counters, then the accumulators, and finally one extra slot
+ * for the filmstrip counter.  Each FIRST_* macro is the index of the first
+ * slot of its group and each NUMBER_OF_* macro is the size of the matching
+ * name table.
+ */
+#define FIRST_TIMELINE_EVENT (0)
+#define NUMBER_OF_TIMELINE_EVENTS (sizeof(timeline_event_names) / sizeof(timeline_event_names[0]))
+#define FIRST_SOFTWARE_COUNTER (FIRST_TIMELINE_EVENT + NUMBER_OF_TIMELINE_EVENTS)
+#define NUMBER_OF_SOFTWARE_COUNTERS (sizeof(software_counter_names) / sizeof(software_counter_names[0]))
+#define FIRST_ACCUMULATOR (FIRST_SOFTWARE_COUNTER + NUMBER_OF_SOFTWARE_COUNTERS)
+#define NUMBER_OF_ACCUMULATORS (sizeof(accumulators_names) / sizeof(accumulators_names[0]))
+/* Index of the filmstrip slot: the one directly after the last accumulator. */
+#define FILMSTRIP (NUMBER_OF_TIMELINE_EVENTS + NUMBER_OF_SOFTWARE_COUNTERS + NUMBER_OF_ACCUMULATORS)
+/* Total number of slots; the trailing + 1 accounts for the filmstrip slot. */
+#define NUMBER_OF_EVENTS (NUMBER_OF_TIMELINE_EVENTS + NUMBER_OF_SOFTWARE_COUNTERS + NUMBER_OF_ACCUMULATORS + 1)
+
+/*
+ * gatorfs variables for counter enable state
+ */
+static mali_counter counters[NUMBER_OF_EVENTS];
+static unsigned long filmstrip_event;
+
+/*
+ * Buffer used to return the recorded data as consecutive key,value pairs,
+ * hence the * 2.
+ *
+ * read() publishes this buffer through its int ** parameter, so the element
+ * type has to be int: with unsigned long elements the pairs are only laid
+ * out as the consumer expects when long and int have the same size.
+ */
+static int counter_dump[NUMBER_OF_EVENTS * 2];
+
+/*
+ * Array holding the start time of each timeline activity.  A tv_sec of zero
+ * indicates that the activity monitored by this counter is not running.
+ */
+static struct timespec timeline_event_starttime[NUMBER_OF_TIMELINE_EVENTS];
+
+/* The data we have recorded */
+static unsigned int timeline_data[NUMBER_OF_TIMELINE_EVENTS];
+static unsigned int sw_counter_data[NUMBER_OF_SOFTWARE_COUNTERS];
+static unsigned int accumulators_data[NUMBER_OF_ACCUMULATORS];
+
+/* Hold the previous timestamp, used to calculate the sample interval. */
+static struct timespec prev_timestamp;
+
+/**
+ * Computes how many microseconds lie between two timestamps.
+ *
+ * The nanosecond difference is truncated to whole microseconds before the
+ * seconds difference is added.
+ *
+ * @param start Ptr to the earlier timestamp
+ * @param end Ptr to the later timestamp
+ *
+ * @return end - start in microseconds; negative when start is later than end.
+ */
+static inline long get_duration_us(const struct timespec *start, const struct timespec *end)
+{
+	const long seconds_part_us = (end->tv_sec - start->tv_sec) * 1000000;
+	const long nanos_part_us = (end->tv_nsec - start->tv_nsec) / 1000;
+
+	return nanos_part_us + seconds_part_us;
+}
+
+/**
+ * Records the start or the end of a timeline activity.
+ *
+ * On ACTIVITY_START the current time is stored as the start time unless the
+ * activity is already marked as running.  On ACTIVITY_STOP the elapsed time
+ * since the stored start is added to timeline_data[] and the activity is
+ * marked as stopped again.  Any other type is ignored.
+ *
+ * @param timeline_index Slot of the activity (one of the timeline enum values).
+ *                       Out-of-range values are ignored.
+ * @param type ACTIVITY_START or ACTIVITY_STOP.
+ */
+static void record_timeline_event(unsigned int timeline_index, unsigned int type)
+{
+	struct timespec event_timestamp;
+	struct timespec *event_start;
+
+	/*
+	 * Callers build the index from IDs supplied by the Mali driver
+	 * (e.g. MMU_AS_0 + event_id).  Reject anything without a slot instead of
+	 * touching memory outside the timeline arrays; a negative ID arrives here
+	 * as a huge unsigned value and is rejected too.
+	 */
+	if (timeline_index >= NUMBER_OF_TIMELINE_EVENTS) {
+		return;
+	}
+
+	event_start = &timeline_event_starttime[timeline_index];
+
+	switch (type) {
+	case ACTIVITY_START:
+		/* Get the event time... */
+		getnstimeofday(&event_timestamp);
+
+		/* Remember the start time if the activity is not already started */
+		if (event_start->tv_sec == 0) {
+			*event_start = event_timestamp;	/* Structure copy */
+		}
+		break;
+
+	case ACTIVITY_STOP:
+		/* if the counter was started... */
+		if (event_start->tv_sec != 0) {
+			/* Get the event time... */
+			getnstimeofday(&event_timestamp);
+
+			/* Accumulate the duration in us */
+			timeline_data[timeline_index] += get_duration_us(event_start, &event_timestamp);
+
+			/* Reset the start time to indicate the activity is stopped. */
+			event_start->tv_sec = 0;
+		}
+		break;
+
+	default:
+		/* Other activity events are ignored. */
+		break;
+	}
+}
+
+/*
+ * Documentation about the following tracepoints is in mali_linux_trace.h
+ */
+
+/*
+ * Power-management status probe.  event_id selects the block whose bitmap is
+ * being reported and value is the new bitmap.  Every bit that differs from
+ * the bitmap seen last time for that block becomes a timeline START (bit now
+ * set) or STOP (bit now clear) event.
+ */
+GATOR_DEFINE_PROBE(mali_pm_status, TP_PROTO(unsigned int event_id, unsigned long long value))
+{
+#define SHADER_PRESENT_LO       0x100	/* (RO) Shader core present bitmap, low word */
+#define TILER_PRESENT_LO        0x110	/* (RO) Tiler core present bitmap, low word */
+#define L2_PRESENT_LO           0x120	/* (RO) Level 2 cache present bitmap, low word */
+#define BIT_AT(value, pos) (((value) >> (pos)) & 1)
+
+	/* Bitmap last reported for each block; all zero until the first report. */
+	static unsigned long long last_shader_mask;
+	static unsigned long long last_tiler_mask;
+	static unsigned long long last_l2_mask;
+
+	if (event_id == SHADER_PRESENT_LO) {
+		const unsigned long long toggled = last_shader_mask ^ value;
+		int core;
+
+		for (core = 0; core < NUM_PM_SHADER; core++) {
+			if (!BIT_AT(toggled, core)) {
+				continue;
+			}
+			record_timeline_event(PM_SHADER_0 + core, BIT_AT(value, core) ? ACTIVITY_START : ACTIVITY_STOP);
+		}
+
+		last_shader_mask = value;
+	} else if (event_id == TILER_PRESENT_LO) {
+		const unsigned long long toggled = last_tiler_mask ^ value;
+
+		if (BIT_AT(toggled, 0)) {
+			record_timeline_event(PM_TILER_0, BIT_AT(value, 0) ? ACTIVITY_START : ACTIVITY_STOP);
+		}
+
+		last_tiler_mask = value;
+	} else if (event_id == L2_PRESENT_LO) {
+		const unsigned long long toggled = last_l2_mask ^ value;
+
+		/* The two L2 timeline slots are driven by bits 0 and 4 of the bitmap. */
+		if (BIT_AT(toggled, 0)) {
+			record_timeline_event(PM_L2_0, BIT_AT(value, 0) ? ACTIVITY_START : ACTIVITY_STOP);
+		}
+		if (BIT_AT(toggled, 4)) {
+			record_timeline_event(PM_L2_1, BIT_AT(value, 4) ? ACTIVITY_START : ACTIVITY_STOP);
+		}
+
+		last_l2_mask = value;
+	}
+	/* No other blocks are supported at present; their events are ignored. */
+
+#undef SHADER_PRESENT_LO
+#undef TILER_PRESENT_LO
+#undef L2_PRESENT_LO
+#undef BIT_AT
+}
+
+/*
+ * Page-fault probe: adds value to the software counter slot selected by
+ * event_id (MMU_PAGE_FAULT_0 + event_id).
+ */
+GATOR_DEFINE_PROBE(mali_page_fault_insert_pages, TP_PROTO(int event_id, unsigned long value))
+{
+	/*
+	 * event_id comes from the Mali driver.  Ignore IDs that have no slot
+	 * rather than writing outside sw_counter_data[].
+	 */
+	if (event_id < 0 || event_id >= (int)NUMBER_OF_SOFTWARE_COUNTERS) {
+		return;
+	}
+
+	/* We add to the previous since we may receive many tracepoints in one sample period */
+	sw_counter_data[MMU_PAGE_FAULT_0 + event_id] += value;
+}
+
+/* Address-space probe: marks the MMU_AS_n timeline slot selected by event_id as started. */
+GATOR_DEFINE_PROBE(mali_mmu_as_in_use, TP_PROTO(int event_id))
+{
+	const unsigned int as_slot = MMU_AS_0 + event_id;
+
+	record_timeline_event(as_slot, ACTIVITY_START);
+}
+
+/* Address-space probe: marks the MMU_AS_n timeline slot selected by event_id as stopped. */
+GATOR_DEFINE_PROBE(mali_mmu_as_released, TP_PROTO(int event_id))
+{
+	const unsigned int as_slot = MMU_AS_0 + event_id;
+
+	record_timeline_event(as_slot, ACTIVITY_STOP);
+}
+
+/*
+ * Allocation probe: overwrites the TOTAL_ALLOC_PAGES accumulator with the
+ * reported value.  Despite its name, the event_id argument is the value that
+ * gets stored; it is narrowed to unsigned int by the store.
+ */
+GATOR_DEFINE_PROBE(mali_total_alloc_pages_change, TP_PROTO(long long int event_id))
+{
+	const unsigned int reported_total = event_id;
+
+	accumulators_data[TOTAL_ALLOC_PAGES] = reported_total;
+}
+
+/**
+ * Creates the gatorfs entries for every timeline event, software counter and
+ * accumulator, plus the "Filmstrip_cnt0" entry when the Mali driver exports
+ * _mali_profiling_control.
+ *
+ * @param sb Linux super block
+ * @param root Directory under which the entries are created.
+ *
+ * @return 0 on success, -1 as soon as one entry cannot be created.
+ */
+static int create_files(struct super_block *sb, struct dentry *root)
+{
+	int event;
+	/*
+	 * Create the filesystem for all events
+	 */
+	int counter_index = 0;
+	const char *mali_name = gator_mali_get_mali_name();
+	mali_profiling_control_type *mali_control;
+
+	for (event = FIRST_TIMELINE_EVENT; event < FIRST_TIMELINE_EVENT + NUMBER_OF_TIMELINE_EVENTS; event++) {
+		if (gator_mali_create_file_system(mali_name, timeline_event_names[counter_index], sb, root, &counters[event], NULL) != 0) {
+			return -1;
+		}
+		counter_index++;
+	}
+	counter_index = 0;
+	for (event = FIRST_SOFTWARE_COUNTER; event < FIRST_SOFTWARE_COUNTER + NUMBER_OF_SOFTWARE_COUNTERS; event++) {
+		if (gator_mali_create_file_system(mali_name, software_counter_names[counter_index], sb, root, &counters[event], NULL) != 0) {
+			return -1;
+		}
+		counter_index++;
+	}
+	counter_index = 0;
+	for (event = FIRST_ACCUMULATOR; event < FIRST_ACCUMULATOR + NUMBER_OF_ACCUMULATORS; event++) {
+		if (gator_mali_create_file_system(mali_name, accumulators_names[counter_index], sb, root, &counters[event], NULL) != 0) {
+			return -1;
+		}
+		counter_index++;
+	}
+
+	/* The filmstrip entry is only offered when the control entry point exists. */
+	mali_control = symbol_get(_mali_profiling_control);
+	if (mali_control) {
+		int err = gator_mali_create_file_system(mali_name, "Filmstrip_cnt0", sb, root, &counters[FILMSTRIP], &filmstrip_event);
+
+		/*
+		 * The symbol was only looked up to test for its presence.  Drop the
+		 * reference before checking the result so that it is released on the
+		 * failure path as well.
+		 */
+		symbol_put(_mali_profiling_control);
+
+		if (err != 0) {
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * Registers all Mali-T6xx tracepoint probes.
+ *
+ * If any registration fails, the probes registered so far are unregistered
+ * again, so a failed call leaves no probe attached.
+ *
+ * @return 1 if every tracepoint was registered, 0 otherwise.
+ */
+static int register_tracepoints(void)
+{
+	if (GATOR_REGISTER_TRACE(mali_pm_status)) {
+		pr_debug("gator: Mali-T6xx: mali_pm_status tracepoint failed to activate\n");
+		goto fail_pm_status;
+	}
+
+	if (GATOR_REGISTER_TRACE(mali_page_fault_insert_pages)) {
+		pr_debug("gator: Mali-T6xx: mali_page_fault_insert_pages tracepoint failed to activate\n");
+		goto fail_page_fault_insert_pages;
+	}
+
+	if (GATOR_REGISTER_TRACE(mali_mmu_as_in_use)) {
+		pr_debug("gator: Mali-T6xx: mali_mmu_as_in_use tracepoint failed to activate\n");
+		goto fail_mmu_as_in_use;
+	}
+
+	if (GATOR_REGISTER_TRACE(mali_mmu_as_released)) {
+		pr_debug("gator: Mali-T6xx: mali_mmu_as_released tracepoint failed to activate\n");
+		goto fail_mmu_as_released;
+	}
+
+	if (GATOR_REGISTER_TRACE(mali_total_alloc_pages_change)) {
+		pr_debug("gator: Mali-T6xx: mali_total_alloc_pages_change tracepoint failed to activate\n");
+		goto fail_total_alloc_pages_change;
+	}
+
+	pr_debug("gator: Mali-T6xx: start\n");
+	pr_debug("gator: Mali-T6xx: mali_pm_status probe is at %p\n", &probe_mali_pm_status);
+	pr_debug("gator: Mali-T6xx: mali_page_fault_insert_pages probe is at %p\n", &probe_mali_page_fault_insert_pages);
+	pr_debug("gator: Mali-T6xx: mali_mmu_as_in_use probe is at %p\n", &probe_mali_mmu_as_in_use);
+	pr_debug("gator: Mali-T6xx: mali_mmu_as_released probe is at %p\n", &probe_mali_mmu_as_released);
+	pr_debug("gator: Mali-T6xx: mali_total_alloc_pages_change probe is at %p\n", &probe_mali_total_alloc_pages_change);
+
+	return 1;
+
+	/* Unwind in reverse registration order; each label undoes the steps before the failed one. */
+fail_total_alloc_pages_change:
+	GATOR_UNREGISTER_TRACE(mali_mmu_as_released);
+fail_mmu_as_released:
+	GATOR_UNREGISTER_TRACE(mali_mmu_as_in_use);
+fail_mmu_as_in_use:
+	GATOR_UNREGISTER_TRACE(mali_page_fault_insert_pages);
+fail_page_fault_insert_pages:
+	GATOR_UNREGISTER_TRACE(mali_pm_status);
+fail_pm_status:
+	return 0;
+}
+
+/**
+ * Begins a capture: clears all recorded data, registers the tracepoints and,
+ * when the Mali driver exports _mali_profiling_control, sends it the
+ * filmstrip settings.
+ *
+ * @return 0 on success, -1 if the tracepoints could not be registered.
+ */
+static int start(void)
+{
+	unsigned int cnt;
+	mali_profiling_control_type *mali_control;
+
+	/* Clean all data for the next capture */
+	for (cnt = 0; cnt < NUMBER_OF_TIMELINE_EVENTS; cnt++) {
+		timeline_event_starttime[cnt].tv_sec = timeline_event_starttime[cnt].tv_nsec = 0;
+		timeline_data[cnt] = 0;
+	}
+
+	for (cnt = 0; cnt < NUMBER_OF_SOFTWARE_COUNTERS; cnt++) {
+		sw_counter_data[cnt] = 0;
+	}
+
+	for (cnt = 0; cnt < NUMBER_OF_ACCUMULATORS; cnt++) {
+		accumulators_data[cnt] = 0;
+	}
+
+	/* Register tracepoints */
+	if (register_tracepoints() == 0) {
+		return -1;
+	}
+
+	/* Generic control interface for Mali DDK. */
+	mali_control = symbol_get(_mali_profiling_control);
+	if (mali_control) {
+		/* The event attribute in the XML file keeps the actual frame rate. */
+		unsigned int enabled = counters[FILMSTRIP].enabled ? 1 : 0;
+		/* filmstrip_event packs the rate in bits 0-7 and the resize factor in bits 8-15. */
+		unsigned int rate = filmstrip_event & 0xff;
+		unsigned int resize_factor = (filmstrip_event >> 8) & 0xff;
+
+		pr_debug("gator: mali online _mali_profiling_control symbol @ %p\n", mali_control);
+
+/* These identifiers stay defined after this function; stop() uses FBDUMP_CONTROL_ENABLE. */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+		mali_control(FBDUMP_CONTROL_ENABLE, enabled);
+		mali_control(FBDUMP_CONTROL_RATE, rate);
+		mali_control(FBDUMP_CONTROL_RESIZE_FACTOR, resize_factor);
+
+		/* All three values are unsigned int, so print them with %u. */
+		pr_debug("gator: sent mali_control enabled=%u, rate=%u, resize_factor=%u\n", enabled, rate, resize_factor);
+
+		symbol_put(_mali_profiling_control);
+	} else {
+		printk("gator: mali online _mali_profiling_control symbol not found\n");
+	}
+
+	/*
+	 * Set the first timestamp for calculating the sample interval. The first interval could be quite long,
+	 * since it will be the time between 'start' and the first 'read'.
+	 * This means that timeline values will be divided by a big number for the first sample.
+	 */
+	getnstimeofday(&prev_timestamp);
+
+	return 0;
+}
+
+/**
+ * Ends a capture: unregisters every tracepoint probe and, when the Mali
+ * driver exports _mali_profiling_control, tells it to disable the filmstrip
+ * dump.
+ */
+static void stop(void)
+{
+	mali_profiling_control_type *control_fn;
+
+	pr_debug("gator: Mali-T6xx: stop\n");
+
+	/*
+	 * Unregistering a trace that was never registered is harmless, so the
+	 * probes are removed unconditionally.
+	 */
+	GATOR_UNREGISTER_TRACE(mali_pm_status);
+	pr_debug("gator: Mali-T6xx: mali_pm_status tracepoint deactivated\n");
+
+	GATOR_UNREGISTER_TRACE(mali_page_fault_insert_pages);
+	pr_debug("gator: Mali-T6xx: mali_page_fault_insert_pages tracepoint deactivated\n");
+
+	GATOR_UNREGISTER_TRACE(mali_mmu_as_in_use);
+	pr_debug("gator: Mali-T6xx: mali_mmu_as_in_use tracepoint deactivated\n");
+
+	GATOR_UNREGISTER_TRACE(mali_mmu_as_released);
+	pr_debug("gator: Mali-T6xx: mali_mmu_as_released tracepoint deactivated\n");
+
+	GATOR_UNREGISTER_TRACE(mali_total_alloc_pages_change);
+	pr_debug("gator: Mali-T6xx: mali_total_alloc_pages_change tracepoint deactivated\n");
+
+	/* Generic control interface for Mali DDK. */
+	control_fn = symbol_get(_mali_profiling_control);
+	if (!control_fn) {
+		printk("gator: mali offline _mali_profiling_control symbol not found\n");
+		return;
+	}
+
+	pr_debug("gator: mali offline _mali_profiling_control symbol @ %p\n", control_fn);
+
+	/* FBDUMP_CONTROL_ENABLE is the identifier defined inside start(). */
+	control_fn(FBDUMP_CONTROL_ENABLE, 0);
+
+	symbol_put(_mali_profiling_control);
+}
+
+/**
+ * Collects the values recorded since the previous call into counter_dump as
+ * key,value pairs, one pair per enabled counter.
+ *
+ * Timeline counters are reported as the percentage of the sample interval
+ * during which the activity was running and are then cleared.  Software
+ * counters are reported and cleared.  Accumulators are reported and kept.
+ * Nothing is reported unless on_primary_core() is true.
+ *
+ * @param buffer If not NULL, receives a pointer to the key,value pairs.
+ *
+ * @return The number of entries written (two per reported counter).
+ */
+static int read(int **buffer)
+{
+	int cnt;
+	int len = 0;
+	long sample_interval_us = 0;
+	struct timespec read_timestamp;
+
+	if (!on_primary_core()) {
+		return 0;
+	}
+
+	/* Get the start of this sample period. */
+	getnstimeofday(&read_timestamp);
+
+	/*
+	 * Calculate the sample interval if the previous sample time is valid.
+	 * We use tv_sec since it will not be 0.
+	 */
+	if (prev_timestamp.tv_sec != 0) {
+		sample_interval_us = get_duration_us(&prev_timestamp, &read_timestamp);
+	}
+
+	/* Structure copy. Update the previous timestamp. */
+	prev_timestamp = read_timestamp;
+
+	/*
+	 * Report the timeline counters (ACTIVITY_START/STOP)
+	 */
+	for (cnt = FIRST_TIMELINE_EVENT; cnt < (FIRST_TIMELINE_EVENT + NUMBER_OF_TIMELINE_EVENTS); cnt++) {
+		mali_counter *counter = &counters[cnt];
+		if (counter->enabled) {
+			const int index = cnt - FIRST_TIMELINE_EVENT;
+			unsigned int value;
+
+			/* If the activity is still running, reset its start time to the start of this sample period
+			 * to correct the count.  Add the time up to the end of the sample onto the count. */
+			if (timeline_event_starttime[index].tv_sec != 0) {
+				const long event_duration = get_duration_us(&timeline_event_starttime[index], &read_timestamp);
+				timeline_data[index] += event_duration;
+				timeline_event_starttime[index] = read_timestamp;	/* Activity is still running. */
+			}
+
+			/*
+			 * Only a positive interval is a usable divisor; a zero or negative
+			 * one (getnstimeofday() is wall-clock time and can step backwards)
+			 * is reported as zero.
+			 */
+			if (sample_interval_us > 0) {
+				/*
+				 * Convert the counter to a percent-of-sample value.  The
+				 * multiplication is done in 64 bits because a 32-bit
+				 * microsecond count times 100 overflows after roughly 43
+				 * seconds of activity.
+				 */
+				value = div64_u64((u64)timeline_data[index] * 100, (u64)sample_interval_us);
+			} else {
+				pr_debug("gator: Mali-T6xx: setting value to zero\n");
+				value = 0;
+			}
+
+			/* Clear the counter value ready for the next sample. */
+			timeline_data[index] = 0;
+
+			counter_dump[len++] = counter->key;
+			counter_dump[len++] = value;
+		}
+	}
+
+	/* Report the software counters */
+	for (cnt = FIRST_SOFTWARE_COUNTER; cnt < (FIRST_SOFTWARE_COUNTER + NUMBER_OF_SOFTWARE_COUNTERS); cnt++) {
+		const mali_counter *counter = &counters[cnt];
+		if (counter->enabled) {
+			const int index = cnt - FIRST_SOFTWARE_COUNTER;
+			counter_dump[len++] = counter->key;
+			counter_dump[len++] = sw_counter_data[index];
+			/* Set the value to zero for the next time */
+			sw_counter_data[index] = 0;
+		}
+	}
+
+	/* Report the accumulators */
+	for (cnt = FIRST_ACCUMULATOR; cnt < (FIRST_ACCUMULATOR + NUMBER_OF_ACCUMULATORS); cnt++) {
+		const mali_counter *counter = &counters[cnt];
+		if (counter->enabled) {
+			const int index = cnt - FIRST_ACCUMULATOR;
+			counter_dump[len++] = counter->key;
+			counter_dump[len++] = accumulators_data[index];
+			/* Do not zero the accumulator */
+		}
+	}
+
+	/* Update the buffer */
+	if (buffer) {
+		*buffer = (int *)counter_dump;
+	}
+
+	return len;
+}
+
+/*
+ * The callbacks of this event source.  The structure is handed to
+ * gator_events_install() by gator_events_mali_t6xx_init().
+ */
+static struct gator_interface gator_events_mali_t6xx_interface = {
+	.create_files = create_files,
+	.start = start,
+	.stop = stop,
+	.read = read
+};
+
+/**
+ * Initialises the counter array and installs this event source with gator.
+ *
+ * @return The result of gator_events_install().
+ */
+extern int gator_events_mali_t6xx_init(void)
+{
+	int install_result;
+
+	pr_debug("gator: Mali-T6xx: sw_counters init\n");
+
+	gator_mali_initialise_counters(counters, NUMBER_OF_EVENTS);
+	install_result = gator_events_install(&gator_events_mali_t6xx_interface);
+
+	return install_result;
+}
diff --git a/drivers/gator/gator_events_mali_t6xx_hw.c b/drivers/gator/gator_events_mali_t6xx_hw.c
new file mode 100644
index 000000000000..e406991398d9
--- /dev/null
+++ b/drivers/gator/gator_events_mali_t6xx_hw.c
@@ -0,0 +1,784 @@
+/**
+ * Copyright (C) ARM Limited 2012-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+#include <linux/slab.h>
+#include <asm/io.h>
+
+/* Mali T6xx DDK includes */
+#include "linux/mali_linux_trace.h"
+#include "kbase/src/common/mali_kbase.h"
+#include "kbase/src/linux/mali_kbase_mem_linux.h"
+
+#include "gator_events_mali_common.h"
+
+/* If API version is not specified then assume API version 1. */
+#ifndef MALI_DDK_GATOR_API_VERSION
+#define MALI_DDK_GATOR_API_VERSION 1
+#endif
+
+#if (MALI_DDK_GATOR_API_VERSION != 1) && (MALI_DDK_GATOR_API_VERSION != 2)
+#error MALI_DDK_GATOR_API_VERSION is invalid (must be 1 for r1/r2 DDK, or 2 for r3 DDK).
+#endif
+
+/*
+ * Mali-T6xx
+ */
+typedef struct kbase_device *kbase_find_device_type(int);
+typedef kbase_context *kbase_create_context_type(kbase_device *);
+typedef void kbase_destroy_context_type(kbase_context *);
+
+#if MALI_DDK_GATOR_API_VERSION == 1
+typedef void *kbase_va_alloc_type(kbase_context *, u32);
+typedef void kbase_va_free_type(kbase_context *, void *);
+#elif MALI_DDK_GATOR_API_VERSION == 2
+typedef void *kbase_va_alloc_type(kbase_context *, u32, kbase_hwc_dma_mapping * handle);
+typedef void kbase_va_free_type(kbase_context *, kbase_hwc_dma_mapping * handle);
+#endif
+
+typedef mali_error kbase_instr_hwcnt_enable_type(kbase_context *, kbase_uk_hwcnt_setup *);
+typedef mali_error kbase_instr_hwcnt_disable_type(kbase_context *);
+typedef mali_error kbase_instr_hwcnt_clear_type(kbase_context *);
+typedef mali_error kbase_instr_hwcnt_dump_irq_type(kbase_context *);
+typedef mali_bool kbase_instr_hwcnt_dump_complete_type(kbase_context *, mali_bool *);
+
+static kbase_find_device_type *kbase_find_device_symbol;
+static kbase_create_context_type *kbase_create_context_symbol;
+static kbase_va_alloc_type *kbase_va_alloc_symbol;
+static kbase_instr_hwcnt_enable_type *kbase_instr_hwcnt_enable_symbol;
+static kbase_instr_hwcnt_clear_type *kbase_instr_hwcnt_clear_symbol;
+static kbase_instr_hwcnt_dump_irq_type *kbase_instr_hwcnt_dump_irq_symbol;
+static kbase_instr_hwcnt_dump_complete_type *kbase_instr_hwcnt_dump_complete_symbol;
+static kbase_instr_hwcnt_disable_type *kbase_instr_hwcnt_disable_symbol;
+static kbase_va_free_type *kbase_va_free_symbol;
+static kbase_destroy_context_type *kbase_destroy_context_symbol;
+
+static long shader_present_low = 0;
+
+/** The interval between reads, in ns.
+ *
+ * Earlier we introduced
+ * a 'hold off for 1ms after last read' to resolve MIDBASE-2178 and MALINE-724.
+ * However, the 1ms hold off is too long if no context switches occur as there is a race
+ * between this value and the tick of the read clock in gator which is also 1ms. If we 'miss' the
+ * current read, the counter values are effectively 'spread' over 2ms and the values seen are half
+ * what they should be (since Streamline averages over sample time). In the presence of context switches
+ * this spread can vary and markedly affect the counters.  Currently there is no 'proper' solution to
+ * this, but empirically we have found that reducing the minimum read interval to 950us causes the
+ * counts to be much more stable.
+ */
+static const int READ_INTERVAL_NSEC = 950000;
+
+#if GATOR_TEST
+#include "gator_events_mali_t6xx_hw_test.c"
+#endif
+
+/* Blocks for HW counters */
+enum {
+	JM_BLOCK = 0,
+	TILER_BLOCK,
+	SHADER_BLOCK,
+	MMU_BLOCK
+};
+
+/* Counters for Mali-T6xx:
+ *
+ *  - HW counters, 4 blocks
+ *    For HW counters we need strings to create /dev/gator/events files.
+ *    Enums are not needed because the position of the HW name in the array is the same
+ *    as the position of the corresponding value in the received block of memory.
+ *    HW counters are requested by calculating a bitmask, which is then passed to the driver.
+ *    Every millisecond a HW counters dump is requested, and if the previous dump has completed the counters are read.
+ */
+
+/* Hardware Counters */
+static const char *const hardware_counter_names[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"MESSAGES_SENT",
+	"MESSAGES_RECEIVED",
+	"GPU_ACTIVE",		/* 6 */
+	"IRQ_ACTIVE",
+	"JS0_JOBS",
+	"JS0_TASKS",
+	"JS0_ACTIVE",
+	"",
+	"JS0_WAIT_READ",
+	"JS0_WAIT_ISSUE",
+	"JS0_WAIT_DEPEND",
+	"JS0_WAIT_FINISH",
+	"JS1_JOBS",
+	"JS1_TASKS",
+	"JS1_ACTIVE",
+	"",
+	"JS1_WAIT_READ",
+	"JS1_WAIT_ISSUE",
+	"JS1_WAIT_DEPEND",
+	"JS1_WAIT_FINISH",
+	"JS2_JOBS",
+	"JS2_TASKS",
+	"JS2_ACTIVE",
+	"",
+	"JS2_WAIT_READ",
+	"JS2_WAIT_ISSUE",
+	"JS2_WAIT_DEPEND",
+	"JS2_WAIT_FINISH",
+	"JS3_JOBS",
+	"JS3_TASKS",
+	"JS3_ACTIVE",
+	"",
+	"JS3_WAIT_READ",
+	"JS3_WAIT_ISSUE",
+	"JS3_WAIT_DEPEND",
+	"JS3_WAIT_FINISH",
+	"JS4_JOBS",
+	"JS4_TASKS",
+	"JS4_ACTIVE",
+	"",
+	"JS4_WAIT_READ",
+	"JS4_WAIT_ISSUE",
+	"JS4_WAIT_DEPEND",
+	"JS4_WAIT_FINISH",
+	"JS5_JOBS",
+	"JS5_TASKS",
+	"JS5_ACTIVE",
+	"",
+	"JS5_WAIT_READ",
+	"JS5_WAIT_ISSUE",
+	"JS5_WAIT_DEPEND",
+	"JS5_WAIT_FINISH",
+	"JS6_JOBS",
+	"JS6_TASKS",
+	"JS6_ACTIVE",
+	"",
+	"JS6_WAIT_READ",
+	"JS6_WAIT_ISSUE",
+	"JS6_WAIT_DEPEND",
+	"JS6_WAIT_FINISH",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"JOBS_PROCESSED",
+	"TRIANGLES",
+	"QUADS",
+	"POLYGONS",
+	"POINTS",
+	"LINES",
+	"VCACHE_HIT",
+	"VCACHE_MISS",
+	"FRONT_FACING",
+	"BACK_FACING",
+	"PRIM_VISIBLE",
+	"PRIM_CULLED",
+	"PRIM_CLIPPED",
+	"LEVEL0",
+	"LEVEL1",
+	"LEVEL2",
+	"LEVEL3",
+	"LEVEL4",
+	"LEVEL5",
+	"LEVEL6",
+	"LEVEL7",
+	"COMMAND_1",
+	"COMMAND_2",
+	"COMMAND_3",
+	"COMMAND_4",
+	"COMMAND_4_7",
+	"COMMAND_8_15",
+	"COMMAND_16_63",
+	"COMMAND_64",
+	"COMPRESS_IN",
+	"COMPRESS_OUT",
+	"COMPRESS_FLUSH",
+	"TIMESTAMPS",
+	"PCACHE_HIT",
+	"PCACHE_MISS",
+	"PCACHE_LINE",
+	"PCACHE_STALL",
+	"WRBUF_HIT",
+	"WRBUF_MISS",
+	"WRBUF_LINE",
+	"WRBUF_PARTIAL",
+	"WRBUF_STALL",
+	"ACTIVE",
+	"LOADING_DESC",
+	"INDEX_WAIT",
+	"INDEX_RANGE_WAIT",
+	"VERTEX_WAIT",
+	"PCACHE_WAIT",
+	"WRBUF_WAIT",
+	"BUS_READ",
+	"BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"UTLB_STALL",
+	"UTLB_REPLAY_MISS",
+	"UTLB_REPLAY_FULL",
+	"UTLB_NEW_MISS",
+	"UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"SHADER_CORE_ACTIVE",
+	"FRAG_ACTIVE",
+	"FRAG_PRIMATIVES",
+	"FRAG_PRIMATIVES_DROPPED",
+	"FRAG_CYCLE_DESC",
+	"FRAG_CYCLES_PLR",
+	"FRAG_CYCLES_VERT",
+	"FRAG_CYCLES_TRISETUP",
+	"FRAG_CYCLES_RAST",
+	"FRAG_THREADS",
+	"FRAG_DUMMY_THREADS",
+	"FRAG_QUADS_RAST",
+	"FRAG_QUADS_EZS_TEST",
+	"FRAG_QUADS_EZS_KILLED",
+	"FRAG_QUADS_LZS_TEST",
+	"FRAG_QUADS_LZS_KILLED",
+	"FRAG_CYCLE_NO_TILE",
+	"FRAG_NUM_TILES",
+	"FRAG_TRANS_ELIM",
+	"COMPUTE_ACTIVE",
+	"COMPUTE_TASKS",
+	"COMPUTE_THREADS",
+	"COMPUTE_CYCLES_DESC",
+	"TRIPIPE_ACTIVE",
+	"ARITH_WORDS",
+	"ARITH_CYCLES_REG",
+	"ARITH_CYCLES_L0",
+	"ARITH_FRAG_DEPEND",
+	"LS_WORDS",
+	"LS_ISSUES",
+	"LS_RESTARTS",
+	"LS_REISSUES_MISS",
+	"LS_REISSUES_VD",
+	"LS_REISSUE_ATTRIB_MISS",
+	"LS_NO_WB",
+	"TEX_WORDS",
+	"TEX_BUBBLES",
+	"TEX_WORDS_L0",
+	"TEX_WORDS_DESC",
+	"TEX_THREADS",
+	"TEX_RECIRC_FMISS",
+	"TEX_RECIRC_DESC",
+	"TEX_RECIRC_MULTI",
+	"TEX_RECIRC_PMISS",
+	"TEX_RECIRC_CONF",
+	"LSC_READ_HITS",
+	"LSC_READ_MISSES",
+	"LSC_WRITE_HITS",
+	"LSC_WRITE_MISSES",
+	"LSC_ATOMIC_HITS",
+	"LSC_ATOMIC_MISSES",
+	"LSC_LINE_FETCHES",
+	"LSC_DIRTY_LINE",
+	"LSC_SNOOPS",
+	"AXI_TLB_STALL",
+	"AXI_TLB_MIESS",
+	"AXI_TLB_TRANSACTION",
+	"LS_TLB_MISS",
+	"LS_TLB_HIT",
+	"AXI_BEATS_READ",
+	"AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"MMU_HIT",
+	"MMU_NEW_MISS",
+	"MMU_REPLAY_FULL",
+	"MMU_REPLAY_MISS",
+	"MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"UTLB_HIT",
+	"UTLB_NEW_MISS",
+	"UTLB_REPLAY_FULL",
+	"UTLB_REPLAY_MISS",
+	"UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"L2_WRITE_BEATS",
+	"L2_READ_BEATS",
+	"L2_ANY_LOOKUP",
+	"L2_READ_LOOKUP",
+	"L2_SREAD_LOOKUP",
+	"L2_READ_REPLAY",
+	"L2_READ_SNOOP",
+	"L2_READ_HIT",
+	"L2_CLEAN_MISS",
+	"L2_WRITE_LOOKUP",
+	"L2_SWRITE_LOOKUP",
+	"L2_WRITE_REPLAY",
+	"L2_WRITE_SNOOP",
+	"L2_WRITE_HIT",
+	"L2_EXT_READ_FULL",
+	"L2_EXT_READ_HALF",
+	"L2_EXT_WRITE_FULL",
+	"L2_EXT_WRITE_HALF",
+	"L2_EXT_READ",
+	"L2_EXT_READ_LINE",
+	"L2_EXT_WRITE",
+	"L2_EXT_WRITE_LINE",
+	"L2_EXT_WRITE_SMALL",
+	"L2_EXT_BARRIER",
+	"L2_EXT_AR_STALL",
+	"L2_EXT_R_BUF_FULL",
+	"L2_EXT_RD_BUF_FULL",
+	"L2_EXT_R_RAW",
+	"L2_EXT_W_STALL",
+	"L2_EXT_W_BUF_FULL",
+	"L2_EXT_R_W_HAZARD",
+	"L2_TAG_HAZARD",
+	"L2_SNOOP_FULL",
+	"L2_REPLAY_FULL"
+};
+
+#define NUMBER_OF_HARDWARE_COUNTERS (sizeof(hardware_counter_names) / sizeof(hardware_counter_names[0]))
+
+#define GET_HW_BLOCK(c) (((c) >> 6) & 0x3)
+#define GET_COUNTER_OFFSET(c) ((c) & 0x3f)
+
+/* Memory to dump hardware counters into */
+static void *kernel_dump_buffer;
+
+#if MALI_DDK_GATOR_API_VERSION == 2
+/*
+ * DMA state used to manage lifetime of the buffer.
+ * Only used inside this file, so it gets internal linkage like the other
+ * file-scope state here.
+ */
+static kbase_hwc_dma_mapping kernel_dump_buffer_handle;
+#endif
+
+/* kbase context and device */
+static kbase_context *kbcontext = NULL;
+static struct kbase_device *kbdevice = NULL;
+
+/*
+ * The following function has no external prototype in older DDK revisions.  When the DDK
+ * is updated then this should be removed.
+ */
+struct kbase_device *kbase_find_device(int minor);
+
+static volatile bool kbase_device_busy = false;
+static unsigned int num_hardware_counters_enabled;
+
+/*
+ * gatorfs variables for counter enable state
+ */
+static mali_counter counters[NUMBER_OF_HARDWARE_COUNTERS];
+
+/* An array used to return the data we recorded
+ * as key,value pairs hence the *2
+ */
+static unsigned long counter_dump[NUMBER_OF_HARDWARE_COUNTERS * 2];
+
+#define SYMBOL_GET(FUNCTION, ERROR_COUNT) \
+	if(FUNCTION ## _symbol) \
+	{ \
+		printk("gator: mali " #FUNCTION " symbol was already registered\n"); \
+		(ERROR_COUNT)++; \
+	} \
+	else \
+	{ \
+		FUNCTION ## _symbol = symbol_get(FUNCTION); \
+		if(! FUNCTION ## _symbol) \
+		{ \
+			printk("gator: mali online " #FUNCTION " symbol not found\n"); \
+			(ERROR_COUNT)++; \
+		} \
+	}
+
+#define SYMBOL_CLEANUP(FUNCTION) \
+	if(FUNCTION ## _symbol) \
+	{ \
+        symbol_put(FUNCTION); \
+        FUNCTION ## _symbol = NULL; \
+	}
+
+/**
+ * Looks up every Mali driver entry point used by this file and caches the
+ * results in the corresponding *_symbol pointers.
+ *
+ * @return The number of entry points that were already cached or could not
+ *         be found; 0 when all lookups succeeded.
+ */
+static int init_symbols(void)
+{
+	int failures = 0;
+
+	SYMBOL_GET(kbase_find_device, failures);
+	SYMBOL_GET(kbase_create_context, failures);
+	SYMBOL_GET(kbase_va_alloc, failures);
+	SYMBOL_GET(kbase_instr_hwcnt_enable, failures);
+	SYMBOL_GET(kbase_instr_hwcnt_clear, failures);
+	SYMBOL_GET(kbase_instr_hwcnt_dump_irq, failures);
+	SYMBOL_GET(kbase_instr_hwcnt_dump_complete, failures);
+	SYMBOL_GET(kbase_instr_hwcnt_disable, failures);
+	SYMBOL_GET(kbase_va_free, failures);
+	SYMBOL_GET(kbase_destroy_context, failures);
+
+	return failures;
+}
+
+/**
+ * Execute symbol_put for all the registered Mali symbols.
+ *
+ * Each cached *_symbol pointer that is set is released and reset to NULL;
+ * pointers that are not set are skipped, so this can be called after a
+ * partially successful init_symbols().
+ */
+static void clean_symbols(void)
+{
+	SYMBOL_CLEANUP(kbase_find_device);
+	SYMBOL_CLEANUP(kbase_create_context);
+	SYMBOL_CLEANUP(kbase_va_alloc);
+	SYMBOL_CLEANUP(kbase_instr_hwcnt_enable);
+	SYMBOL_CLEANUP(kbase_instr_hwcnt_clear);
+	SYMBOL_CLEANUP(kbase_instr_hwcnt_dump_irq);
+	SYMBOL_CLEANUP(kbase_instr_hwcnt_dump_complete);
+	SYMBOL_CLEANUP(kbase_instr_hwcnt_disable);
+	SYMBOL_CLEANUP(kbase_va_free);
+	SYMBOL_CLEANUP(kbase_destroy_context);
+}
+
+/**
+ * Determines whether a read should take place
+ * @param current_time The current time, obtained from getnstimeofday()
+ * @param prev_time_s In/out: the seconds value most recently seen by this function.  It is
+ *                    updated when a second rollover is observed and when a read is scheduled.
+ * @param next_read_time_ns In/out: the time (in ns within the current second) when the next
+ *                          read should be allowed.
+ *
+ * @return 1 if the read should take place now, 0 if it should be skipped.
+ *
+ * Note that this function has been separated out here to allow it to be tested.
+ */
+static int is_read_scheduled(const struct timespec *current_time, u32 *prev_time_s, s32 *next_read_time_ns)
+{
+	/* If the current ns count rolls over a second, roll the next read time too. */
+	if (current_time->tv_sec != *prev_time_s) {
+		*next_read_time_ns = *next_read_time_ns - NSEC_PER_SEC;
+
+		/*
+		 * Remember that this rollover has been handled.  Otherwise every
+		 * further call made before the next read would subtract another
+		 * second and let the read through before the hold-off has elapsed.
+		 */
+		*prev_time_s = current_time->tv_sec;
+	}
+
+	/* Abort the read if the next read time has not arrived. */
+	if (current_time->tv_nsec < *next_read_time_ns) {
+		return 0;
+	}
+
+	/* Set the next read some fixed time after this one, and update the read timestamp. */
+	*next_read_time_ns = current_time->tv_nsec + READ_INTERVAL_NSEC;
+
+	*prev_time_s = current_time->tv_sec;
+	return 1;
+}
+
+/**
+ * Enables hardware counter collection for the counters that are enabled.
+ *
+ * Builds one enable bitmask per hardware block from the counters[] array.
+ * When at least one counter is enabled it looks up the Mali driver entry
+ * points, creates a kbase context, allocates the dump buffer and switches
+ * the hardware counters on.
+ *
+ * On failure everything acquired so far is released and the cached context
+ * and buffer pointers are reset, so a later call can try again.
+ *
+ * @return 0 on success, when no counter is enabled, or when the Mali driver
+ *         entry points cannot be resolved; -1 on any other failure.
+ */
+static int start(void)
+{
+	kbase_uk_hwcnt_setup setup;
+	mali_error err;
+	int cnt;
+	u16 bitmask[] = { 0, 0, 0, 0 };
+	unsigned long long shadersPresent = 0;
+	kbase_context *failed_kbcontext;
+
+	/* Setup HW counters */
+	num_hardware_counters_enabled = 0;
+
+	if (NUMBER_OF_HARDWARE_COUNTERS != 256) {
+		/* NUMBER_OF_HARDWARE_COUNTERS is a sizeof expression, hence %zu. */
+		pr_debug("Unexpected number of hardware counters defined: expecting 256, got %zu\n", NUMBER_OF_HARDWARE_COUNTERS);
+	}
+
+	/* Calculate enable bitmasks based on counters_enabled array */
+	for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++) {
+		const mali_counter *counter = &counters[cnt];
+		if (counter->enabled) {
+			int block = GET_HW_BLOCK(cnt);
+			/* One enable bit covers four consecutive counters of a block. */
+			int enable_bit = GET_COUNTER_OFFSET(cnt) / 4;
+			bitmask[block] |= (1 << enable_bit);
+			pr_debug("gator: Mali-T6xx: hardware counter %s selected [%d]\n", hardware_counter_names[cnt], cnt);
+			num_hardware_counters_enabled++;
+		}
+	}
+
+	/* Create a kbase context for HW counters */
+	if (num_hardware_counters_enabled > 0) {
+		if (init_symbols() > 0) {
+			clean_symbols();
+			/* No Mali driver code entrypoints found - not a fault. */
+			return 0;
+		}
+
+		kbdevice = kbase_find_device_symbol(-1);
+		if (!kbdevice) {
+			/* Without a device there is nothing to create a context on or to read the shader bitmap from. */
+			pr_debug("gator: Mali-T6xx: error Mali device not found\n");
+			goto out;
+		}
+
+		/* If we already got a context, fail */
+		if (kbcontext) {
+			pr_debug("gator: Mali-T6xx: error context already present\n");
+			goto out;
+		}
+
+		/* kbcontext will only be valid after all the Mali symbols are loaded successfully */
+		kbcontext = kbase_create_context_symbol(kbdevice);
+		if (!kbcontext) {
+			pr_debug("gator: Mali-T6xx: error creating kbase context\n");
+			goto out;
+		}
+
+
+		/* See if we can get the number of shader cores */
+		shadersPresent = kbdevice->shader_present_bitmap;
+		shader_present_low = (unsigned long)shadersPresent;
+
+		/*
+		 * The amount of memory needed to store the dump (bytes)
+		 * DUMP_SIZE = number of core groups
+		 *             * number of blocks (always 8 for midgard)
+		 *             * number of counters per block (always 64 for midgard)
+		 *             * number of bytes per counter (always 4 in midgard)
+		 * For a Mali-T6xx with a single core group = 1 * 8 * 64 * 4 = 2048
+		 * For a Mali-T6xx with a dual core group   = 2 * 8 * 64 * 4 = 4096
+		 */
+#if MALI_DDK_GATOR_API_VERSION == 1
+		kernel_dump_buffer = kbase_va_alloc_symbol(kbcontext, 4096);
+#elif MALI_DDK_GATOR_API_VERSION == 2
+		kernel_dump_buffer = kbase_va_alloc_symbol(kbcontext, 4096, &kernel_dump_buffer_handle);
+#endif
+		if (!kernel_dump_buffer) {
+			pr_debug("gator: Mali-T6xx: error trying to allocate va\n");
+			goto destroy_context;
+		}
+
+		setup.dump_buffer = (uintptr_t)kernel_dump_buffer;
+		setup.jm_bm = bitmask[JM_BLOCK];
+		setup.tiler_bm = bitmask[TILER_BLOCK];
+		setup.shader_bm = bitmask[SHADER_BLOCK];
+		setup.mmu_l2_bm = bitmask[MMU_BLOCK];
+		/* These counters do not exist on Mali-T60x */
+		setup.l3_cache_bm = 0;
+
+		/* Use kbase API to enable hardware counters and provide dump buffer */
+		err = kbase_instr_hwcnt_enable_symbol(kbcontext, &setup);
+		if (err != MALI_ERROR_NONE) {
+			pr_debug("gator: Mali-T6xx: can't setup hardware counters\n");
+			goto free_buffer;
+		}
+		pr_debug("gator: Mali-T6xx: hardware counters enabled\n");
+		kbase_instr_hwcnt_clear_symbol(kbcontext);
+		pr_debug("gator: Mali-T6xx: hardware counters cleared \n");
+
+		kbase_device_busy = false;
+	}
+
+	return 0;
+
+free_buffer:
+#if MALI_DDK_GATOR_API_VERSION == 1
+	kbase_va_free_symbol(kbcontext, kernel_dump_buffer);
+#elif MALI_DDK_GATOR_API_VERSION == 2
+	kbase_va_free_symbol(kbcontext, &kernel_dump_buffer_handle);
+#endif
+	/* The buffer is gone; do not leave a stale pointer behind. */
+	kernel_dump_buffer = NULL;
+
+destroy_context:
+	/*
+	 * Clear the global before destroying the context so that no stale
+	 * pointer remains; otherwise every later start() would fail with
+	 * "context already present".
+	 */
+	failed_kbcontext = kbcontext;
+	kbcontext = NULL;
+	kbase_destroy_context_symbol(failed_kbcontext);
+
+out:
+	clean_symbols();
+	return -1;
+}
+
+static void stop(void)
+{
+	unsigned int cnt;
+	kbase_context *temp_kbcontext;
+	/* Disable every counter, then release what start() acquired from the Mali driver. */
+	pr_debug("gator: Mali-T6xx: stop\n");
+
+	/* Set all counters as disabled */
+	for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++) {
+		counters[cnt].enabled = 0;
+	}
+
+	/* Destroy the context for HW counters */
+	if (num_hardware_counters_enabled > 0 && kbcontext != NULL) {
+		/*
+		 * Clear the global pointer before tearing the context down:
+		 * read() checks kbcontext for NULL before touching the driver.
+		 */
+		temp_kbcontext = kbcontext;
+		kbcontext = NULL;
+
+		kbase_instr_hwcnt_disable_symbol(temp_kbcontext);	/* reverse of start(): disable, free buffer, destroy context */
+
+#if MALI_DDK_GATOR_API_VERSION == 1
+		kbase_va_free_symbol(temp_kbcontext, kernel_dump_buffer);
+#elif MALI_DDK_GATOR_API_VERSION == 2
+		kbase_va_free_symbol(temp_kbcontext, &kernel_dump_buffer_handle);
+#endif
+
+		kbase_destroy_context_symbol(temp_kbcontext);
+
+		pr_debug("gator: Mali-T6xx: hardware counters stopped\n");
+
+		clean_symbols();	/* same symbol cleanup start() performs on its error paths */
+	}
+}
+
+static int read(int **buffer)
+{
+	int cnt;
+	int len = 0;
+	u32 value = 0;
+	mali_bool success;
+	/* Returns the number of ints placed in counter_dump (key/value pairs); 0 when nothing was sampled, -1 when no context exists. */
+	struct timespec current_time;
+	static u32 prev_time_s = 0;	/* scheduling state carried between calls for is_read_scheduled() */
+	static s32 next_read_time_ns = 0;
+
+	if (!on_primary_core()) {
+		return 0;
+	}
+
+	getnstimeofday(&current_time);
+
+	/*
+	 * Discard reads unless a respectable time has passed.  This reduces the load on the GPU without sacrificing
+	 * accuracy on the Streamline display.
+	 */
+	if (!is_read_scheduled(&current_time, &prev_time_s, &next_read_time_ns)) {
+		return 0;
+	}
+
+	/*
+	 * Report the HW counters
+	 * Only process hardware counters if at least one of the hardware counters is enabled.
+	 */
+	if (num_hardware_counters_enabled > 0) {
+		const unsigned int vithar_blocks[] = {	/* byte offset of each block inside kernel_dump_buffer */
+			0x700,	/* VITHAR_JOB_MANAGER,     Block 0 */
+			0x400,	/* VITHAR_TILER,           Block 1 */
+			0x000,	/* VITHAR_SHADER_CORE,     Block 2 */
+			0x500	/* VITHAR_MEMORY_SYSTEM,   Block 3 */
+		};
+
+		if (!kbcontext) {
+			return -1;
+		}
+
+		/* Mali symbols can be called safely since a kbcontext is valid */
+		if (kbase_instr_hwcnt_dump_complete_symbol(kbcontext, &success) == MALI_TRUE) {
+			kbase_device_busy = false;	/* the outstanding dump request has finished */
+
+			if (success == MALI_TRUE) {
+				/* Cycle through hardware counters and accumulate totals */
+				for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++) {
+					const mali_counter *counter = &counters[cnt];
+					if (counter->enabled) {
+						const int block = GET_HW_BLOCK(cnt);
+						const int counter_offset = GET_COUNTER_OFFSET(cnt);	/* used as a u32 index within the block */
+
+						const char* block_base_address = (char*)kernel_dump_buffer + vithar_blocks[block];
+
+						/* If counter belongs to shader block need to take into account all cores */
+						if (block == SHADER_BLOCK) {
+							int i = 0;
+							int shader_core_count = 0;
+							value = 0;
+
+							for (i = 0; i < 4; i++) {	/* present-mask bits 0-3: one 0x100-byte block per core */
+								if ((shader_present_low >> i) & 1) {
+									value += *((u32*) (block_base_address + (0x100 * i)) + counter_offset);
+									shader_core_count++;
+								}
+							}
+
+							for (i = 0; i < 4; i++) {	/* present-mask bits 4-7: same layout, 0x800 bytes further on */
+								if((shader_present_low >> (i+4)) & 1) {
+									value += *((u32*)(block_base_address + (0x100 * i) + 0x800) + counter_offset);
+									shader_core_count++;
+								}
+							}
+
+							/* Divide the total by the number of present cores to report an average */
+							if (shader_core_count != 0) {
+								value /= shader_core_count;
+							}
+						} else {
+							value = *((u32*)block_base_address + counter_offset);
+						}
+
+						counter_dump[len++] = counter->key;	/* emitted as consecutive key, value entries */
+						counter_dump[len++] = value;
+					}
+				}
+			}
+		}
+
+		if (!kbase_device_busy) {	/* request the next dump only when none is outstanding */
+			kbase_device_busy = true;
+			kbase_instr_hwcnt_dump_irq_symbol(kbcontext);
+		}
+	}
+
+	/* Update the buffer */
+	if (buffer) {
+		*buffer = (int *)counter_dump;
+	}
+
+	return len;
+}
+
+static int create_files(struct super_block *sb, struct dentry *root)
+{
+	/*
+	 * Publish one entry per hardware counter; the counter's name and its
+	 * state slot share the same index.
+	 */
+	const char *gpu_name = gator_mali_get_mali_name();
+	unsigned int idx = 0;
+
+	while (idx < NUMBER_OF_HARDWARE_COUNTERS) {
+		if (gator_mali_create_file_system(gpu_name, hardware_counter_names[idx], sb, root, &counters[idx], NULL))
+			return -1;	/* stop at the first entry that cannot be created */
+		idx++;
+	}
+
+	return 0;
+}
+
+static struct gator_interface gator_events_mali_t6xx_interface = {
+	.create_files = create_files,
+	.start = start,
+	.stop = stop,
+	.read = read
+};
+
+int gator_events_mali_t6xx_hw_init(void)
+{
+	pr_debug("gator: Mali-T6xx: hw_counters init\n");	/* this file registers the hardware counters */
+
+#if GATOR_TEST
+	test_all_is_read_scheduled();	/* self-tests run only in GATOR_TEST builds */
+#endif
+
+	gator_mali_initialise_counters(counters, NUMBER_OF_HARDWARE_COUNTERS);
+
+	return gator_events_install(&gator_events_mali_t6xx_interface);	/* the install result is returned unchanged */
+}
diff --git a/drivers/gator/gator_events_mali_t6xx_hw_test.c b/drivers/gator/gator_events_mali_t6xx_hw_test.c
new file mode 100644
index 000000000000..efb32ddf5483
--- /dev/null
+++ b/drivers/gator/gator_events_mali_t6xx_hw_test.c
@@ -0,0 +1,55 @@
+/**
+ * Copyright (C) ARM Limited 2012-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/**
+ * Test functions for mali_t600_hw code.
+ */
+
+static int is_read_scheduled(const struct timespec *current_time, u32 *prev_time_s, s32 *next_read_time_ns);
+
+static int test_is_read_scheduled(u32 s, u32 ns, u32 prev_s, s32 next_ns, int expected_result, s32 expected_next_ns)
+{
+	struct timespec current_time;
+	u32 prev_time_s = prev_s;
+	s32 next_read_time_ns = next_ns;
+	/* Runs one is_read_scheduled() case; returns 1 if both the result and the updated next-read time match, else 0. */
+	current_time.tv_sec = s;
+	current_time.tv_nsec = ns;
+
+	if (is_read_scheduled(&current_time, &prev_time_s, &next_read_time_ns) != expected_result) {
+		printk("Failed do_read(%u, %u, %u, %d): expected %d\n", s, ns, prev_s, next_ns, expected_result);
+		return 0;
+	}
+
+	if (next_read_time_ns != expected_next_ns) {	/* the call may have rewritten next_read_time_ns through its pointer */
+		printk("Failed: next_read_ns expected=%d, actual=%d\n", expected_next_ns, next_read_time_ns);
+		return 0;
+	}
+
+	return 1;
+}
+
+static void test_all_is_read_scheduled(void)
+{
+	const int last_ns = 999999999;	/* largest nanosecond value within a second */
+	int passed = 0;
+
+	printk("gator: running tests on %s\n", __FILE__);
+
+	passed += test_is_read_scheduled(0, 0, 0, 0, 1, READ_INTERVAL_NSEC);	/* Null time */
+	passed += test_is_read_scheduled(100, 1000, 0, 0, 1, READ_INTERVAL_NSEC + 1000);	/* Initial values */
+
+	passed += test_is_read_scheduled(100, last_ns, 100, last_ns + 500, 0, last_ns + 500);
+	passed += test_is_read_scheduled(101, 1, 100, last_ns + 500, 0, last_ns + 500 - NSEC_PER_SEC);
+	passed += test_is_read_scheduled(101, 600, 100, last_ns + 500 - NSEC_PER_SEC, 1, 600 + READ_INTERVAL_NSEC);
+
+	passed += test_is_read_scheduled(101, 600, 100, last_ns + 500, 1, 600 + READ_INTERVAL_NSEC);
+
+	printk("gator: %d tests passed\n", passed);
+}
diff --git a/drivers/gator/gator_events_meminfo.c b/drivers/gator/gator_events_meminfo.c
new file mode 100644
index 000000000000..451290d9af17
--- /dev/null
+++ b/drivers/gator/gator_events_meminfo.c
@@ -0,0 +1,387 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+
+#include <linux/hardirq.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/workqueue.h>
+#include <trace/events/kmem.h>
+
+enum {
+	MEMINFO_MEMFREE,
+	MEMINFO_MEMUSED,
+	MEMINFO_BUFFERRAM,
+	MEMINFO_TOTAL,
+};
+
+enum {
+	PROC_SIZE,
+	PROC_SHARE,
+	PROC_TEXT,
+	PROC_DATA,
+	PROC_COUNT,
+};
+
+static const char * const meminfo_names[] = {
+	"Linux_meminfo_memfree",
+	"Linux_meminfo_memused",
+	"Linux_meminfo_bufferram",
+};
+
+static const char * const proc_names[] = {
+	"Linux_proc_statm_size",
+	"Linux_proc_statm_share",
+	"Linux_proc_statm_text",
+	"Linux_proc_statm_data",
+};
+
+static bool meminfo_global_enabled;	// set by start() when any meminfo counter is enabled
+static ulong meminfo_enabled[MEMINFO_TOTAL];
+static ulong meminfo_keys[MEMINFO_TOTAL];
+static long long meminfo_buffer[2 * (MEMINFO_TOTAL + 2)];	// worst case: memfree 2 + memused 6 + bufferram 2 entries
+static int meminfo_length = 0;	// entries currently valid in meminfo_buffer
+static bool new_data_avail;	// raised by the worker thread, cleared by the read callback
+
+static bool proc_global_enabled;
+static ulong proc_enabled[PROC_COUNT];
+static ulong proc_keys[PROC_COUNT];
+static DEFINE_PER_CPU(long long, proc_buffer[2 * (PROC_COUNT + 3)]);	// worst case: pid 2 + 4 counters * 2 + memused 2 + clear pid 2
+
+static int gator_meminfo_func(void *data);
+static bool gator_meminfo_run;
+// Initialize semaphore unlocked to initialize memory values
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
+static DECLARE_MUTEX(gator_meminfo_sem);
+#else
+static DEFINE_SEMAPHORE(gator_meminfo_sem);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+GATOR_DEFINE_PROBE(mm_page_free_direct, TP_PROTO(struct page *page, unsigned int order))
+#else
+GATOR_DEFINE_PROBE(mm_page_free, TP_PROTO(struct page *page, unsigned int order))
+#endif
+{
+	up(&gator_meminfo_sem);	// signal gator_meminfo_func, which waits on this semaphore, to resample
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+GATOR_DEFINE_PROBE(mm_pagevec_free, TP_PROTO(struct page *page, int cold))
+#else
+GATOR_DEFINE_PROBE(mm_page_free_batched, TP_PROTO(struct page *page, int cold))
+#endif
+{
+	up(&gator_meminfo_sem);	// signal gator_meminfo_func, which waits on this semaphore, to resample
+}
+
+GATOR_DEFINE_PROBE(mm_page_alloc, TP_PROTO(struct page *page, unsigned int order, gfp_t gfp_flags, int migratetype))
+{
+	up(&gator_meminfo_sem);	// signal gator_meminfo_func, which waits on this semaphore, to resample
+}
+
+static int gator_events_meminfo_create_files(struct super_block *sb, struct dentry *root)
+{
+	struct dentry *node;
+	int idx;
+
+	// System-wide counters: one directory each, holding "enabled" and "key"
+	for (idx = 0; idx < MEMINFO_TOTAL; idx++) {
+		node = gatorfs_mkdir(sb, root, meminfo_names[idx]);
+		if (node == NULL)
+			return -1;
+		gatorfs_create_ulong(sb, node, "enabled", &meminfo_enabled[idx]);
+		gatorfs_create_ro_ulong(sb, node, "key", &meminfo_keys[idx]);
+	}
+
+	// The statm-style counters use the same two-file layout
+	for (idx = 0; idx < PROC_COUNT; idx++) {
+		node = gatorfs_mkdir(sb, root, proc_names[idx]);
+		if (node == NULL)
+			return -1;
+		gatorfs_create_ulong(sb, node, "enabled", &proc_enabled[idx]);
+		gatorfs_create_ro_ulong(sb, node, "key", &proc_keys[idx]);
+	}
+
+	return 0;
+}
+
+static int gator_events_meminfo_start(void)
+{
+	int i;
+	// Returns 0 on success (also when no meminfo counter is enabled), -1 if a tracepoint or the worker thread cannot be set up
+	new_data_avail = false;
+	meminfo_global_enabled = 0;
+	for (i = 0; i < MEMINFO_TOTAL; i++) {
+		if (meminfo_enabled[i]) {
+			meminfo_global_enabled = 1;
+			break;
+		}
+	}
+
+	proc_global_enabled = 0;
+	for (i = 0; i < PROC_COUNT; ++i) {
+		if (proc_enabled[i]) {
+			proc_global_enabled = 1;
+			break;
+		}
+	}
+	if (meminfo_enabled[MEMINFO_MEMUSED]) {	// memused is also emitted per task by the read_proc callback
+		proc_global_enabled = 1;
+	}
+
+	if (meminfo_global_enabled == 0)	// tracepoints and the worker thread are only set up for the meminfo counters
+		return 0;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+	if (GATOR_REGISTER_TRACE(mm_page_free_direct))
+#else
+	if (GATOR_REGISTER_TRACE(mm_page_free))
+#endif
+		goto mm_page_free_exit;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+	if (GATOR_REGISTER_TRACE(mm_pagevec_free))
+#else
+	if (GATOR_REGISTER_TRACE(mm_page_free_batched))
+#endif
+		goto mm_page_free_batched_exit;
+	if (GATOR_REGISTER_TRACE(mm_page_alloc))
+		goto mm_page_alloc_exit;
+
+	// Start worker thread
+	gator_meminfo_run = true;
+	// Since the mutex starts unlocked, memory values will be initialized
+	if (IS_ERR(kthread_run(gator_meminfo_func, NULL, "gator_meminfo")))
+		goto kthread_run_exit;
+
+	return 0;
+
+kthread_run_exit:	// each label undoes the registrations made before the step that failed
+	GATOR_UNREGISTER_TRACE(mm_page_alloc);
+mm_page_alloc_exit:
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+	GATOR_UNREGISTER_TRACE(mm_pagevec_free);
+#else
+	GATOR_UNREGISTER_TRACE(mm_page_free_batched);
+#endif
+mm_page_free_batched_exit:
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+	GATOR_UNREGISTER_TRACE(mm_page_free_direct);
+#else
+	GATOR_UNREGISTER_TRACE(mm_page_free);
+#endif
+mm_page_free_exit:
+	return -1;
+}
+
+static void gator_events_meminfo_stop(void)
+{
+	if (meminfo_global_enabled) {	// start() registers nothing when this flag is clear
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+		GATOR_UNREGISTER_TRACE(mm_page_free_direct);
+		GATOR_UNREGISTER_TRACE(mm_pagevec_free);
+#else
+		GATOR_UNREGISTER_TRACE(mm_page_free);
+		GATOR_UNREGISTER_TRACE(mm_page_free_batched);
+#endif
+		GATOR_UNREGISTER_TRACE(mm_page_alloc);
+
+		// Stop worker thread
+		gator_meminfo_run = false;
+		up(&gator_meminfo_sem);	// wake the thread so it sees the cleared flag; its exit is not waited for here
+	}
+}
+
+// Must be run in process context as the kernel function si_meminfo() can sleep
+static int gator_meminfo_func(void *data)
+{
+	struct sysinfo info;
+	int i, len;
+	unsigned long long value;
+	// Worker loop: every wake-up on gator_meminfo_sem rebuilds meminfo_buffer from si_meminfo(); always returns 0
+	for (;;) {
+		if (down_killable(&gator_meminfo_sem)) {
+			break;
+		}
+
+		// Eat up any pending events
+		while (!down_trylock(&gator_meminfo_sem));
+
+		if (!gator_meminfo_run) {	// stop() clears the flag and then signals the semaphore
+			break;
+		}
+
+		meminfo_length = len = 0;
+
+		si_meminfo(&info);
+		for (i = 0; i < MEMINFO_TOTAL; i++) {
+			if (meminfo_enabled[i]) {
+				switch (i) {
+				case MEMINFO_MEMFREE:
+					value = (unsigned long long)info.freeram * PAGE_SIZE;	// widen first: the product can exceed unsigned long on 32-bit
+					break;
+				case MEMINFO_MEMUSED:
+					// pid -1 means system wide
+					meminfo_buffer[len++] = 1;
+					meminfo_buffer[len++] = -1;
+					// Emit value
+					meminfo_buffer[len++] = meminfo_keys[MEMINFO_MEMUSED];
+					meminfo_buffer[len++] = (unsigned long long)(info.totalram - info.freeram) * PAGE_SIZE;
+					// Clear pid
+					meminfo_buffer[len++] = 1;
+					meminfo_buffer[len++] = 0;
+					continue;
+				case MEMINFO_BUFFERRAM:
+					value = (unsigned long long)info.bufferram * PAGE_SIZE;	// widened for the same reason as memfree
+					break;
+				default:
+					value = 0;
+					break;
+				}
+				meminfo_buffer[len++] = meminfo_keys[i];
+				meminfo_buffer[len++] = value;
+			}
+		}
+
+		meminfo_length = len;	// publish the length before flagging new data for the read callback
+		new_data_avail = true;
+	}
+
+	return 0;
+}
+
+static int gator_events_meminfo_read(long long **buffer)
+{
+	if (!on_primary_core() || !meminfo_global_enabled)
+		return 0;
+
+	if (!new_data_avail)	// the worker thread has produced nothing since the last read
+		return 0;
+
+	new_data_avail = false;
+
+	if (buffer)
+		*buffer = meminfo_buffer;
+
+	return meminfo_length;	// number of long long entries valid in meminfo_buffer
+}
+
+static int gator_events_meminfo_read_proc(long long **buffer, struct task_struct *task)
+{
+	struct mm_struct *mm;
+	u64 share = 0;
+	int i;
+	long long value;
+	int len = 0;
+	int cpu = get_physical_cpu();
+	long long *buf = per_cpu(proc_buffer, cpu);	// output goes to the buffer of the current physical CPU
+
+	if (!proc_global_enabled) {
+		return 0;
+	}
+
+	// Collect the memory stats of the process instead of the thread
+	if (task->group_leader != NULL) {
+		task = task->group_leader;
+	}
+
+	// get_task_mm/mmput is not needed in this context because the task and its mm are required as part of the sched_switch
+	mm = task->mm;
+	if (mm == NULL) {
+		return 0;
+	}
+
+	// Derived from task_statm in fs/proc/task_mmu.c
+	if (meminfo_enabled[MEMINFO_MEMUSED] || proc_enabled[PROC_SHARE]) {
+		share = get_mm_counter(mm,
+#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32)
+							   file_rss
+#else
+							   MM_FILEPAGES
+#endif
+							   );
+	}
+
+	// key of 1 indicates a pid
+	buf[len++] = 1;
+	buf[len++] = task->pid;
+
+	for (i = 0; i < PROC_COUNT; ++i) {
+		if (proc_enabled[i]) {
+			switch (i) {
+			case PROC_SIZE:
+				value = mm->total_vm;
+				break;
+			case PROC_SHARE:
+				value = share;
+				break;
+			case PROC_TEXT:
+				value = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> PAGE_SHIFT;
+				break;
+			case PROC_DATA:
+				value = mm->total_vm - mm->shared_vm;
+				break;
+			}
+
+			buf[len++] = proc_keys[i];
+			buf[len++] = value * PAGE_SIZE;	// every case above is scaled by PAGE_SIZE before it is reported
+		}
+	}
+
+	if (meminfo_enabled[MEMINFO_MEMUSED]) {
+		value = share + get_mm_counter(mm,
+#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32)
+									   anon_rss
+#else
+									   MM_ANONPAGES
+#endif
+									   );
+		// Send resident for this pid
+		buf[len++] = meminfo_keys[MEMINFO_MEMUSED];
+		buf[len++] = value * PAGE_SIZE;
+	}
+
+	// Clear pid
+	buf[len++] = 1;
+	buf[len++] = 0;
+
+	if (buffer)
+		*buffer = buf;
+
+	return len;	// number of long long entries written to buf
+}
+
+static struct gator_interface gator_events_meminfo_interface = {
+	.create_files = gator_events_meminfo_create_files,
+	.start = gator_events_meminfo_start,
+	.stop = gator_events_meminfo_stop,
+	.read64 = gator_events_meminfo_read,
+	.read_proc = gator_events_meminfo_read_proc,
+};
+
+int gator_events_meminfo_init(void)
+{
+	int n;
+
+	// Everything starts disabled; keys are requested in table order
+	meminfo_global_enabled = false;
+	for (n = 0; n < MEMINFO_TOTAL; n++) {
+		meminfo_keys[n] = gator_events_get_key();
+		meminfo_enabled[n] = 0;
+	}
+	proc_global_enabled = false;
+	for (n = 0; n < PROC_COUNT; n++) {
+		proc_keys[n] = gator_events_get_key();
+		proc_enabled[n] = 0;
+	}
+
+	return gator_events_install(&gator_events_meminfo_interface);
+}
diff --git a/drivers/gator/gator_events_mmapped.c b/drivers/gator/gator_events_mmapped.c
new file mode 100644
index 000000000000..f055e48d317a
--- /dev/null
+++ b/drivers/gator/gator_events_mmapped.c
@@ -0,0 +1,209 @@
+/*
+ * Example events provider
+ *
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Similar entries to those below must be present in the events.xml file.
+ * To add them to the events.xml, create an events-mmap.xml with the 
+ * following contents and rebuild gatord:
+ *
+ * <counter_set name="mmapped_cnt" count="3"/>
+ * <category name="mmapped" counter_set="mmapped_cnt" per_cpu="no">
+ *   <event event="0x0" title="Simulated1" name="Sine" display="maximum" average_selection="yes" description="Sort-of-sine"/>
+ *   <event event="0x1" title="Simulated2" name="Triangle" display="maximum" average_selection="yes" description="Triangular wave"/>
+ *   <event event="0x2" title="Simulated3" name="PWM" display="maximum" average_selection="yes" description="PWM Signal"/>
+ * </category>
+ *
+ * When adding custom events, be sure do the following
+ * - add any needed .c files to the gator driver Makefile
+ * - call gator_events_install in the events init function
+ * - add the init function to GATOR_EVENTS_LIST in gator_main.c
+ * - add a new events-*.xml file to the gator daemon and rebuild
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/ratelimit.h>
+
+#include "gator.h"
+
+#define MMAPPED_COUNTERS_NUM 3
+
+static int mmapped_global_enabled;
+
+static struct {
+	unsigned long enabled;
+	unsigned long event;
+	unsigned long key;
+} mmapped_counters[MMAPPED_COUNTERS_NUM];
+
+static int mmapped_buffer[MMAPPED_COUNTERS_NUM * 2];
+
+static s64 prev_time;
+
+/* Adds mmapped_cntX directories and enabled, event, and key files to /dev/gator/events */
+static int gator_events_mmapped_create_files(struct super_block *sb,
+					    struct dentry *root)
+{
+	int i;
+
+	for (i = 0; i < MMAPPED_COUNTERS_NUM; i++) {
+		char buf[16];	/* room for "mmapped_cnt", the index digits and the NUL */
+		struct dentry *dir;
+
+		snprintf(buf, sizeof(buf), "mmapped_cnt%d", i);
+		dir = gatorfs_mkdir(sb, root, buf);
+		if (WARN_ON(!dir))
+			return -1;	/* directories created so far are not removed here */
+		gatorfs_create_ulong(sb, dir, "enabled",
+				     &mmapped_counters[i].enabled);
+		gatorfs_create_ulong(sb, dir, "event",
+				     &mmapped_counters[i].event);
+		gatorfs_create_ro_ulong(sb, dir, "key",
+					&mmapped_counters[i].key);
+	}
+
+	return 0;
+}
+
+static int gator_events_mmapped_start(void)
+{
+	int i;
+	struct timespec ts;
+
+	getnstimeofday(&ts);
+	prev_time = timespec_to_ns(&ts);	/* reference point for the first delta computed by the read callback */
+
+	mmapped_global_enabled = 0;
+	for (i = 0; i < MMAPPED_COUNTERS_NUM; i++) {
+		if (mmapped_counters[i].enabled) {
+			mmapped_global_enabled = 1;	/* one enabled counter is enough */
+			break;
+		}
+	}
+
+	return 0;	/* no failure path */
+}
+
+static void gator_events_mmapped_stop(void)
+{
+}
+
+/* This function "simulates" counters, generating values of fancy
+ * functions like sine or triangle... */
+static int mmapped_simulate(int counter, int delta_in_us)
+{
+	int result = 0;	/* returned unchanged for counter numbers other than 0-2 */
+
+	switch (counter) {
+	case 0:		/* sort-of-sine */
+		{
+			static int t = 0;	/* accumulated microseconds, kept across calls */
+			int x;
+
+			t += delta_in_us;
+			if (t > 2048000)	/* restart once t passes 2048000 */
+				t = 0;
+
+			if (t % 1024000 < 512000)
+				x = 512000 - (t % 512000);
+			else
+				x = t % 512000;
+
+			result = 32 * x / 512000;
+			result = result * result;	/* square the 0..32 ramp to curve it */
+
+			if (t < 1024000)
+				result = 1922 - result;	/* values are flipped while t is below 1024000 */
+		}
+		break;
+	case 1:		/* triangle */
+		{
+			static int v, d = 1;	/* current value and direction (+1 or -1) */
+
+			v = v + d * delta_in_us;
+			if (v < 0) {	/* clamp at the lower bound and head upwards */
+				v = 0;
+				d = 1;
+			} else if (v > 1000000) {	/* clamp at the upper bound and head downwards */
+				v = 1000000;
+				d = -1;
+			}
+
+			result = v;
+		}
+		break;
+	case 2:		/* PWM signal */
+		{
+			static int dc, x, t = 0;	/* threshold, total elapsed time, position within the period */
+
+			t += delta_in_us;
+			if (t > 1000000)
+				t = 0;
+			if (x / 1000000 != (x + delta_in_us) / 1000000)	/* x is about to cross a multiple of 1000000 */
+				dc = (dc + 100000) % 1000000;
+			x += delta_in_us;
+
+			result = t < dc ? 0 : 10;	/* 0 while t is below the threshold, 10 otherwise */
+		}
+		break;
+	}
+
+	return result;
+}
+
+static int gator_events_mmapped_read(int **buffer)
+{
+	int i;
+	int len = 0;
+	int delta_in_us;
+	struct timespec ts;
+	s64 time;
+
+	/* System wide counters - read from one core only */
+	if (!on_primary_core() || !mmapped_global_enabled)
+		return 0;
+
+	getnstimeofday(&ts);
+	time = timespec_to_ns(&ts);
+	delta_in_us = (int)(time - prev_time) / 1000;	/* NOTE(review): the cast binds before the division, so the ns difference is narrowed to int first - confirm intended */
+	prev_time = time;
+
+	for (i = 0; i < MMAPPED_COUNTERS_NUM; i++) {
+		if (mmapped_counters[i].enabled) {
+			mmapped_buffer[len++] = mmapped_counters[i].key;	/* stored as consecutive key, value entries */
+			mmapped_buffer[len++] =
+			    mmapped_simulate(mmapped_counters[i].event,
+					    delta_in_us);
+		}
+	}
+
+	if (buffer)
+		*buffer = mmapped_buffer;
+
+	return len;	/* number of ints written to mmapped_buffer */
+}
+
+static struct gator_interface gator_events_mmapped_interface = {
+	.create_files = gator_events_mmapped_create_files,
+	.start = gator_events_mmapped_start,
+	.stop = gator_events_mmapped_stop,
+	.read = gator_events_mmapped_read,
+};
+
+/* Must not be static! Gives every counter a key and installs the interface. */
+int __init gator_events_mmapped_init(void)
+{
+	int slot = 0;
+
+	while (slot < MMAPPED_COUNTERS_NUM) {
+		mmapped_counters[slot].key = gator_events_get_key();
+		mmapped_counters[slot++].enabled = 0;
+	}
+
+	return gator_events_install(&gator_events_mmapped_interface);
+}
diff --git a/drivers/gator/gator_events_net.c b/drivers/gator/gator_events_net.c
new file mode 100644
index 000000000000..9c8d3a43eaeb
--- /dev/null
+++ b/drivers/gator/gator_events_net.c
@@ -0,0 +1,172 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+#include <linux/netdevice.h>
+#include <linux/hardirq.h>
+
+#define NETRX		0
+#define NETTX		1
+#define TOTALNET	2
+
+static ulong netrx_enabled;
+static ulong nettx_enabled;
+static ulong netrx_key;
+static ulong nettx_key;
+static int rx_total, tx_total;
+static ulong netPrev[TOTALNET];
+static int netGet[TOTALNET * 4];
+
+static struct timer_list net_wake_up_timer;
+
+// Must be run in process context as the kernel function dev_get_stats() can sleep
+static void get_network_stats(struct work_struct *wsptr)
+{
+	int rx = 0, tx = 0;	// NOTE(review): byte totals are accumulated in int - confirm wrap-around is acceptable
+	struct net_device *dev;
+
+	for_each_netdev(&init_net, dev) {	// NOTE(review): no lock is taken around this walk in this function - confirm the list cannot change underneath it
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
+		const struct net_device_stats *stats = dev_get_stats(dev);
+#else
+		struct rtnl_link_stats64 temp;
+		const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
+#endif
+		rx += stats->rx_bytes;
+		tx += stats->tx_bytes;
+	}
+	rx_total = rx;	// totals over all devices in init_net, consumed by calculate_delta()
+	tx_total = tx;
+}
+
+static DECLARE_WORK(wq_get_stats, get_network_stats);	// file-local work item queued by net_wake_up_handler
+
+static void net_wake_up_handler(unsigned long unused_data)
+{
+	// had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater
+	schedule_work(&wq_get_stats);	// queues get_network_stats, the work function bound to wq_get_stats
+}
+
+static void calculate_delta(int *rx, int *tx)
+{
+	int rx_calc, tx_calc;
+
+	rx_calc = (int)(rx_total - netPrev[NETRX]);	// change since the total last accounted for
+	if (rx_calc < 0)	// a negative change is reported as 0
+		rx_calc = 0;
+	netPrev[NETRX] += rx_calc;
+
+	tx_calc = (int)(tx_total - netPrev[NETTX]);	// same computation for the transmit side
+	if (tx_calc < 0)
+		tx_calc = 0;
+	netPrev[NETTX] += tx_calc;
+
+	*rx = rx_calc;
+	*tx = tx_calc;
+}
+
+static int gator_events_net_create_files(struct super_block *sb, struct dentry *root)
+{
+	// Network counters are not currently supported in RT-Preempt full because mod_timer is used
+#ifndef CONFIG_PREEMPT_RT_FULL
+	struct dentry *dir;
+
+	dir = gatorfs_mkdir(sb, root, "Linux_net_rx");
+	if (!dir) {
+		return -1;
+	}
+	gatorfs_create_ulong(sb, dir, "enabled", &netrx_enabled);
+	gatorfs_create_ro_ulong(sb, dir, "key", &netrx_key);
+
+	dir = gatorfs_mkdir(sb, root, "Linux_net_tx");
+	if (!dir) {
+		return -1;
+	}
+	gatorfs_create_ulong(sb, dir, "enabled", &nettx_enabled);
+	gatorfs_create_ro_ulong(sb, dir, "key", &nettx_key);
+#endif
+
+	return 0;	// also reached with no entries created when CONFIG_PREEMPT_RT_FULL is defined
+}
+
+static int gator_events_net_start(void)
+{
+	get_network_stats(0);	// take a baseline synchronously; the work pointer is not used by the function
+	netPrev[NETRX] = rx_total;
+	netPrev[NETTX] = tx_total;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
+	setup_timer(&net_wake_up_timer, net_wake_up_handler, 0);
+#else
+	setup_deferrable_timer_on_stack(&net_wake_up_timer, net_wake_up_handler, 0);	// NOTE(review): net_wake_up_timer is a file-scope static, not a stack object - confirm the _on_stack variant is intended
+#endif
+	return 0;
+}
+
+static void gator_events_net_stop(void)
+{
+	del_timer_sync(&net_wake_up_timer);	// NOTE(review): work already queued by the timer handler is not cancelled here - confirm that is acceptable
+	netrx_enabled = 0;	// with both flags clear the read callback returns 0 without reporting
+	nettx_enabled = 0;
+}
+
+static int gator_events_net_read(int **buffer)
+{
+	int len, rx_delta, tx_delta;
+	static int last_rx_delta = 0, last_tx_delta = 0;	// last deltas reported, used to suppress unchanged values
+
+	if (!on_primary_core())
+		return 0;
+
+	if (!netrx_enabled && !nettx_enabled)
+		return 0;
+
+	mod_timer(&net_wake_up_timer, jiffies + 1);	// arm the timer whose handler queues the next stats refresh
+
+	calculate_delta(&rx_delta, &tx_delta);
+
+	len = 0;
+	if (netrx_enabled && last_rx_delta != rx_delta) {
+		last_rx_delta = rx_delta;
+		netGet[len++] = netrx_key;
+		netGet[len++] = 0;	// indicates to Streamline that rx_delta bytes were received now, not since the last message
+		netGet[len++] = netrx_key;
+		netGet[len++] = rx_delta;
+	}
+
+	if (nettx_enabled && last_tx_delta != tx_delta) {
+		last_tx_delta = tx_delta;
+		netGet[len++] = nettx_key;
+		netGet[len++] = 0;	// indicates to Streamline that tx_delta bytes were transmitted now, not since the last message
+		netGet[len++] = nettx_key;
+		netGet[len++] = tx_delta;
+	}
+
+	if (buffer)
+		*buffer = netGet;
+
+	return len;	// number of ints written to netGet (at most 8, its full size)
+}
+
+static struct gator_interface gator_events_net_interface = {
+	.create_files = gator_events_net_create_files,
+	.start = gator_events_net_start,
+	.stop = gator_events_net_stop,
+	.read = gator_events_net_read,
+};
+
+int gator_events_net_init(void)
+{
+	// Both counters start disabled
+	nettx_enabled = 0;
+	netrx_enabled = 0;
+
+	netrx_key = gator_events_get_key();	// rx is given its key before tx
+	nettx_key = gator_events_get_key();
+	return gator_events_install(&gator_events_net_interface);
+}
diff --git a/drivers/gator/gator_events_perf_pmu.c b/drivers/gator/gator_events_perf_pmu.c
new file mode 100644
index 000000000000..d472df918ab0
--- /dev/null
+++ b/drivers/gator/gator_events_perf_pmu.c
@@ -0,0 +1,587 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "gator.h"
+
+// gator_events_armvX.c is used for Linux 2.6.x
+#if GATOR_PERF_PMU_SUPPORT
+
+#include <linux/io.h>
+#ifdef CONFIG_OF
+#include <linux/of_address.h>
+#endif
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+
+extern bool event_based_sampling;
+
+// Maximum number of per-core counters - currently reserves enough space for two full hardware PMUs for big.LITTLE
+#define CNTMAX 16
+#define CCI_400 4
+// Maximum number of uncore counters
+// + 1 for the cci-400 cycles counter
+#define UCCNT (CCI_400 + 1)
+
+// Default to 0 if unable to probe the revision which was the previous behavior
+#define DEFAULT_CCI_REVISION 0
+
+// A gator_attr is needed for every counter
+struct gator_attr {
+	// Set once in gator_events_perf_pmu_*_init - the name of the event in the gatorfs.
+	// An empty name means the slot is unused; __create_files creates nothing for it.
+	char name[40];
+	// Exposed in gatorfs - set by gatord to enable this counter
+	unsigned long enabled;
+	// Set once in gator_events_perf_pmu_*_init - the perf type to use, see perf_type_id in the perf_event.h header file.
+	unsigned long type;
+	// Exposed in gatorfs - set by gatord to select the event to collect
+	unsigned long event;
+	// Exposed in gatorfs - set by gatord with the sample period to use and enable EBS for this counter
+	unsigned long count;
+	// Exposed as read only in gatorfs - set once in __attr_init as the key to use in the APC data
+	unsigned long key;
+};
+
+// Per-core counter attributes
+static struct gator_attr attrs[CNTMAX];
+// Number of initialized per-core counters
+static int attr_count;
+// Uncore counter attributes
+static struct gator_attr uc_attrs[UCCNT];
+// Number of initialized uncore counters
+static int uc_attr_count;
+
+// Runtime state of one counter (one instance per cpu for per-core counters)
+struct gator_event {
+	// Most recent raw value read from the perf event
+	int curr;
+	// Raw value at the time of the last report; deltas are computed against it
+	int prev;
+	// Delta that was last reported by __read
+	int prev_delta;
+	// Set by __online_dispatch; makes the next __read emit a zero baseline
+	bool zero;
+	// Perf event created in __online_dispatch, NULL while the counter is offline
+	struct perf_event *pevent;
+	// Allocated in __start for enabled counters, freed in __event_stop
+	struct perf_event_attr *pevent_attr;
+};
+
+// Per-cpu state of the per-core counters, indexed like attrs[]
+static DEFINE_PER_CPU(struct gator_event[CNTMAX], events);
+// State of the uncore counters, indexed like uc_attrs[]
+static struct gator_event uc_events[UCCNT];
+// Per-cpu output buffer filled by __read; two ints (key, value) per counter
+static DEFINE_PER_CPU(int[(CNTMAX + UCCNT)*2], perf_cnt);
+
+static void gator_events_perf_pmu_stop(void);
+
+// Creates the gatorfs directory and files for one counter.
+// Slots with an empty name are silently skipped.
+// Returns 0 on success (or skip) and -1 if the directory cannot be created.
+static int __create_files(struct super_block *sb, struct dentry *root, struct gator_attr *const attr)
+{
+	struct dentry *counter_dir;
+
+	if (attr->name[0] != '\0') {
+		counter_dir = gatorfs_mkdir(sb, root, attr->name);
+		if (counter_dir == NULL) {
+			return -1;
+		}
+
+		// "key" is the only read-only entry
+		gatorfs_create_ulong(sb, counter_dir, "enabled", &attr->enabled);
+		gatorfs_create_ulong(sb, counter_dir, "count", &attr->count);
+		gatorfs_create_ro_ulong(sb, counter_dir, "key", &attr->key);
+		gatorfs_create_ulong(sb, counter_dir, "event", &attr->event);
+	}
+
+	return 0;
+}
+
+// create_files callback: populates gatorfs for every initialized per-core
+// counter first and every initialized uncore counter second.
+// Returns 0 on success and -1 as soon as one counter fails.
+static int gator_events_perf_pmu_create_files(struct super_block *sb, struct dentry *root)
+{
+	int idx;
+
+	idx = 0;
+	while (idx < attr_count) {
+		if (__create_files(sb, root, &attrs[idx]) != 0) {
+			return -1;
+		}
+		idx++;
+	}
+
+	idx = 0;
+	while (idx < uc_attr_count) {
+		if (__create_files(sb, root, &uc_attrs[idx]) != 0) {
+			return -1;
+		}
+		idx++;
+	}
+
+	return 0;
+}
+
+// Overflow handler installed for event based sampling counters (count > 0):
+// forwards the interrupted register state to gator_backtrace_handler().
+// Kernels before 3.1 pass an extra int argument, which is ignored.
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
+static void ebs_overflow_handler(struct perf_event *event, int unused, struct perf_sample_data *data, struct pt_regs *regs)
+#else
+static void ebs_overflow_handler(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs)
+#endif
+{
+	gator_backtrace_handler(regs);
+}
+
+// No-op overflow handler used for counters that are only polled.
+// Kernels before 3.1 pass an extra int argument, which is ignored.
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
+static void dummy_handler(struct perf_event *event, int unused, struct perf_sample_data *data, struct pt_regs *regs)
+#else
+static void dummy_handler(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs)
+#endif
+{
+// Required as perf_event_create_kernel_counter() requires an overflow handler, even though all we do is poll
+}
+
+static int gator_events_perf_pmu_read(int **buffer);
+
+// online callback: delegates to gator_events_perf_pmu_read(), which emits the
+// zero baseline for counters flagged by __online_dispatch. migrate is unused.
+static int gator_events_perf_pmu_online(int **buffer, bool migrate)
+{
+	return gator_events_perf_pmu_read(buffer);
+}
+
+// Brings one counter online on the given cpu by creating its perf event.
+//
+// The counter is always flagged so that the next __read emits a zero
+// baseline. No event is created when one already exists, when the counter was
+// not set up by __start, or when this is a migration. On any failure
+// event->pevent is left NULL.
+static void __online_dispatch(int cpu, bool migrate, struct gator_attr *const attr, struct gator_event *const event)
+{
+	// Sampling counters get the backtrace handler, polled counters a no-op one
+	const perf_overflow_handler_t handler = (attr->count > 0) ? ebs_overflow_handler : dummy_handler;
+
+	event->zero = true;
+
+	if (migrate || event->pevent_attr == NULL || event->pevent != NULL) {
+		return;
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
+	event->pevent = perf_event_create_kernel_counter(event->pevent_attr, cpu, 0, handler);
+#else
+	event->pevent = perf_event_create_kernel_counter(event->pevent_attr, cpu, 0, handler, 0);
+#endif
+
+	if (IS_ERR(event->pevent)) {
+		pr_debug("gator: unable to online a counter on cpu %d\n", cpu);
+		event->pevent = NULL;
+	} else if (event->pevent->state != PERF_EVENT_STATE_ACTIVE) {
+		// An event that did not become active is released again
+		pr_debug("gator: inactive counter on cpu %d\n", cpu);
+		perf_event_release_kernel(event->pevent);
+		event->pevent = NULL;
+	}
+}
+
+// online_dispatch callback: onlines all per-core counters of the given cpu
+// and, on logical cpu 0 only, the uncore counters as well.
+static void gator_events_perf_pmu_online_dispatch(int cpu, bool migrate)
+{
+	const int lcpu = pcpu_to_lcpu(cpu);
+	int idx;
+
+	for (idx = 0; idx < attr_count; idx++) {
+		__online_dispatch(lcpu, migrate, &attrs[idx], &per_cpu(events, lcpu)[idx]);
+	}
+
+	// Uncore counters are owned by logical cpu 0
+	if (lcpu != 0) {
+		return;
+	}
+
+	for (idx = 0; idx < uc_attr_count; idx++) {
+		__online_dispatch(lcpu, migrate, &uc_attrs[idx], &uc_events[idx]);
+	}
+}
+
+// Takes one counter offline: detaches the perf event from the gator_event
+// before releasing it. cpu is unused.
+static void __offline_dispatch(int cpu, struct gator_event *const event)
+{
+	struct perf_event *const released = event->pevent;
+
+	if (released == NULL) {
+		return;
+	}
+
+	// Clear the pointer first so the event is no longer reachable through it
+	event->pevent = NULL;
+	perf_event_release_kernel(released);
+}
+
+// offline_dispatch callback: releases all per-core counters of the given cpu
+// and, on logical cpu 0 only, the uncore counters. Migrations are ignored.
+static void gator_events_perf_pmu_offline_dispatch(int cpu, bool migrate)
+{
+	int idx;
+	int lcpu;
+
+	if (migrate) {
+		return;
+	}
+
+	lcpu = pcpu_to_lcpu(cpu);
+
+	for (idx = 0; idx < attr_count; idx++) {
+		__offline_dispatch(lcpu, &per_cpu(events, lcpu)[idx]);
+	}
+
+	if (lcpu != 0) {
+		return;
+	}
+
+	for (idx = 0; idx < uc_attr_count; idx++) {
+		__offline_dispatch(lcpu, &uc_events[idx]);
+	}
+}
+
+// Records that a counter requests event based sampling (count > 0).
+// Returns 0 if the counter does not sample or is the first one to do so, and
+// -1 if another counter already claimed event based sampling.
+static int __check_ebs(struct gator_attr *const attr)
+{
+	// count is unsigned, so "not positive" means zero: no sampling requested
+	if (attr->count == 0) {
+		return 0;
+	}
+
+	if (event_based_sampling) {
+		printk(KERN_WARNING "gator: Only one ebs counter is allowed\n");
+		return -1;
+	}
+
+	event_based_sampling = true;
+	return 0;
+}
+
+// Prepares one counter for a capture session.
+//
+// The perf event pointer is always reset. For enabled counters the bookkeeping
+// values are cleared and a perf_event_attr describing the requested event is
+// allocated; the perf event itself is created later in __online_dispatch.
+//
+// Returns 0 on success (including disabled counters). On allocation failure
+// everything set up so far is torn down via gator_events_perf_pmu_stop() and
+// -1 is returned.
+static int __start(struct gator_attr *const attr, struct gator_event *const event)
+{
+	event->pevent = NULL;
+	if (!attr->enabled) {	// Skip disabled counters
+		return 0;
+	}
+
+	event->prev = 0;
+	event->curr = 0;
+	event->prev_delta = 0;
+	// kzalloc returns zeroed memory, so no separate memset is needed
+	event->pevent_attr = kzalloc(sizeof(*event->pevent_attr), GFP_KERNEL);
+	if (!event->pevent_attr) {
+		gator_events_perf_pmu_stop();
+		return -1;
+	}
+
+	event->pevent_attr->type = attr->type;
+	event->pevent_attr->size = sizeof(*event->pevent_attr);
+	event->pevent_attr->config = attr->event;
+	// A non-zero sample period turns this into an event based sampling counter
+	event->pevent_attr->sample_period = attr->count;
+	event->pevent_attr->pinned = 1;
+
+	return 0;
+}
+
+// start callback. First validates that at most one counter (per-core or
+// uncore) requests event based sampling, then prepares every per-core counter
+// on every present cpu, and finally the uncore counters.
+// Returns 0 on success and -1 on the first failure.
+static int gator_events_perf_pmu_start(void)
+{
+	int idx;
+	int cpu;
+
+	event_based_sampling = false;
+
+	for (idx = 0; idx < attr_count; idx++) {
+		if (__check_ebs(&attrs[idx])) {
+			return -1;
+		}
+	}
+	for (idx = 0; idx < uc_attr_count; idx++) {
+		if (__check_ebs(&uc_attrs[idx])) {
+			return -1;
+		}
+	}
+
+	for_each_present_cpu(cpu) {
+		for (idx = 0; idx < attr_count; idx++) {
+			if (__start(&attrs[idx], &per_cpu(events, cpu)[idx])) {
+				return -1;
+			}
+		}
+	}
+	for (idx = 0; idx < uc_attr_count; idx++) {
+		if (__start(&uc_attrs[idx], &uc_events[idx])) {
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+// Frees the perf_event_attr of one counter and clears the pointer.
+static void __event_stop(struct gator_event *const event)
+{
+	// kfree() accepts NULL, so no guard is required
+	kfree(event->pevent_attr);
+	event->pevent_attr = NULL;
+}
+
+// Resets the gatorfs-writable settings of one counter; name, type and key
+// are left as they are.
+static void __attr_stop(struct gator_attr *const attr)
+{
+	attr->enabled = 0;
+	attr->event = 0;
+	attr->count = 0;
+}
+
+// stop callback (also used by __start to unwind): frees the perf_event_attr
+// of every per-core counter on every present cpu and of every uncore counter,
+// then resets the settings of all counters.
+static void gator_events_perf_pmu_stop(void)
+{
+	unsigned int idx, cpu;
+
+	// Release the per-counter allocations first ...
+	for_each_present_cpu(cpu) {
+		idx = 0;
+		while (idx < attr_count) {
+			__event_stop(&per_cpu(events, cpu)[idx]);
+			idx++;
+		}
+	}
+	idx = 0;
+	while (idx < uc_attr_count) {
+		__event_stop(&uc_events[idx]);
+		idx++;
+	}
+
+	// ... then reset the settings
+	idx = 0;
+	while (idx < attr_count) {
+		__attr_stop(&attrs[idx]);
+		idx++;
+	}
+	idx = 0;
+	while (idx < uc_attr_count) {
+		__attr_stop(&uc_attrs[idx]);
+		idx++;
+	}
+}
+
+// Samples one counter and appends a (key, value) pair to the per-cpu perf_cnt
+// buffer when there is something to report.
+//
+// len:   in/out index into perf_cnt for this cpu, advanced by two per report
+// cpu:   logical cpu whose perf_cnt buffer is written
+// attr:  supplies the key
+// event: runtime state; counters without an active perf event are skipped
+static void __read(int *const len, int cpu, struct gator_attr *const attr, struct gator_event *const event)
+{
+	int delta;
+
+	struct perf_event *const ev = event->pevent;
+	if (ev != NULL && ev->state == PERF_EVENT_STATE_ACTIVE) {
+		/* After creating the perf counter in __online_dispatch, there
+		 * is a race condition between gator_events_perf_pmu_online and
+		 * gator_events_perf_pmu_read. So have
+		 * gator_events_perf_pmu_online call gator_events_perf_pmu_read
+		 * and in __read check to see if it's the first call after
+		 * __online_dispatch and if so, run the online code.
+		 */
+		if (event->zero) {
+			// First read after onlining: take the current count as the baseline and report 0
+			ev->pmu->read(ev);
+			event->prev = event->curr = local64_read(&ev->count);
+			event->prev_delta = 0;
+			per_cpu(perf_cnt, cpu)[(*len)++] = attr->key;
+			per_cpu(perf_cnt, cpu)[(*len)++] = 0;
+			event->zero = false;
+		} else {
+			ev->pmu->read(ev);
+			event->curr = local64_read(&ev->count);
+			delta = event->curr - event->prev;
+			// Report every non-zero delta, plus the first zero delta that follows a non-zero one
+			if (delta != 0 || delta != event->prev_delta) {
+				event->prev_delta = delta;
+				event->prev = event->curr;
+				per_cpu(perf_cnt, cpu)[(*len)++] = attr->key;
+				// Negative deltas are reported as their magnitude
+				if (delta < 0) {
+					delta *= -1;
+				}
+				per_cpu(perf_cnt, cpu)[(*len)++] = delta;
+			}
+		}
+	}
+}
+
+// read callback: samples all per-core counters of the current logical cpu
+// and, on logical cpu 0 only, the uncore counters.
+//
+// buffer: if non-NULL, receives a pointer to this cpu's perf_cnt buffer
+// Returns the number of ints written to that buffer.
+static int gator_events_perf_pmu_read(int **buffer)
+{
+	const int cpu = get_logical_cpu();
+	int written = 0;
+	int idx;
+
+	for (idx = 0; idx < attr_count; idx++) {
+		__read(&written, cpu, &attrs[idx], &per_cpu(events, cpu)[idx]);
+	}
+
+	// Uncore counters are only read on logical cpu 0
+	for (idx = 0; cpu == 0 && idx < uc_attr_count; idx++) {
+		__read(&written, cpu, &uc_attrs[idx], &uc_events[idx]);
+	}
+
+	if (buffer != NULL) {
+		*buffer = per_cpu(perf_cnt, cpu);
+	}
+
+	return written;
+}
+
+// Callback table handed to gator_events_install() by gator_events_perf_pmu_init()
+static struct gator_interface gator_events_perf_pmu_interface = {
+	.create_files = gator_events_perf_pmu_create_files,
+	.start = gator_events_perf_pmu_start,
+	.stop = gator_events_perf_pmu_stop,
+	.online = gator_events_perf_pmu_online,
+	.online_dispatch = gator_events_perf_pmu_online_dispatch,
+	.offline_dispatch = gator_events_perf_pmu_offline_dispatch,
+	.read = gator_events_perf_pmu_read,
+};
+
+// Puts one counter slot into its unused state (empty name, everything zero)
+// and assigns it a key from gator_events_get_key().
+static void __attr_init(struct gator_attr *const attr)
+{
+	attr->name[0] = '\0';
+	attr->enabled = 0;
+	attr->type = 0;
+	attr->event = 0;
+	attr->count = 0;
+	attr->key = gator_events_get_key();
+}
+
+#ifdef CONFIG_OF
+
+static const struct of_device_id arm_cci_matches[] = {
+	{.compatible = "arm,cci-400" },
+	{},
+};
+
+// Determines the CCI-400 revision class from the device tree and hardware.
+//
+// Looks up the first node matching arm_cci_matches, maps its first register
+// resource and reads bits [7:4] of the register at offset 0xfe8
+// (NOTE(review): presumably Peripheral ID2 - confirm against the CCI-400 TRM).
+//
+// Returns 0 for revision values up to 4, 1 for revision values 5 and 6, and
+// DEFAULT_CCI_REVISION when the node is missing, its address cannot be
+// resolved, the registers cannot be mapped, or the revision is higher than 6.
+static int probe_cci_revision(void)
+{
+	struct device_node *np;
+	struct resource res;
+	void __iomem *cci_ctrl_base;
+	int rev;
+	int ret = DEFAULT_CCI_REVISION;
+
+	np = of_find_matching_node(NULL, arm_cci_matches);
+	if (!np) {
+		return ret;
+	}
+
+	if (of_address_to_resource(np, 0, &res)) {
+		goto node_put;
+	}
+
+	cci_ctrl_base = ioremap(res.start, resource_size(&res));
+	// ioremap() can fail; reading through a NULL mapping would oops
+	if (!cci_ctrl_base) {
+		goto node_put;
+	}
+
+	rev = (readl_relaxed(cci_ctrl_base + 0xfe8) >> 4) & 0xf;
+
+	if (rev <= 4) {
+		ret = 0;
+	} else if (rev <= 6) {
+		ret = 1;
+	}
+
+	iounmap(cci_ctrl_base);
+
+ node_put:
+	// Drop the reference taken by of_find_matching_node()
+	of_node_put(np);
+
+	return ret;
+}
+
+#else
+
+// Without device tree support the revision cannot be probed, so the default is reported
+static int probe_cci_revision(void)
+{
+	return DEFAULT_CCI_REVISION;
+}
+
+#endif
+
+// Claims the uncore counter slots for a CCI-400 PMU: one cycle counter
+// ("<name>_ccnt") followed by CCI_400 event counters ("<name>_cnt<N>"), where
+// <name> depends on the probed revision.
+//
+// type: perf type of the PMU, stored in every claimed slot
+//
+// uc_attr_count is advanced for every counter, but slots beyond the end of
+// uc_attrs[] are never written. This keeps the array intact while still
+// letting gator_events_perf_pmu_init() detect the overflow through
+// uc_attr_count > UCCNT.
+static void gator_events_perf_pmu_cci_init(const int type)
+{
+	int cnt;
+	const char *cci_name;
+
+	switch (probe_cci_revision()) {
+	case 0:
+		cci_name = "cci-400";
+		break;
+	case 1:
+		cci_name = "cci-400-r1";
+		break;
+	default:
+		pr_debug("gator: unrecognized cci-400 revision\n");
+		return;
+	}
+
+	if (uc_attr_count < UCCNT) {
+		snprintf(uc_attrs[uc_attr_count].name, sizeof(uc_attrs[uc_attr_count].name), "%s_ccnt", cci_name);
+		uc_attrs[uc_attr_count].type = type;
+	}
+	++uc_attr_count;
+
+	for (cnt = 0; cnt < CCI_400; ++cnt, ++uc_attr_count) {
+		struct gator_attr *attr;
+
+		// Count the slot but do not touch memory past the array
+		if (uc_attr_count >= UCCNT) {
+			continue;
+		}
+		attr = &uc_attrs[uc_attr_count];
+		snprintf(attr->name, sizeof(attr->name), "%s_cnt%d", cci_name, cnt);
+		attr->type = type;
+	}
+}
+
+// Claims the per-core counter slots for one cpu PMU: one cycle counter
+// ("<pmnc_name>_ccnt") followed by pmnc_counters event counters
+// ("<pmnc_name>_cnt<N>").
+//
+// gator_cpu: supplies the PMU name and the number of event counters
+// type:      perf type of the PMU, stored in every claimed slot
+//
+// attr_count is advanced for every counter, but slots beyond the end of
+// attrs[] are never written. This keeps the array intact while still letting
+// gator_events_perf_pmu_init() detect the overflow through
+// attr_count > CNTMAX.
+static void gator_events_perf_pmu_cpu_init(const struct gator_cpu *const gator_cpu, const int type)
+{
+	int cnt;
+
+	if (attr_count < CNTMAX) {
+		snprintf(attrs[attr_count].name, sizeof(attrs[attr_count].name), "%s_ccnt", gator_cpu->pmnc_name);
+		attrs[attr_count].type = type;
+	}
+	++attr_count;
+
+	for (cnt = 0; cnt < gator_cpu->pmnc_counters; ++cnt, ++attr_count) {
+		struct gator_attr *attr;
+
+		// Count the slot but do not touch memory past the array
+		if (attr_count >= CNTMAX) {
+			continue;
+		}
+		attr = &attrs[attr_count];
+		snprintf(attr->name, sizeof(attr->name), "%s_cnt%d", gator_cpu->pmnc_name, cnt);
+		attr->type = type;
+	}
+}
+
+// Discovers the available PMUs through perf and registers this event source.
+//
+// Every counter slot is reset first. Then each dynamic perf type from
+// PERF_TYPE_MAX up to (but excluding) 0x20 is probed by creating a throwaway
+// event with config 0xFF on the first present cpu that accepts it; probing
+// stops at the first type that no cpu accepts. A PMU named "CCI" or "CCI_400"
+// claims uncore slots, a PMU known to gator_find_cpu_by_pmu_name() claims
+// per-core slots. If no cpu PMU was found this way, the cpu is looked up by
+// cpuid and registered with PERF_TYPE_RAW.
+//
+// Returns -1 if the cpu is unknown or more counters were claimed than there
+// are slots, otherwise the result of gator_events_install().
+int gator_events_perf_pmu_init(void)
+{
+	struct perf_event_attr pea;
+	struct perf_event *pe;
+	const struct gator_cpu *gator_cpu;
+	int type;
+	int cpu;
+	int cnt;
+	bool found_cpu = false;
+
+	for (cnt = 0; cnt < CNTMAX; cnt++) {
+		__attr_init(&attrs[cnt]);
+	}
+	for (cnt = 0; cnt < UCCNT; cnt++) {
+		__attr_init(&uc_attrs[cnt]);
+	}
+
+	memset(&pea, 0, sizeof(pea));
+	pea.size = sizeof(pea);
+	pea.config = 0xFF;
+	attr_count = 0;
+	uc_attr_count = 0;
+	for (type = PERF_TYPE_MAX; type < 0x20; ++type) {
+		pea.type = type;
+
+		// A particular PMU may work on some but not all cores, so try on each core
+		pe = NULL;
+		for_each_present_cpu(cpu) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
+			pe = perf_event_create_kernel_counter(&pea, cpu, 0, dummy_handler);
+#else
+			pe = perf_event_create_kernel_counter(&pea, cpu, 0, dummy_handler, 0);
+#endif
+			if (!IS_ERR(pe)) {
+				break;
+			}
+		}
+		// Assume that valid PMUs are contiguous
+		if (IS_ERR(pe)) {
+			break;
+		}
+
+		// Only trust the event if perf really bound it to the type that was asked for
+		if (pe->pmu != NULL && type == pe->pmu->type) {
+			if (strcmp("CCI", pe->pmu->name) == 0 || strcmp("CCI_400", pe->pmu->name) == 0) {
+				gator_events_perf_pmu_cci_init(type);
+			} else if ((gator_cpu = gator_find_cpu_by_pmu_name(pe->pmu->name)) != NULL) {
+				found_cpu = true;
+				gator_events_perf_pmu_cpu_init(gator_cpu, type);
+			}
+			// Initialize gator_attrs for dynamic PMUs here
+		}
+
+		// The probe event is only needed to identify the PMU
+		perf_event_release_kernel(pe);
+	}
+
+	if (!found_cpu) {
+		// Note: this declaration shadows the function-level gator_cpu
+		const struct gator_cpu *const gator_cpu = gator_find_cpu_by_cpuid(gator_cpuid());
+		if (gator_cpu == NULL) {
+			return -1;
+		}
+		gator_events_perf_pmu_cpu_init(gator_cpu, PERF_TYPE_RAW);
+	}
+
+	// Initialize gator_attrs for non-dynamic PMUs here
+
+	if (attr_count > CNTMAX) {
+		printk(KERN_ERR "gator: Too many perf counters\n");
+		return -1;
+	}
+
+	if (uc_attr_count > UCCNT) {
+		printk(KERN_ERR "gator: Too many perf uncore counters\n");
+		return -1;
+	}
+
+	return gator_events_install(&gator_events_perf_pmu_interface);
+}
+
+#endif
diff --git a/drivers/gator/gator_events_sched.c b/drivers/gator/gator_events_sched.c
new file mode 100644
index 000000000000..29f4e39e261c
--- /dev/null
+++ b/drivers/gator/gator_events_sched.c
@@ -0,0 +1,113 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+#include <trace/events/sched.h>
+
+#define SCHED_SWITCH	0
+#define SCHED_TOTAL		(SCHED_SWITCH+1)
+
+static ulong sched_switch_enabled;
+static ulong sched_switch_key;
+static DEFINE_PER_CPU(int[SCHED_TOTAL], schedCnt);
+static DEFINE_PER_CPU(int[SCHED_TOTAL * 2], schedGet);
+
+// Tracepoint probe for sched_switch: counts one context switch for the
+// current physical cpu. Kernels before 2.6.35 pass an additional struct rq
+// argument in the tracepoint prototype.
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next))
+#else
+GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_struct *next))
+#endif
+{
+	unsigned long flags;
+
+	// disable interrupts to synchronize with gator_events_sched_read()
+	// spinlocks not needed since percpu buffers are used
+	local_irq_save(flags);
+	per_cpu(schedCnt, get_physical_cpu())[SCHED_SWITCH]++;
+	local_irq_restore(flags);
+}
+
+// create_files callback: exposes the context switch counter in gatorfs as
+// "Linux_sched_switch" with a writable "enabled" and a read-only "key" file.
+// Returns 0 on success and -1 if the directory cannot be created.
+static int gator_events_sched_create_files(struct super_block *sb, struct dentry *root)
+{
+	struct dentry *switch_dir = gatorfs_mkdir(sb, root, "Linux_sched_switch");
+
+	if (switch_dir == NULL) {
+		return -1;
+	}
+
+	gatorfs_create_ulong(sb, switch_dir, "enabled", &sched_switch_enabled);
+	gatorfs_create_ro_ulong(sb, switch_dir, "key", &sched_switch_key);
+
+	return 0;
+}
+
+// start callback: registers the sched_switch tracepoint if the counter is
+// enabled. Returns 0 on success (or when disabled) and -1 if the tracepoint
+// cannot be registered.
+static int gator_events_sched_start(void)
+{
+	// Registration is only attempted for an enabled counter
+	if (sched_switch_enabled && GATOR_REGISTER_TRACE(sched_switch)) {
+		pr_err("gator: scheduler event tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
+		return -1;
+	}
+
+	pr_debug("gator: registered scheduler event tracepoints\n");
+	return 0;
+}
+
+// stop callback: unregisters the sched_switch tracepoint if the counter was
+// enabled and then disables the counter.
+static void gator_events_sched_stop(void)
+{
+	if (sched_switch_enabled)
+		GATOR_UNREGISTER_TRACE(sched_switch);
+	pr_debug("gator: unregistered scheduler event tracepoints\n");
+
+	sched_switch_enabled = 0;
+}
+
+// read callback: reports the number of context switches counted on the
+// current physical cpu since the previous read and resets that count.
+//
+// buffer: if non-NULL, receives a pointer to this cpu's schedGet buffer
+// Returns the number of ints written (2 when enabled, 0 otherwise).
+static int gator_events_sched_read(int **buffer)
+{
+	unsigned long flags;
+	int len, value;
+	int cpu = get_physical_cpu();
+
+	len = 0;
+	if (sched_switch_enabled) {
+		// Interrupts are off so the probe cannot increment between the read and the reset
+		local_irq_save(flags);
+		value = per_cpu(schedCnt, cpu)[SCHED_SWITCH];
+		per_cpu(schedCnt, cpu)[SCHED_SWITCH] = 0;
+		local_irq_restore(flags);
+		per_cpu(schedGet, cpu)[len++] = sched_switch_key;
+		per_cpu(schedGet, cpu)[len++] = value;
+	}
+
+	if (buffer)
+		*buffer = per_cpu(schedGet, cpu);
+
+	return len;
+}
+
+// Callback table handed to gator_events_install() by gator_events_sched_init()
+static struct gator_interface gator_events_sched_interface = {
+	.create_files = gator_events_sched_create_files,
+	.start = gator_events_sched_start,
+	.stop = gator_events_sched_stop,
+	.read = gator_events_sched_read,
+};
+
+// Registers the context switch counter with the gator core.
+// Returns whatever gator_events_install() returns.
+int gator_events_sched_init(void)
+{
+	sched_switch_key = gator_events_get_key();
+
+	// The counter starts out disabled
+	sched_switch_enabled = 0;
+
+	return gator_events_install(&gator_events_sched_interface);
+}
diff --git a/drivers/gator/gator_events_scorpion.c b/drivers/gator/gator_events_scorpion.c
new file mode 100644
index 000000000000..c91db1219d08
--- /dev/null
+++ b/drivers/gator/gator_events_scorpion.c
@@ -0,0 +1,669 @@
+/**
+ * Copyright (C) ARM Limited 2011-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "gator.h"
+
+// gator_events_perf_pmu.c is used if perf is supported
+#if GATOR_NO_PERF_SUPPORT
+
+static const char *pmnc_name;
+static int pmnc_counters;
+
+// Per-CPU PMNC: config reg
+#define PMNC_E		(1 << 0)	/* Enable all counters */
+#define PMNC_P		(1 << 1)	/* Reset all counters */
+#define PMNC_C		(1 << 2)	/* Cycle counter reset */
+#define PMNC_D		(1 << 3)	/* CCNT counts every 64th cpu cycle */
+#define PMNC_X		(1 << 4)	/* Export to ETM */
+#define PMNC_DP		(1 << 5)	/* Disable CCNT if non-invasive debug */
+#define	PMNC_MASK	0x3f	/* Mask for writable bits */
+
+// ccnt reg
+// Unsigned literal: shifting a signed 1 into bit 31 overflows int
+#define CCNT_REG	(1U << 31)
+
+// Counter indices: CCNT is the cycle counter, event counters start at CNT0
+#define CCNT 		0
+#define CNT0		1
+#define CNTMAX 		(4+1)
+
+static unsigned long pmnc_enabled[CNTMAX];
+static unsigned long pmnc_event[CNTMAX];
+static unsigned long pmnc_key[CNTMAX];
+
+static DEFINE_PER_CPU(int[CNTMAX * 2], perfCnt);
+
+// Scorpion specific event numbers. The values are contiguous starting at
+// 0x4c, and MSM_MAX_EVT is one past the last valid event; the set of events
+// from 0x8e onwards depends on CONFIG_ARCH_MSM_SCORPIONMP.
+enum scorpion_perf_types {
+	SCORPION_ICACHE_EXPL_INV = 0x4c,
+	SCORPION_ICACHE_MISS = 0x4d,
+	SCORPION_ICACHE_ACCESS = 0x4e,
+	SCORPION_ICACHE_CACHEREQ_L2 = 0x4f,
+	SCORPION_ICACHE_NOCACHE_L2 = 0x50,
+	SCORPION_HIQUP_NOPED = 0x51,
+	SCORPION_DATA_ABORT = 0x52,
+	SCORPION_IRQ = 0x53,
+	SCORPION_FIQ = 0x54,
+	SCORPION_ALL_EXCPT = 0x55,
+	SCORPION_UNDEF = 0x56,
+	SCORPION_SVC = 0x57,
+	SCORPION_SMC = 0x58,
+	SCORPION_PREFETCH_ABORT = 0x59,
+	SCORPION_INDEX_CHECK = 0x5a,
+	SCORPION_NULL_CHECK = 0x5b,
+	SCORPION_EXPL_ICIALLU = 0x5c,
+	SCORPION_IMPL_ICIALLU = 0x5d,
+	SCORPION_NONICIALLU_BTAC_INV = 0x5e,
+	SCORPION_ICIMVAU_IMPL_ICIALLU = 0x5f,
+	SCORPION_SPIPE_ONLY_CYCLES = 0x60,
+	SCORPION_XPIPE_ONLY_CYCLES = 0x61,
+	SCORPION_DUAL_CYCLES = 0x62,
+	SCORPION_DISPATCH_ANY_CYCLES = 0x63,
+	SCORPION_FIFO_FULLBLK_CMT = 0x64,
+	SCORPION_FAIL_COND_INST = 0x65,
+	SCORPION_PASS_COND_INST = 0x66,
+	SCORPION_ALLOW_VU_CLK = 0x67,
+	SCORPION_VU_IDLE = 0x68,
+	SCORPION_ALLOW_L2_CLK = 0x69,
+	SCORPION_L2_IDLE = 0x6a,
+	SCORPION_DTLB_IMPL_INV_SCTLR_DACR = 0x6b,
+	SCORPION_DTLB_EXPL_INV = 0x6c,
+	SCORPION_DTLB_MISS = 0x6d,
+	SCORPION_DTLB_ACCESS = 0x6e,
+	SCORPION_ITLB_MISS = 0x6f,
+	SCORPION_ITLB_IMPL_INV = 0x70,
+	SCORPION_ITLB_EXPL_INV = 0x71,
+	SCORPION_UTLB_D_MISS = 0x72,
+	SCORPION_UTLB_D_ACCESS = 0x73,
+	SCORPION_UTLB_I_MISS = 0x74,
+	SCORPION_UTLB_I_ACCESS = 0x75,
+	SCORPION_UTLB_INV_ASID = 0x76,
+	SCORPION_UTLB_INV_MVA = 0x77,
+	SCORPION_UTLB_INV_ALL = 0x78,
+	SCORPION_S2_HOLD_RDQ_UNAVAIL = 0x79,
+	SCORPION_S2_HOLD = 0x7a,
+	SCORPION_S2_HOLD_DEV_OP = 0x7b,
+	SCORPION_S2_HOLD_ORDER = 0x7c,
+	SCORPION_S2_HOLD_BARRIER = 0x7d,
+	SCORPION_VIU_DUAL_CYCLE = 0x7e,
+	SCORPION_VIU_SINGLE_CYCLE = 0x7f,
+	SCORPION_VX_PIPE_WAR_STALL_CYCLES = 0x80,
+	SCORPION_VX_PIPE_WAW_STALL_CYCLES = 0x81,
+	SCORPION_VX_PIPE_RAW_STALL_CYCLES = 0x82,
+	SCORPION_VX_PIPE_LOAD_USE_STALL = 0x83,
+	SCORPION_VS_PIPE_WAR_STALL_CYCLES = 0x84,
+	SCORPION_VS_PIPE_WAW_STALL_CYCLES = 0x85,
+	SCORPION_VS_PIPE_RAW_STALL_CYCLES = 0x86,
+	SCORPION_EXCEPTIONS_INV_OPERATION = 0x87,
+	SCORPION_EXCEPTIONS_DIV_BY_ZERO = 0x88,
+	SCORPION_COND_INST_FAIL_VX_PIPE = 0x89,
+	SCORPION_COND_INST_FAIL_VS_PIPE = 0x8a,
+	SCORPION_EXCEPTIONS_OVERFLOW = 0x8b,
+	SCORPION_EXCEPTIONS_UNDERFLOW = 0x8c,
+	SCORPION_EXCEPTIONS_DENORM = 0x8d,
+#ifdef CONFIG_ARCH_MSM_SCORPIONMP
+	SCORPIONMP_NUM_BARRIERS = 0x8e,
+	SCORPIONMP_BARRIER_CYCLES = 0x8f,
+#else
+	SCORPION_BANK_AB_HIT = 0x8e,
+	SCORPION_BANK_AB_ACCESS = 0x8f,
+	SCORPION_BANK_CD_HIT = 0x90,
+	SCORPION_BANK_CD_ACCESS = 0x91,
+	SCORPION_BANK_AB_DSIDE_HIT = 0x92,
+	SCORPION_BANK_AB_DSIDE_ACCESS = 0x93,
+	SCORPION_BANK_CD_DSIDE_HIT = 0x94,
+	SCORPION_BANK_CD_DSIDE_ACCESS = 0x95,
+	SCORPION_BANK_AB_ISIDE_HIT = 0x96,
+	SCORPION_BANK_AB_ISIDE_ACCESS = 0x97,
+	SCORPION_BANK_CD_ISIDE_HIT = 0x98,
+	SCORPION_BANK_CD_ISIDE_ACCESS = 0x99,
+	SCORPION_ISIDE_RD_WAIT = 0x9a,
+	SCORPION_DSIDE_RD_WAIT = 0x9b,
+	SCORPION_BANK_BYPASS_WRITE = 0x9c,
+	SCORPION_BANK_AB_NON_CASTOUT = 0x9d,
+	SCORPION_BANK_AB_L2_CASTOUT = 0x9e,
+	SCORPION_BANK_CD_NON_CASTOUT = 0x9f,
+	SCORPION_BANK_CD_L2_CASTOUT = 0xa0,
+#endif
+	// One past the last valid event number; used as the upper bound in get_scorpion_evtinfo()
+	MSM_MAX_EVT
+};
+
+// Description of one Scorpion specific event
+struct scorp_evt {
+	// Event number as requested (a value of enum scorpion_perf_types)
+	u32 evt_type;
+	// Value merged into the group's register by scorpion_evt_setup()
+	u32 val;
+	// Index into scor_func[] selecting the register accessors for this event
+	u8 grp;
+	// Value written to the event select register in place of evt_type
+	u32 evt_type_act;
+};
+
+// Lookup table for the Scorpion specific events.
+// get_scorpion_evtinfo() indexes this table with (event number - 0x4c), so the
+// entries must stay in the same order as enum scorpion_perf_types, without gaps.
+// Columns: event number, register value, group, actual event select value.
+static const struct scorp_evt sc_evt[] = {
+	{SCORPION_ICACHE_EXPL_INV, 0x80000500, 0, 0x4d},
+	{SCORPION_ICACHE_MISS, 0x80050000, 0, 0x4e},
+	{SCORPION_ICACHE_ACCESS, 0x85000000, 0, 0x4f},
+	{SCORPION_ICACHE_CACHEREQ_L2, 0x86000000, 0, 0x4f},
+	{SCORPION_ICACHE_NOCACHE_L2, 0x87000000, 0, 0x4f},
+	{SCORPION_HIQUP_NOPED, 0x80080000, 0, 0x4e},
+	{SCORPION_DATA_ABORT, 0x8000000a, 0, 0x4c},
+	{SCORPION_IRQ, 0x80000a00, 0, 0x4d},
+	{SCORPION_FIQ, 0x800a0000, 0, 0x4e},
+	{SCORPION_ALL_EXCPT, 0x8a000000, 0, 0x4f},
+	{SCORPION_UNDEF, 0x8000000b, 0, 0x4c},
+	{SCORPION_SVC, 0x80000b00, 0, 0x4d},
+	{SCORPION_SMC, 0x800b0000, 0, 0x4e},
+	{SCORPION_PREFETCH_ABORT, 0x8b000000, 0, 0x4f},
+	{SCORPION_INDEX_CHECK, 0x8000000c, 0, 0x4c},
+	{SCORPION_NULL_CHECK, 0x80000c00, 0, 0x4d},
+	{SCORPION_EXPL_ICIALLU, 0x8000000d, 0, 0x4c},
+	{SCORPION_IMPL_ICIALLU, 0x80000d00, 0, 0x4d},
+	{SCORPION_NONICIALLU_BTAC_INV, 0x800d0000, 0, 0x4e},
+	{SCORPION_ICIMVAU_IMPL_ICIALLU, 0x8d000000, 0, 0x4f},
+
+	{SCORPION_SPIPE_ONLY_CYCLES, 0x80000600, 1, 0x51},
+	{SCORPION_XPIPE_ONLY_CYCLES, 0x80060000, 1, 0x52},
+	{SCORPION_DUAL_CYCLES, 0x86000000, 1, 0x53},
+	{SCORPION_DISPATCH_ANY_CYCLES, 0x89000000, 1, 0x53},
+	{SCORPION_FIFO_FULLBLK_CMT, 0x8000000d, 1, 0x50},
+	{SCORPION_FAIL_COND_INST, 0x800d0000, 1, 0x52},
+	{SCORPION_PASS_COND_INST, 0x8d000000, 1, 0x53},
+	{SCORPION_ALLOW_VU_CLK, 0x8000000e, 1, 0x50},
+	{SCORPION_VU_IDLE, 0x80000e00, 1, 0x51},
+	{SCORPION_ALLOW_L2_CLK, 0x800e0000, 1, 0x52},
+	{SCORPION_L2_IDLE, 0x8e000000, 1, 0x53},
+
+	{SCORPION_DTLB_IMPL_INV_SCTLR_DACR, 0x80000001, 2, 0x54},
+	{SCORPION_DTLB_EXPL_INV, 0x80000100, 2, 0x55},
+	{SCORPION_DTLB_MISS, 0x80010000, 2, 0x56},
+	{SCORPION_DTLB_ACCESS, 0x81000000, 2, 0x57},
+	{SCORPION_ITLB_MISS, 0x80000200, 2, 0x55},
+	{SCORPION_ITLB_IMPL_INV, 0x80020000, 2, 0x56},
+	{SCORPION_ITLB_EXPL_INV, 0x82000000, 2, 0x57},
+	{SCORPION_UTLB_D_MISS, 0x80000003, 2, 0x54},
+	{SCORPION_UTLB_D_ACCESS, 0x80000300, 2, 0x55},
+	{SCORPION_UTLB_I_MISS, 0x80030000, 2, 0x56},
+	{SCORPION_UTLB_I_ACCESS, 0x83000000, 2, 0x57},
+	{SCORPION_UTLB_INV_ASID, 0x80000400, 2, 0x55},
+	{SCORPION_UTLB_INV_MVA, 0x80040000, 2, 0x56},
+	{SCORPION_UTLB_INV_ALL, 0x84000000, 2, 0x57},
+	{SCORPION_S2_HOLD_RDQ_UNAVAIL, 0x80000800, 2, 0x55},
+	{SCORPION_S2_HOLD, 0x88000000, 2, 0x57},
+	{SCORPION_S2_HOLD_DEV_OP, 0x80000900, 2, 0x55},
+	{SCORPION_S2_HOLD_ORDER, 0x80090000, 2, 0x56},
+	{SCORPION_S2_HOLD_BARRIER, 0x89000000, 2, 0x57},
+
+	{SCORPION_VIU_DUAL_CYCLE, 0x80000001, 4, 0x5c},
+	{SCORPION_VIU_SINGLE_CYCLE, 0x80000100, 4, 0x5d},
+	{SCORPION_VX_PIPE_WAR_STALL_CYCLES, 0x80000005, 4, 0x5c},
+	{SCORPION_VX_PIPE_WAW_STALL_CYCLES, 0x80000500, 4, 0x5d},
+	{SCORPION_VX_PIPE_RAW_STALL_CYCLES, 0x80050000, 4, 0x5e},
+	{SCORPION_VX_PIPE_LOAD_USE_STALL, 0x80000007, 4, 0x5c},
+	{SCORPION_VS_PIPE_WAR_STALL_CYCLES, 0x80000008, 4, 0x5c},
+	{SCORPION_VS_PIPE_WAW_STALL_CYCLES, 0x80000800, 4, 0x5d},
+	{SCORPION_VS_PIPE_RAW_STALL_CYCLES, 0x80080000, 4, 0x5e},
+	{SCORPION_EXCEPTIONS_INV_OPERATION, 0x8000000b, 4, 0x5c},
+	{SCORPION_EXCEPTIONS_DIV_BY_ZERO, 0x80000b00, 4, 0x5d},
+	{SCORPION_COND_INST_FAIL_VX_PIPE, 0x800b0000, 4, 0x5e},
+	{SCORPION_COND_INST_FAIL_VS_PIPE, 0x8b000000, 4, 0x5f},
+	{SCORPION_EXCEPTIONS_OVERFLOW, 0x8000000c, 4, 0x5c},
+	{SCORPION_EXCEPTIONS_UNDERFLOW, 0x80000c00, 4, 0x5d},
+	{SCORPION_EXCEPTIONS_DENORM, 0x8c000000, 4, 0x5f},
+
+#ifdef CONFIG_ARCH_MSM_SCORPIONMP
+	{SCORPIONMP_NUM_BARRIERS, 0x80000e00, 3, 0x59},
+	{SCORPIONMP_BARRIER_CYCLES, 0x800e0000, 3, 0x5a},
+#else
+	{SCORPION_BANK_AB_HIT, 0x80000001, 3, 0x58},
+	{SCORPION_BANK_AB_ACCESS, 0x80000100, 3, 0x59},
+	{SCORPION_BANK_CD_HIT, 0x80010000, 3, 0x5a},
+	{SCORPION_BANK_CD_ACCESS, 0x81000000, 3, 0x5b},
+	{SCORPION_BANK_AB_DSIDE_HIT, 0x80000002, 3, 0x58},
+	{SCORPION_BANK_AB_DSIDE_ACCESS, 0x80000200, 3, 0x59},
+	{SCORPION_BANK_CD_DSIDE_HIT, 0x80020000, 3, 0x5a},
+	{SCORPION_BANK_CD_DSIDE_ACCESS, 0x82000000, 3, 0x5b},
+	{SCORPION_BANK_AB_ISIDE_HIT, 0x80000003, 3, 0x58},
+	{SCORPION_BANK_AB_ISIDE_ACCESS, 0x80000300, 3, 0x59},
+	{SCORPION_BANK_CD_ISIDE_HIT, 0x80030000, 3, 0x5a},
+	{SCORPION_BANK_CD_ISIDE_ACCESS, 0x83000000, 3, 0x5b},
+	{SCORPION_ISIDE_RD_WAIT, 0x80000009, 3, 0x58},
+	{SCORPION_DSIDE_RD_WAIT, 0x80090000, 3, 0x5a},
+	{SCORPION_BANK_BYPASS_WRITE, 0x8000000a, 3, 0x58},
+	{SCORPION_BANK_AB_NON_CASTOUT, 0x8000000c, 3, 0x58},
+	{SCORPION_BANK_AB_L2_CASTOUT, 0x80000c00, 3, 0x59},
+	{SCORPION_BANK_CD_NON_CASTOUT, 0x800c0000, 3, 0x5a},
+	{SCORPION_BANK_CD_L2_CASTOUT, 0x8c000000, 3, 0x5b},
+#endif
+};
+
+// Writes the PMNC config register (p15, c9, c12, 0); bits outside PMNC_MASK are dropped
+static inline void scorpion_pmnc_write(u32 val)
+{
+	val &= PMNC_MASK;
+	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (val));
+}
+
+// Reads the PMNC config register (p15, c9, c12, 0)
+static inline u32 scorpion_pmnc_read(void)
+{
+	u32 val;
+	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
+	return val;
+}
+
+// Reads p15, c9, c13, 0 - the same register scorpion_pmnc_reset_counter() clears for CCNT
+static inline u32 scorpion_ccnt_read(void)
+{
+	u32 val;
+	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
+	return val;
+}
+
+// Reads p15, c9, c13, 2.
+// NOTE(review): presumably the value of the event counter last chosen with
+// scorpion_pmnc_select_counter() - confirm.
+static inline u32 scorpion_cntn_read(void)
+{
+	u32 val;
+	asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
+	return val;
+}
+
+// Enables one counter by writing its bit to p15, c9, c12, 1: CCNT_REG for the
+// cycle counter, bit (cnt - CNT0) for an event counter.
+// Returns cnt on success. For cnt >= CNTMAX an error is logged and -1 is
+// returned, which the u32 return type turns into 0xffffffff.
+static inline u32 scorpion_pmnc_enable_counter(unsigned int cnt)
+{
+	u32 val;
+
+	if (cnt >= CNTMAX) {
+		pr_err("gator: CPU%u enabling wrong PMNC counter %d\n", smp_processor_id(), cnt);
+		return -1;
+	}
+
+	if (cnt == CCNT)
+		val = CCNT_REG;
+	else
+		val = (1 << (cnt - CNT0));
+
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
+
+	return cnt;
+}
+
+// Disables one counter by writing its bit to p15, c9, c12, 2: CCNT_REG for the
+// cycle counter, bit (cnt - CNT0) for an event counter.
+// Returns cnt on success. For cnt >= CNTMAX an error is logged and -1 is
+// returned, which the u32 return type turns into 0xffffffff.
+static inline u32 scorpion_pmnc_disable_counter(unsigned int cnt)
+{
+	u32 val;
+
+	if (cnt >= CNTMAX) {
+		pr_err("gator: CPU%u disabling wrong PMNC counter %d\n", smp_processor_id(), cnt);
+		return -1;
+	}
+
+	if (cnt == CCNT)
+		val = CCNT_REG;
+	else
+		val = (1 << (cnt - CNT0));
+
+	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
+
+	return cnt;
+}
+
+// Selects an event counter by writing its zero-based index (cnt - CNT0) to
+// p15, c9, c12, 5. The cycle counter cannot be selected.
+// Returns cnt on success, or -1 (after logging) for CCNT or cnt >= CNTMAX.
+static inline int scorpion_pmnc_select_counter(unsigned int cnt)
+{
+	u32 val;
+
+	if ((cnt == CCNT) || (cnt >= CNTMAX)) {
+		pr_err("gator: CPU%u selecting wrong PMNC counter %d\n", smp_processor_id(), cnt);
+		return -1;
+	}
+
+	val = (cnt - CNT0);
+	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
+
+	return cnt;
+}
+
+// Reads the group 0 register (p15, 0, c15, c0, 0); scor_func[0].read
+static u32 scorpion_read_lpm0(void)
+{
+	u32 val;
+	asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val));
+	return val;
+}
+
+// Writes the group 0 register (p15, 0, c15, c0, 0); scor_func[0].write
+static void scorpion_write_lpm0(u32 val)
+{
+	asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val));
+}
+
+// Reads the group 1 register (p15, 1, c15, c0, 0); scor_func[1].read
+static u32 scorpion_read_lpm1(void)
+{
+	u32 val;
+	asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val));
+	return val;
+}
+
+// Writes the group 1 register (p15, 1, c15, c0, 0); scor_func[1].write
+static void scorpion_write_lpm1(u32 val)
+{
+	asm volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val));
+}
+
+// Reads the group 2 register (p15, 2, c15, c0, 0); scor_func[2].read
+static u32 scorpion_read_lpm2(void)
+{
+	u32 val;
+	asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val));
+	return val;
+}
+
+// Writes the group 2 register (p15, 2, c15, c0, 0); scor_func[2].write
+static void scorpion_write_lpm2(u32 val)
+{
+	asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val));
+}
+
+// Reads the group 3 register (p15, 3, c15, c2, 0); scor_func[3].read
+static u32 scorpion_read_l2lpm(void)
+{
+	u32 val;
+	asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val));
+	return val;
+}
+
+// Writes the group 3 register (p15, 3, c15, c2, 0); scor_func[3].write
+static void scorpion_write_l2lpm(u32 val)
+{
+	asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val));
+}
+
+// Reads the group 4 register (p10, 7, c11, c0, 0); scor_func[4].read.
+// scorpion_evt_setup() brackets group 4 accesses with scorpion_pre_vlpm()/scorpion_post_vlpm().
+static u32 scorpion_read_vlpm(void)
+{
+	u32 val;
+	asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val));
+	return val;
+}
+
+// Writes the group 4 register (p10, 7, c11, c0, 0); scor_func[4].write.
+// scorpion_evt_setup() brackets group 4 accesses with scorpion_pre_vlpm()/scorpion_post_vlpm().
+static void scorpion_write_vlpm(u32 val)
+{
+	asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val));
+}
+
+// Read/write accessor pair for the register of one event group
+struct scorpion_access_funcs {
+	u32(*read)(void);
+	void (*write)(u32);
+};
+
+// Register accessors indexed by event group (the grp field of struct scorp_evt)
+struct scorpion_access_funcs scor_func[] = {
+	{scorpion_read_lpm0, scorpion_write_lpm0},
+	{scorpion_read_lpm1, scorpion_write_lpm1},
+	{scorpion_read_lpm2, scorpion_write_lpm2},
+	{scorpion_read_l2lpm, scorpion_write_l2lpm},
+	{scorpion_read_vlpm, scorpion_write_vlpm},
+};
+
+u32 venum_orig_val;
+u32 fp_orig_val;
+
+// Prepares for an access to the group 4 register: saves the current CPACR and
+// FPEXC values in venum_orig_val / fp_orig_val and then sets bits 0x00300000
+// in CPACR and bit 0x40000000 in FPEXC. Undone by scorpion_post_vlpm().
+// NOTE(review): the saved values live in plain globals shared by all cpus -
+// confirm callers cannot run this concurrently.
+static void scorpion_pre_vlpm(void)
+{
+	u32 venum_new_val;
+	u32 fp_new_val;
+
+	/* CPACR Enable CP10 access */
+	asm volatile("mrc p15, 0, %0, c1, c0, 2" : "=r" (venum_orig_val));
+	venum_new_val = venum_orig_val | 0x00300000;
+	asm volatile("mcr p15, 0, %0, c1, c0, 2" : : "r" (venum_new_val));
+	/* Enable FPEXC */
+	asm volatile("mrc p10, 7, %0, c8, c0, 0" : "=r" (fp_orig_val));
+	fp_new_val = fp_orig_val | 0x40000000;
+	asm volatile("mcr p10, 7, %0, c8, c0, 0" : : "r" (fp_new_val));
+}
+
+// Restores the FPEXC and CPACR values saved by scorpion_pre_vlpm(), in the
+// reverse order of how they were changed.
+static void scorpion_post_vlpm(void)
+{
+	/* Restore FPEXC */
+	asm volatile("mcr p10, 7, %0, c8, c0, 0" : : "r" (fp_orig_val));
+	/* Restore CPACR */
+	asm volatile("mcr p15, 0, %0, c1, c0, 2" : : "r" (venum_orig_val));
+}
+
+#define COLMN0MASK 0x000000ff
+#define COLMN1MASK 0x0000ff00
+#define COLMN2MASK 0x00ff0000
+// Returns the mask of register bits to preserve when setval is merged in.
+// The lowest byte column that setval touches is cleared; if setval touches
+// none of the three low columns, the top column is cleared except for bit 31.
+static u32 scorpion_get_columnmask(u32 setval)
+{
+	u32 keep_mask = 0x80ffffff;
+
+	if (setval & COLMN0MASK) {
+		keep_mask = 0xffffff00;
+	} else if (setval & COLMN1MASK) {
+		keep_mask = 0xffff00ff;
+	} else if (setval & COLMN2MASK) {
+		keep_mask = 0xff00ffff;
+	}
+
+	return keep_mask;
+}
+
+// Programs an event into the register of its group with a read-modify-write:
+// the column that setval occupies is cleared in the current register value
+// and setval is ORed in.
+//
+// gr:     event group, used as index into scor_func[]; group 4 accesses are
+//         bracketed by scorpion_pre_vlpm()/scorpion_post_vlpm()
+// setval: register value of the event (the val field of struct scorp_evt)
+static void scorpion_evt_setup(u32 gr, u32 setval)
+{
+	u32 val;
+	if (gr == 4)
+		scorpion_pre_vlpm();
+	val = scorpion_get_columnmask(setval) & scor_func[gr].read();
+	val = val | setval;
+	scor_func[gr].write(val);
+	if (gr == 4)
+		scorpion_post_vlpm();
+}
+
+// Looks up the description of a Scorpion specific event.
+//
+// evt_type: event number, valid from 0x4c up to (excluding) MSM_MAX_EVT
+// evtinfo:  on success receives val, grp and evt_type_act of the event;
+//           its evt_type field is not written
+// Returns 1 if the event was found and 0 otherwise.
+static int get_scorpion_evtinfo(unsigned int evt_type, struct scorp_evt *evtinfo)
+{
+	const struct scorp_evt *entry;
+
+	if (evt_type < 0x4c || evt_type >= MSM_MAX_EVT) {
+		return 0;
+	}
+
+	// sc_evt[] is ordered by event number, starting at 0x4c
+	entry = &sc_evt[evt_type - 0x4c];
+	if (entry->evt_type != evt_type) {
+		return 0;
+	}
+
+	evtinfo->val = entry->val;
+	evtinfo->grp = entry->grp;
+	evtinfo->evt_type_act = entry->evt_type_act;
+	return 1;
+}
+
+/*
+ * Program the event selection for counter cnt.
+ *
+ * Nothing is done if cnt cannot be selected. Events below 0x40 are written
+ * directly with the c9, c13, 1 coprocessor write. Any other event is looked
+ * up with get_scorpion_evtinfo(); events it does not know are silently
+ * ignored. For a known event the table's evt_type_act is written instead,
+ * c9, c15, 0 is cleared, and the table's register value is merged into the
+ * event's group register by scorpion_evt_setup().
+ */
+static inline void scorpion_pmnc_write_evtsel(unsigned int cnt, u32 val)
+{
+	u32 zero = 0;
+	struct scorp_evt evtinfo;
+
+	if (scorpion_pmnc_select_counter(cnt) != cnt)
+		return;
+
+	if (val < 0x40) {
+		asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
+		return;
+	}
+
+	// look up evtinfo.grp, evtinfo.val and evtinfo.evt_type_act for val
+	if (get_scorpion_evtinfo(val, &evtinfo) == 0)
+		return;
+
+	asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (evtinfo.evt_type_act));
+	asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (zero));
+	/*
+	 * Merge the table's register value rather than the raw event number:
+	 * scorpion_evt_setup() derives its column mask from which byte of the
+	 * value is populated, and an event number always sits in the low byte.
+	 */
+	scorpion_evt_setup(evtinfo.grp, evtinfo.val);
+}
+
+/*
+ * Zero counter cnt.
+ *
+ * The counter is disabled while it is cleared and re-enabled afterwards
+ * only if pmnc_enabled[cnt] is set. CCNT is cleared directly; any other
+ * valid counter is cleared only if it can be selected first. An index of
+ * CNTMAX or above is reported with pr_err() and otherwise ignored.
+ */
+static void scorpion_pmnc_reset_counter(unsigned int cnt)
+{
+	u32 val = 0;
+
+	if (cnt != CCNT && cnt >= CNTMAX) {
+		/* cnt is unsigned, so it is printed with %u */
+		pr_err("gator: CPU%u resetting wrong PMNC counter %u\n", smp_processor_id(), cnt);
+		return;
+	}
+
+	scorpion_pmnc_disable_counter(cnt);
+
+	if (cnt == CCNT)
+		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (val));
+	else if (scorpion_pmnc_select_counter(cnt) == cnt)
+		asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (val));
+
+	if (pmnc_enabled[cnt] != 0)
+		scorpion_pmnc_enable_counter(cnt);
+}
+
+/*
+ * Create one gatorfs directory per counter under root.
+ *
+ * Counter 0 gets "<pmnc_name>_ccnt"; counter i > 0 gets
+ * "<pmnc_name>_cnt<i-1>". Every directory receives "enabled" and a
+ * read-only "key"; all but counter 0 also receive "event".
+ *
+ * Returns 0 on success, -1 as soon as a directory cannot be created.
+ */
+static int gator_events_scorpion_create_files(struct super_block *sb, struct dentry *root)
+{
+	int i;
+
+	for (i = 0; i < pmnc_counters; i++) {
+		struct dentry *dir;
+		char dirname[40];
+
+		if (i != 0)
+			snprintf(dirname, sizeof dirname, "%s_cnt%d", pmnc_name, i - 1);
+		else
+			snprintf(dirname, sizeof dirname, "%s_ccnt", pmnc_name);
+
+		dir = gatorfs_mkdir(sb, root, dirname);
+		if (dir == NULL)
+			return -1;
+
+		gatorfs_create_ulong(sb, dir, "enabled", &pmnc_enabled[i]);
+		gatorfs_create_ro_ulong(sb, dir, "key", &pmnc_key[i]);
+		if (i != 0)
+			gatorfs_create_ulong(sb, dir, "event", &pmnc_event[i]);
+	}
+
+	return 0;
+}
+
+/*
+ * Program and start the counters on the calling CPU.
+ *
+ * The PMNC is stopped if running and reset with PMNC_P | PMNC_C. Every
+ * enabled counter is then disabled, given its event (not for CCNT), zeroed
+ * and enabled again, after which the PMNC is started. Finally each enabled
+ * counter is read once and reset so stale data is discarded.
+ *
+ * For each enabled counter a (key, 0) pair is stored in this CPU's perfCnt
+ * array. If buffer is non-NULL, *buffer is pointed at that array.
+ * The migrate argument is not used here.
+ *
+ * Returns the number of ints written to perfCnt.
+ */
+static int gator_events_scorpion_online(int **buffer, bool migrate)
+{
+	unsigned int cnt, len = 0, cpu = smp_processor_id();
+
+	// stop the PMNC before reprogramming it
+	if (scorpion_pmnc_read() & PMNC_E) {
+		scorpion_pmnc_write(scorpion_pmnc_read() & ~PMNC_E);
+	}
+
+	/* Initialize & Reset PMNC: C bit and P bit */
+	scorpion_pmnc_write(PMNC_P | PMNC_C);
+
+	for (cnt = CCNT; cnt < CNTMAX; cnt++) {
+		unsigned long event;
+
+		if (!pmnc_enabled[cnt])
+			continue;
+
+		// disable counter
+		scorpion_pmnc_disable_counter(cnt);
+
+		// only the low 8 bits of the configured event are used
+		event = pmnc_event[cnt] & 255;
+
+		// Set event (if destined for PMNx counters), We don't need to set the event if it's a cycle count
+		if (cnt != CCNT)
+			scorpion_pmnc_write_evtsel(cnt, event);
+
+		// reset counter
+		scorpion_pmnc_reset_counter(cnt);
+
+		// Enable counter, do not enable interrupt for this counter
+		scorpion_pmnc_enable_counter(cnt);
+	}
+
+	// enable
+	scorpion_pmnc_write(scorpion_pmnc_read() | PMNC_E);
+
+	// read the counters and toss the invalid data, return zero instead
+	for (cnt = 0; cnt < pmnc_counters; cnt++) {
+		if (pmnc_enabled[cnt]) {
+			if (cnt == CCNT) {
+				scorpion_ccnt_read();
+			} else if (scorpion_pmnc_select_counter(cnt) == cnt) {
+				scorpion_cntn_read();
+			}
+			scorpion_pmnc_reset_counter(cnt);
+
+			per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
+			per_cpu(perfCnt, cpu)[len++] = 0;
+		}
+	}
+
+	if (buffer)
+		*buffer = per_cpu(perfCnt, cpu);
+
+	return len;
+}
+
+/*
+ * Stop counting on the calling CPU by clearing PMNC_E in the PMNC.
+ * Neither argument is used. Always returns 0 (no data produced).
+ */
+static int gator_events_scorpion_offline(int **buffer, bool migrate)
+{
+	scorpion_pmnc_write(~PMNC_E & scorpion_pmnc_read());
+
+	return 0;
+}
+
+/*
+ * Forget the capture configuration: clear the enabled flag and the
+ * selected event of every counter from CCNT up to CNTMAX.
+ */
+static void gator_events_scorpion_stop(void)
+{
+	unsigned int idx = CCNT;
+
+	while (idx < CNTMAX) {
+		pmnc_enabled[idx] = 0;
+		pmnc_event[idx] = 0;
+		idx++;
+	}
+}
+
+/*
+ * Sample and reset every enabled counter on the calling CPU.
+ *
+ * For each enabled counter a (key, value) pair is stored in this CPU's
+ * perfCnt array; value is 0 when a non-CCNT counter cannot be selected.
+ * If buffer is non-NULL, *buffer is pointed at that array.
+ *
+ * Returns the number of ints written, or 0 when the PMNC is not enabled.
+ */
+static int gator_events_scorpion_read(int **buffer)
+{
+	int cpu = smp_processor_id();
+	int *out = per_cpu(perfCnt, cpu);
+	int len = 0;
+	int cnt;
+
+	// a context switch may occur before the online hotplug event, thus need to check that the pmu is enabled
+	if ((scorpion_pmnc_read() & PMNC_E) == 0)
+		return 0;
+
+	for (cnt = 0; cnt < pmnc_counters; cnt++) {
+		int value = 0;
+
+		if (!pmnc_enabled[cnt])
+			continue;
+
+		if (cnt == CCNT)
+			value = scorpion_ccnt_read();
+		else if (scorpion_pmnc_select_counter(cnt) == cnt)
+			value = scorpion_cntn_read();
+
+		scorpion_pmnc_reset_counter(cnt);
+
+		out[len++] = pmnc_key[cnt];
+		out[len++] = value;
+	}
+
+	if (buffer != NULL)
+		*buffer = out;
+
+	return len;
+}
+
+/*
+ * Callback table for the Scorpion counters; handed to
+ * gator_events_install() by gator_events_scorpion_init().
+ */
+static struct gator_interface gator_events_scorpion_interface = {
+	.create_files = gator_events_scorpion_create_files,
+	.stop = gator_events_scorpion_stop,
+	.online = gator_events_scorpion_online,
+	.offline = gator_events_scorpion_offline,
+	.read = gator_events_scorpion_read,
+};
+
+/*
+ * Set up the Scorpion counter driver if the CPU is a Scorpion or
+ * ScorpionMP: record the PMU name and counter count (four event counters
+ * plus CCNT), clear the per-counter configuration, assign each counter a
+ * key and install the interface.
+ *
+ * Returns -1 for any other CPU, otherwise the result of
+ * gator_events_install().
+ */
+int gator_events_scorpion_init(void)
+{
+	unsigned int idx;
+
+	switch (gator_cpuid()) {
+	case SCORPION:
+		pmnc_name = "Scorpion";
+		break;
+	case SCORPIONMP:
+		pmnc_name = "ScorpionMP";
+		break;
+	default:
+		return -1;
+	}
+
+	pmnc_counters = 4 + 1;	// CNT[n] + CCNT
+
+	idx = CCNT;
+	while (idx < CNTMAX) {
+		pmnc_enabled[idx] = 0;
+		pmnc_event[idx] = 0;
+		pmnc_key[idx] = gator_events_get_key();
+		idx++;
+	}
+
+	return gator_events_install(&gator_events_scorpion_interface);
+}
+
+#endif
diff --git a/drivers/gator/gator_fs.c b/drivers/gator/gator_fs.c
new file mode 100644
index 000000000000..fe6f83d547e9
--- /dev/null
+++ b/drivers/gator/gator_fs.c
@@ -0,0 +1,382 @@
+/**
+ * @file gator_fs.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ *
+ * A simple filesystem for configuration and
+ * access of oprofile.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <asm/uaccess.h>
+
+/* Superblock magic number stored in sb->s_magic by gatorfs_fill_super(). */
+#define gatorfs_MAGIC 0x24051020
+/* Size of the on-stack buffers used to format and parse numeric files. */
+#define TMPBUFSIZE 50
+/* Held while a value parsed from user space is stored to its target. */
+DEFINE_SPINLOCK(gatorfs_lock);
+
+/*
+ * Allocate an inode on sb with the given mode and all three timestamps
+ * set to the current time. On kernels >= 2.6.37 an inode number is
+ * assigned as well. Returns NULL if the allocation fails.
+ */
+static struct inode *gatorfs_get_inode(struct super_block *sb, int mode)
+{
+	struct inode *inode = new_inode(sb);
+
+	if (!inode)
+		return NULL;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)
+	inode->i_ino = get_next_ino();
+#endif
+	inode->i_mode = mode;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+
+	return inode;
+}
+
+/* Superblock operations; installed as sb->s_op by gatorfs_fill_super(). */
+static const struct super_operations s_ops = {
+	.statfs = simple_statfs,
+	.drop_inode = generic_delete_inode,
+};
+
+/*
+ * Serve a read() of the NUL-terminated string str (terminator excluded)
+ * through simple_read_from_buffer(), whose result is returned.
+ */
+ssize_t gatorfs_str_to_user(char const *str, char __user *buf, size_t count, loff_t *offset)
+{
+	const size_t available = strlen(str);
+
+	return simple_read_from_buffer(buf, count, offset, str, available);
+}
+
+/*
+ * Serve a read() of val formatted as "<decimal>\n".
+ *
+ * Returns the result of simple_read_from_buffer().
+ */
+ssize_t gatorfs_ulong_to_user(unsigned long val, char __user *buf, size_t count, loff_t *offset)
+{
+	char tmpbuf[TMPBUFSIZE];
+	size_t maxlen = snprintf(tmpbuf, TMPBUFSIZE, "%lu\n", val);
+
+	/*
+	 * snprintf() returns the untruncated length; on truncation only
+	 * TMPBUFSIZE - 1 characters precede the NUL, so never expose more.
+	 */
+	if (maxlen >= TMPBUFSIZE)
+		maxlen = TMPBUFSIZE - 1;
+	return simple_read_from_buffer(buf, count, offset, tmpbuf, maxlen);
+}
+
+/*
+ * Serve a read() of the 64-bit val formatted as "<decimal>\n".
+ *
+ * Returns the result of simple_read_from_buffer().
+ */
+ssize_t gatorfs_u64_to_user(u64 val, char __user *buf, size_t count, loff_t *offset)
+{
+	char tmpbuf[TMPBUFSIZE];
+	size_t maxlen = snprintf(tmpbuf, TMPBUFSIZE, "%llu\n", val);
+
+	/*
+	 * snprintf() returns the untruncated length; on truncation only
+	 * TMPBUFSIZE - 1 characters precede the NUL, so never expose more.
+	 */
+	if (maxlen >= TMPBUFSIZE)
+		maxlen = TMPBUFSIZE - 1;
+	return simple_read_from_buffer(buf, count, offset, tmpbuf, maxlen);
+}
+
+/*
+ * Parse an unsigned long (base auto-detected) from a user buffer and
+ * store it in *val while holding gatorfs_lock.
+ *
+ * Returns 0 on success or when count is 0 (in which case *val is left
+ * alone), -EINVAL if the input does not fit in the temporary buffer with
+ * its terminator, -EFAULT if the user buffer cannot be read.
+ */
+int gatorfs_ulong_from_user(unsigned long *val, char const __user *buf, size_t count)
+{
+	char tmpbuf[TMPBUFSIZE];
+	unsigned long irq_state;
+
+	if (count == 0)
+		return 0;
+	if (count >= TMPBUFSIZE)
+		return -EINVAL;
+
+	/* zero-fill so the copied text is always NUL-terminated */
+	memset(tmpbuf, 0x0, sizeof(tmpbuf));
+	if (copy_from_user(tmpbuf, buf, count) != 0)
+		return -EFAULT;
+
+	spin_lock_irqsave(&gatorfs_lock, irq_state);
+	*val = simple_strtoul(tmpbuf, NULL, 0);
+	spin_unlock_irqrestore(&gatorfs_lock, irq_state);
+
+	return 0;
+}
+
+/*
+ * Parse a 64-bit unsigned value (base auto-detected) from a user buffer
+ * and store it in *val while holding gatorfs_lock.
+ *
+ * Returns 0 on success or when count is 0 (in which case *val is left
+ * alone), -EINVAL if the input does not fit in the temporary buffer with
+ * its terminator, -EFAULT if the user buffer cannot be read.
+ */
+int gatorfs_u64_from_user(u64 *val, char const __user *buf, size_t count)
+{
+	char tmpbuf[TMPBUFSIZE];
+	unsigned long irq_state;
+
+	if (count == 0)
+		return 0;
+	if (count >= TMPBUFSIZE)
+		return -EINVAL;
+
+	/* zero-fill so the copied text is always NUL-terminated */
+	memset(tmpbuf, 0x0, sizeof(tmpbuf));
+	if (copy_from_user(tmpbuf, buf, count) != 0)
+		return -EFAULT;
+
+	spin_lock_irqsave(&gatorfs_lock, irq_state);
+	*val = simple_strtoull(tmpbuf, NULL, 0);
+	spin_unlock_irqrestore(&gatorfs_lock, irq_state);
+
+	return 0;
+}
+
+/* read() handler: format the unsigned long that file->private_data points at. */
+static ssize_t ulong_read_file(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+	const unsigned long *target = file->private_data;
+	unsigned long snapshot = *target;
+
+	return gatorfs_ulong_to_user(snapshot, buf, count, offset);
+}
+
+/* read() handler: format the u64 that file->private_data points at. */
+static ssize_t u64_read_file(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+	const u64 *target = file->private_data;
+	u64 snapshot = *target;
+
+	return gatorfs_u64_to_user(snapshot, buf, count, offset);
+}
+
+/*
+ * write() handler: parse the user text into the unsigned long that
+ * file->private_data points at. Only writes at offset 0 are accepted.
+ *
+ * Returns count on success, -EINVAL for a non-zero offset, or the error
+ * from gatorfs_ulong_from_user().
+ */
+static ssize_t ulong_write_file(struct file *file, char const __user *buf, size_t count, loff_t *offset)
+{
+	unsigned long *target = file->private_data;
+	int err;
+
+	if (*offset != 0)
+		return -EINVAL;
+
+	err = gatorfs_ulong_from_user(target, buf, count);
+	if (err != 0)
+		return err;
+
+	return count;
+}
+
+/*
+ * write() handler: parse the user text into the u64 that
+ * file->private_data points at. Only writes at offset 0 are accepted.
+ *
+ * Returns count on success, -EINVAL for a non-zero offset, or the error
+ * from gatorfs_u64_from_user().
+ */
+static ssize_t u64_write_file(struct file *file, char const __user *buf, size_t count, loff_t *offset)
+{
+	u64 *target = file->private_data;
+	int err;
+
+	if (*offset != 0)
+		return -EINVAL;
+
+	err = gatorfs_u64_from_user(target, buf, count);
+	if (err != 0)
+		return err;
+
+	return count;
+}
+
+/*
+ * open() handler: hand the inode's private pointer (if one is set) to the
+ * file so the read/write handlers can reach the backing value.
+ */
+static int default_open(struct inode *inode, struct file *filp)
+{
+	void *backing = inode->i_private;
+
+	if (backing != NULL)
+		filp->private_data = backing;
+
+	return 0;
+}
+
+/* Read/write file backed by an unsigned long; see gatorfs_create_ulong(). */
+static const struct file_operations ulong_fops = {
+	.read = ulong_read_file,
+	.write = ulong_write_file,
+	.open = default_open,
+};
+
+/* Read/write file backed by a u64; see gatorfs_create_u64(). */
+static const struct file_operations u64_fops = {
+	.read = u64_read_file,
+	.write = u64_write_file,
+	.open = default_open,
+};
+
+/* Read-only file backed by an unsigned long; see gatorfs_create_ro_ulong(). */
+static const struct file_operations ulong_ro_fops = {
+	.read = ulong_read_file,
+	.open = default_open,
+};
+
+/* Read-only file backed by a u64; see gatorfs_create_ro_u64(). */
+static const struct file_operations u64_ro_fops = {
+	.read = u64_read_file,
+	.open = default_open,
+};
+
+/*
+ * Create a regular file called name under root, using fops and the
+ * permission bits in perm.
+ *
+ * Returns the new dentry, or NULL if the dentry or the inode cannot be
+ * allocated (the dentry is released in the latter case).
+ */
+static struct dentry *__gatorfs_create_file(struct super_block *sb,
+					    struct dentry *root,
+					    char const *name,
+					    const struct file_operations *fops,
+					    int perm)
+{
+	struct dentry *dentry = d_alloc_name(root, name);
+	struct inode *inode;
+
+	if (dentry == NULL)
+		return NULL;
+
+	inode = gatorfs_get_inode(sb, S_IFREG | perm);
+	if (inode != NULL) {
+		inode->i_fop = fops;
+		d_add(dentry, inode);
+		return dentry;
+	}
+
+	dput(dentry);
+	return NULL;
+}
+
+/*
+ * Create a 0644 file under root that reads and writes *val.
+ * Returns 0 on success, -EFAULT if the file cannot be created.
+ */
+int gatorfs_create_ulong(struct super_block *sb, struct dentry *root,
+			 char const *name, unsigned long *val)
+{
+	struct dentry *entry;
+
+	entry = __gatorfs_create_file(sb, root, name, &ulong_fops, 0644);
+	if (entry == NULL)
+		return -EFAULT;
+
+	entry->d_inode->i_private = val;
+	return 0;
+}
+
+/*
+ * Create a 0644 file under root that reads and writes the u64 *val.
+ * Returns 0 on success, -EFAULT if the file cannot be created.
+ */
+int gatorfs_create_u64(struct super_block *sb, struct dentry *root,
+			 char const *name, u64 *val)
+{
+	struct dentry *entry;
+
+	entry = __gatorfs_create_file(sb, root, name, &u64_fops, 0644);
+	if (entry == NULL)
+		return -EFAULT;
+
+	entry->d_inode->i_private = val;
+	return 0;
+}
+
+/*
+ * Create a 0444 file under root that reads *val.
+ * Returns 0 on success, -EFAULT if the file cannot be created.
+ */
+int gatorfs_create_ro_ulong(struct super_block *sb, struct dentry *root,
+			    char const *name, unsigned long *val)
+{
+	struct dentry *entry;
+
+	entry = __gatorfs_create_file(sb, root, name, &ulong_ro_fops, 0444);
+	if (entry == NULL)
+		return -EFAULT;
+
+	entry->d_inode->i_private = val;
+	return 0;
+}
+
+/*
+ * Create a 0444 file under root that reads the u64 *val.
+ * Returns 0 on success, -EFAULT if the file cannot be created.
+ */
+int gatorfs_create_ro_u64(struct super_block *sb, struct dentry *root,
+			  char const *name, u64 * val)
+{
+	struct dentry *entry;
+
+	entry = __gatorfs_create_file(sb, root, name, &u64_ro_fops, 0444);
+	if (entry == NULL)
+		return -EFAULT;
+
+	entry->d_inode->i_private = val;
+	return 0;
+}
+
+/* read() handler: format the atomic_t that file->private_data points at. */
+static ssize_t atomic_read_file(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+	atomic_t *counter = file->private_data;
+
+	return gatorfs_ulong_to_user(atomic_read(counter), buf, count, offset);
+}
+
+/* Read-only file backed by an atomic_t; see gatorfs_create_ro_atomic(). */
+static const struct file_operations atomic_ro_fops = {
+	.read = atomic_read_file,
+	.open = default_open,
+};
+
+/*
+ * Create a 0444 file under root that reads the atomic_t *val.
+ * Returns 0 on success, -EFAULT if the file cannot be created.
+ */
+int gatorfs_create_ro_atomic(struct super_block *sb, struct dentry *root,
+			     char const *name, atomic_t *val)
+{
+	struct dentry *entry;
+
+	entry = __gatorfs_create_file(sb, root, name, &atomic_ro_fops, 0444);
+	if (entry == NULL)
+		return -EFAULT;
+
+	entry->d_inode->i_private = val;
+	return 0;
+}
+
+/*
+ * Create a 0644 file under root driven by the caller's fops.
+ * Returns 0 on success, -EFAULT if the file cannot be created.
+ */
+int gatorfs_create_file(struct super_block *sb, struct dentry *root,
+			char const *name, const struct file_operations *fops)
+{
+	struct dentry *entry = __gatorfs_create_file(sb, root, name, fops, 0644);
+
+	return (entry != NULL) ? 0 : -EFAULT;
+}
+
+/*
+ * Create a file under root driven by the caller's fops, with the
+ * permission bits given in perm.
+ * Returns 0 on success, -EFAULT if the file cannot be created.
+ */
+int gatorfs_create_file_perm(struct super_block *sb, struct dentry *root,
+			     char const *name,
+			     const struct file_operations *fops, int perm)
+{
+	struct dentry *entry = __gatorfs_create_file(sb, root, name, fops, perm);
+
+	return (entry != NULL) ? 0 : -EFAULT;
+}
+
+/*
+ * Create a 0755 directory called name under root.
+ *
+ * Returns the new dentry, or NULL if the dentry or the inode cannot be
+ * allocated (the dentry is released in the latter case).
+ */
+struct dentry *gatorfs_mkdir(struct super_block *sb,
+			     struct dentry *root, char const *name)
+{
+	struct dentry *dentry = d_alloc_name(root, name);
+	struct inode *inode;
+
+	if (dentry == NULL)
+		return NULL;
+
+	inode = gatorfs_get_inode(sb, S_IFDIR | 0755);
+	if (inode != NULL) {
+		inode->i_op = &simple_dir_inode_operations;
+		inode->i_fop = &simple_dir_operations;
+		d_add(dentry, inode);
+		return dentry;
+	}
+
+	dput(dentry);
+	return NULL;
+}
+
+/*
+ * Fill in a freshly allocated superblock: set block size, magic and
+ * operations, create the 0755 root directory and let
+ * gator_op_create_files() populate it.
+ *
+ * Returns 0 on success, -ENOMEM if the root inode or dentry cannot be
+ * allocated. The result of gator_op_create_files() is not checked.
+ * The data and silent arguments are not used.
+ */
+static int gatorfs_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct inode *root_inode;
+	struct dentry *root_dentry;
+
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic = gatorfs_MAGIC;
+	sb->s_op = &s_ops;
+	sb->s_time_gran = 1;
+
+	root_inode = gatorfs_get_inode(sb, S_IFDIR | 0755);
+	if (!root_inode)
+		return -ENOMEM;
+	root_inode->i_op = &simple_dir_inode_operations;
+	root_inode->i_fop = &simple_dir_operations;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
+	root_dentry = d_alloc_root(root_inode);
+#else
+	root_dentry = d_make_root(root_inode);
+#endif
+
+	if (!root_dentry) {
+		/* only the pre-3.4 path drops the inode reference here */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
+		iput(root_inode);
+#endif
+		return -ENOMEM;
+	}
+
+	sb->s_root = root_dentry;
+
+	gator_op_create_files(sb, root_dentry);
+
+	return 0;
+}
+
+/*
+ * Mount entry point. Kernels before 2.6.39 use the get_sb interface,
+ * later ones the mount interface; both build the superblock with
+ * gatorfs_fill_super(). dev_name is not used by either variant.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39)
+static int gatorfs_get_sb(struct file_system_type *fs_type,
+			  int flags, const char *dev_name, void *data,
+			  struct vfsmount *mnt)
+{
+	return get_sb_single(fs_type, flags, data, gatorfs_fill_super, mnt);
+}
+#else
+static struct dentry *gatorfs_mount(struct file_system_type *fs_type,
+				    int flags, const char *dev_name, void *data)
+{
+	return mount_nodev(fs_type, flags, data, gatorfs_fill_super);
+}
+#endif
+
+/*
+ * Filesystem type descriptor for "gatorfs"; registered by
+ * gatorfs_register() and removed by gatorfs_unregister(). The mount
+ * callback is chosen to match the kernel version.
+ */
+static struct file_system_type gatorfs_type = {
+	.owner = THIS_MODULE,
+	.name = "gatorfs",
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39)
+	.get_sb = gatorfs_get_sb,
+#else
+	.mount = gatorfs_mount,
+#endif
+
+	.kill_sb = kill_litter_super,
+};
+
+/* Register the gatorfs filesystem type; returns register_filesystem()'s result. */
+int __init gatorfs_register(void)
+{
+	int err = register_filesystem(&gatorfs_type);
+
+	return err;
+}
+
+/* Remove the gatorfs filesystem type; the result is deliberately ignored. */
+void gatorfs_unregister(void)
+{
+	(void)unregister_filesystem(&gatorfs_type);
+}
diff --git a/drivers/gator/gator_hrtimer_gator.c b/drivers/gator/gator_hrtimer_gator.c
new file mode 100644
index 000000000000..b0c947afe1e1
--- /dev/null
+++ b/drivers/gator/gator_hrtimer_gator.c
@@ -0,0 +1,86 @@
+/**
+ * Copyright (C) ARM Limited 2011-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+// gator_hrtimer_perf.c is used if perf is supported
+//   update, gator_hrtimer_gator.c always used until issues resolved with perf hrtimers
+#if 1
+
+/* Function invoked on every timer expiry; set by gator_hrtimer_init(). */
+void (*callback)(void);
+/* Per-cpu timer, its next absolute expiry, and a started/stopped flag. */
+DEFINE_PER_CPU(struct hrtimer, percpu_hrtimer);
+DEFINE_PER_CPU(ktime_t, hrtimer_expire);
+DEFINE_PER_CPU(int, hrtimer_is_active);
+/* Time between expiries; zero means the timers are never started. */
+static ktime_t profiling_interval;
+static void gator_hrtimer_online(void);
+static void gator_hrtimer_offline(void);
+
+/*
+ * Timer expiry handler: push the timer and the recorded expiry forward by
+ * one profiling interval, run the registered callback and ask for the
+ * timer to be restarted.
+ */
+static enum hrtimer_restart gator_hrtimer_notify(struct hrtimer *hrtimer)
+{
+	const int cpu = get_logical_cpu();
+	ktime_t *expiry = &per_cpu(hrtimer_expire, cpu);
+
+	hrtimer_forward(hrtimer, *expiry, profiling_interval);
+	*expiry = ktime_add(*expiry, profiling_interval);
+	callback();
+
+	return HRTIMER_RESTART;
+}
+
+/*
+ * Start the calling CPU's timer so it first fires one profiling interval
+ * from now. Does nothing if the timer is already active or the profiling
+ * interval is zero.
+ */
+static void gator_hrtimer_online(void)
+{
+	const int cpu = get_logical_cpu();
+	struct hrtimer *timer;
+	ktime_t first_expiry;
+
+	if (profiling_interval.tv64 == 0)
+		return;
+	if (per_cpu(hrtimer_is_active, cpu))
+		return;
+
+	timer = &per_cpu(percpu_hrtimer, cpu);
+	per_cpu(hrtimer_is_active, cpu) = 1;
+
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	timer->function = gator_hrtimer_notify;
+#ifdef CONFIG_PREEMPT_RT_BASE
+	timer->irqsafe = 1;
+#endif
+
+	first_expiry = ktime_add(timer->base->get_time(), profiling_interval);
+	per_cpu(hrtimer_expire, cpu) = first_expiry;
+	hrtimer_start(timer, first_expiry, HRTIMER_MODE_ABS_PINNED);
+}
+
+/* Cancel the calling CPU's timer if it was started; otherwise do nothing. */
+static void gator_hrtimer_offline(void)
+{
+	const int cpu = get_logical_cpu();
+
+	if (per_cpu(hrtimer_is_active, cpu)) {
+		per_cpu(hrtimer_is_active, cpu) = 0;
+		hrtimer_cancel(&per_cpu(percpu_hrtimer, cpu));
+	}
+}
+
+/*
+ * Record the expiry callback, mark every present CPU's timer inactive and
+ * derive the profiling interval from the requested rate (interval is in
+ * expiries per second; a non-positive rate disables the timers).
+ * Always returns 0.
+ */
+static int gator_hrtimer_init(int interval, void (*func)(void))
+{
+	int cpu;
+
+	callback = func;
+
+	for_each_present_cpu(cpu)
+		per_cpu(hrtimer_is_active, cpu) = 0;
+
+	// calculate profiling interval
+	if (interval <= 0)
+		profiling_interval.tv64 = 0;
+	else
+		profiling_interval = ns_to_ktime(1000000000UL / interval);
+
+	return 0;
+}
+
+/*
+ * Counterpart of gator_hrtimer_init(). This implementation allocates
+ * nothing in init, so there is nothing to release.
+ */
+static void gator_hrtimer_shutdown(void)
+{
+	/* empty */
+}
+
+#endif
diff --git a/drivers/gator/gator_hrtimer_perf.c b/drivers/gator/gator_hrtimer_perf.c
new file mode 100644
index 000000000000..7b95399478e4
--- /dev/null
+++ b/drivers/gator/gator_hrtimer_perf.c
@@ -0,0 +1,113 @@
+/**
+ * Copyright (C) ARM Limited 2011-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+// gator_hrtimer_gator.c is used if perf is not supported
+//   update, gator_hrtimer_gator.c always used until issues resolved with perf hrtimers
+#if 0
+
+// Note: perf Cortex support added in 2.6.35 and PERF_COUNT_SW_CPU_CLOCK/hrtimer broken on 2.6.35 and 2.6.36
+//       not relevant as this code is not active until 3.0.0, but wanted to document the issue
+
+/* Function invoked from the overflow handler; set by gator_hrtimer_init(). */
+void (*callback)(void);
+/* Sample period handed to perf, computed as 1000000000 / interval. */
+static int profiling_interval;
+/* Per-cpu perf event and the attributes it is created from. */
+static DEFINE_PER_CPU(struct perf_event *, perf_hrtimer);
+static DEFINE_PER_CPU(struct perf_event_attr *, perf_hrtimer_attr);
+
+static void gator_hrtimer_shutdown(void);
+
+/*
+ * perf overflow handler: run the registered callback. The signature
+ * depends on the kernel version; none of the arguments are used.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
+static void hrtimer_overflow_handler(struct perf_event *event, int unused, struct perf_sample_data *data, struct pt_regs *regs)
+#else
+static void hrtimer_overflow_handler(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs)
+#endif
+{
+	(*callback)();
+}
+
+/*
+ * Create the perf event that drives sampling on cpu.
+ *
+ * Returns 0 if the event was created and is active, and also when there
+ * is nothing to do (an event already exists, or no attributes were
+ * allocated for this cpu). Returns -1 if the event cannot be created or
+ * does not become active; the per-cpu pointer is left NULL in that case.
+ */
+static int gator_online_single_hrtimer(int cpu)
+{
+	if (per_cpu(perf_hrtimer, cpu) != 0 || per_cpu(perf_hrtimer_attr, cpu) == 0)
+		return 0;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
+	per_cpu(perf_hrtimer, cpu) = perf_event_create_kernel_counter(per_cpu(perf_hrtimer_attr, cpu), cpu, 0, hrtimer_overflow_handler);
+#else
+	per_cpu(perf_hrtimer, cpu) = perf_event_create_kernel_counter(per_cpu(perf_hrtimer_attr, cpu), cpu, 0, hrtimer_overflow_handler, 0);
+#endif
+	if (IS_ERR(per_cpu(perf_hrtimer, cpu))) {
+		per_cpu(perf_hrtimer, cpu) = NULL;
+		return -1;
+	}
+
+	// an event that was created but is not active is released again
+	if (per_cpu(perf_hrtimer, cpu)->state != PERF_EVENT_STATE_ACTIVE) {
+		perf_event_release_kernel(per_cpu(perf_hrtimer, cpu));
+		per_cpu(perf_hrtimer, cpu) = NULL;
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Bring up the sampling event on cpu; a failure is only logged at debug level. */
+static void gator_hrtimer_online(int cpu)
+{
+	const int rc = gator_online_single_hrtimer(cpu);
+
+	if (rc >= 0)
+		return;
+
+	pr_debug("gator: unable to online the hrtimer on cpu%d\n", cpu);
+}
+
+/* Release the sampling event on cpu, if one exists, and clear its slot. */
+static void gator_hrtimer_offline(int cpu)
+{
+	struct perf_event *event = per_cpu(perf_hrtimer, cpu);
+
+	if (event == NULL)
+		return;
+
+	perf_event_release_kernel(event);
+	per_cpu(perf_hrtimer, cpu) = NULL;
+}
+
+/*
+ * Record the expiry callback and prepare perf event attributes for every
+ * present CPU.
+ *
+ * interval is the requested rate in expiries per second. A non-positive
+ * rate disables sampling: no attributes are allocated, so
+ * gator_online_single_hrtimer() later creates no events. This also avoids
+ * dividing by zero.
+ *
+ * Returns 0 on success, -1 if an allocation fails (everything allocated
+ * so far is released through gator_hrtimer_shutdown()).
+ */
+static int gator_hrtimer_init(int interval, void (*func)(void))
+{
+	u32 size = sizeof(struct perf_event_attr);
+	int cpu;
+
+	callback = func;
+
+	// calculate profiling interval; zero means sampling is disabled
+	profiling_interval = (interval > 0) ? 1000000000 / interval : 0;
+
+	for_each_present_cpu(cpu) {
+		struct perf_event_attr *attr;
+
+		per_cpu(perf_hrtimer, cpu) = NULL;
+		per_cpu(perf_hrtimer_attr, cpu) = NULL;
+		if (profiling_interval == 0)
+			continue;
+
+		/* zeroed allocation replaces the kmalloc() + memset() pair */
+		attr = kzalloc(size, GFP_KERNEL);
+		if (attr == NULL) {
+			gator_hrtimer_shutdown();
+			return -1;
+		}
+
+		attr->type = PERF_TYPE_SOFTWARE;
+		attr->size = size;
+		attr->config = PERF_COUNT_SW_CPU_CLOCK;
+		attr->sample_period = profiling_interval;
+		attr->pinned = 1;
+		per_cpu(perf_hrtimer_attr, cpu) = attr;
+	}
+
+	return 0;
+}
+
+/* Free the perf event attributes of every present CPU and clear the slots. */
+static void gator_hrtimer_shutdown(void)
+{
+	int cpu;
+
+	for_each_present_cpu(cpu) {
+		/* kfree(NULL) is a no-op, so no guard is needed */
+		kfree(per_cpu(perf_hrtimer_attr, cpu));
+		per_cpu(perf_hrtimer_attr, cpu) = NULL;
+	}
+}
+
+#endif
diff --git a/drivers/gator/gator_iks.c b/drivers/gator/gator_iks.c
new file mode 100644
index 000000000000..0a90bdd1904e
--- /dev/null
+++ b/drivers/gator/gator_iks.c
@@ -0,0 +1,197 @@
+/**
+ * Copyright (C) ARM Limited 2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#if GATOR_IKS_SUPPORT
+
+#include <linux/of.h>
+#include <asm/bL_switcher.h>
+#include <asm/smp_plat.h>
+#include <trace/events/power_cpu_migrate.h>
+
+/* True when one device-tree cpu node was recorded per possible cpu id. */
+static bool map_cpuids;
+/* "reg" value of each recorded device-tree cpu node; unused slots hold -1. */
+static int mpidr_cpuids[NR_CPUS];
+/* gator_cpus entry matched by each node's "compatible" string, or NULL. */
+static const struct gator_cpu * mpidr_cpus[NR_CPUS];
+/* Logical-to-physical cpu map maintained by gator_update_cpu_mapping(). */
+static int __lcpu_to_pcpu[NR_CPUS];
+
+/*
+ * Find the gator_cpus entry whose dt_name equals name. The table is
+ * scanned up to its terminating entry (cpuid == 0); entries without a
+ * dt_name are skipped. Returns NULL when nothing matches.
+ */
+static const struct gator_cpu *gator_find_cpu_by_dt_name(const char *const name)
+{
+	const struct gator_cpu *entry;
+
+	for (entry = gator_cpus; entry->cpuid != 0; ++entry) {
+		if (entry->dt_name == NULL)
+			continue;
+		if (strcmp(entry->dt_name, name) == 0)
+			return entry;
+	}
+
+	return NULL;
+}
+
+/*
+ * Walk the device-tree "cpu" nodes and record, for each node that has a
+ * 4-byte "reg" and a "compatible" property, its reg value and the
+ * matching gator_cpus entry. Nodes missing either property are reported
+ * and skipped. Cpu id mapping is enabled only if the number of recorded
+ * nodes equals nr_cpu_ids.
+ */
+static void calc_first_cluster_size(void)
+{
+	struct device_node *cn;
+	int recorded = 0;
+
+	// Zero is a valid cpuid, so initialize the array to 0xff's
+	memset(&mpidr_cpuids, 0xff, sizeof(mpidr_cpuids));
+	memset(&mpidr_cpus, 0, sizeof(mpidr_cpus));
+
+	for (cn = of_find_node_by_type(NULL, "cpu"); cn != NULL;
+	     cn = of_find_node_by_type(cn, "cpu")) {
+		const char *compatible;
+		const u32 *reg;
+		int len;
+
+		BUG_ON(recorded >= NR_CPUS);
+
+		reg = of_get_property(cn, "reg", &len);
+		if (reg == NULL || len != 4) {
+			pr_err("%s missing reg property\n", cn->full_name);
+			continue;
+		}
+
+		compatible = of_get_property(cn, "compatible", NULL);
+		if (compatible == NULL) {
+			pr_err("%s missing compatible property\n", cn->full_name);
+			continue;
+		}
+
+		mpidr_cpuids[recorded] = be32_to_cpup(reg);
+		mpidr_cpus[recorded] = gator_find_cpu_by_dt_name(compatible);
+		recorded++;
+	}
+
+	map_cpuids = (recorded == nr_cpu_ids);
+}
+
+/*
+ * Translate an mpidr value into its index in mpidr_cpuids[].
+ * An mpidr that was never recorded is a fatal error (BUG).
+ */
+static int linearize_mpidr(int mpidr)
+{
+	int idx = 0;
+
+	while (idx < nr_cpu_ids) {
+		if (mpidr_cpuids[idx] == mpidr)
+			return idx;
+		++idx;
+	}
+
+	BUG();
+}
+
+/*
+ * Map a logical cpu number to its physical cpu number. Without cpu id
+ * mapping the two are identical. Out-of-range input or an out-of-range
+ * table entry is a fatal error (BUG).
+ */
+int lcpu_to_pcpu(const int lcpu)
+{
+	if (map_cpuids) {
+		int pcpu;
+
+		BUG_ON(lcpu < 0 || lcpu >= nr_cpu_ids);
+		pcpu = __lcpu_to_pcpu[lcpu];
+		BUG_ON(pcpu < 0 || pcpu >= nr_cpu_ids);
+		return pcpu;
+	}
+
+	return lcpu;
+}
+
+/*
+ * Map a physical cpu number back to the logical cpu currently mapped to
+ * it. Without cpu id mapping the two are identical. Out-of-range input,
+ * or a physical cpu no logical cpu maps to, is a fatal error (BUG).
+ */
+int pcpu_to_lcpu(const int pcpu)
+{
+	int lcpu = 0;
+
+	if (!map_cpuids)
+		return pcpu;
+
+	BUG_ON(pcpu < 0 || pcpu >= nr_cpu_ids);
+
+	while (lcpu < nr_cpu_ids) {
+		if (__lcpu_to_pcpu[lcpu] == pcpu) {
+			BUG_ON(lcpu < 0 || lcpu >= nr_cpu_ids);
+			return lcpu;
+		}
+		++lcpu;
+	}
+
+	BUG();
+}
+
+/*
+ * Record that the calling (logical) cpu now runs on the physical cpu
+ * identified by cpu_hwid. Out-of-range indices are a fatal error (BUG).
+ */
+static void gator_update_cpu_mapping(u32 cpu_hwid)
+{
+	const int lcpu = smp_processor_id();
+	const int pcpu = linearize_mpidr(cpu_hwid & MPIDR_HWID_BITMASK);
+
+	BUG_ON(lcpu < 0 || lcpu >= nr_cpu_ids);
+	BUG_ON(pcpu < 0 || pcpu >= nr_cpu_ids);
+
+	__lcpu_to_pcpu[lcpu] = pcpu;
+}
+
+/*
+ * Probe for cpu_migrate_begin (registered in gator_migrate_start()):
+ * takes the timer offline and runs the offline dispatch for the current
+ * physical cpu with the migrate flag set. Neither probe argument is used.
+ */
+GATOR_DEFINE_PROBE(cpu_migrate_begin, TP_PROTO(u64 timestamp, u32 cpu_hwid))
+{
+	const int cpu = get_physical_cpu();
+
+	gator_timer_offline((void *)1);
+	gator_timer_offline_dispatch(cpu, true);
+}
+
+/*
+ * Probe for cpu_migrate_finish (registered in gator_migrate_start()):
+ * updates the logical-to-physical mapping from cpu_hwid, then runs the
+ * online dispatch for the new physical cpu with the migrate flag set and
+ * brings the timer back online. timestamp is not used.
+ */
+GATOR_DEFINE_PROBE(cpu_migrate_finish, TP_PROTO(u64 timestamp, u32 cpu_hwid))
+{
+	int cpu;
+
+	gator_update_cpu_mapping(cpu_hwid);
+
+	// get_physical_cpu must be called after gator_update_cpu_mapping
+	cpu = get_physical_cpu();
+	gator_timer_online_dispatch(cpu, true);
+	gator_timer_online((void *)1);
+}
+
+/*
+ * Probe for cpu_migrate_current (registered in gator_migrate_start()):
+ * only refreshes the logical-to-physical mapping for the calling cpu.
+ */
+GATOR_DEFINE_PROBE(cpu_migrate_current, TP_PROTO(u64 timestamp, u32 cpu_hwid))
+{
+	gator_update_cpu_mapping(cpu_hwid);
+}
+
+/*
+ * Send the core name of every recorded cpu for which a gator_cpus entry
+ * was found. Runs with preemption disabled.
+ */
+static void gator_send_iks_core_names(void)
+{
+	int cpu;
+
+	// Send the cpu names
+	preempt_disable();
+	for (cpu = 0; cpu < nr_cpu_ids; ++cpu) {
+		const struct gator_cpu *core = mpidr_cpus[cpu];
+
+		if (core == NULL)
+			continue;
+		gator_send_core_name(cpu, core->cpuid, core);
+	}
+	preempt_enable();
+}
+
+/*
+ * Register the three cpu_migrate_* probes, then reset the
+ * logical-to-physical map and trigger the switcher trace to repopulate
+ * it. Does nothing and returns 0 when cpu id mapping is off.
+ *
+ * Returns 0 on success, otherwise the first registration error.
+ * NOTE(review): probes registered before a failing one are not
+ * unregistered here - confirm the caller cleans up on error.
+ */
+static int gator_migrate_start(void)
+{
+	int retval;
+
+	if (!map_cpuids)
+		return 0;
+
+	retval = GATOR_REGISTER_TRACE(cpu_migrate_begin);
+	if (retval != 0)
+		return retval;
+
+	retval = GATOR_REGISTER_TRACE(cpu_migrate_finish);
+	if (retval != 0)
+		return retval;
+
+	retval = GATOR_REGISTER_TRACE(cpu_migrate_current);
+	if (retval != 0)
+		return retval;
+
+	// Initialize the logical to physical cpu mapping
+	memset(&__lcpu_to_pcpu, 0xff, sizeof(__lcpu_to_pcpu));
+	bL_switcher_trace_trigger();
+
+	return retval;
+}
+
+/*
+ * Unregister the cpu_migrate_* probes in the reverse of their
+ * registration order. Does nothing when cpu id mapping is off.
+ */
+static void gator_migrate_stop(void)
+{
+	if (map_cpuids) {
+		GATOR_UNREGISTER_TRACE(cpu_migrate_current);
+		GATOR_UNREGISTER_TRACE(cpu_migrate_finish);
+		GATOR_UNREGISTER_TRACE(cpu_migrate_begin);
+	}
+}
+
+#else
+
+/* Without GATOR_IKS_SUPPORT the IKS entry points expand to nothing (start to 0). */
+#define calc_first_cluster_size()
+#define gator_send_iks_core_names()
+#define gator_migrate_start() 0
+#define gator_migrate_stop()
+
+#endif
diff --git a/drivers/gator/gator_main.c b/drivers/gator/gator_main.c
new file mode 100644
index 000000000000..19f51c7cd8ee
--- /dev/null
+++ b/drivers/gator/gator_main.c
@@ -0,0 +1,1532 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+// This version must match the gator daemon version
+#define PROTOCOL_VERSION 17
+static unsigned long gator_protocol_version = PROTOCOL_VERSION;
+
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <linux/irq.h>
+#include <linux/vmalloc.h>
+#include <linux/hardirq.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/utsname.h>
+#include <linux/kthread.h>
+#include <asm/stacktrace.h>
+#include <asm/uaccess.h>
+
+#include "gator.h"
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
+#error kernels prior to 2.6.32 are not supported
+#endif
+
+#if defined(MODULE) && !defined(CONFIG_MODULES)
+#error Cannot build a module against a kernel that does not support modules. To resolve, either rebuild the kernel to support modules or build gator as part of the kernel.
+#endif
+
+#if !defined(CONFIG_GENERIC_TRACER) && !defined(CONFIG_TRACING)
+#error gator requires the kernel to have CONFIG_GENERIC_TRACER or CONFIG_TRACING defined
+#endif
+
+#ifndef CONFIG_PROFILING
+#error gator requires the kernel to have CONFIG_PROFILING defined
+#endif
+
+#ifndef CONFIG_HIGH_RES_TIMERS
+#error gator requires the kernel to have CONFIG_HIGH_RES_TIMERS defined to support PC sampling
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && defined(__arm__) && defined(CONFIG_SMP) && !defined(CONFIG_LOCAL_TIMERS)
+#error gator requires the kernel to have CONFIG_LOCAL_TIMERS defined on SMP systems
+#endif
+
+#if (GATOR_PERF_SUPPORT) && (!(GATOR_PERF_PMU_SUPPORT))
+#ifndef CONFIG_PERF_EVENTS
+#warning gator requires the kernel to have CONFIG_PERF_EVENTS defined to support pmu hardware counters
+#elif !defined CONFIG_HW_PERF_EVENTS
+#warning gator requires the kernel to have CONFIG_HW_PERF_EVENTS defined to support pmu hardware counters
+#endif
+#endif
+
+/******************************************************************************
+ * DEFINES
+ ******************************************************************************/
+#define SUMMARY_BUFFER_SIZE       (1*1024)
+#define BACKTRACE_BUFFER_SIZE     (128*1024)
+#define NAME_BUFFER_SIZE          (64*1024)
+#define COUNTER_BUFFER_SIZE       (64*1024)	// counters have the core as part of the data and the core value in the frame header may be discarded
+#define BLOCK_COUNTER_BUFFER_SIZE (128*1024)
+#define ANNOTATE_BUFFER_SIZE      (128*1024)	// annotate counters have the core as part of the data and the core value in the frame header may be discarded
+#define SCHED_TRACE_BUFFER_SIZE   (128*1024)
+#define GPU_TRACE_BUFFER_SIZE     (64*1024)	// gpu trace counters have the core as part of the data and the core value in the frame header may be discarded
+#define IDLE_BUFFER_SIZE          (32*1024)	// idle counters have the core as part of the data and the core value in the frame header may be discarded
+
+#define NO_COOKIE      0U
+#define UNRESOLVED_COOKIE ~0U
+
+#define FRAME_SUMMARY       1
+#define FRAME_BACKTRACE     2
+#define FRAME_NAME          3
+#define FRAME_COUNTER       4
+#define FRAME_BLOCK_COUNTER 5
+#define FRAME_ANNOTATE      6
+#define FRAME_SCHED_TRACE   7
+#define FRAME_GPU_TRACE     8
+#define FRAME_IDLE          9
+
+#define MESSAGE_END_BACKTRACE 1
+
+#define MESSAGE_COOKIE      1
+#define MESSAGE_THREAD_NAME 2
+#define HRTIMER_CORE_NAME   3
+#define MESSAGE_LINK        4
+
+#define MESSAGE_GPU_START 1
+#define MESSAGE_GPU_STOP  2
+
+#define MESSAGE_SCHED_SWITCH 1
+#define MESSAGE_SCHED_EXIT   2
+#define MESSAGE_SCHED_START  3
+
+#define MESSAGE_IDLE_ENTER 1
+#define MESSAGE_IDLE_EXIT 2
+
+#define MAXSIZE_PACK32     5
+#define MAXSIZE_PACK64    10
+
+#define FRAME_HEADER_SIZE 3
+
+#if defined(__arm__)
+#define PC_REG regs->ARM_pc
+#elif defined(__aarch64__)
+#define PC_REG regs->pc
+#else
+#define PC_REG regs->ip
+#endif
+
+enum {
+	SUMMARY_BUF,
+	BACKTRACE_BUF,
+	NAME_BUF,
+	COUNTER_BUF,
+	BLOCK_COUNTER_BUF,
+	ANNOTATE_BUF,
+	SCHED_TRACE_BUF,
+	GPU_TRACE_BUF,
+	IDLE_BUF,
+	NUM_GATOR_BUFS
+};
+
+/******************************************************************************
+ * Globals
+ ******************************************************************************/
+static unsigned long gator_cpu_cores;
+// Size of the largest buffer. Effectively constant, set in gator_op_create_files
+static unsigned long userspace_buffer_size;
+static unsigned long gator_backtrace_depth;
+// How often to commit the buffers for live in nanoseconds
+static u64 gator_live_rate;
+
+static unsigned long gator_started;
+static u64 gator_monotonic_started;
+static u64 gator_hibernate_time;
+static unsigned long gator_buffer_opened;
+static unsigned long gator_timer_count;
+static unsigned long gator_response_type;
+static DEFINE_MUTEX(start_mutex);
+static DEFINE_MUTEX(gator_buffer_mutex);
+
+bool event_based_sampling;
+
+static DECLARE_WAIT_QUEUE_HEAD(gator_buffer_wait);
+static DECLARE_WAIT_QUEUE_HEAD(gator_annotate_wait);
+static struct timer_list gator_buffer_wake_up_timer;
+static bool gator_buffer_wake_stop;
+static struct task_struct *gator_buffer_wake_thread;
+static LIST_HEAD(gator_events);
+
+static DEFINE_PER_CPU(u64, last_timestamp);
+
+static bool printed_monotonic_warning;
+
+static bool sent_core_name[NR_CPUS];
+
+/******************************************************************************
+ * Prototypes
+ ******************************************************************************/
+static void buffer_check(int cpu, int buftype, u64 time);
+static void gator_commit_buffer(int cpu, int buftype, u64 time);
+static int buffer_bytes_available(int cpu, int buftype);
+static bool buffer_check_space(int cpu, int buftype, int bytes);
+static int contiguous_space_available(int cpu, int buftype);
+static void gator_buffer_write_packed_int(int cpu, int buftype, int x);
+static void gator_buffer_write_packed_int64(int cpu, int buftype, long long x);
+static void gator_buffer_write_bytes(int cpu, int buftype, const char *x, int len);
+static void gator_buffer_write_string(int cpu, int buftype, const char *x);
+static void gator_add_trace(int cpu, unsigned long address);
+static void gator_add_sample(int cpu, struct pt_regs *const regs, u64 time);
+static u64 gator_get_time(void);
+
+// Size of the buffer, must be a power of 2. Effectively constant, set in gator_op_setup.
+static uint32_t gator_buffer_size[NUM_GATOR_BUFS];
+// gator_buffer_size - 1, bitwise and with pos to get offset into the array. Effectively constant, set in gator_op_setup.
+static uint32_t gator_buffer_mask[NUM_GATOR_BUFS];
+// Read position in the buffer. Initialized to zero in gator_op_setup and incremented after bytes are read by userspace in userspace_buffer_read
+static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], gator_buffer_read);
+// Write position in the buffer. Initialized to zero in gator_op_setup and incremented after bytes are written to the buffer
+static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], gator_buffer_write);
+// Commit position in the buffer. Initialized to zero in gator_op_setup and incremented after a frame is ready to be read by userspace
+static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], gator_buffer_commit);
+// If set to false, decreases the number of bytes returned by buffer_bytes_available. Set in buffer_check_space if no space is remaining. Initialized to true in gator_op_setup
+// This means that if we run out of space, continue to report that no space is available until bytes are read by userspace
+static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], buffer_space_available);
+// The buffer. Allocated in gator_op_setup
+static DEFINE_PER_CPU(char *[NUM_GATOR_BUFS], gator_buffer);
+// The time after which the buffer should be committed for live display
+static DEFINE_PER_CPU(u64, gator_buffer_commit_time);
+
+// List of all gator events - new events must be added to this list
+#define GATOR_EVENTS_LIST \
+	GATOR_EVENT(gator_events_armv6_init) \
+	GATOR_EVENT(gator_events_armv7_init) \
+	GATOR_EVENT(gator_events_block_init) \
+	GATOR_EVENT(gator_events_ccn504_init) \
+	GATOR_EVENT(gator_events_irq_init) \
+	GATOR_EVENT(gator_events_l2c310_init) \
+	GATOR_EVENT(gator_events_mali_init) \
+	GATOR_EVENT(gator_events_mali_t6xx_hw_init) \
+	GATOR_EVENT(gator_events_mali_t6xx_init) \
+	GATOR_EVENT(gator_events_meminfo_init) \
+	GATOR_EVENT(gator_events_mmapped_init) \
+	GATOR_EVENT(gator_events_net_init) \
+	GATOR_EVENT(gator_events_perf_pmu_init) \
+	GATOR_EVENT(gator_events_sched_init) \
+	GATOR_EVENT(gator_events_scorpion_init) \
+
+#define GATOR_EVENT(EVENT_INIT) __weak int EVENT_INIT(void);
+GATOR_EVENTS_LIST
+#undef GATOR_EVENT
+
+static int (*gator_events_list[])(void) = {
+#define GATOR_EVENT(EVENT_INIT) EVENT_INIT,
+GATOR_EVENTS_LIST
+#undef GATOR_EVENT
+};
+
+/******************************************************************************
+ * Application Includes
+ ******************************************************************************/
+#include "gator_marshaling.c"
+#include "gator_hrtimer_perf.c"
+#include "gator_hrtimer_gator.c"
+#include "gator_cookies.c"
+#include "gator_annotate.c"
+#include "gator_trace_sched.c"
+#include "gator_trace_power.c"
+#include "gator_trace_gpu.c"
+#include "gator_backtrace.c"
+#include "gator_fs.c"
+#include "gator_pack.c"
+
+/******************************************************************************
+ * Misc
+ ******************************************************************************/
+
+const struct gator_cpu gator_cpus[] = {
+	{
+		.cpuid = ARM1136,
+		.core_name = "ARM1136",
+		.pmnc_name = "ARM_ARM11",
+		.dt_name = "arm,arm1136",
+		.pmnc_counters = 3,
+	},
+	{
+		.cpuid = ARM1156,
+		.core_name = "ARM1156",
+		.pmnc_name = "ARM_ARM11",
+		.dt_name = "arm,arm1156",
+		.pmnc_counters = 3,
+	},
+	{
+		.cpuid = ARM1176,
+		.core_name = "ARM1176",
+		.pmnc_name = "ARM_ARM11",
+		.dt_name = "arm,arm1176",
+		.pmnc_counters = 3,
+	},
+	{
+		.cpuid = ARM11MPCORE,
+		.core_name = "ARM11MPCore",
+		.pmnc_name = "ARM_ARM11MPCore",
+		.dt_name = "arm,arm11mpcore",
+		.pmnc_counters = 3,
+	},
+	{
+		.cpuid = CORTEX_A5,
+		.core_name = "Cortex-A5",
+		.pmu_name = "ARMv7_Cortex_A5",
+		.pmnc_name = "ARM_Cortex-A5",
+		.dt_name = "arm,cortex-a5",
+		.pmnc_counters = 2,
+	},
+	{
+		.cpuid = CORTEX_A7,
+		.core_name = "Cortex-A7",
+		.pmu_name = "ARMv7_Cortex_A7",
+		.pmnc_name = "ARM_Cortex-A7",
+		.dt_name = "arm,cortex-a7",
+		.pmnc_counters = 4,
+	},
+	{
+		.cpuid = CORTEX_A8,
+		.core_name = "Cortex-A8",
+		.pmu_name = "ARMv7_Cortex_A8",
+		.pmnc_name = "ARM_Cortex-A8",
+		.dt_name = "arm,cortex-a8",
+		.pmnc_counters = 4,
+	},
+	{
+		.cpuid = CORTEX_A9,
+		.core_name = "Cortex-A9",
+		.pmu_name = "ARMv7_Cortex_A9",
+		.pmnc_name = "ARM_Cortex-A9",
+		.dt_name = "arm,cortex-a9",
+		.pmnc_counters = 6,
+	},
+	{
+		.cpuid = CORTEX_A12,
+		.core_name = "Cortex-A12",
+		.pmu_name = "ARMv7_Cortex_A12",
+		.pmnc_name = "ARM_Cortex-A12",
+		.dt_name = "arm,cortex-a12",
+		.pmnc_counters = 6,
+	},
+	{
+		.cpuid = CORTEX_A15,
+		.core_name = "Cortex-A15",
+		.pmu_name = "ARMv7_Cortex_A15",
+		.pmnc_name = "ARM_Cortex-A15",
+		.dt_name = "arm,cortex-a15",
+		.pmnc_counters = 6,
+	},
+	{
+		.cpuid = SCORPION,
+		.core_name = "Scorpion",
+		.pmnc_name = "Scorpion",
+		.pmnc_counters = 4,
+	},
+	{
+		.cpuid = SCORPIONMP,
+		.core_name = "ScorpionMP",
+		.pmnc_name = "ScorpionMP",
+		.pmnc_counters = 4,
+	},
+	{
+		.cpuid = KRAITSIM,
+		.core_name = "KraitSIM",
+		.pmnc_name = "Krait",
+		.pmnc_counters = 4,
+	},
+	{
+		.cpuid = KRAIT,
+		.core_name = "Krait",
+		.pmnc_name = "Krait",
+		.pmnc_counters = 4,
+	},
+	{
+		.cpuid = KRAIT_S4_PRO,
+		.core_name = "Krait S4 Pro",
+		.pmnc_name = "Krait",
+		.pmnc_counters = 4,
+	},
+	{
+		.cpuid = CORTEX_A53,
+		.core_name = "Cortex-A53",
+		.pmnc_name = "ARM_Cortex-A53",
+		.dt_name = "arm,cortex-a53",
+		.pmnc_counters = 6,
+	},
+	{
+		.cpuid = CORTEX_A57,
+		.core_name = "Cortex-A57",
+		.pmnc_name = "ARM_Cortex-A57",
+		.dt_name = "arm,cortex-a57",
+		.pmnc_counters = 6,
+	},
+	{
+		.cpuid = AARCH64,
+		.core_name = "AArch64",
+		.pmnc_name = "ARM_AArch64",
+		.pmnc_counters = 6,
+	},
+	{
+		.cpuid = OTHER,
+		.core_name = "Other",
+		.pmnc_name = "Other",
+		.pmnc_counters = 6,
+	},
+	{}
+};
+
+// Look up the gator_cpus entry whose cpuid matches; returns NULL when none does.
+// The table walk stops at the first entry with cpuid == 0.
+const struct gator_cpu *gator_find_cpu_by_cpuid(const u32 cpuid)
+{
+	const struct gator_cpu *entry;
+
+	for (entry = gator_cpus; entry->cpuid != 0; ++entry) {
+		if (entry->cpuid == cpuid)
+			return entry;
+	}
+
+	return NULL;
+}
+
+// Look up the gator_cpus entry whose pmu_name equals name; returns NULL when none does.
+// Entries without a pmu_name are skipped; the walk stops at the first entry with cpuid == 0.
+const struct gator_cpu *gator_find_cpu_by_pmu_name(const char *const name)
+{
+	const struct gator_cpu *entry;
+
+	for (entry = gator_cpus; entry->cpuid != 0; ++entry) {
+		if (entry->pmu_name != NULL && strcmp(entry->pmu_name, name) == 0)
+			return entry;
+	}
+
+	return NULL;
+}
+
+u32 gator_cpuid(void)
+{
+#if defined(__arm__) || defined(__aarch64__)
+	u32 val;
+#if !defined(__aarch64__)
+	asm volatile("mrc p15, 0, %0, c0, c0, 0" : "=r" (val));
+#else
+	asm volatile("mrs %0, midr_el1" : "=r" (val));
+#endif
+	return (val >> 4) & 0xfff;
+#else
+	return OTHER;
+#endif
+}
+
+static void gator_buffer_wake_up(unsigned long data)
+{
+	wake_up(&gator_buffer_wait);
+}
+
+// Thread body: each time the thread is woken it wakes the waiters on gator_buffer_wait, until gator_buffer_wake_stop is set
+static int gator_buffer_wake_func(void *data)
+{
+	do {
+		// Change state before testing the flag so a wake_up_process() racing with the test is not lost
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!gator_buffer_wake_stop)
+			schedule();
+		__set_current_state(TASK_RUNNING);
+		if (!gator_buffer_wake_stop)
+			gator_buffer_wake_up(0);
+	} while (!gator_buffer_wake_stop);
+	return 0;
+}
+
+/******************************************************************************
+ * Commit interface
+ ******************************************************************************/
+static bool buffer_commit_ready(int *cpu, int *buftype)
+{
+	int cpu_x, x;
+	for_each_present_cpu(cpu_x) {
+		for (x = 0; x < NUM_GATOR_BUFS; x++)
+			if (per_cpu(gator_buffer_commit, cpu_x)[x] != per_cpu(gator_buffer_read, cpu_x)[x]) {
+				*cpu = cpu_x;
+				*buftype = x;
+				return true;
+			}
+	}
+	*cpu = -1;
+	*buftype = -1;
+	return false;
+}
+
+/******************************************************************************
+ * Buffer management
+ ******************************************************************************/
+static int buffer_bytes_available(int cpu, int buftype)
+{
+	int remaining, filled;
+
+	filled = per_cpu(gator_buffer_write, cpu)[buftype] - per_cpu(gator_buffer_read, cpu)[buftype];
+	if (filled < 0) {
+		filled += gator_buffer_size[buftype];
+	}
+
+	remaining = gator_buffer_size[buftype] - filled;
+
+	if (per_cpu(buffer_space_available, cpu)[buftype]) {
+		// Give some extra room; also allows space to insert the overflow error packet
+		remaining -= 200;
+	} else {
+		// Hysteresis, prevents multiple overflow messages
+		remaining -= 2000;
+	}
+
+	return remaining;
+}
+
+// Bytes writable without wrapping: the smaller of the available space and the distance from the write position to the buffer end
+static int contiguous_space_available(int cpu, int buftype)
+{
+	const int free_bytes = buffer_bytes_available(cpu, buftype);
+	const int to_end = gator_buffer_size[buftype] - per_cpu(gator_buffer_write, cpu)[buftype];
+
+	// Equivalent to min(free_bytes, to_end)
+	return (free_bytes < to_end) ? free_bytes : to_end;
+}
+
+// Record whether at least 'bytes' bytes are available in the given buffer and return that result.
+// The stored flag is read back by buffer_bytes_available to choose how much room to hold in reserve.
+static bool buffer_check_space(int cpu, int buftype, int bytes)
+{
+	// Evaluated before the flag is updated, so the previous flag value still sizes the reserve
+	const bool has_room = buffer_bytes_available(cpu, buftype) >= bytes;
+
+	// Latch the outcome for subsequent callers
+	per_cpu(buffer_space_available, cpu)[buftype] = has_room;
+
+	return has_room;
+}
+
+static void gator_buffer_write_bytes(int cpu, int buftype, const char *x, int len)
+{
+	int i;
+	u32 write = per_cpu(gator_buffer_write, cpu)[buftype];
+	u32 mask = gator_buffer_mask[buftype];
+	char *buffer = per_cpu(gator_buffer, cpu)[buftype];
+
+	for (i = 0; i < len; i++) {
+		buffer[write] = x[i];
+		write = (write + 1) & mask;
+	}
+
+	per_cpu(gator_buffer_write, cpu)[buftype] = write;
+}
+
+static void gator_buffer_write_string(int cpu, int buftype, const char *x)
+{
+	int len = strlen(x);
+	gator_buffer_write_packed_int(cpu, buftype, len);
+	gator_buffer_write_bytes(cpu, buftype, x, len);
+}
+
+static void gator_commit_buffer(int cpu, int buftype, u64 time)
+{
+	int type_length, commit, length, byte;
+	// Commit everything written since the last commit as one frame; nothing to do when the buffer pointer is NULL
+	if (!per_cpu(gator_buffer, cpu)[buftype])
+		return;
+
+	// post-populate the length, which does not include the response type length nor the length itself, i.e. only the length of the payload
+	type_length = gator_response_type ? 1 : 0;
+	commit = per_cpu(gator_buffer_commit, cpu)[buftype];
+	length = per_cpu(gator_buffer_write, cpu)[buftype] - commit;
+	if (length < 0) {
+		length += gator_buffer_size[buftype];
+	}
+	length = length - type_length - sizeof(s32);
+
+	if (length <= FRAME_HEADER_SIZE) {
+		// Nothing to write, only the frame header is present
+		return;
+	}
+	// Store the payload length, least significant byte first, right after the optional response type byte
+	for (byte = 0; byte < sizeof(s32); byte++) {
+		per_cpu(gator_buffer, cpu)[buftype][(commit + type_length + byte) & gator_buffer_mask[buftype]] = (length >> byte * 8) & 0xFF;
+	}
+	// Move the commit position up to the write position; buffer_commit_ready compares it with the read position
+	per_cpu(gator_buffer_commit, cpu)[buftype] = per_cpu(gator_buffer_write, cpu)[buftype];
+	// For live capture, step this cpu's commit deadline forward by gator_live_rate until it is no longer before 'time'
+	if (gator_live_rate > 0) {
+		while (time > per_cpu(gator_buffer_commit_time, cpu)) {
+			per_cpu(gator_buffer_commit_time, cpu) += gator_live_rate;
+		}
+	}
+	// NOTE(review): marshal_frame presumably writes the header of the next frame - confirm in gator_marshaling.c
+	marshal_frame(cpu, buftype);
+
+	// had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater
+	if (per_cpu(in_scheduler_context, cpu)) {
+#ifndef CONFIG_PREEMPT_RT_FULL
+		// mod_timer can not be used in interrupt context in RT-Preempt full
+		mod_timer(&gator_buffer_wake_up_timer, jiffies + 1);
+#endif
+	} else {
+		wake_up_process(gator_buffer_wake_thread);
+	}
+}
+
+// Commit the buffer once the data written since the last commit fills at least three quarters of it
+static void buffer_check(int cpu, int buftype, u64 time)
+{
+	const uint32_t size = gator_buffer_size[buftype];
+	int pending = per_cpu(gator_buffer_write, cpu)[buftype] - per_cpu(gator_buffer_commit, cpu)[buftype];
+	if (pending < 0)
+		pending += size;
+	if (pending >= (size * 3) / 4)
+		gator_commit_buffer(cpu, buftype, time);
+}
+
+static void gator_add_trace(int cpu, unsigned long address)
+{
+	off_t offset = 0;
+	unsigned long cookie = get_address_cookie(cpu, current, address & ~1, &offset);
+
+	if (cookie == NO_COOKIE || cookie == UNRESOLVED_COOKIE) {
+		offset = address;
+	}
+
+	marshal_backtrace(offset & ~1, cookie);
+}
+
+static void gator_add_sample(int cpu, struct pt_regs *const regs, u64 time)
+{
+	bool inKernel;
+	unsigned long exec_cookie;
+
+	if (!regs)
+		return;
+
+	inKernel = !user_mode(regs);
+	exec_cookie = get_exec_cookie(cpu, current);
+
+	if (!marshal_backtrace_header(exec_cookie, current->tgid, current->pid, inKernel, time))
+		return;
+
+	if (inKernel) {
+		kernel_backtrace(cpu, regs);
+	} else {
+		// Cookie+PC
+		gator_add_trace(cpu, PC_REG);
+
+		// Backtrace
+		if (gator_backtrace_depth)
+			arm_backtrace_eabi(cpu, regs, gator_backtrace_depth);
+	}
+
+	marshal_backtrace_footer(time);
+}
+
+/******************************************************************************
+ * hrtimer interrupt processing
+ ******************************************************************************/
+static void gator_timer_interrupt(void)
+{
+	// Hand the registers reported by get_irq_regs() to the sampling handler
+	gator_backtrace_handler(get_irq_regs());
+}
+
+void gator_backtrace_handler(struct pt_regs *const regs)
+{
+	u64 time = gator_get_time();
+	int cpu = get_physical_cpu();
+
+	// Output backtrace
+	gator_add_sample(cpu, regs, time);
+
+	// Collect counters
+	if (!per_cpu(collecting, cpu)) {
+		collect_counters(time, NULL);
+	}
+
+	// No buffer flushing occurs during sched switch for RT-Preempt full. The block counter frame will be flushed by collect_counters, but the sched buffer needs to be explicitly flushed
+#ifdef CONFIG_PREEMPT_RT_FULL
+	buffer_check(cpu, SCHED_TRACE_BUF, time);
+#endif
+}
+
+static int gator_running;
+
+// This function runs in interrupt context and on the appropriate core
+static void gator_timer_offline(void *migrate)
+{
+	struct gator_interface *gi;
+	int i, len, cpu = get_physical_cpu();
+	int *buffer;
+	u64 time;
+
+	gator_trace_sched_offline();
+	gator_trace_power_offline();
+
+	if (!migrate) {
+		gator_hrtimer_offline();
+	}
+
+	// Offline any events and output counters
+	time = gator_get_time();
+	if (marshal_event_header(time)) {
+		list_for_each_entry(gi, &gator_events, list) {
+			if (gi->offline) {
+				len = gi->offline(&buffer, migrate);
+				marshal_event(len, buffer);
+			}
+		}
+		// Only check after writing all counters so that time and corresponding counters appear in the same frame
+		buffer_check(cpu, BLOCK_COUNTER_BUF, time);
+	}
+
+	// Flush all buffers on this core
+	for (i = 0; i < NUM_GATOR_BUFS; i++)
+		gator_commit_buffer(cpu, i, time);
+}
+
+// This function runs in interrupt context and may be running on a core other than core 'cpu'
+static void gator_timer_offline_dispatch(int cpu, bool migrate)
+{
+	struct gator_interface *gi;
+
+	list_for_each_entry(gi, &gator_events, list) {
+		if (gi->offline_dispatch) {
+			gi->offline_dispatch(cpu, migrate);
+		}
+	}
+}
+
+static void gator_timer_stop(void)
+{
+	int cpu;
+
+	if (gator_running) {
+		on_each_cpu(gator_timer_offline, NULL, 1);
+		for_each_online_cpu(cpu) {
+			gator_timer_offline_dispatch(lcpu_to_pcpu(cpu), false);
+		}
+
+		gator_running = 0;
+		gator_hrtimer_shutdown();
+	}
+}
+
+#if defined(__arm__) || defined(__aarch64__)
+static void gator_send_core_name(int cpu, const u32 cpuid, const struct gator_cpu *const gator_cpu) {
+	const char *core_name = NULL;
+	char core_name_buf[32];
+
+	if (!sent_core_name[cpu]) {
+		if (gator_cpu != NULL) {
+			core_name = gator_cpu->core_name;
+		} else {
+			snprintf(core_name_buf, sizeof(core_name_buf), "Unknown (0x%.3x)", cpuid);
+			core_name = core_name_buf;
+		}
+
+		marshal_core_name(cpu, cpuid, core_name);
+		sent_core_name[cpu] = true;
+	}
+}
+#endif
+
+// This function runs in interrupt context and on the appropriate core
+static void gator_timer_online(void *migrate)
+{
+	struct gator_interface *gi;
+	int len, cpu = get_physical_cpu();
+	int *buffer;
+	u64 time;
+
+	gator_trace_power_online();
+
+	// online any events and output counters
+	time = gator_get_time();
+	if (marshal_event_header(time)) {
+		list_for_each_entry(gi, &gator_events, list) {
+			if (gi->online) {
+				len = gi->online(&buffer, migrate);
+				marshal_event(len, buffer);
+			}
+		}
+		// Only check after writing all counters so that time and corresponding counters appear in the same frame
+		buffer_check(cpu, BLOCK_COUNTER_BUF, time);
+	}
+
+	if (!migrate) {
+		gator_hrtimer_online();
+	}
+
+#if defined(__arm__) || defined(__aarch64__)
+	if (!sent_core_name[cpu]) {
+		const u32 cpuid = gator_cpuid();
+		gator_send_core_name(cpu, cpuid, gator_find_cpu_by_cpuid(cpuid));
+	}
+#endif
+}
+
+// This function runs in interrupt context and may be running on a core other than core 'cpu'
+static void gator_timer_online_dispatch(int cpu, bool migrate)
+{
+	struct gator_interface *gi;
+
+	list_for_each_entry(gi, &gator_events, list) {
+		if (gi->online_dispatch) {
+			gi->online_dispatch(cpu, migrate);
+		}
+	}
+}
+
+#include "gator_iks.c"
+
+// Set up the hrtimer and bring all online cpus online for sampling; returns 0 on success or if already running, -1 if gator_hrtimer_init fails
+int gator_timer_start(unsigned long sample_rate)
+{
+	int cpu;
+
+	if (gator_running) {
+		pr_notice("gator: already running\n");
+		return 0;
+	}
+
+	// event based sampling trumps hr timer based sampling
+	if (event_based_sampling)
+		sample_rate = 0;
+
+	// Only mark gator as running once the hrtimer is set up, otherwise a failed start would make every later start a no-op
+	if (gator_hrtimer_init(sample_rate, gator_timer_interrupt) == -1)
+		return -1;
+	gator_running = 1;
+
+	gator_send_iks_core_names();
+	for_each_online_cpu(cpu) {
+		gator_timer_online_dispatch(lcpu_to_pcpu(cpu), false);
+	}
+	on_each_cpu(gator_timer_online, NULL, 1);
+
+	return 0;
+}
+
+// Return the raw monotonic time in nanoseconds minus gator_monotonic_started.
+// If the clock appears to step backwards on this cpu, the previous per-cpu timestamp is used instead.
+static u64 gator_get_time(void)
+{
+	struct timespec ts;
+	u64 timestamp, prev_timestamp, delta;
+	int cpu = smp_processor_id();
+
+	// Match clock_gettime(CLOCK_MONOTONIC_RAW, &ts) from userspace
+	getrawmonotonic(&ts);
+	timestamp = timespec_to_ns(&ts);
+
+	// getrawmonotonic is not monotonic on all systems. Detect and attempt to correct these cases.
+	// up to 0.5ms delta has been seen on some systems, which can skew Streamline data when viewing at high resolution.
+	// This doesn't work well with interrupts, but that's OK - the real concern is to catch big jumps in time
+	prev_timestamp = per_cpu(last_timestamp, cpu);
+	if (prev_timestamp <= timestamp) {
+		per_cpu(last_timestamp, cpu) = timestamp;
+	} else {
+		delta = prev_timestamp - timestamp;
+		// Log the error once
+		if (!printed_monotonic_warning && delta > 500000) {
+			printk(KERN_ERR "%s: getrawmonotonic is not monotonic  cpu: %i  delta: %llu\nSkew in Streamline data may be present at the fine zoom levels\n", __FUNCTION__, cpu, delta);
+			printed_monotonic_warning = true;
+		}
+		timestamp = prev_timestamp;
+	}
+
+	return timestamp - gator_monotonic_started;
+}
+
+/******************************************************************************
+ * cpu hotplug and pm notifiers
+ ******************************************************************************/
+static int __cpuinit gator_hotcpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+	int cpu = lcpu_to_pcpu((long)hcpu);
+
+	switch (action) {
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		smp_call_function_single(cpu, gator_timer_offline, NULL, 1);
+		gator_timer_offline_dispatch(cpu, false);
+		break;
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		gator_timer_online_dispatch(cpu, false);
+		smp_call_function_single(cpu, gator_timer_online, NULL, 1);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __refdata gator_hotcpu_notifier = {
+	.notifier_call = gator_hotcpu_notify,
+};
+
+// n.b. calling "on_each_cpu" only runs on those that are online
+// Registered linux events are not disabled, so their counters will continue to collect
+static int gator_pm_notify(struct notifier_block *nb, unsigned long event, void *dummy)
+{
+	int cpu;
+	struct timespec ts;
+
+	switch (event) {
+	case PM_HIBERNATION_PREPARE:
+	case PM_SUSPEND_PREPARE:
+		unregister_hotcpu_notifier(&gator_hotcpu_notifier);
+		unregister_scheduler_tracepoints();
+		on_each_cpu(gator_timer_offline, NULL, 1);
+		for_each_online_cpu(cpu) {
+			gator_timer_offline_dispatch(lcpu_to_pcpu(cpu), false);
+		}
+
+		// Record the wallclock hibernate time
+		getnstimeofday(&ts);
+		gator_hibernate_time = timespec_to_ns(&ts) - gator_get_time();
+		break;
+	case PM_POST_HIBERNATION:
+	case PM_POST_SUSPEND:
+		// Adjust gator_monotonic_started for the time spent sleeping, as gator_get_time does not account for it
+		if (gator_hibernate_time > 0) {
+			getnstimeofday(&ts);
+			gator_monotonic_started += gator_hibernate_time + gator_get_time() - timespec_to_ns(&ts);
+			gator_hibernate_time = 0;
+		}
+
+		for_each_online_cpu(cpu) {
+			gator_timer_online_dispatch(lcpu_to_pcpu(cpu), false);
+		}
+		on_each_cpu(gator_timer_online, NULL, 1);
+		register_scheduler_tracepoints();
+		register_hotcpu_notifier(&gator_hotcpu_notifier);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block gator_pm_notifier = {
+	.notifier_call = gator_pm_notify,
+};
+
+static int gator_notifier_start(void)
+{
+	int retval;
+	retval = register_hotcpu_notifier(&gator_hotcpu_notifier);
+	if (retval == 0)
+		retval = register_pm_notifier(&gator_pm_notifier);
+	return retval;
+}
+
+static void gator_notifier_stop(void)
+{
+	unregister_pm_notifier(&gator_pm_notifier);
+	unregister_hotcpu_notifier(&gator_hotcpu_notifier);
+}
+
+/******************************************************************************
+ * Main
+ ******************************************************************************/
+static void gator_summary(void)
+{
+	u64 timestamp, uptime;
+	struct timespec ts;
+	char uname_buf[512];
+	void (*m2b)(struct timespec *ts);
+	unsigned long flags;
+
+	snprintf(uname_buf, sizeof(uname_buf), "%s %s %s %s %s GNU/Linux", utsname()->sysname, utsname()->nodename, utsname()->release, utsname()->version, utsname()->machine);
+
+	getnstimeofday(&ts);
+	timestamp = timespec_to_ns(&ts);
+
+	do_posix_clock_monotonic_gettime(&ts);
+	// monotonic_to_bootbased is not defined for some versions of Android
+	m2b = symbol_get(monotonic_to_bootbased);
+	if (m2b) {
+		m2b(&ts);
+	}
+	uptime = timespec_to_ns(&ts);
+
+	// Disable interrupts as gator_get_time calls smp_processor_id to verify time is monotonic
+	local_irq_save(flags);
+	// Set monotonic_started to zero as gator_get_time is uptime minus monotonic_started
+	gator_monotonic_started = 0;
+	gator_monotonic_started = gator_get_time();
+	local_irq_restore(flags);
+
+	marshal_summary(timestamp, uptime, gator_monotonic_started, uname_buf);
+}
+
+int gator_events_install(struct gator_interface *interface)
+{
+	list_add_tail(&interface->list, &gator_events);
+
+	return 0;
+}
+
+int gator_events_get_key(void)
+{
+	// key 0 is reserved as a timestamp
+	// key 1 is reserved as the marker for thread specific counters
+	// Odd keys are assigned by the driver, even keys by the daemon
+	static int key = 3;
+
+	const int ret = key;
+	key += 2;
+	return ret;
+}
+
+static int gator_init(void)
+{
+	int i;
+
+	calc_first_cluster_size();
+
+	// events sources
+	for (i = 0; i < ARRAY_SIZE(gator_events_list); i++)
+		if (gator_events_list[i])
+			gator_events_list[i]();
+
+	gator_trace_sched_init();
+	gator_trace_power_init();
+
+	return 0;
+}
+
+static void gator_exit(void)
+{
+	struct gator_interface *gi;
+
+	list_for_each_entry(gi, &gator_events, list)
+		if (gi->shutdown)
+			gi->shutdown();
+}
+
+static int gator_start(void)
+{
+	unsigned long cpu, i;
+	struct gator_interface *gi;
+
+	gator_buffer_wake_stop = false;
+	if (IS_ERR(gator_buffer_wake_thread = kthread_run(gator_buffer_wake_func, NULL, "gator_bwake"))) {
+		goto bwake_failure;
+	}
+
+	if (gator_migrate_start())
+		goto migrate_failure;
+
+	// Initialize the buffer with the frame type and core
+	for_each_present_cpu(cpu) {
+		for (i = 0; i < NUM_GATOR_BUFS; i++) {
+			marshal_frame(cpu, i);
+		}
+		per_cpu(last_timestamp, cpu) = 0;
+	}
+	printed_monotonic_warning = false;
+
+	// Capture the start time
+	gator_summary();
+
+	// start all events
+	list_for_each_entry(gi, &gator_events, list) {
+		if (gi->start && gi->start() != 0) {
+			struct list_head *ptr = gi->list.prev;
+
+			while (ptr != &gator_events) {
+				gi = list_entry(ptr, struct gator_interface, list);
+
+				if (gi->stop)
+					gi->stop();
+
+				ptr = ptr->prev;
+			}
+			goto events_failure;
+		}
+	}
+
+	// cookies shall be initialized before trace_sched_start() and gator_timer_start()
+	if (cookies_initialize())
+		goto cookies_failure;
+	if (gator_annotate_start())
+		goto annotate_failure;
+	if (gator_trace_sched_start())
+		goto sched_failure;
+	if (gator_trace_power_start())
+		goto power_failure;
+	if (gator_trace_gpu_start())
+		goto gpu_failure;
+	if (gator_timer_start(gator_timer_count))
+		goto timer_failure;
+	if (gator_notifier_start())
+		goto notifier_failure;
+
+	return 0;
+
+notifier_failure:
+	gator_timer_stop();
+timer_failure:
+	gator_trace_gpu_stop();
+gpu_failure:
+	gator_trace_power_stop();
+power_failure:
+	gator_trace_sched_stop();
+sched_failure:
+	gator_annotate_stop();
+annotate_failure:
+	cookies_release();
+cookies_failure:
+	// stop all events
+	list_for_each_entry(gi, &gator_events, list)
+		if (gi->stop)
+			gi->stop();
+events_failure:
+	gator_migrate_stop();
+migrate_failure:
+	gator_buffer_wake_stop = true;
+	wake_up_process(gator_buffer_wake_thread);
+bwake_failure:
+
+	return -1;
+}
+
+static void gator_stop(void)	/* Stop every capture source that gator_start() starts; cookies are released separately by gator_op_stop() */
+{
+	struct gator_interface *gi;
+
+	gator_annotate_stop();
+	gator_trace_sched_stop();
+	gator_trace_power_stop();
+	gator_trace_gpu_stop();
+
+	// stop all interrupt callback reads before tearing down other interfaces
+	gator_notifier_stop();	// should be called before gator_timer_stop to avoid re-enabling the hrtimer after it has been offlined
+	gator_timer_stop();
+
+	// stop all events
+	list_for_each_entry(gi, &gator_events, list)	/* ->stop is optional, interfaces without one are skipped */
+		if (gi->stop)
+			gi->stop();
+
+	gator_migrate_stop();
+
+	gator_buffer_wake_stop = true;	/* flag is set before the wake-up; presumably the woken thread checks it and exits - confirm */
+	wake_up_process(gator_buffer_wake_thread);
+}
+
+/******************************************************************************
+ * Filesystem
+ ******************************************************************************/
+/* fopen("buffer") */
+static int gator_op_setup(void)	/* Size and allocate the per-cpu capture buffers; returns 0, -ENOEXEC (size not a power of 2) or -ENOMEM */
+{
+	int err = 0;
+	int cpu, i;
+
+	mutex_lock(&start_mutex);
+
+	gator_buffer_size[SUMMARY_BUF] = SUMMARY_BUFFER_SIZE;
+	gator_buffer_mask[SUMMARY_BUF] = SUMMARY_BUFFER_SIZE - 1;	/* mask = size - 1, valid because sizes are powers of 2 */
+	gator_buffer_size[BACKTRACE_BUF] = BACKTRACE_BUFFER_SIZE;
+	gator_buffer_mask[BACKTRACE_BUF] = BACKTRACE_BUFFER_SIZE - 1;
+	gator_buffer_size[NAME_BUF] = NAME_BUFFER_SIZE;
+	gator_buffer_mask[NAME_BUF] = NAME_BUFFER_SIZE - 1;
+	gator_buffer_size[COUNTER_BUF] = COUNTER_BUFFER_SIZE;
+	gator_buffer_mask[COUNTER_BUF] = COUNTER_BUFFER_SIZE - 1;
+	gator_buffer_size[BLOCK_COUNTER_BUF] = BLOCK_COUNTER_BUFFER_SIZE;
+	gator_buffer_mask[BLOCK_COUNTER_BUF] = BLOCK_COUNTER_BUFFER_SIZE - 1;
+	gator_buffer_size[ANNOTATE_BUF] = ANNOTATE_BUFFER_SIZE;
+	gator_buffer_mask[ANNOTATE_BUF] = ANNOTATE_BUFFER_SIZE - 1;
+	gator_buffer_size[SCHED_TRACE_BUF] = SCHED_TRACE_BUFFER_SIZE;
+	gator_buffer_mask[SCHED_TRACE_BUF] = SCHED_TRACE_BUFFER_SIZE - 1;
+	gator_buffer_size[GPU_TRACE_BUF] = GPU_TRACE_BUFFER_SIZE;
+	gator_buffer_mask[GPU_TRACE_BUF] = GPU_TRACE_BUFFER_SIZE - 1;
+	gator_buffer_size[IDLE_BUF] = IDLE_BUFFER_SIZE;
+	gator_buffer_mask[IDLE_BUF] = IDLE_BUFFER_SIZE - 1;
+
+	// Initialize percpu per buffer variables
+	for (i = 0; i < NUM_GATOR_BUFS; i++) {
+		// Verify buffers are a power of 2
+		if (gator_buffer_size[i] & (gator_buffer_size[i] - 1)) {
+			err = -ENOEXEC;
+			goto setup_error;
+		}
+		for_each_present_cpu(cpu) {
+			per_cpu(gator_buffer_read, cpu)[i] = 0;
+			per_cpu(gator_buffer_write, cpu)[i] = 0;
+			per_cpu(gator_buffer_commit, cpu)[i] = 0;
+			per_cpu(buffer_space_available, cpu)[i] = true;
+			per_cpu(gator_buffer_commit_time, cpu) = gator_live_rate;
+			// Annotation is a special case that only uses a single buffer
+			if (cpu > 0 && i == ANNOTATE_BUF) {
+				per_cpu(gator_buffer, cpu)[i] = NULL;
+				continue;
+			}
+			per_cpu(gator_buffer, cpu)[i] = vmalloc(gator_buffer_size[i]);
+			if (!per_cpu(gator_buffer, cpu)[i]) {
+				err = -ENOMEM;
+				goto setup_error;
+			}
+		}
+	}
+
+setup_error:
+	if (err) {
+		// A failed open never reaches release/gator_shutdown, so free what was allocated here; untouched slots are NULL and vfree(NULL) is a no-op
+		mutex_lock(&gator_buffer_mutex);
+		for_each_present_cpu(cpu) {
+			for (i = 0; i < NUM_GATOR_BUFS; i++) {
+				vfree(per_cpu(gator_buffer, cpu)[i]);
+				per_cpu(gator_buffer, cpu)[i] = NULL;
+			}
+		}
+		mutex_unlock(&gator_buffer_mutex);
+	}
+	mutex_unlock(&start_mutex);
+	return err;
+}
+
+/* Actually start profiling (echo 1 > /dev/gator/enable) */
+static int gator_op_start(void)	/* Begin a capture; -EINVAL if one is already running or gator_start() fails */
+{
+	int status = -EINVAL;
+
+	mutex_lock(&start_mutex);
+
+	// gator_start() is only attempted when no capture is running yet
+	if (!gator_started && gator_start() == 0) {
+		gator_started = 1;
+		status = 0;
+	}
+	mutex_unlock(&start_mutex);
+
+	return status;
+}
+
+/* Stop profiling (echo 0 > /dev/gator/enable) */
+static void gator_op_stop(void)	/* End the running capture, if any, and wake readers waiting on gator_buffer_wait */
+{
+	mutex_lock(&start_mutex);
+
+	if (!gator_started)
+		goto unlock;	// nothing is running
+
+	gator_stop();
+
+	mutex_lock(&gator_buffer_mutex);	// run state is cleared under the same mutex the buffer reader takes
+	gator_started = 0;
+	gator_monotonic_started = 0;
+	cookies_release();
+	wake_up(&gator_buffer_wait);	// the read path waits on "data ready || !gator_started"
+	mutex_unlock(&gator_buffer_mutex);
+
+unlock:
+	mutex_unlock(&start_mutex);
+}
+
+static void gator_shutdown(void)	/* Free every per-cpu buffer and reset its bookkeeping to the idle state */
+{
+	int cpu, buftype;
+
+	mutex_lock(&start_mutex);
+
+	for_each_present_cpu(cpu) {
+		mutex_lock(&gator_buffer_mutex);	// taken and dropped once per cpu
+		for (buftype = 0; buftype < NUM_GATOR_BUFS; buftype++) {
+			vfree(per_cpu(gator_buffer, cpu)[buftype]);	// entries may be NULL (e.g. annotate buffers on cpu > 0)
+			per_cpu(gator_buffer, cpu)[buftype] = NULL;
+			per_cpu(buffer_space_available, cpu)[buftype] = true;
+			per_cpu(gator_buffer_commit, cpu)[buftype] = 0;
+			per_cpu(gator_buffer_write, cpu)[buftype] = 0;
+			per_cpu(gator_buffer_read, cpu)[buftype] = 0;
+		}
+		per_cpu(gator_buffer_commit_time, cpu) = 0;	// one value per cpu, not per buffer
+		mutex_unlock(&gator_buffer_mutex);
+	}
+
+	memset(&sent_core_name, 0, sizeof(sent_core_name));
+
+	mutex_unlock(&start_mutex);
+}
+
+static int gator_set_backtrace(unsigned long val)	/* Set gator_backtrace_depth; -EBUSY while a capture is running */
+{
+	int err = -EBUSY;
+
+	mutex_lock(&start_mutex);
+
+	if (!gator_started) {
+		gator_backtrace_depth = val;	// marshal_backtrace_header() sizes records from this value
+		err = 0;
+	}
+
+	mutex_unlock(&start_mutex);
+
+	return err;
+}
+
+static ssize_t enable_read(struct file *file, char __user *buf, size_t count, loff_t *offset)	/* read() on "enable": reports gator_started through gatorfs_ulong_to_user() */
+{
+	return gatorfs_ulong_to_user(gator_started, buf, count, offset);
+}
+
+static ssize_t enable_write(struct file *file, char const __user *buf, size_t count, loff_t *offset)
+{
+	unsigned long val;
+	int retval;
+
+	if (*offset)
+		return -EINVAL;
+
+	retval = gatorfs_ulong_from_user(&val, buf, count);
+	if (retval)
+		return retval;
+
+	if (val)
+		retval = gator_op_start();
+	else
+		gator_op_stop();
+
+	if (retval)
+		return retval;
+	return count;
+}
+
+static const struct file_operations enable_fops = {	/* handlers for the "enable" file created in gator_op_create_files() */
+	.read = enable_read,
+	.write = enable_write,
+};
+
+static int userspace_buffer_open(struct inode *inode, struct file *file)	/* open() on "buffer": CAP_SYS_ADMIN only, one opener at a time; sets up the capture buffers */
+{
+	int status;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	// Bit 0 of gator_buffer_opened marks the file as already open
+	if (test_and_set_bit_lock(0, &gator_buffer_opened))
+		return -EBUSY;
+
+	status = gator_op_setup();
+	if (status != 0) {
+		// Setup failed: hand the open slot back before reporting the error
+		__clear_bit_unlock(0, &gator_buffer_opened);
+		return status;
+	}
+
+	/* NB: the actual start happens from userspace
+	 * echo 1 >/dev/gator/enable
+	 */
+	return 0;
+}
+
+static int userspace_buffer_release(struct inode *inode, struct file *file)	/* release() on "buffer": stop any capture, free the buffers, then allow the next open */
+{
+	gator_op_stop();	/* no-op when nothing is running */
+	gator_shutdown();
+	__clear_bit_unlock(0, &gator_buffer_opened);	/* pairs with test_and_set_bit_lock() in userspace_buffer_open() */
+	return 0;
+}
+
+static ssize_t userspace_buffer_read(struct file *file, char __user *buf, size_t count, loff_t *offset)	/* read() on "buffer": waits for committed data or capture stop, then copies committed spans; returns bytes copied, 0, -EINVAL, -EINTR or -EFAULT */
+{
+	int commit, length1, length2, read;
+	char *buffer1;
+	char *buffer2;
+	int cpu, buftype;
+	int written = 0;
+
+	// ensure there is enough space for a whole frame
+	if (count < userspace_buffer_size || *offset) {
+		return -EINVAL;
+	}
+
+	// sleep until the condition is true or a signal is received
+	// the condition is checked each time gator_buffer_wait is woken up
+	wait_event_interruptible(gator_buffer_wait, buffer_commit_ready(&cpu, &buftype) || !gator_started);	/* buffer_commit_ready() fills in cpu and buftype */
+
+	if (signal_pending(current)) {
+		return -EINTR;
+	}
+
+	if (buftype == -1 || cpu == -1) {	/* NOTE(review): presumably -1 means "nothing ready" (capture stopped) - confirm in buffer_commit_ready() */
+		return 0;
+	}
+
+	mutex_lock(&gator_buffer_mutex);
+
+	do {
+		read = per_cpu(gator_buffer_read, cpu)[buftype];
+		commit = per_cpu(gator_buffer_commit, cpu)[buftype];
+
+		// May happen if the buffer is freed during pending reads.
+		if (!per_cpu(gator_buffer, cpu)[buftype]) {
+			break;
+		}
+
+		// determine the size of two halves
+		length1 = commit - read;
+		length2 = 0;
+		buffer1 = &(per_cpu(gator_buffer, cpu)[buftype][read]);
+		buffer2 = &(per_cpu(gator_buffer, cpu)[buftype][0]);
+		if (length1 < 0) {	/* commit is behind read: the span wraps, so copy read..end and then 0..commit */
+			length1 = gator_buffer_size[buftype] - read;
+			length2 = commit;
+		}
+
+		if (length1 + length2 > count - written) {	/* span does not fit in the rest of the user buffer; leave it for the next read */
+			break;
+		}
+
+		// start, middle or end
+		if (length1 > 0 && copy_to_user(&buf[written], buffer1, length1)) {
+			break;
+		}
+
+		// possible wrap around
+		if (length2 > 0 && copy_to_user(&buf[written + length1], buffer2, length2)) {
+			break;
+		}
+
+		per_cpu(gator_buffer_read, cpu)[buftype] = commit;	/* the span is consumed only after both copies succeeded */
+		written += length1 + length2;
+
+		// Wake up annotate_write if more space is available
+		if (buftype == ANNOTATE_BUF) {
+			wake_up(&gator_annotate_wait);
+		}
+	} while (buffer_commit_ready(&cpu, &buftype));	/* keep draining while another (cpu, buftype) has committed data */
+
+	mutex_unlock(&gator_buffer_mutex);
+
+	// kick just in case we've lost an SMP event
+	wake_up(&gator_buffer_wait);
+
+	return written > 0 ? written : -EFAULT;	/* nothing copied (freed buffer, span too large, or copy fault) is reported as -EFAULT */
+}
+
+const struct file_operations gator_event_buffer_fops = {	/* handlers for the "buffer" file; not static, so visible outside this file */
+	.open = userspace_buffer_open,
+	.release = userspace_buffer_release,
+	.read = userspace_buffer_read,
+};
+
+static ssize_t depth_read(struct file *file, char __user *buf, size_t count, loff_t *offset)	/* read() on "backtrace_depth": reports gator_backtrace_depth */
+{
+	return gatorfs_ulong_to_user(gator_backtrace_depth, buf, count, offset);
+}
+
+static ssize_t depth_write(struct file *file, char const __user *buf, size_t count, loff_t *offset)
+{
+	unsigned long val;
+	int retval;
+
+	if (*offset)
+		return -EINVAL;
+
+	retval = gatorfs_ulong_from_user(&val, buf, count);
+	if (retval)
+		return retval;
+
+	retval = gator_set_backtrace(val);
+
+	if (retval)
+		return retval;
+	return count;
+}
+
+static const struct file_operations depth_fops = {	/* handlers for the "backtrace_depth" file */
+	.read = depth_read,
+	.write = depth_write
+};
+
+void gator_op_create_files(struct super_block *sb, struct dentry *root)	/* Reset tunables to defaults and create the gatorfs control files under root */
+{
+	struct dentry *dir;
+	struct gator_interface *gi;
+	int cpu;
+
+	/* reinitialize default values */
+	gator_cpu_cores = 0;
+	for_each_present_cpu(cpu) {	/* counts present cpus */
+		gator_cpu_cores++;
+	}
+	userspace_buffer_size = BACKTRACE_BUFFER_SIZE;	/* userspace_buffer_read() rejects reads smaller than this */
+	gator_response_type = 1;
+	gator_live_rate = 0;
+
+	gatorfs_create_file(sb, root, "enable", &enable_fops);
+	gatorfs_create_file(sb, root, "buffer", &gator_event_buffer_fops);
+	gatorfs_create_file(sb, root, "backtrace_depth", &depth_fops);
+	gatorfs_create_ro_ulong(sb, root, "cpu_cores", &gator_cpu_cores);
+	gatorfs_create_ro_ulong(sb, root, "buffer_size", &userspace_buffer_size);
+	gatorfs_create_ulong(sb, root, "tick", &gator_timer_count);
+	gatorfs_create_ulong(sb, root, "response_type", &gator_response_type);
+	gatorfs_create_ro_ulong(sb, root, "version", &gator_protocol_version);
+	gatorfs_create_ro_u64(sb, root, "started", &gator_monotonic_started);
+	gatorfs_create_u64(sb, root, "live_rate", &gator_live_rate);
+
+	// Annotate interface
+	gator_annotate_create_files(sb, root);
+
+	// Linux Events
+	dir = gatorfs_mkdir(sb, root, "events");	/* NOTE(review): result is used without a NULL check - confirm gatorfs_mkdir() cannot fail here */
+	list_for_each_entry(gi, &gator_events, list)
+		if (gi->create_files)
+			gi->create_files(sb, dir);
+
+	// Sched Events
+	sched_trace_create_files(sb, dir);
+
+	// Power interface
+	gator_trace_power_create_files(sb, dir);
+}
+
+/******************************************************************************
+ * Module
+ ******************************************************************************/
+static int __init gator_module_init(void)	/* Module entry: register gatorfs, run gator_init(), set up the wake-up timer; 0 or -1 */
+{
+	if (gatorfs_register() != 0)
+		return -1;
+
+	if (gator_init() != 0)
+		goto unregister_fs;
+
+	setup_timer(&gator_buffer_wake_up_timer, gator_buffer_wake_up, 0);	// callback gator_buffer_wake_up, data 0
+	return 0;
+
+unregister_fs:
+	gatorfs_unregister();	// undo the only step that succeeded
+	return -1;
+}
+
+static void __exit gator_module_exit(void)	/* Module exit: stop the timer, wait for tracepoint probes, then undo gator_module_init() in reverse */
+{
+	del_timer_sync(&gator_buffer_wake_up_timer);
+	tracepoint_synchronize_unregister();
+	gator_exit();
+	gatorfs_unregister();
+}
+
+module_init(gator_module_init);
+module_exit(gator_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd");
+MODULE_DESCRIPTION("Gator system profiler");
+#define STRIFY2(ARG) #ARG	/* stringify the argument exactly as passed */
+#define STRIFY(ARG) STRIFY2(ARG)	/* extra level so a macro argument is expanded before it is stringified */
+MODULE_VERSION(STRIFY(PROTOCOL_VERSION));	/* module version string is the expansion of PROTOCOL_VERSION */
diff --git a/drivers/gator/gator_marshaling.c b/drivers/gator/gator_marshaling.c
new file mode 100644
index 000000000000..af80ff62e712
--- /dev/null
+++ b/drivers/gator/gator_marshaling.c
@@ -0,0 +1,432 @@
+/**
+ * Copyright (C) ARM Limited 2012-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#define NEWLINE_CANARY /* one string literal holding each line-ending style; first item written by marshal_summary(), presumably so newline translation in transit can be detected - confirm */ \
+	/* Unix */ \
+	"1\n" \
+	/* Windows */ \
+	"2\r\n" \
+	/* Mac OS */ \
+	"3\r" \
+	/* RISC OS */ \
+	"4\n\r" \
+	/* Add another character so the length isn't 0x0a bytes */ \
+	"5"
+
+#ifdef MALI_SUPPORT
+#include "gator_events_mali_common.h"
+#endif
+
+static void marshal_summary(long long timestamp, long long uptime, long long monotonic_delta, const char * uname)	/* Write the summary record (canary, three times, then key/value strings) and commit it immediately */
+{
+	unsigned long flags;
+	int cpu = 0;	/* the summary always goes into cpu 0's SUMMARY_BUF */
+
+	local_irq_save(flags);
+	gator_buffer_write_string(cpu, SUMMARY_BUF, NEWLINE_CANARY);
+	gator_buffer_write_packed_int64(cpu, SUMMARY_BUF, timestamp);
+	gator_buffer_write_packed_int64(cpu, SUMMARY_BUF, uptime);
+	gator_buffer_write_packed_int64(cpu, SUMMARY_BUF, monotonic_delta);
+	gator_buffer_write_string(cpu, SUMMARY_BUF, "uname");	/* strings from here on are written as key, value pairs */
+	gator_buffer_write_string(cpu, SUMMARY_BUF, uname);
+#if GATOR_IKS_SUPPORT
+	gator_buffer_write_string(cpu, SUMMARY_BUF, "iks");
+	gator_buffer_write_string(cpu, SUMMARY_BUF, "");
+#endif
+	// Let Streamline know which GPU is used so that it can label the GPU Activity appropriately. This is a temporary fix, to be improved in a future release.
+#ifdef MALI_SUPPORT
+	gator_buffer_write_string(cpu, SUMMARY_BUF, "mali_type");
+#if (MALI_SUPPORT == MALI_4xx)
+	gator_buffer_write_string(cpu, SUMMARY_BUF, "4xx");
+#elif (MALI_SUPPORT == MALI_T6xx)
+	gator_buffer_write_string(cpu, SUMMARY_BUF, "6xx");
+#else
+	gator_buffer_write_string(cpu, SUMMARY_BUF, "unknown");
+#endif
+#endif
+	gator_buffer_write_string(cpu, SUMMARY_BUF, "");	/* presumably an empty key terminates the key/value list - confirm against the reader */
+	// Commit the buffer now so it can be one of the first frames read by Streamline
+	gator_commit_buffer(cpu, SUMMARY_BUF, gator_get_time());
+	local_irq_restore(flags);
+}
+
+static bool marshal_cookie_header(const char *text)	/* True when this cpu's NAME_BUF has room for the cookie record marshal_cookie() writes for text */
+{
+	const size_t needed = strlen(text) + 3 * MAXSIZE_PACK32;	// text bytes plus three packed-int slots
+	return buffer_check_space(get_physical_cpu(), NAME_BUF, needed);
+}
+
+static void marshal_cookie(int cookie, const char *text)	/* Write a MESSAGE_COOKIE record (type, cookie, text) to this cpu's NAME_BUF; caller must have checked space */
+{
+	int cpu = get_physical_cpu();
+	// buffer_check_space already called by marshal_cookie_header
+	gator_buffer_write_packed_int(cpu, NAME_BUF, MESSAGE_COOKIE);
+	gator_buffer_write_packed_int(cpu, NAME_BUF, cookie);
+	gator_buffer_write_string(cpu, NAME_BUF, text);
+	buffer_check(cpu, NAME_BUF, gator_get_time());
+}
+
+static void marshal_thread_name(int pid, char *name)	/* Write a MESSAGE_THREAD_NAME record (type, time, pid, name) to this cpu's NAME_BUF, dropping it if there is no room */
+{
+	unsigned long irq_state, core;
+	u64 now;
+	local_irq_save(irq_state);
+	core = get_physical_cpu();	// sampled with interrupts already off
+	now = gator_get_time();
+	if (buffer_check_space(core, NAME_BUF, TASK_COMM_LEN + 3 * MAXSIZE_PACK32 + MAXSIZE_PACK64)) {
+		gator_buffer_write_packed_int(core, NAME_BUF, MESSAGE_THREAD_NAME);
+		gator_buffer_write_packed_int64(core, NAME_BUF, now);
+		gator_buffer_write_packed_int(core, NAME_BUF, pid);
+		gator_buffer_write_string(core, NAME_BUF, name);
+	}
+	buffer_check(core, NAME_BUF, now);	// runs whether or not the record was written
+	local_irq_restore(irq_state);
+}
+
+static void marshal_link(int cookie, int tgid, int pid)	/* Write a MESSAGE_LINK record (type, time, cookie, tgid, pid) to this cpu's NAME_BUF, dropping it if there is no room */
+{
+	unsigned long cpu, flags;
+	u64 time;
+	local_irq_save(flags);
+	cpu = get_physical_cpu();	// sampled after interrupts are off, as marshal_thread_name() does, so cpu and buffer cannot diverge
+	time = gator_get_time();
+	if (buffer_check_space(cpu, NAME_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) {
+		gator_buffer_write_packed_int(cpu, NAME_BUF, MESSAGE_LINK);
+		gator_buffer_write_packed_int64(cpu, NAME_BUF, time);
+		gator_buffer_write_packed_int(cpu, NAME_BUF, cookie);
+		gator_buffer_write_packed_int(cpu, NAME_BUF, tgid);
+		gator_buffer_write_packed_int(cpu, NAME_BUF, pid);
+	}
+	// Check and commit; commit is set to occur once buffer is 3/4 full
+	buffer_check(cpu, NAME_BUF, time);
+	local_irq_restore(flags);
+}
+
+static bool marshal_backtrace_header(int exec_cookie, int tgid, int pid, int inKernel, u64 time)	/* Open a backtrace record in this cpu's BACKTRACE_BUF; false (after a commit check) when a full-depth record would not fit */
+{
+	int cpu = get_physical_cpu();
+	// Reserve header + footer, plus per frame the packed int cookie and packed int64 address that marshal_backtrace() writes
+	if (!buffer_check_space(cpu, BACKTRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32 + gator_backtrace_depth * (MAXSIZE_PACK32 + MAXSIZE_PACK64))) {
+		// Check and commit; commit is set to occur once buffer is 3/4 full
+		buffer_check(cpu, BACKTRACE_BUF, time);
+		return false;
+	}
+
+	gator_buffer_write_packed_int64(cpu, BACKTRACE_BUF, time);
+	gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, exec_cookie);
+	gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, tgid);
+	gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, pid);
+	gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, inKernel);
+
+	return true;
+}
+
+static void marshal_backtrace(unsigned long address, int cookie)	/* Append one frame (packed int cookie, then packed int64 address) to this cpu's BACKTRACE_BUF; no space check here */
+{
+	int cpu = get_physical_cpu();
+	gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, cookie);
+	gator_buffer_write_packed_int64(cpu, BACKTRACE_BUF, address);
+}
+
+static void marshal_backtrace_footer(u64 time)	/* Close the backtrace record with MESSAGE_END_BACKTRACE and run the commit check */
+{
+	int cpu = get_physical_cpu();
+	gator_buffer_write_packed_int(cpu, BACKTRACE_BUF, MESSAGE_END_BACKTRACE);
+
+	// Check and commit; commit is set to occur once buffer is 3/4 full
+	buffer_check(cpu, BACKTRACE_BUF, time);
+}
+
+static bool marshal_event_header(u64 time)	/* Start a counter block with a timestamp entry; false when this cpu's BLOCK_COUNTER_BUF has no room */
+{
+	unsigned long irq_state;
+	const unsigned long core = get_physical_cpu();
+	bool has_room;
+
+	local_irq_save(irq_state);
+	has_room = buffer_check_space(core, BLOCK_COUNTER_BUF, MAXSIZE_PACK32 + MAXSIZE_PACK64);
+	if (has_room) {
+		gator_buffer_write_packed_int(core, BLOCK_COUNTER_BUF, 0);	// key of zero indicates a timestamp
+		gator_buffer_write_packed_int64(core, BLOCK_COUNTER_BUF, time);
+	}
+	local_irq_restore(irq_state);
+	return has_room;
+}
+
+static void marshal_event(int len, int *buffer)	/* Append len ints from buffer, taken as (key, value) pairs, to this cpu's BLOCK_COUNTER_BUF */
+{
+	unsigned long i, flags, cpu = get_physical_cpu();
+
+	if (len <= 0)
+		return;
+
+	// length must be even since all data is a (key, value) pair
+	if (len & 0x1) {
+		pr_err("gator: invalid counter data detected and discarded\n");	// newline added so the log line is terminated
+		return;
+	}
+
+	// events must be written in key,value pairs
+	local_irq_save(flags);
+	for (i = 0; i < len; i += 2) {
+		if (!buffer_check_space(cpu, BLOCK_COUNTER_BUF, 2 * MAXSIZE_PACK32)) {
+			break;	// out of room: the remaining pairs are dropped
+		}
+		gator_buffer_write_packed_int(cpu, BLOCK_COUNTER_BUF, buffer[i]);
+		gator_buffer_write_packed_int(cpu, BLOCK_COUNTER_BUF, buffer[i + 1]);
+	}
+	local_irq_restore(flags);
+}
+
+static void marshal_event64(int len, long long *buffer64)	/* Append len 64-bit values from buffer64, taken as (key, value) pairs, to this cpu's BLOCK_COUNTER_BUF */
+{
+	unsigned long i, flags, cpu = get_physical_cpu();
+
+	if (len <= 0)
+		return;
+
+	// length must be even since all data is a (key, value) pair
+	if (len & 0x1) {
+		pr_err("gator: invalid counter data detected and discarded\n");	// newline added so the log line is terminated
+		return;
+	}
+
+	// events must be written in key,value pairs
+	local_irq_save(flags);
+	for (i = 0; i < len; i += 2) {
+		if (!buffer_check_space(cpu, BLOCK_COUNTER_BUF, 2 * MAXSIZE_PACK64)) {
+			break;	// out of room: the remaining pairs are dropped
+		}
+		gator_buffer_write_packed_int64(cpu, BLOCK_COUNTER_BUF, buffer64[i]);
+		gator_buffer_write_packed_int64(cpu, BLOCK_COUNTER_BUF, buffer64[i + 1]);
+	}
+	local_irq_restore(flags);
+}
+
+#if GATOR_CPU_FREQ_SUPPORT
+static void marshal_event_single(int core, int key, int value)	/* Write one timestamped counter sample (time, core, key, value) to this cpu's COUNTER_BUF, dropping it if there is no room */
+{
+	unsigned long irq_state, this_cpu;
+	u64 now;
+
+	local_irq_save(irq_state);
+	this_cpu = get_physical_cpu();	// the buffer owner; "core" is just a field of the record
+	now = gator_get_time();
+	if (buffer_check_space(this_cpu, COUNTER_BUF, MAXSIZE_PACK64 + 3 * MAXSIZE_PACK32)) {
+		gator_buffer_write_packed_int64(this_cpu, COUNTER_BUF, now);
+		gator_buffer_write_packed_int(this_cpu, COUNTER_BUF, core);
+		gator_buffer_write_packed_int(this_cpu, COUNTER_BUF, key);
+		gator_buffer_write_packed_int(this_cpu, COUNTER_BUF, value);
+	}
+	// The commit check runs whether or not the sample was written
+	buffer_check(this_cpu, COUNTER_BUF, now);
+	local_irq_restore(irq_state);
+}
+#endif
+
+static void marshal_sched_gpu_start(int unit, int core, int tgid, int pid)	/* Write a MESSAGE_GPU_START record (type, time, unit, core, tgid, pid) to this cpu's GPU_TRACE_BUF */
+{
+	unsigned long cpu = get_physical_cpu(), flags;	/* NOTE(review): cpu is sampled before interrupts are disabled */
+	u64 time;
+
+	if (!per_cpu(gator_buffer, cpu)[GPU_TRACE_BUF])	/* buffer not allocated: nothing to record into */
+		return;
+
+	local_irq_save(flags);
+	time = gator_get_time();
+	if (buffer_check_space(cpu, GPU_TRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) {
+		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, MESSAGE_GPU_START);
+		gator_buffer_write_packed_int64(cpu, GPU_TRACE_BUF, time);
+		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, unit);
+		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, core);
+		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, tgid);
+		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, pid);
+	}
+	// Check and commit; commit is set to occur once buffer is 3/4 full
+	buffer_check(cpu, GPU_TRACE_BUF, time);
+	local_irq_restore(flags);
+}
+
+static void marshal_sched_gpu_stop(int unit, int core)	/* Write a MESSAGE_GPU_STOP record (type, time, unit, core) to this cpu's GPU_TRACE_BUF */
+{
+	unsigned long cpu = get_physical_cpu(), flags;	/* NOTE(review): cpu is sampled before interrupts are disabled */
+	u64 time;
+
+	if (!per_cpu(gator_buffer, cpu)[GPU_TRACE_BUF])	/* buffer not allocated: nothing to record into */
+		return;
+
+	local_irq_save(flags);
+	time = gator_get_time();
+	if (buffer_check_space(cpu, GPU_TRACE_BUF, MAXSIZE_PACK64 + 3 * MAXSIZE_PACK32)) {
+		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, MESSAGE_GPU_STOP);
+		gator_buffer_write_packed_int64(cpu, GPU_TRACE_BUF, time);
+		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, unit);
+		gator_buffer_write_packed_int(cpu, GPU_TRACE_BUF, core);
+	}
+	// Check and commit; commit is set to occur once buffer is 3/4 full
+	buffer_check(cpu, GPU_TRACE_BUF, time);
+	local_irq_restore(flags);
+}
+
+static void marshal_sched_trace_start(int tgid, int pid, int cookie)	/* Write a MESSAGE_SCHED_START record (type, time, tgid, pid, cookie) to this cpu's SCHED_TRACE_BUF */
+{
+	unsigned long cpu = get_physical_cpu(), flags;	/* NOTE(review): cpu is sampled before interrupts are disabled */
+	u64 time;
+
+	if (!per_cpu(gator_buffer, cpu)[SCHED_TRACE_BUF])	/* buffer not allocated: nothing to record into */
+		return;
+
+	local_irq_save(flags);
+	time = gator_get_time();
+	if (buffer_check_space(cpu, SCHED_TRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) {
+		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, MESSAGE_SCHED_START);
+		gator_buffer_write_packed_int64(cpu, SCHED_TRACE_BUF, time);
+		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, tgid);
+		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, pid);
+		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, cookie);
+	}
+	// Check and commit; commit is set to occur once buffer is 3/4 full
+	buffer_check(cpu, SCHED_TRACE_BUF, time);
+	local_irq_restore(flags);
+}
+
+static void marshal_sched_trace_switch(int tgid, int pid, int cookie, int state)	/* Write a MESSAGE_SCHED_SWITCH record (type, time, tgid, pid, cookie, state) to this cpu's SCHED_TRACE_BUF */
+{
+	unsigned long cpu = get_physical_cpu(), flags;	/* NOTE(review): cpu is sampled before interrupts are disabled */
+	u64 time;
+
+	if (!per_cpu(gator_buffer, cpu)[SCHED_TRACE_BUF])	/* buffer not allocated: nothing to record into */
+		return;
+
+	local_irq_save(flags);
+	time = gator_get_time();
+	if (buffer_check_space(cpu, SCHED_TRACE_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) {
+		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, MESSAGE_SCHED_SWITCH);
+		gator_buffer_write_packed_int64(cpu, SCHED_TRACE_BUF, time);
+		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, tgid);
+		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, pid);
+		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, cookie);
+		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, state);
+	}
+	// Check and commit; commit is set to occur once buffer is 3/4 full
+	buffer_check(cpu, SCHED_TRACE_BUF, time);
+	local_irq_restore(flags);
+}
+
+static void marshal_sched_trace_exit(int tgid, int pid)	/* Write a MESSAGE_SCHED_EXIT record (type, time, pid) to this cpu's SCHED_TRACE_BUF; tgid is accepted but not written */
+{
+	unsigned long cpu = get_physical_cpu(), flags;	/* NOTE(review): cpu is sampled before interrupts are disabled */
+	u64 time;
+
+	if (!per_cpu(gator_buffer, cpu)[SCHED_TRACE_BUF])	/* buffer not allocated: nothing to record into */
+		return;
+
+	local_irq_save(flags);
+	time = gator_get_time();
+	if (buffer_check_space(cpu, SCHED_TRACE_BUF, MAXSIZE_PACK64 + 2 * MAXSIZE_PACK32)) {
+		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, MESSAGE_SCHED_EXIT);
+		gator_buffer_write_packed_int64(cpu, SCHED_TRACE_BUF, time);
+		gator_buffer_write_packed_int(cpu, SCHED_TRACE_BUF, pid);
+	}
+	// Check and commit; commit is set to occur once buffer is 3/4 full
+	buffer_check(cpu, SCHED_TRACE_BUF, time);
+	local_irq_restore(flags);
+}
+
+#if GATOR_CPU_FREQ_SUPPORT
+static void marshal_idle(int core, int state)	/* Write an idle record (state, time, core) to this cpu's IDLE_BUF, dropping it if there is no room */
+{
+	unsigned long irq_state, this_cpu;
+	u64 now;
+
+	local_irq_save(irq_state);
+	this_cpu = get_physical_cpu();	// the buffer owner; "core" is just a field of the record
+	now = gator_get_time();
+	if (buffer_check_space(this_cpu, IDLE_BUF, MAXSIZE_PACK64 + 2 * MAXSIZE_PACK32)) {
+		gator_buffer_write_packed_int(this_cpu, IDLE_BUF, state);	// state is written in the leading slot of the record
+		gator_buffer_write_packed_int64(this_cpu, IDLE_BUF, now);
+		gator_buffer_write_packed_int(this_cpu, IDLE_BUF, core);
+	}
+	// The commit check runs whether or not the record was written
+	buffer_check(this_cpu, IDLE_BUF, now);
+	local_irq_restore(irq_state);
+}
+#endif
+
+static int marshal_frame_type_of(int buftype)	/* Map a buffer index to its FRAME_* id; -1 for an unknown index */
+{
+	switch (buftype) {
+	case SUMMARY_BUF:
+		return FRAME_SUMMARY;
+	case BACKTRACE_BUF:
+		return FRAME_BACKTRACE;
+	case NAME_BUF:
+		return FRAME_NAME;
+	case COUNTER_BUF:
+		return FRAME_COUNTER;
+	case BLOCK_COUNTER_BUF:
+		return FRAME_BLOCK_COUNTER;
+	case ANNOTATE_BUF:
+		return FRAME_ANNOTATE;
+	case SCHED_TRACE_BUF:
+		return FRAME_SCHED_TRACE;
+	case GPU_TRACE_BUF:
+		return FRAME_GPU_TRACE;
+	case IDLE_BUF:
+		return FRAME_IDLE;
+	default:
+		return -1;
+	}
+}
+
+/*
+ * Write the frame header at the current write position of one buffer.
+ *
+ * Layout, in write order:
+ *   - the response type as a packed int, only when gator_response_type > 0
+ *   - four bytes skipped for the unpacked frame length
+ *   - the frame type as a packed int
+ *   - the cpu number as a packed int
+ *
+ * Buffers that were never allocated (NULL) are left untouched.
+ */
+static void marshal_frame(int cpu, int buftype)
+{
+	if (!per_cpu(gator_buffer, cpu)[buftype])
+		return;
+
+	// add response type
+	if (gator_response_type > 0)
+		gator_buffer_write_packed_int(cpu, buftype, gator_response_type);
+
+	// leave space for 4-byte unpacked length
+	per_cpu(gator_buffer_write, cpu)[buftype] = (per_cpu(gator_buffer_write, cpu)[buftype] + sizeof(s32)) & gator_buffer_mask[buftype];
+
+	// add frame type and core number
+	gator_buffer_write_packed_int(cpu, buftype, marshal_frame_type_of(buftype));
+	gator_buffer_write_packed_int(cpu, buftype, cpu);
+}
+
+#if defined(__arm__) || defined(__aarch64__)
+static void marshal_core_name(const int core, const int cpuid, const char *name)	/* Write an HRTIMER_CORE_NAME record (type, core, cpuid, name) to this cpu's NAME_BUF and commit it at once */
+{
+	int cpu = get_physical_cpu();
+	unsigned long flags;
+	local_irq_save(flags);
+	// Three packed ints plus one more slot for the string, matching how marshal_cookie_header() and marshal_thread_name() budget a string
+	if (buffer_check_space(cpu, NAME_BUF, 4 * MAXSIZE_PACK32 + MAXSIZE_CORE_NAME)) {
+		gator_buffer_write_packed_int(cpu, NAME_BUF, HRTIMER_CORE_NAME);
+		gator_buffer_write_packed_int(cpu, NAME_BUF, core);
+		gator_buffer_write_packed_int(cpu, NAME_BUF, cpuid);
+		gator_buffer_write_string(cpu, NAME_BUF, name);
+	}
+	gator_commit_buffer(cpu, NAME_BUF, gator_get_time());	// Commit core names now so that they can show up in live
+	local_irq_restore(flags);
+}
+#endif
diff --git a/drivers/gator/gator_pack.c b/drivers/gator/gator_pack.c
new file mode 100644
index 000000000000..2c082f283adc
--- /dev/null
+++ b/drivers/gator/gator_pack.c
@@ -0,0 +1,58 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+static void gator_buffer_write_packed_int(int cpu, int buftype, int x)	/* Append x to the ring buffer as 7-bit groups, low group first; bit 7 of a byte means another byte follows */
+{
+	char *const ring = per_cpu(gator_buffer, cpu)[buftype];
+	const uint32_t mask = gator_buffer_mask[buftype];
+	uint32_t pos = per_cpu(gator_buffer_write, cpu)[buftype];
+	bool last;
+
+	do {
+		// Take the low seven bits and shift them out of x
+		char group = x & 0x7f;
+		x >>= 7;
+
+		// Finished once what is left of x only repeats bit 6 of this group (all zeros or all ones)
+		last = (group & 0x40) ? (x == -1) : (x == 0);
+		if (!last)
+			group |= 0x80;
+
+		ring[pos & mask] = group;	// the mask wraps the index inside the buffer
+		pos++;
+	} while (!last);
+
+	// Store the advanced, wrapped write index
+	per_cpu(gator_buffer_write, cpu)[buftype] = pos & mask;
+}
+
+static void gator_buffer_write_packed_int64(int cpu, int buftype, long long x)	/* 64-bit variant: append x as 7-bit groups, low group first; bit 7 of a byte means another byte follows */
+{
+	char *const ring = per_cpu(gator_buffer, cpu)[buftype];
+	const uint32_t mask = gator_buffer_mask[buftype];
+	uint32_t pos = per_cpu(gator_buffer_write, cpu)[buftype];
+	bool last;
+
+	do {
+		// Take the low seven bits and shift them out of x
+		char group = x & 0x7f;
+		x >>= 7;
+
+		// Finished once what is left of x only repeats bit 6 of this group (all zeros or all ones)
+		last = (group & 0x40) ? (x == -1) : (x == 0);
+		if (!last)
+			group |= 0x80;
+
+		ring[pos & mask] = group;	// the mask wraps the index inside the buffer
+		pos++;
+	} while (!last);
+
+	// Store the advanced, wrapped write index
+	per_cpu(gator_buffer_write, cpu)[buftype] = pos & mask;
+}
diff --git a/drivers/gator/gator_trace_gpu.c b/drivers/gator/gator_trace_gpu.c
new file mode 100644
index 000000000000..be135b4aac56
--- /dev/null
+++ b/drivers/gator/gator_trace_gpu.c
@@ -0,0 +1,294 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "gator.h"
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+
+#ifdef MALI_SUPPORT
+#include "linux/mali_linux_trace.h"
+#endif
+#include "gator_trace_gpu.h"
+
+/*
+ * Taken from MALI_PROFILING_EVENT_TYPE_* items in Mali DDK.
+ */
+#define EVENT_TYPE_SINGLE  0
+#define EVENT_TYPE_START   1
+#define EVENT_TYPE_STOP    2
+#define EVENT_TYPE_SUSPEND 3
+#define EVENT_TYPE_RESUME  4
+
+/* Note whether tracepoints have been registered */
+static int mali_timeline_trace_registered;	/* 1 while the mali_timeline_event probe is registered */
+static int mali_job_slots_trace_registered;	/* 1 while the mali_job_slots_event probe is registered */
+static int gpu_trace_registered;	/* 1 while both gpu_activity_start and gpu_activity_stop probes are registered */
+
+enum {	/* GPU unit ids; used as the first index of mali_gpu_jobs and as the "unit" field of GPU records */
+	GPU_UNIT_NONE = 0,
+	GPU_UNIT_VP,
+	GPU_UNIT_FP,
+	GPU_UNIT_CL,
+	NUMBER_OF_GPU_UNITS	/* count of ids above, including NONE */
+};
+
+#define MALI_4xx     (0x0b07)	/* values MALI_SUPPORT is compared against to select the Mali variant */
+#define MALI_T6xx    (0x0056)
+
+struct mali_gpu_job {	/* per (unit, core) job bookkeeping used by mali_gpu_enqueue() and mali_gpu_stop() */
+	int count;	/* jobs started but not yet stopped */
+	int last_tgid;	/* tgid of the most recently queued job (set only while another job is active) */
+	int last_pid;	/* pid of the most recently queued job */
+	int last_job_id;	/* job id of the most recently queued job; stored but not read in this file's visible code */
+};
+
+#define NUMBER_OF_GPU_CORES 16	/* second dimension of mali_gpu_jobs */
+static struct mali_gpu_job mali_gpu_jobs[NUMBER_OF_GPU_UNITS][NUMBER_OF_GPU_CORES];	/* indexed [unit][core]; zeroed in gator_trace_gpu_start() */
+static DEFINE_SPINLOCK(mali_gpu_jobs_lock);	/* guards mali_gpu_jobs in mali_gpu_enqueue() and mali_gpu_stop() */
+
+/* Only one event should be running on a unit and core at a time (ie, a start
+ * event can only be followed by a stop and vice versa), but because the kernel
+ * only knows when a job is enqueued and not started, it is possible for a
+ * start1, start2, stop1, stop2. Change it back into start1, stop1, start2,
+ * stop2 by queueing up start2 and releasing it when stop1 is received.
+ */
+static void mali_gpu_enqueue(int unit, int core, int tgid, int pid, int job_id)	/* Record a job start on (unit, core): emit it now if the core is idle, otherwise remember it for mali_gpu_stop() */
+{
+	int count;
+
+	if (unit < 0 || unit >= NUMBER_OF_GPU_UNITS || core < 0 || core >= NUMBER_OF_GPU_CORES)
+		return;	// unit/core arrive straight from tracepoint arguments; never index outside mali_gpu_jobs
+	spin_lock(&mali_gpu_jobs_lock);
+	count = mali_gpu_jobs[unit][core].count;
+	BUG_ON(count < 0);
+	++mali_gpu_jobs[unit][core].count;
+	if (count) {	// a job is already active here: keep only the latest queued one
+		mali_gpu_jobs[unit][core].last_tgid = tgid;
+		mali_gpu_jobs[unit][core].last_pid = pid;
+		mali_gpu_jobs[unit][core].last_job_id = job_id;
+	}
+	spin_unlock(&mali_gpu_jobs_lock);
+	if (!count)	// core was idle: the start is reported immediately, outside the lock
+		marshal_sched_gpu_start(unit, core, tgid, pid/*, job_id*/);
+}
+
+static void mali_gpu_stop(int unit, int core)	/* Record a job stop on (unit, core) and, if another job was queued behind it, report that job's start */
+{
+	int count;
+	int last_tgid = 0;
+	int last_pid = 0;
+	//int last_job_id = 0;
+
+	if (unit < 0 || unit >= NUMBER_OF_GPU_UNITS || core < 0 || core >= NUMBER_OF_GPU_CORES)
+		return;	// unit/core arrive straight from tracepoint arguments; never index outside mali_gpu_jobs
+	spin_lock(&mali_gpu_jobs_lock);
+	if (mali_gpu_jobs[unit][core].count == 0) {	// stop without a matching start: ignore it
+		spin_unlock(&mali_gpu_jobs_lock);
+		return;
+	}
+	--mali_gpu_jobs[unit][core].count;
+	count = mali_gpu_jobs[unit][core].count;
+	if (count) {	// snapshot the queued job while still holding the lock
+		last_tgid = mali_gpu_jobs[unit][core].last_tgid;
+		last_pid = mali_gpu_jobs[unit][core].last_pid;
+		//last_job_id = mali_gpu_jobs[unit][core].last_job_id;
+	}
+	spin_unlock(&mali_gpu_jobs_lock);
+	marshal_sched_gpu_stop(unit, core);
+	if (count)	// release the queued start only after the stop has been written
+		marshal_sched_gpu_start(unit, core, last_tgid, last_pid/*, last_job_id*/);
+}
+
+#if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_T6xx)
+#include "gator_events_mali_4xx.h"
+
+/*
+ * Taken from MALI_PROFILING_EVENT_CHANNEL_* in Mali DDK.
+ */
+enum {
+	EVENT_CHANNEL_SOFTWARE = 0,
+	EVENT_CHANNEL_VP0 = 1,
+	EVENT_CHANNEL_FP0 = 5,
+	EVENT_CHANNEL_FP1,
+	EVENT_CHANNEL_FP2,
+	EVENT_CHANNEL_FP3,
+	EVENT_CHANNEL_FP4,
+	EVENT_CHANNEL_FP5,
+	EVENT_CHANNEL_FP6,
+	EVENT_CHANNEL_FP7,
+	EVENT_CHANNEL_GPU = 21
+};
+
+/**
+ * These events are applicable when the type MALI_PROFILING_EVENT_TYPE_SINGLE is used from the GPU channel
+ */
+enum {
+	EVENT_REASON_SINGLE_GPU_NONE = 0,
+	EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE = 1,
+};
+
+GATOR_DEFINE_PROBE(mali_timeline_event, TP_PROTO(unsigned int event_id, unsigned int d0, unsigned int d1, unsigned int d2, unsigned int d3, unsigned int d4))	/* Probe body: decode event_id and turn VP/FP start/stop events into GPU activity records; d2..d4 are unused here */
+{
+	unsigned int component, state;
+
+	// do as much work as possible before disabling interrupts
+	component = (event_id >> 16) & 0xFF;	// component is an 8-bit field
+	state = (event_id >> 24) & 0xF;	// state is a 4-bit field
+
+	switch (state) {
+	case EVENT_TYPE_START:
+		if (component == EVENT_CHANNEL_VP0) {
+			/* tgid = d0; pid = d1; */
+			mali_gpu_enqueue(GPU_UNIT_VP, 0, d0, d1, 0);
+		} else if (component >= EVENT_CHANNEL_FP0 && component <= EVENT_CHANNEL_FP7) {
+			/* tgid = d0; pid = d1; */
+			mali_gpu_enqueue(GPU_UNIT_FP, component - EVENT_CHANNEL_FP0, d0, d1, 0);	/* FP channel number becomes core index 0..7 */
+		}
+		break;
+
+	case EVENT_TYPE_STOP:
+		if (component == EVENT_CHANNEL_VP0) {
+			mali_gpu_stop(GPU_UNIT_VP, 0);
+		} else if (component >= EVENT_CHANNEL_FP0 && component <= EVENT_CHANNEL_FP7) {
+			mali_gpu_stop(GPU_UNIT_FP, component - EVENT_CHANNEL_FP0);
+		}
+		break;
+
+	case EVENT_TYPE_SINGLE:
+		if (component == EVENT_CHANNEL_GPU) {
+			unsigned int reason = (event_id & 0xffff);	/* low 16 bits of event_id */
+
+			if (reason == EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE) {
+				gator_events_mali_log_dvfs_event(d0, d1);
+			}
+		}
+		break;
+
+	default:	/* other states (e.g. suspend/resume) are ignored */
+		break;
+	}
+}
+#endif
+
+#if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_T6xx)
+#if defined(MALI_JOB_SLOTS_EVENT_CHANGED)	/* selects the probe prototype with the extra job_id argument */
+GATOR_DEFINE_PROBE(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid, unsigned char job_id))
+#else
+GATOR_DEFINE_PROBE(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid))
+#endif
+{
+	unsigned int component, state, unit;
+#if !defined(MALI_JOB_SLOTS_EVENT_CHANGED)
+	unsigned char job_id = 0;	/* prototype without job_id: use 0 */
+#endif
+
+	component = (event_id >> 16) & 0xFF;	// component is an 8-bit field
+	state = (event_id >> 24) & 0xF;	// state is a 4-bit field
+
+	switch (component) {	/* component 0/1/2 maps to the FP/VP/CL unit; anything else is ignored */
+	case 0:
+		unit = GPU_UNIT_FP;
+		break;
+	case 1:
+		unit = GPU_UNIT_VP;
+		break;
+	case 2:
+		unit = GPU_UNIT_CL;
+		break;
+	default:
+		unit = GPU_UNIT_NONE;
+	}
+
+	if (unit != GPU_UNIT_NONE) {
+		switch (state) {
+		case EVENT_TYPE_START:
+			mali_gpu_enqueue(unit, 0, tgid, (pid != 0 ? pid : tgid), job_id);	/* core is always 0 here; a zero pid falls back to tgid */
+			break;
+		case EVENT_TYPE_STOP:
+			mali_gpu_stop(unit, 0);
+			break;
+		default:
+			/*
+			 * Some jobs can be soft-stopped, so ensure that this terminates the activity trace.
+			 */
+			mali_gpu_stop(unit, 0);
+		}
+	}
+}
+#endif
+
+GATOR_DEFINE_PROBE(gpu_activity_start, TP_PROTO(int gpu_unit, int gpu_core, struct task_struct *p))	/* Probe body for the generic gpu_activity_start tracepoint: forwards to mali_gpu_enqueue() with job id 0 */
+{
+	mali_gpu_enqueue(gpu_unit, gpu_core, (int)p->tgid, (int)p->pid, 0);	/* p is dereferenced without a NULL check */
+}
+
+GATOR_DEFINE_PROBE(gpu_activity_stop, TP_PROTO(int gpu_unit, int gpu_core))	/* Probe body for the generic gpu_activity_stop tracepoint: forwards to mali_gpu_stop() */
+{
+	mali_gpu_stop(gpu_unit, gpu_core);
+}
+
+int gator_trace_gpu_start(void)
+{
+	/*
+	 * Returns nonzero for installation failed
+	 * Absence of gpu trace points is not an error
+	 */
+
+	memset(&mali_gpu_jobs, 0, sizeof(mali_gpu_jobs));
+	gpu_trace_registered = mali_timeline_trace_registered = mali_job_slots_trace_registered = 0;
+
+#if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_T6xx)
+	if (!GATOR_REGISTER_TRACE(mali_timeline_event)) {
+		mali_timeline_trace_registered = 1;
+	}
+#endif
+
+#if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_T6xx)
+	if (!GATOR_REGISTER_TRACE(mali_job_slots_event)) {
+		mali_job_slots_trace_registered = 1;
+	}
+#endif
+
+	if (!mali_timeline_trace_registered) {
+		if (GATOR_REGISTER_TRACE(gpu_activity_start)) {
+			return 0;
+		}
+		if (GATOR_REGISTER_TRACE(gpu_activity_stop)) {
+			GATOR_UNREGISTER_TRACE(gpu_activity_start);
+			return 0;
+		}
+		gpu_trace_registered = 1;
+	}
+
+	return 0;
+}
+
+void gator_trace_gpu_stop(void)	/* Unregister whichever GPU probes gator_trace_gpu_start() registered and clear the flags */
+{
+#if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_T6xx)
+	if (mali_timeline_trace_registered) {
+		GATOR_UNREGISTER_TRACE(mali_timeline_event);
+	}
+#endif
+
+#if defined(MALI_SUPPORT) && (MALI_SUPPORT == MALI_T6xx)
+	if (mali_job_slots_trace_registered) {
+		GATOR_UNREGISTER_TRACE(mali_job_slots_event);
+	}
+#endif
+
+	if (gpu_trace_registered) {	/* set only when both generic probes were registered */
+		GATOR_UNREGISTER_TRACE(gpu_activity_stop);
+		GATOR_UNREGISTER_TRACE(gpu_activity_start);
+	}
+
+	gpu_trace_registered = mali_timeline_trace_registered = mali_job_slots_trace_registered = 0;
+}
diff --git a/drivers/gator/gator_trace_gpu.h b/drivers/gator/gator_trace_gpu.h
new file mode 100644
index 000000000000..bb0f42d290da
--- /dev/null
+++ b/drivers/gator/gator_trace_gpu.h
@@ -0,0 +1,79 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * NOTE(review): tracepoint headers conventionally define TRACE_SYSTEM at this
+ * point, and nothing in this header reads TRACE_GPU - confirm whether
+ * TRACE_SYSTEM was intended.
+ */
+#undef TRACE_GPU
+#define TRACE_GPU gpu
+
+#if !defined(_TRACE_GPU_H)
+#define _TRACE_GPU_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * UNIT - the GPU processor type
+ *  1 = Vertex Processor
+ *  2 = Fragment Processor
+ *
+ * CORE - the GPU processor core number
+ *  this is not the CPU core number
+ */
+
+/*
+ * Tracepoint fired when a GPU unit starts activity on a core; records the owning task's comm and pid
+ */
+TRACE_EVENT(gpu_activity_start,
+
+	    TP_PROTO(int gpu_unit, int gpu_core, struct task_struct *p),
+
+	    TP_ARGS(gpu_unit, gpu_core, p),
+
+	    TP_STRUCT__entry(
+			     __field(int, gpu_unit)
+			     __field(int, gpu_core)
+			     __array(char, comm, TASK_COMM_LEN)
+			     __field(pid_t, pid)
+	    ),
+
+	    TP_fast_assign(
+			   __entry->gpu_unit = gpu_unit;
+			   __entry->gpu_core = gpu_core;
+			   memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+			   __entry->pid = p->pid;
+	    ),
+
+	    TP_printk("unit=%d core=%d comm=%s pid=%d",
+		      __entry->gpu_unit, __entry->gpu_core, __entry->comm,
+		      __entry->pid)
+    );
+
+/*
+ * Tracepoint fired when a GPU unit stops activity on a core
+ */
+TRACE_EVENT(gpu_activity_stop,
+
+	    TP_PROTO(int gpu_unit, int gpu_core),
+
+	    TP_ARGS(gpu_unit, gpu_core),
+
+	    TP_STRUCT__entry(
+			     __field(int, gpu_unit)
+			     __field(int, gpu_core)
+	    ),
+
+	    TP_fast_assign(
+			   __entry->gpu_unit = gpu_unit;
+			   __entry->gpu_core = gpu_core;
+	    ),
+
+	    TP_printk("unit=%d core=%d", __entry->gpu_unit, __entry->gpu_core)
+    );
+
+#endif /* _TRACE_GPU_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/gator/gator_trace_power.c b/drivers/gator/gator_trace_power.c
new file mode 100644
index 000000000000..272e05684ee8
--- /dev/null
+++ b/drivers/gator/gator_trace_power.c
@@ -0,0 +1,203 @@
+/**
+ * Copyright (C) ARM Limited 2011-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/cpufreq.h>
+#include <trace/events/power.h>
+
+#if defined(__arm__)
+
+#include <asm/mach-types.h>
+
+// On ARM the idle enter/exit markers are emitted on every machine except the OMAP3 BeagleBoard
+#define implements_wfi() (!machine_is_omap3_beagle())
+
+#else
+
+// Non-ARM builds never emit the idle enter/exit markers
+#define implements_wfi() false
+
+#endif
+
+// cpu_frequency and cpu_idle trace points were introduced in Linux kernel v2.6.38
+// the now deprecated power_frequency trace point was available prior to 2.6.38, but only for x86
+#if GATOR_CPU_FREQ_SUPPORT
+// Indices into power_cpu_enabled[] and power_cpu_key[]
+enum {
+	POWER_CPU_FREQ,	// counter fed by the cpu_frequency tracepoint
+	POWER_CPU_IDLE,	// counter fed by the cpu_idle tracepoint
+	POWER_TOTAL	// number of power counters; sizes the two arrays
+};
+
+static DEFINE_PER_CPU(ulong, idle_prev_state);
+static ulong power_cpu_enabled[POWER_TOTAL];
+static ulong power_cpu_key[POWER_TOTAL];
+
+// Create the gatorfs entries ("enabled" and read-only "key") for the power counters.
+// The cpu_freq directory is only created when some online cpu reports a non-zero
+// frequency; the cpu_idle directory is always created.
+// Returns 0 on success, -1 if a directory could not be created.
+static int gator_trace_power_create_files(struct super_block *sb, struct dentry *root)
+{
+	struct dentry *freq_dir, *idle_dir;
+	bool have_freq = false;
+	int cpu;
+
+	// CONFIG_CPU_FREQ being defined does not mean cpufreq is in use, so
+	// only trust a non-zero reading from cpufreq_quick_get
+	for_each_online_cpu(cpu) {
+		have_freq = cpufreq_quick_get(cpu) > 0;
+		if (have_freq)
+			break;
+	}
+
+	if (have_freq) {
+		freq_dir = gatorfs_mkdir(sb, root, "Linux_power_cpu_freq");
+		if (freq_dir == NULL)
+			return -1;
+		gatorfs_create_ulong(sb, freq_dir, "enabled", &power_cpu_enabled[POWER_CPU_FREQ]);
+		gatorfs_create_ro_ulong(sb, freq_dir, "key", &power_cpu_key[POWER_CPU_FREQ]);
+	}
+
+	idle_dir = gatorfs_mkdir(sb, root, "Linux_power_cpu_idle");
+	if (idle_dir == NULL)
+		return -1;
+	gatorfs_create_ulong(sb, idle_dir, "enabled", &power_cpu_enabled[POWER_CPU_IDLE]);
+	gatorfs_create_ro_ulong(sb, idle_dir, "key", &power_cpu_key[POWER_CPU_IDLE]);
+
+	return 0;
+}
+
+// 'cpu' may not equal smp_processor_id(), i.e. may not be running on the core that is having the freq/idle state change
+// Probe for cpu_frequency: translates the reported cpu with lcpu_to_pcpu() and
+// marshals frequency * 1000 (presumably kHz to Hz - confirm) under the cpu_freq key
+GATOR_DEFINE_PROBE(cpu_frequency, TP_PROTO(unsigned int frequency, unsigned int cpu))
+{
+	const unsigned int pcpu = lcpu_to_pcpu(cpu);
+
+	marshal_event_single(pcpu, power_cpu_key[POWER_CPU_FREQ], frequency * 1000);
+}
+
+// Probe for cpu_idle: reports idle enter/exit transitions and, when the
+// cpu_idle counter is enabled, the new idle state for the affected cpu
+GATOR_DEFINE_PROBE(cpu_idle, TP_PROTO(unsigned int state, unsigned int cpu))
+{
+	const unsigned int pcpu = lcpu_to_pcpu(cpu);
+
+	// repeated notifications of the same state carry no information
+	if (state == per_cpu(idle_prev_state, pcpu))
+		return;
+
+	// PWR_EVENT_EXIT means leaving wfi; any other state means entering it
+	if (implements_wfi())
+		marshal_idle(pcpu, (state == PWR_EVENT_EXIT) ? MESSAGE_IDLE_EXIT : MESSAGE_IDLE_ENTER);
+
+	per_cpu(idle_prev_state, pcpu) = state;
+
+	// state is sent incremented by one so that no negative numbers are sent
+	if (power_cpu_enabled[POWER_CPU_IDLE])
+		marshal_event_single(pcpu, power_cpu_key[POWER_CPU_IDLE], state + 1);
+}
+
+// Called when a cpu comes online: if the cpu_freq counter is enabled, emit the
+// frequency cpufreq currently reports for the calling cpu
+static void gator_trace_power_online(void)
+{
+	int pcpu = get_physical_cpu();
+	int lcpu = get_logical_cpu();
+
+	if (!power_cpu_enabled[POWER_CPU_FREQ])
+		return;
+
+	marshal_event_single(pcpu, power_cpu_key[POWER_CPU_FREQ], cpufreq_quick_get(lcpu) * 1000);
+}
+
+// Called when a cpu goes offline: if the cpu_freq counter is enabled, report a
+// frequency of zero for the calling cpu
+static void gator_trace_power_offline(void)
+{
+	int pcpu = get_physical_cpu();
+
+	if (!power_cpu_enabled[POWER_CPU_FREQ])
+		return;
+
+	marshal_event_single(pcpu, power_cpu_key[POWER_CPU_FREQ], 0);
+}
+
+// Register the power tracepoints and reset the per-cpu idle state.
+// cpu_frequency is registered only when its counter is enabled; cpu_idle is
+// always registered because it is also used for detecting WFI.
+// Returns 0 on success, -1 if a tracepoint could not be registered.
+static int gator_trace_power_start(void)
+{
+	int cpu;
+
+	if (power_cpu_enabled[POWER_CPU_FREQ] && GATOR_REGISTER_TRACE(cpu_frequency))
+		goto fail_cpu_frequency_exit;
+
+	if (GATOR_REGISTER_TRACE(cpu_idle))
+		goto fail_cpu_idle_exit;
+
+	pr_debug("gator: registered power event tracepoints\n");
+
+	for_each_present_cpu(cpu)
+		per_cpu(idle_prev_state, cpu) = 0;
+
+	return 0;
+
+	// error unwinding: drop whatever was registered before the failure
+fail_cpu_idle_exit:
+	if (power_cpu_enabled[POWER_CPU_FREQ])
+		GATOR_UNREGISTER_TRACE(cpu_frequency);
+fail_cpu_frequency_exit:
+	pr_err("gator: power event tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
+
+	return -1;
+}
+
+// Unregister the power tracepoints and disable every power counter
+static void gator_trace_power_stop(void)
+{
+	// cpu_frequency was only registered when its counter was enabled
+	if (power_cpu_enabled[POWER_CPU_FREQ])
+		GATOR_UNREGISTER_TRACE(cpu_frequency);
+	GATOR_UNREGISTER_TRACE(cpu_idle);
+	pr_debug("gator: unregistered power event tracepoints\n");
+
+	memset(power_cpu_enabled, 0, sizeof(power_cpu_enabled));
+}
+
+// Disable every power counter and give each one a key from gator_events_get_key()
+void gator_trace_power_init(void)
+{
+	int idx = 0;
+
+	while (idx < POWER_TOTAL) {
+		power_cpu_enabled[idx] = 0;
+		power_cpu_key[idx] = gator_events_get_key();
+		idx++;
+	}
+}
+#else
+// Stubs used when GATOR_CPU_FREQ_SUPPORT is not set: no power counters are
+// exposed and no tracepoints are registered, so every hook is a no-op.
+
+// No gatorfs entries to create; always reports success
+static int gator_trace_power_create_files(struct super_block *sb, struct dentry *root)
+{
+	return 0;
+}
+
+static void gator_trace_power_online(void)
+{
+}
+
+static void gator_trace_power_offline(void)
+{
+}
+
+// Nothing to register; always reports success
+static int gator_trace_power_start(void)
+{
+	return 0;
+}
+
+static void gator_trace_power_stop(void)
+{
+}
+
+void gator_trace_power_init(void)
+{
+}
+#endif
diff --git a/drivers/gator/gator_trace_sched.c b/drivers/gator/gator_trace_sched.c
new file mode 100644
index 000000000000..332b3f6ba965
--- /dev/null
+++ b/drivers/gator/gator_trace_sched.c
@@ -0,0 +1,270 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <trace/events/sched.h>
+#include "gator.h"
+
+#define TASK_MAP_ENTRIES		1024	/* must be power of 2 */
+#define TASK_MAX_COLLISIONS		2
+
+// Why the previous task was switched out, as reported by the sched_switch probe;
+// the values also index cpu_wait_enabled[] and sched_cpu_key[]
+enum {
+	STATE_WAIT_ON_OTHER = 0,	// previous task neither TASK_RUNNING nor in iowait
+	STATE_CONTENTION,	// previous task was still TASK_RUNNING
+	STATE_WAIT_ON_IO,	// previous task had in_iowait set
+	CPU_WAIT_TOTAL	// number of states; sizes the two arrays
+};
+
+static DEFINE_PER_CPU(uint64_t *, taskname_keys);
+static DEFINE_PER_CPU(int, collecting);
+static DEFINE_PER_CPU(bool, in_scheduler_context);
+
+// this array is never read as the cpu wait charts are derived counters
+// the files are needed, nonetheless, to show that these counters are available
+static ulong cpu_wait_enabled[CPU_WAIT_TOTAL];
+static ulong sched_cpu_key[CPU_WAIT_TOTAL];
+
+// Create the gatorfs directories for the CPU wait counters (contention and
+// I/O), each holding an "enabled" file and a read-only "key" file.
+// Returns 0 on success, -1 if a directory could not be created.
+static int sched_trace_create_files(struct super_block *sb, struct dentry *root)
+{
+	static const struct {
+		const char *name;
+		int state;
+	} wait_dirs[] = {
+		{ "Linux_cpu_wait_contention", STATE_CONTENTION },
+		{ "Linux_cpu_wait_io", STATE_WAIT_ON_IO },
+	};
+	struct dentry *dir;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(wait_dirs); i++) {
+		dir = gatorfs_mkdir(sb, root, wait_dirs[i].name);
+		if (dir == NULL)
+			return -1;
+		gatorfs_create_ulong(sb, dir, "enabled", &cpu_wait_enabled[wait_dirs[i].state]);
+		gatorfs_create_ro_ulong(sb, dir, "key", &sched_cpu_key[wait_dirs[i].state]);
+	}
+
+	return 0;
+}
+
+// Emit the thread name of 'task' unless this cpu has already emitted the same
+// (comm checksum, pid) pair and still remembers it.
+//
+// Each cpu owns a table of TASK_MAP_ENTRIES buckets with TASK_MAX_COLLISIONS
+// slots per bucket. The bucket is chosen from the low bits of the pid; within
+// a bucket the newest entry is kept in front and the oldest is dropped.
+void emit_pid_name(struct task_struct *task)
+{
+	bool found = false;
+	char taskcomm[TASK_COMM_LEN + 3];
+	unsigned long x, cpu = get_physical_cpu();
+	// mask with the table size (a power of 2) so that every allocated bucket is reachable
+	uint64_t *keys = &(per_cpu(taskname_keys, cpu)[(task->pid & (TASK_MAP_ENTRIES - 1)) * TASK_MAX_COLLISIONS]);
+	uint64_t value;
+
+	// key = crc32 of the name in the upper half, pid in the lower half
+	value = gator_chksum_crc32(task->comm);
+	value = (value << 32) | (uint32_t)task->pid;
+
+	// determine if the thread name was emitted already
+	for (x = 0; x < TASK_MAX_COLLISIONS; x++) {
+		if (keys[x] == value) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		// shift values, new value always in front
+		uint64_t oldv, newv = value;
+		for (x = 0; x < TASK_MAX_COLLISIONS; x++) {
+			oldv = keys[x];
+			keys[x] = newv;
+			newv = oldv;
+		}
+
+		// emit pid names, cannot use get_task_comm, as it's not exported on all kernel versions
+		if (strlcpy(taskcomm, task->comm, TASK_COMM_LEN) == TASK_COMM_LEN - 1) {
+			// append ellipses if task->comm has length of TASK_COMM_LEN - 1;
+			// taskcomm has three spare bytes for exactly this
+			strcat(taskcomm, "...");
+		}
+
+		marshal_thread_name(task->pid, taskcomm);
+	}
+}
+
+// Read every registered gator event source and marshal the values into the
+// current cpu's buffers, stamped with 'time'. 'task' may be NULL, in which
+// case the per-process readers (read_proc) are skipped.
+// Nothing is collected when marshal_event_header() returns zero.
+static void collect_counters(u64 time, struct task_struct *task)
+{
+	int *buffer, len, cpu = get_physical_cpu();
+	long long *buffer64;
+	struct gator_interface *gi;
+
+	if (marshal_event_header(time)) {
+		list_for_each_entry(gi, &gator_events, list) {
+			// a source provides either a 32-bit or a 64-bit reader; read takes precedence
+			if (gi->read) {
+				len = gi->read(&buffer);
+				marshal_event(len, buffer);
+			} else if (gi->read64) {
+				len = gi->read64(&buffer64);
+				marshal_event64(len, buffer64);
+			}
+			// per-process counters are read in addition to the above
+			if (gi->read_proc && task != NULL) {
+				len = gi->read_proc(&buffer64, task);
+				marshal_event64(len, buffer64);
+			}
+		}
+		// Only check after writing all counters so that time and corresponding counters appear in the same frame
+		buffer_check(cpu, BLOCK_COUNTER_BUF, time);
+
+		// Commit buffers on timeout
+		if (gator_live_rate > 0 && time >= per_cpu(gator_buffer_commit_time, cpu)) {
+			static const int buftypes[] = { NAME_BUF, COUNTER_BUF, BLOCK_COUNTER_BUF, SCHED_TRACE_BUF };
+			unsigned long flags;
+			int i;
+
+			// the per-cpu buffers are committed with local interrupts disabled
+			local_irq_save(flags);
+			for (i = 0; i < ARRAY_SIZE(buftypes); ++i) {
+				gator_commit_buffer(cpu, buftypes[i], time);
+			}
+			local_irq_restore(flags);
+
+			// Try to preemptively flush the annotate buffer to reduce the chance of the buffer being full
+			// (trylock: skipped rather than waited for when the lock is contended)
+			if (on_primary_core() && spin_trylock(&annotate_lock)) {
+				gator_commit_buffer(0, ANNOTATE_BUF, time);
+				spin_unlock(&annotate_lock);
+			}
+		}
+	}
+}
+
+// special case used during a suspend of the system: marshals a switch record
+// with tgid, pid, cookie and state all set to zero
+static void trace_sched_insert_idle(void)
+{
+	marshal_sched_trace_switch(0, 0, 0, 0);
+}
+
+// Probe for sched_process_fork: looks up the exec cookie of the new task,
+// emits its name and marshals a start record for it
+GATOR_DEFINE_PROBE(sched_process_fork, TP_PROTO(struct task_struct *parent, struct task_struct *child))
+{
+	int cpu = get_physical_cpu();
+	int cookie = get_exec_cookie(cpu, child);
+
+	emit_pid_name(child);
+	marshal_sched_trace_start(child->tgid, child->pid, cookie);
+}
+
+// Probe for sched_switch: samples all counters on behalf of the outgoing task
+// and marshals a switch record describing the incoming task and the reason
+// the outgoing one stopped running. Kernels before 2.6.35 pass an extra rq
+// argument to the probe.
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next))
+#else
+GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_struct *next))
+#endif
+{
+	int cpu = get_physical_cpu();
+	int state = STATE_WAIT_ON_OTHER;
+	int cookie;
+
+	per_cpu(in_scheduler_context, cpu) = true;
+
+	// do as much work as possible before disabling interrupts
+	cookie = get_exec_cookie(cpu, next);
+	emit_pid_name(next);
+
+	// still runnable means it was preempted; otherwise tell io waits apart
+	if (prev->state == TASK_RUNNING)
+		state = STATE_CONTENTION;
+	else if (prev->in_iowait)
+		state = STATE_WAIT_ON_IO;
+
+	// 'collecting' is raised only for the duration of the counter read
+	per_cpu(collecting, cpu) = 1;
+	collect_counters(gator_get_time(), prev);
+	per_cpu(collecting, cpu) = 0;
+
+	marshal_sched_trace_switch(next->tgid, next->pid, cookie, state);
+
+	per_cpu(in_scheduler_context, cpu) = false;
+}
+
+// Probe for sched_process_free: marshals an exit record for the task's tgid/pid
+GATOR_DEFINE_PROBE(sched_process_free, TP_PROTO(struct task_struct *p))
+{
+	marshal_sched_trace_exit(p->tgid, p->pid);
+}
+
+// Empty callback; the signature matches what on_each_cpu() expects and the
+// argument is ignored
+static void do_nothing(void *info)
+{
+	// Intentionally do nothing
+	(void)info;
+}
+
+// Register the fork, switch and free scheduler probes as a set: if any one
+// fails, those already registered are removed again.
+// Returns 0 on success, -1 on failure.
+static int register_scheduler_tracepoints(void)
+{
+	if (GATOR_REGISTER_TRACE(sched_process_fork))
+		goto report_failure;
+
+	if (GATOR_REGISTER_TRACE(sched_switch)) {
+		GATOR_UNREGISTER_TRACE(sched_process_fork);
+		goto report_failure;
+	}
+
+	if (GATOR_REGISTER_TRACE(sched_process_free)) {
+		GATOR_UNREGISTER_TRACE(sched_switch);
+		GATOR_UNREGISTER_TRACE(sched_process_fork);
+		goto report_failure;
+	}
+
+	pr_debug("gator: registered tracepoints\n");
+
+	// Now that the scheduler tracepoint is registered, force a context switch
+	// on all cpus to capture what is currently running.
+	on_each_cpu(do_nothing, NULL, 0);
+
+	return 0;
+
+report_failure:
+	pr_err("gator: tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
+
+	return -1;
+}
+
+// Allocate one zeroed thread-name table per present cpu and register the
+// scheduler tracepoints.
+// Returns 0 on success. Returns -1 if an allocation or the tracepoint
+// registration fails; in that case every table allocated here is released
+// again and the per-cpu pointers are left NULL.
+int gator_trace_sched_start(void)
+{
+	const size_t size = TASK_MAP_ENTRIES * TASK_MAX_COLLISIONS * sizeof(uint64_t);
+	int cpu;
+
+	// start from a known state so the error path can free unconditionally
+	for_each_present_cpu(cpu)
+		per_cpu(taskname_keys, cpu) = NULL;
+
+	for_each_present_cpu(cpu) {
+		per_cpu(taskname_keys, cpu) = kzalloc(size, GFP_KERNEL);
+		if (!per_cpu(taskname_keys, cpu))
+			goto fail;
+	}
+
+	if (register_scheduler_tracepoints()) {
+		// a probe that was registered for a moment may still be running
+		// on another cpu; wait for it before freeing the tables it reads
+		tracepoint_synchronize_unregister();
+		goto fail;
+	}
+
+	return 0;
+
+fail:
+	for_each_present_cpu(cpu) {
+		kfree(per_cpu(taskname_keys, cpu));	// kfree(NULL) is a no-op
+		per_cpu(taskname_keys, cpu) = NULL;
+	}
+
+	return -1;
+}
+
+// Offline hook: records an all-zero switch entry via trace_sched_insert_idle()
+void gator_trace_sched_offline(void)
+{
+	trace_sched_insert_idle();
+}
+
+// Unregister the fork, switch and free scheduler probes
+static void unregister_scheduler_tracepoints(void)
+{
+	GATOR_UNREGISTER_TRACE(sched_process_fork);
+	GATOR_UNREGISTER_TRACE(sched_switch);
+	GATOR_UNREGISTER_TRACE(sched_process_free);
+	pr_debug("gator: unregistered tracepoints\n");
+}
+
+// Unregister the scheduler tracepoints and release the per-cpu thread-name
+// tables. The pointers are reset to NULL, so calling this again is harmless.
+void gator_trace_sched_stop(void)
+{
+	int cpu;
+
+	unregister_scheduler_tracepoints();
+
+	// a probe may still be executing on another cpu after unregistration;
+	// wait for it before freeing the tables it reads
+	tracepoint_synchronize_unregister();
+
+	for_each_present_cpu(cpu) {
+		kfree(per_cpu(taskname_keys, cpu));	// kfree(NULL) is a no-op
+		per_cpu(taskname_keys, cpu) = NULL;
+	}
+}
+
+// Disable every CPU wait counter and give each one a key from gator_events_get_key()
+void gator_trace_sched_init(void)
+{
+	int idx = 0;
+
+	while (idx < CPU_WAIT_TOTAL) {
+		cpu_wait_enabled[idx] = 0;
+		sched_cpu_key[idx] = gator_events_get_key();
+		idx++;
+	}
+}
diff --git a/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h b/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h
new file mode 100644
index 000000000000..347a4fe404bc
--- /dev/null
+++ b/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h
@@ -0,0 +1,163 @@
+/**
+ * Copyright (C) ARM Limited 2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef __MALI_MJOLLNIR_PROFILING_GATOR_API_H__
+#define __MALI_MJOLLNIR_PROFILING_GATOR_API_H__
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+
+/*
+ * The number of processor cores.  Update to suit your hardware implementation.
+ */
+#define MAX_NUM_FP_CORES            (4)
+#define MAX_NUM_VP_CORES            (1)
+#define MAX_NUM_L2_CACHE_CORES      (1)
+
+enum counters
+{
+    /* Timeline activity */
+    ACTIVITY_VP_0 = 0,
+    ACTIVITY_FP_0,
+    ACTIVITY_FP_1,
+    ACTIVITY_FP_2,
+    ACTIVITY_FP_3,
+
+    /* L2 cache counters */
+    COUNTER_L2_0_C0,
+    COUNTER_L2_0_C1,
+
+    /* Vertex processor counters */
+    COUNTER_VP_0_C0,
+    COUNTER_VP_0_C1,
+
+    /* Fragment processor counters */
+    COUNTER_FP_0_C0,
+    COUNTER_FP_0_C1,
+    COUNTER_FP_1_C0,
+    COUNTER_FP_1_C1,
+    COUNTER_FP_2_C0,
+    COUNTER_FP_2_C1,
+    COUNTER_FP_3_C0,
+    COUNTER_FP_3_C1,
+
+    /* EGL Software Counters */
+    COUNTER_EGL_BLIT_TIME,
+
+    /* GLES Software Counters */
+    COUNTER_GLES_DRAW_ELEMENTS_CALLS,
+    COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
+    COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
+    COUNTER_GLES_DRAW_ARRAYS_CALLS,
+    COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
+    COUNTER_GLES_DRAW_POINTS,
+    COUNTER_GLES_DRAW_LINES,
+    COUNTER_GLES_DRAW_LINE_LOOP,
+    COUNTER_GLES_DRAW_LINE_STRIP,
+    COUNTER_GLES_DRAW_TRIANGLES,
+    COUNTER_GLES_DRAW_TRIANGLE_STRIP,
+    COUNTER_GLES_DRAW_TRIANGLE_FAN,
+    COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
+    COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
+    COUNTER_GLES_UPLOAD_TEXTURE_TIME,
+    COUNTER_GLES_UPLOAD_VBO_TIME,
+    COUNTER_GLES_NUM_FLUSHES,
+    COUNTER_GLES_NUM_VSHADERS_GENERATED,
+    COUNTER_GLES_NUM_FSHADERS_GENERATED,
+    COUNTER_GLES_VSHADER_GEN_TIME,
+    COUNTER_GLES_FSHADER_GEN_TIME,
+    COUNTER_GLES_INPUT_TRIANGLES,
+    COUNTER_GLES_VXCACHE_HIT,
+    COUNTER_GLES_VXCACHE_MISS,
+    COUNTER_GLES_VXCACHE_COLLISION,
+    COUNTER_GLES_CULLED_TRIANGLES,
+    COUNTER_GLES_CULLED_LINES,
+    COUNTER_GLES_BACKFACE_TRIANGLES,
+    COUNTER_GLES_GBCLIP_TRIANGLES,
+    COUNTER_GLES_GBCLIP_LINES,
+    COUNTER_GLES_TRIANGLES_DRAWN,
+    COUNTER_GLES_DRAWCALL_TIME,
+    COUNTER_GLES_TRIANGLES_COUNT,
+    COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
+    COUNTER_GLES_STRIP_TRIANGLES_COUNT,
+    COUNTER_GLES_FAN_TRIANGLES_COUNT,
+    COUNTER_GLES_LINES_COUNT,
+    COUNTER_GLES_INDEPENDENT_LINES_COUNT,
+    COUNTER_GLES_STRIP_LINES_COUNT,
+    COUNTER_GLES_LOOP_LINES_COUNT,
+
+    COUNTER_FILMSTRIP,
+    COUNTER_FREQUENCY,
+    COUNTER_VOLTAGE,
+
+    NUMBER_OF_EVENTS
+};
+
+/*
+ * First and last enumerator of each counter group. The ranges are only valid
+ * while the enumerators of a group stay contiguous in enum counters, so new
+ * entries must be added inside the matching group and LAST_* updated.
+ */
+#define FIRST_ACTIVITY_EVENT    ACTIVITY_VP_0
+#define LAST_ACTIVITY_EVENT     ACTIVITY_FP_3
+
+#define FIRST_HW_COUNTER        COUNTER_L2_0_C0
+#define LAST_HW_COUNTER         COUNTER_FP_3_C1
+
+#define FIRST_SW_COUNTER        COUNTER_EGL_BLIT_TIME
+#define LAST_SW_COUNTER         COUNTER_GLES_LOOP_LINES_COUNT
+
+/* Signifies that the system is able to report voltage and frequency numbers. */
+#define DVFS_REPORTED_BY_DDK 1
+
+/**
+ * Structure to pass performance counter data of a Mali core
+ */
+typedef struct _mali_profiling_core_counters
+{
+    u32 source0;
+    u32 value0;
+    u32 source1;
+    u32 value1;
+} _mali_profiling_core_counters;
+
+/*
+ * For compatibility with utgard.
+ */
+typedef struct _mali_profiling_l2_counter_values
+{
+    struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES];
+} _mali_profiling_l2_counter_values;
+
+/**
+ * Structure to pass data defining the Mali instance in use:
+ *
+ * mali_product_id - Mali product id
+ * mali_version_major - Mali version major number
+ * mali_version_minor - Mali version minor number
+ * num_of_l2_cores - number of L2 cache cores
+ * num_of_fp_cores - number of fragment processor cores
+ * num_of_vp_cores - number of vertex processor cores
+ */
+typedef struct _mali_profiling_mali_version
+{
+    u32 mali_product_id;
+    u32 mali_version_major;
+    u32 mali_version_minor;
+    u32 num_of_l2_cores;
+    u32 num_of_fp_cores;
+    u32 num_of_vp_cores;
+} _mali_profiling_mali_version;
+
+extern void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values);
+extern u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values);
+
+/*
+ * List of possible actions allowing DDK to be controlled by Streamline.
+ * The following numbers are used by DDK to control the frame buffer dumping.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE      (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_MJOLLNIR_PROFILING_GATOR_API_H__ */
diff --git a/drivers/gator/mali/mali_utgard_profiling_gator_api.h b/drivers/gator/mali/mali_utgard_profiling_gator_api.h
new file mode 100644
index 000000000000..559647a76d29
--- /dev/null
+++ b/drivers/gator/mali/mali_utgard_profiling_gator_api.h
@@ -0,0 +1,201 @@
+/**
+ * Copyright (C) ARM Limited 2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef __MALI_UTGARD_PROFILING_GATOR_API_H__
+#define __MALI_UTGARD_PROFILING_GATOR_API_H__
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#define MALI_PROFILING_API_VERSION 4
+
+#define MAX_NUM_L2_CACHE_CORES 3
+#define MAX_NUM_FP_CORES 8
+#define MAX_NUM_VP_CORES 1
+
+/** The list of events supported by the Mali DDK. */
+typedef enum
+{
+    /* Vertex processor activity */
+    ACTIVITY_VP_0 = 0,
+
+    /* Fragment processor activity */
+    ACTIVITY_FP_0, /* 1 */
+    ACTIVITY_FP_1,
+    ACTIVITY_FP_2,
+    ACTIVITY_FP_3,
+    ACTIVITY_FP_4,
+    ACTIVITY_FP_5,
+    ACTIVITY_FP_6,
+    ACTIVITY_FP_7,
+
+    /* L2 cache counters */
+    COUNTER_L2_0_C0,
+    COUNTER_L2_0_C1,
+    COUNTER_L2_1_C0,
+    COUNTER_L2_1_C1,
+    COUNTER_L2_2_C0,
+    COUNTER_L2_2_C1,
+
+    /* Vertex processor counters */
+    COUNTER_VP_0_C0, /*15*/
+    COUNTER_VP_0_C1,
+
+    /* Fragment processor counters */
+    COUNTER_FP_0_C0,
+    COUNTER_FP_0_C1,
+    COUNTER_FP_1_C0,
+    COUNTER_FP_1_C1,
+    COUNTER_FP_2_C0,
+    COUNTER_FP_2_C1,
+    COUNTER_FP_3_C0,
+    COUNTER_FP_3_C1,
+    COUNTER_FP_4_C0,
+    COUNTER_FP_4_C1,
+    COUNTER_FP_5_C0,
+    COUNTER_FP_5_C1,
+    COUNTER_FP_6_C0,
+    COUNTER_FP_6_C1,
+    COUNTER_FP_7_C0,
+    COUNTER_FP_7_C1, /* 32 */
+
+    /*
+     * If more hardware counters are added, the _mali_osk_hw_counter_table
+     * below should also be updated.
+     */
+
+    /* EGL software counters */
+    COUNTER_EGL_BLIT_TIME,
+
+    /* GLES software counters */
+    COUNTER_GLES_DRAW_ELEMENTS_CALLS,
+    COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES,
+    COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED,
+    COUNTER_GLES_DRAW_ARRAYS_CALLS,
+    COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED,
+    COUNTER_GLES_DRAW_POINTS,
+    COUNTER_GLES_DRAW_LINES,
+    COUNTER_GLES_DRAW_LINE_LOOP,
+    COUNTER_GLES_DRAW_LINE_STRIP,
+    COUNTER_GLES_DRAW_TRIANGLES,
+    COUNTER_GLES_DRAW_TRIANGLE_STRIP,
+    COUNTER_GLES_DRAW_TRIANGLE_FAN,
+    COUNTER_GLES_NON_VBO_DATA_COPY_TIME,
+    COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI,
+    COUNTER_GLES_UPLOAD_TEXTURE_TIME,
+    COUNTER_GLES_UPLOAD_VBO_TIME,
+    COUNTER_GLES_NUM_FLUSHES,
+    COUNTER_GLES_NUM_VSHADERS_GENERATED,
+    COUNTER_GLES_NUM_FSHADERS_GENERATED,
+    COUNTER_GLES_VSHADER_GEN_TIME,
+    COUNTER_GLES_FSHADER_GEN_TIME,
+    COUNTER_GLES_INPUT_TRIANGLES,
+    COUNTER_GLES_VXCACHE_HIT,
+    COUNTER_GLES_VXCACHE_MISS,
+    COUNTER_GLES_VXCACHE_COLLISION,
+    COUNTER_GLES_CULLED_TRIANGLES,
+    COUNTER_GLES_CULLED_LINES,
+    COUNTER_GLES_BACKFACE_TRIANGLES,
+    COUNTER_GLES_GBCLIP_TRIANGLES,
+    COUNTER_GLES_GBCLIP_LINES,
+    COUNTER_GLES_TRIANGLES_DRAWN,
+    COUNTER_GLES_DRAWCALL_TIME,
+    COUNTER_GLES_TRIANGLES_COUNT,
+    COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT,
+    COUNTER_GLES_STRIP_TRIANGLES_COUNT,
+    COUNTER_GLES_FAN_TRIANGLES_COUNT,
+    COUNTER_GLES_LINES_COUNT,
+    COUNTER_GLES_INDEPENDENT_LINES_COUNT,
+    COUNTER_GLES_STRIP_LINES_COUNT,
+    COUNTER_GLES_LOOP_LINES_COUNT,
+
+    /* Framebuffer capture pseudo-counter */
+    COUNTER_FILMSTRIP,
+
+    NUMBER_OF_EVENTS
+} _mali_osk_counter_id;
+
+/*
+ * First and last enumerator of each counter group. The ranges are only valid
+ * while the enumerators of a group stay contiguous in _mali_osk_counter_id,
+ * so new entries must be added inside the matching group and LAST_* updated.
+ */
+#define FIRST_ACTIVITY_EVENT    ACTIVITY_VP_0
+#define LAST_ACTIVITY_EVENT     ACTIVITY_FP_7
+
+#define FIRST_HW_COUNTER        COUNTER_L2_0_C0
+#define LAST_HW_COUNTER         COUNTER_FP_7_C1
+
+#define FIRST_SW_COUNTER        COUNTER_EGL_BLIT_TIME
+#define LAST_SW_COUNTER         COUNTER_GLES_LOOP_LINES_COUNT
+
+/* The special group currently holds the single filmstrip pseudo-counter */
+#define FIRST_SPECIAL_COUNTER   COUNTER_FILMSTRIP
+#define LAST_SPECIAL_COUNTER    COUNTER_FILMSTRIP
+
+/**
+ * Structure to pass performance counter data of a Mali core
+ */
+typedef struct _mali_profiling_core_counters
+{
+	u32 source0;
+	u32 value0;
+	u32 source1;
+	u32 value1;
+} _mali_profiling_core_counters;
+
+/**
+ * Structure to pass performance counter data of Mali L2 cache cores
+ */
+typedef struct _mali_profiling_l2_counter_values
+{
+	struct _mali_profiling_core_counters cores[MAX_NUM_L2_CACHE_CORES];
+} _mali_profiling_l2_counter_values;
+
+/**
+ * Structure to pass data defining Mali instance in use:
+ *
+ * mali_product_id - Mali product id
+ * mali_version_major - Mali version major number
+ * mali_version_minor - Mali version minor number
+ * num_of_l2_cores - number of L2 cache cores
+ * num_of_fp_cores - number of fragment processor cores
+ * num_of_vp_cores - number of vertex processor cores
+ */
+typedef struct _mali_profiling_mali_version
+{
+	u32 mali_product_id;
+	u32 mali_version_major;
+	u32 mali_version_minor;
+	u32 num_of_l2_cores;
+	u32 num_of_fp_cores;
+	u32 num_of_vp_cores;
+} _mali_profiling_mali_version;
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping and s/w counter reporting.
+ * We cannot use the enums in mali_uk_types.h because they are unknown inside gator.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+
+void _mali_profiling_control(u32 action, u32 value);
+
+u32 _mali_profiling_get_l2_counters(_mali_profiling_l2_counter_values *values);
+
+int _mali_profiling_set_event(u32 counter_id, s32 event_id);
+
+u32 _mali_profiling_get_api_version(void);
+
+void _mali_profiling_get_mali_version(struct _mali_profiling_mali_version *values);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MALI_UTGARD_PROFILING_GATOR_API_H__ */
diff --git a/drivers/gator/mali_t6xx.mk b/drivers/gator/mali_t6xx.mk
new file mode 100644
index 000000000000..1a98c1c6a73f
--- /dev/null
+++ b/drivers/gator/mali_t6xx.mk
@@ -0,0 +1,26 @@
+# Defines for Mali-T6xx driver
+EXTRA_CFLAGS += -DMALI_USE_UMP=1 \
+                -DMALI_LICENSE_IS_GPL=1 \
+                -DMALI_BASE_TRACK_MEMLEAK=0 \
+                -DMALI_DEBUG=0 \
+                -DMALI_ERROR_INJECT_ON=0 \
+                -DMALI_CUSTOMER_RELEASE=1 \
+                -DMALI_UNIT_TEST=0 \
+                -DMALI_BACKEND_KERNEL=1 \
+                -DMALI_NO_MALI=0
+
+DDK_DIR ?= .
+KBASE_DIR = $(DDK_DIR)/drivers/gpu/arm/t6xx/kbase
+OSK_DIR = $(DDK_DIR)/drivers/gpu/arm/t6xx/kbase/osk
+UMP_DIR = $(DDK_DIR)/include/linux
+
+# Include directories in the DDK
+EXTRA_CFLAGS += -I$(KBASE_DIR)/ \
+                -I$(KBASE_DIR)/.. \
+                -I$(OSK_DIR)/.. \
+                -I$(UMP_DIR)/.. \
+                -I$(DDK_DIR)/include \
+                -I$(KBASE_DIR)/osk/src/linux/include \
+                -I$(KBASE_DIR)/platform_dummy \
+                -I$(KBASE_DIR)/src
+
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 19ceaa60e0f4..65bc83747f66 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -41,6 +41,7 @@
 #include <linux/slab.h>
 #include <linux/irqchip/chained_irq.h>
 #include <linux/irqchip/arm-gic.h>
+#include <trace/events/arm-ipi.h>
 
 #include <asm/irq.h>
 #include <asm/exception.h>
@@ -253,10 +254,9 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 	if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids)
 		return -EINVAL;
 
+	raw_spin_lock(&irq_controller_lock);
 	mask = 0xff << shift;
 	bit = gic_cpu_map[cpu] << shift;
-
-	raw_spin_lock(&irq_controller_lock);
 	val = readl_relaxed(reg) & ~mask;
 	writel_relaxed(val | bit, reg);
 	raw_spin_unlock(&irq_controller_lock);
@@ -453,6 +453,12 @@ static void __cpuinit gic_cpu_init(struct gic_chip_data *gic)
 	writel_relaxed(1, base + GIC_CPU_CTRL);
 }
 
+/*
+ * gic_cpu_if_down - write 0 to GIC_CPU_CTRL of GIC 0's CPU interface
+ *
+ * gic_cpu_init() writes 1 to the same register, so this presumably turns the
+ * calling CPU's interface off again - confirm against the GIC specification.
+ */
+void gic_cpu_if_down(void)
+{
+	void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]);
+	writel_relaxed(0, cpu_base + GIC_CPU_CTRL);
+}
+
 #ifdef CONFIG_CPU_PM
 /*
  * Saves the GIC distributor registers during suspend or idle.  Must be called
@@ -646,11 +652,15 @@ static void __init gic_pm_init(struct gic_chip_data *gic)
 void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 {
 	int cpu;
-	unsigned long map = 0;
+	unsigned long flags, map = 0;
+
+	raw_spin_lock_irqsave(&irq_controller_lock, flags);
 
 	/* Convert our logical CPU mask into a physical one. */
-	for_each_cpu(cpu, mask)
+	for_each_cpu(cpu, mask) {
+		trace_arm_ipi_send(irq, cpu);
 		map |= gic_cpu_map[cpu];
+	}
 
 	/*
 	 * Ensure that stores to Normal memory are visible to the
@@ -660,9 +670,145 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 
 	/* this always happens on GIC0 */
 	writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
+
+	raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
 }
 #endif
 
+#ifdef CONFIG_BL_SWITCHER
+/*
+ * gic_send_sgi - send a SGI directly to given CPU interface number
+ *
+ * @cpu_id: the ID for the destination CPU interface; must be below
+ *          NR_GIC_CPU_IF, otherwise this BUG()s
+ * @irq: the IPI number to send a SGI for
+ */
+void gic_send_sgi(unsigned int cpu_id, unsigned int irq)
+{
+	unsigned int target_list;
+
+	BUG_ON(cpu_id >= NR_GIC_CPU_IF);
+
+	/* one bit per CPU interface, placed at bit 16 and up of the register */
+	target_list = 1 << cpu_id;
+
+	/* this always happens on GIC0 */
+	writel_relaxed((target_list << 16) | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
+}
+
+/*
+ * gic_get_cpu_id - get the CPU interface ID for the specified CPU
+ *
+ * @cpu: the logical CPU number to get the GIC ID for.
+ *
+ * Return the CPU interface ID for the given logical CPU number,
+ * or -1 if the CPU number is too large or the interface ID is
+ * unknown (no bit, or more than one bit, set in the CPU map entry).
+ */
+int gic_get_cpu_id(unsigned int cpu)
+{
+	unsigned int cpu_bit;
+
+	if (cpu >= NR_GIC_CPU_IF)
+		return -1;
+	cpu_bit = gic_cpu_map[cpu];
+	/* exactly one bit must be set; __ffs() is undefined for 0 */
+	if (!cpu_bit || (cpu_bit & (cpu_bit - 1)))
+		return -1;
+	return __ffs(cpu_bit);
+}
+
+/*
+ * gic_migrate_target - migrate IRQs to another CPU interface
+ *
+ * @new_cpu_id: the CPU target ID to migrate IRQs to
+ *
+ * Migrate all peripheral interrupts with a target matching the current CPU
+ * to the interface corresponding to @new_cpu_id.  The CPU interface mapping
+ * is also updated.  Targets to other CPU interfaces are unchanged.
+ * This must be called with IRQs locally disabled.
+ */
+void gic_migrate_target(unsigned int new_cpu_id)
+{
+	unsigned int old_cpu_id, gic_irqs, gic_nr = 0;
+	void __iomem *dist_base;
+	int i, ror_val, cpu = smp_processor_id();
+	u32 val, old_mask, active_mask;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	dist_base = gic_data_dist_base(&gic_data[gic_nr]);
+	if (!dist_base)
+		return;
+	gic_irqs = gic_data[gic_nr].gic_irqs;
+
+	/*
+	 * Each target register holds four one-byte fields, so old_mask selects
+	 * this CPU's bit in all four of them.  Rotating right by
+	 * (old - new) mod 32 moves that bit to the new CPU's position within
+	 * the same byte.
+	 */
+	old_cpu_id = __ffs(gic_cpu_map[cpu]);
+	old_mask = 0x01010101 << old_cpu_id;
+	ror_val = (old_cpu_id - new_cpu_id) & 31;
+
+	raw_spin_lock(&irq_controller_lock);
+
+	/* Update the interface mapping for this logical CPU */
+	gic_cpu_map[cpu] = 1 << new_cpu_id;
+
+	/* Start at register 8, i.e. interrupt 32: the first 32 IDs are skipped */
+	for (i = 8; i < DIV_ROUND_UP(gic_irqs, 4); i++) {
+		val = readl_relaxed(dist_base + GIC_DIST_TARGET + i * 4);
+		active_mask = val & old_mask;
+		if (active_mask) {
+			/* retarget only the fields that pointed at the old interface */
+			val &= ~active_mask;
+			val |= ror32(active_mask, ror_val);
+			writel_relaxed(val, dist_base + GIC_DIST_TARGET + i * 4);
+		}
+	}
+
+	raw_spin_unlock(&irq_controller_lock);
+
+	/*
+	 * Now let's migrate and clear any potential SGIs that might be
+	 * pending for us (old_cpu_id).  Since GIC_DIST_SGI_PENDING_SET
+	 * is a banked register, we can only forward the SGI using
+	 * GIC_DIST_SOFTINT.  The original SGI source is lost but Linux
+	 * doesn't use that information anyway.
+	 *
+	 * For the same reason we do not adjust SGI source information
+	 * for previously sent SGIs by us to other CPUs either.
+	 */
+	for (i = 0; i < 16; i += 4) {
+		int j;
+		/* one byte per SGI, four SGIs per 32-bit register */
+		val = readl_relaxed(dist_base + GIC_DIST_SGI_PENDING_SET + i);
+		if (!val)
+			continue;
+		writel_relaxed(val, dist_base + GIC_DIST_SGI_PENDING_CLEAR + i);
+		for (j = i; j < i + 4; j++) {
+			/* any bit set in the byte means SGI j was pending: resend it */
+			if (val & 0xff)
+				writel_relaxed((1 << (new_cpu_id + 16)) | j,
+						dist_base + GIC_DIST_SOFTINT);
+			val >>= 8;
+		}
+	}
+}
+
+/* Physical base address of the distributor; 0 until gic_init_physaddr() sets it */
+static unsigned long gic_dist_physaddr;
+
+/*
+ * gic_get_sgir_physaddr - get the physical address for the SGI register
+ *
+ * Return the physical address of the SGI register to be used
+ * by some early assembly code when the kernel is not yet available,
+ * or 0 if the distributor's physical address has not been recorded.
+ */
+unsigned long gic_get_sgir_physaddr(void)
+{
+	if (!gic_dist_physaddr)
+		return 0;
+	return gic_dist_physaddr + GIC_DIST_SOFTINT;
+}
+
+/*
+ * gic_init_physaddr - record the distributor's physical base address
+ *
+ * @node: device tree node of the GIC; its first address resource is used.
+ *
+ * Nothing is recorded when the resource cannot be resolved.
+ */
+void __init gic_init_physaddr(struct device_node *node)
+{
+	struct resource res;
+
+	if (of_address_to_resource(node, 0, &res) != 0)
+		return;
+
+	gic_dist_physaddr = res.start;
+	pr_info("GIC physical location is %#lx\n", gic_dist_physaddr);
+}
+
+#else
+#define gic_init_physaddr(node)  do { } while(0)
+#endif
+
 static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
 				irq_hw_number_t hw)
 {
@@ -844,6 +990,8 @@ int __init gic_of_init(struct device_node *node, struct device_node *parent)
 		percpu_offset = 0;
 
 	gic_init_bases(gic_cnt, -1, dist_base, cpu_base, percpu_offset, node);
+	if (!gic_cnt)
+		gic_init_physaddr(node);
 
 	if (parent) {
 		irq = irq_of_parse_and_map(node, 0);
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index d54e985748b7..a5e54f0d6a73 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1144,7 +1144,15 @@ config MCP_UCB1200_TS
 endmenu
 
 config VEXPRESS_CONFIG
-	bool
+	bool "ARM Versatile Express platform infrastructure"
+	depends on ARM || ARM64
 	help
 	  Platform configuration infrastructure for the ARM Ltd.
 	  Versatile Express.
+
+config VEXPRESS_SPC
+	bool "Versatile Express SPC driver support"
+	depends on ARM
+	depends on VEXPRESS_CONFIG
+	help
+	  Serial Power Controller driver for ARM Ltd. test chips.
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 718e94a2a9a7..3a0120315aa3 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -153,5 +153,6 @@ obj-$(CONFIG_MFD_SEC_CORE)	+= sec-core.o sec-irq.o
 obj-$(CONFIG_MFD_SYSCON)	+= syscon.o
 obj-$(CONFIG_MFD_LM3533)	+= lm3533-core.o lm3533-ctrlbank.o
 obj-$(CONFIG_VEXPRESS_CONFIG)	+= vexpress-config.o vexpress-sysreg.o
+obj-$(CONFIG_VEXPRESS_SPC)	+= vexpress-spc.o
 obj-$(CONFIG_MFD_RETU)		+= retu-mfd.o
 obj-$(CONFIG_MFD_AS3711)	+= as3711.o
diff --git a/drivers/mfd/vexpress-config.c b/drivers/mfd/vexpress-config.c
index 84ce6b9daa3d..1af2b0e0182f 100644
--- a/drivers/mfd/vexpress-config.c
+++ b/drivers/mfd/vexpress-config.c
@@ -86,29 +86,13 @@ void vexpress_config_bridge_unregister(struct vexpress_config_bridge *bridge)
 }
 EXPORT_SYMBOL(vexpress_config_bridge_unregister);
 
-
-struct vexpress_config_func {
-	struct vexpress_config_bridge *bridge;
-	void *func;
-};
-
-struct vexpress_config_func *__vexpress_config_func_get(struct device *dev,
-		struct device_node *node)
+static struct vexpress_config_bridge *
+		vexpress_config_bridge_find(struct device_node *node)
 {
-	struct device_node *bridge_node;
-	struct vexpress_config_func *func;
 	int i;
+	struct vexpress_config_bridge *res = NULL;
+	struct device_node *bridge_node = of_node_get(node);
 
-	if (WARN_ON(dev && node && dev->of_node != node))
-		return NULL;
-	if (dev && !node)
-		node = dev->of_node;
-
-	func = kzalloc(sizeof(*func), GFP_KERNEL);
-	if (!func)
-		return NULL;
-
-	bridge_node = of_node_get(node);
 	while (bridge_node) {
 		const __be32 *prop = of_get_property(bridge_node,
 				"arm,vexpress,config-bridge", NULL);
@@ -129,13 +113,46 @@ struct vexpress_config_func *__vexpress_config_func_get(struct device *dev,
 
 		if (test_bit(i, vexpress_config_bridges_map) &&
 				bridge->node == bridge_node) {
-			func->bridge = bridge;
-			func->func = bridge->info->func_get(dev, node);
+			res = bridge;
 			break;
 		}
 	}
 	mutex_unlock(&vexpress_config_bridges_mutex);
 
+	return res;
+}
+
+
+struct vexpress_config_func {
+	struct vexpress_config_bridge *bridge;
+	void *func;
+};
+
+struct vexpress_config_func *__vexpress_config_func_get(
+		struct vexpress_config_bridge *bridge,
+		struct device *dev,
+		struct device_node *node,
+		const char *id)
+{
+	struct vexpress_config_func *func;
+
+	if (WARN_ON(dev && node && dev->of_node != node))
+		return NULL;
+	if (dev && !node)
+		node = dev->of_node;
+
+	if (!bridge)
+		bridge = vexpress_config_bridge_find(node);
+	if (!bridge)
+		return NULL;
+
+	func = kzalloc(sizeof(*func), GFP_KERNEL);
+	if (!func)
+		return NULL;
+
+	func->bridge = bridge;
+	func->func = bridge->info->func_get(dev, node, id);
+
 	if (!func->func) {
 		of_node_put(node);
 		kfree(func);
diff --git a/drivers/mfd/vexpress-spc.c b/drivers/mfd/vexpress-spc.c
new file mode 100644
index 000000000000..0c6718abf1ba
--- /dev/null
+++ b/drivers/mfd/vexpress-spc.c
@@ -0,0 +1,633 @@
+/*
+ * Versatile Express Serial Power Controller (SPC) support
+ *
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * Authors: Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
+ *          Achin Gupta           <achin.gupta@arm.com>
+ *          Lorenzo Pieralisi     <lorenzo.pieralisi@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/slab.h>
+#include <linux/vexpress.h>
+
+#include <asm/cacheflush.h>
+
+#define SCC_CFGREG19		0x120
+#define SCC_CFGREG20		0x124
+#define A15_CONF		0x400
+#define A7_CONF			0x500
+#define SYS_INFO		0x700
+#define PERF_LVL_A15		0xB00
+#define PERF_REQ_A15		0xB04
+#define PERF_LVL_A7		0xB08
+#define PERF_REQ_A7		0xB0c
+#define SYS_CFGCTRL		0xB10
+#define SYS_CFGCTRL_REQ		0xB14
+#define PWC_STATUS		0xB18
+#define PWC_FLAG		0xB1c
+#define WAKE_INT_MASK		0xB24
+#define WAKE_INT_RAW		0xB28
+#define WAKE_INT_STAT		0xB2c
+#define A15_PWRDN_EN		0xB30
+#define A7_PWRDN_EN		0xB34
+#define A7_PWRDNACK		0xB54
+#define A15_BX_ADDR0		0xB68
+#define SYS_CFG_WDATA		0xB70
+#define SYS_CFG_RDATA		0xB74
+#define A7_BX_ADDR0		0xB78
+
+#define GBL_WAKEUP_INT_MSK	(0x3 << 10)
+
+#define CLKF_SHIFT		16
+#define CLKF_MASK		0x1FFF
+#define CLKR_SHIFT		0
+#define CLKR_MASK		0x3F
+#define CLKOD_SHIFT		8
+#define CLKOD_MASK		0xF
+
+#define OPP_FUNCTION		6
+#define OPP_BASE_DEVICE		0x300
+#define OPP_A15_OFFSET		0x4
+#define OPP_A7_OFFSET		0xc
+
+#define SYS_CFGCTRL_START	(1 << 31)
+#define SYS_CFGCTRL_WRITE	(1 << 30)
+#define SYS_CFGCTRL_FUNC(n)	(((n) & 0x3f) << 20)
+#define SYS_CFGCTRL_DEVICE(n)	(((n) & 0xfff) << 0)
+
+#define MAX_OPPS	8
+#define MAX_CLUSTERS	2
+
+enum {
+	A15_OPP_TYPE		= 0,
+	A7_OPP_TYPE		= 1,
+	SYS_CFGCTRL_TYPE	= 2,
+	INVALID_TYPE
+};
+
+#define STAT_COMPLETE(type)	((1 << 0) << ((type) << 2)) /* bit 0 of the type's 4-bit field */
+#define STAT_ERR(type)		((1 << 1) << ((type) << 2)) /* bit 1 of the type's 4-bit field */
+#define RESPONSE_MASK(type)	(STAT_COMPLETE(type) | STAT_ERR(type))
+
+struct vexpress_spc_drvdata {
+	void __iomem *baseaddr;
+	u32 a15_clusid;
+	int irq;
+	u32 cur_req_type;
+	u32 freqs[MAX_CLUSTERS][MAX_OPPS];
+	int freqs_cnt[MAX_CLUSTERS];
+};
+
+enum spc_func_type {
+	CONFIG_FUNC = 0,
+	PERF_FUNC   = 1,
+};
+
+struct vexpress_spc_func {
+	enum spc_func_type type;
+	u32 function;
+	u32 device;
+};
+
+static struct vexpress_spc_drvdata *info;
+static u32 *vexpress_spc_config_data;
+static struct vexpress_config_bridge *vexpress_spc_config_bridge;
+static struct vexpress_config_func *opp_func, *perf_func;
+
+static int vexpress_spc_load_result = -EAGAIN;
+
+static bool vexpress_spc_initialized(void)
+{
+	return vexpress_spc_load_result == 0;
+}
+
+/**
+ * vexpress_spc_write_resume_reg() - set the jump address used for warm boot
+ *
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ * @cpu: mpidr[7:0] bitfield describing cpu affinity level
+ * @addr: physical resume address
+ */
+void vexpress_spc_write_resume_reg(u32 cluster, u32 cpu, u32 addr)
+{
+	void __iomem *baseaddr;
+
+	if (WARN_ON_ONCE(cluster >= MAX_CLUSTERS))
+		return;
+
+	if (cluster != info->a15_clusid)
+		baseaddr = info->baseaddr + A7_BX_ADDR0 + (cpu << 2);
+	else
+		baseaddr = info->baseaddr + A15_BX_ADDR0 + (cpu << 2);
+
+	writel_relaxed(addr, baseaddr);
+}
+
+/**
+ * vexpress_spc_get_nb_cpus() - get number of cpus in a cluster
+ *
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ *
+ * Return: number of cpus in the cluster
+ *         -EINVAL if cluster number invalid
+ */
+int vexpress_spc_get_nb_cpus(u32 cluster)
+{
+	unsigned int shift;
+
+	if (WARN_ON_ONCE(cluster >= MAX_CLUSTERS))
+		return -EINVAL;
+
+	/* The A15 cluster's field starts at bit 16, the other one at bit 20. */
+	shift = (cluster == info->a15_clusid) ? 16 : 20;
+	return (readl_relaxed(info->baseaddr + SYS_INFO) >> shift) & 0xf;
+}
+EXPORT_SYMBOL_GPL(vexpress_spc_get_nb_cpus);
+
+/**
+ * vexpress_spc_get_performance - get current performance level of cluster
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ * @freq: pointer to the performance level to be assigned
+ *
+ * Return: 0 on success
+ *         < 0 on read error
+ */
+int vexpress_spc_get_performance(u32 cluster, u32 *freq)
+{
+	u32 perf_cfg_reg, perf = 0;
+	int ret;
+
+	if (!vexpress_spc_initialized() || (cluster >= MAX_CLUSTERS))
+		return -EINVAL;
+
+	perf_cfg_reg = cluster != info->a15_clusid ? PERF_LVL_A7 : PERF_LVL_A15;
+	ret = vexpress_config_read(perf_func, perf_cfg_reg, &perf);
+	if (!ret && perf >= MAX_OPPS)	/* raw register value: never index past the table */
+		ret = -EIO;
+	if (!ret)
+		*freq = info->freqs[cluster][perf];
+	return ret;
+}
+EXPORT_SYMBOL_GPL(vexpress_spc_get_performance);
+
+/**
+ * vexpress_spc_find_perf_index - get performance level corresponding to
+ *				 a frequency
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ * @freq: frequency to be looked-up
+ *
+ * Return: perf level index on success
+ *         -EINVAL on error
+ */
+static int vexpress_spc_find_perf_index(u32 cluster, u32 freq)
+{
+	int level, count = info->freqs_cnt[cluster];
+
+	for (level = 0; level < count; level++)
+		if (info->freqs[cluster][level] == freq)
+			return level;
+	return -EINVAL;
+}
+
+/**
+ * vexpress_spc_set_performance - set current performance level of cluster
+ *
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ * @freq: performance level to be programmed
+ *
+ * Returns: 0 on success
+ *          < 0 on write error
+ */
+int vexpress_spc_set_performance(u32 cluster, u32 freq)
+{
+	int level, reg;
+
+	if (!vexpress_spc_initialized() || (cluster >= MAX_CLUSTERS))
+		return -EINVAL;
+
+	/* Look up the table index that is written to the PERF_LVL register. */
+	level = vexpress_spc_find_perf_index(cluster, freq);
+	if (level < 0 || level >= MAX_OPPS)
+		return -EINVAL;
+
+	if (cluster == info->a15_clusid)
+		reg = PERF_LVL_A15;
+	else
+		reg = PERF_LVL_A7;
+	return vexpress_config_write(perf_func, reg, level);
+}
+EXPORT_SYMBOL_GPL(vexpress_spc_set_performance);
+
+static void vexpress_spc_set_wake_intr(u32 mask)
+{
+	writel_relaxed(mask & VEXPRESS_SPC_WAKE_INTR_MASK,
+		       info->baseaddr + WAKE_INT_MASK);
+}
+
+static inline void reg_bitmask(u32 *reg, u32 mask, bool set)
+{
+	/* Start from the value with the masked bits cleared. */
+	u32 cleared = *reg & ~mask;
+
+	*reg = set ? (cleared | mask) : cleared;
+}
+
+/**
+ * vexpress_spc_set_global_wakeup_intr()
+ *
+ * Function to set/clear global wakeup IRQs. Not protected by locking since
+ * it might be used in code paths where normal cacheable locks are not
+ * working. Locking must be provided by the caller to ensure atomicity.
+ *
+ * @set: if true, global wake-up IRQs are set, if false they are cleared
+ */
+void vexpress_spc_set_global_wakeup_intr(bool set)
+{
+	u32 wake_int_mask_reg = 0;
+
+	wake_int_mask_reg = readl_relaxed(info->baseaddr + WAKE_INT_MASK);
+	reg_bitmask(&wake_int_mask_reg, GBL_WAKEUP_INT_MSK, set);
+	vexpress_spc_set_wake_intr(wake_int_mask_reg);
+}
+
+/**
+ * vexpress_spc_set_cpu_wakeup_irq()
+ *
+ * Function to set/clear per-CPU wake-up IRQs. Not protected by locking since
+ * it might be used in code paths where normal cacheable locks are not
+ * working. Locking must be provided by the caller to ensure atomicity.
+ *
+ * @cpu: mpidr[7:0] bitfield describing cpu affinity level
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ * @set: if true, wake-up IRQs are set, if false they are cleared
+ */
+void vexpress_spc_set_cpu_wakeup_irq(u32 cpu, u32 cluster, bool set)
+{
+	u32 mask = 0;
+	u32 wake_int_mask_reg = 0;
+
+	mask = 1 << cpu;
+	if (info->a15_clusid != cluster)
+		mask <<= 4;
+
+	wake_int_mask_reg = readl_relaxed(info->baseaddr + WAKE_INT_MASK);
+	reg_bitmask(&wake_int_mask_reg, mask, set);
+	vexpress_spc_set_wake_intr(wake_int_mask_reg);
+}
+
+/**
+ * vexpress_spc_powerdown_enable()
+ *
+ * Function to enable/disable cluster powerdown. Not protected by locking
+ * since it might be used in code paths where normal cacheable locks are not
+ * working. Locking must be provided by the caller to ensure atomicity.
+ *
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ * @enable: if true enables powerdown, if false disables it
+ */
+void vexpress_spc_powerdown_enable(u32 cluster, bool enable)
+{
+	u32 pwdrn_reg = 0;
+
+	if (cluster >= MAX_CLUSTERS)
+		return;
+	pwdrn_reg = cluster != info->a15_clusid ? A7_PWRDN_EN : A15_PWRDN_EN;
+	writel_relaxed(enable, info->baseaddr + pwdrn_reg);
+}
+
+irqreturn_t vexpress_spc_irq_handler(int irq, void *data)
+{
+	int ret;
+	u32 status = readl_relaxed(info->baseaddr + PWC_STATUS);
+
+	if (!(status & RESPONSE_MASK(info->cur_req_type)))
+		return IRQ_NONE;
+
+	if ((status == STAT_COMPLETE(SYS_CFGCTRL_TYPE))
+			&& vexpress_spc_config_data) {
+		*vexpress_spc_config_data =
+				readl_relaxed(info->baseaddr + SYS_CFG_RDATA);
+		vexpress_spc_config_data = NULL;
+	}
+	/* Report -EIO unless the completion bit of this request is set. */
+	ret = (status & STAT_COMPLETE(info->cur_req_type)) ? 0 : -EIO;
+	info->cur_req_type = INVALID_TYPE;
+	vexpress_config_complete(vexpress_spc_config_bridge, ret);
+	return IRQ_HANDLED;
+}
+
+/**
+ * Based on the firmware documentation, this is always fixed to 20.
+ * All 4 OSCs (A15 PLL0/1, A7 PLL0/1) must be programmed with the same
+ * values for both control and value registers.
+ * This function uses A15 PLL 0 registers to compute multiple factor
+ * F out = F in * (CLKF + 1) / ((CLKOD + 1) * (CLKR + 1))
+ */
+static inline int __get_mult_factor(void)
+{
+	int i_div, o_div, f_div;
+	u32 tmp;
+
+	tmp = readl(info->baseaddr + SCC_CFGREG19);
+	f_div = (tmp >> CLKF_SHIFT) & CLKF_MASK;
+
+	tmp = readl(info->baseaddr + SCC_CFGREG20);
+	o_div = (tmp >> CLKOD_SHIFT) & CLKOD_MASK;
+	i_div = (tmp >> CLKR_SHIFT) & CLKR_MASK;
+
+	return (f_div + 1) / ((o_div + 1) * (i_div + 1));
+}
+
+/**
+ * vexpress_spc_populate_opps() - initialize opp tables from microcontroller
+ *
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ *
+ * Return: 0 on success
+ *         < 0 on error
+ */
+static int vexpress_spc_populate_opps(u32 cluster)
+{
+	u32 data = 0, i, offset;
+	int ret = 0, mult_fact = __get_mult_factor();
+
+	if (WARN_ON_ONCE(cluster >= MAX_CLUSTERS))
+		return -EINVAL;
+
+	offset = cluster != info->a15_clusid ? OPP_A7_OFFSET : OPP_A15_OFFSET;
+	for (i = 0; i < MAX_OPPS; i++) {
+		ret = vexpress_config_read(opp_func, i + offset, &data);
+		if (!ret)
+			info->freqs[cluster][i] = (data & 0xFFFFF) * mult_fact;
+		else
+			break;
+	}
+	/* i counts the entries stored before the first failed read, if any. */
+	info->freqs_cnt[cluster] = i;
+	return ret;
+}
+
+/**
+ * vexpress_spc_get_freq_table() - Retrieve a pointer to the frequency
+ *				   table for a given cluster
+ *
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ * @fptr: pointer to be initialized
+ * Return: operating points count on success
+ *         -EINVAL on pointer error
+ */
+int vexpress_spc_get_freq_table(u32 cluster, u32 **fptr)
+{
+	if (WARN_ON_ONCE(!fptr || cluster >= MAX_CLUSTERS))
+		return -EINVAL;
+	*fptr = info->freqs[cluster];
+	return info->freqs_cnt[cluster];
+}
+EXPORT_SYMBOL_GPL(vexpress_spc_get_freq_table);
+
+static void *vexpress_spc_func_get(struct device *dev,
+		struct device_node *node, const char *id)
+{
+	struct vexpress_spc_func *spc_func;
+	u32 func_device[2] = { 0, 0 };
+	int err = 0;
+
+	spc_func = kzalloc(sizeof(*spc_func), GFP_KERNEL);
+	if (!spc_func)
+		return NULL;
+	/* Tolerate a NULL id: the node-based branch below needs no name. */
+	if (id && strcmp(id, "opp") == 0) {
+		spc_func->type = CONFIG_FUNC;
+		spc_func->function = OPP_FUNCTION;
+		spc_func->device = OPP_BASE_DEVICE;
+	} else if (id && strcmp(id, "perf") == 0) {
+		spc_func->type = PERF_FUNC;
+	} else if (node) {
+		of_node_get(node);
+		err = of_property_read_u32_array(node,
+				"arm,vexpress-sysreg,func", func_device,
+				ARRAY_SIZE(func_device));
+		of_node_put(node);
+		spc_func->type = CONFIG_FUNC;
+		spc_func->function = func_device[0];
+		spc_func->device = func_device[1];
+	}
+
+	if (WARN_ON(err)) {
+		kfree(spc_func);
+		return NULL;
+	}
+
+	pr_debug("func 0x%p = 0x%x, %d %d\n", spc_func,
+					     spc_func->function,
+					     spc_func->device,
+					     spc_func->type);
+
+	return spc_func;
+}
+
+static void vexpress_spc_func_put(void *func)
+{
+	kfree(func);
+}
+
+static int vexpress_spc_func_exec(void *func, int offset, bool write,
+				  u32 *data)
+{
+	struct vexpress_spc_func *spc_func = func;
+	u32 command;
+
+	if (!data)
+		return -EINVAL;
+	/*
+	 * Setting and retrieval of operating points is not part of
+	 * DCC config interface. It was made to go through the same
+	 * code path so that requests to the M3 can be serialized
+	 * properly with config reads/writes through the common
+	 * vexpress config interface
+	 */
+	switch (spc_func->type) {
+	case PERF_FUNC:
+		if (write) {
+			info->cur_req_type = (offset == PERF_LVL_A15) ?
+					A15_OPP_TYPE : A7_OPP_TYPE;
+			writel_relaxed(*data, info->baseaddr + offset);
+			return VEXPRESS_CONFIG_STATUS_WAIT;
+		} else {
+			*data = readl_relaxed(info->baseaddr + offset);
+			return VEXPRESS_CONFIG_STATUS_DONE;
+		}
+	case CONFIG_FUNC:
+		info->cur_req_type = SYS_CFGCTRL_TYPE;
+
+		command = SYS_CFGCTRL_START;
+		command |= write ? SYS_CFGCTRL_WRITE : 0;
+		command |= SYS_CFGCTRL_FUNC(spc_func->function);
+		command |= SYS_CFGCTRL_DEVICE(spc_func->device + offset);
+
+		pr_debug("command %x\n", command);
+
+		if (!write)
+			vexpress_spc_config_data = data;
+		else
+			writel_relaxed(*data, info->baseaddr + SYS_CFG_WDATA);
+		writel_relaxed(command, info->baseaddr + SYS_CFGCTRL);
+
+		return VEXPRESS_CONFIG_STATUS_WAIT;
+	default:
+		return -EINVAL;
+	}
+}
+
+struct vexpress_config_bridge_info vexpress_spc_config_bridge_info = {
+	.name = "vexpress-spc",
+	.func_get = vexpress_spc_func_get,
+	.func_put = vexpress_spc_func_put,
+	.func_exec = vexpress_spc_func_exec,
+};
+
+static const struct of_device_id vexpress_spc_ids[] __initconst = {
+	{ .compatible = "arm,vexpress-spc,v2p-ca15_a7" },
+	{ .compatible = "arm,vexpress-spc" },
+	{},
+};
+
+static int __init vexpress_spc_init(void)
+{
+	int ret;
+	struct device_node *node = of_find_matching_node(NULL,
+							 vexpress_spc_ids);
+
+	if (!node)
+		return -ENODEV;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		pr_err("%s: unable to allocate mem\n", __func__);
+		return -ENOMEM;
+	}
+	info->cur_req_type = INVALID_TYPE;
+
+	info->baseaddr = of_iomap(node, 0);
+	if (WARN_ON(!info->baseaddr)) {
+		ret = -ENXIO;
+		goto mem_free;
+	}
+
+	info->irq = irq_of_parse_and_map(node, 0);
+	if (WARN_ON(!info->irq)) {
+		ret = -ENXIO;
+		goto unmap;
+	}
+
+	readl_relaxed(info->baseaddr + PWC_STATUS);
+
+	ret = request_irq(info->irq, vexpress_spc_irq_handler,
+		IRQF_DISABLED | IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+		"arm-spc", info);
+	if (ret) {
+		pr_err("IRQ %d request failed\n", info->irq);
+		ret = -ENODEV;
+		goto unmap;
+	}
+
+	info->a15_clusid = readl_relaxed(info->baseaddr + A15_CONF) & 0xf;
+
+	vexpress_spc_config_bridge = vexpress_config_bridge_register(
+			node, &vexpress_spc_config_bridge_info);
+
+	if (WARN_ON(!vexpress_spc_config_bridge)) {
+		ret = -ENODEV;
+		goto irq_free;
+	}
+
+	opp_func = vexpress_config_func_get(vexpress_spc_config_bridge, "opp");
+	perf_func =
+		vexpress_config_func_get(vexpress_spc_config_bridge, "perf");
+
+	if (!opp_func || !perf_func) {
+		ret = -ENODEV;
+		goto irq_free;
+	}
+
+	if (vexpress_spc_populate_opps(0) || vexpress_spc_populate_opps(1)) {
+		pr_err("failed to build OPP table\n");
+		ret = -ENODEV;
+		goto irq_free;
+	}
+	/*
+	 * Multi-cluster systems may need this data when non-coherent, during
+	 * cluster power-up/power-down. Make sure it reaches main memory:
+	 */
+	sync_cache_w(info);
+	sync_cache_w(&info);
+	pr_info("vexpress-spc loaded at %p\n", info->baseaddr);
+	return 0;
+
+	/* Every failure after request_irq() succeeded must release the IRQ. */
+irq_free:
+	free_irq(info->irq, info);
+
+unmap:
+	iounmap(info->baseaddr);
+
+mem_free:
+	kfree(info);
+	return ret;
+}
+
+static bool __init __vexpress_spc_check_loaded(void);
+/*
+ * Pointer spc_check_loaded is swapped after init hence it is safe
+ * to initialize it to a function in the __init section
+ */
+static bool (*spc_check_loaded)(void) __refdata = &__vexpress_spc_check_loaded;
+
+static bool __init __vexpress_spc_check_loaded(void)
+{
+	if (vexpress_spc_load_result == -EAGAIN)
+		vexpress_spc_load_result = vexpress_spc_init();
+	spc_check_loaded = &vexpress_spc_initialized;
+	return vexpress_spc_initialized();
+}
+
+/*
+ * Function exported to manage early_initcall ordering.
+ * SPC code is needed very early in the boot process
+ * to bring CPUs out of reset and initialize power
+ * management back-end. After boot swap pointers to
+ * make the functionality check available to loadable
+ * modules, when early boot init functions have been
+ * already freed from kernel address space.
+ */
+bool vexpress_spc_check_loaded(void)
+{
+	return spc_check_loaded();
+}
+EXPORT_SYMBOL_GPL(vexpress_spc_check_loaded);
+
+static int __init vexpress_spc_early_init(void)
+{
+	__vexpress_spc_check_loaded();
+	return vexpress_spc_load_result;
+}
+early_initcall(vexpress_spc_early_init);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Serial Power Controller (SPC) support");
diff --git a/drivers/mfd/vexpress-sysreg.c b/drivers/mfd/vexpress-sysreg.c
index 96a020b1dcd1..7f429afce112 100644
--- a/drivers/mfd/vexpress-sysreg.c
+++ b/drivers/mfd/vexpress-sysreg.c
@@ -165,7 +165,7 @@ static u32 *vexpress_sysreg_config_data;
 static int vexpress_sysreg_config_tries;
 
 static void *vexpress_sysreg_config_func_get(struct device *dev,
-		struct device_node *node)
+		struct device_node *node, const char *id)
 {
 	struct vexpress_sysreg_config_func *config_func;
 	u32 site;
@@ -351,6 +351,8 @@ void __init vexpress_sysreg_of_early_init(void)
 }
 
 
+#ifdef CONFIG_GPIOLIB
+
 #define VEXPRESS_SYSREG_GPIO(_name, _reg, _value) \
 	[VEXPRESS_GPIO_##_name] = { \
 		.reg = _reg, \
@@ -445,6 +447,8 @@ struct gpio_led_platform_data vexpress_sysreg_leds_pdata = {
 	.leds = vexpress_sysreg_leds,
 };
 
+#endif
+
 
 static ssize_t vexpress_sysreg_sys_id_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
@@ -480,6 +484,9 @@ static int vexpress_sysreg_probe(struct platform_device *pdev)
 	setup_timer(&vexpress_sysreg_config_timer,
 			vexpress_sysreg_config_complete, 0);
 
+	vexpress_sysreg_dev = &pdev->dev;
+
+#ifdef CONFIG_GPIOLIB
 	vexpress_sysreg_gpio_chip.dev = &pdev->dev;
 	err = gpiochip_add(&vexpress_sysreg_gpio_chip);
 	if (err) {
@@ -490,11 +497,10 @@ static int vexpress_sysreg_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	vexpress_sysreg_dev = &pdev->dev;
-
 	platform_device_register_data(vexpress_sysreg_dev, "leds-gpio",
 			PLATFORM_DEVID_AUTO, &vexpress_sysreg_leds_pdata,
 			sizeof(vexpress_sysreg_leds_pdata));
+#endif
 
 	device_create_file(vexpress_sysreg_dev, &dev_attr_sys_id);
 
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index dfbf978315df..bdd703c6bf16 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -1896,7 +1896,7 @@ static int smc_probe(struct net_device *dev, void __iomem *ioaddr,
 	SMC_SELECT_BANK(lp, 1);
 	val = SMC_GET_BASE(lp);
 	val = ((val & 0x1F00) >> 3) << SMC_IO_SHIFT;
-	if (((unsigned int)ioaddr & (0x3e0 << SMC_IO_SHIFT)) != val) {
+	if (((unsigned long)ioaddr & (0x3e0 << SMC_IO_SHIFT)) != val) {
 		printk("%s: IOADDR %p doesn't match configuration (%x).\n",
 			CARDNAME, ioaddr, val);
 	}
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 38f0b312ff85..663d2d0448b7 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -439,7 +439,7 @@ void phy_start_machine(struct phy_device *phydev,
 {
 	phydev->adjust_state = handler;
 
-	schedule_delayed_work(&phydev->state_queue, HZ);
+	queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, HZ);
 }
 
 /**
@@ -500,7 +500,7 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
 	disable_irq_nosync(irq);
 	atomic_inc(&phydev->irq_disable);
 
-	schedule_work(&phydev->phy_queue);
+	queue_work(system_power_efficient_wq, &phydev->phy_queue);
 
 	return IRQ_HANDLED;
 }
@@ -655,7 +655,7 @@ static void phy_change(struct work_struct *work)
 
 	/* reschedule state queue work to run as soon as possible */
 	cancel_delayed_work_sync(&phydev->state_queue);
-	schedule_delayed_work(&phydev->state_queue, 0);
+	queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, 0);
 
 	return;
 
@@ -918,7 +918,8 @@ void phy_state_machine(struct work_struct *work)
 	if (err < 0)
 		phy_error(phydev);
 
-	schedule_delayed_work(&phydev->state_queue, PHY_STATE_TIME * HZ);
+	queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,
+			PHY_STATE_TIME * HZ);
 }
 
 static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad,
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 1d10b4ec6814..f24dca92ea43 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -18,6 +18,7 @@
  *      2 of the License, or (at your option) any later version.
  */
 #include <linux/ctype.h>
+#include <linux/cpu.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/spinlock.h>
@@ -230,6 +231,100 @@ const void *of_get_property(const struct device_node *np, const char *name,
 }
 EXPORT_SYMBOL(of_get_property);
 
+/*
+ * arch_match_cpu_phys_id - Match the given logical CPU and physical id
+ *
+ * @cpu: logical cpu index of a core/thread
+ * @phys_id: physical identifier of a core/thread
+ *
+ * CPU logical to physical index mapping is architecture specific.
+ * However this __weak function provides a default match of physical
+ * id to logical cpu index. phys_id provided here is usually values read
+ * from the device tree which must match the hardware internal registers.
+ *
+ * Returns true if the physical identifier and the logical cpu index
+ * correspond to the same core/thread, false otherwise.
+ */
+bool __weak arch_match_cpu_phys_id(int cpu, u64 phys_id)
+{
+	return (u32)phys_id == cpu;
+}
+
+/**
+ * Checks if the given "prop_name" property holds the physical id of the
+ * core/thread corresponding to the logical cpu 'cpu'. If 'thread' is not
+ * NULL, local thread number within the core is returned in it.
+ */
+static bool __of_find_n_match_cpu_property(struct device_node *cpun,
+			const char *prop_name, int cpu, unsigned int *thread)
+{
+	const __be32 *cell;
+	int ac, prop_len, tid;
+	u64 hwid;
+
+	ac = of_n_addr_cells(cpun);
+	cell = of_get_property(cpun, prop_name, &prop_len);
+	if (!cell)
+		return false;
+	prop_len /= sizeof(*cell) * (ac ? ac : 1);	/* count ids, not cells */
+	for (tid = 0; tid < prop_len; tid++) {
+		hwid = of_read_number(cell, ac);
+		if (arch_match_cpu_phys_id(cpu, hwid)) {
+			if (thread)
+				*thread = tid;
+			return true;
+		}
+		cell += ac;
+	}
+	return false;
+}
+
+/**
+ * of_get_cpu_node - Get device node associated with the given logical CPU
+ *
+ * @cpu: CPU number(logical index) for which device node is required
+ * @thread: if not NULL, local thread number within the physical core is
+ *          returned
+ *
+ * The main purpose of this function is to retrieve the device node for the
+ * given logical CPU index. It should be used to initialize the of_node in
+ * cpu device. Once of_node in cpu device is populated, all the further
+ * references can use that instead.
+ *
+ * CPU logical to physical index mapping is architecture specific and is built
+ * before booting secondary cores. This function uses arch_match_cpu_phys_id
+ * which can be overridden by architecture specific implementation.
+ *
+ * Returns a node pointer for the logical cpu if found, else NULL.
+ */
+struct device_node *of_get_cpu_node(int cpu, unsigned int *thread)
+{
+	struct device_node *cpun, *cpus;
+
+	cpus = of_find_node_by_path("/cpus");
+	if (!cpus) {
+		pr_warn("Missing cpus node, bailing out\n");
+		return NULL;
+	}
+
+	for_each_child_of_node(cpus, cpun) {
+		if (of_node_cmp(cpun->type, "cpu"))
+			continue;
+		/* Check for non-standard "ibm,ppc-interrupt-server#s" property
+		 * for thread ids on PowerPC. If it doesn't exist fallback to
+		 * standard "reg" property.
+		 */
+		if ((IS_ENABLED(CONFIG_PPC) &&
+		     __of_find_n_match_cpu_property(cpun,
+				"ibm,ppc-interrupt-server#s", cpu, thread)) ||
+		    __of_find_n_match_cpu_property(cpun, "reg", cpu, thread))
+			break;
+	}
+	of_node_put(cpus);	/* release the "/cpus" reference taken above */
+	return cpun;
+}
+EXPORT_SYMBOL(of_get_cpu_node);
+
 /** Checks if the given "compat" string matches one of the strings in
  * the device's "compatible" property
  */
diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig
index 349e9ae8090a..ee039dcead04 100644
--- a/drivers/power/reset/Kconfig
+++ b/drivers/power/reset/Kconfig
@@ -32,7 +32,8 @@ config POWER_RESET_RESTART
 	  user presses a key. u-boot then boots into Linux.
 
 config POWER_RESET_VEXPRESS
-	bool
+	bool "ARM Versatile Express power-off and reset driver"
+	depends on ARM || ARM64
 	depends on POWER_RESET
 	help
 	  Power off and reset support for the ARM Ltd. Versatile
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 815d6df8bd5f..89deb736b9ea 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1890,8 +1890,9 @@ int regulator_disable_deferred(struct regulator *regulator, int ms)
 	rdev->deferred_disables++;
 	mutex_unlock(&rdev->mutex);
 
-	ret = schedule_delayed_work(&rdev->disable_work,
-				    msecs_to_jiffies(ms));
+	ret = queue_delayed_work(system_power_efficient_wq,
+				 &rdev->disable_work,
+				 msecs_to_jiffies(ms));
 	if (ret < 0)
 		return ret;
 	else
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 2e937bdace6f..29a5121ce7fd 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -39,6 +39,11 @@ config VIDEOMODE_HELPERS
 config HDMI
 	bool
 
+config VEXPRESS_DVI_CONTROL
+	bool "Versatile Express DVI control"
+	depends on FB && VEXPRESS_CONFIG
+	default y
+
 menuconfig FB
 	tristate "Support for frame buffer devices"
 	---help---
@@ -312,7 +317,8 @@ config FB_PM2_FIFO_DISCONNECT
 
 config FB_ARMCLCD
 	tristate "ARM PrimeCell PL110 support"
-	depends on FB && ARM && ARM_AMBA
+	depends on ARM || ARM64 || COMPILE_TEST
+	depends on FB && ARM_AMBA
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
@@ -326,6 +332,21 @@ config FB_ARMCLCD
 	  here and read <file:Documentation/kbuild/modules.txt>.  The module
 	  will be called amba-clcd.
 
+config FB_ARMHDLCD
+	tristate "ARM High Definition LCD support"
+	depends on FB && ARM
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	help
+	  This framebuffer device driver is for the ARM High Definition
+	  Colour LCD controller.
+
+	  If you want to compile this as a module (=code which can be
+	  inserted into and removed from the running kernel), say M
+	  here and read <file:Documentation/kbuild/modules.txt>.  The module
+	  will be called arm-hdlcd.
+
 config FB_ACORN
 	bool "Acorn VIDC support"
 	depends on (FB = y) && ARM && ARCH_ACORN
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index e8bae8dd4804..33869eea4981 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -99,6 +99,7 @@ obj-$(CONFIG_FB_ATMEL)		  += atmel_lcdfb.o
 obj-$(CONFIG_FB_PVR2)             += pvr2fb.o
 obj-$(CONFIG_FB_VOODOO1)          += sstfb.o
 obj-$(CONFIG_FB_ARMCLCD)	  += amba-clcd.o
+obj-$(CONFIG_FB_ARMHDLCD)	  += arm-hdlcd.o
 obj-$(CONFIG_FB_GOLDFISH)         += goldfishfb.o
 obj-$(CONFIG_FB_68328)            += 68328fb.o
 obj-$(CONFIG_FB_GBE)              += gbefb.o
@@ -177,3 +178,6 @@ obj-$(CONFIG_VIDEOMODE_HELPERS) += display_timing.o videomode.o
 ifeq ($(CONFIG_OF),y)
 obj-$(CONFIG_VIDEOMODE_HELPERS) += of_display_timing.o of_videomode.o
 endif
+
+# platform specific output drivers
+obj-$(CONFIG_VEXPRESS_DVI_CONTROL) += vexpress-dvi.o
diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c
index 0a2cce7285be..94a1998338da 100644
--- a/drivers/video/amba-clcd.c
+++ b/drivers/video/amba-clcd.c
@@ -16,7 +16,10 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
+#include <linux/of.h>
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
@@ -30,6 +33,16 @@
 
 #define to_clcd(info)	container_of(info, struct clcd_fb, fb)
 
+/*
+ * DMA helper aliases: builds with CONFIG_ARM use the write-combining
+ * allocate/free/mmap variants, all other builds use the coherent ones.
+ */
+#ifdef CONFIG_ARM
+#define clcdfb_dma_alloc	dma_alloc_writecombine
+#define clcdfb_dma_free		dma_free_writecombine
+#define clcdfb_dma_mmap		dma_mmap_writecombine
+#else
+#define clcdfb_dma_alloc	dma_alloc_coherent
+#define clcdfb_dma_free		dma_free_coherent
+#define clcdfb_dma_mmap		dma_mmap_coherent
+#endif
+
 /* This is limited to 16 characters when displayed by X startup */
 static const char *clcd_name = "CLCD FB";
 
@@ -392,6 +405,44 @@ static int clcdfb_blank(int blank_mode, struct fb_info *info)
 	return 0;
 }
 
+/*
+ * Map a DMA-allocated framebuffer into the user VMA @vma.
+ *
+ * Forwards the buffer's CPU address, bus address and length to
+ * clcdfb_dma_mmap() and returns its result unchanged.
+ */
+int clcdfb_mmap_dma(struct clcd_fb *fb, struct vm_area_struct *vma)
+{
+	return clcdfb_dma_mmap(&fb->dev->dev, vma,
+			       fb->fb.screen_base,
+			       fb->fb.fix.smem_start,
+			       fb->fb.fix.smem_len);
+}
+
+/*
+ * Map a physically addressed framebuffer into the user VMA @vma, uncached.
+ *
+ * The request (page offset plus length) must lie inside the page-aligned
+ * framebuffer. Returns the result of remap_pfn_range(), or -ENXIO when the
+ * request falls outside the framebuffer.
+ */
+int clcdfb_mmap_io(struct clcd_fb *fb, struct vm_area_struct *vma)
+{
+	unsigned long req_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long fb_pages = PAGE_ALIGN(fb->fb.fix.smem_len) >> PAGE_SHIFT;
+	unsigned long first_pfn = fb->fb.fix.smem_start >> PAGE_SHIFT;
+	unsigned long pgoff = vma->vm_pgoff;
+
+	/* The protection is switched before the range is validated. */
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	if (pgoff >= fb_pages || req_pages > fb_pages - pgoff)
+		return -ENXIO;
+
+	return remap_pfn_range(vma, vma->vm_start, first_pfn + pgoff,
+			       req_pages << PAGE_SHIFT, vma->vm_page_prot);
+}
+
+/*
+ * Release a framebuffer obtained through clcdfb_dma_alloc(), using the
+ * length, CPU address and bus address recorded in @fb.
+ */
+void clcdfb_remove_dma(struct clcd_fb *fb)
+{
+	clcdfb_dma_free(&fb->dev->dev, fb->fb.fix.smem_len,
+			fb->fb.screen_base, fb->fb.fix.smem_start);
+}
+
+/* Undo the I/O mapping of the framebuffer held in fb->fb.screen_base. */
+void clcdfb_remove_io(struct clcd_fb *fb)
+{
+	iounmap(fb->fb.screen_base);
+}
+
 static int clcdfb_mmap(struct fb_info *info,
 		       struct vm_area_struct *vma)
 {
@@ -542,14 +593,239 @@ static int clcdfb_register(struct clcd_fb *fb)
 	return ret;
 }
 
+/*
+ * One entry of a name-to-value table; tables are terminated by an entry
+ * whose string is NULL.
+ */
+struct string_lookup {
+	const char *string;	/* symbolic name as written in the device tree */
+	const u32	val;	/* value the name stands for */
+};
+
+/* Names accepted in the "vmode" property, mapped to FB_VMODE_* values. */
+static struct string_lookup vmode_lookups[] = {
+	{ "FB_VMODE_NONINTERLACED", FB_VMODE_NONINTERLACED},
+	{ "FB_VMODE_INTERLACED",    FB_VMODE_INTERLACED},
+	{ "FB_VMODE_DOUBLE",        FB_VMODE_DOUBLE},
+	{ "FB_VMODE_ODD_FLD_FIRST", FB_VMODE_ODD_FLD_FIRST},
+	{ NULL, 0 },
+};
+
+/* Names accepted in the "tim2" property, mapped to TIM2_* values. */
+static struct string_lookup tim2_lookups[] = {
+	{ "TIM2_CLKSEL", TIM2_CLKSEL},
+	{ "TIM2_IVS",    TIM2_IVS},
+	{ "TIM2_IHS",    TIM2_IHS},
+	{ "TIM2_IPC",    TIM2_IPC},
+	{ "TIM2_IOE",    TIM2_IOE},
+	{ "TIM2_BCD",    TIM2_BCD},
+	{ NULL, 0},
+};
+/* Names accepted in the "cntl" property, mapped to CNTL_* values. */
+static struct string_lookup cntl_lookups[] = {
+	{"CNTL_LCDEN",        CNTL_LCDEN},
+	{"CNTL_LCDBPP1",      CNTL_LCDBPP1},
+	{"CNTL_LCDBPP2",      CNTL_LCDBPP2},
+	{"CNTL_LCDBPP4",      CNTL_LCDBPP4},
+	{"CNTL_LCDBPP8",      CNTL_LCDBPP8},
+	{"CNTL_LCDBPP16",     CNTL_LCDBPP16},
+	{"CNTL_LCDBPP16_565", CNTL_LCDBPP16_565},
+	{"CNTL_LCDBPP16_444", CNTL_LCDBPP16_444},
+	{"CNTL_LCDBPP24",     CNTL_LCDBPP24},
+	{"CNTL_LCDBW",        CNTL_LCDBW},
+	{"CNTL_LCDTFT",       CNTL_LCDTFT},
+	{"CNTL_LCDMONO8",     CNTL_LCDMONO8},
+	{"CNTL_LCDDUAL",      CNTL_LCDDUAL},
+	{"CNTL_BGR",          CNTL_BGR},
+	{"CNTL_BEBO",         CNTL_BEBO},
+	{"CNTL_BEPO",         CNTL_BEPO},
+	{"CNTL_LCDPWR",       CNTL_LCDPWR},
+	{"CNTL_LCDVCOMP(1)",  CNTL_LCDVCOMP(1)},
+	{"CNTL_LCDVCOMP(2)",  CNTL_LCDVCOMP(2)},
+	{"CNTL_LCDVCOMP(3)",  CNTL_LCDVCOMP(3)},
+	{"CNTL_LCDVCOMP(4)",  CNTL_LCDVCOMP(4)},
+	{"CNTL_LCDVCOMP(5)",  CNTL_LCDVCOMP(5)},
+	{"CNTL_LCDVCOMP(6)",  CNTL_LCDVCOMP(6)},
+	{"CNTL_LCDVCOMP(7)",  CNTL_LCDVCOMP(7)},
+	{"CNTL_LDMAFIFOTIME", CNTL_LDMAFIFOTIME},
+	{"CNTL_WATERMARK",    CNTL_WATERMARK},
+	{ NULL, 0},
+};
+/* Names accepted in the "caps" property, mapped to CLCD_CAP_* values. */
+static struct string_lookup caps_lookups[] = {
+	{"CLCD_CAP_RGB444",  CLCD_CAP_RGB444},
+	{"CLCD_CAP_RGB5551", CLCD_CAP_RGB5551},
+	{"CLCD_CAP_RGB565",  CLCD_CAP_RGB565},
+	{"CLCD_CAP_RGB888",  CLCD_CAP_RGB888},
+	{"CLCD_CAP_BGR444",  CLCD_CAP_BGR444},
+	{"CLCD_CAP_BGR5551", CLCD_CAP_BGR5551},
+	{"CLCD_CAP_BGR565",  CLCD_CAP_BGR565},
+	{"CLCD_CAP_BGR888",  CLCD_CAP_BGR888},
+	{"CLCD_CAP_444",     CLCD_CAP_444},
+	{"CLCD_CAP_5551",    CLCD_CAP_5551},
+	{"CLCD_CAP_565",     CLCD_CAP_565},
+	{"CLCD_CAP_888",     CLCD_CAP_888},
+	{"CLCD_CAP_RGB",     CLCD_CAP_RGB},
+	{"CLCD_CAP_BGR",     CLCD_CAP_BGR},
+	{"CLCD_CAP_ALL",     CLCD_CAP_ALL},
+	{ NULL, 0},
+};
+
+/*
+ * Find @name in the NULL-terminated table @lookup and return its value.
+ * When no entry matches, -EINVAL converted to u32 is returned.
+ */
+u32 parse_setting(struct string_lookup *lookup, const char *name)
+{
+	struct string_lookup *entry;
+
+	for (entry = lookup; entry->string; entry++)
+		if (!strcmp(entry->string, name))
+			return entry->val;
+
+	return -EINVAL;
+}
+
+/*
+ * OR together the values of every string listed in property @name of
+ * @node, translating each string through the table @lookup.
+ *
+ * Returns the combined mask, or 0 when the property is absent or empty.
+ * Strings without a table entry are reported and skipped: parse_setting()
+ * signals a miss with -EINVAL converted to u32, and OR-ing that into the
+ * mask would set nearly every bit of the result.
+ */
+u32 get_string_lookup(struct device_node *node, const char *name,
+		      struct string_lookup *lookup)
+{
+	const char *string;
+	u32 mask = 0;
+	u32 val;
+	int count, i;
+
+	/* A negative count (missing property) simply skips the loop. */
+	count = of_property_count_strings(node, name);
+	for (i = 0; i < count; i++) {
+		if (of_property_read_string_index(node, name, i, &string))
+			continue;
+
+		val = parse_setting(lookup, string);
+		if (val == (u32)-EINVAL) {
+			pr_warn("CLCD: unknown %s setting \"%s\" ignored\n",
+				name, string);
+			continue;
+		}
+		mask |= val;
+	}
+
+	return mask;
+}
+
+/*
+ * Read the u32 property @string of @node.
+ * Returns the value converted to int, or -1 when the read fails.
+ */
+int get_val(struct device_node *node, const char *string)
+{
+	u32 value;
+
+	if (of_property_read_u32(node, string, &value))
+		return -1;
+
+	return value;
+}
+
+/*
+ * Build a panel description from the properties of @node.
+ *
+ * Numeric fields come from get_val(), so a missing property stores -1 in
+ * the field; the flag fields are assembled by get_string_lookup().
+ *
+ * The result lives in a single function-static structure: every call
+ * overwrites it and returns the same pointer.
+ */
+struct clcd_panel *getPanel(struct device_node *node)
+{
+	static struct clcd_panel panel;
+
+	panel.mode.refresh      = get_val(node, "refresh");
+	panel.mode.xres         = get_val(node, "xres");
+	panel.mode.yres         = get_val(node, "yres");
+	panel.mode.pixclock     = get_val(node, "pixclock");
+	panel.mode.left_margin  = get_val(node, "left_margin");
+	panel.mode.right_margin = get_val(node, "right_margin");
+	panel.mode.upper_margin = get_val(node, "upper_margin");
+	panel.mode.lower_margin = get_val(node, "lower_margin");
+	panel.mode.hsync_len    = get_val(node, "hsync_len");
+	panel.mode.vsync_len    = get_val(node, "vsync_len");
+	panel.mode.sync         = get_val(node, "sync");
+	panel.bpp               = get_val(node, "bpp");
+	panel.width             = (signed short) get_val(node, "width");
+	panel.height            = (signed short) get_val(node, "height");
+
+	panel.mode.vmode = get_string_lookup(node, "vmode", vmode_lookups);
+	panel.tim2       = get_string_lookup(node, "tim2",  tim2_lookups);
+	panel.cntl       = get_string_lookup(node, "cntl",  cntl_lookups);
+	panel.caps       = get_string_lookup(node, "caps",  caps_lookups);
+
+	return &panel;
+}
+
+/*
+ * Search all nodes compatible with "panel" for one whose "mode" property
+ * equals @name and return the panel built from it, or NULL if none matches.
+ *
+ * The walk does not stop at the first hit, so the last matching node
+ * determines the result. The panel's mode name is set to @name itself.
+ */
+struct clcd_panel *clcdfb_get_panel(const char *name)
+{
+	struct clcd_panel *found = NULL;
+	struct device_node *np;
+	const char *mode;
+
+	for (np = of_find_compatible_node(NULL, NULL, "panel"); np;
+	     np = of_find_compatible_node(np, NULL, "panel")) {
+		if (of_property_read_string(np, "mode", &mode))
+			continue;
+		if (strcmp(mode, name))
+			continue;
+
+		found = getPanel(np);
+		found->mode.name = name;
+	}
+
+	return found;
+}
+
+#ifdef CONFIG_OF
+/*
+ * Board setup callback for device-tree probed controllers.
+ *
+ * Selects the panel named by the node's "mode" property, sizes the
+ * framebuffer at two bytes per pixel and fills in the board callbacks.
+ * The framebuffer is either DMA-allocated ("use_dma" non-zero) or taken
+ * from the <base size> pair in the "framebuffer" property.
+ *
+ * Returns 0 on success, -ENODEV when there is no node or no "mode",
+ * -EINVAL for an unknown panel or an unusable "framebuffer" property, and
+ * -ENOMEM when the framebuffer cannot be allocated or mapped.
+ */
+static int clcdfb_dt_init(struct clcd_fb *fb)
+{
+	struct device_node *node;
+	const char *mode;
+	dma_addr_t dma;
+	u32 use_dma;
+	const __be32 *prop;
+	int len, na, ns;
+	phys_addr_t fb_base, fb_size;
+
+	node = fb->dev->dev.of_node;
+	if (!node)
+		return -ENODEV;
+
+	na = of_n_addr_cells(node);
+	ns = of_n_size_cells(node);
+
+	if (WARN_ON(of_property_read_string(node, "mode", &mode)))
+		return -ENODEV;
+
+	fb->panel = clcdfb_get_panel(mode);
+	if (!fb->panel)
+		return -EINVAL;
+	fb->fb.fix.smem_len = fb->panel->mode.xres * fb->panel->mode.yres * 2;
+
+	fb->board->name		= "Device Tree CLCD PL111";
+	fb->board->caps		= CLCD_CAP_5551 | CLCD_CAP_565;
+	fb->board->check	= clcdfb_check;
+	fb->board->decode	= clcdfb_decode;
+
+	/* A missing "use_dma" property means "do not use DMA". */
+	if (of_property_read_u32(node, "use_dma", &use_dma))
+		use_dma = 0;
+
+	if (use_dma) {
+		fb->fb.screen_base = clcdfb_dma_alloc(&fb->dev->dev,
+						      fb->fb.fix.smem_len,
+						      &dma, GFP_KERNEL);
+		if (!fb->fb.screen_base) {
+			pr_err("CLCD: unable to map framebuffer\n");
+			return -ENOMEM;
+		}
+
+		fb->fb.fix.smem_start	= dma;
+		fb->board->mmap		= clcdfb_mmap_dma;
+		fb->board->remove	= clcdfb_remove_dma;
+	} else {
+		prop = of_get_property(node, "framebuffer", &len);
+		if (WARN_ON(!prop || len < (na + ns) * sizeof(*prop)))
+			return -EINVAL;
+
+		fb_base = of_read_number(prop, na);
+		fb_size = of_read_number(prop + na, ns);
+
+		/* Only fb_size bytes get mapped, so a full frame must fit. */
+		if (fb_size < fb->fb.fix.smem_len) {
+			pr_err("CLCD: framebuffer region too small\n");
+			return -EINVAL;
+		}
+
+		fb->fb.screen_base = ioremap_wc(fb_base, fb_size);
+		if (!fb->fb.screen_base) {
+			pr_err("CLCD: unable to map framebuffer\n");
+			return -ENOMEM;
+		}
+
+		fb->fb.fix.smem_start	= fb_base;
+		fb->board->mmap		= clcdfb_mmap_io;
+		fb->board->remove	= clcdfb_remove_io;
+	}
+
+	return 0;
+}
+#endif /* CONFIG_OF */
+
 static int clcdfb_probe(struct amba_device *dev, const struct amba_id *id)
 {
 	struct clcd_board *board = dev->dev.platform_data;
 	struct clcd_fb *fb;
 	int ret;
 
-	if (!board)
-		return -EINVAL;
+	if (!board) {
+#ifdef CONFIG_OF
+		if (dev->dev.of_node) {
+			board = kzalloc(sizeof(struct clcd_board), GFP_KERNEL);
+			if (!board)
+				return -ENOMEM;
+			board->setup   = clcdfb_dt_init;
+		} else
+#endif
+			return -EINVAL;
+	}
 
 	ret = amba_request_regions(dev, NULL);
 	if (ret) {
diff --git a/drivers/video/arm-hdlcd.c b/drivers/video/arm-hdlcd.c
new file mode 100644
index 000000000000..cfd631e3dc52
--- /dev/null
+++ b/drivers/video/arm-hdlcd.c
@@ -0,0 +1,844 @@
+/*
+ * drivers/video/arm-hdlcd.c
+ *
+ * Copyright (C) 2011 ARM Limited
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ *  ARM HDLCD Controller
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/of.h>
+#include <linux/fb.h>
+#include <linux/clk.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
+#include <linux/memblock.h>
+#include <linux/arm-hdlcd.h>
+#ifdef HDLCD_COUNT_BUFFERUNDERRUNS
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#endif
+
+#include "edid.h"
+
+#ifdef CONFIG_SERIAL_AMBA_PCU_UART
+int get_edid(u8 *msgbuf);
+#else
+#endif
+
+#define to_hdlcd_device(info)	container_of(info, struct hdlcd_device, fb)
+
+/*
+ * Device-tree compatible strings handled by this driver. Exported as a
+ * module device table so a modular build can be loaded on a match.
+ */
+static const struct of_device_id hdlcd_of_matches[] = {
+	{ .compatible	= "arm,hdlcd" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, hdlcd_of_matches);
+
+/*
+ * Framebuffer size in bytes. Set in hdlcd_probe() from the device-tree
+ * "framebuffer" property, capped at HDLCD_MAX_FRAMEBUFFER_SIZE.
+ */
+static unsigned long framebuffer_size;
+
+#ifdef HDLCD_COUNT_BUFFERUNDERRUNS
+/* Underrun interrupts counted so far; guarded by hdlcd_underrun_lock. */
+static unsigned long buffer_underrun_events;
+static DEFINE_SPINLOCK(hdlcd_underrun_lock);
+
+/*
+ * Store @val as the buffer-underrun count.
+ *
+ * The interrupt handler takes the same lock, so interrupts are disabled
+ * while it is held; otherwise an interrupt arriving on the CPU that owns
+ * the lock would spin on it forever.
+ */
+static void hdlcd_underrun_set(unsigned long val)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&hdlcd_underrun_lock, flags);
+	buffer_underrun_events = val;
+	spin_unlock_irqrestore(&hdlcd_underrun_lock, flags);
+}
+
+/*
+ * Return the current buffer-underrun count.
+ *
+ * Called from the /proc read path as well as from the interrupt handler,
+ * so the lock is taken with interrupts disabled to keep the handler from
+ * spinning on a lock its own CPU already holds.
+ */
+static unsigned long hdlcd_underrun_get(void)
+{
+	unsigned long flags;
+	unsigned long val;
+
+	spin_lock_irqsave(&hdlcd_underrun_lock, flags);
+	val = buffer_underrun_events;
+	spin_unlock_irqrestore(&hdlcd_underrun_lock, flags);
+
+	return val;
+}
+
+#ifdef CONFIG_PROC_FS
+/* seq_file show callback: print the underrun count followed by a newline. */
+static int hdlcd_underrun_show(struct seq_file *m, void *v)
+{
+	seq_printf(m, "%lu\n", hdlcd_underrun_get());
+
+	return 0;
+}
+
+/* Open handler: bind hdlcd_underrun_show() to the file via single_open(). */
+static int proc_hdlcd_underrun_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hdlcd_underrun_show, NULL);
+}
+
+/* File operations of the "hdlcd_underrun" proc entry (seq_file based). */
+static const struct file_operations proc_hdlcd_underrun_operations = {
+	.open		= proc_hdlcd_underrun_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/*
+ * Reset the underrun counter and create the "hdlcd_underrun" proc entry.
+ * Always returns 0; the result of proc_create() is not checked.
+ */
+static int hdlcd_underrun_init(void)
+{
+	hdlcd_underrun_set(0);
+	proc_create("hdlcd_underrun", 0, NULL, &proc_hdlcd_underrun_operations);
+	return 0;
+}
+/* Remove the "hdlcd_underrun" proc entry. */
+static void hdlcd_underrun_close(void)
+{
+	remove_proc_entry("hdlcd_underrun", NULL);
+}
+#else
+/* Without CONFIG_PROC_FS there is no proc entry to create or remove. */
+static int hdlcd_underrun_init(void) { return 0; }
+static void hdlcd_underrun_close(void) { }
+#endif
+#endif
+
+/*
+ * Mode string handed to fb_find_mode(). It is a writable array rather than
+ * a pointer to a string literal because hdlcd_probe() copies the
+ * device-tree "mode" property into it.
+ */
+static char fb_mode[32] = "1680x1050-32@60";
+
+/* Last settings programmed by hdlcd_set_par(); used to skip redundant work. */
+static struct fb_var_screeninfo cached_var_screeninfo;
+
+/* Fallback video mode passed to fb_find_mode(): 1680x1050 at 60 Hz. */
+static struct fb_videomode hdlcd_default_mode = {
+	.refresh	= 60,
+	.xres		= 1680,
+	.yres		= 1050,
+	.pixclock	= 8403,
+	.left_margin	= 80,
+	.right_margin	= 48,
+	.upper_margin	= 21,
+	.lower_margin	= 3,
+	.hsync_len	= 32,
+	.vsync_len	= 6,
+	.sync		= FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+	.vmode		= FB_VMODE_NONINTERLACED
+};
+
+/* Write 1 to the command register to switch the output on. */
+static inline void hdlcd_enable(struct hdlcd_device *hdlcd)
+{
+	dev_dbg(hdlcd->dev, "HDLCD: output enabled\n");
+	writel(1, hdlcd->base + HDLCD_REG_COMMAND);
+}
+
+/* Write 0 to the command register to switch the output off. */
+static inline void hdlcd_disable(struct hdlcd_device *hdlcd)
+{
+	dev_dbg(hdlcd->dev, "HDLCD: output disabled\n");
+	writel(0, hdlcd->base + HDLCD_REG_COMMAND);
+}
+
+/*
+ * Fill in the colour bitfields of @var to match var->bits_per_pixel.
+ *
+ * 8, 16 (565), 24 and 32 bits per pixel are accepted; any other depth
+ * returns -EINVAL. For 32 bpp the layout is (msb) alpha, blue, green,
+ * red (lsb); all other depths put blue in the low bits with green and
+ * red above it. @hdlcd is not used.
+ *
+ * Returns 0 on success.
+ */
+static int hdlcd_set_bitfields(struct hdlcd_device *hdlcd,
+				struct fb_var_screeninfo *var)
+{
+	memset(&var->transp, 0, sizeof(var->transp));
+	var->red.msb_right = 0;
+	var->green.msb_right = 0;
+	var->blue.msb_right = 0;
+	var->blue.offset = 0;
+
+	switch (var->bits_per_pixel) {
+	case 8:
+		/* pseudocolor */
+		var->red.length = 8;
+		var->green.length = 8;
+		var->blue.length = 8;
+		break;
+	case 16:
+		/* 565 format */
+		var->red.length = 5;
+		var->green.length = 6;
+		var->blue.length = 5;
+		break;
+	case 32:
+		var->transp.length = 8;
+		/* fall through */
+	case 24:
+		var->red.length = 8;
+		var->green.length = 8;
+		var->blue.length = 8;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (var->bits_per_pixel != 32) {
+		var->green.offset = var->blue.length;
+		var->red.offset = var->green.offset + var->green.length;
+	} else {
+		/* Previously, the byte ordering for 32-bit color was
+		 * (msb)<alpha><red><green><blue>(lsb)
+		 * but this does not match what android expects and
+		 * the colors are odd. Instead, use
+		 * <alpha><blue><green><red>
+		 * Since we tell fb what we are doing, console,
+		 * X and directfb access should work fine.
+		 */
+		/* Red sits in the low byte; do not keep the caller's value. */
+		var->red.offset = 0;
+		var->green.offset = var->red.length;
+		var->blue.offset = var->green.offset + var->green.length;
+		var->transp.offset = var->blue.offset + var->blue.length;
+	}
+
+	return 0;
+}
+
+/*
+ * Validate and adjust a requested video mode.
+ *
+ * Forces yres_virtual to yres (HDLCD_NO_VIRTUAL_SCREEN) or twice yres,
+ * then checks that the virtual screen fits in the framebuffer and that
+ * the visible resolution is within the controller limits.
+ *
+ * Returns -ENOMEM when the framebuffer is too small, -EINVAL when the
+ * resolution or depth is unsupported, otherwise the result of
+ * hdlcd_set_bitfields().
+ */
+static int hdlcd_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+{
+	struct hdlcd_device *hdlcd = to_hdlcd_device(info);
+	u32 bytes_per_pixel = var->bits_per_pixel / 8;
+	u64 pixels;
+
+#ifdef HDLCD_NO_VIRTUAL_SCREEN
+	var->yres_virtual = var->yres;
+#else
+	var->yres_virtual = 2 * var->yres;
+#endif
+
+	/*
+	 * The values come from user space, so a 32-bit product could wrap and
+	 * slip past the size check. Count pixels in 64 bits and compare with
+	 * the number of pixels the framebuffer can hold instead.
+	 */
+	pixels = (u64)var->xres_virtual * var->yres_virtual;
+	if (bytes_per_pixel &&
+	    pixels > hdlcd->fb.fix.smem_len / bytes_per_pixel)
+		return -ENOMEM;
+
+	if (var->xres > HDLCD_MAX_XRES || var->yres > HDLCD_MAX_YRES)
+		return -EINVAL;
+
+	/* make sure the bitfields are set appropriately */
+	return hdlcd_set_bitfields(hdlcd, var);
+}
+
+/* Forward declaration: hdlcd_set_par() calls this before its definition. */
+static int hdlcd_pan_display(struct fb_var_screeninfo *var,
+	struct fb_info *info);
+
+/*
+ * Register accessors. Both expand to an access relative to hdlcd->base,
+ * so a variable named 'hdlcd' must be in scope at the point of use.
+ */
+#define WRITE_HDLCD_REG(reg, value)	writel((value), hdlcd->base + (reg))
+#define READ_HDLCD_REG(reg)		readl(hdlcd->base + (reg))
+
+/*
+ * Program the controller with the mode held in info->var.
+ *
+ * When nothing but yoffset differs from the previously programmed
+ * settings only a pan is performed, and when nothing differs at all the
+ * call is a no-op. Otherwise the output is disabled, the timing, format
+ * and colour-select registers are rewritten, the pixel clock is set and
+ * the output is enabled again.
+ *
+ * Returns 0 on success or -EINVAL when the pixel clock period is zero.
+ */
+static int hdlcd_set_par(struct fb_info *info)
+{
+	struct hdlcd_device *hdlcd = to_hdlcd_device(info);
+	int bytes_per_pixel = hdlcd->fb.var.bits_per_pixel / 8;
+	int polarities;
+	int old_yoffset;
+
+	/* check for shortcuts */
+	old_yoffset = cached_var_screeninfo.yoffset;
+	cached_var_screeninfo.yoffset = info->var.yoffset;
+	if (!memcmp(&info->var, &cached_var_screeninfo,
+				sizeof(struct fb_var_screeninfo))) {
+		if (old_yoffset != info->var.yoffset) {
+			/* only yoffset changed, and it has already been
+			 * recorded a couple of lines up
+			 */
+			hdlcd_pan_display(&info->var, info);
+		}
+		/* or no change */
+		return 0;
+	}
+
+	/* pixclock is used as a divisor below; refuse a zero period */
+	if (!hdlcd->fb.var.pixclock)
+		return -EINVAL;
+
+	hdlcd->fb.fix.line_length = hdlcd->fb.var.xres * bytes_per_pixel;
+
+	if (hdlcd->fb.var.bits_per_pixel >= 16)
+		hdlcd->fb.fix.visual = FB_VISUAL_TRUECOLOR;
+	else
+		hdlcd->fb.fix.visual = FB_VISUAL_PSEUDOCOLOR;
+
+	memcpy(&cached_var_screeninfo, &info->var, sizeof(struct fb_var_screeninfo));
+
+	polarities = HDLCD_POLARITY_DATAEN |
+#ifndef CONFIG_ARCH_TUSCAN
+		HDLCD_POLARITY_PIXELCLK |
+#endif
+		HDLCD_POLARITY_DATA;
+	polarities |= (hdlcd->fb.var.sync & FB_SYNC_HOR_HIGH_ACT) ? HDLCD_POLARITY_HSYNC : 0;
+	polarities |= (hdlcd->fb.var.sync & FB_SYNC_VERT_HIGH_ACT) ? HDLCD_POLARITY_VSYNC : 0;
+
+	hdlcd_disable(hdlcd);
+
+	WRITE_HDLCD_REG(HDLCD_REG_FB_LINE_LENGTH, hdlcd->fb.var.xres * bytes_per_pixel);
+	WRITE_HDLCD_REG(HDLCD_REG_FB_LINE_PITCH, hdlcd->fb.var.xres * bytes_per_pixel);
+	WRITE_HDLCD_REG(HDLCD_REG_FB_LINE_COUNT, hdlcd->fb.var.yres - 1);
+	WRITE_HDLCD_REG(HDLCD_REG_V_SYNC, hdlcd->fb.var.vsync_len - 1);
+	WRITE_HDLCD_REG(HDLCD_REG_V_BACK_PORCH, hdlcd->fb.var.upper_margin - 1);
+	WRITE_HDLCD_REG(HDLCD_REG_V_DATA, hdlcd->fb.var.yres - 1);
+	WRITE_HDLCD_REG(HDLCD_REG_V_FRONT_PORCH, hdlcd->fb.var.lower_margin - 1);
+	WRITE_HDLCD_REG(HDLCD_REG_H_SYNC, hdlcd->fb.var.hsync_len - 1);
+	WRITE_HDLCD_REG(HDLCD_REG_H_BACK_PORCH, hdlcd->fb.var.left_margin - 1);
+	WRITE_HDLCD_REG(HDLCD_REG_H_DATA, hdlcd->fb.var.xres - 1);
+	WRITE_HDLCD_REG(HDLCD_REG_H_FRONT_PORCH, hdlcd->fb.var.right_margin - 1);
+	WRITE_HDLCD_REG(HDLCD_REG_POLARITIES, polarities);
+	WRITE_HDLCD_REG(HDLCD_REG_PIXEL_FORMAT, (bytes_per_pixel - 1) << 3);
+#ifdef HDLCD_RED_DEFAULT_COLOUR
+	WRITE_HDLCD_REG(HDLCD_REG_RED_SELECT,
+			(0x00ff0000 | (hdlcd->fb.var.red.length & 0xf) << 8) |
+			hdlcd->fb.var.red.offset);
+#else
+	WRITE_HDLCD_REG(HDLCD_REG_RED_SELECT, ((hdlcd->fb.var.red.length & 0xf) << 8) | hdlcd->fb.var.red.offset);
+#endif
+	WRITE_HDLCD_REG(HDLCD_REG_GREEN_SELECT, ((hdlcd->fb.var.green.length & 0xf) << 8) | hdlcd->fb.var.green.offset);
+	WRITE_HDLCD_REG(HDLCD_REG_BLUE_SELECT, ((hdlcd->fb.var.blue.length & 0xf) << 8) | hdlcd->fb.var.blue.offset);
+
+	/* pixclock is a period in picoseconds; convert it to a rate in Hz */
+	clk_set_rate(hdlcd->clk, (1000000000 / hdlcd->fb.var.pixclock) * 1000);
+	clk_enable(hdlcd->clk);
+
+	hdlcd_enable(hdlcd);
+
+	return 0;
+}
+
+/*
+ * Store one entry of the 16-entry pseudo palette.
+ *
+ * The top eight bits of each 16-bit colour component are shifted to the
+ * component's offset in the current pixel format. Register numbers of 16
+ * and above are ignored. @transp is not used. Always returns 0.
+ */
+static int hdlcd_setcolreg(unsigned int regno, unsigned int red, unsigned int green,
+		unsigned int blue, unsigned int transp, struct fb_info *info)
+{
+	u32 *palette;
+	u32 pixel;
+
+	if (regno >= 16)
+		return 0;
+
+	pixel = (red >> 8) << info->var.red.offset;
+	pixel |= (green >> 8) << info->var.green.offset;
+	pixel |= (blue >> 8) << info->var.blue.offset;
+
+	palette = info->pseudo_palette;
+	palette[regno] = pixel;
+
+	return 0;
+}
+
+/*
+ * Interrupt handler; @data is the struct hdlcd_device passed to
+ * request_irq().
+ *
+ * Acknowledges every pending source, counts underruns when
+ * HDLCD_COUNT_BUFFERUNDERRUNS is defined, and on VSYNC masks further VSYNC
+ * interrupts and completes vsync_completion. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t hdlcd_irq(int irq, void *data)
+{
+	struct hdlcd_device *hdlcd = data;
+	unsigned long irq_mask, irq_status;
+
+	irq_mask = READ_HDLCD_REG(HDLCD_REG_INT_MASK);
+	irq_status = READ_HDLCD_REG(HDLCD_REG_INT_STATUS);
+
+	/* acknowledge interrupt(s) */
+	WRITE_HDLCD_REG(HDLCD_REG_INT_CLEAR, irq_status);
+#ifdef HDLCD_COUNT_BUFFERUNDERRUNS
+	if (irq_status & HDLCD_INTERRUPT_UNDERRUN) {
+		/* increment the count */
+		hdlcd_underrun_set(hdlcd_underrun_get() + 1);
+	}
+#endif
+	if (irq_status & HDLCD_INTERRUPT_VSYNC) {
+		/* disable future VSYNC interrupts */
+		WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, irq_mask & ~HDLCD_INTERRUPT_VSYNC);
+
+		/* wake the waiter in hdlcd_wait_for_vsync() */
+		complete(&hdlcd->vsync_completion);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Block until the next VSYNC interrupt, for at most 100 ms.
+ *
+ * Unmasks the VSYNC interrupt and sleeps on vsync_completion; the
+ * interrupt handler masks VSYNC again when it fires.
+ *
+ * Returns 0 when a VSYNC was seen, -ETIMEDOUT when none arrived in time,
+ * or the negative error from the wait when a signal interrupted it.
+ */
+static int hdlcd_wait_for_vsync(struct fb_info *info)
+{
+	struct hdlcd_device *hdlcd = to_hdlcd_device(info);
+	unsigned long irq_mask;
+	long ret;
+
+	/* enable VSYNC interrupt */
+	irq_mask = READ_HDLCD_REG(HDLCD_REG_INT_MASK);
+	WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, irq_mask | HDLCD_INTERRUPT_VSYNC);
+
+	ret = wait_for_completion_interruptible_timeout(&hdlcd->vsync_completion,
+							msecs_to_jiffies(100));
+	/* a negative result means a signal arrived, not that VSYNC was seen */
+	if (ret < 0)
+		return ret;
+	if (ret == 0)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+/*
+ * Blank or unblank the display.
+ *
+ * FB_BLANK_POWERDOWN disables the clock and then the output,
+ * FB_BLANK_NORMAL disables only the output, and FB_BLANK_UNBLANK enables
+ * the clock and the output. Returns 0 for those modes and 1 for every
+ * other mode, which is left unhandled.
+ */
+static int hdlcd_blank(int blank_mode, struct fb_info *info)
+{
+	struct hdlcd_device *hdlcd = to_hdlcd_device(info);
+
+	switch (blank_mode) {
+	case FB_BLANK_POWERDOWN:
+		clk_disable(hdlcd->clk);
+		/* no break here: execution continues into the next case */
+	case FB_BLANK_NORMAL:
+		hdlcd_disable(hdlcd);
+		break;
+	case FB_BLANK_UNBLANK:
+		clk_enable(hdlcd->clk);
+		hdlcd_enable(hdlcd);
+		break;
+	case FB_BLANK_VSYNC_SUSPEND:
+	case FB_BLANK_HSYNC_SUSPEND:
+	default:
+		return 1;
+	}
+
+	return 0;
+}
+
+/* VMA open callback; intentionally empty. */
+static void hdlcd_mmap_open(struct vm_area_struct *vma)
+{
+}
+
+/* VMA close callback; intentionally empty. */
+static void hdlcd_mmap_close(struct vm_area_struct *vma)
+{
+}
+
+/* VMA operations installed on framebuffer mappings by hdlcd_mmap(). */
+static struct vm_operations_struct hdlcd_mmap_ops = {
+	.open	= hdlcd_mmap_open,
+	.close	= hdlcd_mmap_close,
+};
+
+/*
+ * Map the framebuffer into user space with write-combining attributes.
+ *
+ * An empty mapping succeeds without doing anything. The requested window
+ * (page offset plus length) must lie inside the framebuffer, otherwise
+ * -EINVAL is returned. vm_pgoff is rewritten to the physical page frame.
+ * Returns 0 on success or -EAGAIN when the remap fails.
+ */
+static int hdlcd_mmap(struct fb_info *info, struct vm_area_struct *vma)
+{
+	struct hdlcd_device *hdlcd = to_hdlcd_device(info);
+	unsigned long fb_len = hdlcd->fb.fix.smem_len;
+	unsigned long span = vma->vm_end - vma->vm_start;
+	unsigned long offset, phys;
+
+	if (!span)
+		return 0;
+	/* the page offset must survive conversion to a byte offset */
+	if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT))
+		return -EINVAL;
+
+	offset = vma->vm_pgoff << PAGE_SHIFT;
+	if (offset >= fb_len || span + offset > fb_len)
+		return -EINVAL;
+
+	phys = hdlcd->fb.fix.smem_start + offset;
+
+	vma->vm_pgoff = phys >> PAGE_SHIFT;
+	vma->vm_flags |= VM_IO;
+	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	vma->vm_ops = &hdlcd_mmap_ops;
+
+	return io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
+				  span, vma->vm_page_prot) ? -EAGAIN : 0;
+}
+
+/*
+ * Pan vertically by pointing the controller at line var->yoffset of the
+ * framebuffer, then wait for a VSYNC. The result of the wait is ignored
+ * and the function always returns 0.
+ */
+static int hdlcd_pan_display(struct fb_var_screeninfo *var, struct fb_info *info)
+{
+	struct hdlcd_device *hdlcd = to_hdlcd_device(info);
+
+	hdlcd->fb.var.yoffset = var->yoffset;
+	/* new scan-out address = framebuffer start + yoffset whole lines */
+	WRITE_HDLCD_REG(HDLCD_REG_FB_BASE, hdlcd->fb.fix.smem_start +
+			(var->yoffset * hdlcd->fb.fix.line_length));
+
+	hdlcd_wait_for_vsync(info);
+
+	return 0;
+}
+
+/*
+ * Driver-specific ioctls. Only FBIO_WAITFORVSYNC is implemented; every
+ * other command returns -ENOIOCTLCMD. @arg is not used.
+ */
+static int hdlcd_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg)
+{
+	if (cmd == FBIO_WAITFORVSYNC)
+		return hdlcd_wait_for_vsync(info);
+
+	return -ENOIOCTLCMD;
+}
+
+/*
+ * Framebuffer operations: drawing uses the generic cfb_* helpers, and the
+ * compat ioctl entry shares the native ioctl handler.
+ */
+static struct fb_ops hdlcd_ops = {
+	.owner			= THIS_MODULE,
+	.fb_check_var		= hdlcd_check_var,
+	.fb_set_par		= hdlcd_set_par,
+	.fb_setcolreg		= hdlcd_setcolreg,
+	.fb_blank		= hdlcd_blank,
+	.fb_fillrect		= cfb_fillrect,
+	.fb_copyarea		= cfb_copyarea,
+	.fb_imageblit		= cfb_imageblit,
+	.fb_mmap		= hdlcd_mmap,
+	.fb_pan_display		= hdlcd_pan_display,
+	.fb_ioctl		= hdlcd_ioctl,
+	.fb_compat_ioctl	= hdlcd_ioctl
+};
+
+/*
+ * Bring up the controller and register the framebuffer.
+ *
+ * Acquires and prepares the clock, maps the registers, allocates the
+ * pseudo palette, verifies the product id, chooses a video mode (from the
+ * EDID when one is present), programs the bus options and framebuffer
+ * base, and registers the framebuffer.
+ *
+ * Returns 0 on success or a negative errno. On failure everything
+ * acquired here has been released again, in reverse order.
+ */
+static int hdlcd_setup(struct hdlcd_device *hdlcd)
+{
+	u32 version;
+	int err;
+
+	hdlcd->fb.device = hdlcd->dev;
+
+	hdlcd->clk = clk_get(hdlcd->dev, NULL);
+	if (IS_ERR(hdlcd->clk)) {
+		dev_err(hdlcd->dev, "HDLCD: unable to find clock data\n");
+		return PTR_ERR(hdlcd->clk);
+	}
+
+	err = clk_prepare(hdlcd->clk);
+	if (err)
+		goto clk_prepare_err;
+
+	hdlcd->base = ioremap_nocache(hdlcd->fb.fix.mmio_start, hdlcd->fb.fix.mmio_len);
+	if (!hdlcd->base) {
+		dev_err(hdlcd->dev, "HDLCD: unable to map registers\n");
+		/* err is 0 after clk_prepare(); report the failure */
+		err = -ENOMEM;
+		goto remap_err;
+	}
+
+	hdlcd->fb.pseudo_palette = kmalloc(sizeof(u32) * 16, GFP_KERNEL);
+	if (!hdlcd->fb.pseudo_palette) {
+		dev_err(hdlcd->dev, "HDLCD: unable to allocate pseudo_palette memory\n");
+		err = -ENOMEM;
+		goto kmalloc_err;
+	}
+
+	version = readl(hdlcd->base + HDLCD_REG_VERSION);
+	if ((version & HDLCD_PRODUCT_MASK) != HDLCD_PRODUCT_ID) {
+		dev_err(hdlcd->dev, "HDLCD: unknown product id: 0x%x\n", version);
+		err = -EINVAL;
+		goto setup_err;
+	}
+	dev_info(hdlcd->dev, "HDLCD: found ARM HDLCD version r%dp%d\n",
+		(version & HDLCD_VERSION_MAJOR_MASK) >> 8,
+		version & HDLCD_VERSION_MINOR_MASK);
+
+	strcpy(hdlcd->fb.fix.id, "hdlcd");
+	hdlcd->fb.fbops			= &hdlcd_ops;
+	hdlcd->fb.flags			= FBINFO_FLAG_DEFAULT/* | FBINFO_VIRTFB*/;
+
+	hdlcd->fb.fix.type		= FB_TYPE_PACKED_PIXELS;
+	hdlcd->fb.fix.type_aux		= 0;
+	hdlcd->fb.fix.xpanstep		= 0;
+	hdlcd->fb.fix.ypanstep		= 1;
+	hdlcd->fb.fix.ywrapstep		= 0;
+	hdlcd->fb.fix.accel		= FB_ACCEL_NONE;
+
+	hdlcd->fb.var.nonstd		= 0;
+	hdlcd->fb.var.activate		= FB_ACTIVATE_NOW;
+	hdlcd->fb.var.height		= -1;
+	hdlcd->fb.var.width		= -1;
+	hdlcd->fb.var.accel_flags	= 0;
+
+	init_completion(&hdlcd->vsync_completion);
+
+	if (hdlcd->edid) {
+		/* build modedb from EDID */
+		fb_edid_to_monspecs(hdlcd->edid, &hdlcd->fb.monspecs);
+		fb_videomode_to_modelist(hdlcd->fb.monspecs.modedb,
+					hdlcd->fb.monspecs.modedb_len,
+					&hdlcd->fb.modelist);
+		fb_find_mode(&hdlcd->fb.var, &hdlcd->fb, fb_mode,
+			hdlcd->fb.monspecs.modedb,
+			hdlcd->fb.monspecs.modedb_len,
+			&hdlcd_default_mode, 32);
+	} else {
+		hdlcd->fb.monspecs.hfmin	= 0;
+		hdlcd->fb.monspecs.hfmax	= 100000;
+		hdlcd->fb.monspecs.vfmin	= 0;
+		hdlcd->fb.monspecs.vfmax	= 400;
+		hdlcd->fb.monspecs.dclkmin	= 1000000;
+		hdlcd->fb.monspecs.dclkmax	= 100000000;
+		fb_find_mode(&hdlcd->fb.var, &hdlcd->fb, fb_mode, NULL, 0, &hdlcd_default_mode, 32);
+	}
+
+	dev_info(hdlcd->dev, "using %dx%d-%d@%d mode\n", hdlcd->fb.var.xres,
+		hdlcd->fb.var.yres, hdlcd->fb.var.bits_per_pixel,
+		hdlcd->fb.mode ? hdlcd->fb.mode->refresh : 60);
+	hdlcd->fb.var.xres_virtual	= hdlcd->fb.var.xres;
+#ifdef HDLCD_NO_VIRTUAL_SCREEN
+	hdlcd->fb.var.yres_virtual	= hdlcd->fb.var.yres;
+#else
+	hdlcd->fb.var.yres_virtual	= hdlcd->fb.var.yres * 2;
+#endif
+
+	/* initialise and set the palette */
+	if (fb_alloc_cmap(&hdlcd->fb.cmap, NR_PALETTE, 0)) {
+		dev_err(hdlcd->dev, "failed to allocate cmap memory\n");
+		err = -ENOMEM;
+		goto setup_err;
+	}
+	fb_set_cmap(&hdlcd->fb.cmap, &hdlcd->fb);
+
+	/* Allow max number of outstanding requests with the largest beat burst */
+	WRITE_HDLCD_REG(HDLCD_REG_BUS_OPTIONS, HDLCD_BUS_MAX_OUTSTAND | HDLCD_BUS_BURST_16);
+	/* Set the framebuffer base to start of allocated memory */
+	WRITE_HDLCD_REG(HDLCD_REG_FB_BASE, hdlcd->fb.fix.smem_start);
+#ifdef HDLCD_COUNT_BUFFERUNDERRUNS
+	/* turn on underrun interrupt for counting */
+	WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, HDLCD_INTERRUPT_UNDERRUN);
+#else
+	/* Ensure interrupts are disabled */
+	WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, 0);
+#endif
+	fb_set_var(&hdlcd->fb, &hdlcd->fb.var);
+
+	/* keep the error code so a failed registration is not reported as 0 */
+	err = register_framebuffer(&hdlcd->fb);
+	if (!err)
+		return 0;
+
+	dev_err(hdlcd->dev, "HDLCD: cannot register framebuffer\n");
+
+	fb_dealloc_cmap(&hdlcd->fb.cmap);
+setup_err:
+	kfree(hdlcd->fb.pseudo_palette);
+kmalloc_err:
+	iounmap(hdlcd->base);
+remap_err:
+	clk_unprepare(hdlcd->clk);
+clk_prepare_err:
+	clk_put(hdlcd->clk);
+	return err;
+}
+
+/*
+ * Convert one ASCII hexadecimal digit to its value (0-15).
+ * Characters that are not hexadecimal digits yield 0.
+ */
+static inline unsigned char atohex(u8 data)
+{
+	if (!isxdigit(data))
+		return 0;
+
+	if (data <= 0x39)
+		return data & 0xf;
+
+	/* letters: the low nibble of 'a'/'A' is 1, so add 9 to reach 10 */
+	return (data & 0xf) + 9;
+}
+
+/*
+ * EDID data is passed from the device tree as a literal string holding a
+ * hexadecimal dump of the block, optionally separated by whitespace.
+ *
+ * Two hexadecimal digits make up one EDID byte, high nibble first.
+ * Parsing stops at a NUL byte, at the end of the data, or once
+ * EDID_LENGTH bytes have been produced.
+ *
+ * On success hdlcd->edid points to a newly allocated EDID_LENGTH-byte
+ * buffer and 0 is returned. Returns -EINVAL when @edid_data is NULL or
+ * holds fewer than EDID_LENGTH complete bytes (hdlcd->edid is then NULL),
+ * and -ENOMEM when the buffer cannot be allocated.
+ */
+static int parse_edid_data(struct hdlcd_device *hdlcd, const u8 *edid_data, int data_len)
+{
+	bool high_nibble = true;
+	int i, j = 0;
+
+	if (!edid_data)
+		return -EINVAL;
+
+	hdlcd->edid = kzalloc(EDID_LENGTH, GFP_KERNEL);
+	if (!hdlcd->edid)
+		return -ENOMEM;
+
+	for (i = 0; i < data_len && j < EDID_LENGTH; i++) {
+		/* the string terminator is not part of the dump */
+		if (!edid_data[i])
+			break;
+		if (isspace(edid_data[i]))
+			continue;
+
+		if (high_nibble) {
+			hdlcd->edid[j] = atohex(edid_data[i]) << 4;
+		} else {
+			hdlcd->edid[j] |= atohex(edid_data[i]);
+			j++;
+		}
+		high_nibble = !high_nibble;
+	}
+
+	if (j < EDID_LENGTH) {
+		kfree(hdlcd->edid);
+		hdlcd->edid = NULL;
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Probe one HDLCD platform device.
+ *
+ * Reads the optional "mode", "framebuffer" and "edid" device-tree
+ * properties, claims the interrupt and the register region, maps and
+ * clears the framebuffer, then calls hdlcd_setup().
+ *
+ * Returns 0 on success or a negative errno; on failure everything
+ * acquired here is released again.
+ */
+static int hdlcd_probe(struct platform_device *pdev)
+{
+	int err = 0, i;
+	struct hdlcd_device *hdlcd;
+	struct resource *mem;
+#ifdef CONFIG_OF
+	struct device_node *of_node;
+#endif
+
+	memset(&cached_var_screeninfo, 0, sizeof(struct fb_var_screeninfo));
+
+	dev_dbg(&pdev->dev, "HDLCD: probing\n");
+
+	hdlcd = kzalloc(sizeof(*hdlcd), GFP_KERNEL);
+	if (!hdlcd)
+		return -ENOMEM;
+
+#ifdef CONFIG_OF
+	of_node = pdev->dev.of_node;
+	if (of_node) {
+		int len;
+		int na = of_n_addr_cells(of_node);
+		int ns = of_n_size_cells(of_node);
+		const u8 *edid;
+		const __be32 *prop = of_get_property(of_node, "mode", &len);
+
+		/*
+		 * NOTE(review): this copies len bytes with no check against
+		 * the size of the storage behind fb_mode - confirm that the
+		 * destination is writable and large enough.
+		 */
+		if (prop)
+			strncpy(fb_mode, (char *)prop, len);
+
+		prop = of_get_property(of_node, "framebuffer", &len);
+		if (prop && len >= (na + ns) * (int)sizeof(*prop)) {
+			hdlcd->fb.fix.smem_start = of_read_ulong(prop, na);
+			prop += na;
+			framebuffer_size = of_read_ulong(prop, ns);
+			if (framebuffer_size > HDLCD_MAX_FRAMEBUFFER_SIZE)
+				framebuffer_size = HDLCD_MAX_FRAMEBUFFER_SIZE;
+			dev_dbg(&pdev->dev, "HDLCD: phys_addr = 0x%lx, size = 0x%lx\n",
+				hdlcd->fb.fix.smem_start, framebuffer_size);
+		} else if (prop) {
+			/* too short to hold <base size>; do not read past it */
+			dev_err(&pdev->dev, "HDLCD: malformed framebuffer property\n");
+		}
+
+		edid = of_get_property(of_node, "edid", &len);
+		if (edid) {
+			err = parse_edid_data(hdlcd, edid, len);
+#ifdef CONFIG_SERIAL_AMBA_PCU_UART
+		} else {
+			/* ask the firmware to fetch the EDID */
+			dev_dbg(&pdev->dev, "HDLCD: Requesting EDID data\n");
+			hdlcd->edid = kzalloc(EDID_LENGTH, GFP_KERNEL);
+			if (!hdlcd->edid) {
+				err = -ENOMEM;
+				goto resource_err;
+			}
+			err = get_edid(hdlcd->edid);
+#endif /* CONFIG_SERIAL_AMBA_PCU_UART */
+		}
+		if (err) {
+			dev_info(&pdev->dev, "HDLCD: Failed to parse EDID data\n");
+			/* continue without an EDID instead of using a bad one */
+			kfree(hdlcd->edid);
+			hdlcd->edid = NULL;
+		}
+	}
+#endif /* CONFIG_OF */
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!mem) {
+		dev_err(&pdev->dev, "HDLCD: cannot get platform resources\n");
+		err = -EINVAL;
+		goto resource_err;
+	}
+
+	i = platform_get_irq(pdev, 0);
+	if (i < 0) {
+		dev_err(&pdev->dev, "HDLCD: no irq defined for vsync\n");
+		err = -ENOENT;
+		goto resource_err;
+	}
+
+	err = request_irq(i, hdlcd_irq, 0, dev_name(&pdev->dev), hdlcd);
+	if (err) {
+		dev_err(&pdev->dev, "HDLCD: unable to request irq\n");
+		goto resource_err;
+	}
+	hdlcd->irq = i;
+
+	if (!request_mem_region(mem->start, resource_size(mem),	dev_name(&pdev->dev))) {
+		err = -ENXIO;
+		goto request_err;
+	}
+
+	if (!hdlcd->fb.fix.smem_start) {
+		dev_err(&pdev->dev, "platform did not allocate frame buffer memory\n");
+		err = -ENOMEM;
+		goto memalloc_err;
+	}
+	hdlcd->fb.screen_base = ioremap_wc(hdlcd->fb.fix.smem_start, framebuffer_size);
+	if (!hdlcd->fb.screen_base) {
+		dev_err(&pdev->dev, "unable to ioremap framebuffer\n");
+		err = -ENOMEM;
+		goto remap_err;
+	}
+
+	hdlcd->fb.screen_size = framebuffer_size;
+	hdlcd->fb.fix.smem_len = framebuffer_size;
+	hdlcd->fb.fix.mmio_start = mem->start;
+	hdlcd->fb.fix.mmio_len = resource_size(mem);
+
+	/* Clear the framebuffer */
+	memset(hdlcd->fb.screen_base, 0, framebuffer_size);
+
+	hdlcd->dev = &pdev->dev;
+
+	dev_dbg(&pdev->dev, "HDLCD: framebuffer virt base %p, phys base 0x%lX\n",
+		hdlcd->fb.screen_base, (unsigned long)hdlcd->fb.fix.smem_start);
+
+	err = hdlcd_setup(hdlcd);
+
+	if (err)
+		goto probe_err;
+
+	platform_set_drvdata(pdev, hdlcd);
+	return 0;
+
+probe_err:
+	iounmap(hdlcd->fb.screen_base);
+
+remap_err:
+	/*
+	 * The second argument is a size, not an address.
+	 * NOTE(review): this driver never reserves the region itself;
+	 * confirm that the platform code expects it to be freed here.
+	 */
+	memblock_free(hdlcd->fb.fix.smem_start, framebuffer_size);
+
+memalloc_err:
+	release_mem_region(mem->start, resource_size(mem));
+
+request_err:
+	free_irq(hdlcd->irq, hdlcd);
+
+resource_err:
+	kfree(hdlcd->edid);
+	kfree(hdlcd);
+
+	return err;
+}
+
+/*
+ * Tear down a device set up by hdlcd_probe().
+ *
+ * The framebuffer is unregistered and the interrupt released before any
+ * mapping or memory goes away, so neither framebuffer users nor the
+ * interrupt handler can touch freed state. Always returns 0.
+ */
+static int hdlcd_remove(struct platform_device *pdev)
+{
+	struct hdlcd_device *hdlcd = platform_get_drvdata(pdev);
+
+	unregister_framebuffer(&hdlcd->fb);
+
+	/* dev_id must be the cookie that was handed to request_irq() */
+	free_irq(hdlcd->irq, hdlcd);
+
+	clk_disable(hdlcd->clk);
+	clk_unprepare(hdlcd->clk);
+	clk_put(hdlcd->clk);
+
+	/* unmap memory */
+	iounmap(hdlcd->fb.screen_base);
+	iounmap(hdlcd->base);
+
+	/* deallocate fb memory */
+	fb_dealloc_cmap(&hdlcd->fb.cmap);
+	kfree(hdlcd->fb.pseudo_palette);
+	/* the second argument is the region size, not its address */
+	memblock_free(hdlcd->fb.fix.smem_start, hdlcd->fb.fix.smem_len);
+	release_mem_region(hdlcd->fb.fix.mmio_start, hdlcd->fb.fix.mmio_len);
+
+	kfree(hdlcd->edid);
+	kfree(hdlcd);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+/* Suspend hook: placeholder that does nothing and reports success. */
+static int hdlcd_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	/* not implemented yet */
+	return 0;
+}
+
+/* Resume hook: placeholder that does nothing and reports success. */
+static int hdlcd_resume(struct platform_device *pdev)
+{
+	/* not implemented yet */
+	return 0;
+}
+#else
+#define hdlcd_suspend	NULL
+#define hdlcd_resume	NULL
+#endif
+
+/*
+ * Platform driver glue: binds by the name "hdlcd" or through the
+ * device-tree match table.
+ */
+static struct platform_driver hdlcd_driver = {
+	.probe		= hdlcd_probe,
+	.remove		= hdlcd_remove,
+	.suspend	= hdlcd_suspend,
+	.resume		= hdlcd_resume,
+	.driver	= {
+		.name		= "hdlcd",
+		.owner		= THIS_MODULE,
+		.of_match_table	= hdlcd_of_matches,
+	},
+};
+
+/*
+ * Module entry point: register the platform driver and, when underrun
+ * counting is compiled in and registration succeeded, set up the counter.
+ * Returns the result of platform_driver_register().
+ */
+static int __init hdlcd_init(void)
+{
+	int ret = platform_driver_register(&hdlcd_driver);
+
+#ifdef HDLCD_COUNT_BUFFERUNDERRUNS
+	if (ret == 0)
+		hdlcd_underrun_init();
+#endif
+	return ret;
+}
+
+/*
+ * Module exit point: tear down the underrun counter (when compiled in)
+ * and unregister the platform driver. Only referenced by module_exit(),
+ * so it has internal linkage.
+ */
+static void __exit hdlcd_exit(void)
+{
+#ifdef HDLCD_COUNT_BUFFERUNDERRUNS
+	hdlcd_underrun_close();
+#endif
+	platform_driver_unregister(&hdlcd_driver);
+}
+
+module_init(hdlcd_init);
+module_exit(hdlcd_exit);
+
+MODULE_AUTHOR("Liviu Dudau");
+MODULE_DESCRIPTION("ARM HDLCD core driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index a92783e480e6..0d8f98c79a6c 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -404,7 +404,7 @@ static void cursor_timer_handler(unsigned long dev_addr)
 	struct fb_info *info = (struct fb_info *) dev_addr;
 	struct fbcon_ops *ops = info->fbcon_par;
 
-	schedule_work(&info->queue);
+	queue_work(system_power_efficient_wq, &info->queue);
 	mod_timer(&ops->cursor_timer, jiffies + HZ/5);
 }
 
diff --git a/drivers/video/vexpress-dvi.c b/drivers/video/vexpress-dvi.c
new file mode 100644
index 000000000000..f08753450ee4
--- /dev/null
+++ b/drivers/video/vexpress-dvi.c
@@ -0,0 +1,220 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2012 ARM Limited
+ */
+
+#define pr_fmt(fmt) "vexpress-dvi: " fmt
+
+#include <linux/fb.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/vexpress.h>
+
+
+static struct vexpress_config_func *vexpress_dvimode_func; /* set by probe */
+
+static const struct {
+	u32 xres, yres, mode;	/* mode: value sent via vexpress_config_write() */
+} vexpress_dvi_dvimodes[] = {
+	{ 640, 480, 0 }, /* VGA */
+	{ 800, 600, 1 }, /* SVGA */
+	{ 1024, 768, 2 }, /* XGA */
+	{ 1280, 1024, 3 }, /* SXGA */
+	{ 1600, 1200, 4 }, /* UXGA */
+	{ 1920, 1080, 5 }, /* HD1080 */
+};
+
+static void vexpress_dvi_mode_set(struct fb_info *info, u32 xres, u32 yres)
+{
+	int ret = -ENOENT;	/* kept when no table entry matches */
+	int idx = 0;
+
+	if (vexpress_dvimode_func == NULL)
+		return;
+
+	/* Advance to the first table entry with the requested resolution. */
+	while (idx < ARRAY_SIZE(vexpress_dvi_dvimodes) &&
+			(vexpress_dvi_dvimodes[idx].xres != xres ||
+			vexpress_dvi_dvimodes[idx].yres != yres))
+		idx++;
+	if (idx < ARRAY_SIZE(vexpress_dvi_dvimodes)) {
+		u32 mode = vexpress_dvi_dvimodes[idx].mode;
+
+		pr_debug("mode: %ux%u = %d\n", xres, yres, mode);
+		ret = vexpress_config_write(vexpress_dvimode_func, 0, mode);
+	}
+	if (ret)
+		pr_warn("Failed to set %ux%u mode! (%d)\n", xres, yres, ret);
+}
+
+
+static struct vexpress_config_func *vexpress_muxfpga_func;
+static int vexpress_dvi_fb = -1;	/* fb node routed to DVI; -1 until set */
+
+/* Select @info's site on the MUXFPGA, then program its current mode. */
+static int vexpress_dvi_mux_set(struct fb_info *info)
+{
+	u32 site = vexpress_get_site_by_dev(info->device);
+	int ret;
+
+	if (vexpress_muxfpga_func == NULL)
+		return -ENXIO;
+
+	ret = vexpress_config_write(vexpress_muxfpga_func, 0, site);
+	if (ret) {
+		pr_warn("Failed to select MUXFPGA input %d (fb%d)! (%d)\n",
+				site, info->node, ret);
+		return ret;
+	}
+
+	pr_debug("Selected MUXFPGA input %d (fb%d)\n", site, info->node);
+	vexpress_dvi_fb = info->node;
+	vexpress_dvi_mode_set(info, info->var.xres, info->var.yres);
+
+	return 0;
+}
+
+static int vexpress_dvi_fb_select(int fb)
+{
+	int err;
+	struct fb_info *info;
+
+	/* fb0 is the default; indices beyond registered_fb[] are refused */
+	if (fb < 0)
+		fb = 0;
+	if (fb >= FB_MAX)
+		return -ENODEV;
+
+	info = registered_fb[fb];
+	if (!info || !lock_fb_info(info))
+		return -ENODEV;
+
+	err = vexpress_dvi_mux_set(info);
+	unlock_fb_info(info);
+	return err;
+}
+
+static ssize_t vexpress_dvi_fb_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", vexpress_dvi_fb);	/* -1 until one is set */
+}
+
+static ssize_t vexpress_dvi_fb_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	int value;	/* parsed as int so oversized input fails, not wraps */
+	int err = kstrtoint(buf, 0, &value);
+
+	if (!err)
+		err = vexpress_dvi_fb_select(value);
+
+	return err ? err : count;
+}
+
+static DEVICE_ATTR(fb, S_IRUGO | S_IWUSR, vexpress_dvi_fb_show,
+		vexpress_dvi_fb_store);	/* file-local: only probe uses it */
+
+
+/* fb notifier callback: reacts to fb registration and mode-change events */
+static int vexpress_dvi_fb_event_notify(struct notifier_block *self,
+			      unsigned long action, void *data)
+{
+	struct fb_event *ev = data;
+	struct fb_info *fbi = ev->info;
+	struct fb_videomode *vmode = ev->data;
+
+	if (action == FB_EVENT_FB_REGISTERED) {
+		/* Claim the DVI output if no framebuffer is selected yet. */
+		if (vexpress_dvi_fb < 0)
+			vexpress_dvi_mux_set(fbi);
+	} else if (action == FB_EVENT_MODE_CHANGE ||
+			action == FB_EVENT_MODE_CHANGE_ALL) {
+		/* Only follow mode changes of the fb routed to DVI. */
+		if (fbi->node == vexpress_dvi_fb)
+			vexpress_dvi_mode_set(fbi, vmode->xres, vmode->yres);
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block vexpress_dvi_fb_notifier = {
+	.notifier_call = vexpress_dvi_fb_event_notify,
+};
+static bool vexpress_dvi_fb_notifier_registered;	/* set once by probe */
+
+
+enum vexpress_dvi_func { FUNC_MUXFPGA, FUNC_DVIMODE }; /* kept in match data */
+
+static const struct of_device_id vexpress_dvi_of_match[] = {
+	{
+		.compatible = "arm,vexpress-muxfpga",
+		.data = (void *)FUNC_MUXFPGA,
+	}, {
+		.compatible = "arm,vexpress-dvimode",
+		.data = (void *)FUNC_DVIMODE,
+	},
+	{ /* sentinel */ }
+};
+
+static int vexpress_dvi_probe(struct platform_device *pdev)
+{
+	enum vexpress_dvi_func func;
+	const struct of_device_id *match =
+			of_match_device(vexpress_dvi_of_match, &pdev->dev);
+
+	if (match)
+		func = (enum vexpress_dvi_func)(unsigned long)match->data;
+	else
+		func = pdev->id_entry->driver_data;
+
+	switch (func) {
+	case FUNC_MUXFPGA:
+		vexpress_muxfpga_func =
+				vexpress_config_func_get_by_dev(&pdev->dev);
+		if (device_create_file(&pdev->dev, &dev_attr_fb))
+			pr_warn("Failed to create 'fb' attribute\n");
+		break;
+	case FUNC_DVIMODE:
+		vexpress_dvimode_func =
+				vexpress_config_func_get_by_dev(&pdev->dev);
+		break;
+	}
+
+	if (!vexpress_dvi_fb_notifier_registered) {
+		fb_register_client(&vexpress_dvi_fb_notifier);
+		vexpress_dvi_fb_notifier_registered = true;
+	}
+	/* result ignored: a later fb registration is caught by the notifier */
+	vexpress_dvi_fb_select(vexpress_dvi_fb);
+	return 0;
+}
+
+static const struct platform_device_id vexpress_dvi_id_table[] = {
+	{ "vexpress-muxfpga", FUNC_MUXFPGA },	/* name, driver_data */
+	{ "vexpress-dvimode", FUNC_DVIMODE },
+	{ /* sentinel */ }
+};
+
+static struct platform_driver vexpress_dvi_driver = {
+	.id_table = vexpress_dvi_id_table,	/* used when no OF match */
+	.driver = {
+		.of_match_table = vexpress_dvi_of_match,
+		.name = "vexpress-dvi",
+	},
+	.probe = vexpress_dvi_probe,
+};
+
+static int __init vexpress_dvi_init(void)
+{
+	return platform_driver_register(&vexpress_dvi_driver);
+}
+device_initcall(vexpress_dvi_init);	/* run as a device-level initcall */