aboutsummaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/acpi/thermal.c9
-rw-r--r--drivers/irqchip/Makefile2
-rw-r--r--drivers/irqchip/irq-gic-v2m.c27
-rw-r--r--drivers/irqchip/irq-gic-v3-its-pci-msi.c140
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c144
-rw-r--r--drivers/of/irq.c21
-rw-r--r--drivers/of/platform.c1
-rw-r--r--drivers/pci/Makefile1
-rw-r--r--drivers/pci/host/Kconfig11
-rw-r--r--drivers/pci/host/Makefile1
-rw-r--r--drivers/pci/host/pci-host-generic.c52
-rw-r--r--drivers/pci/host/pci-xgene-msi.c587
-rw-r--r--drivers/pci/host/pci-xgene.c21
-rw-r--r--drivers/pci/msi.c20
-rw-r--r--drivers/pci/of.c30
-rw-r--r--drivers/pci/probe.c45
-rw-r--r--drivers/platform/x86/acerhdf.c3
-rw-r--r--drivers/thermal/Kconfig26
-rw-r--r--drivers/thermal/Makefile1
-rw-r--r--drivers/thermal/cpu_cooling.c585
-rw-r--r--drivers/thermal/db8500_thermal.c2
-rw-r--r--drivers/thermal/fair_share.c39
-rw-r--r--drivers/thermal/imx_thermal.c3
-rw-r--r--drivers/thermal/of-thermal.c41
-rw-r--r--drivers/thermal/power_allocator.c544
-rw-r--r--drivers/thermal/thermal_core.c314
-rw-r--r--drivers/thermal/thermal_core.h11
-rw-r--r--drivers/thermal/ti-soc-thermal/ti-thermal-common.c3
28 files changed, 2475 insertions, 209 deletions
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index d24fa1964eb8..6d4e44ea74ac 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -800,7 +800,8 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal,
result =
thermal_zone_bind_cooling_device
(thermal, trip, cdev,
- THERMAL_NO_LIMIT, THERMAL_NO_LIMIT);
+ THERMAL_NO_LIMIT, THERMAL_NO_LIMIT,
+ THERMAL_WEIGHT_DEFAULT);
else
result =
thermal_zone_unbind_cooling_device
@@ -824,7 +825,8 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal,
if (bind)
result = thermal_zone_bind_cooling_device
(thermal, trip, cdev,
- THERMAL_NO_LIMIT, THERMAL_NO_LIMIT);
+ THERMAL_NO_LIMIT, THERMAL_NO_LIMIT,
+ THERMAL_WEIGHT_DEFAULT);
else
result = thermal_zone_unbind_cooling_device
(thermal, trip, cdev);
@@ -841,7 +843,8 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal,
result = thermal_zone_bind_cooling_device
(thermal, THERMAL_TRIPS_NONE,
cdev, THERMAL_NO_LIMIT,
- THERMAL_NO_LIMIT);
+ THERMAL_NO_LIMIT,
+ THERMAL_WEIGHT_DEFAULT);
else
result = thermal_zone_unbind_cooling_device
(thermal, THERMAL_TRIPS_NONE,
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index dda4927e47a6..801bd75f7266 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -22,7 +22,7 @@ obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o
obj-$(CONFIG_ARM_GIC) += irq-gic.o irq-gic-common.o
obj-$(CONFIG_ARM_GIC_V2M) += irq-gic-v2m.o
obj-$(CONFIG_ARM_GIC_V3) += irq-gic-v3.o irq-gic-common.o
-obj-$(CONFIG_ARM_GIC_V3_ITS) += irq-gic-v3-its.o
+obj-$(CONFIG_ARM_GIC_V3_ITS) += irq-gic-v3-its.o irq-gic-v3-its-pci-msi.o
obj-$(CONFIG_ARM_NVIC) += irq-nvic.o
obj-$(CONFIG_ARM_VIC) += irq-vic.o
obj-$(CONFIG_ATMEL_AIC_IRQ) += irq-atmel-aic-common.o irq-atmel-aic.o
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index fdf706555d72..ec9c37674a0b 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -45,7 +45,6 @@
struct v2m_data {
spinlock_t msi_cnt_lock;
- struct msi_controller mchip;
struct resource res; /* GICv2m resource */
void __iomem *base; /* GICv2m virt address */
u32 spi_start; /* The SPI number that MSIs start */
@@ -218,6 +217,7 @@ static int __init gicv2m_init_one(struct device_node *node,
{
int ret;
struct v2m_data *v2m;
+ struct irq_domain *inner_domain;
v2m = kzalloc(sizeof(struct v2m_data), GFP_KERNEL);
if (!v2m) {
@@ -261,19 +261,18 @@ static int __init gicv2m_init_one(struct device_node *node,
goto err_iounmap;
}
- v2m->domain = irq_domain_add_tree(NULL, &gicv2m_domain_ops, v2m);
- if (!v2m->domain) {
+ inner_domain = irq_domain_add_tree(node, &gicv2m_domain_ops, v2m);
+ if (!inner_domain) {
pr_err("Failed to create GICv2m domain\n");
ret = -ENOMEM;
goto err_free_bm;
}
- v2m->domain->parent = parent;
- v2m->mchip.of_node = node;
- v2m->mchip.domain = pci_msi_create_irq_domain(node,
- &gicv2m_msi_domain_info,
- v2m->domain);
- if (!v2m->mchip.domain) {
+ inner_domain->bus_token = DOMAIN_BUS_NEXUS;
+ inner_domain->parent = parent;
+ v2m->domain = pci_msi_create_irq_domain(node, &gicv2m_msi_domain_info,
+ inner_domain);
+ if (!v2m->domain) {
pr_err("Failed to create MSI domain\n");
ret = -ENOMEM;
goto err_free_domains;
@@ -281,12 +280,6 @@ static int __init gicv2m_init_one(struct device_node *node,
spin_lock_init(&v2m->msi_cnt_lock);
- ret = of_pci_msi_chip_add(&v2m->mchip);
- if (ret) {
- pr_err("Failed to add msi_chip.\n");
- goto err_free_domains;
- }
-
pr_info("Node %s: range[%#lx:%#lx], SPI[%d:%d]\n", node->name,
(unsigned long)v2m->res.start, (unsigned long)v2m->res.end,
v2m->spi_start, (v2m->spi_start + v2m->nr_spis));
@@ -294,10 +287,10 @@ static int __init gicv2m_init_one(struct device_node *node,
return 0;
err_free_domains:
- if (v2m->mchip.domain)
- irq_domain_remove(v2m->mchip.domain);
if (v2m->domain)
irq_domain_remove(v2m->domain);
+ if (inner_domain)
+ irq_domain_remove(inner_domain);
err_free_bm:
kfree(v2m->bm);
err_iounmap:
diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
new file mode 100644
index 000000000000..cf351c637464
--- /dev/null
+++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+
+static void its_mask_msi_irq(struct irq_data *d)
+{
+ pci_msi_mask_irq(d);
+ irq_chip_mask_parent(d);
+}
+
+static void its_unmask_msi_irq(struct irq_data *d)
+{
+ pci_msi_unmask_irq(d);
+ irq_chip_unmask_parent(d);
+}
+
+static struct irq_chip its_msi_irq_chip = {
+ .name = "ITS-MSI",
+ .irq_unmask = its_unmask_msi_irq,
+ .irq_mask = its_mask_msi_irq,
+ .irq_eoi = irq_chip_eoi_parent,
+ .irq_write_msi_msg = pci_msi_domain_write_msg,
+};
+
+struct its_pci_alias {
+ struct pci_dev *pdev;
+ u32 dev_id;
+ u32 count;
+};
+
+static int its_pci_msi_vec_count(struct pci_dev *pdev)
+{
+ int msi, msix;
+
+ msi = max(pci_msi_vec_count(pdev), 0);
+ msix = max(pci_msix_vec_count(pdev), 0);
+
+ return max(msi, msix);
+}
+
+static int its_get_pci_alias(struct pci_dev *pdev, u16 alias, void *data)
+{
+ struct its_pci_alias *dev_alias = data;
+
+ dev_alias->dev_id = alias;
+ if (pdev != dev_alias->pdev)
+ dev_alias->count += its_pci_msi_vec_count(dev_alias->pdev);
+
+ return 0;
+}
+
+static int its_pci_msi_prepare(struct irq_domain *domain, struct device *dev,
+ int nvec, msi_alloc_info_t *info)
+{
+ struct pci_dev *pdev;
+ struct its_pci_alias dev_alias;
+ struct msi_domain_info *msi_info;
+
+ if (!dev_is_pci(dev))
+ return -EINVAL;
+
+ msi_info = msi_get_domain_info(domain->parent);
+
+ pdev = to_pci_dev(dev);
+ dev_alias.pdev = pdev;
+ dev_alias.count = nvec;
+
+ pci_for_each_dma_alias(pdev, its_get_pci_alias, &dev_alias);
+
+ /* ITS specific DeviceID, as the core ITS ignores dev. */
+ info->scratchpad[0].ul = dev_alias.dev_id;
+
+ return msi_info->ops->msi_prepare(domain->parent,
+ dev, dev_alias.count, info);
+}
+
+static struct msi_domain_ops its_pci_msi_ops = {
+ .msi_prepare = its_pci_msi_prepare,
+};
+
+static struct msi_domain_info its_pci_msi_domain_info = {
+ .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX),
+ .ops = &its_pci_msi_ops,
+ .chip = &its_msi_irq_chip,
+};
+
+static struct of_device_id its_device_id[] = {
+ { .compatible = "arm,gic-v3-its", },
+ {},
+};
+
+static int __init its_pci_msi_init(void)
+{
+ struct device_node *np;
+ struct irq_domain *parent;
+
+ for (np = of_find_matching_node(NULL, its_device_id); np;
+ np = of_find_matching_node(np, its_device_id)) {
+ if (!of_property_read_bool(np, "msi-controller"))
+ continue;
+
+ parent = irq_find_matching_host(np, DOMAIN_BUS_NEXUS);
+ if (!parent || !msi_get_domain_info(parent)) {
+ pr_err("%s: unable to locate ITS domain\n",
+ np->full_name);
+ continue;
+ }
+
+ if (!pci_msi_create_irq_domain(np, &its_pci_msi_domain_info,
+ parent)) {
+ pr_err("%s: unable to create PCI domain\n",
+ np->full_name);
+ continue;
+ }
+
+ pr_info("PCI/MSI: %s domain created\n", np->full_name);
+ }
+
+ return 0;
+}
+early_initcall(its_pci_msi_init);
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 9a791dd52199..78db25293384 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -54,14 +54,12 @@ struct its_collection {
/*
* The ITS structure - contains most of the infrastructure, with the
- * msi_controller, the command queue, the collections, and the list of
- * devices writing to it.
+ * top-level MSI domain, the command queue, the collections, and the
+ * list of devices writing to it.
*/
struct its_node {
raw_spinlock_t lock;
struct list_head entry;
- struct msi_controller msi_chip;
- struct irq_domain *domain;
void __iomem *base;
unsigned long phys_base;
struct its_cmd_block *cmd_base;
@@ -643,26 +641,6 @@ static struct irq_chip its_irq_chip = {
.irq_compose_msi_msg = its_irq_compose_msi_msg,
};
-static void its_mask_msi_irq(struct irq_data *d)
-{
- pci_msi_mask_irq(d);
- irq_chip_mask_parent(d);
-}
-
-static void its_unmask_msi_irq(struct irq_data *d)
-{
- pci_msi_unmask_irq(d);
- irq_chip_unmask_parent(d);
-}
-
-static struct irq_chip its_msi_irq_chip = {
- .name = "ITS-MSI",
- .irq_unmask = its_unmask_msi_irq,
- .irq_mask = its_mask_msi_irq,
- .irq_eoi = irq_chip_eoi_parent,
- .irq_write_msi_msg = pci_msi_domain_write_msg,
-};
-
/*
* How we allocate LPIs:
*
@@ -742,6 +720,9 @@ static unsigned long *its_lpi_alloc_chunks(int nr_irqs, int *base, int *nr_ids)
out:
spin_unlock(&lpi_lock);
+ if (!bitmap)
+ *base = *nr_ids = 0;
+
return bitmap;
}
@@ -831,7 +812,7 @@ static void its_free_tables(struct its_node *its)
}
}
-static int its_alloc_tables(struct its_node *its)
+static int its_alloc_tables(const char *node_name, struct its_node *its)
{
int err;
int i;
@@ -874,7 +855,7 @@ static int its_alloc_tables(struct its_node *its)
if (order >= MAX_ORDER) {
order = MAX_ORDER - 1;
pr_warn("%s: Device Table too large, reduce its page order to %u\n",
- its->msi_chip.of_node->full_name, order);
+ node_name, order);
}
}
@@ -946,7 +927,7 @@ retry_baser:
if (val != tmp) {
pr_err("ITS: %s: GITS_BASER%d doesn't stick: %lx %lx\n",
- its->msi_chip.of_node->full_name, i,
+ node_name, i,
(unsigned long) val, (unsigned long) tmp);
err = -ENXIO;
goto out_free;
@@ -1213,85 +1194,50 @@ static int its_alloc_device_irq(struct its_device *dev, irq_hw_number_t *hwirq)
return 0;
}
-struct its_pci_alias {
- struct pci_dev *pdev;
- u32 dev_id;
- u32 count;
-};
-
-static int its_pci_msi_vec_count(struct pci_dev *pdev)
-{
- int msi, msix;
-
- msi = max(pci_msi_vec_count(pdev), 0);
- msix = max(pci_msix_vec_count(pdev), 0);
-
- return max(msi, msix);
-}
-
-static int its_get_pci_alias(struct pci_dev *pdev, u16 alias, void *data)
-{
- struct its_pci_alias *dev_alias = data;
-
- dev_alias->dev_id = alias;
- if (pdev != dev_alias->pdev)
- dev_alias->count += its_pci_msi_vec_count(dev_alias->pdev);
-
- return 0;
-}
-
static int its_msi_prepare(struct irq_domain *domain, struct device *dev,
int nvec, msi_alloc_info_t *info)
{
- struct pci_dev *pdev;
struct its_node *its;
struct its_device *its_dev;
- struct its_pci_alias dev_alias;
-
- if (!dev_is_pci(dev))
- return -EINVAL;
+ struct msi_domain_info *msi_info;
+ u32 dev_id;
- pdev = to_pci_dev(dev);
- dev_alias.pdev = pdev;
- dev_alias.count = nvec;
+ /*
+ * We ignore "dev" entierely, and rely on the dev_id that has
+ * been passed via the scratchpad. This limits this domain's
+ * usefulness to upper layers that definitely know that they
+ * are built on top of the ITS.
+ */
+ dev_id = info->scratchpad[0].ul;
- pci_for_each_dma_alias(pdev, its_get_pci_alias, &dev_alias);
- its = domain->parent->host_data;
+ msi_info = msi_get_domain_info(domain);
+ its = msi_info->data;
- its_dev = its_find_device(its, dev_alias.dev_id);
+ its_dev = its_find_device(its, dev_id);
if (its_dev) {
/*
* We already have seen this ID, probably through
* another alias (PCI bridge of some sort). No need to
* create the device.
*/
- dev_dbg(dev, "Reusing ITT for devID %x\n", dev_alias.dev_id);
+ pr_debug("Reusing ITT for devID %x\n", dev_id);
goto out;
}
- its_dev = its_create_device(its, dev_alias.dev_id, dev_alias.count);
+ its_dev = its_create_device(its, dev_id, nvec);
if (!its_dev)
return -ENOMEM;
- dev_dbg(&pdev->dev, "ITT %d entries, %d bits\n",
- dev_alias.count, ilog2(dev_alias.count));
+ pr_debug("ITT %d entries, %d bits\n", nvec, ilog2(nvec));
out:
info->scratchpad[0].ptr = its_dev;
- info->scratchpad[1].ptr = dev;
return 0;
}
-static struct msi_domain_ops its_pci_msi_ops = {
+static struct msi_domain_ops its_msi_domain_ops = {
.msi_prepare = its_msi_prepare,
};
-static struct msi_domain_info its_pci_msi_domain_info = {
- .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
- MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX),
- .ops = &its_pci_msi_ops,
- .chip = &its_msi_irq_chip,
-};
-
static int its_irq_gic_domain_alloc(struct irq_domain *domain,
unsigned int virq,
irq_hw_number_t hwirq)
@@ -1327,9 +1273,9 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
irq_domain_set_hwirq_and_chip(domain, virq + i,
hwirq, &its_irq_chip, its_dev);
- dev_dbg(info->scratchpad[1].ptr, "ID:%d pID:%d vID:%d\n",
- (int)(hwirq - its_dev->event_map.lpi_base),
- (int)hwirq, virq + i);
+ pr_debug("ID:%d pID:%d vID:%d\n",
+ (int)(hwirq - its_dev->event_map.lpi_base),
+ (int) hwirq, virq + i);
}
return 0;
@@ -1430,6 +1376,7 @@ static int its_probe(struct device_node *node, struct irq_domain *parent)
struct resource res;
struct its_node *its;
void __iomem *its_base;
+ struct irq_domain *inner_domain;
u32 val;
u64 baser, tmp;
int err;
@@ -1473,7 +1420,6 @@ static int its_probe(struct device_node *node, struct irq_domain *parent)
INIT_LIST_HEAD(&its->its_device_list);
its->base = its_base;
its->phys_base = res.start;
- its->msi_chip.of_node = node;
its->ite_size = ((readl_relaxed(its_base + GITS_TYPER) >> 4) & 0xf) + 1;
its->cmd_base = kzalloc(ITS_CMD_QUEUE_SZ, GFP_KERNEL);
@@ -1483,7 +1429,7 @@ static int its_probe(struct device_node *node, struct irq_domain *parent)
}
its->cmd_write = its->cmd_base;
- err = its_alloc_tables(its);
+ err = its_alloc_tables(node->full_name, its);
if (err)
goto out_free_cmd;
@@ -1519,26 +1465,27 @@ static int its_probe(struct device_node *node, struct irq_domain *parent)
writeq_relaxed(0, its->base + GITS_CWRITER);
writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR);
- if (of_property_read_bool(its->msi_chip.of_node, "msi-controller")) {
- its->domain = irq_domain_add_tree(NULL, &its_domain_ops, its);
- if (!its->domain) {
+ if (of_property_read_bool(node, "msi-controller")) {
+ struct msi_domain_info *info;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info) {
err = -ENOMEM;
goto out_free_tables;
}
- its->domain->parent = parent;
-
- its->msi_chip.domain = pci_msi_create_irq_domain(node,
- &its_pci_msi_domain_info,
- its->domain);
- if (!its->msi_chip.domain) {
+ inner_domain = irq_domain_add_tree(node, &its_domain_ops, its);
+ if (!inner_domain) {
err = -ENOMEM;
- goto out_free_domains;
+ kfree(info);
+ goto out_free_tables;
}
- err = of_pci_msi_chip_add(&its->msi_chip);
- if (err)
- goto out_free_domains;
+ inner_domain->parent = parent;
+ inner_domain->bus_token = DOMAIN_BUS_NEXUS;
+ info->ops = &its_msi_domain_ops;
+ info->data = its;
+ inner_domain->host_data = info;
}
spin_lock(&its_lock);
@@ -1547,11 +1494,6 @@ static int its_probe(struct device_node *node, struct irq_domain *parent)
return 0;
-out_free_domains:
- if (its->msi_chip.domain)
- irq_domain_remove(its->msi_chip.domain);
- if (its->domain)
- irq_domain_remove(its->domain);
out_free_tables:
its_free_tables(its);
out_free_cmd:
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 1a7980692f25..c21e364dcd96 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -18,6 +18,7 @@
* driver.
*/
+#include <linux/device.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/module.h>
@@ -577,3 +578,23 @@ err:
kfree(desc);
}
}
+
+/**
+ * of_msi_configure - Set the msi_domain field of a device
+ * @dev: device structure to associate with an MSI irq domain
+ * @np: device node for that device
+ */
+void of_msi_configure(struct device *dev, struct device_node *np)
+{
+ struct device_node *msi_np;
+ struct irq_domain *d;
+
+ msi_np = of_parse_phandle(np, "msi-parent", 0);
+ if (!msi_np)
+ return;
+
+ d = irq_find_matching_host(msi_np, DOMAIN_BUS_PLATFORM_MSI);
+ if (!d)
+ d = irq_find_host(msi_np);
+ dev_set_msi_domain(dev, d);
+}
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index ddf8e42c9367..8a002d6151f2 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -184,6 +184,7 @@ static struct platform_device *of_platform_device_create_pdata(
dev->dev.bus = &platform_bus_type;
dev->dev.platform_data = platform_data;
of_dma_configure(&dev->dev, dev->dev.of_node);
+ of_msi_configure(&dev->dev, dev->dev.of_node);
if (of_device_add(dev) != 0) {
of_dma_deconfigure(&dev->dev);
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 73e4af400a5a..be3f631c3f75 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_PCI_IOV) += iov.o
#
obj-$(CONFIG_ALPHA) += setup-irq.o
obj-$(CONFIG_ARM) += setup-irq.o
+obj-$(CONFIG_ARM64) += setup-irq.o
obj-$(CONFIG_UNICORE32) += setup-irq.o
obj-$(CONFIG_SUPERH) += setup-irq.o
obj-$(CONFIG_MIPS) += setup-irq.o
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index 1dfb567b3522..e90d132d3316 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -53,7 +53,7 @@ config PCI_RCAR_GEN2_PCIE
config PCI_HOST_GENERIC
bool "Generic PCI host controller"
- depends on ARM && OF
+ depends on (ARM || ARM64) && OF
help
Say Y here if you want to support a simple generic PCI host
controller, such as the one emulated by kvmtool.
@@ -89,11 +89,20 @@ config PCI_XGENE
depends on ARCH_XGENE
depends on OF
select PCIEPORTBUS
+ select PCI_MSI_IRQ_DOMAIN if PCI_MSI
help
Say Y here if you want internal PCI support on APM X-Gene SoC.
There are 5 internal PCIe ports available. Each port is GEN3 capable
and have varied lanes from x1 to x8.
+config PCI_XGENE_MSI
+ bool "X-Gene v1 PCIe MSI feature"
+ depends on PCI_XGENE && PCI_MSI
+ default y
+ help
+ Say Y here if you want PCIe MSI support for the APM X-Gene v1 SoC.
+ This MSI driver supports 5 PCIe ports on the APM X-Gene v1 SoC.
+
config PCI_LAYERSCAPE
bool "Freescale Layerscape PCIe controller"
depends on OF && ARM
diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
index f733b4e27642..1957431d3fcc 100644
--- a/drivers/pci/host/Makefile
+++ b/drivers/pci/host/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o
obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o pci-keystone.o
obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o
obj-$(CONFIG_PCI_XGENE) += pci-xgene.o
+obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o
obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o
obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o
obj-$(CONFIG_PCIE_IPROC) += pcie-iproc.o
diff --git a/drivers/pci/host/pci-host-generic.c b/drivers/pci/host/pci-host-generic.c
index ba46e581db99..265dd25169bf 100644
--- a/drivers/pci/host/pci-host-generic.c
+++ b/drivers/pci/host/pci-host-generic.c
@@ -38,7 +38,16 @@ struct gen_pci_cfg_windows {
const struct gen_pci_cfg_bus_ops *ops;
};
+/*
+ * ARM pcibios functions expect the ARM struct pci_sys_data as the PCI
+ * sysdata. Add pci_sys_data as the first element in struct gen_pci so
+ * that when we use a gen_pci pointer as sysdata, it is also a pointer to
+ * a struct pci_sys_data.
+ */
struct gen_pci {
+#ifdef CONFIG_ARM
+ struct pci_sys_data sys;
+#endif
struct pci_host_bridge host;
struct gen_pci_cfg_windows cfg;
struct list_head resources;
@@ -48,8 +57,7 @@ static void __iomem *gen_pci_map_cfg_bus_cam(struct pci_bus *bus,
unsigned int devfn,
int where)
{
- struct pci_sys_data *sys = bus->sysdata;
- struct gen_pci *pci = sys->private_data;
+ struct gen_pci *pci = bus->sysdata;
resource_size_t idx = bus->number - pci->cfg.bus_range->start;
return pci->cfg.win[idx] + ((devfn << 8) | where);
@@ -64,8 +72,7 @@ static void __iomem *gen_pci_map_cfg_bus_ecam(struct pci_bus *bus,
unsigned int devfn,
int where)
{
- struct pci_sys_data *sys = bus->sysdata;
- struct gen_pci *pci = sys->private_data;
+ struct gen_pci *pci = bus->sysdata;
resource_size_t idx = bus->number - pci->cfg.bus_range->start;
return pci->cfg.win[idx] + ((devfn << 12) | where);
@@ -198,13 +205,6 @@ static int gen_pci_parse_map_cfg_windows(struct gen_pci *pci)
return 0;
}
-static int gen_pci_setup(int nr, struct pci_sys_data *sys)
-{
- struct gen_pci *pci = sys->private_data;
- list_splice_init(&pci->resources, &sys->resources);
- return 1;
-}
-
static int gen_pci_probe(struct platform_device *pdev)
{
int err;
@@ -214,13 +214,7 @@ static int gen_pci_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct device_node *np = dev->of_node;
struct gen_pci *pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
- struct hw_pci hw = {
- .nr_controllers = 1,
- .private_data = (void **)&pci,
- .setup = gen_pci_setup,
- .map_irq = of_irq_parse_and_map_pci,
- .ops = &gen_pci_ops,
- };
+ struct pci_bus *bus, *child;
if (!pci)
return -ENOMEM;
@@ -258,7 +252,27 @@ static int gen_pci_probe(struct platform_device *pdev)
return err;
}
- pci_common_init_dev(dev, &hw);
+ /* Do not reassign resources if probe only */
+ if (!pci_has_flag(PCI_PROBE_ONLY))
+ pci_add_flags(PCI_REASSIGN_ALL_RSRC | PCI_REASSIGN_ALL_BUS);
+
+ bus = pci_scan_root_bus(dev, 0, &gen_pci_ops, pci, &pci->resources);
+ if (!bus) {
+ dev_err(dev, "Scanning rootbus failed");
+ return -ENODEV;
+ }
+
+ pci_fixup_irqs(pci_common_swizzle, of_irq_parse_and_map_pci);
+
+ if (!pci_has_flag(PCI_PROBE_ONLY)) {
+ pci_bus_size_bridges(bus);
+ pci_bus_assign_resources(bus);
+
+ list_for_each_entry(child, &bus->children, node)
+ pcie_bus_configure_settings(child);
+ }
+
+ pci_bus_add_devices(bus);
return 0;
}
diff --git a/drivers/pci/host/pci-xgene-msi.c b/drivers/pci/host/pci-xgene-msi.c
new file mode 100644
index 000000000000..ec5a14b3189b
--- /dev/null
+++ b/drivers/pci/host/pci-xgene-msi.c
@@ -0,0 +1,587 @@
+/*
+ * APM X-Gene MSI Driver
+ *
+ * Copyright (c) 2014, Applied Micro Circuits Corporation
+ * Author: Tanmay Inamdar <tinamdar@apm.com>
+ * Duc Dang <dhdang@apm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#include <linux/cpu.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/of_irq.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/of_pci.h>
+
+#define MSI_IR0 0x000000
+#define MSI_INT0 0x800000
+#define IDX_PER_GROUP 8
+#define IRQS_PER_IDX 16
+#define NR_HW_IRQS 16
+#define NR_MSI_VEC (IDX_PER_GROUP * IRQS_PER_IDX * NR_HW_IRQS)
+
+struct xgene_msi_group {
+ struct xgene_msi *msi;
+ int gic_irq;
+ u32 msi_grp;
+};
+
+struct xgene_msi {
+ struct device_node *node;
+ struct irq_domain *inner_domain;
+ struct irq_domain *msi_domain;
+ u64 msi_addr;
+ void __iomem *msi_regs;
+ unsigned long *bitmap;
+ struct mutex bitmap_lock;
+ struct xgene_msi_group *msi_groups;
+ int num_cpus;
+};
+
+/* Global data */
+static struct xgene_msi xgene_msi_ctrl;
+
+static struct irq_chip xgene_msi_top_irq_chip = {
+ .name = "X-Gene1 MSI",
+ .irq_enable = pci_msi_unmask_irq,
+ .irq_disable = pci_msi_mask_irq,
+ .irq_mask = pci_msi_mask_irq,
+ .irq_unmask = pci_msi_unmask_irq,
+};
+
+static struct msi_domain_info xgene_msi_domain_info = {
+ .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_PCI_MSIX),
+ .chip = &xgene_msi_top_irq_chip,
+};
+
+/*
+ * X-Gene v1 has 16 groups of MSI termination registers MSInIRx, where
+ * n is group number (0..F), x is index of registers in each group (0..7)
+ * The register layout is as follows:
+ * MSI0IR0 base_addr
+ * MSI0IR1 base_addr + 0x10000
+ * ... ...
+ * MSI0IR6 base_addr + 0x60000
+ * MSI0IR7 base_addr + 0x70000
+ * MSI1IR0 base_addr + 0x80000
+ * MSI1IR1 base_addr + 0x90000
+ * ... ...
+ * MSI1IR7 base_addr + 0xF0000
+ * MSI2IR0 base_addr + 0x100000
+ * ... ...
+ * MSIFIR0 base_addr + 0x780000
+ * MSIFIR1 base_addr + 0x790000
+ * ... ...
+ * MSIFIR7 base_addr + 0x7F0000
+ * MSIINT0 base_addr + 0x800000
+ * MSIINT1 base_addr + 0x810000
+ * ... ...
+ * MSIINTF base_addr + 0x8F0000
+ *
+ * Each index register supports 16 MSI vectors (0..15) to generate interrupt.
+ * There are total 16 GIC IRQs assigned for these 16 groups of MSI termination
+ * registers.
+ *
+ * Each MSI termination group has 1 MSIINTn register (n is 0..15) to indicate
+ * the MSI pending status caused by 1 of its 8 index registers.
+ */
+
+/* MSInIRx read helper */
+static u32 xgene_msi_ir_read(struct xgene_msi *msi,
+ u32 msi_grp, u32 msir_idx)
+{
+ return readl_relaxed(msi->msi_regs + MSI_IR0 +
+ (msi_grp << 19) + (msir_idx << 16));
+}
+
+/* MSIINTn read helper */
+static u32 xgene_msi_int_read(struct xgene_msi *msi, u32 msi_grp)
+{
+ return readl_relaxed(msi->msi_regs + MSI_INT0 + (msi_grp << 16));
+}
+
+/*
+ * With 2048 MSI vectors supported, the MSI message can be constructed using
+ * following scheme:
+ * - Divide into 8 256-vector groups
+ * Group 0: 0-255
+ * Group 1: 256-511
+ * Group 2: 512-767
+ * ...
+ * Group 7: 1792-2047
+ * - Each 256-vector group is divided into 16 16-vector groups
+ * As an example: 16 16-vector groups for 256-vector group 0-255 is
+ * Group 0: 0-15
+ * Group 1: 16-32
+ * ...
+ * Group 15: 240-255
+ * - The termination address of MSI vector in 256-vector group n and 16-vector
+ * group x is the address of MSIxIRn
+ * - The data for MSI vector in 16-vector group x is x
+ */
+static u32 hwirq_to_reg_set(unsigned long hwirq)
+{
+ return (hwirq / (NR_HW_IRQS * IRQS_PER_IDX));
+}
+
+static u32 hwirq_to_group(unsigned long hwirq)
+{
+ return (hwirq % NR_HW_IRQS);
+}
+
+static u32 hwirq_to_msi_data(unsigned long hwirq)
+{
+ return ((hwirq / NR_HW_IRQS) % IRQS_PER_IDX);
+}
+
+static void xgene_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+ struct xgene_msi *msi = irq_data_get_irq_chip_data(data);
+ u32 reg_set = hwirq_to_reg_set(data->hwirq);
+ u32 group = hwirq_to_group(data->hwirq);
+ u64 target_addr = msi->msi_addr + (((8 * group) + reg_set) << 16);
+
+ msg->address_hi = upper_32_bits(target_addr);
+ msg->address_lo = lower_32_bits(target_addr);
+ msg->data = hwirq_to_msi_data(data->hwirq);
+}
+
+/*
+ * X-Gene v1 only has 16 MSI GIC IRQs for 2048 MSI vectors. To maintain
+ * the expected behaviour of .set_affinity for each MSI interrupt, the 16
+ * MSI GIC IRQs are statically allocated to 8 X-Gene v1 cores (2 GIC IRQs
+ * for each core). The MSI vector is moved fom 1 MSI GIC IRQ to another
+ * MSI GIC IRQ to steer its MSI interrupt to correct X-Gene v1 core. As a
+ * consequence, the total MSI vectors that X-Gene v1 supports will be
+ * reduced to 256 (2048/8) vectors.
+ */
+static int hwirq_to_cpu(unsigned long hwirq)
+{
+ return (hwirq % xgene_msi_ctrl.num_cpus);
+}
+
+static unsigned long hwirq_to_canonical_hwirq(unsigned long hwirq)
+{
+ return (hwirq - hwirq_to_cpu(hwirq));
+}
+
+static int xgene_msi_set_affinity(struct irq_data *irqdata,
+ const struct cpumask *mask, bool force)
+{
+ int target_cpu = cpumask_first(mask);
+ int curr_cpu;
+
+ curr_cpu = hwirq_to_cpu(irqdata->hwirq);
+ if (curr_cpu == target_cpu)
+ return IRQ_SET_MASK_OK_DONE;
+
+ /* Update MSI number to target the new CPU */
+ irqdata->hwirq = hwirq_to_canonical_hwirq(irqdata->hwirq) + target_cpu;
+
+ return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip xgene_msi_bottom_irq_chip = {
+ .name = "MSI",
+ .irq_set_affinity = xgene_msi_set_affinity,
+ .irq_compose_msi_msg = xgene_compose_msi_msg,
+};
+
+static int xgene_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *args)
+{
+ struct xgene_msi *msi = domain->host_data;
+ int msi_irq;
+
+ mutex_lock(&msi->bitmap_lock);
+
+ msi_irq = bitmap_find_next_zero_area(msi->bitmap, NR_MSI_VEC, 0,
+ msi->num_cpus, 0);
+ if (msi_irq < NR_MSI_VEC)
+ bitmap_set(msi->bitmap, msi_irq, msi->num_cpus);
+ else
+ msi_irq = -ENOSPC;
+
+ mutex_unlock(&msi->bitmap_lock);
+
+ if (msi_irq < 0)
+ return msi_irq;
+
+ irq_domain_set_info(domain, virq, msi_irq,
+ &xgene_msi_bottom_irq_chip, domain->host_data,
+ handle_simple_irq, NULL, NULL);
+ set_irq_flags(virq, IRQF_VALID);
+
+ return 0;
+}
+
+static void xgene_irq_domain_free(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs)
+{
+ struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+ struct xgene_msi *msi = irq_data_get_irq_chip_data(d);
+ u32 hwirq;
+
+ mutex_lock(&msi->bitmap_lock);
+
+ hwirq = hwirq_to_canonical_hwirq(d->hwirq);
+ bitmap_clear(msi->bitmap, hwirq, msi->num_cpus);
+
+ mutex_unlock(&msi->bitmap_lock);
+
+ irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+}
+
+static const struct irq_domain_ops msi_domain_ops = {
+ .alloc = xgene_irq_domain_alloc,
+ .free = xgene_irq_domain_free,
+};
+
+static int xgene_allocate_domains(struct xgene_msi *msi)
+{
+ msi->inner_domain = irq_domain_add_linear(NULL, NR_MSI_VEC,
+ &msi_domain_ops, msi);
+ if (!msi->inner_domain)
+ return -ENOMEM;
+
+ msi->msi_domain = pci_msi_create_irq_domain(msi->node,
+ &xgene_msi_domain_info,
+ msi->inner_domain);
+
+ if (!msi->msi_domain) {
+ irq_domain_remove(msi->inner_domain);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void xgene_free_domains(struct xgene_msi *msi)
+{
+ if (msi->msi_domain)
+ irq_domain_remove(msi->msi_domain);
+ if (msi->inner_domain)
+ irq_domain_remove(msi->inner_domain);
+}
+
+static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi)
+{
+ int size = BITS_TO_LONGS(NR_MSI_VEC) * sizeof(long);
+
+ xgene_msi->bitmap = kzalloc(size, GFP_KERNEL);
+ if (!xgene_msi->bitmap)
+ return -ENOMEM;
+
+ mutex_init(&xgene_msi->bitmap_lock);
+
+ xgene_msi->msi_groups = kcalloc(NR_HW_IRQS,
+ sizeof(struct xgene_msi_group),
+ GFP_KERNEL);
+ if (!xgene_msi->msi_groups)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void xgene_msi_isr(unsigned int irq, struct irq_desc *desc)
+{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct xgene_msi_group *msi_groups;
+ struct xgene_msi *xgene_msi;
+ unsigned int virq;
+ int msir_index, msir_val, hw_irq;
+ u32 intr_index, grp_select, msi_grp;
+
+ chained_irq_enter(chip, desc);
+
+ msi_groups = irq_desc_get_handler_data(desc);
+ xgene_msi = msi_groups->msi;
+ msi_grp = msi_groups->msi_grp;
+
+ /*
+ * MSIINTn (n is 0..F) indicates if there is a pending MSI interrupt
+ * If bit x of this register is set (x is 0..7), one or more interupts
+ * corresponding to MSInIRx is set.
+ */
+ grp_select = xgene_msi_int_read(xgene_msi, msi_grp);
+ while (grp_select) {
+ msir_index = ffs(grp_select) - 1;
+ /*
+ * Calculate MSInIRx address to read to check for interrupts
+ * (refer to termination address and data assignment
+ * described in xgene_compose_msi_msg() )
+ */
+ msir_val = xgene_msi_ir_read(xgene_msi, msi_grp, msir_index);
+ while (msir_val) {
+ intr_index = ffs(msir_val) - 1;
+ /*
+ * Calculate MSI vector number (refer to the termination
+ * address and data assignment described in
+ * xgene_compose_msi_msg function)
+ */
+ hw_irq = (((msir_index * IRQS_PER_IDX) + intr_index) *
+ NR_HW_IRQS) + msi_grp;
+ /*
+ * As we have multiple hw_irq that maps to single MSI,
+ * always look up the virq using the hw_irq as seen from
+ * CPU0
+ */
+ hw_irq = hwirq_to_canonical_hwirq(hw_irq);
+ virq = irq_find_mapping(xgene_msi->inner_domain, hw_irq);
+ WARN_ON(!virq);
+ if (virq != 0)
+ generic_handle_irq(virq);
+ msir_val &= ~(1 << intr_index);
+ }
+ grp_select &= ~(1 << msir_index);
+
+ if (!grp_select) {
+ /*
+ * We handled all interrupts happened in this group,
+ * resample this group MSI_INTx register in case
+ * something else has been made pending in the meantime
+ */
+ grp_select = xgene_msi_int_read(xgene_msi, msi_grp);
+ }
+ }
+
+ chained_irq_exit(chip, desc);
+}
+
+static int xgene_msi_remove(struct platform_device *pdev)
+{
+ int virq, i;
+ struct xgene_msi *msi = platform_get_drvdata(pdev);
+
+ for (i = 0; i < NR_HW_IRQS; i++) {
+ virq = msi->msi_groups[i].gic_irq;
+ if (virq != 0) {
+ irq_set_chained_handler(virq, NULL);
+ irq_set_handler_data(virq, NULL);
+ }
+ }
+ kfree(msi->msi_groups);
+
+ kfree(msi->bitmap);
+ msi->bitmap = NULL;
+
+ xgene_free_domains(msi);
+
+ return 0;
+}
+
+static int xgene_msi_hwirq_alloc(unsigned int cpu)
+{
+ struct xgene_msi *msi = &xgene_msi_ctrl;
+ struct xgene_msi_group *msi_group;
+ cpumask_var_t mask;
+ int i;
+ int err;
+
+ for (i = cpu; i < NR_HW_IRQS; i += msi->num_cpus) {
+ msi_group = &msi->msi_groups[i];
+ if (!msi_group->gic_irq)
+ continue;
+
+ irq_set_chained_handler(msi_group->gic_irq,
+ xgene_msi_isr);
+ err = irq_set_handler_data(msi_group->gic_irq, msi_group);
+ if (err) {
+ pr_err("failed to register GIC IRQ handler\n");
+ return -EINVAL;
+ }
+ /*
+ * Statically allocate MSI GIC IRQs to each CPU core.
+ * With 8-core X-Gene v1, 2 MSI GIC IRQs are allocated
+ * to each core.
+ */
+ if (alloc_cpumask_var(&mask, GFP_KERNEL)) {
+ cpumask_clear(mask);
+ cpumask_set_cpu(cpu, mask);
+ err = irq_set_affinity(msi_group->gic_irq, mask);
+ if (err)
+ pr_err("failed to set affinity for GIC IRQ");
+ free_cpumask_var(mask);
+ } else {
+ pr_err("failed to alloc CPU mask for affinity\n");
+ err = -EINVAL;
+ }
+
+ if (err) {
+ irq_set_chained_handler(msi_group->gic_irq, NULL);
+ irq_set_handler_data(msi_group->gic_irq, NULL);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static void xgene_msi_hwirq_free(unsigned int cpu)
+{
+ struct xgene_msi *msi = &xgene_msi_ctrl;
+ struct xgene_msi_group *msi_group;
+ int i;
+
+ for (i = cpu; i < NR_HW_IRQS; i += msi->num_cpus) {
+ msi_group = &msi->msi_groups[i];
+ if (!msi_group->gic_irq)
+ continue;
+
+ irq_set_chained_handler(msi_group->gic_irq, NULL);
+ irq_set_handler_data(msi_group->gic_irq, NULL);
+ }
+}
+
+static int xgene_msi_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ unsigned cpu = (unsigned long)hcpu;
+
+ switch (action) {
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ xgene_msi_hwirq_alloc(cpu);
+ break;
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ xgene_msi_hwirq_free(cpu);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block xgene_msi_cpu_notifier = {
+ .notifier_call = xgene_msi_cpu_callback,
+};
+
+static const struct of_device_id xgene_msi_match_table[] = {
+ {.compatible = "apm,xgene1-msi"},
+ {},
+};
+
+static int xgene_msi_probe(struct platform_device *pdev)
+{
+ struct resource *res;
+ int rc, irq_index;
+ struct xgene_msi *xgene_msi;
+ unsigned int cpu;
+ int virt_msir;
+ u32 msi_val, msi_idx;
+
+ xgene_msi = &xgene_msi_ctrl;
+
+ platform_set_drvdata(pdev, xgene_msi);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ xgene_msi->msi_regs = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(xgene_msi->msi_regs)) {
+ dev_err(&pdev->dev, "no reg space\n");
+ rc = -EINVAL;
+ goto error;
+ }
+ xgene_msi->msi_addr = res->start;
+ xgene_msi->node = pdev->dev.of_node;
+ xgene_msi->num_cpus = num_possible_cpus();
+
+ rc = xgene_msi_init_allocator(xgene_msi);
+ if (rc) {
+ dev_err(&pdev->dev, "Error allocating MSI bitmap\n");
+ goto error;
+ }
+
+ rc = xgene_allocate_domains(xgene_msi);
+ if (rc) {
+ dev_err(&pdev->dev, "Failed to allocate MSI domain\n");
+ goto error;
+ }
+
+ for (irq_index = 0; irq_index < NR_HW_IRQS; irq_index++) {
+ virt_msir = platform_get_irq(pdev, irq_index);
+ if (virt_msir < 0) {
+ dev_err(&pdev->dev, "Cannot translate IRQ index %d\n",
+ irq_index);
+ rc = -EINVAL;
+ goto error;
+ }
+ xgene_msi->msi_groups[irq_index].gic_irq = virt_msir;
+ xgene_msi->msi_groups[irq_index].msi_grp = irq_index;
+ xgene_msi->msi_groups[irq_index].msi = xgene_msi;
+ }
+
+ /*
+ * MSInIRx registers are read-to-clear; before registering
+ * interrupt handlers, read all of them to clear spurious
+ * interrupts that may occur before the driver is probed.
+ */
+ for (irq_index = 0; irq_index < NR_HW_IRQS; irq_index++) {
+ for (msi_idx = 0; msi_idx < IDX_PER_GROUP; msi_idx++)
+ msi_val = xgene_msi_ir_read(xgene_msi, irq_index,
+ msi_idx);
+ /* Read MSIINTn to confirm */
+ msi_val = xgene_msi_int_read(xgene_msi, irq_index);
+ if (msi_val) {
+ dev_err(&pdev->dev, "Failed to clear spurious IRQ\n");
+ rc = -EINVAL;
+ goto error;
+ }
+ }
+
+ cpu_notifier_register_begin();
+
+ for_each_online_cpu(cpu)
+ if (xgene_msi_hwirq_alloc(cpu)) {
+ dev_err(&pdev->dev, "failed to register MSI handlers\n");
+ cpu_notifier_register_done();
+ goto error;
+ }
+
+ rc = __register_hotcpu_notifier(&xgene_msi_cpu_notifier);
+ if (rc) {
+ dev_err(&pdev->dev, "failed to add CPU MSI notifier\n");
+ cpu_notifier_register_done();
+ goto error;
+ }
+
+ cpu_notifier_register_done();
+
+ dev_info(&pdev->dev, "APM X-Gene PCIe MSI driver loaded\n");
+
+ return 0;
+
+error:
+ xgene_msi_remove(pdev);
+ return rc;
+}
+
+static struct platform_driver xgene_msi_driver = {
+ .driver = {
+ .name = "xgene-msi",
+ .owner = THIS_MODULE,
+ .of_match_table = xgene_msi_match_table,
+ },
+ .probe = xgene_msi_probe,
+ .remove = xgene_msi_remove,
+};
+
+static int __init xgene_pcie_msi_init(void)
+{
+ return platform_driver_register(&xgene_msi_driver);
+}
+subsys_initcall(xgene_pcie_msi_init);
diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c
index ee082c0366ec..3e5a636c9a9a 100644
--- a/drivers/pci/host/pci-xgene.c
+++ b/drivers/pci/host/pci-xgene.c
@@ -468,6 +468,23 @@ static int xgene_pcie_setup(struct xgene_pcie_port *port,
return 0;
}
+static int xgene_pcie_msi_enable(struct pci_bus *bus)
+{
+ struct device_node *msi_node;
+
+ msi_node = of_parse_phandle(bus->dev.of_node,
+ "msi-parent", 0);
+ if (!msi_node)
+ return -ENODEV;
+
+ bus->msi = of_pci_find_msi_chip_by_node(msi_node);
+ if (!bus->msi)
+ return -ENODEV;
+
+ bus->msi->dev = &bus->dev;
+ return 0;
+}
+
static int xgene_pcie_probe_bridge(struct platform_device *pdev)
{
struct device_node *dn = pdev->dev.of_node;
@@ -504,6 +521,10 @@ static int xgene_pcie_probe_bridge(struct platform_device *pdev)
if (!bus)
return -ENOMEM;
+ if (IS_ENABLED(CONFIG_PCI_MSI))
+ if (xgene_pcie_msi_enable(bus))
+ dev_info(port->dev, "failed to enable MSI\n");
+
pci_scan_child_bus(bus);
pci_assign_unassigned_bus_resources(bus);
pci_bus_add_devices(bus);
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index c3e7dfcf9ff5..fc54038495da 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -39,14 +39,13 @@ struct irq_domain * __weak arch_get_pci_msi_domain(struct pci_dev *dev)
static struct irq_domain *pci_msi_get_domain(struct pci_dev *dev)
{
- struct irq_domain *domain = NULL;
+ struct irq_domain *domain;
- if (dev->bus->msi)
- domain = dev->bus->msi->domain;
- if (!domain)
- domain = arch_get_pci_msi_domain(dev);
+ domain = dev_get_msi_domain(&dev->dev);
+ if (domain)
+ return domain;
- return domain;
+ return arch_get_pci_msi_domain(dev);
}
static int pci_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
@@ -1302,12 +1301,19 @@ struct irq_domain *pci_msi_create_irq_domain(struct device_node *node,
struct msi_domain_info *info,
struct irq_domain *parent)
{
+ struct irq_domain *domain;
+
if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS)
pci_msi_domain_update_dom_ops(info);
if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
pci_msi_domain_update_chip_ops(info);
- return msi_create_irq_domain(node, info, parent);
+ domain = msi_create_irq_domain(node, info, parent);
+ if (!domain)
+ return NULL;
+
+ domain->bus_token = DOMAIN_BUS_PCI_MSI;
+ return domain;
}
/**
diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index f0929934bb7a..2e99a500cb83 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -9,6 +9,7 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/irqdomain.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/of.h>
@@ -59,3 +60,32 @@ struct device_node * __weak pcibios_get_phb_of_node(struct pci_bus *bus)
return of_node_get(bus->bridge->parent->of_node);
return NULL;
}
+
+struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus)
+{
+#ifdef CONFIG_IRQ_DOMAIN
+ struct device_node *np;
+ struct irq_domain *d;
+
+ if (!bus->dev.of_node)
+ return NULL;
+
+ /* Start looking for a phandle to an MSI controller. */
+ np = of_parse_phandle(bus->dev.of_node, "msi-parent", 0);
+
+ /*
+ * If we don't have an msi-parent property, look for a domain
+ * directly attached to the host bridge.
+ */
+ if (!np)
+ np = bus->dev.of_node;
+
+ d = irq_find_matching_host(np, DOMAIN_BUS_PCI_MSI);
+ if (d)
+ return d;
+
+ return irq_find_host(np);
+#else
+ return NULL;
+#endif
+}
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index c91185721345..8a369ed5ea3f 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -661,6 +661,35 @@ static void pci_set_bus_speed(struct pci_bus *bus)
}
}
+static struct irq_domain *pci_host_bridge_msi_domain(struct pci_bus *bus)
+{
+ struct irq_domain *d;
+
+ /*
+ * Any firmware interface that can resolve the msi_domain
+ * should be called from here.
+ */
+ d = pci_host_bridge_of_msi_domain(bus);
+
+ return d;
+}
+
+static void pci_set_bus_msi_domain(struct pci_bus *bus)
+{
+ struct irq_domain *d;
+
+ /*
+ * Either bus is the root, and we must obtain it from the
+ * firmware, or we inherit it from the bridge device.
+ */
+ if (pci_is_root_bus(bus))
+ d = pci_host_bridge_msi_domain(bus);
+ else
+ d = dev_get_msi_domain(&bus->self->dev);
+
+ dev_set_msi_domain(&bus->dev, d);
+}
+
static struct pci_bus *pci_alloc_child_bus(struct pci_bus *parent,
struct pci_dev *bridge, int busnr)
{
@@ -714,6 +743,7 @@ static struct pci_bus *pci_alloc_child_bus(struct pci_bus *parent,
bridge->subordinate = child;
add_dev:
+ pci_set_bus_msi_domain(child);
ret = device_register(&child->dev);
WARN_ON(ret < 0);
@@ -1508,6 +1538,17 @@ static void pci_init_capabilities(struct pci_dev *dev)
pci_enable_acs(dev);
}
+static void pci_set_msi_domain(struct pci_dev *dev)
+{
+ /*
+ * If no domain has been set through the pcibios_add_device
+ * callback, inherit the default from the bus device.
+ */
+ if (!dev_get_msi_domain(&dev->dev))
+ dev_set_msi_domain(&dev->dev,
+ dev_get_msi_domain(&dev->bus->dev));
+}
+
void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
{
int ret;
@@ -1549,6 +1590,9 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
ret = pcibios_add_device(dev);
WARN_ON(ret < 0);
+ /* Setup MSI irq domain */
+ pci_set_msi_domain(dev);
+
/* Notifier could use PCI capabilities */
dev->match_driver = false;
ret = device_add(&dev->dev);
@@ -1939,6 +1983,7 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
b->bridge = get_device(&bridge->dev);
device_enable_async_suspend(b->bridge);
pci_set_bus_of_node(b);
+ pci_set_bus_msi_domain(b);
if (!parent)
set_dev_node(b->bridge, pcibus_to_node(b));
diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
index 594c918b553d..1ef02daddb60 100644
--- a/drivers/platform/x86/acerhdf.c
+++ b/drivers/platform/x86/acerhdf.c
@@ -372,7 +372,8 @@ static int acerhdf_bind(struct thermal_zone_device *thermal,
return 0;
if (thermal_zone_bind_cooling_device(thermal, 0, cdev,
- THERMAL_NO_LIMIT, THERMAL_NO_LIMIT)) {
+ THERMAL_NO_LIMIT, THERMAL_NO_LIMIT,
+ THERMAL_WEIGHT_DEFAULT)) {
pr_err("error binding cooling dev\n");
return -EINVAL;
}
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index af40db0df58e..2acc88378099 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -42,6 +42,17 @@ config THERMAL_OF
Say 'Y' here if you need to build thermal infrastructure
based on device tree.
+config THERMAL_WRITABLE_TRIPS
+ bool "Enable writable trip points"
+ help
+ This option allows the system integrator to choose whether
+ trip temperatures can be changed from userspace. The
+ writable trips need to be specified when setting up the
+ thermal zone but the choice here takes precedence.
+
+ Say 'Y' here if you would like to allow userspace tools to
+ change trip temperatures.
+
choice
prompt "Default Thermal governor"
default THERMAL_DEFAULT_GOV_STEP_WISE
@@ -71,6 +82,14 @@ config THERMAL_DEFAULT_GOV_USER_SPACE
Select this if you want to let the user space manage the
platform thermals.
+config THERMAL_DEFAULT_GOV_POWER_ALLOCATOR
+ bool "power_allocator"
+ select THERMAL_GOV_POWER_ALLOCATOR
+ help
+ Select this if you want to control temperature based on
+ system and device power allocation. This governor can only
+ operate on cooling devices that implement the power API.
+
endchoice
config THERMAL_GOV_FAIR_SHARE
@@ -99,6 +118,13 @@ config THERMAL_GOV_USER_SPACE
help
Enable this to let the user space manage the platform thermals.
+config THERMAL_GOV_POWER_ALLOCATOR
+ bool "Power allocator thermal governor"
+ select THERMAL_POWER_ACTOR
+ help
+ Enable this to manage platform thermals by dynamically
+ allocating and limiting power to devices.
+
config CPU_THERMAL
bool "generic cpu cooling support"
depends on CPU_FREQ
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index fa0dc486790f..cd769ab06cbb 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -14,6 +14,7 @@ thermal_sys-$(CONFIG_THERMAL_GOV_FAIR_SHARE) += fair_share.o
thermal_sys-$(CONFIG_THERMAL_GOV_BANG_BANG) += gov_bang_bang.o
thermal_sys-$(CONFIG_THERMAL_GOV_STEP_WISE) += step_wise.o
thermal_sys-$(CONFIG_THERMAL_GOV_USER_SPACE) += user_space.o
+thermal_sys-$(CONFIG_THERMAL_GOV_POWER_ALLOCATOR) += power_allocator.o
# cpufreq cooling
thermal_sys-$(CONFIG_CPU_THERMAL) += cpu_cooling.o
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index f65f0d109fc8..6509c61b9648 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -26,10 +26,13 @@
#include <linux/thermal.h>
#include <linux/cpufreq.h>
#include <linux/err.h>
+#include <linux/pm_opp.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpu_cooling.h>
+#include <trace/events/thermal.h>
+
/*
* Cooling state <-> CPUFreq frequency
*
@@ -45,6 +48,19 @@
*/
/**
+ * struct power_table - frequency to power conversion
+ * @frequency: frequency in KHz
+ * @power: power in mW
+ *
+ * This structure is built when the cooling device registers and helps
+ * in translating frequency to power and viceversa.
+ */
+struct power_table {
+ u32 frequency;
+ u32 power;
+};
+
+/**
* struct cpufreq_cooling_device - data for cooling device with cpufreq
* @id: unique integer value corresponding to each cpufreq_cooling_device
* registered.
@@ -58,6 +74,15 @@
* cpufreq frequencies.
* @allowed_cpus: all the cpus involved for this cpufreq_cooling_device.
* @node: list_head to link all cpufreq_cooling_device together.
+ * @last_load: load measured by the latest call to cpufreq_get_actual_power()
+ * @time_in_idle: previous reading of the absolute time that this cpu was idle
+ * @time_in_idle_timestamp: wall time of the last invocation of
+ * get_cpu_idle_time_us()
+ * @dyn_power_table: array of struct power_table for frequency to power
+ * conversion, sorted in ascending order.
+ * @dyn_power_table_entries: number of entries in the @dyn_power_table array
+ * @cpu_dev: the first cpu_device from @allowed_cpus that has OPPs registered
+ * @plat_get_static_power: callback to calculate the static power
*
* This structure is required for keeping information of each registered
* cpufreq_cooling_device.
@@ -71,6 +96,13 @@ struct cpufreq_cooling_device {
unsigned int *freq_table; /* In descending order */
struct cpumask allowed_cpus;
struct list_head node;
+ u32 last_load;
+ u64 *time_in_idle;
+ u64 *time_in_idle_timestamp;
+ struct power_table *dyn_power_table;
+ int dyn_power_table_entries;
+ struct device *cpu_dev;
+ get_static_t plat_get_static_power;
};
static DEFINE_IDR(cpufreq_idr);
static DEFINE_MUTEX(cooling_cpufreq_lock);
@@ -186,23 +218,237 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb,
unsigned long max_freq = 0;
struct cpufreq_cooling_device *cpufreq_dev;
- if (event != CPUFREQ_ADJUST)
- return 0;
+ switch (event) {
- mutex_lock(&cooling_cpufreq_lock);
- list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
- if (!cpumask_test_cpu(policy->cpu,
- &cpufreq_dev->allowed_cpus))
+ case CPUFREQ_ADJUST:
+ mutex_lock(&cooling_cpufreq_lock);
+ list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
+ if (!cpumask_test_cpu(policy->cpu,
+ &cpufreq_dev->allowed_cpus))
+ continue;
+
+ max_freq = cpufreq_dev->cpufreq_val;
+
+ if (policy->max != max_freq)
+ cpufreq_verify_within_limits(policy, 0,
+ max_freq);
+ }
+ mutex_unlock(&cooling_cpufreq_lock);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
+/**
+ * build_dyn_power_table() - create a dynamic power to frequency table
+ * @cpufreq_device: the cpufreq cooling device in which to store the table
+ * @capacitance: dynamic power coefficient for these cpus
+ *
+ * Build a dynamic power to frequency table for this cpu and store it
+ * in @cpufreq_device. This table will be used in cpu_power_to_freq() and
+ * cpu_freq_to_power() to convert between power and frequency
+ * efficiently. Power is stored in mW, frequency in KHz. The
+ * resulting table is in ascending order.
+ *
+ * Return: 0 on success, -E* on error.
+ */
+static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device,
+ u32 capacitance)
+{
+ struct power_table *power_table;
+ struct dev_pm_opp *opp;
+ struct device *dev = NULL;
+ int num_opps = 0, cpu, i, ret = 0;
+ unsigned long freq;
+
+ rcu_read_lock();
+
+ for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
+ dev = get_cpu_device(cpu);
+ if (!dev) {
+ dev_warn(&cpufreq_device->cool_dev->device,
+ "No cpu device for cpu %d\n", cpu);
continue;
+ }
+
+ num_opps = dev_pm_opp_get_opp_count(dev);
+ if (num_opps > 0) {
+ break;
+ } else if (num_opps < 0) {
+ ret = num_opps;
+ goto unlock;
+ }
+ }
- max_freq = cpufreq_dev->cpufreq_val;
+ if (num_opps == 0) {
+ ret = -EINVAL;
+ goto unlock;
+ }
- if (policy->max != max_freq)
- cpufreq_verify_within_limits(policy, 0, max_freq);
+ power_table = kcalloc(num_opps, sizeof(*power_table), GFP_KERNEL);
+ if (!power_table) {
+ ret = -ENOMEM;
+ goto unlock;
}
- mutex_unlock(&cooling_cpufreq_lock);
- return 0;
+ for (freq = 0, i = 0;
+ opp = dev_pm_opp_find_freq_ceil(dev, &freq), !IS_ERR(opp);
+ freq++, i++) {
+ u32 freq_mhz, voltage_mv;
+ u64 power;
+
+ freq_mhz = freq / 1000000;
+ voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;
+
+ /*
+ * Do the multiplication with MHz and millivolt so as
+ * to not overflow.
+ */
+ power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
+ do_div(power, 1000000000);
+
+ /* frequency is stored in power_table in KHz */
+ power_table[i].frequency = freq / 1000;
+
+ /* power is stored in mW */
+ power_table[i].power = power;
+ }
+
+ if (i == 0) {
+ ret = PTR_ERR(opp);
+ goto unlock;
+ }
+
+ cpufreq_device->cpu_dev = dev;
+ cpufreq_device->dyn_power_table = power_table;
+ cpufreq_device->dyn_power_table_entries = i;
+
+unlock:
+ rcu_read_unlock();
+ return ret;
+}
+
+static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_device,
+ u32 freq)
+{
+ int i;
+ struct power_table *pt = cpufreq_device->dyn_power_table;
+
+ for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
+ if (freq < pt[i].frequency)
+ break;
+
+ return pt[i - 1].power;
+}
+
+static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device,
+ u32 power)
+{
+ int i;
+ struct power_table *pt = cpufreq_device->dyn_power_table;
+
+ for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
+ if (power < pt[i].power)
+ break;
+
+ return pt[i - 1].frequency;
+}
+
+/**
+ * get_load() - get load for a cpu since last updated
+ * @cpufreq_device: &struct cpufreq_cooling_device for this cpu
+ * @cpu: cpu number
+ *
+ * Return: The average load of cpu @cpu in percentage since this
+ * function was last called.
+ */
+static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu)
+{
+ u32 load;
+ u64 now, now_idle, delta_time, delta_idle;
+
+ now_idle = get_cpu_idle_time(cpu, &now, 0);
+ delta_idle = now_idle - cpufreq_device->time_in_idle[cpu];
+ delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu];
+
+ if (delta_time <= delta_idle)
+ load = 0;
+ else
+ load = div64_u64(100 * (delta_time - delta_idle), delta_time);
+
+ cpufreq_device->time_in_idle[cpu] = now_idle;
+ cpufreq_device->time_in_idle_timestamp[cpu] = now;
+
+ return load;
+}
+
+/**
+ * get_static_power() - calculate the static power consumed by the cpus
+ * @cpufreq_device: struct &cpufreq_cooling_device for this cpu cdev
+ * @tz: thermal zone device in which we're operating
+ * @freq: frequency in KHz
+ * @power: pointer in which to store the calculated static power
+ *
+ * Calculate the static power consumed by the cpus described by
+ * @cpu_actor running at frequency @freq. This function relies on a
+ * platform specific function that should have been provided when the
+ * actor was registered. If it wasn't, the static power is assumed to
+ * be negligible. The calculated static power is stored in @power.
+ *
+ * Return: 0 on success, -E* on failure.
+ */
+static int get_static_power(struct cpufreq_cooling_device *cpufreq_device,
+ struct thermal_zone_device *tz, unsigned long freq,
+ u32 *power)
+{
+ struct dev_pm_opp *opp;
+ unsigned long voltage;
+ struct cpumask *cpumask = &cpufreq_device->allowed_cpus;
+ unsigned long freq_hz = freq * 1000;
+
+ if (!cpufreq_device->plat_get_static_power ||
+ !cpufreq_device->cpu_dev) {
+ *power = 0;
+ return 0;
+ }
+
+ rcu_read_lock();
+
+ opp = dev_pm_opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz,
+ true);
+ voltage = dev_pm_opp_get_voltage(opp);
+
+ rcu_read_unlock();
+
+ if (voltage == 0) {
+ dev_warn_ratelimited(cpufreq_device->cpu_dev,
+ "Failed to get voltage for frequency %lu: %ld\n",
+ freq_hz, IS_ERR(opp) ? PTR_ERR(opp) : 0);
+ return -EINVAL;
+ }
+
+ return cpufreq_device->plat_get_static_power(cpumask, tz->passive_delay,
+ voltage, power);
+}
+
+/**
+ * get_dynamic_power() - calculate the dynamic power
+ * @cpufreq_device: &cpufreq_cooling_device for this cdev
+ * @freq: current frequency
+ *
+ * Return: the dynamic power consumed by the cpus described by
+ * @cpufreq_device.
+ */
+static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device,
+ unsigned long freq)
+{
+ u32 raw_cpu_power;
+
+ raw_cpu_power = cpu_freq_to_power(cpufreq_device, freq);
+ return (raw_cpu_power * cpufreq_device->last_load) / 100;
}
/* cpufreq cooling device callback functions are defined below */
@@ -280,8 +526,205 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
return 0;
}
+/**
+ * cpufreq_get_requested_power() - get the current power
+ * @cdev: &thermal_cooling_device pointer
+ * @tz: a valid thermal zone device pointer
+ * @power: pointer in which to store the resulting power
+ *
+ * Calculate the current power consumption of the cpus in milliwatts
+ * and store it in @power. This function should actually calculate
+ * the requested power, but it's hard to get the frequency that
+ * cpufreq would have assigned if there were no thermal limits.
+ * Instead, we calculate the current power on the assumption that the
+ * immediate future will look like the immediate past.
+ *
+ * We use the current frequency and the average load since this
+ * function was last called. In reality, there could have been
+ * multiple opps since this function was last called and that affects
+ * the load calculation. While it's not perfectly accurate, this
+ * simplification is good enough and works. REVISIT this, as more
+ * complex code may be needed if experiments show that it's not
+ * accurate enough.
+ *
+ * Return: 0 on success, -E* if getting the static power failed.
+ */
+static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
+ struct thermal_zone_device *tz,
+ u32 *power)
+{
+ unsigned long freq;
+ int i = 0, cpu, ret;
+ u32 static_power, dynamic_power, total_load = 0;
+ struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+ u32 *load_cpu = NULL;
+
+ cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
+
+ /*
+ * All the CPUs are offline, thus the requested power by
+ * the cdev is 0
+ */
+ if (cpu >= nr_cpu_ids) {
+ *power = 0;
+ return 0;
+ }
+
+ freq = cpufreq_quick_get(cpu);
+
+ if (trace_thermal_power_cpu_get_power_enabled()) {
+ u32 ncpus = cpumask_weight(&cpufreq_device->allowed_cpus);
+
+ load_cpu = devm_kcalloc(&cdev->device, ncpus, sizeof(*load_cpu),
+ GFP_KERNEL);
+ }
+
+ for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
+ u32 load;
+
+ if (cpu_online(cpu))
+ load = get_load(cpufreq_device, cpu);
+ else
+ load = 0;
+
+ total_load += load;
+ if (trace_thermal_power_cpu_limit_enabled() && load_cpu)
+ load_cpu[i] = load;
+
+ i++;
+ }
+
+ cpufreq_device->last_load = total_load;
+
+ dynamic_power = get_dynamic_power(cpufreq_device, freq);
+ ret = get_static_power(cpufreq_device, tz, freq, &static_power);
+ if (ret) {
+ if (load_cpu)
+ devm_kfree(&cdev->device, load_cpu);
+ return ret;
+ }
+
+ if (load_cpu) {
+ trace_thermal_power_cpu_get_power(
+ &cpufreq_device->allowed_cpus,
+ freq, load_cpu, i, dynamic_power, static_power);
+
+ devm_kfree(&cdev->device, load_cpu);
+ }
+
+ *power = static_power + dynamic_power;
+ return 0;
+}
+
+/**
+ * cpufreq_state2power() - convert a cpu cdev state to power consumed
+ * @cdev: &thermal_cooling_device pointer
+ * @tz: a valid thermal zone device pointer
+ * @state: cooling device state to be converted
+ * @power: pointer in which to store the resulting power
+ *
+ * Convert cooling device state @state into power consumption in
+ * milliwatts assuming 100% load. Store the calculated power in
+ * @power.
+ *
+ * Return: 0 on success, -EINVAL if the cooling device state could not
+ * be converted into a frequency or other -E* if there was an error
+ * when calculating the static power.
+ */
+static int cpufreq_state2power(struct thermal_cooling_device *cdev,
+ struct thermal_zone_device *tz,
+ unsigned long state, u32 *power)
+{
+ unsigned int freq, num_cpus;
+ cpumask_t cpumask;
+ u32 static_power, dynamic_power;
+ int ret;
+ struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+
+ cpumask_and(&cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask);
+ num_cpus = cpumask_weight(&cpumask);
+
+ /* None of our cpus are online, so no power */
+ if (num_cpus == 0) {
+ *power = 0;
+ return 0;
+ }
+
+ freq = cpufreq_device->freq_table[state];
+ if (!freq)
+ return -EINVAL;
+
+ dynamic_power = cpu_freq_to_power(cpufreq_device, freq) * num_cpus;
+ ret = get_static_power(cpufreq_device, tz, freq, &static_power);
+ if (ret)
+ return ret;
+
+ *power = static_power + dynamic_power;
+ return 0;
+}
+
+/**
+ * cpufreq_power2state() - convert power to a cooling device state
+ * @cdev: &thermal_cooling_device pointer
+ * @tz: a valid thermal zone device pointer
+ * @power: power in milliwatts to be converted
+ * @state: pointer in which to store the resulting state
+ *
+ * Calculate a cooling device state for the cpus described by @cdev
+ * that would allow them to consume at most @power mW and store it in
+ * @state. Note that this calculation depends on external factors
+ * such as the cpu load or the current static power. Calling this
+ * function with the same power as input can yield different cooling
+ * device states depending on those external factors.
+ *
+ * Return: 0 on success, -ENODEV if no cpus are online or -EINVAL if
+ * the calculated frequency could not be converted to a valid state.
+ * The latter should not happen unless the frequencies available to
+ * cpufreq have changed since the initialization of the cpu cooling
+ * device.
+ */
+static int cpufreq_power2state(struct thermal_cooling_device *cdev,
+ struct thermal_zone_device *tz, u32 power,
+ unsigned long *state)
+{
+ unsigned int cpu, cur_freq, target_freq;
+ int ret;
+ s32 dyn_power;
+ u32 last_load, normalised_power, static_power;
+ struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+
+ cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
+
+ /* None of our cpus are online */
+ if (cpu >= nr_cpu_ids)
+ return -ENODEV;
+
+ cur_freq = cpufreq_quick_get(cpu);
+ ret = get_static_power(cpufreq_device, tz, cur_freq, &static_power);
+ if (ret)
+ return ret;
+
+ dyn_power = power - static_power;
+ dyn_power = dyn_power > 0 ? dyn_power : 0;
+ last_load = cpufreq_device->last_load ?: 1;
+ normalised_power = (dyn_power * 100) / last_load;
+ target_freq = cpu_power_to_freq(cpufreq_device, normalised_power);
+
+ *state = cpufreq_cooling_get_level(cpu, target_freq);
+ if (*state == THERMAL_CSTATE_INVALID) {
+ dev_warn_ratelimited(&cdev->device,
+ "Failed to convert %dKHz for cpu %d into a cdev state\n",
+ target_freq, cpu);
+ return -EINVAL;
+ }
+
+ trace_thermal_power_cpu_limit(&cpufreq_device->allowed_cpus,
+ target_freq, *state, power);
+ return 0;
+}
+
/* Bind cpufreq callbacks to thermal cooling device ops */
-static struct thermal_cooling_device_ops const cpufreq_cooling_ops = {
+static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
.get_max_state = cpufreq_get_max_state,
.get_cur_state = cpufreq_get_cur_state,
.set_cur_state = cpufreq_set_cur_state,
@@ -311,6 +754,9 @@ static unsigned int find_next_max(struct cpufreq_frequency_table *table,
* @np: a valid struct device_node to the cooling device device tree node
* @clip_cpus: cpumask of cpus where the frequency constraints will happen.
* Normally this should be same as cpufreq policy->related_cpus.
+ * @capacitance: dynamic power coefficient for these cpus
+ * @plat_static_func: function to calculate the static power consumed by these
+ * cpus (optional)
*
* This interface function registers the cpufreq cooling device with the name
* "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
@@ -322,13 +768,14 @@ static unsigned int find_next_max(struct cpufreq_frequency_table *table,
*/
static struct thermal_cooling_device *
__cpufreq_cooling_register(struct device_node *np,
- const struct cpumask *clip_cpus)
+ const struct cpumask *clip_cpus, u32 capacitance,
+ get_static_t plat_static_func)
{
struct thermal_cooling_device *cool_dev;
struct cpufreq_cooling_device *cpufreq_dev;
char dev_name[THERMAL_NAME_LENGTH];
struct cpufreq_frequency_table *pos, *table;
- unsigned int freq, i;
+ unsigned int freq, i, num_cpus;
int ret;
table = cpufreq_frequency_get_table(cpumask_first(clip_cpus));
@@ -341,6 +788,23 @@ __cpufreq_cooling_register(struct device_node *np,
if (!cpufreq_dev)
return ERR_PTR(-ENOMEM);
+ num_cpus = cpumask_weight(clip_cpus);
+ cpufreq_dev->time_in_idle = kcalloc(num_cpus,
+ sizeof(*cpufreq_dev->time_in_idle),
+ GFP_KERNEL);
+ if (!cpufreq_dev->time_in_idle) {
+ cool_dev = ERR_PTR(-ENOMEM);
+ goto free_cdev;
+ }
+
+ cpufreq_dev->time_in_idle_timestamp =
+ kcalloc(num_cpus, sizeof(*cpufreq_dev->time_in_idle_timestamp),
+ GFP_KERNEL);
+ if (!cpufreq_dev->time_in_idle_timestamp) {
+ cool_dev = ERR_PTR(-ENOMEM);
+ goto free_time_in_idle;
+ }
+
/* Find max levels */
cpufreq_for_each_valid_entry(pos, table)
cpufreq_dev->max_level++;
@@ -349,7 +813,7 @@ __cpufreq_cooling_register(struct device_node *np,
cpufreq_dev->max_level, GFP_KERNEL);
if (!cpufreq_dev->freq_table) {
cool_dev = ERR_PTR(-ENOMEM);
- goto free_cdev;
+ goto free_time_in_idle_timestamp;
}
/* max_level is an index, not a counter */
@@ -357,6 +821,20 @@ __cpufreq_cooling_register(struct device_node *np,
cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus);
+ if (capacitance) {
+ cpufreq_cooling_ops.get_requested_power =
+ cpufreq_get_requested_power;
+ cpufreq_cooling_ops.state2power = cpufreq_state2power;
+ cpufreq_cooling_ops.power2state = cpufreq_power2state;
+ cpufreq_dev->plat_get_static_power = plat_static_func;
+
+ ret = build_dyn_power_table(cpufreq_dev, capacitance);
+ if (ret) {
+ cool_dev = ERR_PTR(ret);
+ goto free_table;
+ }
+ }
+
ret = get_idr(&cpufreq_idr, &cpufreq_dev->id);
if (ret) {
cool_dev = ERR_PTR(ret);
@@ -402,6 +880,10 @@ remove_idr:
release_idr(&cpufreq_idr, cpufreq_dev->id);
free_table:
kfree(cpufreq_dev->freq_table);
+free_time_in_idle_timestamp:
+ kfree(cpufreq_dev->time_in_idle_timestamp);
+free_time_in_idle:
+ kfree(cpufreq_dev->time_in_idle);
free_cdev:
kfree(cpufreq_dev);
@@ -422,7 +904,7 @@ free_cdev:
struct thermal_cooling_device *
cpufreq_cooling_register(const struct cpumask *clip_cpus)
{
- return __cpufreq_cooling_register(NULL, clip_cpus);
+ return __cpufreq_cooling_register(NULL, clip_cpus, 0, NULL);
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
@@ -446,11 +928,78 @@ of_cpufreq_cooling_register(struct device_node *np,
if (!np)
return ERR_PTR(-EINVAL);
- return __cpufreq_cooling_register(np, clip_cpus);
+ return __cpufreq_cooling_register(np, clip_cpus, 0, NULL);
}
EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
/**
+ * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
+ * @clip_cpus: cpumask of cpus where the frequency constraints will happen
+ * @capacitance: dynamic power coefficient for these cpus
+ * @plat_static_func: function to calculate the static power consumed by these
+ * cpus (optional)
+ *
+ * This interface function registers the cpufreq cooling device with
+ * the name "thermal-cpufreq-%x". This api can support multiple
+ * instances of cpufreq cooling devices. Using this function, the
+ * cooling device will implement the power extensions by using a
+ * simple cpu power model. The cpus must have registered their OPPs
+ * using the OPP library.
+ *
+ * An optional @plat_static_func may be provided to calculate the
+ * static power consumed by these cpus. If the platform's static
+ * power consumption is unknown or negligible, make it NULL.
+ *
+ * Return: a valid struct thermal_cooling_device pointer on success,
+ * on failure, it returns a corresponding ERR_PTR().
+ */
+struct thermal_cooling_device *
+cpufreq_power_cooling_register(const struct cpumask *clip_cpus, u32 capacitance,
+ get_static_t plat_static_func)
+{
+ return __cpufreq_cooling_register(NULL, clip_cpus, capacitance,
+ plat_static_func);
+}
+EXPORT_SYMBOL(cpufreq_power_cooling_register);
+
+/**
+ * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
+ * @np: a valid struct device_node to the cooling device device tree node
+ * @clip_cpus: cpumask of cpus where the frequency constraints will happen
+ * @capacitance: dynamic power coefficient for these cpus
+ * @plat_static_func: function to calculate the static power consumed by these
+ * cpus (optional)
+ *
+ * This interface function registers the cpufreq cooling device with
+ * the name "thermal-cpufreq-%x". This api can support multiple
+ * instances of cpufreq cooling devices. Using this API, the cpufreq
+ * cooling device will be linked to the device tree node provided.
+ * Using this function, the cooling device will implement the power
+ * extensions by using a simple cpu power model. The cpus must have
+ * registered their OPPs using the OPP library.
+ *
+ * An optional @plat_static_func may be provided to calculate the
+ * static power consumed by these cpus. If the platform's static
+ * power consumption is unknown or negligible, make it NULL.
+ *
+ * Return: a valid struct thermal_cooling_device pointer on success,
+ * on failure, it returns a corresponding ERR_PTR().
+ */
+struct thermal_cooling_device *
+of_cpufreq_power_cooling_register(struct device_node *np,
+ const struct cpumask *clip_cpus,
+ u32 capacitance,
+ get_static_t plat_static_func)
+{
+ if (!np)
+ return ERR_PTR(-EINVAL);
+
+ return __cpufreq_cooling_register(np, clip_cpus, capacitance,
+ plat_static_func);
+}
+EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
+
+/**
* cpufreq_cooling_unregister - function to remove cpufreq cooling device.
* @cdev: thermal cooling device pointer.
*
@@ -475,6 +1024,8 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
thermal_cooling_device_unregister(cpufreq_dev->cool_dev);
release_idr(&cpufreq_idr, cpufreq_dev->id);
+ kfree(cpufreq_dev->time_in_idle_timestamp);
+ kfree(cpufreq_dev->time_in_idle);
kfree(cpufreq_dev->freq_table);
kfree(cpufreq_dev);
}
diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c
index 20adfbe27df1..2fb273c4baa9 100644
--- a/drivers/thermal/db8500_thermal.c
+++ b/drivers/thermal/db8500_thermal.c
@@ -76,7 +76,7 @@ static int db8500_cdev_bind(struct thermal_zone_device *thermal,
upper = lower = i > max_state ? max_state : i;
ret = thermal_zone_bind_cooling_device(thermal, i, cdev,
- upper, lower);
+ upper, lower, THERMAL_WEIGHT_DEFAULT);
dev_info(&cdev->device, "%s bind to %d: %d-%s\n", cdev->type,
i, ret, ret ? "fail" : "succeed");
diff --git a/drivers/thermal/fair_share.c b/drivers/thermal/fair_share.c
index 6e0a3fbfae86..8c50b8d6afb7 100644
--- a/drivers/thermal/fair_share.c
+++ b/drivers/thermal/fair_share.c
@@ -59,13 +59,13 @@ static int get_trip_level(struct thermal_zone_device *tz)
}
static long get_target_state(struct thermal_zone_device *tz,
- struct thermal_cooling_device *cdev, int weight, int level)
+ struct thermal_cooling_device *cdev, int percentage, int level)
{
unsigned long max_state;
cdev->ops->get_max_state(cdev, &max_state);
- return (long)(weight * level * max_state) / (100 * tz->trips);
+ return (long)(percentage * level * max_state) / (100 * tz->trips);
}
/**
@@ -77,7 +77,7 @@ static long get_target_state(struct thermal_zone_device *tz,
*
* Parameters used for Throttling:
* P1. max_state: Maximum throttle state exposed by the cooling device.
- * P2. weight[i]/100:
+ * P2. percentage[i]/100:
* How 'effective' the 'i'th device is, in cooling the given zone.
* P3. cur_trip_level/max_no_of_trips:
* This describes the extent to which the devices should be throttled.
@@ -88,28 +88,33 @@ static long get_target_state(struct thermal_zone_device *tz,
*/
static int fair_share_throttle(struct thermal_zone_device *tz, int trip)
{
- const struct thermal_zone_params *tzp;
- struct thermal_cooling_device *cdev;
struct thermal_instance *instance;
- int i;
+ int total_weight = 0;
+ int total_instance = 0;
int cur_trip_level = get_trip_level(tz);
- if (!tz->tzp || !tz->tzp->tbp)
- return -EINVAL;
+ list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+ if (instance->trip != trip)
+ continue;
+
+ total_weight += instance->weight;
+ total_instance++;
+ }
- tzp = tz->tzp;
+ list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+ int percentage;
+ struct thermal_cooling_device *cdev = instance->cdev;
- for (i = 0; i < tzp->num_tbps; i++) {
- if (!tzp->tbp[i].cdev)
+ if (instance->trip != trip)
continue;
- cdev = tzp->tbp[i].cdev;
- instance = get_thermal_instance(tz, cdev, trip);
- if (!instance)
- continue;
+ if (!total_weight)
+ percentage = 100 / total_instance;
+ else
+ percentage = (instance->weight * 100) / total_weight;
- instance->target = get_target_state(tz, cdev,
- tzp->tbp[i].weight, cur_trip_level);
+ instance->target = get_target_state(tz, cdev, percentage,
+ cur_trip_level);
instance->cdev->updated = false;
thermal_cdev_update(cdev);
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index 2ccbc0788353..fde4c2876d14 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -306,7 +306,8 @@ static int imx_bind(struct thermal_zone_device *tz,
ret = thermal_zone_bind_cooling_device(tz, IMX_TRIP_PASSIVE, cdev,
THERMAL_NO_LIMIT,
- THERMAL_NO_LIMIT);
+ THERMAL_NO_LIMIT,
+ THERMAL_WEIGHT_DEFAULT);
if (ret) {
dev_err(&tz->device,
"binding zone %s with cdev %s failed:%d\n",
diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c
index 668fb1bdea9e..b295b2b6c191 100644
--- a/drivers/thermal/of-thermal.c
+++ b/drivers/thermal/of-thermal.c
@@ -58,6 +58,8 @@ struct __thermal_bind_params {
* @mode: current thermal zone device mode (enabled/disabled)
* @passive_delay: polling interval while passive cooling is activated
* @polling_delay: zone polling interval
+ * @slope: slope of the temperature adjustment curve
+ * @offset: offset of the temperature adjustment curve
* @ntrips: number of trip points
* @trips: an array of trip points (0..ntrips - 1)
* @num_tbps: number of thermal bind params
@@ -70,6 +72,8 @@ struct __thermal_zone {
enum thermal_device_mode mode;
int passive_delay;
int polling_delay;
+ int slope;
+ int offset;
/* trip data */
int ntrips;
@@ -227,7 +231,8 @@ static int of_thermal_bind(struct thermal_zone_device *thermal,
ret = thermal_zone_bind_cooling_device(thermal,
tbp->trip_id, cdev,
tbp->max,
- tbp->min);
+ tbp->min,
+ tbp->usage);
if (ret)
return ret;
}
@@ -581,7 +586,7 @@ static int thermal_of_populate_bind_params(struct device_node *np,
u32 prop;
/* Default weight. Usage is optional */
- __tbp->usage = 0;
+ __tbp->usage = THERMAL_WEIGHT_DEFAULT;
ret = of_property_read_u32(np, "contribution", &prop);
if (ret == 0)
__tbp->usage = prop;
@@ -715,7 +720,7 @@ static int thermal_of_populate_trip(struct device_node *np,
* @np parameter and fills the read data into a __thermal_zone data structure
* and return this pointer.
*
- * TODO: Missing properties to parse: thermal-sensor-names and coefficients
+ * TODO: Missing properties to parse: thermal-sensor-names
*
* Return: On success returns a valid struct __thermal_zone,
* otherwise, it returns a corresponding ERR_PTR(). Caller must
@@ -727,7 +732,7 @@ thermal_of_build_thermal_zone(struct device_node *np)
struct device_node *child = NULL, *gchild;
struct __thermal_zone *tz;
int ret, i;
- u32 prop;
+ u32 prop, coef[2];
if (!np) {
pr_err("no thermal zone np\n");
@@ -752,6 +757,20 @@ thermal_of_build_thermal_zone(struct device_node *np)
}
tz->polling_delay = prop;
+ /*
+ * REVIST: for now, the thermal framework supports only
+ * one sensor per thermal zone. Thus, we are considering
+ * only the first two values as slope and offset.
+ */
+ ret = of_property_read_u32_array(np, "coefficients", coef, 2);
+ if (ret == 0) {
+ tz->slope = coef[0];
+ tz->offset = coef[1];
+ } else {
+ tz->slope = 1;
+ tz->offset = 0;
+ }
+
/* trips */
child = of_get_child_by_name(np, "trips");
@@ -865,6 +884,8 @@ int __init of_parse_thermal_zones(void)
for_each_child_of_node(np, child) {
struct thermal_zone_device *zone;
struct thermal_zone_params *tzp;
+ int i, mask = 0;
+ u32 prop;
/* Check whether child is enabled or not */
if (!of_device_is_available(child))
@@ -891,8 +912,18 @@ int __init of_parse_thermal_zones(void)
/* No hwmon because there might be hwmon drivers registering */
tzp->no_hwmon = true;
+ if (!of_property_read_u32(child, "sustainable-power", &prop))
+ tzp->sustainable_power = prop;
+
+ for (i = 0; i < tz->ntrips; i++)
+ mask |= 1 << i;
+
+ /* these two are left for temperature drivers to use */
+ tzp->slope = tz->slope;
+ tzp->offset = tz->offset;
+
zone = thermal_zone_device_register(child->name, tz->ntrips,
- 0, tz,
+ mask, tz,
ops, tzp,
tz->passive_delay,
tz->polling_delay);
diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c
new file mode 100644
index 000000000000..251676902869
--- /dev/null
+++ b/drivers/thermal/power_allocator.c
@@ -0,0 +1,544 @@
+/*
+ * A power allocator to manage temperature
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "Power allocator: " fmt
+
+#include <linux/rculist.h>
+#include <linux/slab.h>
+#include <linux/thermal.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/thermal_power_allocator.h>
+
+#include "thermal_core.h"
+
+#define FRAC_BITS 10
+#define int_to_frac(x) ((x) << FRAC_BITS)
+#define frac_to_int(x) ((x) >> FRAC_BITS)
+
+/**
+ * mul_frac() - multiply two fixed-point numbers
+ * @x: first multiplicand
+ * @y: second multiplicand
+ *
+ * Return: the result of multiplying two fixed-point numbers. The
+ * result is also a fixed-point number.
+ */
+static inline s64 mul_frac(s64 x, s64 y)
+{
+ return (x * y) >> FRAC_BITS;
+}
+
+/**
+ * div_frac() - divide two fixed-point numbers
+ * @x: the dividend
+ * @y: the divisor
+ *
+ * Return: the result of dividing two fixed-point numbers. The
+ * result is also a fixed-point number.
+ */
+static inline s64 div_frac(s64 x, s64 y)
+{
+ return div_s64(x << FRAC_BITS, y);
+}
+
+/**
+ * struct power_allocator_params - parameters for the power allocator governor
+ * @err_integral: accumulated error in the PID controller.
+ * @prev_err: error in the previous iteration of the PID controller.
+ * Used to calculate the derivative term.
+ * @trip_switch_on: first passive trip point of the thermal zone. The
+ * governor switches on when this trip point is crossed.
+ * @trip_max_desired_temperature: last passive trip point of the thermal
+ * zone. The temperature we are
+ * controlling for.
+ */
+struct power_allocator_params {
+ s64 err_integral;
+ s32 prev_err;
+ int trip_switch_on;
+ int trip_max_desired_temperature;
+};
+
+/**
+ * pid_controller() - PID controller
+ * @tz: thermal zone we are operating in
+ * @current_temp: the current temperature in millicelsius
+ * @control_temp: the target temperature in millicelsius
+ * @max_allocatable_power: maximum allocatable power for this thermal zone
+ *
+ * This PID controller increases the available power budget so that the
+ * temperature of the thermal zone gets as close as possible to
+ * @control_temp and limits the power if it exceeds it. k_po is the
+ * proportional term when we are overshooting, k_pu is the
+ * proportional term when we are undershooting. integral_cutoff is a
+ * threshold below which we stop accumulating the error. The
+ * accumulated error is only valid if the requested power will make
+ * the system warmer. If the system is mostly idle, there's no point
+ * in accumulating positive error.
+ *
+ * Return: The power budget for the next period.
+ */
+static u32 pid_controller(struct thermal_zone_device *tz,
+ unsigned long current_temp,
+ unsigned long control_temp,
+ u32 max_allocatable_power)
+{
+ s64 p, i, d, power_range;
+ s32 err, max_power_frac;
+ struct power_allocator_params *params = tz->governor_data;
+
+ max_power_frac = int_to_frac(max_allocatable_power);
+
+ err = ((s32)control_temp - (s32)current_temp);
+ err = int_to_frac(err);
+
+ /* Calculate the proportional term */
+ p = mul_frac(err < 0 ? tz->tzp->k_po : tz->tzp->k_pu, err);
+
+ /*
+ * Calculate the integral term
+ *
+ * if the error is less than cut off allow integration (but
+ * the integral is limited to max power)
+ */
+ i = mul_frac(tz->tzp->k_i, params->err_integral);
+
+ if (err < int_to_frac(tz->tzp->integral_cutoff)) {
+ s64 i_next = i + mul_frac(tz->tzp->k_i, err);
+
+ if (abs64(i_next) < max_power_frac) {
+ i = i_next;
+ params->err_integral += err;
+ }
+ }
+
+ /*
+ * Calculate the derivative term
+ *
+ * We do err - prev_err, so with a positive k_d, a decreasing
+ * error (i.e. driving closer to the line) results in less
+ * power being applied, slowing down the controller)
+ */
+ d = mul_frac(tz->tzp->k_d, err - params->prev_err);
+ d = div_frac(d, tz->passive_delay);
+ params->prev_err = err;
+
+ power_range = p + i + d;
+
+ /* feed-forward the known sustainable dissipatable power */
+ power_range = tz->tzp->sustainable_power + frac_to_int(power_range);
+
+ power_range = clamp(power_range, (s64)0, (s64)max_allocatable_power);
+
+ trace_thermal_power_allocator_pid(tz, frac_to_int(err),
+ frac_to_int(params->err_integral),
+ frac_to_int(p), frac_to_int(i),
+ frac_to_int(d), power_range);
+
+ return power_range;
+}
+
+/**
+ * divvy_up_power() - divvy the allocated power between the actors
+ * @req_power: each actor's requested power
+ * @max_power: each actor's maximum available power
+ * @num_actors: size of the @req_power, @max_power and @granted_power's array
+ * @total_req_power: sum of @req_power
+ * @power_range: total allocated power
+ * @granted_power: output array: each actor's granted power
+ * @extra_actor_power: an appropriately sized array to be used in the
+ * function as temporary storage of the extra power given
+ * to the actors
+ *
+ * This function divides the total allocated power (@power_range)
+ * fairly between the actors. It first tries to give each actor a
+ * share of the @power_range according to how much power it requested
+ * compared to the rest of the actors. For example, if only one actor
+ * requests power, then it receives all the @power_range. If
+ * three actors each requests 1mW, each receives a third of the
+ * @power_range.
+ *
+ * If any actor received more than their maximum power, then that
+ * surplus is re-divvied among the actors based on how far they are
+ * from their respective maximums.
+ *
+ * Granted power for each actor is written to @granted_power, which
+ * should've been allocated by the calling function.
+ */
+static void divvy_up_power(u32 *req_power, u32 *max_power, int num_actors,
+ u32 total_req_power, u32 power_range,
+ u32 *granted_power, u32 *extra_actor_power)
+{
+ u32 extra_power, capped_extra_power;
+ int i;
+
+ /*
+ * Prevent division by 0 if none of the actors request power.
+ */
+ if (!total_req_power)
+ total_req_power = 1;
+
+ capped_extra_power = 0;
+ extra_power = 0;
+ for (i = 0; i < num_actors; i++) {
+ u64 req_range = req_power[i] * power_range;
+
+ granted_power[i] = DIV_ROUND_CLOSEST_ULL(req_range,
+ total_req_power);
+
+ if (granted_power[i] > max_power[i]) {
+ extra_power += granted_power[i] - max_power[i];
+ granted_power[i] = max_power[i];
+ }
+
+ extra_actor_power[i] = max_power[i] - granted_power[i];
+ capped_extra_power += extra_actor_power[i];
+ }
+
+ if (!extra_power)
+ return;
+
+ /*
+ * Re-divvy the reclaimed extra among actors based on
+ * how far they are from the max
+ */
+ extra_power = min(extra_power, capped_extra_power);
+ if (capped_extra_power > 0)
+ for (i = 0; i < num_actors; i++)
+ granted_power[i] += (extra_actor_power[i] *
+ extra_power) / capped_extra_power;
+}
+
+static int allocate_power(struct thermal_zone_device *tz,
+ unsigned long current_temp,
+ unsigned long control_temp)
+{
+ struct thermal_instance *instance;
+ struct power_allocator_params *params = tz->governor_data;
+ u32 *req_power, *max_power, *granted_power, *extra_actor_power;
+ u32 *weighted_req_power;
+ u32 total_req_power, max_allocatable_power, total_weighted_req_power;
+ u32 total_granted_power, power_range;
+ int i, num_actors, total_weight, ret = 0;
+ int trip_max_desired_temperature = params->trip_max_desired_temperature;
+
+ mutex_lock(&tz->lock);
+
+ num_actors = 0;
+ total_weight = 0;
+ list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+ if ((instance->trip == trip_max_desired_temperature) &&
+ cdev_is_power_actor(instance->cdev)) {
+ num_actors++;
+ total_weight += instance->weight;
+ }
+ }
+
+ /*
+ * We need to allocate five arrays of the same size:
+ * req_power, max_power, granted_power, extra_actor_power and
+ * weighted_req_power. They are going to be needed until this
+ * function returns. Allocate them all in one go to simplify
+ * the allocation and deallocation logic.
+ */
+ BUILD_BUG_ON(sizeof(*req_power) != sizeof(*max_power));
+ BUILD_BUG_ON(sizeof(*req_power) != sizeof(*granted_power));
+ BUILD_BUG_ON(sizeof(*req_power) != sizeof(*extra_actor_power));
+ BUILD_BUG_ON(sizeof(*req_power) != sizeof(*weighted_req_power));
+ req_power = kcalloc(num_actors * 5, sizeof(*req_power), GFP_KERNEL);
+ if (!req_power) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ max_power = &req_power[num_actors];
+ granted_power = &req_power[2 * num_actors];
+ extra_actor_power = &req_power[3 * num_actors];
+ weighted_req_power = &req_power[4 * num_actors];
+
+ i = 0;
+ total_weighted_req_power = 0;
+ total_req_power = 0;
+ max_allocatable_power = 0;
+
+ list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+ int weight;
+ struct thermal_cooling_device *cdev = instance->cdev;
+
+ if (instance->trip != trip_max_desired_temperature)
+ continue;
+
+ if (!cdev_is_power_actor(cdev))
+ continue;
+
+ if (cdev->ops->get_requested_power(cdev, tz, &req_power[i]))
+ continue;
+
+ if (!total_weight)
+ weight = 1 << FRAC_BITS;
+ else
+ weight = instance->weight;
+
+ weighted_req_power[i] = frac_to_int(weight * req_power[i]);
+
+ if (power_actor_get_max_power(cdev, tz, &max_power[i]))
+ continue;
+
+ total_req_power += req_power[i];
+ max_allocatable_power += max_power[i];
+ total_weighted_req_power += weighted_req_power[i];
+
+ i++;
+ }
+
+ power_range = pid_controller(tz, current_temp, control_temp,
+ max_allocatable_power);
+
+ divvy_up_power(weighted_req_power, max_power, num_actors,
+ total_weighted_req_power, power_range, granted_power,
+ extra_actor_power);
+
+ total_granted_power = 0;
+ i = 0;
+ list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+ if (instance->trip != trip_max_desired_temperature)
+ continue;
+
+ if (!cdev_is_power_actor(instance->cdev))
+ continue;
+
+ power_actor_set_power(instance->cdev, instance,
+ granted_power[i]);
+ total_granted_power += granted_power[i];
+
+ i++;
+ }
+
+ trace_thermal_power_allocator(tz, req_power, total_req_power,
+ granted_power, total_granted_power,
+ num_actors, power_range,
+ max_allocatable_power, current_temp,
+ (s32)control_temp - (s32)current_temp);
+
+ kfree(req_power);
+unlock:
+ mutex_unlock(&tz->lock);
+
+ return ret;
+}
+
+static int get_governor_trips(struct thermal_zone_device *tz,
+ struct power_allocator_params *params)
+{
+ int i, ret, last_passive;
+ bool found_first_passive;
+
+ found_first_passive = false;
+ last_passive = -1;
+ ret = -EINVAL;
+
+ for (i = 0; i < tz->trips; i++) {
+ enum thermal_trip_type type;
+
+ ret = tz->ops->get_trip_type(tz, i, &type);
+ if (ret)
+ return ret;
+
+ if (!found_first_passive) {
+ if (type == THERMAL_TRIP_PASSIVE) {
+ params->trip_switch_on = i;
+ found_first_passive = true;
+ }
+ } else if (type == THERMAL_TRIP_PASSIVE) {
+ last_passive = i;
+ } else {
+ break;
+ }
+ }
+
+ if (last_passive != -1) {
+ params->trip_max_desired_temperature = last_passive;
+ ret = 0;
+ } else {
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static void reset_pid_controller(struct power_allocator_params *params)
+{
+ params->err_integral = 0;
+ params->prev_err = 0;
+}
+
+static void allow_maximum_power(struct thermal_zone_device *tz)
+{
+ struct thermal_instance *instance;
+ struct power_allocator_params *params = tz->governor_data;
+
+ list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+ if ((instance->trip != params->trip_max_desired_temperature) ||
+ (!cdev_is_power_actor(instance->cdev)))
+ continue;
+
+ instance->target = 0;
+ instance->cdev->updated = false;
+ thermal_cdev_update(instance->cdev);
+ }
+}
+
+/**
+ * power_allocator_bind() - bind the power_allocator governor to a thermal zone
+ * @tz: thermal zone to bind it to
+ *
+ * Check that the thermal zone is valid for this governor, that is, it
+ * has two thermal trips. If so, initialize the PID controller
+ * parameters and bind it to the thermal zone.
+ *
+ * Return: 0 on success, -EINVAL if the trips were invalid or -ENOMEM
+ * if we ran out of memory.
+ */
+static int power_allocator_bind(struct thermal_zone_device *tz)
+{
+ int ret;
+ struct power_allocator_params *params;
+ unsigned long switch_on_temp, control_temp;
+ u32 temperature_threshold;
+
+ if (!tz->tzp || !tz->tzp->sustainable_power) {
+ dev_err(&tz->device,
+ "power_allocator: missing sustainable_power\n");
+ return -EINVAL;
+ }
+
+ params = kzalloc(sizeof(*params), GFP_KERNEL);
+ if (!params)
+ return -ENOMEM;
+
+ ret = get_governor_trips(tz, params);
+ if (ret) {
+ dev_err(&tz->device,
+ "thermal zone %s has wrong trip setup for power allocator\n",
+ tz->type);
+ goto free;
+ }
+
+ ret = tz->ops->get_trip_temp(tz, params->trip_switch_on,
+ &switch_on_temp);
+ if (ret)
+ goto free;
+
+ ret = tz->ops->get_trip_temp(tz, params->trip_max_desired_temperature,
+ &control_temp);
+ if (ret)
+ goto free;
+
+ temperature_threshold = control_temp - switch_on_temp;
+
+ tz->tzp->k_po = tz->tzp->k_po ?:
+ int_to_frac(tz->tzp->sustainable_power) / temperature_threshold;
+ tz->tzp->k_pu = tz->tzp->k_pu ?:
+ int_to_frac(2 * tz->tzp->sustainable_power) /
+ temperature_threshold;
+ tz->tzp->k_i = tz->tzp->k_i ?: int_to_frac(10) / 1000;
+ /*
+ * The default for k_d and integral_cutoff is 0, so we can
+ * leave them as they are.
+ */
+
+ reset_pid_controller(params);
+
+ tz->governor_data = params;
+
+ return 0;
+
+free:
+ kfree(params);
+ return ret;
+}
+
+static void power_allocator_unbind(struct thermal_zone_device *tz)
+{
+ dev_dbg(&tz->device, "Unbinding from thermal zone %d\n", tz->id);
+ kfree(tz->governor_data);
+ tz->governor_data = NULL;
+}
+
+static int power_allocator_throttle(struct thermal_zone_device *tz, int trip)
+{
+ int ret;
+ unsigned long switch_on_temp, control_temp, current_temp;
+ struct power_allocator_params *params = tz->governor_data;
+
+ /*
+ * We get called for every trip point but we only need to do
+ * our calculations once
+ */
+ if (trip != params->trip_max_desired_temperature)
+ return 0;
+
+ ret = thermal_zone_get_temp(tz, &current_temp);
+ if (ret) {
+ dev_warn(&tz->device, "Failed to get temperature: %d\n", ret);
+ return ret;
+ }
+
+ ret = tz->ops->get_trip_temp(tz, params->trip_switch_on,
+ &switch_on_temp);
+ if (ret) {
+ dev_warn(&tz->device,
+ "Failed to get switch on temperature: %d\n", ret);
+ return ret;
+ }
+
+ if (current_temp < switch_on_temp) {
+ tz->passive = 0;
+ reset_pid_controller(params);
+ allow_maximum_power(tz);
+ return 0;
+ }
+
+ tz->passive = 1;
+
+ ret = tz->ops->get_trip_temp(tz, params->trip_max_desired_temperature,
+ &control_temp);
+ if (ret) {
+ dev_warn(&tz->device,
+ "Failed to get the maximum desired temperature: %d\n",
+ ret);
+ return ret;
+ }
+
+ return allocate_power(tz, current_temp, control_temp);
+}
+
+static struct thermal_governor thermal_gov_power_allocator = {
+ .name = "power_allocator",
+ .bind_to_tz = power_allocator_bind,
+ .unbind_from_tz = power_allocator_unbind,
+ .throttle = power_allocator_throttle,
+};
+
+int thermal_gov_power_allocator_register(void)
+{
+ return thermal_register_governor(&thermal_gov_power_allocator);
+}
+
+void thermal_gov_power_allocator_unregister(void)
+{
+ thermal_unregister_governor(&thermal_gov_power_allocator);
+}
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 4108db7e10c1..04659bfb888b 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -75,6 +75,58 @@ static struct thermal_governor *__find_governor(const char *name)
return NULL;
}
+/**
+ * bind_previous_governor() - bind the previous governor of the thermal zone
+ * @tz: a valid pointer to a struct thermal_zone_device
+ * @failed_gov_name: the name of the governor that failed to register
+ *
+ * Register the previous governor of the thermal zone after a new
+ * governor has failed to be bound.
+ */
+static void bind_previous_governor(struct thermal_zone_device *tz,
+ const char *failed_gov_name)
+{
+ if (tz->governor && tz->governor->bind_to_tz) {
+ if (tz->governor->bind_to_tz(tz)) {
+ dev_err(&tz->device,
+ "governor %s failed to bind and the previous one (%s) failed to bind again, thermal zone %s has no governor\n",
+ failed_gov_name, tz->governor->name, tz->type);
+ tz->governor = NULL;
+ }
+ }
+}
+
+/**
+ * thermal_set_governor() - Switch to another governor
+ * @tz: a valid pointer to a struct thermal_zone_device
+ * @new_gov: pointer to the new governor
+ *
+ * Change the governor of thermal zone @tz.
+ *
+ * Return: 0 on success, an error if the new governor's bind_to_tz() failed.
+ */
+static int thermal_set_governor(struct thermal_zone_device *tz,
+ struct thermal_governor *new_gov)
+{
+ int ret = 0;
+
+ if (tz->governor && tz->governor->unbind_from_tz)
+ tz->governor->unbind_from_tz(tz);
+
+ if (new_gov && new_gov->bind_to_tz) {
+ ret = new_gov->bind_to_tz(tz);
+ if (ret) {
+ bind_previous_governor(tz, new_gov->name);
+
+ return ret;
+ }
+ }
+
+ tz->governor = new_gov;
+
+ return ret;
+}
+
int thermal_register_governor(struct thermal_governor *governor)
{
int err;
@@ -107,8 +159,15 @@ int thermal_register_governor(struct thermal_governor *governor)
name = pos->tzp->governor_name;
- if (!strncasecmp(name, governor->name, THERMAL_NAME_LENGTH))
- pos->governor = governor;
+ if (!strncasecmp(name, governor->name, THERMAL_NAME_LENGTH)) {
+ int ret;
+
+ ret = thermal_set_governor(pos, governor);
+ if (ret)
+ dev_err(&pos->device,
+ "Failed to set governor %s for thermal zone %s: %d\n",
+ governor->name, pos->type, ret);
+ }
}
mutex_unlock(&thermal_list_lock);
@@ -134,7 +193,7 @@ void thermal_unregister_governor(struct thermal_governor *governor)
list_for_each_entry(pos, &thermal_tz_list, node) {
if (!strncasecmp(pos->governor->name, governor->name,
THERMAL_NAME_LENGTH))
- pos->governor = NULL;
+ thermal_set_governor(pos, NULL);
}
mutex_unlock(&thermal_list_lock);
@@ -218,7 +277,8 @@ static void print_bind_err_msg(struct thermal_zone_device *tz,
static void __bind(struct thermal_zone_device *tz, int mask,
struct thermal_cooling_device *cdev,
- unsigned long *limits)
+ unsigned long *limits,
+ unsigned int weight)
{
int i, ret;
@@ -233,7 +293,8 @@ static void __bind(struct thermal_zone_device *tz, int mask,
upper = limits[i * 2 + 1];
}
ret = thermal_zone_bind_cooling_device(tz, i, cdev,
- upper, lower);
+ upper, lower,
+ weight);
if (ret)
print_bind_err_msg(tz, cdev, ret);
}
@@ -280,7 +341,8 @@ static void bind_cdev(struct thermal_cooling_device *cdev)
continue;
tzp->tbp[i].cdev = cdev;
__bind(pos, tzp->tbp[i].trip_mask, cdev,
- tzp->tbp[i].binding_limits);
+ tzp->tbp[i].binding_limits,
+ tzp->tbp[i].weight);
}
}
@@ -319,7 +381,8 @@ static void bind_tz(struct thermal_zone_device *tz)
continue;
tzp->tbp[i].cdev = pos;
__bind(tz, tzp->tbp[i].trip_mask, pos,
- tzp->tbp[i].binding_limits);
+ tzp->tbp[i].binding_limits,
+ tzp->tbp[i].weight);
}
}
exit:
@@ -713,7 +776,8 @@ passive_store(struct device *dev, struct device_attribute *attr,
thermal_zone_bind_cooling_device(tz,
THERMAL_TRIPS_NONE, cdev,
THERMAL_NO_LIMIT,
- THERMAL_NO_LIMIT);
+ THERMAL_NO_LIMIT,
+ THERMAL_WEIGHT_DEFAULT);
}
mutex_unlock(&thermal_list_lock);
if (!tz->passive_delay)
@@ -765,8 +829,9 @@ policy_store(struct device *dev, struct device_attribute *attr,
if (!gov)
goto exit;
- tz->governor = gov;
- ret = count;
+ ret = thermal_set_governor(tz, gov);
+ if (!ret)
+ ret = count;
exit:
mutex_unlock(&tz->lock);
@@ -810,6 +875,158 @@ emul_temp_store(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR(emul_temp, S_IWUSR, NULL, emul_temp_store);
#endif/*CONFIG_THERMAL_EMULATION*/
+static ssize_t
+sustainable_power_show(struct device *dev, struct device_attribute *devattr,
+ char *buf)
+{
+ struct thermal_zone_device *tz = to_thermal_zone(dev);
+
+ if (tz->tzp)
+ return sprintf(buf, "%u\n", tz->tzp->sustainable_power);
+ else
+ return -EIO;
+}
+
+static ssize_t
+sustainable_power_store(struct device *dev, struct device_attribute *devattr,
+ const char *buf, size_t count)
+{
+ struct thermal_zone_device *tz = to_thermal_zone(dev);
+ u32 sustainable_power;
+
+ if (!tz->tzp)
+ return -EIO;
+
+ if (kstrtou32(buf, 10, &sustainable_power))
+ return -EINVAL;
+
+ tz->tzp->sustainable_power = sustainable_power;
+
+ return count;
+}
+static DEVICE_ATTR(sustainable_power, S_IWUSR | S_IRUGO, sustainable_power_show,
+ sustainable_power_store);
+
+#define create_s32_tzp_attr(name) \
+ static ssize_t \
+ name##_show(struct device *dev, struct device_attribute *devattr, \
+ char *buf) \
+ { \
+ struct thermal_zone_device *tz = to_thermal_zone(dev); \
+ \
+ if (tz->tzp) \
+ return sprintf(buf, "%u\n", tz->tzp->name); \
+ else \
+ return -EIO; \
+ } \
+ \
+ static ssize_t \
+ name##_store(struct device *dev, struct device_attribute *devattr, \
+ const char *buf, size_t count) \
+ { \
+ struct thermal_zone_device *tz = to_thermal_zone(dev); \
+ s32 value; \
+ \
+ if (!tz->tzp) \
+ return -EIO; \
+ \
+ if (kstrtos32(buf, 10, &value)) \
+ return -EINVAL; \
+ \
+ tz->tzp->name = value; \
+ \
+ return count; \
+ } \
+ static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, name##_show, name##_store)
+
+create_s32_tzp_attr(k_po);
+create_s32_tzp_attr(k_pu);
+create_s32_tzp_attr(k_i);
+create_s32_tzp_attr(k_d);
+create_s32_tzp_attr(integral_cutoff);
+create_s32_tzp_attr(slope);
+create_s32_tzp_attr(offset);
+#undef create_s32_tzp_attr
+
+static struct device_attribute *dev_tzp_attrs[] = {
+ &dev_attr_sustainable_power,
+ &dev_attr_k_po,
+ &dev_attr_k_pu,
+ &dev_attr_k_i,
+ &dev_attr_k_d,
+ &dev_attr_integral_cutoff,
+ &dev_attr_slope,
+ &dev_attr_offset,
+};
+
+static int create_tzp_attrs(struct device *dev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(dev_tzp_attrs); i++) {
+ int ret;
+ struct device_attribute *dev_attr = dev_tzp_attrs[i];
+
+ ret = device_create_file(dev, dev_attr);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * power_actor_get_max_power() - get the maximum power that a cdev can consume
+ * @cdev: pointer to &thermal_cooling_device
+ * @tz: a valid thermal zone device pointer
+ * @max_power: pointer in which to store the maximum power
+ *
+ * Calculate the maximum power consumption in milliwats that the
+ * cooling device can currently consume and store it in @max_power.
+ *
+ * Return: 0 on success, -EINVAL if @cdev doesn't support the
+ * power_actor API or -E* on other error.
+ */
+int power_actor_get_max_power(struct thermal_cooling_device *cdev,
+ struct thermal_zone_device *tz, u32 *max_power)
+{
+ if (!cdev_is_power_actor(cdev))
+ return -EINVAL;
+
+ return cdev->ops->state2power(cdev, tz, 0, max_power);
+}
+
+/**
+ * power_actor_set_power() - limit the maximum power that a cooling device can consume
+ * @cdev: pointer to &thermal_cooling_device
+ * @instance: thermal instance to update
+ * @power: the power in milliwatts
+ *
+ * Set the cooling device to consume at most @power milliwatts.
+ *
+ * Return: 0 on success, -EINVAL if the cooling device does not
+ * implement the power actor API or -E* for other failures.
+ */
+int power_actor_set_power(struct thermal_cooling_device *cdev,
+ struct thermal_instance *instance, u32 power)
+{
+ unsigned long state;
+ int ret;
+
+ if (!cdev_is_power_actor(cdev))
+ return -EINVAL;
+
+ ret = cdev->ops->power2state(cdev, instance->tz, power, &state);
+ if (ret)
+ return ret;
+
+ instance->target = state;
+ cdev->updated = false;
+ thermal_cdev_update(cdev);
+
+ return 0;
+}
+
static DEVICE_ATTR(type, 0444, type_show, NULL);
static DEVICE_ATTR(temp, 0444, temp_show, NULL);
static DEVICE_ATTR(mode, 0644, mode_show, mode_store);
@@ -917,6 +1134,34 @@ static const struct attribute_group *cooling_device_attr_groups[] = {
NULL,
};
+static ssize_t
+thermal_cooling_device_weight_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct thermal_instance *instance;
+
+ instance = container_of(attr, struct thermal_instance, weight_attr);
+
+ return sprintf(buf, "%d\n", instance->weight);
+}
+
+static ssize_t
+thermal_cooling_device_weight_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct thermal_instance *instance;
+ int ret, weight;
+
+ ret = kstrtoint(buf, 0, &weight);
+ if (ret)
+ return ret;
+
+ instance = container_of(attr, struct thermal_instance, weight_attr);
+ instance->weight = weight;
+
+ return count;
+}
/* Device management */
/**
@@ -931,6 +1176,9 @@ static const struct attribute_group *cooling_device_attr_groups[] = {
* @lower: the Minimum cooling state can be used for this trip point.
* THERMAL_NO_LIMIT means no lower limit,
* and the cooling device can be in cooling state 0.
+ * @weight: The weight of the cooling device to be bound to the
+ * thermal zone. Use THERMAL_WEIGHT_DEFAULT for the
+ * default value
*
* This interface function bind a thermal cooling device to the certain trip
* point of a thermal zone device.
@@ -941,7 +1189,8 @@ static const struct attribute_group *cooling_device_attr_groups[] = {
int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
int trip,
struct thermal_cooling_device *cdev,
- unsigned long upper, unsigned long lower)
+ unsigned long upper, unsigned long lower,
+ unsigned int weight)
{
struct thermal_instance *dev;
struct thermal_instance *pos;
@@ -986,6 +1235,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
dev->upper = upper;
dev->lower = lower;
dev->target = THERMAL_NO_TARGET;
+ dev->weight = weight;
result = get_idr(&tz->idr, &tz->lock, &dev->id);
if (result)
@@ -1006,6 +1256,16 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
if (result)
goto remove_symbol_link;
+ sprintf(dev->weight_attr_name, "cdev%d_weight", dev->id);
+ sysfs_attr_init(&dev->weight_attr.attr);
+ dev->weight_attr.attr.name = dev->weight_attr_name;
+ dev->weight_attr.attr.mode = S_IWUSR | S_IRUGO;
+ dev->weight_attr.show = thermal_cooling_device_weight_show;
+ dev->weight_attr.store = thermal_cooling_device_weight_store;
+ result = device_create_file(&tz->device, &dev->weight_attr);
+ if (result)
+ goto remove_trip_file;
+
mutex_lock(&tz->lock);
mutex_lock(&cdev->lock);
list_for_each_entry(pos, &tz->thermal_instances, tz_node)
@@ -1023,6 +1283,8 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
if (!result)
return 0;
+ device_remove_file(&tz->device, &dev->weight_attr);
+remove_trip_file:
device_remove_file(&tz->device, &dev->attr);
remove_symbol_link:
sysfs_remove_link(&tz->device.kobj, dev->name);
@@ -1377,7 +1639,8 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask)
tz->trip_temp_attrs[indx].name;
tz->trip_temp_attrs[indx].attr.attr.mode = S_IRUGO;
tz->trip_temp_attrs[indx].attr.show = trip_point_temp_show;
- if (mask & (1 << indx)) {
+ if (IS_ENABLED(CONFIG_THERMAL_WRITABLE_TRIPS) &&
+ mask & (1 << indx)) {
tz->trip_temp_attrs[indx].attr.attr.mode |= S_IWUSR;
tz->trip_temp_attrs[indx].attr.store =
trip_point_temp_store;
@@ -1454,7 +1717,7 @@ static void remove_trip_attrs(struct thermal_zone_device *tz)
struct thermal_zone_device *thermal_zone_device_register(const char *type,
int trips, int mask, void *devdata,
struct thermal_zone_device_ops *ops,
- const struct thermal_zone_params *tzp,
+ struct thermal_zone_params *tzp,
int passive_delay, int polling_delay)
{
struct thermal_zone_device *tz;
@@ -1462,6 +1725,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
int result;
int count;
int passive = 0;
+ struct thermal_governor *governor;
if (type && strlen(type) >= THERMAL_NAME_LENGTH)
return ERR_PTR(-EINVAL);
@@ -1548,13 +1812,24 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
if (result)
goto unregister;
+ /* Add thermal zone params */
+ result = create_tzp_attrs(&tz->device);
+ if (result)
+ goto unregister;
+
/* Update 'this' zone's governor information */
mutex_lock(&thermal_governor_lock);
if (tz->tzp)
- tz->governor = __find_governor(tz->tzp->governor_name);
+ governor = __find_governor(tz->tzp->governor_name);
else
- tz->governor = def_governor;
+ governor = def_governor;
+
+ result = thermal_set_governor(tz, governor);
+ if (result) {
+ mutex_unlock(&thermal_governor_lock);
+ goto unregister;
+ }
mutex_unlock(&thermal_governor_lock);
@@ -1643,7 +1918,7 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
device_remove_file(&tz->device, &dev_attr_mode);
device_remove_file(&tz->device, &dev_attr_policy);
remove_trip_attrs(tz);
- tz->governor = NULL;
+ thermal_set_governor(tz, NULL);
thermal_remove_hwmon_sysfs(tz);
release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id);
@@ -1799,7 +2074,11 @@ static int __init thermal_register_governors(void)
if (result)
return result;
- return thermal_gov_user_space_register();
+ result = thermal_gov_user_space_register();
+ if (result)
+ return result;
+
+ return thermal_gov_power_allocator_register();
}
static void thermal_unregister_governors(void)
@@ -1808,6 +2087,7 @@ static void thermal_unregister_governors(void)
thermal_gov_fair_share_unregister();
thermal_gov_bang_bang_unregister();
thermal_gov_user_space_unregister();
+ thermal_gov_power_allocator_unregister();
}
static int __init thermal_init(void)
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index 8e391812e503..d7ac1fccd659 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -46,8 +46,11 @@ struct thermal_instance {
unsigned long target; /* expected cooling state */
char attr_name[THERMAL_NAME_LENGTH];
struct device_attribute attr;
+ char weight_attr_name[THERMAL_NAME_LENGTH];
+ struct device_attribute weight_attr;
struct list_head tz_node; /* node in tz->thermal_instances */
struct list_head cdev_node; /* node in cdev->thermal_instances */
+ unsigned int weight; /* The weight of the cooling device */
};
int thermal_register_governor(struct thermal_governor *);
@@ -85,6 +88,14 @@ static inline int thermal_gov_user_space_register(void) { return 0; }
static inline void thermal_gov_user_space_unregister(void) {}
#endif /* CONFIG_THERMAL_GOV_USER_SPACE */
+#ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
+int thermal_gov_power_allocator_register(void);
+void thermal_gov_power_allocator_unregister(void);
+#else
+static inline int thermal_gov_power_allocator_register(void) { return 0; }
+static inline void thermal_gov_power_allocator_unregister(void) {}
+#endif /* CONFIG_THERMAL_GOV_POWER_ALLOCATOR */
+
/* device tree support */
#ifdef CONFIG_THERMAL_OF
int of_parse_thermal_zones(void);
diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
index a38c1756442a..cb45e729adb5 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
@@ -146,7 +146,8 @@ static int ti_thermal_bind(struct thermal_zone_device *thermal,
return thermal_zone_bind_cooling_device(thermal, 0, cdev,
/* bind with min and max states defined by cpu_cooling */
THERMAL_NO_LIMIT,
- THERMAL_NO_LIMIT);
+ THERMAL_NO_LIMIT,
+ THERMAL_WEIGHT_DEFAULT);
}
/* Unbind callback functions for thermal zone */