author     Nicolas Pitre <nicolas.pitre@linaro.org>    2015-01-12 16:17:47 -0500
committer  Daniel Lezcano <daniel.lezcano@linaro.org>  2015-01-15 13:38:03 +0100
commit     57a61a9ba3e991e2cf9a769114eb264cf47d7c47
tree       b288f939c055517c331ca4e73a30e2b611d752ba
parent     afd42120fa9fb92b3e0dec81bfd4721cb859573e
irq_timings: add per-CPU prediction queueing
Once a good IRQ prediction is made, we need to enqueue it for later consumption. While at it, we discard any predictions whose time stamp is already in the past.

There shouldn't be that many expected IRQs at any given time, so a sorted list is most likely going to be good enough. And, by definition, the most frequent IRQs will end up near the beginning of the list anyway.

There is no generic way to determine what the IRQ controller will do when the IRQ affinity mask contains multiple CPUs. It is therefore assumed that the next occurrence of an IRQ is most likely to happen on the same CPU as the last one. Observations on x86 suggest this is the case overall, despite active migration controlled from user space. On ARM, the GIC driver selects the first CPU in the affinity mask, so the assumption holds exactly in that case. If migration frequency ever becomes significant compared to IRQ occurrences, we could consider registering an affinity notifier.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
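
For illustration only: since the per-CPU list is kept sorted by increasing expected time, a consumer (e.g. the idle path, added by a later patch) could simply peek at the head of the list to learn the next expected IRQ time. The helper below is a hypothetical sketch, not part of this patch; it assumes only the per-CPU irqt_predictions list and irqt_predictions_lock introduced here, and that it is called with interrupts disabled.

/*
 * Hypothetical consumer sketch -- not part of this patch.  Meant to be
 * called with interrupts disabled (e.g. from the idle path).  Returns
 * the earliest queued prediction for the current CPU, or 0 if the list
 * is empty.
 */
static ktime_t irqt_get_next_prediction(void)
{
        struct list_head *head = this_cpu_ptr(&irqt_predictions);
        raw_spinlock_t *lock = this_cpu_ptr(&irqt_predictions_lock);
        struct irqt_prediction *first;
        ktime_t next = ktime_set(0, 0);

        raw_spin_lock(lock);
        if (!list_empty(head)) {
                /* list is sorted: first entry is the nearest prediction */
                first = list_first_entry(head, struct irqt_prediction, node);
                next = first->time;
        }
        raw_spin_unlock(lock);
        return next;
}
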
-rw-r--r--  kernel/irq/internals.h |  6
-rw-r--r--  kernel/irq/irqdesc.c   |  2
-rw-r--r--  kernel/irq/timings.c   | 80
3 files changed, 86 insertions(+), 2 deletions(-)
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 4332d766619d..dcfdf76718e6 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -106,6 +106,12 @@ static inline void unregister_handler_proc(unsigned int irq,
                                           struct irqaction *action) { }
#endif
+#ifdef CONFIG_IRQ_TIMINGS
+extern void __init irqt_init(void);
+#else
+static inline void irqt_init(void) { }
+#endif
+
extern int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask);
extern void irq_set_thread_affinity(struct irq_desc *desc);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index a1782f88f0af..75a289b0f589 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -216,6 +216,7 @@ int __init early_irq_init(void)
        int i, initcnt, node = first_online_node;
        struct irq_desc *desc;
+        irqt_init();
        init_irq_default_affinity();
        /* Let arch update nr_irqs and return the nr of preallocated irqs */
@@ -254,6 +255,7 @@ int __init early_irq_init(void)
        int count, i, node = first_online_node;
        struct irq_desc *desc;
+        irqt_init();
        init_irq_default_affinity();
        printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS);
diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c
index ae8dbce37528..b9498f306d61 100644
--- a/kernel/irq/timings.c
+++ b/kernel/irq/timings.c
@@ -11,6 +11,8 @@
#include <linux/irq.h>
#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
#include "internals.h"
@@ -23,6 +25,13 @@
*/
#define IRQT_INTERVAL_WINDOW 3
+
+struct irqt_prediction {
+        struct list_head node;
+        ktime_t time;          /* expected occurrence time */
+        int cpu;               /* CPU this prediction was queued for */
+};
+
struct irqt_stat {
        ktime_t last_time;      /* previous IRQ occurrence */
        u64 n_M2;               /* IRQ interval variance (n scaled) */
@@ -32,8 +41,71 @@ struct irqt_stat {
        unsigned int w_ptr;     /* current window pointer */
        u32 predictable;        /* # of IRQs that were predictable */
        u32 unpredictable;      /* # of IRQs that were not */
+        struct irqt_prediction prediction;
};
+static DEFINE_PER_CPU(struct list_head, irqt_predictions);
+static DEFINE_PER_CPU(raw_spinlock_t, irqt_predictions_lock);
+
+void __init irqt_init(void)
+{
+        int cpu;
+
+        for_each_possible_cpu(cpu) {
+                INIT_LIST_HEAD(&per_cpu(irqt_predictions, cpu));
+                raw_spin_lock_init(&per_cpu(irqt_predictions_lock, cpu));
+        }
+}
+
+/*
+ * Purge past events.
+ * Caller must take care of locking.
+ */
+static void irqt_purge(ktime_t now, struct list_head *head)
+{
+        struct irqt_prediction *entry, *n;
+
+        list_for_each_entry_safe(entry, n, head, node) {
+                if (ktime_after(entry->time, now))
+                        break;
+                list_del_init(&entry->node);
+        }
+}
+
+/*
+ * Enqueue the next predicted event for this IRQ on this CPU.
+ * We are in interrupt context with IRQs disabled.
+ */
+static void irqt_enqueue_prediction(ktime_t now, struct irqt_stat *s)
+{
+        int this_cpu = raw_smp_processor_id();
+        int prev_cpu = s->prediction.cpu;
+        struct list_head *head = &per_cpu(irqt_predictions, this_cpu);
+        u32 predicted_interval = s->n_mean / IRQT_INTERVAL_WINDOW;
+        struct irqt_prediction *list_entry, *new_entry;
+        raw_spinlock_t *lock;
+
+        if (unlikely(prev_cpu != this_cpu && prev_cpu != -1)) {
+                lock = &per_cpu(irqt_predictions_lock, prev_cpu);
+                raw_spin_lock(lock);
+                list_del_init(&s->prediction.node);
+                raw_spin_unlock(lock);
+        }
+
+        lock = &per_cpu(irqt_predictions_lock, this_cpu);
+        raw_spin_lock(lock);
+        irqt_purge(now, head);
+        __list_del_entry(&s->prediction.node);
+        new_entry = &s->prediction;
+        new_entry->time = ktime_add_us(now, predicted_interval);
+        new_entry->cpu = this_cpu;
+        list_for_each_entry(list_entry, head, node)
+                if (ktime_after(list_entry->time, new_entry->time))
+                        break;
+        list_add_tail(&new_entry->node, &list_entry->node);
+        raw_spin_unlock(lock);
+}
+
/*
 * irqt_process - update timing interval statistics for the given IRQ
 *
@@ -152,8 +224,12 @@ void irqt_process(unsigned int irq, struct irqt_stat *s)
         * n_mean/n * n_mean/n < n_M2/n / (n - 1) -->
         * n_mean * n_mean * (n - 1) < n_M2 * n
         */
-        if ((u64)s->n_mean * s->n_mean * (n - 1) > s->n_M2 * n)
+        if ((u64)s->n_mean * s->n_mean * (n - 1) > s->n_M2 * n) {
                s->predictable++;
-        else
+                if (s->predictable >= IRQT_INTERVAL_WINDOW)
+                        irqt_enqueue_prediction(now, s);
+        } else {
+                s->predictable = 0;
                s->unpredictable++;
+        }
}
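
As a sanity check on the predictability test in the last hunk (the squared mean compared against the sample variance, both kept n-scaled to avoid divisions), here is a small stand-alone illustration with made-up numbers. It is ordinary user-space C, not kernel code: it mirrors only the comparison and the predicted_interval computation above, and computes n_mean and n_M2 directly rather than with the kernel's incremental update.

#include <stdio.h>
#include <stdint.h>

#define IRQT_INTERVAL_WINDOW 3

int main(void)
{
        /* Made-up IRQ intervals, in microseconds. */
        uint32_t intervals[IRQT_INTERVAL_WINDOW] = { 1000, 1010, 990 };
        uint64_t n = IRQT_INTERVAL_WINDOW;
        uint64_t n_mean = 0, n_M2 = 0;
        uint64_t mean, i;

        for (i = 0; i < n; i++)
                n_mean += intervals[i];         /* n-scaled mean, i.e. plain sum */
        mean = n_mean / n;
        for (i = 0; i < n; i++) {
                int64_t d = (int64_t)intervals[i] - (int64_t)mean;
                n_M2 += n * (uint64_t)(d * d);  /* n-scaled sum of squared deviations */
        }

        /* mean^2 > variance  <=>  n_mean * n_mean * (n - 1) > n_M2 * n */
        if (n_mean * n_mean * (n - 1) > n_M2 * n)
                printf("predictable, next IRQ expected in ~%llu us\n",
                       (unsigned long long)(n_mean / IRQT_INTERVAL_WINDOW));
        else
                printf("unpredictable\n");
        return 0;
}

With the sample values this prints "predictable, next IRQ expected in ~1000 us", which is the interval irqt_enqueue_prediction() would add to the current time.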