/* * linux/kernel/irq/timings.c * * Copyright (C) 2016, Linaro Ltd - Daniel Lezcano * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * */ #include #include #include #include #include #include #include #include #include #include #include "internals.h" DEFINE_STATIC_KEY_FALSE(irq_timing_enabled); DEFINE_PER_CPU(struct irq_timings, irq_timings); struct irqt_stat { u64 ne; /* next event */ u64 lts; /* last timestamp */ u64 variance; /* variance */ u32 avg; /* mean value */ u32 count; /* number of samples */ int anomalies; /* number of consecutives anomalies */ int valid; /* behaviour of the interrupt */ }; static DEFINE_IDR(irqt_stats); void irq_timings_enable(void) { static_branch_enable(&irq_timing_enabled); } void irq_timings_disable(void) { static_branch_disable(&irq_timing_enabled); } /* * Number of elements in the circular buffer: If it happens it was * flushed before, then the number of elements could be smaller than * IRQ_TIMINGS_SIZE, so the count is used, otherwise the array size is * used as we wrapped. The index begins from zero when we did not * wrap. That could be done in a nicer way with the proper circular * array structure type but with the cost of extra computation in the * interrupt handler hot path. We choose efficiency. * * Inject measured irq/timestamp to the statistical model while * decrementing the counter because we consume the data from our * circular buffer. */ #define for_each_irqts(__iter, __irqts) \ for (__iter = __irqts->count < IRQ_TIMINGS_SIZE ? \ 0 :__irqts->count & IRQ_TIMINGS_MASK, \ __irqts->count = min(IRQ_TIMINGS_SIZE, __irqts->count); \ __irqts->count > 0; __irqts->count--, \ __iter = (__iter + 1) & IRQ_TIMINGS_MASK) /** * irqs_update - update the irq timing statistics with a new timestamp * * @irqs: an irqt_stat struct pointer * @ts: the new timestamp * * ** This function must be called with the local irq disabled ** * * The statistics are computed online, in other words, the code is * designed to compute the statistics on a stream of values rather * than doing multiple passes on the values to compute the average, * then the variance. The integer division introduces a loss of * precision but with an acceptable error margin regarding the results * we would have with the double floating precision: we are dealing * with nanosec, so big numbers, consequently the mantisse is * negligeable, especially when converting the time in usec * afterwards. * * The computation happens at idle time. When the CPU is not idle, the * interrupts' timestamps are stored in the circular buffer, when the * CPU goes idle and this routine is called, all the buffer's values * are injected in the statistical model continuying to extend the * statistics from the previous busy-idle cycle. * * The observations showed a device will trigger a burst of periodic * interrupts followed by one or two peaks of longer time, for * instance when a SD card device flushes its cache, then the periodic * intervals occur again. A one second inactivity period resets the * stats, that gives us the certitude the statistical values won't * exceed 1x10^9, thus the computation won't overflow. * * Basically, the purpose of the algorithm is to watch the periodic * interrupts and eliminate the peaks. * * An interrupt is considered periodically stable if the interval of * its occurences follow the normal distribution, thus the values * comply with: * * avg - 3 x stddev < value < avg + 3 x stddev * * Which can be simplified to: * * -3 x stddev < value - avg < 3 x stddev * * abs(value - avg) < 3 x stddev * * In order to save a costly square root computation, we use the * variance. For the record, stddev = sqrt(variance). The equation * above becomes: * * abs(value - avg) < 3 x sqrt(variance) * * And finally we square it: * * (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2 * * (value - avg) x (value - avg) < 9 x variance * * Statistically speaking, any values out of this interval is * considered as an anomaly and is discarded. However, a normal * distribution appears when the number of samples is 30 (it is the * rule of thumb in statistics, cf. "30 samples" on Internet). When * there are three consecutive anomalies, the statistics are resetted. * */ static void irqs_update(struct irqt_stat *irqs, u64 ts) { u64 old_ts = irqs->lts; u64 variance = 0; u64 interval; s64 diff; /* * The timestamps are absolute time values, we need to compute * the timing interval between two interrupts. */ irqs->lts = ts; /* * The interval type is u64 in order to deal with the same * type in our computation, that prevent mindfuck issues with * overflow, sign and division. */ interval = ts - old_ts; /* * The interrupt triggered more than one second apart, that * ends the sequence as predictible for our purpose. In this * case, assume we have the beginning of a sequence and the * timestamp is the first value. As it is impossible to * predict anything at this point, return. * * Note the first timestamp of the sequence will always fall * in this test because the old_ts is zero. That is what we * want as we need another timestamp to compute an interval. */ if (interval >= NSEC_PER_SEC) { memset(irqs, 0, sizeof(*irqs)); irqs->lts = ts; return; } /* * Pre-compute the delta with the average as the result is * used several times in this function. */ diff = interval - irqs->avg; /* * Increment the number of samples. */ irqs->count++; /* * Online variance divided by the number of elements if there * is more than one sample. Normally the formula is division * by count - 1 but we assume the number of element will be * more than 32 and dividing by 32 instead of 31 is enough * precise. */ if (likely(irqs->count > 1)) variance = irqs->variance >> IRQ_TIMINGS_SHIFT; /* * The rule of thumb in statistics for the normal distribution * is having at least 30 samples in order to have the model to * apply. Values outside the interval are considered as an * anomaly. */ if ((irqs->count >= 30) && ((diff * diff) > (9 * variance))) { /* * After three consecutive anomalies, we reset the * stats as it is no longer stable enough. */ if (irqs->anomalies++ >= 3) { memset(irqs, 0, sizeof(*irqs)); irqs->lts = ts; return; } } else { /* * The anomalies must be consecutives, so at this * point, we reset the anomalies counter. */ irqs->anomalies = 0; } /* * The interrupt is considered stable enough to try to predict * the next event on it. */ irqs->valid = 1; /* * Online average algorithm: * * new_average = average + ((value - average) / count) * * The variance computation depends on the new average * to be computed here first. * */ irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT); /* * Online variance algorithm: * * new_variance = variance + (value - average) x (value - new_average) * * Warning: irqs->avg is updated with the line above, hence * 'interval - irqs->avg' is no longer equal to 'diff' */ irqs->variance = irqs->variance + (diff * (interval - irqs->avg)); /* * Update the next event */ irqs->ne = ts + irqs->avg; } /** * irq_timings_rate : Returns the number of interrupt per jiffies * * @now: the local cpu time in nsec * * Computes a rough average of the interrupt intervals and its growth * compared to the last interrupt event and now. * * Returns the interrupt rate (nsec based), the meaning of the value * is the mean interval interrupt value. A negative value means the * rate is decreasing, a positive value means the rate is increasing. */ u64 irq_timings_rate(u64 now) { struct irq_timings *irqts = this_cpu_ptr(&irq_timings); int not_used; u64 ts, rate = 0; if (irqts->count) { /* * Pick the first element in the irq timings array and * measure the average interrupt interval duration, * then compare it with the last duration. */ irq_timing_decode( irqts->values[irqts->count & IRQ_TIMINGS_MASK], &ts, ¬_used); rate = div_u64((now - ts), irqts->count); irq_timing_decode( irqts->values[(irqts->count - 1) & IRQ_TIMINGS_MASK], &ts, ¬_used); } return rate; } /** * irq_timings_next_event - Return when the next event is supposed to arrive * * *** This function must be called with the local irq disabled *** * * During the last busy cycle, the number of interrupts is incremented * and stored in the irq_timings structure. This information is * necessary to: * * - know if the index in the table wrapped up: * * If more than the array size interrupts happened during the * last busy/idle cycle, the index wrapped up and we have to * begin with the next element in the array which is the last one * in the sequence, otherwise it is a the index 0. * * - have an indication of the interrupts activity on this CPU * (eg. irq/sec) * * The values are 'consumed' after inserting in the statistical model, * thus the count is reinitialized. * * The array of values **must** be browsed in the time direction, the * timestamp must increase between an element and the next one. * * Returns a nanosec time based estimation of the earliest interrupt, * U64_MAX otherwise. */ u64 irq_timings_next_event(u64 now) { struct irq_timings *irqts = this_cpu_ptr(&irq_timings); struct irqt_stat *irqs; struct irqt_stat __percpu *s; u64 ts, ne = U64_MAX; int i, irq = 0; for_each_irqts(i, irqts) { irq_timing_decode(irqts->values[i], &ts, &irq); s = idr_find(&irqt_stats, irq); if (s) { irqs = this_cpu_ptr(s); irqs_update(irqs, ts); } } /* * Look in the list of interrupts' statistics, the earliest * next event. */ idr_for_each_entry(&irqt_stats, s, i) { irqs = this_cpu_ptr(s); if (!irqs->valid) continue; if (irqs->ne <= now) { irq = i; ne = now; /* * This interrupt mustn't use in the future * until new events occur and update the * statistics. */ irqs->valid = 0; break; } if (irqs->ne < ne) { irq = i; ne = irqs->ne; } } if (irq) trace_irq_timings_next_event(irq, ne); return ne; } void irq_timings_free(int irq) { struct irqt_stat __percpu *s; s = idr_find(&irqt_stats, irq); if (s) { free_percpu(s); idr_remove(&irqt_stats, irq); } } int irq_timings_alloc(int irq) { int id; struct irqt_stat __percpu *s; /* * Some platforms can have the same private interrupt per cpu, * so this function may be be called several times with the * same interrupt number. Just bail out in case the per cpu * stat structure is already allocated. */ s = idr_find(&irqt_stats, irq); if (s) return 0; s = alloc_percpu(*s); if (!s) return -ENOMEM; idr_preload(GFP_KERNEL); id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT); idr_preload_end(); if (id < 0) { free_percpu(s); return id; } return 0; } #ifdef CONFIG_TEST_IRQ_TIMINGS static int __init irq_timings_test_irqts(struct irq_timings *irqts, unsigned count) { int i, irq; int ots = 0xDEAD, oirq = 0xBEEF; int start = count > IRQ_TIMINGS_SIZE ? count - IRQ_TIMINGS_SIZE : 0; u64 ts; /* * Fill the circular buffer by using the dedicated function. */ for (i = 0; i < count; i++) { pr_debug("%d (%d): %X %X\n", i, i & IRQ_TIMINGS_MASK, ots + i, oirq + i); irq_timings_push(ots + i, oirq + i); } /* * Compute the first elements values after the index wrapped * up or not. */ ots += start; oirq += start; /* * Test the circular buffer count is correct. */ pr_debug(" -- Checking timings array count (%d) is right\n", count); if (WARN_ON(irqts->count != count)) return -EINVAL; /* * Test the macro allowing to browse all the irqts. */ pr_debug(" -- Checking the for_each_irqts() macro\n"); for_each_irqts(i, irqts) { irq_timing_decode(irqts->values[i], &ts, &irq); pr_debug("%d: %llX (%X) %X (%X)\n", i, ts, ots, irq, oirq); if (WARN_ON(ts != ots || irq != oirq)) return -EINVAL; ots++; oirq++; } /* * The circular buffer should have be flushed when browsed * with for_each_irqts */ pr_debug(" -- Checking timings array is empty after browsing it\n"); if (WARN_ON(irqts->count)) return -EINVAL; return 0; } static int __init irq_timings_selftest(void) { struct irq_timings *irqts = this_cpu_ptr(&irq_timings); /* * Test the circular buffer with different number of * elements. The purpose is to test at the limits (empty, half * full, full, wrapped with the cursor at the boundaries, * wrapped several times, etc ... */ int i, ret, count[] = { 0, IRQ_TIMINGS_SIZE >> 1, IRQ_TIMINGS_SIZE, IRQ_TIMINGS_SIZE + (IRQ_TIMINGS_SIZE >> 1), 2 * IRQ_TIMINGS_SIZE, (2 * IRQ_TIMINGS_SIZE) + 3, }; pr_info("Starting irq timings selftest\n"); /* * At this point, we don't except any subsystem to use the irq * timings but us, so it should not be enabled. */ if (static_branch_unlikely(&irq_timing_enabled)) { pr_warn("irq timings already initialized, skipping selftest\n"); return 0; } for (i = 0; i < ARRAY_SIZE(count); i++) { pr_info(" -- Checking the timings with %d/%d values\n", count[i], IRQ_TIMINGS_SIZE); ret = irq_timings_test_irqts(irqts, count[i]); if (ret) return ret; } pr_info("Irq timings selftest passed\n"); return 0; } early_initcall(irq_timings_selftest); #endif