summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/arm/configs/exynos_defconfig2
-rw-r--r--arch/arm/configs/multi_v7_defconfig2
-rw-r--r--drivers/cpufreq/Kconfig5
-rw-r--r--drivers/cpufreq/cpufreq_governor.c2
-rw-r--r--drivers/cpufreq/intel_pstate.c63
-rw-r--r--include/linux/sched.h13
-rw-r--r--include/trace/events/power.h13
-rw-r--r--kernel/sched/cpufreq.c2
-rw-r--r--kernel/sched/cpufreq_schedutil.c122
-rw-r--r--kernel/sched/deadline.c5
-rw-r--r--kernel/sched/fair.c23
-rw-r--r--kernel/sched/rt.c5
-rw-r--r--kernel/sched/sched.h40
13 files changed, 178 insertions, 119 deletions
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index 01986deef7c5..36cc7cc012f9 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -28,7 +28,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=m
CONFIG_CPU_FREQ_GOV_USERSPACE=m
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
-CONFIG_CPU_FREQ_GOV_SCHEDUTIL=m
+CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
CONFIG_CPUFREQ_DT=y
CONFIG_CPU_IDLE=y
CONFIG_ARM_EXYNOS_CPUIDLE=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 2c8665cd9dc5..ab5fea73cc8b 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -135,7 +135,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=m
CONFIG_CPU_FREQ_GOV_USERSPACE=m
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
-CONFIG_CPU_FREQ_GOV_SCHEDUTIL=m
+CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
CONFIG_QORIQ_CPUFREQ=y
CONFIG_CPU_IDLE=y
CONFIG_ARM_CPUIDLE=y
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 9be9013c2914..d8b164a7c4e5 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -194,7 +194,7 @@ config CPU_FREQ_GOV_CONSERVATIVE
If in doubt, say N.
config CPU_FREQ_GOV_SCHEDUTIL
- tristate "'schedutil' cpufreq policy governor"
+ bool "'schedutil' cpufreq policy governor"
depends on CPU_FREQ && SMP
select CPU_FREQ_GOV_ATTR_SET
select IRQ_WORK
@@ -208,9 +208,6 @@ config CPU_FREQ_GOV_SCHEDUTIL
frequency tipping point is at utilization/capacity equal to 80% in
both cases.
- To compile this driver as a module, choose M here: the module will
- be called cpufreq_schedutil.
-
If in doubt, say N.
comment "CPU frequency scaling drivers"
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index e415349ab31b..642dd0f183a8 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -260,7 +260,7 @@ static void dbs_irq_work(struct irq_work *irq_work)
}
static void dbs_update_util_handler(struct update_util_data *data, u64 time,
- unsigned long util, unsigned long max)
+ unsigned int flags)
{
struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util);
struct policy_dbs_info *policy_dbs = cdbs->policy_dbs;
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 5095b890a06a..806f2039571e 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -181,6 +181,8 @@ struct _pid {
* @cpu: CPU number for this instance data
* @update_util: CPUFreq utility callback information
* @update_util_set: CPUFreq utility callback is set
+ * @iowait_boost: iowait-related boost fraction
+ * @last_update: Time of the last update.
* @pstate: Stores P state limits for this CPU
* @vid: Stores VID limits for this CPU
* @pid: Stores PID parameters for this CPU
@@ -206,6 +208,7 @@ struct cpudata {
struct vid_data vid;
struct _pid pid;
+ u64 last_update;
u64 last_sample_time;
u64 prev_aperf;
u64 prev_mperf;
@@ -216,6 +219,7 @@ struct cpudata {
struct acpi_processor_performance acpi_perf_data;
bool valid_pss_table;
#endif
+ unsigned int iowait_boost;
};
static struct cpudata **all_cpu_data;
@@ -229,6 +233,7 @@ static struct cpudata **all_cpu_data;
* @p_gain_pct: PID proportional gain
* @i_gain_pct: PID integral gain
* @d_gain_pct: PID derivative gain
+ * @boost_iowait: Whether or not to use iowait boosting.
*
* Stores per CPU model static PID configuration data.
*/
@@ -240,6 +245,7 @@ struct pstate_adjust_policy {
int p_gain_pct;
int d_gain_pct;
int i_gain_pct;
+ bool boost_iowait;
};
/**
@@ -1037,6 +1043,7 @@ static const struct cpu_defaults silvermont_params = {
.p_gain_pct = 14,
.d_gain_pct = 0,
.i_gain_pct = 4,
+ .boost_iowait = true,
},
.funcs = {
.get_max = atom_get_max_pstate,
@@ -1058,6 +1065,7 @@ static const struct cpu_defaults airmont_params = {
.p_gain_pct = 14,
.d_gain_pct = 0,
.i_gain_pct = 4,
+ .boost_iowait = true,
},
.funcs = {
.get_max = atom_get_max_pstate,
@@ -1099,6 +1107,7 @@ static const struct cpu_defaults bxt_params = {
.p_gain_pct = 14,
.d_gain_pct = 0,
.i_gain_pct = 4,
+ .boost_iowait = true,
},
.funcs = {
.get_max = core_get_max_pstate,
@@ -1222,36 +1231,18 @@ static inline int32_t get_avg_pstate(struct cpudata *cpu)
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
{
struct sample *sample = &cpu->sample;
- u64 cummulative_iowait, delta_iowait_us;
- u64 delta_iowait_mperf;
- u64 mperf, now;
- int32_t cpu_load;
+ int32_t busy_frac, boost;
- cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now);
+ busy_frac = div_fp(sample->mperf, sample->tsc);
- /*
- * Convert iowait time into number of IO cycles spent at max_freq.
- * IO is considered as busy only for the cpu_load algorithm. For
- * performance this is not needed since we always try to reach the
- * maximum P-State, so we are already boosting the IOs.
- */
- delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait;
- delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling *
- cpu->pstate.max_pstate, MSEC_PER_SEC);
+ boost = cpu->iowait_boost;
+ cpu->iowait_boost >>= 1;
- mperf = cpu->sample.mperf + delta_iowait_mperf;
- cpu->prev_cummulative_iowait = cummulative_iowait;
+ if (busy_frac < boost)
+ busy_frac = boost;
- /*
- * The load can be estimated as the ratio of the mperf counter
- * running at a constant frequency during active periods
- * (C0) and the time stamp counter running at the same frequency
- * also during C-states.
- */
- cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
- cpu->sample.busy_scaled = cpu_load;
-
- return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load);
+ sample->busy_scaled = busy_frac * 100;
+ return get_avg_pstate(cpu) - pid_calc(&cpu->pid, sample->busy_scaled);
}
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
@@ -1325,15 +1316,29 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
sample->mperf,
sample->aperf,
sample->tsc,
- get_avg_frequency(cpu));
+ get_avg_frequency(cpu),
+ fp_toint(cpu->iowait_boost * 100));
}
static void intel_pstate_update_util(struct update_util_data *data, u64 time,
- unsigned long util, unsigned long max)
+ unsigned int flags)
{
struct cpudata *cpu = container_of(data, struct cpudata, update_util);
- u64 delta_ns = time - cpu->sample.time;
+ u64 delta_ns;
+
+ if (pid_params.boost_iowait) {
+ if (flags & SCHED_CPUFREQ_IOWAIT) {
+ cpu->iowait_boost = int_tofp(1);
+ } else if (cpu->iowait_boost) {
+ /* Clear iowait_boost if the CPU may have been idle. */
+ delta_ns = time - cpu->last_update;
+ if (delta_ns > TICK_NSEC)
+ cpu->iowait_boost = 0;
+ }
+ cpu->last_update = time;
+ }
+ delta_ns = time - cpu->sample.time;
if ((s64)delta_ns >= pid_params.sample_rate_ns) {
bool sample_taken = intel_pstate_sample(cpu, time);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 62c68e513e39..98fe95fea30c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3469,15 +3469,20 @@ static inline unsigned long rlimit_max(unsigned int limit)
return task_rlimit_max(current, limit);
}
+#define SCHED_CPUFREQ_RT (1U << 0)
+#define SCHED_CPUFREQ_DL (1U << 1)
+#define SCHED_CPUFREQ_IOWAIT (1U << 2)
+
+#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)
+
#ifdef CONFIG_CPU_FREQ
struct update_util_data {
- void (*func)(struct update_util_data *data,
- u64 time, unsigned long util, unsigned long max);
+ void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
};
void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
- void (*func)(struct update_util_data *data, u64 time,
- unsigned long util, unsigned long max));
+ void (*func)(struct update_util_data *data, u64 time,
+ unsigned int flags));
void cpufreq_remove_update_util_hook(int cpu);
#endif /* CONFIG_CPU_FREQ */
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index 19e50300ce7d..54e3aad32806 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -69,7 +69,8 @@ TRACE_EVENT(pstate_sample,
u64 mperf,
u64 aperf,
u64 tsc,
- u32 freq
+ u32 freq,
+ u32 io_boost
),
TP_ARGS(core_busy,
@@ -79,7 +80,8 @@ TRACE_EVENT(pstate_sample,
mperf,
aperf,
tsc,
- freq
+ freq,
+ io_boost
),
TP_STRUCT__entry(
@@ -91,6 +93,7 @@ TRACE_EVENT(pstate_sample,
__field(u64, aperf)
__field(u64, tsc)
__field(u32, freq)
+ __field(u32, io_boost)
),
TP_fast_assign(
@@ -102,9 +105,10 @@ TRACE_EVENT(pstate_sample,
__entry->aperf = aperf;
__entry->tsc = tsc;
__entry->freq = freq;
+ __entry->io_boost = io_boost;
),
- TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu ",
+ TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu io_boost=%lu",
(unsigned long)__entry->core_busy,
(unsigned long)__entry->scaled_busy,
(unsigned long)__entry->from,
@@ -112,7 +116,8 @@ TRACE_EVENT(pstate_sample,
(unsigned long long)__entry->mperf,
(unsigned long long)__entry->aperf,
(unsigned long long)__entry->tsc,
- (unsigned long)__entry->freq
+ (unsigned long)__entry->freq,
+ (unsigned long)__entry->io_boost
)
);
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index 1141954e73b4..dbc51442ecbc 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -33,7 +33,7 @@ DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
*/
void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
void (*func)(struct update_util_data *data, u64 time,
- unsigned long util, unsigned long max))
+ unsigned int flags))
{
if (WARN_ON(!data || !func))
return;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index a84641b222c1..69e06898997d 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -12,7 +12,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/cpufreq.h>
-#include <linux/module.h>
#include <linux/slab.h>
#include <trace/events/power.h>
@@ -48,11 +47,14 @@ struct sugov_cpu {
struct sugov_policy *sg_policy;
unsigned int cached_raw_freq;
+ unsigned long iowait_boost;
+ unsigned long iowait_boost_max;
+ u64 last_update;
/* The fields below are only needed when sharing a policy. */
unsigned long util;
unsigned long max;
- u64 last_update;
+ unsigned int flags;
};
static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
@@ -144,24 +146,75 @@ static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util,
return cpufreq_driver_resolve_freq(policy, freq);
}
+static void sugov_get_util(unsigned long *util, unsigned long *max)
+{
+ struct rq *rq = this_rq();
+ unsigned long cfs_max;
+
+ cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id());
+
+ *util = min(rq->cfs.avg.util_avg, cfs_max);
+ *max = cfs_max;
+}
+
+static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
+ unsigned int flags)
+{
+ if (flags & SCHED_CPUFREQ_IOWAIT) {
+ sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+ } else if (sg_cpu->iowait_boost) {
+ s64 delta_ns = time - sg_cpu->last_update;
+
+ /* Clear iowait_boost if the CPU apprears to have been idle. */
+ if (delta_ns > TICK_NSEC)
+ sg_cpu->iowait_boost = 0;
+ }
+}
+
+static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
+ unsigned long *max)
+{
+ unsigned long boost_util = sg_cpu->iowait_boost;
+ unsigned long boost_max = sg_cpu->iowait_boost_max;
+
+ if (!boost_util)
+ return;
+
+ if (*util * boost_max < *max * boost_util) {
+ *util = boost_util;
+ *max = boost_max;
+ }
+ sg_cpu->iowait_boost >>= 1;
+}
+
static void sugov_update_single(struct update_util_data *hook, u64 time,
- unsigned long util, unsigned long max)
+ unsigned int flags)
{
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
struct cpufreq_policy *policy = sg_policy->policy;
+ unsigned long util, max;
unsigned int next_f;
+ sugov_set_iowait_boost(sg_cpu, time, flags);
+ sg_cpu->last_update = time;
+
if (!sugov_should_update_freq(sg_policy, time))
return;
- next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq :
- get_next_freq(sg_cpu, util, max);
+ if (flags & SCHED_CPUFREQ_RT_DL) {
+ next_f = policy->cpuinfo.max_freq;
+ } else {
+ sugov_get_util(&util, &max);
+ sugov_iowait_boost(sg_cpu, &util, &max);
+ next_f = get_next_freq(sg_cpu, util, max);
+ }
sugov_update_commit(sg_policy, time, next_f);
}
static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
- unsigned long util, unsigned long max)
+ unsigned long util, unsigned long max,
+ unsigned int flags)
{
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
struct cpufreq_policy *policy = sg_policy->policy;
@@ -169,9 +222,11 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
u64 last_freq_update_time = sg_policy->last_freq_update_time;
unsigned int j;
- if (util == ULONG_MAX)
+ if (flags & SCHED_CPUFREQ_RT_DL)
return max_f;
+ sugov_iowait_boost(sg_cpu, &util, &max);
+
for_each_cpu(j, policy->cpus) {
struct sugov_cpu *j_sg_cpu;
unsigned long j_util, j_max;
@@ -186,41 +241,50 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
* frequency update and the time elapsed between the last update
* of the CPU utilization and the last frequency update is long
* enough, don't take the CPU into account as it probably is
- * idle now.
+ * idle now (and clear iowait_boost for it).
*/
delta_ns = last_freq_update_time - j_sg_cpu->last_update;
- if (delta_ns > TICK_NSEC)
+ if (delta_ns > TICK_NSEC) {
+ j_sg_cpu->iowait_boost = 0;
continue;
-
- j_util = j_sg_cpu->util;
- if (j_util == ULONG_MAX)
+ }
+ if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
return max_f;
+ j_util = j_sg_cpu->util;
j_max = j_sg_cpu->max;
if (j_util * max > j_max * util) {
util = j_util;
max = j_max;
}
+
+ sugov_iowait_boost(j_sg_cpu, &util, &max);
}
return get_next_freq(sg_cpu, util, max);
}
static void sugov_update_shared(struct update_util_data *hook, u64 time,
- unsigned long util, unsigned long max)
+ unsigned int flags)
{
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
+ unsigned long util, max;
unsigned int next_f;
+ sugov_get_util(&util, &max);
+
raw_spin_lock(&sg_policy->update_lock);
sg_cpu->util = util;
sg_cpu->max = max;
+ sg_cpu->flags = flags;
+
+ sugov_set_iowait_boost(sg_cpu, time, flags);
sg_cpu->last_update = time;
if (sugov_should_update_freq(sg_policy, time)) {
- next_f = sugov_next_freq_shared(sg_cpu, util, max);
+ next_f = sugov_next_freq_shared(sg_cpu, util, max, flags);
sugov_update_commit(sg_policy, time, next_f);
}
@@ -444,10 +508,13 @@ static int sugov_start(struct cpufreq_policy *policy)
sg_cpu->sg_policy = sg_policy;
if (policy_is_shared(policy)) {
- sg_cpu->util = ULONG_MAX;
+ sg_cpu->util = 0;
sg_cpu->max = 0;
+ sg_cpu->flags = SCHED_CPUFREQ_RT;
sg_cpu->last_update = 0;
sg_cpu->cached_raw_freq = 0;
+ sg_cpu->iowait_boost = 0;
+ sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
sugov_update_shared);
} else {
@@ -495,28 +562,15 @@ static struct cpufreq_governor schedutil_gov = {
.limits = sugov_limits,
};
-static int __init sugov_module_init(void)
-{
- return cpufreq_register_governor(&schedutil_gov);
-}
-
-static void __exit sugov_module_exit(void)
-{
- cpufreq_unregister_governor(&schedutil_gov);
-}
-
-MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wysocki@intel.com>");
-MODULE_DESCRIPTION("Utilization-based CPU frequency selection");
-MODULE_LICENSE("GPL");
-
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
struct cpufreq_governor *cpufreq_default_governor(void)
{
return &schedutil_gov;
}
-
-fs_initcall(sugov_module_init);
-#else
-module_init(sugov_module_init);
#endif
-module_exit(sugov_module_exit);
+
+static int __init sugov_register(void)
+{
+ return cpufreq_register_governor(&schedutil_gov);
+}
+fs_initcall(sugov_register);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 1ce8867283dc..974779656999 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -735,9 +735,8 @@ static void update_curr_dl(struct rq *rq)
return;
}
- /* kick cpufreq (see the comment in linux/cpufreq.h). */
- if (cpu_of(rq) == smp_processor_id())
- cpufreq_trigger_update(rq_clock(rq));
+ /* kick cpufreq (see the comment in kernel/sched/sched.h). */
+ cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL);
schedstat_set(curr->se.statistics.exec_max,
max(curr->se.statistics.exec_max, delta_exec));
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 039de34f1521..a5cd07b25aa1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2875,12 +2875,7 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
{
- struct rq *rq = rq_of(cfs_rq);
- int cpu = cpu_of(rq);
-
- if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
- unsigned long max = rq->cpu_capacity_orig;
-
+ if (&this_rq()->cfs == cfs_rq) {
/*
* There are a few boundary cases this might miss but it should
* get called often enough that that should (hopefully) not be
@@ -2897,8 +2892,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
*
* See cpu_util().
*/
- cpufreq_update_util(rq_clock(rq),
- min(cfs_rq->avg.util_avg, max), max);
+ cpufreq_update_util(rq_of(cfs_rq), 0);
}
}
@@ -3159,10 +3153,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
static inline void update_load_avg(struct sched_entity *se, int not_used)
{
- struct cfs_rq *cfs_rq = cfs_rq_of(se);
- struct rq *rq = rq_of(cfs_rq);
-
- cpufreq_trigger_update(rq_clock(rq));
+ cpufreq_update_util(rq_of(cfs_rq_of(se)), 0);
}
static inline void
@@ -4509,6 +4500,14 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
+ /*
+ * If in_iowait is set, the code below may not trigger any cpufreq
+ * utilization updates, so do it here explicitly with the IOWAIT flag
+ * passed.
+ */
+ if (p->in_iowait)
+ cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT);
+
for_each_sched_entity(se) {
if (se->on_rq)
break;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d5690b722691..2516b8df6dbb 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -957,9 +957,8 @@ static void update_curr_rt(struct rq *rq)
if (unlikely((s64)delta_exec <= 0))
return;
- /* Kick cpufreq (see the comment in linux/cpufreq.h). */
- if (cpu_of(rq) == smp_processor_id())
- cpufreq_trigger_update(rq_clock(rq));
+ /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
+ cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT);
schedstat_set(curr->se.statistics.exec_max,
max(curr->se.statistics.exec_max, delta_exec));
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c64fc5114004..b7fc1ced4380 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1763,27 +1763,13 @@ DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
/**
* cpufreq_update_util - Take a note about CPU utilization changes.
- * @time: Current time.
- * @util: Current utilization.
- * @max: Utilization ceiling.
+ * @rq: Runqueue to carry out the update for.
+ * @flags: Update reason flags.
*
- * This function is called by the scheduler on every invocation of
- * update_load_avg() on the CPU whose utilization is being updated.
+ * This function is called by the scheduler on the CPU whose utilization is
+ * being updated.
*
* It can only be called from RCU-sched read-side critical sections.
- */
-static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
-{
- struct update_util_data *data;
-
- data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
- if (data)
- data->func(data, time, util, max);
-}
-
-/**
- * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed.
- * @time: Current time.
*
* The way cpufreq is currently arranged requires it to evaluate the CPU
* performance state (frequency/voltage) on a regular basis to prevent it from
@@ -1797,13 +1783,23 @@ static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned lo
* but that really is a band-aid. Going forward it should be replaced with
* solutions targeted more specifically at RT and DL tasks.
*/
-static inline void cpufreq_trigger_update(u64 time)
+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
+{
+ struct update_util_data *data;
+
+ data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
+ if (data)
+ data->func(data, rq_clock(rq), flags);
+}
+
+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags)
{
- cpufreq_update_util(time, ULONG_MAX, 0);
+ if (cpu_of(rq) == smp_processor_id())
+ cpufreq_update_util(rq, flags);
}
#else
-static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {}
-static inline void cpufreq_trigger_update(u64 time) {}
+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
#endif /* CONFIG_CPU_FREQ */
#ifdef arch_scale_freq_capacity