author    | Jon Medhurst <tixy@linaro.org> | 2013-07-11 10:30:54 +0100
committer | Jon Medhurst <tixy@linaro.org> | 2013-07-11 10:30:54 +0100
commit    | ca286b2f687f12fbb51d1a9453d270e34d6146f3 (patch)
tree      | 8345f886b87a6dc249f691e15ceed10424e5b26a
parent    | 522cfc7b46bb14e9bb2c2e2c0c8d9dc5edaf553f (diff)
parent    | ad8c941910465fde7f896de3daca7e9558929fbb (diff)
Merge branch 'big-LITTLE-MP-updates-ll' into integration-linaro-vexpress (tracking-integration-linaro-vexpress-ll-20130711.0)
-rw-r--r-- | arch/arm/Kconfig                      |   1
-rw-r--r-- | arch/arm/kernel/topology.c            |   3
-rw-r--r-- | drivers/cpufreq/cpufreq_interactive.c |   8
-rw-r--r-- | include/linux/vmstat.h                |   2
-rw-r--r-- | kernel/sched/core.c                   |  23
-rw-r--r-- | kernel/sched/fair.c                   | 133
-rw-r--r-- | linaro/configs/big-LITTLE-MP.conf     |   2
-rw-r--r-- | mm/vmstat.c                           |  95

8 files changed, 206 insertions(+), 61 deletions(-)
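The scheduler hunks below repeatedly use one pattern: hmp_domain_min_load() scans the CPUs of an HMP domain for the smallest runnable-load contribution, and the caller prefers that CPU when the task's affinity allows it, falling back to any allowed CPU in the domain. A minimal userspace sketch of that selection pattern follows; the arrays cpu_in_domain, cpu_runnable_sum and allowed[] are illustrative stand-ins for the kernel's runqueues and cpumasks, not kernel API.

```c
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8
#define LOAD_AVG_MAX 47742 /* same series maximum the patch pins in __sched_fork() */

/* Illustrative stand-ins for per-CPU runnable load and domain membership. */
static unsigned long cpu_runnable_sum[NR_CPUS];
static bool cpu_in_domain[NR_CPUS];

/* Mirrors the hmp_domain_min_load() idea: divide only once, after the loop. */
static unsigned int domain_min_load(int *min_cpu)
{
    unsigned long min_runnable_load = INT_MAX;
    int min_cpu_temp = NR_CPUS;

    for (int cpu = 0; cpu < NR_CPUS; cpu++) {
        if (!cpu_in_domain[cpu])
            continue;
        unsigned long contrib = cpu_runnable_sum[cpu] * 1024;
        if (contrib < min_runnable_load) {
            min_runnable_load = contrib;
            min_cpu_temp = cpu;
        }
    }
    if (min_cpu)
        *min_cpu = min_cpu_temp;
    /* a completely idle CPU reports zero load straight away */
    return min_runnable_load ? min_runnable_load / (LOAD_AVG_MAX + 1) : 0;
}

/* Prefer the least-loaded CPU the task may run on; else any allowed CPU. */
static int select_target_cpu(const bool *allowed)
{
    int lowest_cpu = NR_CPUS;

    domain_min_load(&lowest_cpu);
    if (lowest_cpu != NR_CPUS && allowed[lowest_cpu])
        return lowest_cpu;
    for (int cpu = 0; cpu < NR_CPUS; cpu++)
        if (cpu_in_domain[cpu] && allowed[cpu])
            return cpu;
    return NR_CPUS;
}

int main(void)
{
    bool allowed[NR_CPUS];

    for (int i = 0; i < NR_CPUS; i++)
        allowed[i] = true;
    cpu_in_domain[0] = cpu_in_domain[1] = true;
    cpu_runnable_sum[0] = 40000;  /* busy CPU in the target domain */
    cpu_runnable_sum[1] = 5000;   /* mostly idle CPU in the target domain */
    printf("target cpu: %d\n", select_target_cpu(allowed)); /* prints 1 */
    return 0;
}
```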
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ca4c8a9723bc..83198521dc78 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1555,7 +1555,6 @@ config SCHED_HMP
 config SCHED_HMP_PRIO_FILTER
         bool "(EXPERIMENTAL) Filter HMP migrations by task priority"
         depends on SCHED_HMP
-        default y
         help
           Enables task priority based HMP migration filter. Any task with
           a NICE value above the threshold will always be on low-power cpus
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 2018d0d41d5a..677da58d9e88 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -367,10 +367,11 @@ void __init arch_get_fast_and_slow_cpus(struct cpumask *fast,
         cpumask_clear(slow);
 }
 
+struct cpumask hmp_slow_cpu_mask;
+
 void __init arch_get_hmp_domains(struct list_head *hmp_domains_list)
 {
         struct cpumask hmp_fast_cpu_mask;
-        struct cpumask hmp_slow_cpu_mask;
         struct hmp_domain *domain;
 
         arch_get_fast_and_slow_cpus(&hmp_fast_cpu_mask, &hmp_slow_cpu_mask);
diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c
index 7f1782ade53c..a494fbd7dbe8 100644
--- a/drivers/cpufreq/cpufreq_interactive.c
+++ b/drivers/cpufreq/cpufreq_interactive.c
@@ -316,13 +316,13 @@ static u64 update_load(int cpu)
                 pcpu->policy->governor_data;
         u64 now;
         u64 now_idle;
-        unsigned int delta_idle;
-        unsigned int delta_time;
+        u64 delta_idle;
+        u64 delta_time;
         u64 active_time;
 
         now_idle = get_cpu_idle_time(cpu, &now, tunables->io_is_busy);
-        delta_idle = (unsigned int)(now_idle - pcpu->time_in_idle);
-        delta_time = (unsigned int)(now - pcpu->time_in_idle_timestamp);
+        delta_idle = (now_idle - pcpu->time_in_idle);
+        delta_time = (now - pcpu->time_in_idle_timestamp);
 
         if (delta_time <= delta_idle)
                 active_time = 0;
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index c586679b6fef..a30ab7910ff4 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -198,7 +198,7 @@ extern void __inc_zone_state(struct zone *, enum zone_stat_item);
 extern void dec_zone_state(struct zone *, enum zone_stat_item);
 extern void __dec_zone_state(struct zone *, enum zone_stat_item);
 
-void refresh_cpu_vm_stats(int);
+bool refresh_cpu_vm_stats(int);
 void refresh_zone_stat_thresholds(void);
 
 void drain_zonestat(struct zone *zone, struct per_cpu_pageset *);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e2f2f252d9e6..cb21581be5e7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1618,8 +1618,18 @@ static void __sched_fork(struct task_struct *p)
         p->se.avg.runnable_avg_period = 0;
         p->se.avg.runnable_avg_sum = 0;
 #ifdef CONFIG_SCHED_HMP
-        p->se.avg.hmp_last_up_migration = 0;
-        p->se.avg.hmp_last_down_migration = 0;
+        /* keep LOAD_AVG_MAX in sync with fair.c if load avg series is changed */
+#define LOAD_AVG_MAX 47742
+        if (p->mm) {
+                p->se.avg.hmp_last_up_migration = 0;
+                p->se.avg.hmp_last_down_migration = 0;
+                p->se.avg.load_avg_ratio = 1023;
+                p->se.avg.load_avg_contrib =
+                        (1023 * scale_load_down(p->se.load.weight));
+                p->se.avg.runnable_avg_period = LOAD_AVG_MAX;
+                p->se.avg.runnable_avg_sum = LOAD_AVG_MAX;
+                p->se.avg.usage_avg_sum = LOAD_AVG_MAX;
+        }
 #endif
 #endif
 #ifdef CONFIG_SCHEDSTATS
@@ -3817,6 +3827,8 @@ static struct task_struct *find_process_by_pid(pid_t pid)
         return pid ? find_task_by_vpid(pid) : current;
 }
 
+extern struct cpumask hmp_slow_cpu_mask;
+
 /* Actually do priority change: must hold rq lock. */
 static void
 __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
@@ -3826,8 +3838,13 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
         p->normal_prio = normal_prio(p);
         /* we are holding p->pi_lock already */
         p->prio = rt_mutex_getprio(p);
-        if (rt_prio(p->prio))
+        if (rt_prio(p->prio)) {
                 p->sched_class = &rt_sched_class;
+#ifdef CONFIG_SCHED_HMP
+                if (cpumask_equal(&p->cpus_allowed, cpu_all_mask))
+                        do_set_cpus_allowed(p, &hmp_slow_cpu_mask);
+#endif
+        }
         else
                 p->sched_class = &fair_sched_class;
         set_load_weight(p);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 90f61d848cb2..c849d68a9b76 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3596,8 +3596,10 @@ unsigned int hmp_up_prio = NICE_TO_PRIO(CONFIG_SCHED_HMP_PRIO_FILTER_VAL);
 unsigned int hmp_next_up_threshold = 4096;
 unsigned int hmp_next_down_threshold = 4096;
 
-static unsigned int hmp_up_migration(int cpu, struct sched_entity *se);
+static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se);
 static unsigned int hmp_down_migration(int cpu, struct sched_entity *se);
+static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
+                                                int *min_cpu);
 
 /* Check if cpu is in fastest hmp_domain */
 static inline unsigned int hmp_cpu_is_fastest(int cpu)
@@ -3642,7 +3644,16 @@ static inline struct hmp_domain *hmp_faster_domain(int cpu)
 static inline unsigned int hmp_select_faster_cpu(struct task_struct *tsk,
                                                         int cpu)
 {
-        return cpumask_any_and(&hmp_faster_domain(cpu)->cpus,
+        int lowest_cpu=NR_CPUS;
+        __always_unused int lowest_ratio = hmp_domain_min_load(hmp_faster_domain(cpu), &lowest_cpu);
+        /*
+         * If the lowest-loaded CPU in the domain is allowed by the task affinity
+         * select that one, otherwise select one which is allowed
+         */
+        if(lowest_cpu != NR_CPUS && cpumask_test_cpu(lowest_cpu,tsk_cpus_allowed(tsk)))
+                return lowest_cpu;
+        else
+                return cpumask_any_and(&hmp_faster_domain(cpu)->cpus,
                         tsk_cpus_allowed(tsk));
 }
 
@@ -3653,7 +3664,16 @@ static inline unsigned int hmp_select_faster_cpu(struct task_struct *tsk,
 static inline unsigned int hmp_select_slower_cpu(struct task_struct *tsk,
                                                         int cpu)
 {
-        return cpumask_any_and(&hmp_slower_domain(cpu)->cpus,
+        int lowest_cpu=NR_CPUS;
+        __always_unused int lowest_ratio = hmp_domain_min_load(hmp_slower_domain(cpu), &lowest_cpu);
+        /*
+         * If the lowest-loaded CPU in the domain is allowed by the task affinity
+         * select that one, otherwise select one which is allowed
+         */
+        if(lowest_cpu != NR_CPUS && cpumask_test_cpu(lowest_cpu,tsk_cpus_allowed(tsk)))
+                return lowest_cpu;
+        else
+                return cpumask_any_and(&hmp_slower_domain(cpu)->cpus,
                         tsk_cpus_allowed(tsk));
 }
 
@@ -3841,20 +3861,24 @@ static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
                                                 int *min_cpu)
 {
         int cpu;
-        int min_load = INT_MAX;
-        int min_cpu_temp = NR_CPUS;
+        int min_cpu_runnable_temp = NR_CPUS;
+        unsigned long min_runnable_load = INT_MAX;
+        unsigned long contrib;
 
         for_each_cpu_mask(cpu, hmpd->cpus) {
-                if (cpu_rq(cpu)->cfs.tg_load_contrib < min_load) {
-                        min_load = cpu_rq(cpu)->cfs.tg_load_contrib;
-                        min_cpu_temp = cpu;
+                /* don't use the divisor in the loop, just at the end */
+                contrib = cpu_rq(cpu)->avg.runnable_avg_sum * scale_load_down(1024);
+                if (contrib < min_runnable_load) {
+                        min_runnable_load = contrib;
+                        min_cpu_runnable_temp = cpu;
                 }
         }
 
         if (min_cpu)
-                *min_cpu = min_cpu_temp;
+                *min_cpu = min_cpu_runnable_temp;
 
-        return min_load;
+        /* domain will often have at least one empty CPU */
+        return min_runnable_load ? min_runnable_load / (LOAD_AVG_MAX + 1) : 0;
 }
 
 /*
@@ -3882,22 +3906,18 @@ static inline unsigned int hmp_offload_down(int cpu, struct sched_entity *se)
                 return NR_CPUS;
 
         /* Is the current domain fully loaded? */
-        /* load < ~94% */
+        /* load < ~50% */
         min_usage = hmp_domain_min_load(hmp_cpu_domain(cpu), NULL);
-        if (min_usage < NICE_0_LOAD-64)
-                return NR_CPUS;
-
-        /* Is the cpu oversubscribed? */
-        /* load < ~194% */
-        if (cpu_rq(cpu)->cfs.tg_load_contrib < 2*NICE_0_LOAD-64)
+        if (min_usage < (NICE_0_LOAD>>1))
                 return NR_CPUS;
 
         /* Is the task alone on the cpu? */
-        if (cpu_rq(cpu)->nr_running < 2)
+        if (cpu_rq(cpu)->cfs.nr_running < 2)
                 return NR_CPUS;
 
         /* Is the task actually starving? */
-        if (hmp_task_starvation(se) > 768) /* <25% waiting */
+        /* >=25% ratio running/runnable = starving */
+        if (hmp_task_starvation(se) > 768)
                 return NR_CPUS;
 
         /* Does the slower domain have spare cycles? */
@@ -3908,6 +3928,7 @@ static inline unsigned int hmp_offload_down(int cpu, struct sched_entity *se)
         if (cpumask_test_cpu(dest_cpu, &hmp_slower_domain(cpu)->cpus))
                 return dest_cpu;
 
+        return NR_CPUS;
 }
 #endif /* CONFIG_SCHED_HMP */
 
@@ -3936,6 +3957,28 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
         if (p->nr_cpus_allowed == 1)
                 return prev_cpu;
 
+#ifdef CONFIG_SCHED_HMP
+        /* always put non-kernel forking tasks on a big domain */
+        if (p->mm && (sd_flag & SD_BALANCE_FORK)) {
+                if(hmp_cpu_is_fastest(prev_cpu)) {
+                        struct hmp_domain *hmpdom = list_entry(&hmp_cpu_domain(prev_cpu)->hmp_domains, struct hmp_domain, hmp_domains);
+                        __always_unused int lowest_ratio = hmp_domain_min_load(hmpdom, &new_cpu);
+                        if(new_cpu != NR_CPUS && cpumask_test_cpu(new_cpu,tsk_cpus_allowed(p)))
+                                return new_cpu;
+                        else {
+                                new_cpu = cpumask_any_and(&hmp_faster_domain(cpu)->cpus,
+                                                tsk_cpus_allowed(p));
+                                if(new_cpu < nr_cpu_ids)
+                                        return new_cpu;
+                        }
+                } else {
+                        new_cpu = hmp_select_faster_cpu(p, prev_cpu);
+                        if (new_cpu != NR_CPUS)
+                                return new_cpu;
+                }
+        }
+#endif
+
         if (sd_flag & SD_BALANCE_WAKE) {
                 if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
                         want_affine = 1;
@@ -4011,8 +4054,7 @@ unlock:
         rcu_read_unlock();
 
 #ifdef CONFIG_SCHED_HMP
-        if (hmp_up_migration(prev_cpu, &p->se)) {
-                new_cpu = hmp_select_faster_cpu(p, prev_cpu);
+        if (hmp_up_migration(prev_cpu, &new_cpu, &p->se)) {
                 hmp_next_up_delay(&p->se, new_cpu);
                 trace_sched_hmp_migrate(p, new_cpu, 0);
                 return new_cpu;
@@ -5986,7 +6028,11 @@ static struct {
 static inline int find_new_ilb(int call_cpu)
 {
         int ilb = cpumask_first(nohz.idle_cpus_mask);
-
+#ifdef CONFIG_SCHED_HMP
+        /* restrict nohz balancing to occur in the same hmp domain */
+        ilb = cpumask_first_and(nohz.idle_cpus_mask,
+                        &((struct hmp_domain *)hmp_cpu_domain(call_cpu))->cpus);
+#endif
         if (ilb < nr_cpu_ids && idle_cpu(ilb))
                 return ilb;
 
@@ -6265,6 +6311,18 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
         if (time_before(now, nohz.next_balance))
                 return 0;
 
+#ifdef CONFIG_SCHED_HMP
+        /*
+         * Bail out if there are no nohz CPUs in our
+         * HMP domain, since we will move tasks between
+         * domains through wakeup and force balancing
+         * as necessary based upon task load.
+         */
+        if (cpumask_first_and(nohz.idle_cpus_mask,
+                        &((struct hmp_domain *)hmp_cpu_domain(cpu))->cpus) >= nr_cpu_ids)
+                return 0;
+#endif
+
         if (rq->nr_running >= 2)
                 goto need_kick;
 
@@ -6299,12 +6357,15 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
 
 #ifdef CONFIG_SCHED_HMP
 /* Check if task should migrate to a faster cpu */
-static unsigned int hmp_up_migration(int cpu, struct sched_entity *se)
+static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se)
 {
         struct task_struct *p = task_of(se);
         struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
         u64 now;
 
+        if (target_cpu)
+                *target_cpu = NR_CPUS;
+
         if (hmp_cpu_is_fastest(cpu))
                 return 0;
 
@@ -6313,6 +6374,8 @@ static unsigned int hmp_up_migration(int cpu, struct sched_entity *se)
         if (p->prio >= hmp_up_prio)
                 return 0;
 #endif
+        if (se->avg.load_avg_ratio < hmp_up_threshold)
+                return 0;
 
         /* Let the task load settle before doing another up migration */
         now = cfs_rq_clock_task(cfs_rq);
@@ -6320,15 +6383,15 @@ static unsigned int hmp_up_migration(int cpu, struct sched_entity *se)
                         < hmp_next_up_threshold)
                 return 0;
 
-        if (se->avg.load_avg_ratio > hmp_up_threshold) {
-                /* Target domain load < ~94% */
-                if (hmp_domain_min_load(hmp_faster_domain(cpu), NULL)
-                        > NICE_0_LOAD-64)
-                        return 0;
-                if (cpumask_intersects(&hmp_faster_domain(cpu)->cpus,
-                                tsk_cpus_allowed(p)))
-                        return 1;
-        }
+        /* Target domain load < 94% */
+        if (hmp_domain_min_load(hmp_faster_domain(cpu), target_cpu)
+                        > NICE_0_LOAD-64)
+                return 0;
+
+        if (cpumask_intersects(&hmp_faster_domain(cpu)->cpus,
+                                tsk_cpus_allowed(p)))
+                return 1;
+
         return 0;
 }
@@ -6521,7 +6584,7 @@ static DEFINE_SPINLOCK(hmp_force_migration);
  */
 static void hmp_force_up_migration(int this_cpu)
 {
-        int cpu;
+        int cpu, target_cpu;
         struct sched_entity *curr;
         struct rq *target;
         unsigned long flags;
@@ -6549,10 +6612,10 @@ static void hmp_force_up_migration(int this_cpu)
                         }
                 }
                 p = task_of(curr);
-                if (hmp_up_migration(cpu, curr)) {
+                if (hmp_up_migration(cpu, &target_cpu, curr)) {
                         if (!target->active_balance) {
                                 target->active_balance = 1;
-                                target->push_cpu = hmp_select_faster_cpu(p, cpu);
+                                target->push_cpu = target_cpu;
                                 target->migrate_task = p;
                                 force = 1;
                                 trace_sched_hmp_migrate(p, target->push_cpu, 1);
diff --git a/linaro/configs/big-LITTLE-MP.conf b/linaro/configs/big-LITTLE-MP.conf
index 8cc2be049a41..0bbc603a13e5 100644
--- a/linaro/configs/big-LITTLE-MP.conf
+++ b/linaro/configs/big-LITTLE-MP.conf
@@ -9,5 +9,3 @@ CONFIG_HMP_FAST_CPU_MASK=""
 CONFIG_HMP_SLOW_CPU_MASK=""
 CONFIG_HMP_VARIABLE_SCALE=y
 CONFIG_HMP_FREQUENCY_INVARIANT_SCALE=y
-CONFIG_SCHED_HMP_PRIO_FILTER=y
-CONFIG_SCHED_HMP_PRIO_FILTER_VAL=5
diff --git a/mm/vmstat.c b/mm/vmstat.c
index f42745e65780..b916a43a6b37 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/cpu.h>
+#include <linux/cpumask.h>
 #include <linux/vmstat.h>
 #include <linux/sched.h>
 #include <linux/math64.h>
@@ -432,11 +433,12 @@ EXPORT_SYMBOL(dec_zone_page_state);
  * with the global counters. These could cause remote node cache line
  * bouncing and will have to be only done when necessary.
  */
-void refresh_cpu_vm_stats(int cpu)
+bool refresh_cpu_vm_stats(int cpu)
 {
         struct zone *zone;
         int i;
         int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+        bool vm_activity = false;
 
         for_each_populated_zone(zone) {
                 struct per_cpu_pageset *p;
@@ -483,14 +485,21 @@ void refresh_cpu_vm_stats(int cpu)
                 if (p->expire)
                         continue;
 
-                if (p->pcp.count)
+                if (p->pcp.count) {
+                        vm_activity = true;
                         drain_zone_pages(zone, &p->pcp);
+                }
 #endif
         }
 
         for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-                if (global_diff[i])
+                if (global_diff[i]) {
                         atomic_long_add(global_diff[i], &vm_stat[i]);
+                        vm_activity = true;
+                }
+
+        return vm_activity;
+
 }
 
 /*
@@ -1174,22 +1183,72 @@ static const struct file_operations proc_vmstat_file_operations = {
 #ifdef CONFIG_SMP
 static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
 int sysctl_stat_interval __read_mostly = HZ;
+static struct cpumask vmstat_off_cpus;
+struct delayed_work vmstat_monitor_work;
 
-static void vmstat_update(struct work_struct *w)
+static inline bool need_vmstat(int cpu)
 {
-        refresh_cpu_vm_stats(smp_processor_id());
-        schedule_delayed_work(&__get_cpu_var(vmstat_work),
-                round_jiffies_relative(sysctl_stat_interval));
+        struct zone *zone;
+        int i;
+
+        for_each_populated_zone(zone) {
+                struct per_cpu_pageset *p;
+
+                p = per_cpu_ptr(zone->pageset, cpu);
+
+                for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+                        if (p->vm_stat_diff[i])
+                                return true;
+
+                if (zone_to_nid(zone) != numa_node_id() && p->pcp.count)
+                        return true;
+        }
+
+        return false;
 }
 
-static void __cpuinit start_cpu_timer(int cpu)
+static void vmstat_update(struct work_struct *w);
+
+static void start_cpu_timer(int cpu)
 {
         struct delayed_work *work = &per_cpu(vmstat_work, cpu);
 
-        INIT_DEFERRABLE_WORK(work, vmstat_update);
+        cpumask_clear_cpu(cpu, &vmstat_off_cpus);
         schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
 }
 
+static void __cpuinit setup_cpu_timer(int cpu)
+{
+        struct delayed_work *work = &per_cpu(vmstat_work, cpu);
+
+        INIT_DEFERRABLE_WORK(work, vmstat_update);
+        start_cpu_timer(cpu);
+}
+
+static void vmstat_update_monitor(struct work_struct *w)
+{
+        int cpu;
+
+        for_each_cpu_and(cpu, &vmstat_off_cpus, cpu_online_mask)
+                if (need_vmstat(cpu))
+                        start_cpu_timer(cpu);
+
+        queue_delayed_work(system_unbound_wq, &vmstat_monitor_work,
+                round_jiffies_relative(sysctl_stat_interval));
+}
+
+
+static void vmstat_update(struct work_struct *w)
+{
+        int cpu = smp_processor_id();
+
+        if (likely(refresh_cpu_vm_stats(cpu)))
+                schedule_delayed_work(&__get_cpu_var(vmstat_work),
+                        round_jiffies_relative(sysctl_stat_interval));
+        else
+                cpumask_set_cpu(cpu, &vmstat_off_cpus);
+}
+
 /*
  * Use the cpu notifier to insure that the thresholds are recalculated
  * when necessary.
@@ -1204,17 +1263,19 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
         case CPU_ONLINE:
         case CPU_ONLINE_FROZEN:
                 refresh_zone_stat_thresholds();
-                start_cpu_timer(cpu);
+                setup_cpu_timer(cpu);
                 node_set_state(cpu_to_node(cpu), N_CPU);
                 break;
         case CPU_DOWN_PREPARE:
         case CPU_DOWN_PREPARE_FROZEN:
-                cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
-                per_cpu(vmstat_work, cpu).work.func = NULL;
+                if (!cpumask_test_cpu(cpu, &vmstat_off_cpus)) {
+                        cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
+                        per_cpu(vmstat_work, cpu).work.func = NULL;
+                }
                 break;
         case CPU_DOWN_FAILED:
         case CPU_DOWN_FAILED_FROZEN:
-                start_cpu_timer(cpu);
+                setup_cpu_timer(cpu);
                 break;
         case CPU_DEAD:
         case CPU_DEAD_FROZEN:
@@ -1237,8 +1298,14 @@ static int __init setup_vmstat(void)
 
         register_cpu_notifier(&vmstat_notifier);
 
+        INIT_DEFERRABLE_WORK(&vmstat_monitor_work,
+                                vmstat_update_monitor);
+        queue_delayed_work(system_unbound_wq,
+                                &vmstat_monitor_work,
+                                round_jiffies_relative(HZ));
+
         for_each_online_cpu(cpu)
-                start_cpu_timer(cpu);
+                setup_cpu_timer(cpu);
 #endif
 #ifdef CONFIG_PROC_FS
         proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
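On the mm/vmstat.c side, refresh_cpu_vm_stats() now reports whether it actually folded any per-CPU deltas, so a quiet CPU can park its periodic vmstat work and a single deferrable monitor re-arms it once new deltas appear. A rough userspace model of that state machine is sketched below; the function names echo the patch, but the bodies (and the pending[] counters standing in for per-CPU stat diffs) are simplified illustrations, not kernel code.

```c
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

static bool vmstat_off[NR_CPUS]; /* CPUs whose periodic update is parked */
static int  pending[NR_CPUS];    /* stand-in for per-CPU stat deltas */

/* Model of refresh_cpu_vm_stats(): fold deltas, report whether any existed. */
static bool refresh_cpu_vm_stats(int cpu)
{
    bool had_work = pending[cpu] != 0;

    pending[cpu] = 0;
    return had_work;
}

/* Model of vmstat_update(): re-arm only while the CPU keeps producing work. */
static void vmstat_update(int cpu)
{
    if (refresh_cpu_vm_stats(cpu)) {
        printf("cpu%d: activity, keep periodic work scheduled\n", cpu);
    } else {
        vmstat_off[cpu] = true;
        printf("cpu%d: idle, parking periodic work\n", cpu);
    }
}

/* Model of vmstat_update_monitor(): wake parked CPUs that have new deltas. */
static void vmstat_update_monitor(void)
{
    for (int cpu = 0; cpu < NR_CPUS; cpu++)
        if (vmstat_off[cpu] && pending[cpu]) {
            vmstat_off[cpu] = false;
            printf("cpu%d: new activity, restarting timer\n", cpu);
        }
}

int main(void)
{
    pending[1] = 3;                 /* only cpu1 has outstanding deltas */
    for (int cpu = 0; cpu < NR_CPUS; cpu++)
        vmstat_update(cpu);         /* cpu0/2/3 park themselves */
    pending[2] = 1;                 /* cpu2 becomes active again */
    vmstat_update_monitor();        /* monitor re-arms cpu2 only */
    return 0;
}
```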