Diffstat (limited to 'include/linux/sched.h')
-rw-r--r--  include/linux/sched.h  108
1 file changed, 81 insertions(+), 27 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 50f8c6343f17..fc3a7cf107ec 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -243,6 +243,43 @@ extern char ___assert_task_state[1 - 2*!!(
((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
(task->flags & PF_FROZEN) == 0)
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+
+#define __set_task_state(tsk, state_value) \
+ do { \
+ (tsk)->task_state_change = _THIS_IP_; \
+ (tsk)->state = (state_value); \
+ } while (0)
+#define set_task_state(tsk, state_value) \
+ do { \
+ (tsk)->task_state_change = _THIS_IP_; \
+ set_mb((tsk)->state, (state_value)); \
+ } while (0)
+
+/*
+ * set_current_state() includes a barrier so that the write of current->state
+ * is correctly serialised wrt the caller's subsequent test of whether to
+ * actually sleep:
+ *
+ * set_current_state(TASK_UNINTERRUPTIBLE);
+ * if (do_i_need_to_sleep())
+ * schedule();
+ *
+ * If the caller does not need such serialisation then use __set_current_state()
+ */
+#define __set_current_state(state_value) \
+ do { \
+ current->task_state_change = _THIS_IP_; \
+ current->state = (state_value); \
+ } while (0)
+#define set_current_state(state_value) \
+ do { \
+ current->task_state_change = _THIS_IP_; \
+ set_mb(current->state, (state_value)); \
+ } while (0)
+
+#else
+
#define __set_task_state(tsk, state_value) \
do { (tsk)->state = (state_value); } while (0)
#define set_task_state(tsk, state_value) \
@@ -259,11 +296,13 @@ extern char ___assert_task_state[1 - 2*!!(
*
* If the caller does not need such serialisation then use __set_current_state()
*/
-#define __set_current_state(state_value) \
+#define __set_current_state(state_value) \
do { current->state = (state_value); } while (0)
-#define set_current_state(state_value) \
+#define set_current_state(state_value) \
set_mb(current->state, (state_value))
+#endif
+
/* Task command name length */
#define TASK_COMM_LEN 16
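
The barrier distinction documented in the hunk above matters in the classic prepare-to-wait pattern. A minimal sketch, assuming a hypothetical `condition` flag set by the waker (generic illustration, not part of this patch):

	for (;;) {
		/* publish the new state before testing the condition */
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (condition)		/* hypothetical wakeup condition */
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);	/* no serialisation needed here */

The barrier in set_current_state() is what keeps the test of `condition` from being reordered before the state write, which could otherwise lose a wakeup.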
@@ -1072,15 +1111,28 @@ struct load_weight {
};
struct sched_avg {
+ u64 last_runnable_update;
+ s64 decay_count;
+ /*
+ * utilization_avg_contrib describes the amount of time that a
+ * sched_entity is running on a CPU. It is based on running_avg_sum
+ * and is scaled in the range [0..SCHED_LOAD_SCALE].
+ * load_avg_contrib describes the amount of time that a sched_entity
+ * is runnable on a rq. It is based on both runnable_avg_sum and the
+ * weight of the task.
+ */
+ unsigned long load_avg_contrib, utilization_avg_contrib;
/*
* These sums represent an infinite geometric series and so are bound
* above by 1024/(1-y). Thus we only need a u32 to store them for all
* choices of y < 1-2^(-32)*1024.
+ * running_avg_sum reflects the time that the sched_entity is
+ * effectively running on the CPU.
+ * runnable_avg_sum represents the amount of time a sched_entity is on
+ * a runqueue which includes the running time that is monitored by
+ * running_avg_sum.
*/
- u32 runnable_avg_sum, runnable_avg_period;
- u64 last_runnable_update;
- s64 decay_count;
- unsigned long load_avg_contrib;
+ u32 runnable_avg_sum, avg_period, running_avg_sum;
};
#ifdef CONFIG_SCHEDSTATS
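
The bound quoted in the comment is the closed form of the geometric series: each accumulation period contributes at most 1024, and every older contribution decays by a factor y per period, so the running total can never exceed 1024/(1-y). A standalone toy model of one accumulation step, using a decimal approximation of y (the kernel itself uses fixed-point arithmetic with y chosen so that y^32 == 1/2):

	#include <stdint.h>

	/* Toy model of the bounded series behind runnable_avg_sum and
	 * running_avg_sum: decay the old sum by y ~= 0.9785 (y^32 ~= 1/2),
	 * then add this period's contribution, capped at 1024. The fixed
	 * point of the iteration is 1024 / (1 - 0.9785) ~= 47600, so the
	 * sum always fits comfortably in a u32. */
	static uint32_t accumulate_period(uint32_t sum, uint32_t contrib)
	{
		return (uint32_t)(((uint64_t)sum * 9785) / 10000) + contrib;
	}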
@@ -1576,28 +1628,23 @@ struct task_struct {
struct numa_group *numa_group;
/*
- * Exponential decaying average of faults on a per-node basis.
- * Scheduling placement decisions are made based on the these counts.
- * The values remain static for the duration of a PTE scan
+ * numa_faults is an array split into four regions:
+ * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
+ * in this precise order.
+ *
+ * faults_memory: Exponential decaying average of faults on a per-node
+ * basis. Scheduling placement decisions are made based on these
+ * counts. The values remain static for the duration of a PTE scan.
+ * faults_cpu: Track the nodes the process was running on when a NUMA
+ * hinting fault was incurred.
+ * faults_memory_buffer and faults_cpu_buffer: Record faults per node
+ * during the current scan window. When the scan completes, the counts
+ * in faults_memory and faults_cpu decay and these values are copied.
*/
- unsigned long *numa_faults_memory;
+ unsigned long *numa_faults;
unsigned long total_numa_faults;
/*
- * numa_faults_buffer records faults per node during the current
- * scan window. When the scan completes, the counts in
- * numa_faults_memory decay and these values are copied.
- */
- unsigned long *numa_faults_buffer_memory;
-
- /*
- * Track the nodes the process was running on when a NUMA hinting
- * fault was incurred.
- */
- unsigned long *numa_faults_cpu;
- unsigned long *numa_faults_buffer_cpu;
-
- /*
* numa_faults_locality tracks if faults recorded during the last
* scan window were remote/local. The task scan period is adapted
* based on the locality of the faults with different weights
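
With the four arrays folded into a single allocation, a lookup becomes base-plus-offset arithmetic over the regions listed in the comment above. A minimal sketch of that indexing, with a hypothetical helper name (the kernel's real accessor and its finer per-node private/shared split live in kernel/sched/fair.c):

	/* hypothetical region indices matching the documented order */
	enum { FAULTS_MEM, FAULTS_CPU, FAULTS_MEMBUF, FAULTS_CPUBUF };

	/* assuming one counter per node per region for simplicity */
	static inline unsigned long *
	numa_faults_region(unsigned long *numa_faults, int region, int nr_nodes)
	{
		return numa_faults + region * nr_nodes;
	}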
@@ -1682,6 +1729,9 @@ struct task_struct {
#ifdef CONFIG_SCHED_IO_LATENCY
struct io_latency_node io_latency;
#endif
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+ unsigned long task_state_change;
+#endif
};
/* Future-safe accessor for struct task_struct's cpus_allowed. */
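
The new task_state_change field records the _THIS_IP_ captured by the debug variants of set_task_state()/set_current_state() above, so the atomic-sleep checks can point at the call site that last changed the task state. A sketch of the kind of report this enables, assuming the check sits alongside ___might_sleep() (illustrative, not the exact in-tree message):

	/* illustrative: about to block while not TASK_RUNNING? */
	if (current->state != TASK_RUNNING)
		pr_err("do not call blocking ops when !TASK_RUNNING; "
		       "state=%lx set at [<%p>] %pS\n",
		       current->state,
		       (void *)current->task_state_change,
		       (void *)current->task_state_change);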
@@ -2073,6 +2123,10 @@ static inline void tsk_restore_flags(struct task_struct *task,
task->flags |= orig_flags & flags;
}
+extern int cpuset_cpumask_can_shrink(const struct cpumask *cur,
+ const struct cpumask *trial);
+extern int task_can_attach(struct task_struct *p,
+ const struct cpumask *cs_cpus_allowed);
#ifdef CONFIG_SMP
extern void do_set_cpus_allowed(struct task_struct *p,
const struct cpumask *new_mask);
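
These two hooks let cpuset-style callers validate a cpumask change or a task attach before committing to it. A hedged sketch of the intended call pattern, assuming a boolean-style return from cpuset_cpumask_can_shrink() and 0/-errno from task_can_attach() (hypothetical caller, not from this patch):

	/* hypothetical validation step in a cpuset-style controller */
	if (!cpuset_cpumask_can_shrink(cur_mask, trial_mask))
		return -EBUSY;	/* would strand admitted deadline bandwidth */

	ret = task_can_attach(tsk, cs_cpus_allowed);
	if (ret)
		return ret;	/* task cannot move into this cpuset */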
@@ -2781,7 +2835,7 @@ static inline int signal_pending_state(long state, struct task_struct *p)
extern int _cond_resched(void);
#define cond_resched() ({ \
- __might_sleep(__FILE__, __LINE__, 0); \
+ ___might_sleep(__FILE__, __LINE__, 0); \
_cond_resched(); \
})
@@ -2794,14 +2848,14 @@ extern int __cond_resched_lock(spinlock_t *lock);
#endif
#define cond_resched_lock(lock) ({ \
- __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
+ ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
__cond_resched_lock(lock); \
})
extern int __cond_resched_softirq(void);
#define cond_resched_softirq() ({ \
- __might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
+ ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
__cond_resched_softirq(); \
})
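
These macros now route their debug check through the three-underscore ___might_sleep() helper. For reference, the typical use of cond_resched() in a long-running loop (process_item() is a hypothetical unit of work):

	/* generic illustration: yield periodically from a long loop */
	for (i = 0; i < nr_items; i++) {
		process_item(i);
		cond_resched();	/* may schedule; the might-sleep check
				 * warns if called from atomic context */
	}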