From 01e3eb82278bf45221fc38b391bc5ee0f6a314d6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 Jan 2009 13:00:50 +0100 Subject: Revert "sched: improve preempt debugging" This reverts commit 7317d7b87edb41a9135e30be1ec3f7ef817c53dd. This has been reported (and bisected) by Alexey Zaytsev and Kamalesh Babulal to produce annoying warnings during bootup on both x86 and powerpc. kernel_locked() is not a valid test in IRQ context (we update the BKL's ->lock_depth and the preempt count separately and non-atomicalyy), so we cannot put it into the generic preempt debugging checks which can run in IRQ contexts too. Reported-and-bisected-by: Alexey Zaytsev Reported-and-bisected-by: Kamalesh Babulal Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 8be2c13b50d0..3b630d882660 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4440,7 +4440,7 @@ void __kprobes sub_preempt_count(int val) /* * Underflow? */ - if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked()))) + if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) return; /* * Is the spinlock portion underflowing? -- cgit v1.2.3 From 2ed7c03ec17779afb4fcfa3b8c61df61bd4879ba Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:54 +0100 Subject: [CVE-2009-0029] Convert all system calls to return a long Convert all system calls to return a long. This should be a NOP since all converted types should have the same size anyway. With the exception of sys_exit_group which returned void. But that doesn't matter since the system call doesn't return. Signed-off-by: Heiko Carstens --- kernel/exit.c | 4 +++- kernel/signal.c | 2 +- kernel/timer.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index c7740fa3252c..fac9b040af2c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1182,9 +1182,11 @@ do_group_exit(int exit_code) * wait4()-ing process will get the correct exit code - even if this * thread is not the thread group leader. */ -asmlinkage void sys_exit_group(int error_code) +asmlinkage long sys_exit_group(int error_code) { do_group_exit((error_code & 0xff) << 8); + /* NOTREACHED */ + return 0; } static struct pid *task_pid_type(struct task_struct *task, enum pid_type type) diff --git a/kernel/signal.c b/kernel/signal.c index 3152ac3b62e2..856a5479d49d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2559,7 +2559,7 @@ sys_ssetmask(int newmask) /* * For backwards compatibility. Functionality superseded by sigaction. */ -asmlinkage unsigned long +asmlinkage long sys_signal(int sig, __sighandler_t handler) { struct k_sigaction new_sa, old_sa; diff --git a/kernel/timer.c b/kernel/timer.c index dee3f641a7a7..7b8697d7f04d 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1129,7 +1129,7 @@ void do_timer(unsigned long ticks) * For backwards compatibility? This can be done in libc so Alpha * and all newer ports shouldn't need it. */ -asmlinkage unsigned long sys_alarm(unsigned int seconds) +asmlinkage long sys_alarm(unsigned int seconds) { return alarm_setitimer(seconds); } -- cgit v1.2.3 From f627a741d24f12955fa2d9f8831c3b12860635bd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:58 +0100 Subject: [CVE-2009-0029] Make sys_syslog a conditional system call Remove the -ENOSYS implementation for !CONFIG_PRINTK and use the cond_syscall infrastructure instead. Acked-by: Kyle McMartin Signed-off-by: Heiko Carstens --- kernel/printk.c | 5 ----- kernel/sys_ni.c | 1 + 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 7015733793e8..e48cf33783fc 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -742,11 +742,6 @@ EXPORT_SYMBOL(vprintk); #else -asmlinkage long sys_syslog(int type, char __user *buf, int len) -{ - return -ENOSYS; -} - static void call_console_drivers(unsigned start, unsigned end) { } diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index e14a23281707..27dad2967387 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -131,6 +131,7 @@ cond_syscall(sys_io_destroy); cond_syscall(sys_io_submit); cond_syscall(sys_io_cancel); cond_syscall(sys_io_getevents); +cond_syscall(sys_syslog); /* arch-specific weak syscall entries */ cond_syscall(sys_pciconfig_read); -- cgit v1.2.3 From 58fd3aa288939d3097fa04505b25c2f5e6e144d1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:03 +0100 Subject: [CVE-2009-0029] System call wrappers part 01 Signed-off-by: Heiko Carstens --- kernel/hrtimer.c | 4 ++-- kernel/sys.c | 2 +- kernel/time.c | 14 +++++++------- kernel/timer.c | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 1455b7651b6b..2dc30c59c5fd 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1467,8 +1467,8 @@ out: return ret; } -asmlinkage long -sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) +SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, + struct timespec __user *, rmtp) { struct timespec tu; diff --git a/kernel/sys.c b/kernel/sys.c index 763c3c17ded3..37165e552331 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -919,7 +919,7 @@ void do_sys_times(struct tms *tms) tms->tms_cstime = cputime_to_clock_t(cstime); } -asmlinkage long sys_times(struct tms __user * tbuf) +SYSCALL_DEFINE1(times, struct tms __user *, tbuf) { if (tbuf) { struct tms tmp; diff --git a/kernel/time.c b/kernel/time.c index 4886e3ce83a4..29511943871a 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -60,7 +60,7 @@ EXPORT_SYMBOL(sys_tz); * why not move it into the appropriate arch directory (for those * architectures that need it). */ -asmlinkage long sys_time(time_t __user * tloc) +SYSCALL_DEFINE1(time, time_t __user *, tloc) { time_t i = get_seconds(); @@ -79,7 +79,7 @@ asmlinkage long sys_time(time_t __user * tloc) * architectures that need it). */ -asmlinkage long sys_stime(time_t __user *tptr) +SYSCALL_DEFINE1(stime, time_t __user *, tptr) { struct timespec tv; int err; @@ -99,8 +99,8 @@ asmlinkage long sys_stime(time_t __user *tptr) #endif /* __ARCH_WANT_SYS_TIME */ -asmlinkage long sys_gettimeofday(struct timeval __user *tv, - struct timezone __user *tz) +SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv, + struct timezone __user *, tz) { if (likely(tv != NULL)) { struct timeval ktv; @@ -184,8 +184,8 @@ int do_sys_settimeofday(struct timespec *tv, struct timezone *tz) return 0; } -asmlinkage long sys_settimeofday(struct timeval __user *tv, - struct timezone __user *tz) +SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv, + struct timezone __user *, tz) { struct timeval user_tv; struct timespec new_ts; @@ -205,7 +205,7 @@ asmlinkage long sys_settimeofday(struct timeval __user *tv, return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL); } -asmlinkage long sys_adjtimex(struct timex __user *txc_p) +SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) { struct timex txc; /* Local copy of parameter */ int ret; diff --git a/kernel/timer.c b/kernel/timer.c index 7b8697d7f04d..76041df06c57 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1129,7 +1129,7 @@ void do_timer(unsigned long ticks) * For backwards compatibility? This can be done in libc so Alpha * and all newer ports shouldn't need it. */ -asmlinkage long sys_alarm(unsigned int seconds) +SYSCALL_DEFINE1(alarm, unsigned int, seconds) { return alarm_setitimer(seconds); } @@ -1152,7 +1152,7 @@ asmlinkage long sys_alarm(unsigned int seconds) * * This is SMP safe as current->tgid does not change. */ -asmlinkage long sys_getpid(void) +SYSCALL_DEFINE0(getpid) { return task_tgid_vnr(current); } @@ -1308,7 +1308,7 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) EXPORT_SYMBOL(schedule_timeout_uninterruptible); /* Thread ID - the internal kernel "pid" */ -asmlinkage long sys_gettid(void) +SYSCALL_DEFINE0(gettid) { return task_pid_vnr(current); } -- cgit v1.2.3 From dbf040d9d1cbf1ef6250bdb095c5c118950bcde8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:04 +0100 Subject: [CVE-2009-0029] System call wrappers part 02 Signed-off-by: Heiko Carstens --- kernel/sys.c | 10 +++++----- kernel/timer.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 37165e552331..4c33555f8d95 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -756,7 +756,7 @@ error: return retval; } -asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid) +SYSCALL_DEFINE3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid) { const struct cred *cred = current_cred(); int retval; @@ -814,7 +814,7 @@ error: return retval; } -asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid) +SYSCALL_DEFINE3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid) { const struct cred *cred = current_cred(); int retval; @@ -1015,7 +1015,7 @@ out: return err; } -asmlinkage long sys_getpgid(pid_t pid) +SYSCALL_DEFINE1(getpgid, pid_t, pid) { struct task_struct *p; struct pid *grp; @@ -1045,14 +1045,14 @@ out: #ifdef __ARCH_WANT_SYS_GETPGRP -asmlinkage long sys_getpgrp(void) +SYSCALL_DEFINE0(getpgrp) { return sys_getpgid(0); } #endif -asmlinkage long sys_getsid(pid_t pid) +SYSCALL_DEFINE1(getsid, pid_t, pid) { struct task_struct *p; struct pid *sid; diff --git a/kernel/timer.c b/kernel/timer.c index 76041df06c57..14a51530a4cd 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1163,7 +1163,7 @@ SYSCALL_DEFINE0(getpid) * value of ->real_parent under rcu_read_lock(), see * release_task()->call_rcu(delayed_put_task_struct). */ -asmlinkage long sys_getppid(void) +SYSCALL_DEFINE0(getppid) { int pid; @@ -1174,25 +1174,25 @@ asmlinkage long sys_getppid(void) return pid; } -asmlinkage long sys_getuid(void) +SYSCALL_DEFINE0(getuid) { /* Only we change this so SMP safe */ return current_uid(); } -asmlinkage long sys_geteuid(void) +SYSCALL_DEFINE0(geteuid) { /* Only we change this so SMP safe */ return current_euid(); } -asmlinkage long sys_getgid(void) +SYSCALL_DEFINE0(getgid) { /* Only we change this so SMP safe */ return current_gid(); } -asmlinkage long sys_getegid(void) +SYSCALL_DEFINE0(getegid) { /* Only we change this so SMP safe */ return current_egid(); -- cgit v1.2.3 From ae1251ab785f6da87219df8352ffdac68bba23e4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:05 +0100 Subject: [CVE-2009-0029] System call wrappers part 03 Signed-off-by: Heiko Carstens --- kernel/sys.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 4c33555f8d95..ace9ced598b9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -478,7 +478,7 @@ void ctrl_alt_del(void) * SMP: There are not races, the GIDs are checked only by filesystem * operations (as far as semantic preservation is concerned). */ -asmlinkage long sys_setregid(gid_t rgid, gid_t egid) +SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) { const struct cred *old; struct cred *new; @@ -529,7 +529,7 @@ error: * * SMP: Same implicit races as above. */ -asmlinkage long sys_setgid(gid_t gid) +SYSCALL_DEFINE1(setgid, gid_t, gid) { const struct cred *old; struct cred *new; @@ -597,7 +597,7 @@ static int set_user(struct cred *new) * 100% compatible with BSD. A program which uses just setuid() will be * 100% compatible with POSIX with saved IDs. */ -asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) +SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) { const struct cred *old; struct cred *new; @@ -661,7 +661,7 @@ error: * will allow a root program to temporarily drop privileges and be able to * regain them by swapping the real and effective uid. */ -asmlinkage long sys_setuid(uid_t uid) +SYSCALL_DEFINE1(setuid, uid_t, uid) { const struct cred *old; struct cred *new; @@ -705,7 +705,7 @@ error: * This function implements a generic ability to update ruid, euid, * and suid. This allows you to implement the 4.4 compatible seteuid(). */ -asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) +SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) { const struct cred *old; struct cred *new; @@ -771,7 +771,7 @@ SYSCALL_DEFINE3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __u /* * Same as above, but for rgid, egid, sgid. */ -asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) +SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) { const struct cred *old; struct cred *new; @@ -833,7 +833,7 @@ SYSCALL_DEFINE3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __u * whatever uid it wants to). It normally shadows "euid", except when * explicitly set by setfsuid() or for access.. */ -asmlinkage long sys_setfsuid(uid_t uid) +SYSCALL_DEFINE1(setfsuid, uid_t, uid) { const struct cred *old; struct cred *new; @@ -870,7 +870,7 @@ change_okay: /* * Samma på svenska.. */ -asmlinkage long sys_setfsgid(gid_t gid) +SYSCALL_DEFINE1(setfsgid, gid_t, gid) { const struct cred *old; struct cred *new; @@ -1311,7 +1311,7 @@ int set_current_groups(struct group_info *group_info) EXPORT_SYMBOL(set_current_groups); -asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) +SYSCALL_DEFINE2(getgroups, int, gidsetsize, gid_t __user *, grouplist) { const struct cred *cred = current_cred(); int i; -- cgit v1.2.3 From b290ebe2c46d01b742b948ce03f09e8a3efb9a92 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:06 +0100 Subject: [CVE-2009-0029] System call wrappers part 04 Signed-off-by: Heiko Carstens --- kernel/acct.c | 2 +- kernel/capability.c | 4 ++-- kernel/exec_domain.c | 3 +-- kernel/itimer.c | 2 +- kernel/signal.c | 7 +++---- kernel/sys.c | 6 +++--- 6 files changed, 11 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/acct.c b/kernel/acct.c index d57b7cbb98b6..7afa31564162 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -277,7 +277,7 @@ static int acct_on(char *name) * should be written. If the filename is NULL, accounting will be * shutdown. */ -asmlinkage long sys_acct(const char __user *name) +SYSCALL_DEFINE1(acct, const char __user *, name) { int error; diff --git a/kernel/capability.c b/kernel/capability.c index 688926e496be..4e17041963f5 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -161,7 +161,7 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, * * Returns 0 on success and < 0 on error. */ -asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) +SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr) { int ret = 0; pid_t pid; @@ -235,7 +235,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) * * Returns 0 on success and < 0 on error. */ -asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) +SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data) { struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; unsigned i, tocopy; diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index 0511716e9424..667c841c2952 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -209,8 +209,7 @@ static int __init proc_execdomains_init(void) module_init(proc_execdomains_init); #endif -asmlinkage long -sys_personality(u_long personality) +SYSCALL_DEFINE1(personality, u_long, personality) { u_long old = current->personality; diff --git a/kernel/itimer.c b/kernel/itimer.c index db7c358b9a02..7e0663ea94fc 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -100,7 +100,7 @@ int do_getitimer(int which, struct itimerval *value) return 0; } -asmlinkage long sys_getitimer(int which, struct itimerval __user *value) +SYSCALL_DEFINE2(getitimer, int, which, struct itimerval __user *, value) { int error = -EFAULT; struct itimerval get_buffer; diff --git a/kernel/signal.c b/kernel/signal.c index 856a5479d49d..3fe08eaa5dea 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2434,8 +2434,7 @@ out: #ifdef __ARCH_WANT_SYS_SIGPENDING -asmlinkage long -sys_sigpending(old_sigset_t __user *set) +SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set) { return do_sigpending(set, sizeof(*set)); } @@ -2446,8 +2445,8 @@ sys_sigpending(old_sigset_t __user *set) /* Some platforms have their own version with special arguments others support only sys_rt_sigprocmask. */ -asmlinkage long -sys_sigprocmask(int how, old_sigset_t __user *set, old_sigset_t __user *oset) +SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set, + old_sigset_t __user *, oset) { int error; old_sigset_t old_set, new_set; diff --git a/kernel/sys.c b/kernel/sys.c index ace9ced598b9..cbe4502c28a1 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -944,7 +944,7 @@ SYSCALL_DEFINE1(times, struct tms __user *, tbuf) * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. * LBT 04.03.94 */ -asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) +SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) { struct task_struct *p; struct task_struct *group_leader = current->group_leader; @@ -1080,7 +1080,7 @@ out: return retval; } -asmlinkage long sys_setsid(void) +SYSCALL_DEFINE0(setsid) { struct task_struct *group_leader = current->group_leader; struct pid *sid = task_pid(group_leader); @@ -1340,7 +1340,7 @@ out: * without another task interfering. */ -asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist) +SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist) { struct group_info *group_info; int retval; -- cgit v1.2.3 From 362e9c07c7220c0a78c88826fc0d2bf7e4a4bb68 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:07 +0100 Subject: [CVE-2009-0029] System call wrappers part 05 Signed-off-by: Heiko Carstens --- kernel/itimer.c | 5 ++--- kernel/posix-timers.c | 43 +++++++++++++++++++------------------------ 2 files changed, 21 insertions(+), 27 deletions(-) (limited to 'kernel') diff --git a/kernel/itimer.c b/kernel/itimer.c index 7e0663ea94fc..6a5fe93dd8bd 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -260,9 +260,8 @@ unsigned int alarm_setitimer(unsigned int seconds) return it_old.it_value.tv_sec; } -asmlinkage long sys_setitimer(int which, - struct itimerval __user *value, - struct itimerval __user *ovalue) +SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value, + struct itimerval __user *, ovalue) { struct itimerval set_buffer, get_buffer; int error; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 887c63787de6..052ec4d195c7 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -477,10 +477,9 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set) /* Create a POSIX.1b interval timer. */ -asmlinkage long -sys_timer_create(const clockid_t which_clock, - struct sigevent __user *timer_event_spec, - timer_t __user * created_timer_id) +SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, + struct sigevent __user *, timer_event_spec, + timer_t __user *, created_timer_id) { struct k_itimer *new_timer; int error, new_timer_id; @@ -661,8 +660,8 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) } /* Get the time remaining on a POSIX.1b interval timer. */ -asmlinkage long -sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting) +SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, + struct itimerspec __user *, setting) { struct k_itimer *timr; struct itimerspec cur_setting; @@ -691,8 +690,7 @@ sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting) * the call back to do_schedule_next_timer(). So all we need to do is * to pick up the frozen overrun. */ -asmlinkage long -sys_timer_getoverrun(timer_t timer_id) +SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) { struct k_itimer *timr; int overrun; @@ -760,10 +758,9 @@ common_timer_set(struct k_itimer *timr, int flags, } /* Set a POSIX.1b interval timer */ -asmlinkage long -sys_timer_settime(timer_t timer_id, int flags, - const struct itimerspec __user *new_setting, - struct itimerspec __user *old_setting) +SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, + const struct itimerspec __user *, new_setting, + struct itimerspec __user *, old_setting) { struct k_itimer *timr; struct itimerspec new_spec, old_spec; @@ -816,8 +813,7 @@ static inline int timer_delete_hook(struct k_itimer *timer) } /* Delete a POSIX.1b interval timer. */ -asmlinkage long -sys_timer_delete(timer_t timer_id) +SYSCALL_DEFINE1(timer_delete, timer_t, timer_id) { struct k_itimer *timer; unsigned long flags; @@ -903,8 +899,8 @@ int do_posix_clock_nonanosleep(const clockid_t clock, int flags, } EXPORT_SYMBOL_GPL(do_posix_clock_nonanosleep); -asmlinkage long sys_clock_settime(const clockid_t which_clock, - const struct timespec __user *tp) +SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, + const struct timespec __user *, tp) { struct timespec new_tp; @@ -916,8 +912,8 @@ asmlinkage long sys_clock_settime(const clockid_t which_clock, return CLOCK_DISPATCH(which_clock, clock_set, (which_clock, &new_tp)); } -asmlinkage long -sys_clock_gettime(const clockid_t which_clock, struct timespec __user *tp) +SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, + struct timespec __user *,tp) { struct timespec kernel_tp; int error; @@ -933,8 +929,8 @@ sys_clock_gettime(const clockid_t which_clock, struct timespec __user *tp) } -asmlinkage long -sys_clock_getres(const clockid_t which_clock, struct timespec __user *tp) +SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, + struct timespec __user *, tp) { struct timespec rtn_tp; int error; @@ -963,10 +959,9 @@ static int common_nsleep(const clockid_t which_clock, int flags, which_clock); } -asmlinkage long -sys_clock_nanosleep(const clockid_t which_clock, int flags, - const struct timespec __user *rqtp, - struct timespec __user *rmtp) +SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, + const struct timespec __user *, rqtp, + struct timespec __user *, rmtp) { struct timespec t; -- cgit v1.2.3 From 5add95d4f7cf08f6f62510f19576992912387501 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:08 +0100 Subject: [CVE-2009-0029] System call wrappers part 06 Signed-off-by: Heiko Carstens --- kernel/sched.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 8be2c13b50d0..1a0fdfa5ddf9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5126,7 +5126,7 @@ int can_nice(const struct task_struct *p, const int nice) * sys_setpriority is a more generic, but much slower function that * does similar things. */ -asmlinkage long sys_nice(int increment) +SYSCALL_DEFINE1(nice, int, increment) { long nice, retval; @@ -5433,8 +5433,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) * @policy: new policy. * @param: structure containing the new RT priority. */ -asmlinkage long -sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) +SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, + struct sched_param __user *, param) { /* negative values for policy are not valid */ if (policy < 0) @@ -5448,7 +5448,7 @@ sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) * @pid: the pid in question. * @param: structure containing the new RT priority. */ -asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param) +SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) { return do_sched_setscheduler(pid, -1, param); } @@ -5457,7 +5457,7 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param) * sys_sched_getscheduler - get the policy (scheduling class) of a thread * @pid: the pid in question. */ -asmlinkage long sys_sched_getscheduler(pid_t pid) +SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) { struct task_struct *p; int retval; @@ -5482,7 +5482,7 @@ asmlinkage long sys_sched_getscheduler(pid_t pid) * @pid: the pid in question. * @param: structure containing the RT priority. */ -asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) +SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) { struct sched_param lp; struct task_struct *p; @@ -5600,8 +5600,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, * @len: length in bytes of the bitmask pointed to by user_mask_ptr * @user_mask_ptr: user-space pointer to the new cpu mask */ -asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, - unsigned long __user *user_mask_ptr) +SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, + unsigned long __user *, user_mask_ptr) { cpumask_var_t new_mask; int retval; @@ -5648,8 +5648,8 @@ out_unlock: * @len: length in bytes of the bitmask pointed to by user_mask_ptr * @user_mask_ptr: user-space pointer to hold the current cpu mask */ -asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, - unsigned long __user *user_mask_ptr) +SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, + unsigned long __user *, user_mask_ptr) { int ret; cpumask_var_t mask; @@ -5678,7 +5678,7 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, * This function yields the current CPU to other tasks. If there are no * other threads running on this CPU then this function will return. */ -asmlinkage long sys_sched_yield(void) +SYSCALL_DEFINE0(sched_yield) { struct rq *rq = this_rq_lock(); @@ -5819,7 +5819,7 @@ long __sched io_schedule_timeout(long timeout) * this syscall returns the maximum rt_priority that can be used * by a given scheduling class. */ -asmlinkage long sys_sched_get_priority_max(int policy) +SYSCALL_DEFINE1(sched_get_priority_max, int, policy) { int ret = -EINVAL; @@ -5844,7 +5844,7 @@ asmlinkage long sys_sched_get_priority_max(int policy) * this syscall returns the minimum rt_priority that can be used * by a given scheduling class. */ -asmlinkage long sys_sched_get_priority_min(int policy) +SYSCALL_DEFINE1(sched_get_priority_min, int, policy) { int ret = -EINVAL; -- cgit v1.2.3 From 754fe8d297bfae7b77f7ce866e2fb0c5fb186506 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:09 +0100 Subject: [CVE-2009-0029] System call wrappers part 07 Signed-off-by: Heiko Carstens --- kernel/exit.c | 8 ++++---- kernel/kexec.c | 5 ++--- kernel/sched.c | 4 ++-- kernel/signal.c | 2 +- kernel/sys.c | 7 ++++--- 5 files changed, 13 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index fac9b040af2c..08895df0eab3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1141,7 +1141,7 @@ NORET_TYPE void complete_and_exit(struct completion *comp, long code) EXPORT_SYMBOL(complete_and_exit); -asmlinkage long sys_exit(int error_code) +SYSCALL_DEFINE1(exit, int, error_code) { do_exit((error_code&0xff)<<8); } @@ -1182,7 +1182,7 @@ do_group_exit(int exit_code) * wait4()-ing process will get the correct exit code - even if this * thread is not the thread group leader. */ -asmlinkage long sys_exit_group(int error_code) +SYSCALL_DEFINE1(exit_group, int, error_code) { do_group_exit((error_code & 0xff) << 8); /* NOTREACHED */ @@ -1795,8 +1795,8 @@ asmlinkage long sys_waitid(int which, pid_t upid, return ret; } -asmlinkage long sys_wait4(pid_t upid, int __user *stat_addr, - int options, struct rusage __user *ru) +SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, + int, options, struct rusage __user *, ru) { struct pid *pid = NULL; enum pid_type type; diff --git a/kernel/kexec.c b/kernel/kexec.c index 3fb855ad6aa0..8a6d7b08864e 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -934,9 +934,8 @@ struct kimage *kexec_crash_image; static DEFINE_MUTEX(kexec_mutex); -asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, - struct kexec_segment __user *segments, - unsigned long flags) +SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, + struct kexec_segment __user *, segments, unsigned long, flags) { struct kimage **dest_image, *image; int result; diff --git a/kernel/sched.c b/kernel/sched.c index 1a0fdfa5ddf9..65c02037b052 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5869,8 +5869,8 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy) * this syscall writes the default timeslice value of a given process * into the user-space timespec buffer. A value of '0' means infinity. */ -asmlinkage -long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) +SYSCALL_DEFINE4(sched_rr_get_interval, pid_t, pid, + struct timespec __user *, interval) { struct task_struct *p; unsigned int time_slice; diff --git a/kernel/signal.c b/kernel/signal.c index 3fe08eaa5dea..41f32e08615e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1961,7 +1961,7 @@ EXPORT_SYMBOL(unblock_all_signals); * System call entry points. */ -asmlinkage long sys_restart_syscall(void) +SYSCALL_DEFINE0(restart_syscall) { struct restart_block *restart = ¤t_thread_info()->restart_block; return restart->fn(restart); diff --git a/kernel/sys.c b/kernel/sys.c index cbe4502c28a1..39b192b40034 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -143,7 +143,7 @@ out: return error; } -asmlinkage long sys_setpriority(int which, int who, int niceval) +SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) { struct task_struct *g, *p; struct user_struct *user; @@ -208,7 +208,7 @@ out: * has been offset by 20 (ie it returns 40..1 instead of -20..19) * to stay compatible. */ -asmlinkage long sys_getpriority(int which, int who) +SYSCALL_DEFINE2(getpriority, int, which, int, who) { struct task_struct *g, *p; struct user_struct *user; @@ -355,7 +355,8 @@ EXPORT_SYMBOL_GPL(kernel_power_off); * * reboot doesn't sync: do that yourself before calling this. */ -asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user * arg) +SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, + void __user *, arg) { char buffer[256]; -- cgit v1.2.3 From 17da2bd90abf428523de0fb98f7075e00e3ed42e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:10 +0100 Subject: [CVE-2009-0029] System call wrappers part 08 Signed-off-by: Heiko Carstens --- kernel/exit.c | 7 +++---- kernel/fork.c | 2 +- kernel/futex.c | 6 +++--- kernel/module.c | 10 ++++------ kernel/sched.c | 2 +- kernel/signal.c | 18 +++++++----------- 6 files changed, 19 insertions(+), 26 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 08895df0eab3..f80dec3f1875 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1754,9 +1754,8 @@ end: return retval; } -asmlinkage long sys_waitid(int which, pid_t upid, - struct siginfo __user *infop, int options, - struct rusage __user *ru) +SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, + infop, int, options, struct rusage __user *, ru) { struct pid *pid = NULL; enum pid_type type; @@ -1833,7 +1832,7 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, * sys_waitpid() remains for compatibility. waitpid() should be * implemented by calling sys_wait4() from libc.a. */ -asmlinkage long sys_waitpid(pid_t pid, int __user *stat_addr, int options) +SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options) { return sys_wait4(pid, stat_addr, options, NULL); } diff --git a/kernel/fork.c b/kernel/fork.c index 1d68f1255dd8..8eb37d38c6a4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -901,7 +901,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) clear_freeze_flag(p); } -asmlinkage long sys_set_tid_address(int __user *tidptr) +SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) { current->clear_child_tid = tidptr; diff --git a/kernel/futex.c b/kernel/futex.c index 002aa189eb09..e86931d8d4e9 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1978,9 +1978,9 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, } -asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, - struct timespec __user *utime, u32 __user *uaddr2, - u32 val3) +SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + struct timespec __user *, utime, u32 __user *, uaddr2, + u32, val3) { struct timespec ts; ktime_t t, *tp = NULL; diff --git a/kernel/module.c b/kernel/module.c index c9332c90d5a0..e8b51d41dd72 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -743,8 +743,8 @@ static void wait_for_zero_refcount(struct module *mod) mutex_lock(&module_mutex); } -asmlinkage long -sys_delete_module(const char __user *name_user, unsigned int flags) +SYSCALL_DEFINE2(delete_module, const char __user *, name_user, + unsigned int, flags) { struct module *mod; char name[MODULE_NAME_LEN]; @@ -2296,10 +2296,8 @@ static noinline struct module *load_module(void __user *umod, } /* This is where the real work happens */ -asmlinkage long -sys_init_module(void __user *umod, - unsigned long len, - const char __user *uargs) +SYSCALL_DEFINE3(init_module, void __user *, umod, + unsigned long, len, const char __user *, uargs) { struct module *mod; int ret = 0; diff --git a/kernel/sched.c b/kernel/sched.c index 65c02037b052..eb1931eef587 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5869,7 +5869,7 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy) * this syscall writes the default timeslice value of a given process * into the user-space timespec buffer. A value of '0' means infinity. */ -SYSCALL_DEFINE4(sched_rr_get_interval, pid_t, pid, +SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, struct timespec __user *, interval) { struct task_struct *p; diff --git a/kernel/signal.c b/kernel/signal.c index 41f32e08615e..278cc8737f17 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2014,8 +2014,8 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) return error; } -asmlinkage long -sys_rt_sigprocmask(int how, sigset_t __user *set, sigset_t __user *oset, size_t sigsetsize) +SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set, + sigset_t __user *, oset, size_t, sigsetsize) { int error = -EINVAL; sigset_t old_set, new_set; @@ -2074,8 +2074,7 @@ out: return error; } -asmlinkage long -sys_rt_sigpending(sigset_t __user *set, size_t sigsetsize) +SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize) { return do_sigpending(set, sigsetsize); } @@ -2146,11 +2145,9 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from) #endif -asmlinkage long -sys_rt_sigtimedwait(const sigset_t __user *uthese, - siginfo_t __user *uinfo, - const struct timespec __user *uts, - size_t sigsetsize) +SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, + siginfo_t __user *, uinfo, const struct timespec __user *, uts, + size_t, sigsetsize) { int ret, sig; sigset_t these; @@ -2223,8 +2220,7 @@ sys_rt_sigtimedwait(const sigset_t __user *uthese, return ret; } -asmlinkage long -sys_kill(pid_t pid, int sig) +SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) { struct siginfo info; -- cgit v1.2.3 From a5f8fa9e9ba5ef3305e147f41ad6e1e84ac1f0bd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:11 +0100 Subject: [CVE-2009-0029] System call wrappers part 09 Signed-off-by: Heiko Carstens --- kernel/signal.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index 278cc8737f17..e2333929611a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2279,7 +2279,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig) * exists but it's not belonging to the target process anymore. This * method solves the problem of threads exiting and PIDs getting reused. */ -asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig) +SYSCALL_DEFINE3(tgkill, pid_t, tgid, pid_t, pid, int, sig) { /* This is only valid for single tasks */ if (pid <= 0 || tgid <= 0) @@ -2291,8 +2291,7 @@ asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig) /* * Send a signal to only one task, even if it's a CLONE_THREAD task. */ -asmlinkage long -sys_tkill(pid_t pid, int sig) +SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig) { /* This is only valid for single tasks */ if (pid <= 0) @@ -2301,8 +2300,8 @@ sys_tkill(pid_t pid, int sig) return do_tkill(0, pid, sig); } -asmlinkage long -sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo) +SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, + siginfo_t __user *, uinfo) { siginfo_t info; @@ -2526,15 +2525,13 @@ out: /* * For backwards compatibility. Functionality superseded by sigprocmask. */ -asmlinkage long -sys_sgetmask(void) +SYSCALL_DEFINE0(sgetmask) { /* SMP safe */ return current->blocked.sig[0]; } -asmlinkage long -sys_ssetmask(int newmask) +SYSCALL_DEFINE1(ssetmask, int, newmask) { int old; @@ -2554,8 +2551,7 @@ sys_ssetmask(int newmask) /* * For backwards compatibility. Functionality superseded by sigaction. */ -asmlinkage long -sys_signal(int sig, __sighandler_t handler) +SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler) { struct k_sigaction new_sa, old_sa; int ret; @@ -2572,8 +2568,7 @@ sys_signal(int sig, __sighandler_t handler) #ifdef __ARCH_WANT_SYS_PAUSE -asmlinkage long -sys_pause(void) +SYSCALL_DEFINE0(pause) { current->state = TASK_INTERRUPTIBLE; schedule(); -- cgit v1.2.3 From ca013e945b1ba5828b151ee646946f1297b67a4c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:19 +0100 Subject: [CVE-2009-0029] System call wrappers part 17 Signed-off-by: Heiko Carstens --- kernel/uid16.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/uid16.c b/kernel/uid16.c index 2460c3199b5a..37f48c049a2a 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -17,7 +17,7 @@ #include -asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gid_t group) +SYSCALL_DEFINE3(chown16, const char __user *, filename, old_uid_t, user, old_gid_t, group) { long ret = sys_chown(filename, low2highuid(user), low2highgid(group)); /* avoid REGPARM breakage on x86: */ @@ -25,7 +25,7 @@ asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gi return ret; } -asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_gid_t group) +SYSCALL_DEFINE3(lchown16, const char __user *, filename, old_uid_t, user, old_gid_t, group) { long ret = sys_lchown(filename, low2highuid(user), low2highgid(group)); /* avoid REGPARM breakage on x86: */ @@ -33,7 +33,7 @@ asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_g return ret; } -asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group) +SYSCALL_DEFINE3(fchown16, unsigned int, fd, old_uid_t, user, old_gid_t, group) { long ret = sys_fchown(fd, low2highuid(user), low2highgid(group)); /* avoid REGPARM breakage on x86: */ -- cgit v1.2.3 From a6b42e83f249aad723589b2bdf6d1dfb2b0997c8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:20 +0100 Subject: [CVE-2009-0029] System call wrappers part 18 Signed-off-by: Heiko Carstens --- kernel/uid16.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/uid16.c b/kernel/uid16.c index 37f48c049a2a..221894e6e980 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -41,7 +41,7 @@ SYSCALL_DEFINE3(fchown16, unsigned int, fd, old_uid_t, user, old_gid_t, group) return ret; } -asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid) +SYSCALL_DEFINE2(setregid16, old_gid_t, rgid, old_gid_t, egid) { long ret = sys_setregid(low2highgid(rgid), low2highgid(egid)); /* avoid REGPARM breakage on x86: */ @@ -49,7 +49,7 @@ asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid) return ret; } -asmlinkage long sys_setgid16(old_gid_t gid) +SYSCALL_DEFINE1(setgid16, old_gid_t, gid) { long ret = sys_setgid(low2highgid(gid)); /* avoid REGPARM breakage on x86: */ @@ -57,7 +57,7 @@ asmlinkage long sys_setgid16(old_gid_t gid) return ret; } -asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid) +SYSCALL_DEFINE2(setreuid16, old_uid_t, ruid, old_uid_t, euid) { long ret = sys_setreuid(low2highuid(ruid), low2highuid(euid)); /* avoid REGPARM breakage on x86: */ @@ -65,7 +65,7 @@ asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid) return ret; } -asmlinkage long sys_setuid16(old_uid_t uid) +SYSCALL_DEFINE1(setuid16, old_uid_t, uid) { long ret = sys_setuid(low2highuid(uid)); /* avoid REGPARM breakage on x86: */ @@ -73,7 +73,7 @@ asmlinkage long sys_setuid16(old_uid_t uid) return ret; } -asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid) +SYSCALL_DEFINE3(setresuid16, old_uid_t, ruid, old_uid_t, euid, old_uid_t, suid) { long ret = sys_setresuid(low2highuid(ruid), low2highuid(euid), low2highuid(suid)); @@ -82,7 +82,7 @@ asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid) return ret; } -asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, old_uid_t __user *suid) +SYSCALL_DEFINE3(getresuid16, old_uid_t __user *, ruid, old_uid_t __user *, euid, old_uid_t __user *, suid) { const struct cred *cred = current_cred(); int retval; @@ -94,7 +94,7 @@ asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, return retval; } -asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid) +SYSCALL_DEFINE3(setresgid16, old_gid_t, rgid, old_gid_t, egid, old_gid_t, sgid) { long ret = sys_setresgid(low2highgid(rgid), low2highgid(egid), low2highgid(sgid)); @@ -103,7 +103,8 @@ asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid) return ret; } -asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, old_gid_t __user *sgid) + +SYSCALL_DEFINE3(getresgid16, old_gid_t __user *, rgid, old_gid_t __user *, egid, old_gid_t __user *, sgid) { const struct cred *cred = current_cred(); int retval; @@ -115,7 +116,7 @@ asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, return retval; } -asmlinkage long sys_setfsuid16(old_uid_t uid) +SYSCALL_DEFINE1(setfsuid16, old_uid_t, uid) { long ret = sys_setfsuid(low2highuid(uid)); /* avoid REGPARM breakage on x86: */ @@ -123,7 +124,7 @@ asmlinkage long sys_setfsuid16(old_uid_t uid) return ret; } -asmlinkage long sys_setfsgid16(old_gid_t gid) +SYSCALL_DEFINE1(setfsgid16, old_gid_t, gid) { long ret = sys_setfsgid(low2highgid(gid)); /* avoid REGPARM breakage on x86: */ -- cgit v1.2.3 From 003d7ab479168132a2b2c6700fe682b08f08ab0c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:21 +0100 Subject: [CVE-2009-0029] System call wrappers part 19 Signed-off-by: Heiko Carstens --- kernel/uid16.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/uid16.c b/kernel/uid16.c index 221894e6e980..0314501688b9 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -162,7 +162,7 @@ static int groups16_from_user(struct group_info *group_info, return 0; } -asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist) +SYSCALL_DEFINE2(getgroups16, int, gidsetsize, old_gid_t __user *, grouplist) { const struct cred *cred = current_cred(); int i; @@ -185,7 +185,7 @@ out: return i; } -asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist) +SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) { struct group_info *group_info; int retval; @@ -210,22 +210,22 @@ asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist) return retval; } -asmlinkage long sys_getuid16(void) +SYSCALL_DEFINE0(getuid16) { return high2lowuid(current_uid()); } -asmlinkage long sys_geteuid16(void) +SYSCALL_DEFINE0(geteuid16) { return high2lowuid(current_euid()); } -asmlinkage long sys_getgid16(void) +SYSCALL_DEFINE0(getgid16) { return high2lowgid(current_gid()); } -asmlinkage long sys_getegid16(void) +SYSCALL_DEFINE0(getegid16) { return high2lowgid(current_egid()); } -- cgit v1.2.3 From 5a8a82b1d306a325d899b67715618413657efda4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:25 +0100 Subject: [CVE-2009-0029] System call wrappers part 23 Signed-off-by: Heiko Carstens --- kernel/sys.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 39b192b40034..5292f2119da4 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1406,7 +1406,7 @@ asmlinkage long sys_newuname(struct new_utsname __user * name) return errno; } -asmlinkage long sys_sethostname(char __user *name, int len) +SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) { int errno; char tmp[__NEW_UTS_LEN]; @@ -1430,7 +1430,7 @@ asmlinkage long sys_sethostname(char __user *name, int len) #ifdef __ARCH_WANT_SYS_GETHOSTNAME -asmlinkage long sys_gethostname(char __user *name, int len) +SYSCALL_DEFINE2(gethostname, char __user *, name, int, len) { int i, errno; struct new_utsname *u; @@ -1455,7 +1455,7 @@ asmlinkage long sys_gethostname(char __user *name, int len) * Only setdomainname; getdomainname can be implemented by calling * uname() */ -asmlinkage long sys_setdomainname(char __user *name, int len) +SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) { int errno; char tmp[__NEW_UTS_LEN]; -- cgit v1.2.3 From e48fbb699f82ef1e80bd7126046394d2dc9ca7e6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:26 +0100 Subject: [CVE-2009-0029] System call wrappers part 24 Signed-off-by: Heiko Carstens --- kernel/sys.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 5292f2119da4..70ffa8408cd4 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1395,7 +1395,7 @@ EXPORT_SYMBOL(in_egroup_p); DECLARE_RWSEM(uts_sem); -asmlinkage long sys_newuname(struct new_utsname __user * name) +SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) { int errno = 0; @@ -1478,7 +1478,7 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) return errno; } -asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim) +SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim) { if (resource >= RLIM_NLIMITS) return -EINVAL; @@ -1497,7 +1497,8 @@ asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim) * Back compatibility for getrlimit. Needed for some apps. */ -asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim) +SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, + struct rlimit __user *, rlim) { struct rlimit x; if (resource >= RLIM_NLIMITS) @@ -1515,7 +1516,7 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r #endif -asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) +SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) { struct rlimit new_rlim, *old_rlim; int retval; @@ -1688,7 +1689,7 @@ int getrusage(struct task_struct *p, int who, struct rusage __user *ru) return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; } -asmlinkage long sys_getrusage(int who, struct rusage __user *ru) +SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) { if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && who != RUSAGE_THREAD) @@ -1696,7 +1697,7 @@ asmlinkage long sys_getrusage(int who, struct rusage __user *ru) return getrusage(current, who, ru); } -asmlinkage long sys_umask(int mask) +SYSCALL_DEFINE1(umask, int, mask) { mask = xchg(¤t->fs->umask, mask & S_IRWXUGO); return mask; -- cgit v1.2.3 From c4ea37c26a691ad0b7e86aa5884aab27830e95c9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:28 +0100 Subject: [CVE-2009-0029] System call wrappers part 26 Signed-off-by: Heiko Carstens --- kernel/sys.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 70ffa8408cd4..59aadcdad6ce 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1703,8 +1703,8 @@ SYSCALL_DEFINE1(umask, int, mask) return mask; } -asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5) +SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, + unsigned long, arg4, unsigned long, arg5) { struct task_struct *me = current; unsigned char comm[sizeof(me->comm)]; -- cgit v1.2.3 From 1e7bfb2134dfec37ce04fb3a4ca89299e892d10c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:29 +0100 Subject: [CVE-2009-0029] System call wrappers part 27 Signed-off-by: Heiko Carstens --- kernel/printk.c | 2 +- kernel/ptrace.c | 2 +- kernel/sysctl.c | 4 ++-- kernel/timer.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index e48cf33783fc..69188f226a93 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -382,7 +382,7 @@ out: return error; } -asmlinkage long sys_syslog(int type, char __user *buf, int len) +SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) { return do_syslog(type, buf, len); } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 29dc700e198c..c9cf48b21f05 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -574,7 +574,7 @@ struct task_struct *ptrace_get_task_struct(pid_t pid) #define arch_ptrace_attach(child) do { } while (0) #endif -asmlinkage long sys_ptrace(long request, long pid, long addr, long data) +SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) { struct task_struct *child; long ret; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 89d74436318c..3e38b74b6124 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1688,7 +1688,7 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol return error; } -asmlinkage long sys_sysctl(struct __sysctl_args __user *args) +SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args) { struct __sysctl_args tmp; int error; @@ -2989,7 +2989,7 @@ int sysctl_ms_jiffies(struct ctl_table *table, #else /* CONFIG_SYSCTL_SYSCALL */ -asmlinkage long sys_sysctl(struct __sysctl_args __user *args) +SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args) { struct __sysctl_args tmp; int error; diff --git a/kernel/timer.c b/kernel/timer.c index 14a51530a4cd..13dd64fe143d 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1400,7 +1400,7 @@ out: return 0; } -asmlinkage long sys_sysinfo(struct sysinfo __user *info) +SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info) { struct sysinfo val; -- cgit v1.2.3 From 6559eed8ca7db0531a207cd80be5e28cd6f213c5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:32 +0100 Subject: [CVE-2009-0029] System call wrappers part 30 Signed-off-by: Heiko Carstens --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 8eb37d38c6a4..bf0cef8bbdf2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1603,7 +1603,7 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp * constructed. Here we are modifying the current, active, * task_struct. */ -asmlinkage long sys_unshare(unsigned long unshare_flags) +SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) { int err = 0; struct fs_struct *fs, *new_fs = NULL; -- cgit v1.2.3 From 836f92adf121f806e9beb5b6b88bd5c9c4ea3f24 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:33 +0100 Subject: [CVE-2009-0029] System call wrappers part 31 Signed-off-by: Heiko Carstens --- kernel/futex.c | 11 +++++------ kernel/sys.c | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index e86931d8d4e9..f89d373a9c6d 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1733,9 +1733,8 @@ pi_faulted: * @head: pointer to the list-head * @len: length of the list-head, as userspace expects */ -asmlinkage long -sys_set_robust_list(struct robust_list_head __user *head, - size_t len) +SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, + size_t, len) { if (!futex_cmpxchg_enabled) return -ENOSYS; @@ -1756,9 +1755,9 @@ sys_set_robust_list(struct robust_list_head __user *head, * @head_ptr: pointer to a list-head pointer, the kernel fills it in * @len_ptr: pointer to a length field, the kernel fills in the header size */ -asmlinkage long -sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, - size_t __user *len_ptr) +SYSCALL_DEFINE3(get_robust_list, int, pid, + struct robust_list_head __user * __user *, head_ptr, + size_t __user *, len_ptr) { struct robust_list_head __user *head; unsigned long ret; diff --git a/kernel/sys.c b/kernel/sys.c index 59aadcdad6ce..e7dc0e10a485 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1817,8 +1817,8 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return error; } -asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep, - struct getcpu_cache __user *unused) +SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, + struct getcpu_cache __user *, unused) { int err = 0; int cpu = raw_smp_processor_id(); -- cgit v1.2.3 From d4e82042c4cfa87a7d51710b71f568fe80132551 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:34 +0100 Subject: [CVE-2009-0029] System call wrappers part 32 Signed-off-by: Heiko Carstens --- kernel/signal.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index e2333929611a..e73759783dc8 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2491,11 +2491,10 @@ out: #endif /* __ARCH_WANT_SYS_SIGPROCMASK */ #ifdef __ARCH_WANT_SYS_RT_SIGACTION -asmlinkage long -sys_rt_sigaction(int sig, - const struct sigaction __user *act, - struct sigaction __user *oact, - size_t sigsetsize) +SYSCALL_DEFINE4(rt_sigaction, int, sig, + const struct sigaction __user *, act, + struct sigaction __user *, oact, + size_t, sigsetsize) { struct k_sigaction new_sa, old_sa; int ret = -EINVAL; @@ -2578,7 +2577,7 @@ SYSCALL_DEFINE0(pause) #endif #ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND -asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize) +SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize) { sigset_t newset; -- cgit v1.2.3 From 9316fcacb89c59fe556c48587ac02cd7f5d38045 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 14 Jan 2009 09:35:44 -0800 Subject: kernel/up.c: omit it if SMP=y, USE_GENERIC_SMP_HELPERS=n Fix the sparc build - we were including `up.o' on SMP builds, when CONFIG_USE_GENERIC_SMP_HELPERS=n. Tested-by: Robert Reif Fixed-by: Robert Reif Cc: David Miller Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/Makefile b/kernel/Makefile index 2aebc4cd7878..170a9213c1b6 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -40,9 +40,8 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o -ifeq ($(CONFIG_USE_GENERIC_SMP_HELPERS),y) -obj-y += smp.o -else +obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o +ifneq ($(CONFIG_SMP),y) obj-y += up.o endif obj-$(CONFIG_SMP) += spinlock.o -- cgit v1.2.3 From 98a4826b99bc4bcc34c604b2fc4fcf4d771600ec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Jan 2009 10:56:32 +0100 Subject: sched: fix bandwidth validation for UID grouping Impact: make rt-limit tunables work again Mark Glines reported: > I've got an issue on x86-64 where I can't configure the system to allow > RT tasks for a non-root user. > > In 2.6.26.5, I was able to do the following to set things up nicely: > echo 450000 >/sys/kernel/uids/0/cpu_rt_runtime > echo 450000 >/sys/kernel/uids/1000/cpu_rt_runtime > > Seems like every value I try to echo into the /sys files returns EINVAL. For UID grouping we initialize the root group with infinite bandwidth which by default is actually more than the global limit, therefore the bandwidth check always fails. Because the root group is a phantom group (for UID grouping) we cannot runtime adjust it, therefore we let it reflect the global bandwidth settings. Reported-by: Mark Glines Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 3b630d882660..ed62d1cee05c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9050,6 +9050,13 @@ static int tg_schedulable(struct task_group *tg, void *data) runtime = d->rt_runtime; } +#ifdef CONFIG_USER_SCHED + if (tg == &root_task_group) { + period = global_rt_period(); + runtime = global_rt_runtime(); + } +#endif + /* * Cannot have more runtime than the period. */ -- cgit v1.2.3 From cce7ade803699463ecc62a065ca522004f7ccb3d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Jan 2009 14:53:37 +0100 Subject: sched: SCHED_IDLE weight change Increase the SCHED_IDLE weight from 2 to 3, this gives much more stable vruntime numbers. time advanced in 100ms: weight=2 64765.988352 67012.881408 88501.412352 weight=3 35496.181411 34130.971298 35497.411573 Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index ed62d1cee05c..6acfb3c2398b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1323,8 +1323,8 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec) * slice expiry etc. */ -#define WEIGHT_IDLEPRIO 2 -#define WMULT_IDLEPRIO (1 << 31) +#define WEIGHT_IDLEPRIO 3 +#define WMULT_IDLEPRIO 1431655765 /* * Nice levels are multiplicative, with a gentle 10% change for every -- cgit v1.2.3 From 6bc912b71b6f33b041cfde93ca3f019cbaa852bc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Jan 2009 14:53:38 +0100 Subject: sched: SCHED_OTHER vs SCHED_IDLE isolation Stronger SCHED_IDLE isolation: - no SCHED_IDLE buddies - never let SCHED_IDLE preempt on wakeup - always preempt SCHED_IDLE on wakeup - limit SLEEPER fairness for SCHED_IDLE. Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 8e1352c75557..cdebd8089cb0 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -677,9 +677,13 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) unsigned long thresh = sysctl_sched_latency; /* - * convert the sleeper threshold into virtual time + * Convert the sleeper threshold into virtual time. + * SCHED_IDLE is a special sub-class. We care about + * fairness only relative to other SCHED_IDLE tasks, + * all of which have the same weight. */ - if (sched_feat(NORMALIZED_SLEEPER)) + if (sched_feat(NORMALIZED_SLEEPER) && + task_of(se)->policy != SCHED_IDLE) thresh = calc_delta_fair(thresh, se); vruntime -= thresh; @@ -1340,14 +1344,18 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) static void set_last_buddy(struct sched_entity *se) { - for_each_sched_entity(se) - cfs_rq_of(se)->last = se; + if (likely(task_of(se)->policy != SCHED_IDLE)) { + for_each_sched_entity(se) + cfs_rq_of(se)->last = se; + } } static void set_next_buddy(struct sched_entity *se) { - for_each_sched_entity(se) - cfs_rq_of(se)->next = se; + if (likely(task_of(se)->policy != SCHED_IDLE)) { + for_each_sched_entity(se) + cfs_rq_of(se)->next = se; + } } /* @@ -1393,12 +1401,18 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) return; /* - * Batch tasks do not preempt (their preemption is driven by + * Batch and idle tasks do not preempt (their preemption is driven by * the tick): */ - if (unlikely(p->policy == SCHED_BATCH)) + if (unlikely(p->policy != SCHED_NORMAL)) return; + /* Idle tasks are by definition preempted by everybody. */ + if (unlikely(curr->policy == SCHED_IDLE)) { + resched_task(curr); + return; + } + if (!sched_feat(WAKEUP_PREEMPT)) return; -- cgit v1.2.3 From e17036dac189dd034c092a91df56aa740db7146d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Jan 2009 14:53:39 +0100 Subject: sched: fix update_min_vruntime Impact: fix SCHED_IDLE latency problems OK, so we have 1 running task A (which is obviously curr and the tree is equally obviously empty). 'A' nicely chugs along, doing its thing, carrying min_vruntime along as it goes. Then some whacko speed freak SCHED_IDLE task gets inserted due to SMP balancing, which is very likely far right, in that case update_curr update_min_vruntime cfs_rq->rb_leftmost := true (the crazy task sitting in a tree) vruntime = se->vruntime and voila, min_vruntime is waaay right of where it ought to be. OK, so why did I write it like that to begin with... Aah, yes. Say we've just dequeued current schedule deactivate_task(prev) dequeue_entity update_min_vruntime Then we'll set vruntime = cfs_rq->min_vruntime; we find !cfs_rq->curr, but do find someone in the tree. Then we _must_ do vruntime = se->vruntime, because vruntime = min_vruntime(vruntime := cfs_rq->min_vruntime, se->vruntime) will not advance vruntime, and cause lags the other way around (which we fixed with that initial patch: 1af5f730fc1bf7c62ec9fb2d307206e18bf40a69 (sched: more accurate min_vruntime accounting). Signed-off-by: Peter Zijlstra Tested-by: Mike Galbraith Acked-by: Mike Galbraith Cc: Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index cdebd8089cb0..16b419bb8b0a 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -283,7 +283,7 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq) struct sched_entity, run_node); - if (vruntime == cfs_rq->min_vruntime) + if (!cfs_rq->curr) vruntime = se->vruntime; else vruntime = min_vruntime(vruntime, se->vruntime); -- cgit v1.2.3 From 88fc241f54459ac3d86c5e13b449730199f66061 Mon Sep 17 00:00:00 2001 From: Doug Chapman Date: Thu, 15 Jan 2009 10:38:56 -0800 Subject: [IA64] dump stack on kernel unaligned warnings Often the cause of kernel unaligned access warnings is not obvious from just the ip displayed in the warning. This adds the option via proc to dump the stack in addition to the warning. The default is off (just display the 1 line warning). To enable the stack to be shown: echo 1 > /proc/sys/kernel/unaligned-dump-stack Signed-off-by: Doug Chapman Signed-off-by: Tony Luck --- kernel/sysctl.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3e38b74b6124..368d1638ee78 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -144,6 +144,7 @@ extern int acct_parm[]; #ifdef CONFIG_IA64 extern int no_unaligned_warning; +extern int unaligned_dump_stack; #endif #ifdef CONFIG_RT_MUTEXES @@ -781,6 +782,14 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "unaligned-dump-stack", + .data = &unaligned_dump_stack, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif #ifdef CONFIG_DETECT_SOFTLOCKUP { -- cgit v1.2.3 From 6272d68cc6a5f90c6b1a2228cf0f67b895305d17 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Thu, 15 Jan 2009 17:17:15 +0100 Subject: sched: sched_slice() fixlet Mike's change: 0a582440f "sched: fix sched_slice())" broke group scheduling by forgetting to reload cfs_rq on each loop. This patch fixes aim7 regression and specjbb2005 regression becomes less than 1.5% on 8-core stokley. Signed-off-by: Lin Ming Signed-off-by: Peter Zijlstra Tested-by: Jayson King Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 16b419bb8b0a..5cc1c162044f 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -429,7 +429,10 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq); for_each_sched_entity(se) { - struct load_weight *load = &cfs_rq->load; + struct load_weight *load; + + cfs_rq = cfs_rq_of(se); + load = &cfs_rq->load; if (unlikely(!se->on_rq)) { struct load_weight lw = cfs_rq->load; -- cgit v1.2.3 From 45ce80fb6b6f9594d1396d44dd7e7c02d596fef8 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 15 Jan 2009 13:50:59 -0800 Subject: cgroups: consolidate cgroup documents Move Documentation/cpusets.txt and Documentation/controllers/* to Documentation/cgroups/ Signed-off-by: Li Zefan Acked-by: KAMEZAWA Hiroyuki Acked-by: Balbir Singh Acked-by: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 647c77a88fcb..a85678865c5e 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -568,7 +568,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) * load balancing domains (sched domains) as specified by that partial * partition. * - * See "What is sched_load_balance" in Documentation/cpusets.txt + * See "What is sched_load_balance" in Documentation/cgroups/cpusets.txt * for a background explanation of this. * * Does not return errors, on the theory that the callers of this -- cgit v1.2.3 From 6ae301e85c9c58d2f430a8a7057ce488b7ff76df Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Jan 2009 13:51:01 -0800 Subject: resources: fix parameter name and kernel-doc Fix __request_region() parameter kernel-doc notation and parameter name: Warning(linux-2.6.28-git10//kernel/resource.c:627): No description found for parameter 'flags' Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/resource.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index ca6a1536b205..fd5d7d574bb9 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -620,6 +620,7 @@ resource_size_t resource_alignment(struct resource *res) * @start: resource start address * @n: resource region size * @name: reserving caller's ID string + * @flags: IO resource flags */ struct resource * __request_region(struct resource *parent, resource_size_t start, resource_size_t n, -- cgit v1.2.3 From 33f1d7ecc6cffff3c618a02295de969ebbacd95d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 6 Jan 2009 21:14:04 +0100 Subject: PM: Fix freezer compilation if PM_SLEEP is unset Freezer fails to compile if with the following configuration settings: CONFIG_CGROUPS=y CONFIG_CGROUP_FREEZER=y CONFIG_MODULES=y CONFIG_FREEZER=y CONFIG_PM=y CONFIG_PM_SLEEP=n Fix this by making process.o compilation depend on CONFIG_FREEZER. Reported-by: Cheng Renquan Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Len Brown --- kernel/power/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 597823b5b700..d7a10167a25b 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -4,7 +4,8 @@ EXTRA_CFLAGS += -DDEBUG endif obj-y := main.o -obj-$(CONFIG_PM_SLEEP) += process.o console.o +obj-$(CONFIG_PM_SLEEP) += console.o +obj-$(CONFIG_FREEZER) += process.o obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o -- cgit v1.2.3 From 5a4ccaf37ffece09ef33f1cfec67efa8ee56f967 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 6 Jan 2009 21:15:32 +0100 Subject: kprobes: check CONFIG_FREEZER instead of CONFIG_PM Check CONFIG_FREEZER instead of CONFIG_PM because kprobe booster depends on freeze_processes() and thaw_processes() when CONFIG_PREEMPT=y. This fixes a linkage error which occurs when CONFIG_PREEMPT=y, CONFIG_PM=y and CONFIG_FREEZER=n. Reported-by: Cheng Renquan Signed-off-by: Masami Hiramatsu Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar Signed-off-by: Len Brown --- kernel/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 1b9cbdc0127a..7ba8cd9845cb 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -123,7 +123,7 @@ static int collect_garbage_slots(void); static int __kprobes check_safety(void) { int ret = 0; -#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM) +#if defined(CONFIG_PREEMPT) && defined(CONFIG_FREEZER) ret = freeze_processes(); if (ret == 0) { struct task_struct *p, *q; -- cgit v1.2.3