aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-08-04 14:27:25 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-08-04 14:27:25 -0700
commit3950e975431bc914f7e81b8f2a2dbdf2064acb0f (patch)
tree32adad006224780b83a23201f97368fb45bd4354 /fs
parentfd76a74d940ae3d6b8b2395cd12914630c7e1739 (diff)
parent7fce69dff8db30cb93aace0bbebda09972027af7 (diff)
Merge branch 'exec-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull execve updates from Eric Biederman: "During the development of v5.7 I ran into bugs and quality of implementation issues related to exec that could not be easily fixed because of the way exec is implemented. So I have been diggin into exec and cleaning up what I can. This cycle I have been looking at different ideas and different implementations to see what is possible to improve exec, and cleaning the way exec interfaces with in kernel users. Only cleaning up the interfaces of exec with rest of the kernel has managed to stabalize and make it through review in time for v5.9-rc1 resulting in 2 sets of changes this cycle. - Implement kernel_execve - Make the user mode driver code a better citizen With kernel_execve the code size got a little larger as the copying of parameters from userspace and copying of parameters from userspace is now separate. The good news is kernel threads no longer need to play games with set_fs to use exec. Which when combined with the rest of Christophs set_fs changes should security bugs with set_fs much more difficult" * 'exec-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (23 commits) exec: Implement kernel_execve exec: Factor bprm_stack_limits out of prepare_arg_pages exec: Factor bprm_execve out of do_execve_common exec: Move bprm_mm_init into alloc_bprm exec: Move initialization of bprm->filename into alloc_bprm exec: Factor out alloc_bprm exec: Remove unnecessary spaces from binfmts.h umd: Stop using split_argv umd: Remove exit_umh bpfilter: Take advantage of the facilities of struct pid exit: Factor thread_group_exited out of pidfd_poll umd: Track user space drivers with struct pid bpfilter: Move bpfilter_umh back into init data exec: Remove do_execve_file umh: Stop calling do_execve_file umd: Transform fork_usermode_blob into fork_usermode_driver umd: Rename umd_info.cmdline umd_info.driver_name umd: For clarity rename umh_info umd_info umh: Separate the user mode driver and the user mode helper support umh: Remove call_usermodehelper_setup_file. ...
Diffstat (limited to 'fs')
-rw-r--r--fs/exec.c307
1 files changed, 197 insertions, 110 deletions
diff --git a/fs/exec.c b/fs/exec.c
index e6e8a9a70327..3698252719a3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -448,18 +448,26 @@ static int count(struct user_arg_ptr argv, int max)
return i;
}
-static int prepare_arg_pages(struct linux_binprm *bprm,
- struct user_arg_ptr argv, struct user_arg_ptr envp)
+static int count_strings_kernel(const char *const *argv)
{
- unsigned long limit, ptr_size;
+ int i;
- bprm->argc = count(argv, MAX_ARG_STRINGS);
- if (bprm->argc < 0)
- return bprm->argc;
+ if (!argv)
+ return 0;
- bprm->envc = count(envp, MAX_ARG_STRINGS);
- if (bprm->envc < 0)
- return bprm->envc;
+ for (i = 0; argv[i]; ++i) {
+ if (i >= MAX_ARG_STRINGS)
+ return -E2BIG;
+ if (fatal_signal_pending(current))
+ return -ERESTARTNOHAND;
+ cond_resched();
+ }
+ return i;
+}
+
+static int bprm_stack_limits(struct linux_binprm *bprm)
+{
+ unsigned long limit, ptr_size;
/*
* Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
@@ -633,6 +641,20 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
}
EXPORT_SYMBOL(copy_string_kernel);
+static int copy_strings_kernel(int argc, const char *const *argv,
+ struct linux_binprm *bprm)
+{
+ while (argc-- > 0) {
+ int ret = copy_string_kernel(argv[argc], bprm);
+ if (ret < 0)
+ return ret;
+ if (fatal_signal_pending(current))
+ return -ERESTARTNOHAND;
+ cond_resched();
+ }
+ return 0;
+}
+
#ifdef CONFIG_MMU
/*
@@ -1543,6 +1565,10 @@ static int prepare_bprm_creds(struct linux_binprm *bprm)
static void free_bprm(struct linux_binprm *bprm)
{
+ if (bprm->mm) {
+ acct_arg_size(bprm, 0);
+ mmput(bprm->mm);
+ }
free_arg_pages(bprm);
if (bprm->cred) {
mutex_unlock(&current->signal->cred_guard_mutex);
@@ -1557,9 +1583,43 @@ static void free_bprm(struct linux_binprm *bprm)
/* If a binfmt changed the interp, free it. */
if (bprm->interp != bprm->filename)
kfree(bprm->interp);
+ kfree(bprm->fdpath);
kfree(bprm);
}
+static struct linux_binprm *alloc_bprm(int fd, struct filename *filename)
+{
+ struct linux_binprm *bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
+ int retval = -ENOMEM;
+ if (!bprm)
+ goto out;
+
+ if (fd == AT_FDCWD || filename->name[0] == '/') {
+ bprm->filename = filename->name;
+ } else {
+ if (filename->name[0] == '\0')
+ bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
+ else
+ bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
+ fd, filename->name);
+ if (!bprm->fdpath)
+ goto out_free;
+
+ bprm->filename = bprm->fdpath;
+ }
+ bprm->interp = bprm->filename;
+
+ retval = bprm_mm_init(bprm);
+ if (retval)
+ goto out_free;
+ return bprm;
+
+out_free:
+ free_bprm(bprm);
+out:
+ return ERR_PTR(retval);
+}
+
int bprm_change_interp(const char *interp, struct linux_binprm *bprm)
{
/* If a binfmt changed the interp, free it first. */
@@ -1818,53 +1878,25 @@ static int exec_binprm(struct linux_binprm *bprm)
/*
* sys_execve() executes a new program.
*/
-static int __do_execve_file(int fd, struct filename *filename,
- struct user_arg_ptr argv,
- struct user_arg_ptr envp,
- int flags, struct file *file)
+static int bprm_execve(struct linux_binprm *bprm,
+ int fd, struct filename *filename, int flags)
{
- char *pathbuf = NULL;
- struct linux_binprm *bprm;
+ struct file *file;
struct files_struct *displaced;
int retval;
- if (IS_ERR(filename))
- return PTR_ERR(filename);
-
- /*
- * We move the actual failure in case of RLIMIT_NPROC excess from
- * set*uid() to execve() because too many poorly written programs
- * don't check setuid() return code. Here we additionally recheck
- * whether NPROC limit is still exceeded.
- */
- if ((current->flags & PF_NPROC_EXCEEDED) &&
- atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
- retval = -EAGAIN;
- goto out_ret;
- }
-
- /* We're below the limit (still or again), so we don't want to make
- * further execve() calls fail. */
- current->flags &= ~PF_NPROC_EXCEEDED;
-
retval = unshare_files(&displaced);
if (retval)
- goto out_ret;
-
- retval = -ENOMEM;
- bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
- if (!bprm)
- goto out_files;
+ return retval;
retval = prepare_bprm_creds(bprm);
if (retval)
- goto out_free;
+ goto out_files;
check_unsafe_exec(bprm);
current->in_execve = 1;
- if (!file)
- file = do_open_execat(fd, filename, flags);
+ file = do_open_execat(fd, filename, flags);
retval = PTR_ERR(file);
if (IS_ERR(file))
goto out_unmark;
@@ -1872,57 +1904,20 @@ static int __do_execve_file(int fd, struct filename *filename,
sched_exec();
bprm->file = file;
- if (!filename) {
- bprm->filename = "none";
- } else if (fd == AT_FDCWD || filename->name[0] == '/') {
- bprm->filename = filename->name;
- } else {
- if (filename->name[0] == '\0')
- pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
- else
- pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
- fd, filename->name);
- if (!pathbuf) {
- retval = -ENOMEM;
- goto out_unmark;
- }
- /*
- * Record that a name derived from an O_CLOEXEC fd will be
- * inaccessible after exec. Relies on having exclusive access to
- * current->files (due to unshare_files above).
- */
- if (close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
- bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
- bprm->filename = pathbuf;
- }
- bprm->interp = bprm->filename;
-
- retval = bprm_mm_init(bprm);
- if (retval)
- goto out_unmark;
-
- retval = prepare_arg_pages(bprm, argv, envp);
- if (retval < 0)
- goto out;
+ /*
+ * Record that a name derived from an O_CLOEXEC fd will be
+ * inaccessible after exec. Relies on having exclusive access to
+ * current->files (due to unshare_files above).
+ */
+ if (bprm->fdpath &&
+ close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
+ bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
/* Set the unchanging part of bprm->cred */
retval = security_bprm_creds_for_exec(bprm);
if (retval)
goto out;
- retval = copy_string_kernel(bprm->filename, bprm);
- if (retval < 0)
- goto out;
-
- bprm->exec = bprm->p;
- retval = copy_strings(bprm->envc, envp, bprm);
- if (retval < 0)
- goto out;
-
- retval = copy_strings(bprm->argc, argv, bprm);
- if (retval < 0)
- goto out;
-
retval = exec_binprm(bprm);
if (retval < 0)
goto out;
@@ -1933,10 +1928,6 @@ static int __do_execve_file(int fd, struct filename *filename,
rseq_execve(current);
acct_update_integrals(current);
task_numa_free(current, false);
- free_bprm(bprm);
- kfree(pathbuf);
- if (filename)
- putname(filename);
if (displaced)
put_files_struct(displaced);
return retval;
@@ -1950,25 +1941,15 @@ out:
*/
if (bprm->point_of_no_return && !fatal_signal_pending(current))
force_sigsegv(SIGSEGV);
- if (bprm->mm) {
- acct_arg_size(bprm, 0);
- mmput(bprm->mm);
- }
out_unmark:
current->fs->in_exec = 0;
current->in_execve = 0;
-out_free:
- free_bprm(bprm);
- kfree(pathbuf);
-
out_files:
if (displaced)
reset_files_struct(displaced);
-out_ret:
- if (filename)
- putname(filename);
+
return retval;
}
@@ -1977,18 +1958,124 @@ static int do_execveat_common(int fd, struct filename *filename,
struct user_arg_ptr envp,
int flags)
{
- return __do_execve_file(fd, filename, argv, envp, flags, NULL);
+ struct linux_binprm *bprm;
+ int retval;
+
+ if (IS_ERR(filename))
+ return PTR_ERR(filename);
+
+ /*
+ * We move the actual failure in case of RLIMIT_NPROC excess from
+ * set*uid() to execve() because too many poorly written programs
+ * don't check setuid() return code. Here we additionally recheck
+ * whether NPROC limit is still exceeded.
+ */
+ if ((current->flags & PF_NPROC_EXCEEDED) &&
+ atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
+ retval = -EAGAIN;
+ goto out_ret;
+ }
+
+ /* We're below the limit (still or again), so we don't want to make
+ * further execve() calls fail. */
+ current->flags &= ~PF_NPROC_EXCEEDED;
+
+ bprm = alloc_bprm(fd, filename);
+ if (IS_ERR(bprm)) {
+ retval = PTR_ERR(bprm);
+ goto out_ret;
+ }
+
+ retval = count(argv, MAX_ARG_STRINGS);
+ if (retval < 0)
+ goto out_free;
+ bprm->argc = retval;
+
+ retval = count(envp, MAX_ARG_STRINGS);
+ if (retval < 0)
+ goto out_free;
+ bprm->envc = retval;
+
+ retval = bprm_stack_limits(bprm);
+ if (retval < 0)
+ goto out_free;
+
+ retval = copy_string_kernel(bprm->filename, bprm);
+ if (retval < 0)
+ goto out_free;
+ bprm->exec = bprm->p;
+
+ retval = copy_strings(bprm->envc, envp, bprm);
+ if (retval < 0)
+ goto out_free;
+
+ retval = copy_strings(bprm->argc, argv, bprm);
+ if (retval < 0)
+ goto out_free;
+
+ retval = bprm_execve(bprm, fd, filename, flags);
+out_free:
+ free_bprm(bprm);
+
+out_ret:
+ putname(filename);
+ return retval;
}
-int do_execve_file(struct file *file, void *__argv, void *__envp)
+int kernel_execve(const char *kernel_filename,
+ const char *const *argv, const char *const *envp)
{
- struct user_arg_ptr argv = { .ptr.native = __argv };
- struct user_arg_ptr envp = { .ptr.native = __envp };
+ struct filename *filename;
+ struct linux_binprm *bprm;
+ int fd = AT_FDCWD;
+ int retval;
+
+ filename = getname_kernel(kernel_filename);
+ if (IS_ERR(filename))
+ return PTR_ERR(filename);
+
+ bprm = alloc_bprm(fd, filename);
+ if (IS_ERR(bprm)) {
+ retval = PTR_ERR(bprm);
+ goto out_ret;
+ }
+
+ retval = count_strings_kernel(argv);
+ if (retval < 0)
+ goto out_free;
+ bprm->argc = retval;
+
+ retval = count_strings_kernel(envp);
+ if (retval < 0)
+ goto out_free;
+ bprm->envc = retval;
- return __do_execve_file(AT_FDCWD, NULL, argv, envp, 0, file);
+ retval = bprm_stack_limits(bprm);
+ if (retval < 0)
+ goto out_free;
+
+ retval = copy_string_kernel(bprm->filename, bprm);
+ if (retval < 0)
+ goto out_free;
+ bprm->exec = bprm->p;
+
+ retval = copy_strings_kernel(bprm->envc, envp, bprm);
+ if (retval < 0)
+ goto out_free;
+
+ retval = copy_strings_kernel(bprm->argc, argv, bprm);
+ if (retval < 0)
+ goto out_free;
+
+ retval = bprm_execve(bprm, fd, filename, 0);
+out_free:
+ free_bprm(bprm);
+out_ret:
+ putname(filename);
+ return retval;
}
-int do_execve(struct filename *filename,
+static int do_execve(struct filename *filename,
const char __user *const __user *__argv,
const char __user *const __user *__envp)
{
@@ -1997,7 +2084,7 @@ int do_execve(struct filename *filename,
return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
}
-int do_execveat(int fd, struct filename *filename,
+static int do_execveat(int fd, struct filename *filename,
const char __user *const __user *__argv,
const char __user *const __user *__envp,
int flags)