From 25cf336de51b51a3e440e1893751f9532095eff0 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 25 Jun 2020 13:56:40 -0500
Subject: exec: Remove do_execve_file

Now that the last callser has been removed remove this code from exec.

For anyone thinking of resurrecing do_execve_file please note that
the code was buggy in several fundamental ways.

- It did not ensure the file it was passed was read-only and that
  deny_write_access had been called on it.  Which subtlely breaks
  invaniants in exec.

- The caller of do_execve_file was expected to hold and put a
  reference to the file, but an extra reference for use by exec was
  not taken so that when exec put it's reference to the file an
  underflow occured on the file reference count.

- The point of the interface was so that a pathname did not need to
  exist.  Which breaks pathname based LSMs.

Tetsuo Handa originally reported these issues[1].  While it was clear
that deny_write_access was missing the fundamental incompatibility
with the passed in O_RDWR filehandle was not immediately recognized.

All of these issues were fixed by modifying the usermode driver code
to have a path, so it did not need this hack.

Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
[1] https://lore.kernel.org/linux-fsdevel/2a8775b4-1dd5-9d5c-aa42-9872445e0942@i-love.sakura.ne.jp/
v1: https://lkml.kernel.org/r/871rm2f0hi.fsf_-_@x220.int.ebiederm.org
v2: https://lkml.kernel.org/r/87lfk54p0m.fsf_-_@x220.int.ebiederm.org
Link: https://lkml.kernel.org/r/20200702164140.4468-10-ebiederm@xmission.com
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/exec.c | 38 +++++++++-----------------------------
 1 file changed, 9 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index e6e8a9a70327..23dfbb820626 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1818,13 +1818,14 @@ static int exec_binprm(struct linux_binprm *bprm)
 /*
  * sys_execve() executes a new program.
  */
-static int __do_execve_file(int fd, struct filename *filename,
-			    struct user_arg_ptr argv,
-			    struct user_arg_ptr envp,
-			    int flags, struct file *file)
+static int do_execveat_common(int fd, struct filename *filename,
+			      struct user_arg_ptr argv,
+			      struct user_arg_ptr envp,
+			      int flags)
 {
 	char *pathbuf = NULL;
 	struct linux_binprm *bprm;
+	struct file *file;
 	struct files_struct *displaced;
 	int retval;
 
@@ -1863,8 +1864,7 @@ static int __do_execve_file(int fd, struct filename *filename,
 	check_unsafe_exec(bprm);
 	current->in_execve = 1;
 
-	if (!file)
-		file = do_open_execat(fd, filename, flags);
+	file = do_open_execat(fd, filename, flags);
 	retval = PTR_ERR(file);
 	if (IS_ERR(file))
 		goto out_unmark;
@@ -1872,9 +1872,7 @@ static int __do_execve_file(int fd, struct filename *filename,
 	sched_exec();
 
 	bprm->file = file;
-	if (!filename) {
-		bprm->filename = "none";
-	} else if (fd == AT_FDCWD || filename->name[0] == '/') {
+	if (fd == AT_FDCWD || filename->name[0] == '/') {
 		bprm->filename = filename->name;
 	} else {
 		if (filename->name[0] == '\0')
@@ -1935,8 +1933,7 @@ static int __do_execve_file(int fd, struct filename *filename,
 	task_numa_free(current, false);
 	free_bprm(bprm);
 	kfree(pathbuf);
-	if (filename)
-		putname(filename);
+	putname(filename);
 	if (displaced)
 		put_files_struct(displaced);
 	return retval;
@@ -1967,27 +1964,10 @@ out_files:
 	if (displaced)
 		reset_files_struct(displaced);
 out_ret:
-	if (filename)
-		putname(filename);
+	putname(filename);
 	return retval;
 }
 
-static int do_execveat_common(int fd, struct filename *filename,
-			      struct user_arg_ptr argv,
-			      struct user_arg_ptr envp,
-			      int flags)
-{
-	return __do_execve_file(fd, filename, argv, envp, flags, NULL);
-}
-
-int do_execve_file(struct file *file, void *__argv, void *__envp)
-{
-	struct user_arg_ptr argv = { .ptr.native = __argv };
-	struct user_arg_ptr envp = { .ptr.native = __envp };
-
-	return __do_execve_file(AT_FDCWD, NULL, argv, envp, 0, file);
-}
-
 int do_execve(struct filename *filename,
 	const char __user *const __user *__argv,
 	const char __user *const __user *__envp)
-- 
cgit v1.2.3


From 0a8f36eb48f64006dde86e2a58230d5a599eef7d Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Jul 2020 15:39:45 -0500
Subject: exec: Factor out alloc_bprm

Currently it is necessary for the usermode helper code and the code
that launches init to use set_fs so that pages coming from the kernel
look like they are coming from userspace.

To allow that usage of set_fs to be removed cleanly the argument
copying from userspace needs to happen earlier.  Move the allocation
of the bprm into it's own function (alloc_bprm) and move the call of
alloc_bprm before unshare_files so that bprm can ultimately be
allocated, the arguments can be placed on the new stack, and then the
bprm can be passed into the core of exec.

Neither the allocation of struct binprm nor the unsharing depend upon each
other so swapping the order in which they are called is trivially safe.

To keep things consistent the order of cleanup at the end of
do_execve_common swapped to match the order of initialization.

Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lkml.kernel.org/r/87pn8y6x9a.fsf@x220.int.ebiederm.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/exec.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 23dfbb820626..526156d6461d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1560,6 +1560,14 @@ static void free_bprm(struct linux_binprm *bprm)
 	kfree(bprm);
 }
 
+static struct linux_binprm *alloc_bprm(void)
+{
+	struct linux_binprm *bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
+	if (!bprm)
+		return ERR_PTR(-ENOMEM);
+	return bprm;
+}
+
 int bprm_change_interp(const char *interp, struct linux_binprm *bprm)
 {
 	/* If a binfmt changed the interp, free it first. */
@@ -1848,18 +1856,19 @@ static int do_execveat_common(int fd, struct filename *filename,
 	 * further execve() calls fail. */
 	current->flags &= ~PF_NPROC_EXCEEDED;
 
-	retval = unshare_files(&displaced);
-	if (retval)
+	bprm = alloc_bprm();
+	if (IS_ERR(bprm)) {
+		retval = PTR_ERR(bprm);
 		goto out_ret;
+	}
 
-	retval = -ENOMEM;
-	bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
-	if (!bprm)
-		goto out_files;
+	retval = unshare_files(&displaced);
+	if (retval)
+		goto out_free;
 
 	retval = prepare_bprm_creds(bprm);
 	if (retval)
-		goto out_free;
+		goto out_files;
 
 	check_unsafe_exec(bprm);
 	current->in_execve = 1;
@@ -1956,13 +1965,13 @@ out_unmark:
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
 
+out_files:
+	if (displaced)
+		reset_files_struct(displaced);
 out_free:
 	free_bprm(bprm);
 	kfree(pathbuf);
 
-out_files:
-	if (displaced)
-		reset_files_struct(displaced);
 out_ret:
 	putname(filename);
 	return retval;
-- 
cgit v1.2.3


From 60d9ad1d1d7f15964d23f6e71a7adcf1bde0e18e Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 11 Jul 2020 08:16:15 -0500
Subject: exec: Move initialization of bprm->filename into alloc_bprm

Currently it is necessary for the usermode helper code and the code
that launches init to use set_fs so that pages coming from the kernel
look like they are coming from userspace.

To allow that usage of set_fs to be removed cleanly the argument
copying from userspace needs to happen earlier.  Move the computation
of bprm->filename and possible allocation of a name in the case
of execveat into alloc_bprm to make that possible.

The exectuable name, the arguments, and the environment are
copied into the new usermode stack which is stored in bprm
until exec passes the point of no return.

As the executable name is copied first onto the usermode stack
it needs to be known.  As there are no dependencies to computing
the executable name, compute it early in alloc_bprm.

As an implementation detail if the filename needs to be generated
because it embeds a file descriptor store that filename in a new field
bprm->fdpath, and free it in free_bprm.  Previously this was done in
an independent variable pathbuf.  I have renamed pathbuf fdpath
because fdpath is more suggestive of what kind of path is in the
variable.  I moved fdpath into struct linux_binprm because it is
tightly tied to the other variables in struct linux_binprm, and as
such is needed to allow the call alloc_binprm to move.

Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lkml.kernel.org/r/87k0z66x8f.fsf@x220.int.ebiederm.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/exec.c | 61 +++++++++++++++++++++++++++++++++----------------------------
 1 file changed, 33 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 526156d6461d..7e8af27dd199 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1557,15 +1557,37 @@ static void free_bprm(struct linux_binprm *bprm)
 	/* If a binfmt changed the interp, free it. */
 	if (bprm->interp != bprm->filename)
 		kfree(bprm->interp);
+	kfree(bprm->fdpath);
 	kfree(bprm);
 }
 
-static struct linux_binprm *alloc_bprm(void)
+static struct linux_binprm *alloc_bprm(int fd, struct filename *filename)
 {
 	struct linux_binprm *bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
+	int retval = -ENOMEM;
 	if (!bprm)
-		return ERR_PTR(-ENOMEM);
+		goto out;
+
+	if (fd == AT_FDCWD || filename->name[0] == '/') {
+		bprm->filename = filename->name;
+	} else {
+		if (filename->name[0] == '\0')
+			bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
+		else
+			bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
+						  fd, filename->name);
+		if (!bprm->fdpath)
+			goto out_free;
+
+		bprm->filename = bprm->fdpath;
+	}
+	bprm->interp = bprm->filename;
 	return bprm;
+
+out_free:
+	free_bprm(bprm);
+out:
+	return ERR_PTR(retval);
 }
 
 int bprm_change_interp(const char *interp, struct linux_binprm *bprm)
@@ -1831,7 +1853,6 @@ static int do_execveat_common(int fd, struct filename *filename,
 			      struct user_arg_ptr envp,
 			      int flags)
 {
-	char *pathbuf = NULL;
 	struct linux_binprm *bprm;
 	struct file *file;
 	struct files_struct *displaced;
@@ -1856,7 +1877,7 @@ static int do_execveat_common(int fd, struct filename *filename,
 	 * further execve() calls fail. */
 	current->flags &= ~PF_NPROC_EXCEEDED;
 
-	bprm = alloc_bprm();
+	bprm = alloc_bprm(fd, filename);
 	if (IS_ERR(bprm)) {
 		retval = PTR_ERR(bprm);
 		goto out_ret;
@@ -1881,28 +1902,14 @@ static int do_execveat_common(int fd, struct filename *filename,
 	sched_exec();
 
 	bprm->file = file;
-	if (fd == AT_FDCWD || filename->name[0] == '/') {
-		bprm->filename = filename->name;
-	} else {
-		if (filename->name[0] == '\0')
-			pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
-		else
-			pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
-					    fd, filename->name);
-		if (!pathbuf) {
-			retval = -ENOMEM;
-			goto out_unmark;
-		}
-		/*
-		 * Record that a name derived from an O_CLOEXEC fd will be
-		 * inaccessible after exec. Relies on having exclusive access to
-		 * current->files (due to unshare_files above).
-		 */
-		if (close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
-			bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
-		bprm->filename = pathbuf;
-	}
-	bprm->interp = bprm->filename;
+	/*
+	 * Record that a name derived from an O_CLOEXEC fd will be
+	 * inaccessible after exec. Relies on having exclusive access to
+	 * current->files (due to unshare_files above).
+	 */
+	if (bprm->fdpath &&
+	    close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
+		bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
 
 	retval = bprm_mm_init(bprm);
 	if (retval)
@@ -1941,7 +1948,6 @@ static int do_execveat_common(int fd, struct filename *filename,
 	acct_update_integrals(current);
 	task_numa_free(current, false);
 	free_bprm(bprm);
-	kfree(pathbuf);
 	putname(filename);
 	if (displaced)
 		put_files_struct(displaced);
@@ -1970,7 +1976,6 @@ out_files:
 		reset_files_struct(displaced);
 out_free:
 	free_bprm(bprm);
-	kfree(pathbuf);
 
 out_ret:
 	putname(filename);
-- 
cgit v1.2.3


From f18ac551e5035a52fd8edadc45e0f8eb4cec8d3e Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Jul 2020 15:54:54 -0500
Subject: exec: Move bprm_mm_init into alloc_bprm

Currently it is necessary for the usermode helper code and the code that
launches init to use set_fs so that pages coming from the kernel look like
they are coming from userspace.

To allow that usage of set_fs to be removed cleanly the argument copying
from userspace needs to happen earlier.  Move the allocation and
initialization of bprm->mm into alloc_bprm so that the bprm->mm is
available early to store the new user stack into.  This is a prerequisite
for copying argv and envp into the new user stack early before ther rest of
exec.

To keep the things consistent the cleanup of bprm->mm is moved into
free_bprm.  So that bprm->mm will be cleaned up whenever bprm->mm is
allocated and free_bprm are called.

Moving bprm_mm_init earlier is safe as it does not depend on any files,
current->in_execve, current->fs->in_exec, bprm->unsafe, or the if the file
table is shared. (AKA bprm_mm_init does not depend on any of the code that
happens between alloc_bprm and where it was previously called.)

This moves bprm->mm cleanup after current->fs->in_exec is set to 0.  This
is safe because current->fs->in_exec is only used to preventy taking an
additional reference on the fs_struct.

This moves bprm->mm cleanup after current->in_execve is set to 0.  This is
safe because current->in_execve is only used by the lsms (apparmor and
tomoyou) and always for LSM specific functions, never for anything to do
with the mm.

This adds bprm->mm cleanup into the successful return path.  This is safe
because being on the successful return path implies that begin_new_exec
succeeded and set brpm->mm to NULL.  As bprm->mm is NULL bprm cleanup I am
moving into free_bprm will do nothing.

Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lkml.kernel.org/r/87eepe6x7p.fsf@x220.int.ebiederm.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/exec.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 7e8af27dd199..afb168bf5e23 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1543,6 +1543,10 @@ static int prepare_bprm_creds(struct linux_binprm *bprm)
 
 static void free_bprm(struct linux_binprm *bprm)
 {
+	if (bprm->mm) {
+		acct_arg_size(bprm, 0);
+		mmput(bprm->mm);
+	}
 	free_arg_pages(bprm);
 	if (bprm->cred) {
 		mutex_unlock(&current->signal->cred_guard_mutex);
@@ -1582,6 +1586,10 @@ static struct linux_binprm *alloc_bprm(int fd, struct filename *filename)
 		bprm->filename = bprm->fdpath;
 	}
 	bprm->interp = bprm->filename;
+
+	retval = bprm_mm_init(bprm);
+	if (retval)
+		goto out_free;
 	return bprm;
 
 out_free:
@@ -1911,10 +1919,6 @@ static int do_execveat_common(int fd, struct filename *filename,
 	    close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
 		bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
 
-	retval = bprm_mm_init(bprm);
-	if (retval)
-		goto out_unmark;
-
 	retval = prepare_arg_pages(bprm, argv, envp);
 	if (retval < 0)
 		goto out;
@@ -1962,10 +1966,6 @@ out:
 	 */
 	if (bprm->point_of_no_return && !fatal_signal_pending(current))
 		force_sigsegv(SIGSEGV);
-	if (bprm->mm) {
-		acct_arg_size(bprm, 0);
-		mmput(bprm->mm);
-	}
 
 out_unmark:
 	current->fs->in_exec = 0;
-- 
cgit v1.2.3


From 0c9cdff054aec0836bb38a449e860793849c3f84 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 12 Jul 2020 07:17:50 -0500
Subject: exec: Factor bprm_execve out of do_execve_common

Currently it is necessary for the usermode helper code and the code
that launches init to use set_fs so that pages coming from the kernel
look like they are coming from userspace.

To allow that usage of set_fs to be removed cleanly the argument
copying from userspace needs to happen earlier.  Factor bprm_execve
out of do_execve_common to separate out the copying of arguments
to the newe stack, and the rest of exec.

In separating bprm_execve from do_execve_common the copying
of the arguments onto the new stack happens earlier.

As the copying of the arguments does not depend any security hooks,
files, the file table, current->in_execve, current->fs->in_exec,
bprm->unsafe, or creds this is safe.

Likewise the security hook security_creds_for_exec does not depend upon
preventing the argument copying from happening.

In addition to making it possible to implement kernel_execve that
performs the copying differently, this separation of bprm_execve from
do_execve_common makes for a nice separation of responsibilities making
the exec code easier to navigate.

Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lkml.kernel.org/r/878sfm6x6x.fsf@x220.int.ebiederm.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/exec.c | 108 +++++++++++++++++++++++++++++++++-----------------------------
 1 file changed, 58 insertions(+), 50 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index afb168bf5e23..50508892fa71 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1856,44 +1856,16 @@ static int exec_binprm(struct linux_binprm *bprm)
 /*
  * sys_execve() executes a new program.
  */
-static int do_execveat_common(int fd, struct filename *filename,
-			      struct user_arg_ptr argv,
-			      struct user_arg_ptr envp,
-			      int flags)
+static int bprm_execve(struct linux_binprm *bprm,
+		       int fd, struct filename *filename, int flags)
 {
-	struct linux_binprm *bprm;
 	struct file *file;
 	struct files_struct *displaced;
 	int retval;
 
-	if (IS_ERR(filename))
-		return PTR_ERR(filename);
-
-	/*
-	 * We move the actual failure in case of RLIMIT_NPROC excess from
-	 * set*uid() to execve() because too many poorly written programs
-	 * don't check setuid() return code.  Here we additionally recheck
-	 * whether NPROC limit is still exceeded.
-	 */
-	if ((current->flags & PF_NPROC_EXCEEDED) &&
-	    atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
-		retval = -EAGAIN;
-		goto out_ret;
-	}
-
-	/* We're below the limit (still or again), so we don't want to make
-	 * further execve() calls fail. */
-	current->flags &= ~PF_NPROC_EXCEEDED;
-
-	bprm = alloc_bprm(fd, filename);
-	if (IS_ERR(bprm)) {
-		retval = PTR_ERR(bprm);
-		goto out_ret;
-	}
-
 	retval = unshare_files(&displaced);
 	if (retval)
-		goto out_free;
+		return retval;
 
 	retval = prepare_bprm_creds(bprm);
 	if (retval)
@@ -1919,28 +1891,11 @@ static int do_execveat_common(int fd, struct filename *filename,
 	    close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
 		bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
 
-	retval = prepare_arg_pages(bprm, argv, envp);
-	if (retval < 0)
-		goto out;
-
 	/* Set the unchanging part of bprm->cred */
 	retval = security_bprm_creds_for_exec(bprm);
 	if (retval)
 		goto out;
 
-	retval = copy_string_kernel(bprm->filename, bprm);
-	if (retval < 0)
-		goto out;
-
-	bprm->exec = bprm->p;
-	retval = copy_strings(bprm->envc, envp, bprm);
-	if (retval < 0)
-		goto out;
-
-	retval = copy_strings(bprm->argc, argv, bprm);
-	if (retval < 0)
-		goto out;
-
 	retval = exec_binprm(bprm);
 	if (retval < 0)
 		goto out;
@@ -1951,8 +1906,6 @@ static int do_execveat_common(int fd, struct filename *filename,
 	rseq_execve(current);
 	acct_update_integrals(current);
 	task_numa_free(current, false);
-	free_bprm(bprm);
-	putname(filename);
 	if (displaced)
 		put_files_struct(displaced);
 	return retval;
@@ -1974,6 +1927,61 @@ out_unmark:
 out_files:
 	if (displaced)
 		reset_files_struct(displaced);
+
+	return retval;
+}
+
+static int do_execveat_common(int fd, struct filename *filename,
+			      struct user_arg_ptr argv,
+			      struct user_arg_ptr envp,
+			      int flags)
+{
+	struct linux_binprm *bprm;
+	int retval;
+
+	if (IS_ERR(filename))
+		return PTR_ERR(filename);
+
+	/*
+	 * We move the actual failure in case of RLIMIT_NPROC excess from
+	 * set*uid() to execve() because too many poorly written programs
+	 * don't check setuid() return code.  Here we additionally recheck
+	 * whether NPROC limit is still exceeded.
+	 */
+	if ((current->flags & PF_NPROC_EXCEEDED) &&
+	    atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
+		retval = -EAGAIN;
+		goto out_ret;
+	}
+
+	/* We're below the limit (still or again), so we don't want to make
+	 * further execve() calls fail. */
+	current->flags &= ~PF_NPROC_EXCEEDED;
+
+	bprm = alloc_bprm(fd, filename);
+	if (IS_ERR(bprm)) {
+		retval = PTR_ERR(bprm);
+		goto out_ret;
+	}
+
+	retval = prepare_arg_pages(bprm, argv, envp);
+	if (retval < 0)
+		goto out_free;
+
+	retval = copy_string_kernel(bprm->filename, bprm);
+	if (retval < 0)
+		goto out_free;
+	bprm->exec = bprm->p;
+
+	retval = copy_strings(bprm->envc, envp, bprm);
+	if (retval < 0)
+		goto out_free;
+
+	retval = copy_strings(bprm->argc, argv, bprm);
+	if (retval < 0)
+		goto out_free;
+
+	retval = bprm_execve(bprm, fd, filename, flags);
 out_free:
 	free_bprm(bprm);
 
-- 
cgit v1.2.3


From d8b9cd549ecf0f3dc8da42ada5f0ce73e8ed5f1e Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 12 Jul 2020 08:23:54 -0500
Subject: exec: Factor bprm_stack_limits out of prepare_arg_pages

In preparation for implementiong kernel_execve (which will take kernel
pointers not userspace pointers) factor out bprm_stack_limits out of
prepare_arg_pages.  This separates the counting which depends upon the
getting data from userspace from the calculations of the stack limits
which is usable in kernel_execve.

The remove prepare_args_pages and compute bprm->argc and bprm->envc
directly in do_execveat_common, before bprm_stack_limits is called.

Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lkml.kernel.org/r/87365u6x60.fsf@x220.int.ebiederm.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/exec.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 50508892fa71..f8135dc149b3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -448,19 +448,10 @@ static int count(struct user_arg_ptr argv, int max)
 	return i;
 }
 
-static int prepare_arg_pages(struct linux_binprm *bprm,
-			struct user_arg_ptr argv, struct user_arg_ptr envp)
+static int bprm_stack_limits(struct linux_binprm *bprm)
 {
 	unsigned long limit, ptr_size;
 
-	bprm->argc = count(argv, MAX_ARG_STRINGS);
-	if (bprm->argc < 0)
-		return bprm->argc;
-
-	bprm->envc = count(envp, MAX_ARG_STRINGS);
-	if (bprm->envc < 0)
-		return bprm->envc;
-
 	/*
 	 * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
 	 * (whichever is smaller) for the argv+env strings.
@@ -1964,7 +1955,17 @@ static int do_execveat_common(int fd, struct filename *filename,
 		goto out_ret;
 	}
 
-	retval = prepare_arg_pages(bprm, argv, envp);
+	retval = count(argv, MAX_ARG_STRINGS);
+	if (retval < 0)
+		goto out_free;
+	bprm->argc = retval;
+
+	retval = count(envp, MAX_ARG_STRINGS);
+	if (retval < 0)
+		goto out_free;
+	bprm->envc = retval;
+
+	retval = bprm_stack_limits(bprm);
 	if (retval < 0)
 		goto out_free;
 
-- 
cgit v1.2.3


From be619f7f063a49c656f620a46af4f8ea3e759e91 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 13 Jul 2020 12:06:48 -0500
Subject: exec: Implement kernel_execve

To allow the kernel not to play games with set_fs to call exec
implement kernel_execve.  The function kernel_execve takes pointers
into kernel memory and copies the values pointed to onto the new
userspace stack.

The calls with arguments from kernel space of do_execve are replaced
with calls to kernel_execve.

The calls do_execve and do_execveat are made static as there are now
no callers outside of exec.

The comments that mention do_execve are updated to refer to
kernel_execve or execve depending on the circumstances.  In addition
to correcting the comments, this makes it easy to grep for do_execve
and verify it is not used.

Inspired-by: https://lkml.kernel.org/r/20200627072704.2447163-1-hch@lst.de
Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lkml.kernel.org/r/87wo365ikj.fsf@x220.int.ebiederm.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/exec.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 86 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index f8135dc149b3..3698252719a3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -448,6 +448,23 @@ static int count(struct user_arg_ptr argv, int max)
 	return i;
 }
 
+static int count_strings_kernel(const char *const *argv)
+{
+	int i;
+
+	if (!argv)
+		return 0;
+
+	for (i = 0; argv[i]; ++i) {
+		if (i >= MAX_ARG_STRINGS)
+			return -E2BIG;
+		if (fatal_signal_pending(current))
+			return -ERESTARTNOHAND;
+		cond_resched();
+	}
+	return i;
+}
+
 static int bprm_stack_limits(struct linux_binprm *bprm)
 {
 	unsigned long limit, ptr_size;
@@ -624,6 +641,20 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
 }
 EXPORT_SYMBOL(copy_string_kernel);
 
+static int copy_strings_kernel(int argc, const char *const *argv,
+			       struct linux_binprm *bprm)
+{
+	while (argc-- > 0) {
+		int ret = copy_string_kernel(argv[argc], bprm);
+		if (ret < 0)
+			return ret;
+		if (fatal_signal_pending(current))
+			return -ERESTARTNOHAND;
+		cond_resched();
+	}
+	return 0;
+}
+
 #ifdef CONFIG_MMU
 
 /*
@@ -1991,7 +2022,60 @@ out_ret:
 	return retval;
 }
 
-int do_execve(struct filename *filename,
+int kernel_execve(const char *kernel_filename,
+		  const char *const *argv, const char *const *envp)
+{
+	struct filename *filename;
+	struct linux_binprm *bprm;
+	int fd = AT_FDCWD;
+	int retval;
+
+	filename = getname_kernel(kernel_filename);
+	if (IS_ERR(filename))
+		return PTR_ERR(filename);
+
+	bprm = alloc_bprm(fd, filename);
+	if (IS_ERR(bprm)) {
+		retval = PTR_ERR(bprm);
+		goto out_ret;
+	}
+
+	retval = count_strings_kernel(argv);
+	if (retval < 0)
+		goto out_free;
+	bprm->argc = retval;
+
+	retval = count_strings_kernel(envp);
+	if (retval < 0)
+		goto out_free;
+	bprm->envc = retval;
+
+	retval = bprm_stack_limits(bprm);
+	if (retval < 0)
+		goto out_free;
+
+	retval = copy_string_kernel(bprm->filename, bprm);
+	if (retval < 0)
+		goto out_free;
+	bprm->exec = bprm->p;
+
+	retval = copy_strings_kernel(bprm->envc, envp, bprm);
+	if (retval < 0)
+		goto out_free;
+
+	retval = copy_strings_kernel(bprm->argc, argv, bprm);
+	if (retval < 0)
+		goto out_free;
+
+	retval = bprm_execve(bprm, fd, filename, 0);
+out_free:
+	free_bprm(bprm);
+out_ret:
+	putname(filename);
+	return retval;
+}
+
+static int do_execve(struct filename *filename,
 	const char __user *const __user *__argv,
 	const char __user *const __user *__envp)
 {
@@ -2000,7 +2084,7 @@ int do_execve(struct filename *filename,
 	return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
 }
 
-int do_execveat(int fd, struct filename *filename,
+static int do_execveat(int fd, struct filename *filename,
 		const char __user *const __user *__argv,
 		const char __user *const __user *__envp,
 		int flags)
-- 
cgit v1.2.3