linux/fs/binfmt_script.c

/*
 *  linux/fs/binfmt_script.c
 *
 *  Copyright (C) 1996  Martin von Löwis
 *  original #!-checking implemented by tytso.
 */

#include <linux/module.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/binfmts.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/err.h>
#include <linux/fs.h>

/* Non-zero when @c is one of the two separators recognised on a #! line. */
static inline int script_is_blank(char c)
{
	return c == ' ' || c == '\t';
}

/*
 * load_script - binfmt handler for "#!" interpreter scripts.
 * @bprm: exec state; bprm->buf holds the leading bytes of the file.
 *
 * Parses "#! <interpreter> [<argument>]" out of the first line held in
 * bprm->buf, rewrites the argument list so the interpreter is invoked on
 * the script, then opens the interpreter and hands it back to
 * search_binary_handler().
 *
 * Returns -ENOEXEC when the buffer does not start with "#!" or names no
 * interpreter, -ENOENT when BINPRM_FLAGS_PATH_INACCESSIBLE is set, any
 * error reported by the helpers called below, and otherwise whatever
 * search_binary_handler() returns.
 *
 * Note that the reference on bprm->file is dropped before the line is
 * parsed, so bprm->file is NULL on every error return taken after that
 * point and before open_exec() succeeds.
 */
static int load_script(struct linux_binprm *bprm)
{
	char interp[BINPRM_BUF_SIZE];
	const char *i_name;
	const char *i_arg = NULL;
	struct file *file;
	char *end;
	char *p;
	int retval;

	if (bprm->buf[0] != '#' || bprm->buf[1] != '!')
		return -ENOEXEC;

	/*
	 * The script filename will be inaccessible after exec, typically
	 * because it is a "/dev/fd/<fd>/.." path against an O_CLOEXEC fd.
	 * Give up now, on the assumption that the interpreter will want to
	 * load this file.
	 */
	if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
		return -ENOENT;

	/* The script file itself is no longer needed: release it. */
	allow_write_access(bprm->file);
	fput(bprm->file);
	bprm->file = NULL;

	/*
	 * Terminate the first line: at the newline if there is one, else
	 * at the very last byte of the buffer.
	 * NOTE(review): a line with no newline inside the buffer is cut at
	 * the buffer's last byte, so an over-long interpreter path or
	 * argument would be used in shortened form - confirm this is
	 * intended.
	 */
	bprm->buf[BINPRM_BUF_SIZE - 1] = '\0';
	end = strchr(bprm->buf, '\n');
	if (!end)
		end = bprm->buf + BINPRM_BUF_SIZE - 1;
	*end = '\0';

	/* Wipe any blanks trailing the line. */
	while (end > bprm->buf && script_is_blank(end[-1]))
		*--end = '\0';

	/* Skip the blanks between "#!" and the interpreter name. */
	p = bprm->buf + 2;
	while (script_is_blank(*p))
		p++;
	if (*p == '\0')
		return -ENOEXEC; /* No interpreter name found */
	i_name = p;

	/* Step over the name, then cut it off from what follows. */
	while (*p != '\0' && !script_is_blank(*p))
		p++;
	while (script_is_blank(*p))
		*p++ = '\0';

	/* Whatever is left, taken as a single string, is the argument. */
	if (*p != '\0')
		i_arg = p;

	/* interp is as large as bprm->buf, which was NUL-terminated above. */
	strcpy(interp, i_name);

	/*
	 * The interpreter name and the (optional) argument are known.
	 * Splice in (1) the interpreter's name for argv[0]
	 *           (2) (optional) argument to interpreter
	 *           (3) filename of shell script (replace argv[0])
	 *
	 * The strings are pushed in reverse order, because of how the
	 * user environment and arguments are stored.
	 */
	retval = remove_arg_zero(bprm);
	if (retval)
		return retval;

	retval = copy_strings_kernel(1, &bprm->interp, bprm);
	if (retval < 0)
		return retval;
	bprm->argc++;

	if (i_arg) {
		retval = copy_strings_kernel(1, &i_arg, bprm);
		if (retval < 0)
			return retval;
		bprm->argc++;
	}

	retval = copy_strings_kernel(1, &i_name, bprm);
	if (retval)
		return retval;
	bprm->argc++;

	retval = bprm_change_interp(interp, bprm);
	if (retval < 0)
		return retval;

	/* Restart the process with the interpreter's file. */
	file = open_exec(interp);
	if (IS_ERR(file))
		return PTR_ERR(file);
	bprm->file = file;

	retval = prepare_binprm(bprm);
	if (retval < 0)
		return retval;

	return search_binary_handler(bprm);
}

/*
 * Binary-format descriptor for "#!" scripts: load_script() is the
 * load_binary handler and this module is recorded as the owner.
 */
static struct linux_binfmt script_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_script,
};

/* Register the script binary format; always returns 0. */
static int __init init_script_binfmt(void)
{
	register_binfmt(&script_format);
	return 0;
}

/* Unregister the script binary format. */
static void __exit exit_script_binfmt(void)
{
	unregister_binfmt(&script_format);
}

/*
 * init_script_binfmt() is hooked in through core_initcall() and
 * exit_script_binfmt() through module_exit().
 */
core_initcall(init_script_binfmt);
module_exit(exit_script_binfmt);
MODULE_LICENSE("GPL");
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`/*`
			`* linux/fs/binfmt_script.c`
			`*`
Convert files to UTF-8 and some cleanups * Convert files to UTF-8. * Also correct some people's names (one example is Eißfeldt, which was found in a source file. Given that the author used an ß at all in a source file indicates that the real name has in fact a 'ß' and not an 'ss', which is commonly used as a substitute for 'ß' when limited to 7bit.) * Correct town names (Goettingen -> Göttingen) * Update Eberhard Mönkeberg's address (http://lkml.org/lkml/2007/1/8/313) Signed-off-by: Jan Engelhardt <jengelh@gmx.de> Signed-off-by: Adrian Bunk <bunk@kernel.org> 2007-10-20 05:21:04 +08:00			`* Copyright (C) 1996 Martin von Löwis`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`* original #!-checking implemented by tytso.`
			`*/`

			`#include <linux/module.h>`
			`#include <linux/string.h>`
			`#include <linux/stat.h>`
			`#include <linux/binfmts.h>`
			`#include <linux/init.h>`
			`#include <linux/file.h>`
			`#include <linux/err.h>`
			`#include <linux/fs.h>`

get rid of pt_regs argument of ->load_binary() Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> 2012-10-21 10:00:48 +08:00			`static int load_script(struct linux_binprm *bprm)`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`{`
Make do_execve() take a const filename pointer Make do_execve() take a const filename pointer so that kernel_execve() compiles correctly on ARM: arch/arm/kernel/sys_arm.c:88: warning: passing argument 1 of 'do_execve' discards qualifiers from pointer target type This also requires the argv and envp arguments to be consted twice, once for the pointer array and once for the strings the array points to. This is because do_execve() passes a pointer to the filename (now const) to copy_strings_kernel(). A simpler alternative would be to cast the filename pointer in do_execve() when it's passed to copy_strings_kernel(). do_execve() may not change any of the strings it is passed as part of the argv or envp lists as they are some of them in .rodata, so marking these strings as const should be fine. Further kernel_execve() and sys_execve() need to be changed to match. This has been test built on x86_64, frv, arm and mips. Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: Ralf Baechle <ralf@linux-mips.org> Acked-by: Russell King <rmk+kernel@arm.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2010-08-18 06:52:56 +08:00			`const char *i_arg, *i_name;`
			`char *cp;`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`struct file *file;`
			`char interp[BINPRM_BUF_SIZE];`
			`int retval;`

exec: use -ELOOP for max recursion depth To avoid an explosion of request_module calls on a chain of abusive scripts, fail maximum recursion with -ELOOP instead of -ENOEXEC. As soon as maximum recursion depth is hit, the error will fail all the way back up the chain, aborting immediately. This also has the side-effect of stopping the user's shell from attempting to reexecute the top-level file as a shell script. As seen in the dash source: if (cmd != path_bshell && errno == ENOEXEC) { argv-- = cmd; argv = cmd = path_bshell; goto repeat; } The above logic was designed for running scripts automatically that lacked the "#!" header, not to re-try failed recursion. On a legitimate -ENOEXEC, things continue to behave as the shell expects. Additionally, when tracking recursion, the binfmt handlers should not be involved. The recursion being tracked is the depth of calls through search_binary_handler(), so that function should be exclusively responsible for tracking the depth. Signed-off-by: Kees Cook <keescook@chromium.org> Cc: halfdog <me@halfdog.net> Cc: P J P <ppandit@redhat.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2012-12-18 08:03:20 +08:00			`if ((bprm->buf[0] != '#') \|\| (bprm->buf[1] != '!'))`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`return -ENOEXEC;`
syscalls: implement execveat() system call This patchset adds execveat(2) for x86, and is derived from Meredydd Luff's patch from Sept 2012 (https://lkml.org/lkml/2012/9/11/528). The primary aim of adding an execveat syscall is to allow an implementation of fexecve(3) that does not rely on the /proc filesystem, at least for executables (rather than scripts). The current glibc version of fexecve(3) is implemented via /proc, which causes problems in sandboxed or otherwise restricted environments. Given the desire for a /proc-free fexecve() implementation, HPA suggested (https://lkml.org/lkml/2006/7/11/556) that an execveat(2) syscall would be an appropriate generalization. Also, having a new syscall means that it can take a flags argument without back-compatibility concerns. The current implementation just defines the AT_EMPTY_PATH and AT_SYMLINK_NOFOLLOW flags, but other flags could be added in future -- for example, flags for new namespaces (as suggested at https://lkml.org/lkml/2006/7/11/474). Related history: - https://lkml.org/lkml/2006/12/27/123 is an example of someone realizing that fexecve() is likely to fail in a chroot environment. - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=514043 covered documenting the /proc requirement of fexecve(3) in its manpage, to "prevent other people from wasting their time". - https://bugzilla.redhat.com/show_bug.cgi?id=241609 described a problem where a process that did setuid() could not fexecve() because it no longer had access to /proc/self/fd; this has since been fixed. This patch (of 4): Add a new execveat(2) system call. execveat() is to execve() as openat() is to open(): it takes a file descriptor that refers to a directory, and resolves the filename relative to that. In addition, if the filename is empty and AT_EMPTY_PATH is specified, execveat() executes the file to which the file descriptor refers. 
This replicates the functionality of fexecve(), which is a system call in other UNIXen, but in Linux glibc it depends on opening "/proc/self/fd/<fd>" (and so relies on /proc being mounted). The filename fed to the executed program as argv[0] (or the name of the script fed to a script interpreter) will be of the form "/dev/fd/<fd>" (for an empty filename) or "/dev/fd/<fd>/<filename>", effectively reflecting how the executable was found. This does however mean that execution of a script in a /proc-less environment won't work; also, script execution via an O_CLOEXEC file descriptor fails (as the file will not be accessible after exec). Based on patches by Meredydd Luff. Signed-off-by: David Drysdale <drysdale@google.com> Cc: Meredydd Luff <meredydd@senatehouse.org> Cc: Shuah Khan <shuah.kh@samsung.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Kees Cook <keescook@chromium.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Rich Felker <dalias@aerifal.cx> Cc: Christoph Hellwig <hch@infradead.org> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2014-12-13 08:57:29 +08:00
			`/*`
			`* If the script filename will be inaccessible after exec, typically`
			`* because it is a "/dev/fd/<fd>/.." path against an O_CLOEXEC fd, give`
			`* up now (on the assumption that the interpreter will want to load`
			`* this file).`
			`*/`
			`if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)`
			`return -ENOENT;`

Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`/*`
			`* This section does the #! interpretation.`
			`* Sorta complicated, but hopefully it will work. -TYT`
			`*/`

			`allow_write_access(bprm->file);`
			`fput(bprm->file);`
			`bprm->file = NULL;`

			`bprm->buf[BINPRM_BUF_SIZE - 1] = '\0';`
			`if ((cp = strchr(bprm->buf, '\n')) == NULL)`
			`cp = bprm->buf+BINPRM_BUF_SIZE-1;`
			`*cp = '\0';`
			`while (cp > bprm->buf) {`
			`cp--;`
			`if ((*cp == ' ') || (*cp == '\t'))`
			`*cp = '\0';`
			`else`
			`break;`
			`}`
			`for (cp = bprm->buf+2; (*cp == ' ') || (*cp == '\t'); cp++);`
			`if (*cp == '\0')`
			`return -ENOEXEC; /* No interpreter name found */`
			`i_name = cp;`
			`i_arg = NULL;`
			`for ( ; *cp && (*cp != ' ') && (*cp != '\t'); cp++)`
			`/* nothing */ ;`
			`while ((*cp == ' ') || (*cp == '\t'))`
			`*cp++ = '\0';`
			`if (*cp)`
			`i_arg = cp;`
			`strcpy (interp, i_name);`
			`/*`
			`* OK, we've parsed out the interpreter name and`
			`* (optional) argument.`
			`* Splice in (1) the interpreter's name for argv[0]`
			`* (2) (optional) argument to interpreter`
			`* (3) filename of shell script (replace argv[0])`
			`*`
			`* This is done in reverse order, because of how the`
			`* user environment and arguments are stored.`
			`*/`
mm: variable length argument support Remove the arg+env limit of MAX_ARG_PAGES by copying the strings directly from the old mm into the new mm. We create the new mm before the binfmt code runs, and place the new stack at the very top of the address space. Once the binfmt code runs and figures out where the stack should be, we move it downwards. It is a bit peculiar in that we have one task with two mm's, one of which is inactive. [a.p.zijlstra@chello.nl: limit stack size] Signed-off-by: Ollie Wild <aaw@google.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: <linux-arch@vger.kernel.org> Cc: Hugh Dickins <hugh@veritas.com> [bunk@stusta.de: unexport bprm_mm_init] Signed-off-by: Adrian Bunk <bunk@stusta.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2007-07-19 16:48:16 +08:00			`retval = remove_arg_zero(bprm);`
			`if (retval)`
			`return retval;`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`retval = copy_strings_kernel(1, &bprm->interp, bprm);`
			`if (retval < 0) return retval;`
			`bprm->argc++;`
			`if (i_arg) {`
			`retval = copy_strings_kernel(1, &i_arg, bprm);`
			`if (retval < 0) return retval;`
			`bprm->argc++;`
			`}`
			`retval = copy_strings_kernel(1, &i_name, bprm);`
			`if (retval) return retval;`
			`bprm->argc++;`
exec: do not leave bprm->interp on stack If a series of scripts are executed, each triggering module loading via unprintable bytes in the script header, kernel stack contents can leak into the command line. Normally execution of binfmt_script and binfmt_misc happens recursively. However, when modules are enabled, and unprintable bytes exist in the bprm->buf, execution will restart after attempting to load matching binfmt modules. Unfortunately, the logic in binfmt_script and binfmt_misc does not expect to get restarted. They leave bprm->interp pointing to their local stack. This means on restart bprm->interp is left pointing into unused stack memory which can then be copied into the userspace argv areas. After additional study, it seems that both recursion and restart remains the desirable way to handle exec with scripts, misc, and modules. As such, we need to protect the changes to interp. This changes the logic to require allocation for any changes to the bprm->interp. To avoid adding a new kmalloc to every exec, the default value is left as-is. Only when passing through binfmt_script or binfmt_misc does an allocation take place. For a proof of concept, see DoTest.sh from: http://www.halfdog.net/Security/2012/LinuxKernelBinfmtScriptStackDataDisclosure/ Signed-off-by: Kees Cook <keescook@chromium.org> Cc: halfdog <me@halfdog.net> Cc: P J P <ppandit@redhat.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2012-12-21 07:05:16 +08:00			`retval = bprm_change_interp(interp, bprm);`
			`if (retval < 0)`
			`return retval;`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00
			`/*`
			`* OK, now restart the process with the interpreter's dentry.`
			`*/`
			`file = open_exec(interp);`
			`if (IS_ERR(file))`
			`return PTR_ERR(file);`

			`bprm->file = file;`
			`retval = prepare_binprm(bprm);`
			`if (retval < 0)`
			`return retval;`
get rid of pt_regs argument of search_binary_handler() Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> 2012-10-21 09:53:31 +08:00			`return search_binary_handler(bprm);`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`}`

			`static struct linux_binfmt script_format = {`
			`.module = THIS_MODULE,`
			`.load_binary = load_script,`
			`};`

			`static int __init init_script_binfmt(void)`
			`{`
__register_binfmt() made void Just don't pass NULL to it - nobody does, anyway. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> 2012-03-17 15:05:16 +08:00			`register_binfmt(&script_format);`
			`return 0;`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`}`

			`static void __exit exit_script_binfmt(void)`
			`{`
			`unregister_binfmt(&script_format);`
			`}`

			`core_initcall(init_script_binfmt);`
			`module_exit(exit_script_binfmt);`
			`MODULE_LICENSE("GPL");`