xref: /linux-6.15/fs/exec.c (revision 169eae77)
1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  *  linux/fs/exec.c
41da177e4SLinus Torvalds  *
51da177e4SLinus Torvalds  *  Copyright (C) 1991, 1992  Linus Torvalds
61da177e4SLinus Torvalds  */
71da177e4SLinus Torvalds 
81da177e4SLinus Torvalds /*
91da177e4SLinus Torvalds  * #!-checking implemented by tytso.
101da177e4SLinus Torvalds  */
111da177e4SLinus Torvalds /*
121da177e4SLinus Torvalds  * Demand-loading implemented 01.12.91 - no need to read anything but
131da177e4SLinus Torvalds  * the header into memory. The inode of the executable is put into
141da177e4SLinus Torvalds  * "current->executable", and page faults do the actual loading. Clean.
151da177e4SLinus Torvalds  *
161da177e4SLinus Torvalds  * Once more I can proudly say that linux stood up to being changed: it
171da177e4SLinus Torvalds  * was less than 2 hours work to get demand-loading completely implemented.
181da177e4SLinus Torvalds  *
191da177e4SLinus Torvalds  * Demand loading changed July 1993 by Eric Youngdale.   Use mmap instead,
201da177e4SLinus Torvalds  * current->executable is only used by the procfs.  This allows a dispatch
211da177e4SLinus Torvalds  * table to check for several different types  of binary formats.  We keep
221da177e4SLinus Torvalds  * trying until we recognize the file or we run out of supported binary
231da177e4SLinus Torvalds  * formats.
241da177e4SLinus Torvalds  */
251da177e4SLinus Torvalds 
26b89999d0SScott Branden #include <linux/kernel_read_file.h>
271da177e4SLinus Torvalds #include <linux/slab.h>
281da177e4SLinus Torvalds #include <linux/file.h>
299f3acc31SAl Viro #include <linux/fdtable.h>
30ba92a43dSHugh Dickins #include <linux/mm.h>
311da177e4SLinus Torvalds #include <linux/stat.h>
321da177e4SLinus Torvalds #include <linux/fcntl.h>
33ba92a43dSHugh Dickins #include <linux/swap.h>
3474aadce9SNeil Horman #include <linux/string.h>
351da177e4SLinus Torvalds #include <linux/init.h>
366e84f315SIngo Molnar #include <linux/sched/mm.h>
37f7ccbae4SIngo Molnar #include <linux/sched/coredump.h>
383f07c014SIngo Molnar #include <linux/sched/signal.h>
396a3827d7SIngo Molnar #include <linux/sched/numa_balancing.h>
4029930025SIngo Molnar #include <linux/sched/task.h>
41ca5b172bSHugh Dickins #include <linux/pagemap.h>
42cdd6c482SIngo Molnar #include <linux/perf_event.h>
431da177e4SLinus Torvalds #include <linux/highmem.h>
441da177e4SLinus Torvalds #include <linux/spinlock.h>
451da177e4SLinus Torvalds #include <linux/key.h>
461da177e4SLinus Torvalds #include <linux/personality.h>
471da177e4SLinus Torvalds #include <linux/binfmts.h>
481da177e4SLinus Torvalds #include <linux/utsname.h>
4984d73786SSukadev Bhattiprolu #include <linux/pid_namespace.h>
501da177e4SLinus Torvalds #include <linux/module.h>
511da177e4SLinus Torvalds #include <linux/namei.h>
521da177e4SLinus Torvalds #include <linux/mount.h>
531da177e4SLinus Torvalds #include <linux/security.h>
541da177e4SLinus Torvalds #include <linux/syscalls.h>
558f0ab514SJay Lan #include <linux/tsacct_kern.h>
569f46080cSMatt Helsley #include <linux/cn_proc.h>
57473ae30bSAl Viro #include <linux/audit.h>
585f4123beSJohannes Berg #include <linux/kmod.h>
596110e3abSEric Paris #include <linux/fsnotify.h>
605ad4e53bSAl Viro #include <linux/fs_struct.h>
613d5992d2SYing Han #include <linux/oom.h>
620e028465SOleg Nesterov #include <linux/compat.h>
63b44a7dfcSMimi Zohar #include <linux/vmalloc.h>
640f212204SJens Axboe #include <linux/io_uring.h>
651446e1dfSGabriel Krisman Bertazi #include <linux/syscall_user_dispatch.h>
6666ad3986SLuis Chamberlain #include <linux/coredump.h>
672b5f9dadSAndrei Vagin #include <linux/time_namespace.h>
68fd593511SBeau Belgrave #include <linux/user_events.h>
69932562a6SKent Overstreet #include <linux/rseq.h>
703a9e567cSJinjiang Tu #include <linux/ksm.h>
711da177e4SLinus Torvalds 
727c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
731da177e4SLinus Torvalds #include <asm/mmu_context.h>
74b6a2fea3SOllie Wild #include <asm/tlb.h>
7543d2b113SKAMEZAWA Hiroyuki 
7643d2b113SKAMEZAWA Hiroyuki #include <trace/events/task.h>
77a6f76f23SDavid Howells #include "internal.h"
781da177e4SLinus Torvalds 
794ff16c25SDavid Smith #include <trace/events/sched.h>
804ff16c25SDavid Smith 
/* Forward declaration; the definition is not in this part of the file. */
static int bprm_creds_from_file(struct linux_binprm *bprm);

/*
 * NOTE(review): non-static, so presumably read or written elsewhere; its
 * meaning is not visible here - confirm against its users.
 */
int suid_dumpable = 0;

/* Binary format handlers; entries are added and removed under binfmt_lock. */
static LIST_HEAD(formats);
/* Guards "formats": taken for write to (un)register, for read to walk it. */
static DEFINE_RWLOCK(binfmt_lock);
871da177e4SLinus Torvalds 
__register_binfmt(struct linux_binfmt * fmt,int insert)888fc3dc5aSAl Viro void __register_binfmt(struct linux_binfmt * fmt, int insert)
891da177e4SLinus Torvalds {
901da177e4SLinus Torvalds 	write_lock(&binfmt_lock);
9174641f58SIvan Kokshaysky 	insert ? list_add(&fmt->lh, &formats) :
9274641f58SIvan Kokshaysky 		 list_add_tail(&fmt->lh, &formats);
931da177e4SLinus Torvalds 	write_unlock(&binfmt_lock);
941da177e4SLinus Torvalds }
951da177e4SLinus Torvalds 
9674641f58SIvan Kokshaysky EXPORT_SYMBOL(__register_binfmt);
971da177e4SLinus Torvalds 
unregister_binfmt(struct linux_binfmt * fmt)98f6b450d4SAlexey Dobriyan void unregister_binfmt(struct linux_binfmt * fmt)
991da177e4SLinus Torvalds {
1001da177e4SLinus Torvalds 	write_lock(&binfmt_lock);
101e4dc1b14SAlexey Dobriyan 	list_del(&fmt->lh);
1021da177e4SLinus Torvalds 	write_unlock(&binfmt_lock);
1031da177e4SLinus Torvalds }
1041da177e4SLinus Torvalds 
1051da177e4SLinus Torvalds EXPORT_SYMBOL(unregister_binfmt);
1061da177e4SLinus Torvalds 
put_binfmt(struct linux_binfmt * fmt)1071da177e4SLinus Torvalds static inline void put_binfmt(struct linux_binfmt * fmt)
1081da177e4SLinus Torvalds {
1091da177e4SLinus Torvalds 	module_put(fmt->module);
1101da177e4SLinus Torvalds }
1111da177e4SLinus Torvalds 
path_noexec(const struct path * path)11290f8572bSEric W. Biederman bool path_noexec(const struct path *path)
11390f8572bSEric W. Biederman {
11490f8572bSEric W. Biederman 	return (path->mnt->mnt_flags & MNT_NOEXEC) ||
11590f8572bSEric W. Biederman 	       (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
11690f8572bSEric W. Biederman }
11790f8572bSEric W. Biederman 
11869369a70SJosh Triplett #ifdef CONFIG_USELIB
1191da177e4SLinus Torvalds /*
1201da177e4SLinus Torvalds  * Note that a shared library must be both readable and executable due to
1211da177e4SLinus Torvalds  * security reasons.
1221da177e4SLinus Torvalds  *
123b452722eSTom Rix  * Also note that we take the address to load from the file itself.
1241da177e4SLinus Torvalds  */
SYSCALL_DEFINE1(uselib,const char __user *,library)1251e7bfb21SHeiko Carstens SYSCALL_DEFINE1(uselib, const char __user *, library)
1261da177e4SLinus Torvalds {
12772c2d531SAl Viro 	struct linux_binfmt *fmt;
1281da177e4SLinus Torvalds 	struct file *file;
12991a27b2aSJeff Layton 	struct filename *tmp = getname(library);
130964bd183SAl Viro 	int error = PTR_ERR(tmp);
13147c805dcSAl Viro 	static const struct open_flags uselib_flags = {
1323eab8301SLinus Torvalds 		.open_flag = O_LARGEFILE | O_RDONLY,
13362fb4a15SAl Viro 		.acc_mode = MAY_READ | MAY_EXEC,
134f9652e10SAl Viro 		.intent = LOOKUP_OPEN,
135f9652e10SAl Viro 		.lookup_flags = LOOKUP_FOLLOW,
13647c805dcSAl Viro 	};
1371da177e4SLinus Torvalds 
1386e8341a1SAl Viro 	if (IS_ERR(tmp))
1391da177e4SLinus Torvalds 		goto out;
1401da177e4SLinus Torvalds 
141f9652e10SAl Viro 	file = do_filp_open(AT_FDCWD, tmp, &uselib_flags);
1426e8341a1SAl Viro 	putname(tmp);
1431da177e4SLinus Torvalds 	error = PTR_ERR(file);
1441da177e4SLinus Torvalds 	if (IS_ERR(file))
1451da177e4SLinus Torvalds 		goto out;
1461da177e4SLinus Torvalds 
147633fb6acSKees Cook 	/*
1480d196e75SMateusz Guzik 	 * Check do_open_execat() for an explanation.
149633fb6acSKees Cook 	 */
150db19c91cSKees Cook 	error = -EACCES;
1510d196e75SMateusz Guzik 	if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
1520d196e75SMateusz Guzik 	    path_noexec(&file->f_path))
1536e8341a1SAl Viro 		goto exit;
1546e8341a1SAl Viro 
1551da177e4SLinus Torvalds 	error = -ENOEXEC;
1561da177e4SLinus Torvalds 
1571da177e4SLinus Torvalds 	read_lock(&binfmt_lock);
158e4dc1b14SAlexey Dobriyan 	list_for_each_entry(fmt, &formats, lh) {
1591da177e4SLinus Torvalds 		if (!fmt->load_shlib)
1601da177e4SLinus Torvalds 			continue;
1611da177e4SLinus Torvalds 		if (!try_module_get(fmt->module))
1621da177e4SLinus Torvalds 			continue;
1631da177e4SLinus Torvalds 		read_unlock(&binfmt_lock);
1641da177e4SLinus Torvalds 		error = fmt->load_shlib(file);
1651da177e4SLinus Torvalds 		read_lock(&binfmt_lock);
1661da177e4SLinus Torvalds 		put_binfmt(fmt);
1671da177e4SLinus Torvalds 		if (error != -ENOEXEC)
1681da177e4SLinus Torvalds 			break;
1691da177e4SLinus Torvalds 	}
1701da177e4SLinus Torvalds 	read_unlock(&binfmt_lock);
1716e8341a1SAl Viro exit:
1721da177e4SLinus Torvalds 	fput(file);
1731da177e4SLinus Torvalds out:
1741da177e4SLinus Torvalds 	return error;
1751da177e4SLinus Torvalds }
17669369a70SJosh Triplett #endif /* #ifdef CONFIG_USELIB */
1771da177e4SLinus Torvalds 
178b6a2fea3SOllie Wild #ifdef CONFIG_MMU
179ae6b585eSOleg Nesterov /*
180ae6b585eSOleg Nesterov  * The nascent bprm->mm is not visible until exec_mmap() but it can
181ae6b585eSOleg Nesterov  * use a lot of memory, account these pages in current->mm temporary
182ae6b585eSOleg Nesterov  * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
183ae6b585eSOleg Nesterov  * change the counter back via acct_arg_size(0).
184ae6b585eSOleg Nesterov  */
acct_arg_size(struct linux_binprm * bprm,unsigned long pages)1850e028465SOleg Nesterov static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
1863c77f845SOleg Nesterov {
1873c77f845SOleg Nesterov 	struct mm_struct *mm = current->mm;
1883c77f845SOleg Nesterov 	long diff = (long)(pages - bprm->vma_pages);
1893c77f845SOleg Nesterov 
1903c77f845SOleg Nesterov 	if (!mm || !diff)
1913c77f845SOleg Nesterov 		return;
1923c77f845SOleg Nesterov 
1933c77f845SOleg Nesterov 	bprm->vma_pages = pages;
1943c77f845SOleg Nesterov 	add_mm_counter(mm, MM_ANONPAGES, diff);
1953c77f845SOleg Nesterov }
1963c77f845SOleg Nesterov 
get_arg_page(struct linux_binprm * bprm,unsigned long pos,int write)1970e028465SOleg Nesterov static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
198b6a2fea3SOllie Wild 		int write)
199b6a2fea3SOllie Wild {
200b6a2fea3SOllie Wild 	struct page *page;
201f313c51dSLinus Torvalds 	struct vm_area_struct *vma = bprm->vma;
202f313c51dSLinus Torvalds 	struct mm_struct *mm = bprm->mm;
203b6a2fea3SOllie Wild 	int ret;
204b6a2fea3SOllie Wild 
205f313c51dSLinus Torvalds 	/*
206f313c51dSLinus Torvalds 	 * Avoid relying on expanding the stack down in GUP (which
207f313c51dSLinus Torvalds 	 * does not work for STACK_GROWSUP anyway), and just do it
2087a571499SLorenzo Stoakes 	 * ahead of time.
209f313c51dSLinus Torvalds 	 */
2107a571499SLorenzo Stoakes 	if (!mmap_read_lock_maybe_expand(mm, vma, pos, write))
211b6a2fea3SOllie Wild 		return NULL;
2129beae1eaSLorenzo Stoakes 
2131e987790SDave Hansen 	/*
2141e987790SDave Hansen 	 * We are doing an exec().  'current' is the process
215f313c51dSLinus Torvalds 	 * doing the exec and 'mm' is the new process's mm.
2161e987790SDave Hansen 	 */
217f313c51dSLinus Torvalds 	ret = get_user_pages_remote(mm, pos, 1,
218f313c51dSLinus Torvalds 			write ? FOLL_WRITE : 0,
219ca5e8632SLorenzo Stoakes 			&page, NULL);
220f313c51dSLinus Torvalds 	mmap_read_unlock(mm);
221b6a2fea3SOllie Wild 	if (ret <= 0)
222b6a2fea3SOllie Wild 		return NULL;
223b6a2fea3SOllie Wild 
224655c16a8SOleg Nesterov 	if (write)
225f313c51dSLinus Torvalds 		acct_arg_size(bprm, vma_pages(vma));
226b6a2fea3SOllie Wild 
227b6a2fea3SOllie Wild 	return page;
228b6a2fea3SOllie Wild }
229b6a2fea3SOllie Wild 
/* Release a page previously handed out by get_arg_page(). */
static void put_arg_page(struct page *page)
{
	put_page(page);
}
234b6a2fea3SOllie Wild 
/* Intentionally empty in the CONFIG_MMU build. */
static void free_arg_pages(struct linux_binprm *bprm)
{
}
238b6a2fea3SOllie Wild 
flush_arg_page(struct linux_binprm * bprm,unsigned long pos,struct page * page)239b6a2fea3SOllie Wild static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
240b6a2fea3SOllie Wild 		struct page *page)
241b6a2fea3SOllie Wild {
242b6a2fea3SOllie Wild 	flush_cache_page(bprm->vma, pos, page_to_pfn(page));
243b6a2fea3SOllie Wild }
244b6a2fea3SOllie Wild 
__bprm_mm_init(struct linux_binprm * bprm)245b6a2fea3SOllie Wild static int __bprm_mm_init(struct linux_binprm *bprm)
246b6a2fea3SOllie Wild {
247eaccbfa5SLuiz Fernando N. Capitulino 	int err;
248b6a2fea3SOllie Wild 	struct vm_area_struct *vma = NULL;
249b6a2fea3SOllie Wild 	struct mm_struct *mm = bprm->mm;
250b6a2fea3SOllie Wild 
251490fc053SLinus Torvalds 	bprm->vma = vma = vm_area_alloc(mm);
252b6a2fea3SOllie Wild 	if (!vma)
253eaccbfa5SLuiz Fernando N. Capitulino 		return -ENOMEM;
254bfd40eafSKirill A. Shutemov 	vma_set_anonymous(vma);
255b6a2fea3SOllie Wild 
256d8ed45c5SMichel Lespinasse 	if (mmap_write_lock_killable(mm)) {
257f268dfe9SMichal Hocko 		err = -EINTR;
258f268dfe9SMichal Hocko 		goto err_free;
259f268dfe9SMichal Hocko 	}
260b6a2fea3SOllie Wild 
261b6a2fea3SOllie Wild 	/*
2623a9e567cSJinjiang Tu 	 * Need to be called with mmap write lock
2633a9e567cSJinjiang Tu 	 * held, to avoid race with ksmd.
2643a9e567cSJinjiang Tu 	 */
2653a9e567cSJinjiang Tu 	err = ksm_execve(mm);
2663a9e567cSJinjiang Tu 	if (err)
2673a9e567cSJinjiang Tu 		goto err_ksm;
2683a9e567cSJinjiang Tu 
2693a9e567cSJinjiang Tu 	/*
270b6a2fea3SOllie Wild 	 * Place the stack at the largest stack address the architecture
271b6a2fea3SOllie Wild 	 * supports. Later, we'll move this to an appropriate place. We don't
272b6a2fea3SOllie Wild 	 * use STACK_TOP because that can depend on attributes which aren't
273b6a2fea3SOllie Wild 	 * configured yet.
274b6a2fea3SOllie Wild 	 */
275aacb3d17SMichal Hocko 	BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
276b6a2fea3SOllie Wild 	vma->vm_end = STACK_TOP_MAX;
277b6a2fea3SOllie Wild 	vma->vm_start = vma->vm_end - PAGE_SIZE;
2781c71222eSSuren Baghdasaryan 	vm_flags_init(vma, VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP);
2793ed75eb8SColy Li 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
280462e635eSTavis Ormandy 
281b6a2fea3SOllie Wild 	err = insert_vm_struct(mm, vma);
282eaccbfa5SLuiz Fernando N. Capitulino 	if (err)
283b6a2fea3SOllie Wild 		goto err;
284b6a2fea3SOllie Wild 
285b6a2fea3SOllie Wild 	mm->stack_vm = mm->total_vm = 1;
286d8ed45c5SMichel Lespinasse 	mmap_write_unlock(mm);
287b6a2fea3SOllie Wild 	bprm->p = vma->vm_end - sizeof(void *);
288b6a2fea3SOllie Wild 	return 0;
289b6a2fea3SOllie Wild err:
2903a9e567cSJinjiang Tu 	ksm_exit(mm);
2913a9e567cSJinjiang Tu err_ksm:
292d8ed45c5SMichel Lespinasse 	mmap_write_unlock(mm);
293f268dfe9SMichal Hocko err_free:
294b6a2fea3SOllie Wild 	bprm->vma = NULL;
2953928d4f5SLinus Torvalds 	vm_area_free(vma);
296b6a2fea3SOllie Wild 	return err;
297b6a2fea3SOllie Wild }
298b6a2fea3SOllie Wild 
valid_arg_len(struct linux_binprm * bprm,long len)299b6a2fea3SOllie Wild static bool valid_arg_len(struct linux_binprm *bprm, long len)
300b6a2fea3SOllie Wild {
301b6a2fea3SOllie Wild 	return len <= MAX_ARG_STRLEN;
302b6a2fea3SOllie Wild }
303b6a2fea3SOllie Wild 
304b6a2fea3SOllie Wild #else
305b6a2fea3SOllie Wild 
/* Argument page accounting is a no-op without CONFIG_MMU. */
static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
{
}
3093c77f845SOleg Nesterov 
get_arg_page(struct linux_binprm * bprm,unsigned long pos,int write)3100e028465SOleg Nesterov static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
311b6a2fea3SOllie Wild 		int write)
312b6a2fea3SOllie Wild {
313b6a2fea3SOllie Wild 	struct page *page;
314b6a2fea3SOllie Wild 
315b6a2fea3SOllie Wild 	page = bprm->page[pos / PAGE_SIZE];
316b6a2fea3SOllie Wild 	if (!page && write) {
317b6a2fea3SOllie Wild 		page = alloc_page(GFP_HIGHUSER|__GFP_ZERO);
318b6a2fea3SOllie Wild 		if (!page)
319b6a2fea3SOllie Wild 			return NULL;
320b6a2fea3SOllie Wild 		bprm->page[pos / PAGE_SIZE] = page;
321b6a2fea3SOllie Wild 	}
322b6a2fea3SOllie Wild 
323b6a2fea3SOllie Wild 	return page;
324b6a2fea3SOllie Wild }
325b6a2fea3SOllie Wild 
/* Nothing to release here; pages stay in bprm->page[] until freed. */
static void put_arg_page(struct page *page)
{
}
329b6a2fea3SOllie Wild 
free_arg_page(struct linux_binprm * bprm,int i)330b6a2fea3SOllie Wild static void free_arg_page(struct linux_binprm *bprm, int i)
331b6a2fea3SOllie Wild {
332b6a2fea3SOllie Wild 	if (bprm->page[i]) {
333b6a2fea3SOllie Wild 		__free_page(bprm->page[i]);
334b6a2fea3SOllie Wild 		bprm->page[i] = NULL;
335b6a2fea3SOllie Wild 	}
336b6a2fea3SOllie Wild }
337b6a2fea3SOllie Wild 
free_arg_pages(struct linux_binprm * bprm)338b6a2fea3SOllie Wild static void free_arg_pages(struct linux_binprm *bprm)
339b6a2fea3SOllie Wild {
340b6a2fea3SOllie Wild 	int i;
341b6a2fea3SOllie Wild 
342b6a2fea3SOllie Wild 	for (i = 0; i < MAX_ARG_PAGES; i++)
343b6a2fea3SOllie Wild 		free_arg_page(bprm, i);
344b6a2fea3SOllie Wild }
345b6a2fea3SOllie Wild 
/* No cache flushing is done for argument pages without CONFIG_MMU. */
static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
		struct page *page)
{
}
350b6a2fea3SOllie Wild 
__bprm_mm_init(struct linux_binprm * bprm)351b6a2fea3SOllie Wild static int __bprm_mm_init(struct linux_binprm *bprm)
352b6a2fea3SOllie Wild {
353b6a2fea3SOllie Wild 	bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
354b6a2fea3SOllie Wild 	return 0;
355b6a2fea3SOllie Wild }
356b6a2fea3SOllie Wild 
valid_arg_len(struct linux_binprm * bprm,long len)357b6a2fea3SOllie Wild static bool valid_arg_len(struct linux_binprm *bprm, long len)
358b6a2fea3SOllie Wild {
359b6a2fea3SOllie Wild 	return len <= bprm->p;
360b6a2fea3SOllie Wild }
361b6a2fea3SOllie Wild 
362b6a2fea3SOllie Wild #endif /* CONFIG_MMU */
363b6a2fea3SOllie Wild 
364b6a2fea3SOllie Wild /*
365b6a2fea3SOllie Wild  * Create a new mm_struct and populate it with a temporary stack
366b6a2fea3SOllie Wild  * vm_area_struct.  We don't have enough context at this point to set the stack
367b6a2fea3SOllie Wild  * flags, permissions, and offset, so we use temporary values.  We'll update
368b6a2fea3SOllie Wild  * them later in setup_arg_pages().
369b6a2fea3SOllie Wild  */
bprm_mm_init(struct linux_binprm * bprm)3709cc64ceaSYuanhan Liu static int bprm_mm_init(struct linux_binprm *bprm)
371b6a2fea3SOllie Wild {
372b6a2fea3SOllie Wild 	int err;
373b6a2fea3SOllie Wild 	struct mm_struct *mm = NULL;
374b6a2fea3SOllie Wild 
375b6a2fea3SOllie Wild 	bprm->mm = mm = mm_alloc();
376b6a2fea3SOllie Wild 	err = -ENOMEM;
377b6a2fea3SOllie Wild 	if (!mm)
378b6a2fea3SOllie Wild 		goto err;
379b6a2fea3SOllie Wild 
380c31dbb14SKees Cook 	/* Save current stack limit for all calculations made during exec. */
381c31dbb14SKees Cook 	task_lock(current->group_leader);
382c31dbb14SKees Cook 	bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK];
383c31dbb14SKees Cook 	task_unlock(current->group_leader);
384c31dbb14SKees Cook 
385b6a2fea3SOllie Wild 	err = __bprm_mm_init(bprm);
386b6a2fea3SOllie Wild 	if (err)
387b6a2fea3SOllie Wild 		goto err;
388b6a2fea3SOllie Wild 
389b6a2fea3SOllie Wild 	return 0;
390b6a2fea3SOllie Wild 
391b6a2fea3SOllie Wild err:
392b6a2fea3SOllie Wild 	if (mm) {
393b6a2fea3SOllie Wild 		bprm->mm = NULL;
394b6a2fea3SOllie Wild 		mmdrop(mm);
395b6a2fea3SOllie Wild 	}
396b6a2fea3SOllie Wild 
397b6a2fea3SOllie Wild 	return err;
398b6a2fea3SOllie Wild }
399b6a2fea3SOllie Wild 
/*
 * A user-space array of string pointers, passed by value to
 * get_user_arg_ptr(), count() and copy_strings().  With CONFIG_COMPAT the
 * array may hold compat_uptr_t entries instead of native pointers;
 * is_compat says which union member is valid.
 */
struct user_arg_ptr {
#ifdef CONFIG_COMPAT
	/* When set, get_user_arg_ptr() reads entries through ptr.compat. */
	bool is_compat;
#endif
	union {
		/* Array of native user pointers to user strings. */
		const char __user *const __user *native;
#ifdef CONFIG_COMPAT
		/* Array of compat_uptr_t entries, converted with compat_ptr(). */
		const compat_uptr_t __user *compat;
#endif
	} ptr;
};
411ba2d0162SOleg Nesterov 
get_user_arg_ptr(struct user_arg_ptr argv,int nr)412ba2d0162SOleg Nesterov static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
4131d1dbf81SOleg Nesterov {
4140e028465SOleg Nesterov 	const char __user *native;
4151d1dbf81SOleg Nesterov 
4160e028465SOleg Nesterov #ifdef CONFIG_COMPAT
4170e028465SOleg Nesterov 	if (unlikely(argv.is_compat)) {
4180e028465SOleg Nesterov 		compat_uptr_t compat;
4190e028465SOleg Nesterov 
4200e028465SOleg Nesterov 		if (get_user(compat, argv.ptr.compat + nr))
4211d1dbf81SOleg Nesterov 			return ERR_PTR(-EFAULT);
4221d1dbf81SOleg Nesterov 
4230e028465SOleg Nesterov 		return compat_ptr(compat);
4240e028465SOleg Nesterov 	}
4250e028465SOleg Nesterov #endif
4260e028465SOleg Nesterov 
4270e028465SOleg Nesterov 	if (get_user(native, argv.ptr.native + nr))
4280e028465SOleg Nesterov 		return ERR_PTR(-EFAULT);
4290e028465SOleg Nesterov 
4300e028465SOleg Nesterov 	return native;
4311d1dbf81SOleg Nesterov }
4321d1dbf81SOleg Nesterov 
4331da177e4SLinus Torvalds /*
4341da177e4SLinus Torvalds  * count() counts the number of strings in array ARGV.
4351da177e4SLinus Torvalds  */
count(struct user_arg_ptr argv,int max)436ba2d0162SOleg Nesterov static int count(struct user_arg_ptr argv, int max)
4371da177e4SLinus Torvalds {
4381da177e4SLinus Torvalds 	int i = 0;
4391da177e4SLinus Torvalds 
4400e028465SOleg Nesterov 	if (argv.ptr.native != NULL) {
4411da177e4SLinus Torvalds 		for (;;) {
4421d1dbf81SOleg Nesterov 			const char __user *p = get_user_arg_ptr(argv, i);
4431da177e4SLinus Torvalds 
4441da177e4SLinus Torvalds 			if (!p)
4451da177e4SLinus Torvalds 				break;
4461d1dbf81SOleg Nesterov 
4471d1dbf81SOleg Nesterov 			if (IS_ERR(p))
4481d1dbf81SOleg Nesterov 				return -EFAULT;
4491d1dbf81SOleg Nesterov 
4506d92d4f6SXi Wang 			if (i >= max)
4511da177e4SLinus Torvalds 				return -E2BIG;
4526d92d4f6SXi Wang 			++i;
4539aea5a65SRoland McGrath 
4549aea5a65SRoland McGrath 			if (fatal_signal_pending(current))
4559aea5a65SRoland McGrath 				return -ERESTARTNOHAND;
4561da177e4SLinus Torvalds 			cond_resched();
4571da177e4SLinus Torvalds 		}
4581da177e4SLinus Torvalds 	}
4591da177e4SLinus Torvalds 	return i;
4601da177e4SLinus Torvalds }
4611da177e4SLinus Torvalds 
count_strings_kernel(const char * const * argv)462be619f7fSEric W. Biederman static int count_strings_kernel(const char *const *argv)
463be619f7fSEric W. Biederman {
464be619f7fSEric W. Biederman 	int i;
465be619f7fSEric W. Biederman 
466be619f7fSEric W. Biederman 	if (!argv)
467be619f7fSEric W. Biederman 		return 0;
468be619f7fSEric W. Biederman 
469be619f7fSEric W. Biederman 	for (i = 0; argv[i]; ++i) {
470be619f7fSEric W. Biederman 		if (i >= MAX_ARG_STRINGS)
471be619f7fSEric W. Biederman 			return -E2BIG;
472be619f7fSEric W. Biederman 		if (fatal_signal_pending(current))
473be619f7fSEric W. Biederman 			return -ERESTARTNOHAND;
474be619f7fSEric W. Biederman 		cond_resched();
475be619f7fSEric W. Biederman 	}
476be619f7fSEric W. Biederman 	return i;
477be619f7fSEric W. Biederman }
478be619f7fSEric W. Biederman 
/*
 * Record the lowest address bprm->p may reach: bprm->argmin is set to
 * @limit bytes below the current bprm->p.  Returns -E2BIG if @limit is
 * larger than bprm->p, 0 otherwise.  Without CONFIG_MMU this does nothing
 * and returns 0.
 */
static inline int bprm_set_stack_limit(struct linux_binprm *bprm,
				       unsigned long limit)
{
#ifdef CONFIG_MMU
	/* Avoid a pathological bprm->p. */
	if (limit > bprm->p)
		return -E2BIG;
	bprm->argmin = bprm->p - limit;
#endif
	return 0;
}
bprm_hit_stack_limit(struct linux_binprm * bprm)490084ebf7cSKees Cook static inline bool bprm_hit_stack_limit(struct linux_binprm *bprm)
491084ebf7cSKees Cook {
492084ebf7cSKees Cook #ifdef CONFIG_MMU
493084ebf7cSKees Cook 	return bprm->p < bprm->argmin;
494084ebf7cSKees Cook #else
495084ebf7cSKees Cook 	return false;
496084ebf7cSKees Cook #endif
497084ebf7cSKees Cook }
498084ebf7cSKees Cook 
49960371f43SKees Cook /*
50060371f43SKees Cook  * Calculate bprm->argmin from:
50160371f43SKees Cook  * - _STK_LIM
50260371f43SKees Cook  * - ARG_MAX
50360371f43SKees Cook  * - bprm->rlim_stack.rlim_cur
50460371f43SKees Cook  * - bprm->argc
50560371f43SKees Cook  * - bprm->envc
50660371f43SKees Cook  * - bprm->p
50760371f43SKees Cook  */
bprm_stack_limits(struct linux_binprm * bprm)508d8b9cd54SEric W. Biederman static int bprm_stack_limits(struct linux_binprm *bprm)
509655c16a8SOleg Nesterov {
510655c16a8SOleg Nesterov 	unsigned long limit, ptr_size;
511655c16a8SOleg Nesterov 
512655c16a8SOleg Nesterov 	/*
513655c16a8SOleg Nesterov 	 * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
514655c16a8SOleg Nesterov 	 * (whichever is smaller) for the argv+env strings.
515655c16a8SOleg Nesterov 	 * This ensures that:
516655c16a8SOleg Nesterov 	 *  - the remaining binfmt code will not run out of stack space,
517655c16a8SOleg Nesterov 	 *  - the program will have a reasonable amount of stack left
518655c16a8SOleg Nesterov 	 *    to work from.
519655c16a8SOleg Nesterov 	 */
520655c16a8SOleg Nesterov 	limit = _STK_LIM / 4 * 3;
521655c16a8SOleg Nesterov 	limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
522655c16a8SOleg Nesterov 	/*
523655c16a8SOleg Nesterov 	 * We've historically supported up to 32 pages (ARG_MAX)
524655c16a8SOleg Nesterov 	 * of argument strings even with small stacks
525655c16a8SOleg Nesterov 	 */
526655c16a8SOleg Nesterov 	limit = max_t(unsigned long, limit, ARG_MAX);
52721f93108SKees Cook 	/* Reject totally pathological counts. */
52821f93108SKees Cook 	if (bprm->argc < 0 || bprm->envc < 0)
52921f93108SKees Cook 		return -E2BIG;
530655c16a8SOleg Nesterov 	/*
531655c16a8SOleg Nesterov 	 * We must account for the size of all the argv and envp pointers to
532655c16a8SOleg Nesterov 	 * the argv and envp strings, since they will also take up space in
533655c16a8SOleg Nesterov 	 * the stack. They aren't stored until much later when we can't
534655c16a8SOleg Nesterov 	 * signal to the parent that the child has run out of stack space.
535655c16a8SOleg Nesterov 	 * Instead, calculate it here so it's possible to fail gracefully.
536dcd46d89SKees Cook 	 *
537dcd46d89SKees Cook 	 * In the case of argc = 0, make sure there is space for adding a
538dcd46d89SKees Cook 	 * empty string (which will bump argc to 1), to ensure confused
539dcd46d89SKees Cook 	 * userspace programs don't start processing from argv[1], thinking
540dcd46d89SKees Cook 	 * argc can never be 0, to keep them from walking envp by accident.
541dcd46d89SKees Cook 	 * See do_execveat_common().
542655c16a8SOleg Nesterov 	 */
54321f93108SKees Cook 	if (check_add_overflow(max(bprm->argc, 1), bprm->envc, &ptr_size) ||
54421f93108SKees Cook 	    check_mul_overflow(ptr_size, sizeof(void *), &ptr_size))
54521f93108SKees Cook 		return -E2BIG;
546655c16a8SOleg Nesterov 	if (limit <= ptr_size)
547655c16a8SOleg Nesterov 		return -E2BIG;
548655c16a8SOleg Nesterov 	limit -= ptr_size;
549655c16a8SOleg Nesterov 
550084ebf7cSKees Cook 	return bprm_set_stack_limit(bprm, limit);
551655c16a8SOleg Nesterov }
552655c16a8SOleg Nesterov 
5531da177e4SLinus Torvalds /*
554b6a2fea3SOllie Wild  * 'copy_strings()' copies argument/environment strings from the old
555b6a2fea3SOllie Wild  * processes's memory to the new process's stack.  The call to get_user_pages()
556b6a2fea3SOllie Wild  * ensures the destination page is created and not swapped out.
5571da177e4SLinus Torvalds  */
copy_strings(int argc,struct user_arg_ptr argv,struct linux_binprm * bprm)558ba2d0162SOleg Nesterov static int copy_strings(int argc, struct user_arg_ptr argv,
55975c96f85SAdrian Bunk 			struct linux_binprm *bprm)
5601da177e4SLinus Torvalds {
5611da177e4SLinus Torvalds 	struct page *kmapped_page = NULL;
5621da177e4SLinus Torvalds 	char *kaddr = NULL;
563b6a2fea3SOllie Wild 	unsigned long kpos = 0;
5641da177e4SLinus Torvalds 	int ret;
5651da177e4SLinus Torvalds 
5661da177e4SLinus Torvalds 	while (argc-- > 0) {
567d7627467SDavid Howells 		const char __user *str;
5681da177e4SLinus Torvalds 		int len;
5691da177e4SLinus Torvalds 		unsigned long pos;
5701da177e4SLinus Torvalds 
5711da177e4SLinus Torvalds 		ret = -EFAULT;
5721d1dbf81SOleg Nesterov 		str = get_user_arg_ptr(argv, argc);
5731d1dbf81SOleg Nesterov 		if (IS_ERR(str))
5741da177e4SLinus Torvalds 			goto out;
5751da177e4SLinus Torvalds 
5761d1dbf81SOleg Nesterov 		len = strnlen_user(str, MAX_ARG_STRLEN);
5771d1dbf81SOleg Nesterov 		if (!len)
5781da177e4SLinus Torvalds 			goto out;
5791d1dbf81SOleg Nesterov 
5801d1dbf81SOleg Nesterov 		ret = -E2BIG;
5811d1dbf81SOleg Nesterov 		if (!valid_arg_len(bprm, len))
5821d1dbf81SOleg Nesterov 			goto out;
5831da177e4SLinus Torvalds 
584b452722eSTom Rix 		/* We're going to work our way backwards. */
5851da177e4SLinus Torvalds 		pos = bprm->p;
586b6a2fea3SOllie Wild 		str += len;
587b6a2fea3SOllie Wild 		bprm->p -= len;
588084ebf7cSKees Cook 		if (bprm_hit_stack_limit(bprm))
589655c16a8SOleg Nesterov 			goto out;
5901da177e4SLinus Torvalds 
5911da177e4SLinus Torvalds 		while (len > 0) {
5921da177e4SLinus Torvalds 			int offset, bytes_to_copy;
5931da177e4SLinus Torvalds 
5949aea5a65SRoland McGrath 			if (fatal_signal_pending(current)) {
5959aea5a65SRoland McGrath 				ret = -ERESTARTNOHAND;
5969aea5a65SRoland McGrath 				goto out;
5979aea5a65SRoland McGrath 			}
5987993bc1fSRoland McGrath 			cond_resched();
5997993bc1fSRoland McGrath 
6001da177e4SLinus Torvalds 			offset = pos % PAGE_SIZE;
601b6a2fea3SOllie Wild 			if (offset == 0)
602b6a2fea3SOllie Wild 				offset = PAGE_SIZE;
603b6a2fea3SOllie Wild 
604b6a2fea3SOllie Wild 			bytes_to_copy = offset;
605b6a2fea3SOllie Wild 			if (bytes_to_copy > len)
606b6a2fea3SOllie Wild 				bytes_to_copy = len;
607b6a2fea3SOllie Wild 
608b6a2fea3SOllie Wild 			offset -= bytes_to_copy;
609b6a2fea3SOllie Wild 			pos -= bytes_to_copy;
610b6a2fea3SOllie Wild 			str -= bytes_to_copy;
611b6a2fea3SOllie Wild 			len -= bytes_to_copy;
612b6a2fea3SOllie Wild 
613b6a2fea3SOllie Wild 			if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
614b6a2fea3SOllie Wild 				struct page *page;
615b6a2fea3SOllie Wild 
616b6a2fea3SOllie Wild 				page = get_arg_page(bprm, pos, 1);
6171da177e4SLinus Torvalds 				if (!page) {
618b6a2fea3SOllie Wild 					ret = -E2BIG;
6191da177e4SLinus Torvalds 					goto out;
6201da177e4SLinus Torvalds 				}
6211da177e4SLinus Torvalds 
622b6a2fea3SOllie Wild 				if (kmapped_page) {
623f358afc5SChristoph Hellwig 					flush_dcache_page(kmapped_page);
6243a608cfeSFabio M. De Francesco 					kunmap_local(kaddr);
625b6a2fea3SOllie Wild 					put_arg_page(kmapped_page);
626b6a2fea3SOllie Wild 				}
6271da177e4SLinus Torvalds 				kmapped_page = page;
6283a608cfeSFabio M. De Francesco 				kaddr = kmap_local_page(kmapped_page);
629b6a2fea3SOllie Wild 				kpos = pos & PAGE_MASK;
630b6a2fea3SOllie Wild 				flush_arg_page(bprm, kpos, kmapped_page);
6311da177e4SLinus Torvalds 			}
632b6a2fea3SOllie Wild 			if (copy_from_user(kaddr+offset, str, bytes_to_copy)) {
6331da177e4SLinus Torvalds 				ret = -EFAULT;
6341da177e4SLinus Torvalds 				goto out;
6351da177e4SLinus Torvalds 			}
6361da177e4SLinus Torvalds 		}
6371da177e4SLinus Torvalds 	}
6381da177e4SLinus Torvalds 	ret = 0;
6391da177e4SLinus Torvalds out:
640b6a2fea3SOllie Wild 	if (kmapped_page) {
641f358afc5SChristoph Hellwig 		flush_dcache_page(kmapped_page);
6423a608cfeSFabio M. De Francesco 		kunmap_local(kaddr);
643b6a2fea3SOllie Wild 		put_arg_page(kmapped_page);
644b6a2fea3SOllie Wild 	}
6451da177e4SLinus Torvalds 	return ret;
6461da177e4SLinus Torvalds }
6471da177e4SLinus Torvalds 
6481da177e4SLinus Torvalds /*
649986db2d1SChristoph Hellwig  * Copy and argument/environment string from the kernel to the processes stack.
6501da177e4SLinus Torvalds  */
copy_string_kernel(const char * arg,struct linux_binprm * bprm)651986db2d1SChristoph Hellwig int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
6521da177e4SLinus Torvalds {
653762a3af6SChristoph Hellwig 	int len = strnlen(arg, MAX_ARG_STRLEN) + 1 /* terminating NUL */;
654762a3af6SChristoph Hellwig 	unsigned long pos = bprm->p;
655ba2d0162SOleg Nesterov 
656762a3af6SChristoph Hellwig 	if (len == 0)
657762a3af6SChristoph Hellwig 		return -EFAULT;
658762a3af6SChristoph Hellwig 	if (!valid_arg_len(bprm, len))
659762a3af6SChristoph Hellwig 		return -E2BIG;
660ba2d0162SOleg Nesterov 
661762a3af6SChristoph Hellwig 	/* We're going to work our way backwards. */
662762a3af6SChristoph Hellwig 	arg += len;
663762a3af6SChristoph Hellwig 	bprm->p -= len;
664084ebf7cSKees Cook 	if (bprm_hit_stack_limit(bprm))
665762a3af6SChristoph Hellwig 		return -E2BIG;
666762a3af6SChristoph Hellwig 
667762a3af6SChristoph Hellwig 	while (len > 0) {
668762a3af6SChristoph Hellwig 		unsigned int bytes_to_copy = min_t(unsigned int, len,
669762a3af6SChristoph Hellwig 				min_not_zero(offset_in_page(pos), PAGE_SIZE));
670762a3af6SChristoph Hellwig 		struct page *page;
671762a3af6SChristoph Hellwig 
672762a3af6SChristoph Hellwig 		pos -= bytes_to_copy;
673762a3af6SChristoph Hellwig 		arg -= bytes_to_copy;
674762a3af6SChristoph Hellwig 		len -= bytes_to_copy;
675762a3af6SChristoph Hellwig 
676762a3af6SChristoph Hellwig 		page = get_arg_page(bprm, pos, 1);
677762a3af6SChristoph Hellwig 		if (!page)
678762a3af6SChristoph Hellwig 			return -E2BIG;
679762a3af6SChristoph Hellwig 		flush_arg_page(bprm, pos & PAGE_MASK, page);
680c6e8e36cSFabio M. De Francesco 		memcpy_to_page(page, offset_in_page(pos), arg, bytes_to_copy);
681762a3af6SChristoph Hellwig 		put_arg_page(page);
6821da177e4SLinus Torvalds 	}
683762a3af6SChristoph Hellwig 
684762a3af6SChristoph Hellwig 	return 0;
6851da177e4SLinus Torvalds }
686986db2d1SChristoph Hellwig EXPORT_SYMBOL(copy_string_kernel);
6871da177e4SLinus Torvalds 
copy_strings_kernel(int argc,const char * const * argv,struct linux_binprm * bprm)688be619f7fSEric W. Biederman static int copy_strings_kernel(int argc, const char *const *argv,
689be619f7fSEric W. Biederman 			       struct linux_binprm *bprm)
690be619f7fSEric W. Biederman {
691be619f7fSEric W. Biederman 	while (argc-- > 0) {
692be619f7fSEric W. Biederman 		int ret = copy_string_kernel(argv[argc], bprm);
693be619f7fSEric W. Biederman 		if (ret < 0)
694be619f7fSEric W. Biederman 			return ret;
695be619f7fSEric W. Biederman 		if (fatal_signal_pending(current))
696be619f7fSEric W. Biederman 			return -ERESTARTNOHAND;
697be619f7fSEric W. Biederman 		cond_resched();
698be619f7fSEric W. Biederman 	}
699be619f7fSEric W. Biederman 	return 0;
700be619f7fSEric W. Biederman }
701be619f7fSEric W. Biederman 
7021da177e4SLinus Torvalds #ifdef CONFIG_MMU
703b6a2fea3SOllie Wild 
7041da177e4SLinus Torvalds /*
705b6a2fea3SOllie Wild  * Finalizes the stack vm_area_struct. The flags and permissions are updated,
706b6a2fea3SOllie Wild  * the stack is optionally relocated, and some extra space is added.
 *
 * @bprm:             exec state; bprm->vma is the stack VMA being finalized,
 *                    and bprm->p / bprm->exec are rebased by the stack shift.
 * @stack_top:        requested top of the stack.
 * @executable_stack: EXSTACK_ENABLE_X / EXSTACK_DISABLE_X force VM_EXEC on or
 *                    off; any other value leaves VM_STACK_FLAGS as it is.
 *
 * The mmap write lock is taken (killable) around the VMA manipulation and
 * dropped again before returning.
 *
 * Returns 0 on success, -ENOMEM if the size checks against @stack_top fail,
 * -EINTR if taking the mmap lock was interrupted, -EFAULT if
 * expand_stack_locked() fails, or the error returned by mprotect_fixup() /
 * relocate_vma_down().
707b6a2fea3SOllie Wild  */
setup_arg_pages(struct linux_binprm * bprm,unsigned long stack_top,int executable_stack)708b6a2fea3SOllie Wild int setup_arg_pages(struct linux_binprm *bprm,
709b6a2fea3SOllie Wild 		    unsigned long stack_top,
710b6a2fea3SOllie Wild 		    int executable_stack)
7111da177e4SLinus Torvalds {
712b6a2fea3SOllie Wild 	unsigned long ret;
713b6a2fea3SOllie Wild 	unsigned long stack_shift;
714b6a2fea3SOllie Wild 	struct mm_struct *mm = current->mm;
715b6a2fea3SOllie Wild 	struct vm_area_struct *vma = bprm->vma;
716b6a2fea3SOllie Wild 	struct vm_area_struct *prev = NULL;
717b6a2fea3SOllie Wild 	unsigned long vm_flags;
718b6a2fea3SOllie Wild 	unsigned long stack_base;
719803bf5ecSMichael Neuling 	unsigned long stack_size;
720803bf5ecSMichael Neuling 	unsigned long stack_expand;
721803bf5ecSMichael Neuling 	unsigned long rlim_stack;
7224a18419fSNadav Amit 	struct mmu_gather tlb;
7232286a691SLiam R. Howlett 	struct vma_iterator vmi;
7241da177e4SLinus Torvalds 
725b6a2fea3SOllie Wild #ifdef CONFIG_STACK_GROWSUP
726d71f290bSJames Hogan 	/* Limit stack size */
727c31dbb14SKees Cook 	stack_base = bprm->rlim_stack.rlim_max;
72822ee3ea5SHelge Deller 
72922ee3ea5SHelge Deller 	stack_base = calc_max_stack_size(stack_base);
730b6a2fea3SOllie Wild 
731d045c77cSHelge Deller 	/* Add space for stack randomization. */
732f31b2569SHelge Deller 	if (current->flags & PF_RANDOMIZE)
733d045c77cSHelge Deller 		stack_base += (STACK_RND_MASK << PAGE_SHIFT);
734d045c77cSHelge Deller 
735b6a2fea3SOllie Wild 	/* Make sure we didn't let the argument array grow too large. */
736b6a2fea3SOllie Wild 	if (vma->vm_end - vma->vm_start > stack_base)
737b6a2fea3SOllie Wild 		return -ENOMEM;
738b6a2fea3SOllie Wild 
	/* From here on stack_base is an address, no longer a size. */
739b6a2fea3SOllie Wild 	stack_base = PAGE_ALIGN(stack_top - stack_base);
740b6a2fea3SOllie Wild 
741b6a2fea3SOllie Wild 	stack_shift = vma->vm_start - stack_base;
742b6a2fea3SOllie Wild 	mm->arg_start = bprm->p - stack_shift;
743b6a2fea3SOllie Wild 	bprm->p = vma->vm_end - stack_shift;
744b6a2fea3SOllie Wild #else
745b6a2fea3SOllie Wild 	stack_top = arch_align_stack(stack_top);
746b6a2fea3SOllie Wild 	stack_top = PAGE_ALIGN(stack_top);
7471b528181SRoland McGrath 
	/* The existing stack must fit between mmap_min_addr and stack_top. */
7481b528181SRoland McGrath 	if (unlikely(stack_top < mmap_min_addr) ||
7491b528181SRoland McGrath 	    unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr))
7501b528181SRoland McGrath 		return -ENOMEM;
7511b528181SRoland McGrath 
752b6a2fea3SOllie Wild 	stack_shift = vma->vm_end - stack_top;
753b6a2fea3SOllie Wild 
754b6a2fea3SOllie Wild 	bprm->p -= stack_shift;
755b6a2fea3SOllie Wild 	mm->arg_start = bprm->p;
756b6a2fea3SOllie Wild #endif
757b6a2fea3SOllie Wild 
	/* bprm->exec is rebased by the same amount as the stack. */
758b6a2fea3SOllie Wild 	bprm->exec -= stack_shift;
759b6a2fea3SOllie Wild 
760d8ed45c5SMichel Lespinasse 	if (mmap_write_lock_killable(mm))
761f268dfe9SMichal Hocko 		return -EINTR;
762f268dfe9SMichal Hocko 
76396a8e13eSHugh Dickins 	vm_flags = VM_STACK_FLAGS;
764b6a2fea3SOllie Wild 
765b6a2fea3SOllie Wild 	/*
766b6a2fea3SOllie Wild 	 * Adjust stack execute permissions; explicitly enable for
767b6a2fea3SOllie Wild 	 * EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X and leave alone
768b6a2fea3SOllie Wild 	 * (arch default) otherwise.
769b6a2fea3SOllie Wild 	 */
770b6a2fea3SOllie Wild 	if (unlikely(executable_stack == EXSTACK_ENABLE_X))
771b6a2fea3SOllie Wild 		vm_flags |= VM_EXEC;
772b6a2fea3SOllie Wild 	else if (executable_stack == EXSTACK_DISABLE_X)
773b6a2fea3SOllie Wild 		vm_flags &= ~VM_EXEC;
774b6a2fea3SOllie Wild 	vm_flags |= mm->def_flags;
	/* Set for the duration of the setup; cleared again further down. */
775a8bef8ffSMel Gorman 	vm_flags |= VM_STACK_INCOMPLETE_SETUP;
776b6a2fea3SOllie Wild 
7772286a691SLiam R. Howlett 	vma_iter_init(&vmi, mm, vma->vm_start);
7782286a691SLiam R. Howlett 
7794a18419fSNadav Amit 	tlb_gather_mmu(&tlb, mm);
7802286a691SLiam R. Howlett 	ret = mprotect_fixup(&vmi, &tlb, vma, &prev, vma->vm_start, vma->vm_end,
781b6a2fea3SOllie Wild 			vm_flags);
7824a18419fSNadav Amit 	tlb_finish_mmu(&tlb);
7834a18419fSNadav Amit 
784b6a2fea3SOllie Wild 	if (ret)
785b6a2fea3SOllie Wild 		goto out_unlock;
786b6a2fea3SOllie Wild 	BUG_ON(prev != vma);
787b6a2fea3SOllie Wild 
78847a2ebb7SAlexey Dobriyan 	if (unlikely(vm_flags & VM_EXEC)) {
78947a2ebb7SAlexey Dobriyan 		pr_warn_once("process '%pD4' started with executable stack\n",
79047a2ebb7SAlexey Dobriyan 			     bprm->file);
79147a2ebb7SAlexey Dobriyan 	}
79247a2ebb7SAlexey Dobriyan 
793b6a2fea3SOllie Wild 	/* Move stack pages down in memory. */
794b6a2fea3SOllie Wild 	if (stack_shift) {
795d61f0d59SLorenzo Stoakes 		/*
796d61f0d59SLorenzo Stoakes 		 * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX.  Once
797d61f0d59SLorenzo Stoakes 		 * the binfmt code determines where the new stack should reside, we shift it to
798d61f0d59SLorenzo Stoakes 		 * its final location.
799d61f0d59SLorenzo Stoakes 		 */
800d61f0d59SLorenzo Stoakes 		ret = relocate_vma_down(vma, stack_shift);
801fc63cf23SAnton Blanchard 		if (ret)
802fc63cf23SAnton Blanchard 			goto out_unlock;
8031da177e4SLinus Torvalds 	}
8041da177e4SLinus Torvalds 
805a8bef8ffSMel Gorman 	/* mprotect_fixup is overkill to remove the temporary stack flags */
8061c71222eSSuren Baghdasaryan 	vm_flags_clear(vma, VM_STACK_INCOMPLETE_SETUP);
807a8bef8ffSMel Gorman 
8085ef097ddSMichael Neuling 	stack_expand = 131072UL; /* randomly 32*4k (or 2*64k) pages */
809803bf5ecSMichael Neuling 	stack_size = vma->vm_end - vma->vm_start;
810803bf5ecSMichael Neuling 	/*
811803bf5ecSMichael Neuling 	 * Align this down to a page boundary as expand_stack
812803bf5ecSMichael Neuling 	 * will align it up.
813803bf5ecSMichael Neuling 	 */
814c31dbb14SKees Cook 	rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK;
815bfb4a2b9SRolf Eike Beer 
	/*
	 * Target size: the current size plus the extra room above, capped
	 * at the page-aligned soft stack limit.
	 */
816bfb4a2b9SRolf Eike Beer 	stack_expand = min(rlim_stack, stack_size + stack_expand);
817bfb4a2b9SRolf Eike Beer 
818b6a2fea3SOllie Wild #ifdef CONFIG_STACK_GROWSUP
819bfb4a2b9SRolf Eike Beer 	stack_base = vma->vm_start + stack_expand;
820b6a2fea3SOllie Wild #else
821bfb4a2b9SRolf Eike Beer 	stack_base = vma->vm_end - stack_expand;
822b6a2fea3SOllie Wild #endif
8233af9e859SEric B Munson 	current->mm->start_stack = bprm->p;
8248d7071afSLinus Torvalds 	ret = expand_stack_locked(vma, stack_base);
	/* Any expansion failure is reported to the caller as -EFAULT. */
825b6a2fea3SOllie Wild 	if (ret)
826b6a2fea3SOllie Wild 		ret = -EFAULT;
827b6a2fea3SOllie Wild 
828b6a2fea3SOllie Wild out_unlock:
829d8ed45c5SMichel Lespinasse 	mmap_write_unlock(mm);
830fc63cf23SAnton Blanchard 	return ret;
831b6a2fea3SOllie Wild }
832b6a2fea3SOllie Wild EXPORT_SYMBOL(setup_arg_pages);
833b6a2fea3SOllie Wild 
8347e7ec6a9SNicolas Pitre #else
8357e7ec6a9SNicolas Pitre 
8367e7ec6a9SNicolas Pitre /*
8377e7ec6a9SNicolas Pitre  * Transfer the program arguments and environment from the holding pages
8387e7ec6a9SNicolas Pitre  * onto the stack. The provided stack pointer is adjusted accordingly.
8397e7ec6a9SNicolas Pitre  */
transfer_args_to_stack(struct linux_binprm * bprm,unsigned long * sp_location)8407e7ec6a9SNicolas Pitre int transfer_args_to_stack(struct linux_binprm *bprm,
8417e7ec6a9SNicolas Pitre 			   unsigned long *sp_location)
8427e7ec6a9SNicolas Pitre {
8437e7ec6a9SNicolas Pitre 	unsigned long index, stop, sp;
8447e7ec6a9SNicolas Pitre 	int ret = 0;
8457e7ec6a9SNicolas Pitre 
8467e7ec6a9SNicolas Pitre 	stop = bprm->p >> PAGE_SHIFT;
8477e7ec6a9SNicolas Pitre 	sp = *sp_location;
8487e7ec6a9SNicolas Pitre 
8497e7ec6a9SNicolas Pitre 	for (index = MAX_ARG_PAGES - 1; index >= stop; index--) {
8507e7ec6a9SNicolas Pitre 		unsigned int offset = index == stop ? bprm->p & ~PAGE_MASK : 0;
8513a608cfeSFabio M. De Francesco 		char *src = kmap_local_page(bprm->page[index]) + offset;
8527e7ec6a9SNicolas Pitre 		sp -= PAGE_SIZE - offset;
8537e7ec6a9SNicolas Pitre 		if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0)
8547e7ec6a9SNicolas Pitre 			ret = -EFAULT;
8553a608cfeSFabio M. De Francesco 		kunmap_local(src);
8567e7ec6a9SNicolas Pitre 		if (ret)
8577e7ec6a9SNicolas Pitre 			goto out;
8587e7ec6a9SNicolas Pitre 	}
8597e7ec6a9SNicolas Pitre 
8602aea94acSMax Filippov 	bprm->exec += *sp_location - MAX_ARG_PAGES * PAGE_SIZE;
8617e7ec6a9SNicolas Pitre 	*sp_location = sp;
8627e7ec6a9SNicolas Pitre 
8637e7ec6a9SNicolas Pitre out:
8647e7ec6a9SNicolas Pitre 	return ret;
8657e7ec6a9SNicolas Pitre }
8667e7ec6a9SNicolas Pitre EXPORT_SYMBOL(transfer_args_to_stack);
8677e7ec6a9SNicolas Pitre 
8681da177e4SLinus Torvalds #endif /* CONFIG_MMU */
8691da177e4SLinus Torvalds 
870bdd8f624SKees Cook /*
871bdd8f624SKees Cook  * On success, caller must call do_close_execat() on the returned
872bdd8f624SKees Cook  * struct file to close it.
873bdd8f624SKees Cook  */
do_open_execat(int fd,struct filename * name,int flags)87451f39a1fSDavid Drysdale static struct file *do_open_execat(int fd, struct filename *name, int flags)
8751da177e4SLinus Torvalds {
8763b832035SChristian Brauner 	int err;
8773b832035SChristian Brauner 	struct file *file __free(fput) = NULL;
87851f39a1fSDavid Drysdale 	struct open_flags open_exec_flags = {
87947c805dcSAl Viro 		.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
88062fb4a15SAl Viro 		.acc_mode = MAY_EXEC,
881f9652e10SAl Viro 		.intent = LOOKUP_OPEN,
882f9652e10SAl Viro 		.lookup_flags = LOOKUP_FOLLOW,
88347c805dcSAl Viro 	};
8841da177e4SLinus Torvalds 
885a5874fdeSMickaël Salaün 	if ((flags &
886a5874fdeSMickaël Salaün 	     ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH | AT_EXECVE_CHECK)) != 0)
88751f39a1fSDavid Drysdale 		return ERR_PTR(-EINVAL);
88851f39a1fSDavid Drysdale 	if (flags & AT_SYMLINK_NOFOLLOW)
88951f39a1fSDavid Drysdale 		open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW;
89051f39a1fSDavid Drysdale 	if (flags & AT_EMPTY_PATH)
89151f39a1fSDavid Drysdale 		open_exec_flags.lookup_flags |= LOOKUP_EMPTY;
89251f39a1fSDavid Drysdale 
89351f39a1fSDavid Drysdale 	file = do_filp_open(fd, name, &open_exec_flags);
8946e8341a1SAl Viro 	if (IS_ERR(file))
895e56b6a5dSChristoph Hellwig 		return file;
896e56b6a5dSChristoph Hellwig 
8970d196e75SMateusz Guzik 	/*
8980d196e75SMateusz Guzik 	 * In the past the regular type check was here. It moved to may_open() in
8990d196e75SMateusz Guzik 	 * 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is
9000d196e75SMateusz Guzik 	 * an invariant that all non-regular files error out before we get here.
9010d196e75SMateusz Guzik 	 */
9020d196e75SMateusz Guzik 	if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
9033b832035SChristian Brauner 	    path_noexec(&file->f_path))
9040d196e75SMateusz Guzik 		return ERR_PTR(-EACCES);
9050d196e75SMateusz Guzik 
9060357ef03SAmir Goldstein 	err = exe_file_deny_write_access(file);
9073b832035SChristian Brauner 	if (err)
9083b832035SChristian Brauner 		return ERR_PTR(err);
9093b832035SChristian Brauner 
9103b832035SChristian Brauner 	return no_free_ptr(file);
911e56b6a5dSChristoph Hellwig }
912c4ad8f98SLinus Torvalds 
913bdd8f624SKees Cook /**
914bdd8f624SKees Cook  * open_exec - Open a path name for execution
915bdd8f624SKees Cook  *
916bdd8f624SKees Cook  * @name: path name to open with the intent of executing it.
917bdd8f624SKees Cook  *
918bdd8f624SKees Cook  * Returns ERR_PTR on failure or allocated struct file on success.
919bdd8f624SKees Cook  *
9203b832035SChristian Brauner  * As this is a wrapper for the internal do_open_execat(), callers
9210357ef03SAmir Goldstein  * must call exe_file_allow_write_access() before fput() on release. Also see
922bdd8f624SKees Cook  * do_close_execat().
923bdd8f624SKees Cook  */
open_exec(const char * name)924c4ad8f98SLinus Torvalds struct file *open_exec(const char *name)
925c4ad8f98SLinus Torvalds {
92651689104SPaul Moore 	struct filename *filename = getname_kernel(name);
92751689104SPaul Moore 	struct file *f = ERR_CAST(filename);
92851689104SPaul Moore 
92951689104SPaul Moore 	if (!IS_ERR(filename)) {
93051689104SPaul Moore 		f = do_open_execat(AT_FDCWD, filename, 0);
93151689104SPaul Moore 		putname(filename);
93251689104SPaul Moore 	}
93351689104SPaul Moore 	return f;
934c4ad8f98SLinus Torvalds }
9351da177e4SLinus Torvalds EXPORT_SYMBOL(open_exec);
9361da177e4SLinus Torvalds 
937987f20a9SEric W. Biederman #if defined(CONFIG_BINFMT_FLAT) || defined(CONFIG_BINFMT_ELF_FDPIC)
read_code(struct file * file,unsigned long addr,loff_t pos,size_t len)9383dc20cb2SAl Viro ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
9393dc20cb2SAl Viro {
940ec695579SAl Viro 	ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
9413dc20cb2SAl Viro 	if (res > 0)
942bce2b68bSChristoph Hellwig 		flush_icache_user_range(addr, addr + len);
9433dc20cb2SAl Viro 	return res;
9443dc20cb2SAl Viro }
9453dc20cb2SAl Viro EXPORT_SYMBOL(read_code);
94648304f79SChristoph Hellwig #endif
9473dc20cb2SAl Viro 
948eea96732SEric W. Biederman /*
949eea96732SEric W. Biederman  * Maps the mm_struct mm into the current task struct.
950f7cfd871SEric W. Biederman  * On success, this function returns with exec_update_lock
951f7cfd871SEric W. Biederman  * held for writing.
 *
 * On failure the lock is not held: either it was never acquired, or it is
 * released again before returning.
 *
 * Returns 0 on success, or the error from down_write_killable() /
 * mmap_read_lock_killable().
952eea96732SEric W. Biederman  */
exec_mmap(struct mm_struct * mm)9531da177e4SLinus Torvalds static int exec_mmap(struct mm_struct *mm)
9541da177e4SLinus Torvalds {
9551da177e4SLinus Torvalds 	struct task_struct *tsk;
9561da177e4SLinus Torvalds 	struct mm_struct *old_mm, *active_mm;
957eea96732SEric W. Biederman 	int ret;
9581da177e4SLinus Torvalds 
9591da177e4SLinus Torvalds 	/* Notify parent that we're no longer interested in the old VM */
9601da177e4SLinus Torvalds 	tsk = current;
9611da177e4SLinus Torvalds 	old_mm = current->mm;
9624610ba7aSThomas Gleixner 	exec_mm_release(tsk, old_mm);
9631da177e4SLinus Torvalds 
964f7cfd871SEric W. Biederman 	ret = down_write_killable(&tsk->signal->exec_update_lock);
965eea96732SEric W. Biederman 	if (ret)
966eea96732SEric W. Biederman 		return ret;
967eea96732SEric W. Biederman 
9681da177e4SLinus Torvalds 	if (old_mm) {
9691da177e4SLinus Torvalds 		/*
9707e3c4fb7SEric W. Biederman 		 * If there is a pending fatal signal perhaps a signal
9717e3c4fb7SEric W. Biederman 		 * whose default action is to create a coredump get
9727e3c4fb7SEric W. Biederman 		 * out and die instead of going through with the exec.
9731da177e4SLinus Torvalds 		 */
9747e3c4fb7SEric W. Biederman 		ret = mmap_read_lock_killable(old_mm);
9757e3c4fb7SEric W. Biederman 		if (ret) {
			/* Undo the lock taken above before failing. */
976f7cfd871SEric W. Biederman 			up_write(&tsk->signal->exec_update_lock);
9777e3c4fb7SEric W. Biederman 			return ret;
9781da177e4SLinus Torvalds 		}
9791da177e4SLinus Torvalds 	}
980eea96732SEric W. Biederman 
	/* The switch of tsk->mm / tsk->active_mm is done under task_lock(). */
9811da177e4SLinus Torvalds 	task_lock(tsk);
982227a4aadSMathieu Desnoyers 	membarrier_exec_mmap(mm);
983d53c3dfbSNicholas Piggin 
984d53c3dfbSNicholas Piggin 	local_irq_disable();
	/* Remember the outgoing active_mm; it is handed to activate_mm(). */
985d53c3dfbSNicholas Piggin 	active_mm = tsk->active_mm;
9861da177e4SLinus Torvalds 	tsk->active_mm = mm;
987d53c3dfbSNicholas Piggin 	tsk->mm = mm;
9887e019dccSMathieu Desnoyers 	mm_init_cid(mm, tsk);
989d53c3dfbSNicholas Piggin 	/*
990d53c3dfbSNicholas Piggin 	 * This prevents preemption while active_mm is being loaded and
991d53c3dfbSNicholas Piggin 	 * it and mm are being updated, which could cause problems for
992d53c3dfbSNicholas Piggin 	 * lazy tlb mm refcounting when these are updated by context
993d53c3dfbSNicholas Piggin 	 * switches. Not all architectures can handle irqs off over
994d53c3dfbSNicholas Piggin 	 * activate_mm yet.
995d53c3dfbSNicholas Piggin 	 */
996d53c3dfbSNicholas Piggin 	if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
997d53c3dfbSNicholas Piggin 		local_irq_enable();
9981da177e4SLinus Torvalds 	activate_mm(active_mm, mm);
999d53c3dfbSNicholas Piggin 	if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
1000d53c3dfbSNicholas Piggin 		local_irq_enable();
1001dda1c41aSSebastian Andrzej Siewior 	lru_gen_add_mm(mm);
10021da177e4SLinus Torvalds 	task_unlock(tsk);
1003bd74fdaeSYu Zhao 	lru_gen_use_mm(mm);
10041da177e4SLinus Torvalds 	if (old_mm) {
		/*
		 * Finished with the previous address space: drop its mmap
		 * read lock, record its RSS high-water mark in
		 * signal->maxrss and put the reference with mmput().
		 */
1005d8ed45c5SMichel Lespinasse 		mmap_read_unlock(old_mm);
10067dddb12cSEric Sesterhenn 		BUG_ON(active_mm != old_mm);
1007701085b2SOleg Nesterov 		setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
100831a78f23SBalbir Singh 		mm_update_next_owner(old_mm);
10091da177e4SLinus Torvalds 		mmput(old_mm);
10101da177e4SLinus Torvalds 		return 0;
10111da177e4SLinus Torvalds 	}
	/*
	 * No previous mm of our own: release the old active_mm through
	 * mmdrop_lazy_tlb() rather than mmput().
	 */
1012aa464ba9SNicholas Piggin 	mmdrop_lazy_tlb(active_mm);
10131da177e4SLinus Torvalds 	return 0;
10141da177e4SLinus Torvalds }
10151da177e4SLinus Torvalds 
/*
 * Make @tsk the only thread of its thread group, and its group leader.
 *
 * The other threads are zapped with zap_other_threads() and waited for in
 * TASK_KILLABLE sleeps.  If @tsk is not the group leader, it additionally
 * waits for the old leader to reach an exit state, takes over the leader's
 * start times, pids (exchange_tids() / transfer_pid()) and list linkage,
 * and finally releases the old leader.
 *
 * Returns 0 on success, or -EAGAIN if a group exit or another exec is
 * already in progress, or if a fatal signal is pending after one of the
 * waits.
 */
de_thread(struct task_struct * tsk)1016858119e1SArjan van de Ven static int de_thread(struct task_struct *tsk)
10171da177e4SLinus Torvalds {
10181da177e4SLinus Torvalds 	struct signal_struct *sig = tsk->signal;
1019b2c903b8SOleg Nesterov 	struct sighand_struct *oldsighand = tsk->sighand;
10201da177e4SLinus Torvalds 	spinlock_t *lock = &oldsighand->siglock;
10211da177e4SLinus Torvalds 
	/* Single-threaded already: only the exit_signal reset below applies. */
1022aafe6c2aSEric W. Biederman 	if (thread_group_empty(tsk))
10231da177e4SLinus Torvalds 		goto no_thread_group;
10241da177e4SLinus Torvalds 
10251da177e4SLinus Torvalds 	/*
10261da177e4SLinus Torvalds 	 * Kill all other threads in the thread group.
10271da177e4SLinus Torvalds 	 */
10281da177e4SLinus Torvalds 	spin_lock_irq(lock);
102949697335SEric W. Biederman 	if ((sig->flags & SIGNAL_GROUP_EXIT) || sig->group_exec_task) {
10301da177e4SLinus Torvalds 		/*
10311da177e4SLinus Torvalds 		 * Another group action in progress, just
10321da177e4SLinus Torvalds 		 * return so that the signal is processed.
10331da177e4SLinus Torvalds 		 */
10341da177e4SLinus Torvalds 		spin_unlock_irq(lock);
10351da177e4SLinus Torvalds 		return -EAGAIN;
10361da177e4SLinus Torvalds 	}
10371da177e4SLinus Torvalds 
103860700e38SEric W. Biederman 	sig->group_exec_task = tsk;
	/*
	 * NOTE(review): zap_other_threads() presumably returns how many
	 * other threads still have to exit; a non-leader caller takes one
	 * off because the leader is waited for separately below - confirm
	 * against zap_other_threads().
	 */
1039d344193aSOleg Nesterov 	sig->notify_count = zap_other_threads(tsk);
1040d344193aSOleg Nesterov 	if (!thread_group_leader(tsk))
1041d344193aSOleg Nesterov 		sig->notify_count--;
1042d344193aSOleg Nesterov 
	/*
	 * Sleep killably until notify_count reaches zero.  siglock is
	 * dropped around schedule() and retaken before re-checking.
	 */
1043d344193aSOleg Nesterov 	while (sig->notify_count) {
1044d5bbd43dSOleg Nesterov 		__set_current_state(TASK_KILLABLE);
10451da177e4SLinus Torvalds 		spin_unlock_irq(lock);
1046a72173ecSRafael J. Wysocki 		schedule();
104708d405c8SDavidlohr Bueso 		if (__fatal_signal_pending(tsk))
1048d5bbd43dSOleg Nesterov 			goto killed;
10491da177e4SLinus Torvalds 		spin_lock_irq(lock);
10501da177e4SLinus Torvalds 	}
10511da177e4SLinus Torvalds 	spin_unlock_irq(lock);
10521da177e4SLinus Torvalds 
10531da177e4SLinus Torvalds 	/*
10541da177e4SLinus Torvalds 	 * At this point all other threads have exited, all we have to
10551da177e4SLinus Torvalds 	 * do is to wait for the thread group leader to become inactive,
10561da177e4SLinus Torvalds 	 * and to assume its PID:
10571da177e4SLinus Torvalds 	 */
1058aafe6c2aSEric W. Biederman 	if (!thread_group_leader(tsk)) {
10598187926bSOleg Nesterov 		struct task_struct *leader = tsk->group_leader;
10606db840faSOleg Nesterov 
		/*
		 * The loop is left through the break with tasklist_lock
		 * write-held and the cgroup threadgroup change begun; both
		 * are released after the identity switch below.
		 */
10616db840faSOleg Nesterov 		for (;;) {
1062780de9ddSIngo Molnar 			cgroup_threadgroup_change_begin(tsk);
10636db840faSOleg Nesterov 			write_lock_irq(&tasklist_lock);
1064dfcce791SKirill Tkhai 			/*
1065dfcce791SKirill Tkhai 			 * Do this under tasklist_lock to ensure that
106660700e38SEric W. Biederman 			 * exit_notify() can't miss ->group_exec_task
1067dfcce791SKirill Tkhai 			 */
1068dfcce791SKirill Tkhai 			sig->notify_count = -1;
10696db840faSOleg Nesterov 			if (likely(leader->exit_state))
10706db840faSOleg Nesterov 				break;
1071d5bbd43dSOleg Nesterov 			__set_current_state(TASK_KILLABLE);
10726db840faSOleg Nesterov 			write_unlock_irq(&tasklist_lock);
1073780de9ddSIngo Molnar 			cgroup_threadgroup_change_end(tsk);
1074a72173ecSRafael J. Wysocki 			schedule();
107508d405c8SDavidlohr Bueso 			if (__fatal_signal_pending(tsk))
1076d5bbd43dSOleg Nesterov 				goto killed;
10776db840faSOleg Nesterov 		}
10781da177e4SLinus Torvalds 
1079f5e90281SRoland McGrath 		/*
1080f5e90281SRoland McGrath 		 * The only record we have of the real-time age of a
1081f5e90281SRoland McGrath 		 * process, regardless of execs it's done, is start_time.
1082f5e90281SRoland McGrath 		 * All the past CPU time is accumulated in signal_struct
1083f5e90281SRoland McGrath 		 * from sister threads now dead.  But in this non-leader
1084f5e90281SRoland McGrath 		 * exec, nothing survives from the original leader thread,
1085f5e90281SRoland McGrath 		 * whose birth marks the true age of this process now.
1086f5e90281SRoland McGrath 		 * When we take on its identity by switching to its PID, we
1087f5e90281SRoland McGrath 		 * also take its birthdate (always earlier than our own).
1088f5e90281SRoland McGrath 		 */
1089aafe6c2aSEric W. Biederman 		tsk->start_time = leader->start_time;
1090cf25e24dSPeter Zijlstra 		tsk->start_boottime = leader->start_boottime;
1091f5e90281SRoland McGrath 
1092bac0abd6SPavel Emelyanov 		BUG_ON(!same_thread_group(leader, tsk));
10931da177e4SLinus Torvalds 		/*
10941da177e4SLinus Torvalds 		 * An exec() starts a new thread group with the
10951da177e4SLinus Torvalds 		 * TGID of the previous thread group. Rehash the
10961da177e4SLinus Torvalds 		 * two threads with a switched PID, and release
10971da177e4SLinus Torvalds 		 * the former thread group leader:
10981da177e4SLinus Torvalds 		 */
1099d73d6529SEric W. Biederman 
1100d73d6529SEric W. Biederman 		/* Become a process group leader with the old leader's pid.
1101c18258c6SEric W. Biederman 		 * The old leader becomes a thread of this thread group.
1102d73d6529SEric W. Biederman 		 */
11036b03d130SEric W. Biederman 		exchange_tids(tsk, leader);
11046883f81aSEric W. Biederman 		transfer_pid(leader, tsk, PIDTYPE_TGID);
1105aafe6c2aSEric W. Biederman 		transfer_pid(leader, tsk, PIDTYPE_PGID);
1106aafe6c2aSEric W. Biederman 		transfer_pid(leader, tsk, PIDTYPE_SID);
11079cd80bbbSOleg Nesterov 
		/* tsk takes the leader's slots on the tasks and sibling lists. */
1108aafe6c2aSEric W. Biederman 		list_replace_rcu(&leader->tasks, &tsk->tasks);
11099cd80bbbSOleg Nesterov 		list_replace_init(&leader->sibling, &tsk->sibling);
11101da177e4SLinus Torvalds 
1111aafe6c2aSEric W. Biederman 		tsk->group_leader = tsk;
1112aafe6c2aSEric W. Biederman 		leader->group_leader = tsk;
1113de12a787SEric W. Biederman 
1114aafe6c2aSEric W. Biederman 		tsk->exit_signal = SIGCHLD;
1115087806b1SOleg Nesterov 		leader->exit_signal = -1;
1116962b564cSOleg Nesterov 
1117962b564cSOleg Nesterov 		BUG_ON(leader->exit_state != EXIT_ZOMBIE);
1118962b564cSOleg Nesterov 		leader->exit_state = EXIT_DEAD;
1119eac1b5e5SOleg Nesterov 		/*
1120eac1b5e5SOleg Nesterov 		 * We are going to release_task()->ptrace_unlink() silently,
1121eac1b5e5SOleg Nesterov 		 * the tracer can sleep in do_wait(). EXIT_DEAD guarantees
11225036793dSZhang Jiaming 		 * the tracer won't block again waiting for this thread.
1123eac1b5e5SOleg Nesterov 		 */
1124eac1b5e5SOleg Nesterov 		if (unlikely(leader->ptrace))
1125eac1b5e5SOleg Nesterov 			__wake_up_parent(leader, leader->parent);
11261da177e4SLinus Torvalds 		write_unlock_irq(&tasklist_lock);
1127780de9ddSIngo Molnar 		cgroup_threadgroup_change_end(tsk);
11288187926bSOleg Nesterov 
11298187926bSOleg Nesterov 		release_task(leader);
11301da177e4SLinus Torvalds 	}
11311da177e4SLinus Torvalds 
	/* The group action is over: clear the markers set above. */
113260700e38SEric W. Biederman 	sig->group_exec_task = NULL;
11336db840faSOleg Nesterov 	sig->notify_count = 0;
11341da177e4SLinus Torvalds 
11351da177e4SLinus Torvalds no_thread_group:
1136e6368253SOleg Nesterov 	/* we have changed execution domain */
1137e6368253SOleg Nesterov 	tsk->exit_signal = SIGCHLD;
1138e6368253SOleg Nesterov 
113902169155SEric W. Biederman 	BUG_ON(!thread_group_leader(tsk));
114002169155SEric W. Biederman 	return 0;
114102169155SEric W. Biederman 
114202169155SEric W. Biederman killed:
114302169155SEric W. Biederman 	/* protects against exit_notify() and __exit_signal() */
114402169155SEric W. Biederman 	read_lock(&tasklist_lock);
114560700e38SEric W. Biederman 	sig->group_exec_task = NULL;
114602169155SEric W. Biederman 	sig->notify_count = 0;
114702169155SEric W. Biederman 	read_unlock(&tasklist_lock);
114802169155SEric W. Biederman 	return -EAGAIN;
114902169155SEric W. Biederman }
115002169155SEric W. Biederman 
115102169155SEric W. Biederman 
11527a60ef48SEric W. Biederman /*
11537a60ef48SEric W. Biederman  * This function makes sure the current process has its own signal table,
11547a60ef48SEric W. Biederman  * so that flush_signal_handlers can later reset the handlers without
11557a60ef48SEric W. Biederman  * disturbing other processes.  (Other processes might share the signal
11567a60ef48SEric W. Biederman  * table via the CLONE_SIGHAND option to clone().)
11577a60ef48SEric W. Biederman  */
unshare_sighand(struct task_struct * me)115802169155SEric W. Biederman static int unshare_sighand(struct task_struct *me)
115902169155SEric W. Biederman {
116002169155SEric W. Biederman 	struct sighand_struct *oldsighand = me->sighand;
1161329f7dbaSOleg Nesterov 
1162d036bda7SElena Reshetova 	if (refcount_read(&oldsighand->count) != 1) {
1163b2c903b8SOleg Nesterov 		struct sighand_struct *newsighand;
11641da177e4SLinus Torvalds 		/*
1165b2c903b8SOleg Nesterov 		 * This ->sighand is shared with the CLONE_SIGHAND
1166b2c903b8SOleg Nesterov 		 * but not CLONE_THREAD task, switch to the new one.
11671da177e4SLinus Torvalds 		 */
1168b2c903b8SOleg Nesterov 		newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
1169b2c903b8SOleg Nesterov 		if (!newsighand)
1170b2c903b8SOleg Nesterov 			return -ENOMEM;
1171b2c903b8SOleg Nesterov 
1172d036bda7SElena Reshetova 		refcount_set(&newsighand->count, 1);
11731da177e4SLinus Torvalds 
11741da177e4SLinus Torvalds 		write_lock_irq(&tasklist_lock);
11751da177e4SLinus Torvalds 		spin_lock(&oldsighand->siglock);
11765bf2fedcSBernd Edlinger 		memcpy(newsighand->action, oldsighand->action,
11775bf2fedcSBernd Edlinger 		       sizeof(newsighand->action));
117802169155SEric W. Biederman 		rcu_assign_pointer(me->sighand, newsighand);
11791da177e4SLinus Torvalds 		spin_unlock(&oldsighand->siglock);
11801da177e4SLinus Torvalds 		write_unlock_irq(&tasklist_lock);
11811da177e4SLinus Torvalds 
1182fba2afaaSDavide Libenzi 		__cleanup_sighand(oldsighand);
11831da177e4SLinus Torvalds 	}
11841da177e4SLinus Torvalds 	return 0;
11851da177e4SLinus Torvalds }
11861da177e4SLinus Torvalds 
11876a6d27deSAl Viro /*
11883a3f61ceSKees Cook  * This is unlocked -- the string will always be NUL-terminated, but
11893a3f61ceSKees Cook  * may show overlapping contents if racing concurrent reads.
11906a6d27deSAl Viro  */
__set_task_comm(struct task_struct * tsk,const char * buf,bool exec)119182b89778SAdrian Hunter void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
11921da177e4SLinus Torvalds {
11933a3f61ceSKees Cook 	size_t len = min(strlen(buf), sizeof(tsk->comm) - 1);
11943a3f61ceSKees Cook 
119543d2b113SKAMEZAWA Hiroyuki 	trace_task_rename(tsk, buf);
11963a3f61ceSKees Cook 	memcpy(tsk->comm, buf, len);
11973a3f61ceSKees Cook 	memset(&tsk->comm[len], 0, sizeof(tsk->comm) - len);
119882b89778SAdrian Hunter 	perf_event_comm(tsk, exec);
11991da177e4SLinus Torvalds }
12001da177e4SLinus Torvalds 
1201a9208e42SKees Cook /*
1202a9208e42SKees Cook  * Calling this is the point of no return. None of the failures will be
1203a9208e42SKees Cook  * seen by userspace since either the process is already taking a fatal
1204a9208e42SKees Cook  * signal (via de_thread() or coredump), or will have SEGV raised
120513c432b5SEric W. Biederman  * (after exec_mmap()) by search_binary_handler (see below).
1206a9208e42SKees Cook  */
begin_new_exec(struct linux_binprm * bprm)12072388777aSEric W. Biederman int begin_new_exec(struct linux_binprm * bprm)
12081da177e4SLinus Torvalds {
12092ca7be7dSEric W. Biederman 	struct task_struct *me = current;
1210221af7f8SLinus Torvalds 	int retval;
12111da177e4SLinus Torvalds 
121256305aa9SEric W. Biederman 	/* Once we are committed compute the creds */
121356305aa9SEric W. Biederman 	retval = bprm_creds_from_file(bprm);
121456305aa9SEric W. Biederman 	if (retval)
121556305aa9SEric W. Biederman 		return retval;
121656305aa9SEric W. Biederman 
12171da177e4SLinus Torvalds 	/*
1218c8238994SMarco Elver 	 * This tracepoint marks the point before flushing the old exec where
1219c8238994SMarco Elver 	 * the current task is still unchanged, but errors are fatal (point of
1220c8238994SMarco Elver 	 * no return). The later "sched_process_exec" tracepoint is called after
1221c8238994SMarco Elver 	 * the current task has successfully switched to the new exec.
1222c8238994SMarco Elver 	 */
1223c8238994SMarco Elver 	trace_sched_prepare_exec(current, bprm);
1224c8238994SMarco Elver 
1225c8238994SMarco Elver 	/*
12266834e0bbSEric W. Biederman 	 * Ensure all future errors are fatal.
12276834e0bbSEric W. Biederman 	 */
12286834e0bbSEric W. Biederman 	bprm->point_of_no_return = true;
12296834e0bbSEric W. Biederman 
1230af7bb0d2SOleg Nesterov 	/* Make this the only thread in the thread group */
12312ca7be7dSEric W. Biederman 	retval = de_thread(me);
12321da177e4SLinus Torvalds 	if (retval)
12331da177e4SLinus Torvalds 		goto out;
1234af7bb0d2SOleg Nesterov 	/* see the comment in check_unsafe_exec() */
1235af7bb0d2SOleg Nesterov 	current->fs->in_exec = 0;
12366e399cd1SDavidlohr Bueso 	/*
12379ee1206dSEric W. Biederman 	 * Cancel any io_uring activity across execve
12389ee1206dSEric W. Biederman 	 */
12399ee1206dSEric W. Biederman 	io_uring_task_cancel();
12409ee1206dSEric W. Biederman 
1241b6043501SEric W. Biederman 	/* Ensure the files table is not shared. */
12421f702603SEric W. Biederman 	retval = unshare_files();
1243b6043501SEric W. Biederman 	if (retval)
1244b6043501SEric W. Biederman 		goto out;
1245b6043501SEric W. Biederman 
12466e399cd1SDavidlohr Bueso 	/*
12476e399cd1SDavidlohr Bueso 	 * Must be called _before_ exec_mmap() as bprm->mm is
1248a7031f14SMateusz Guzik 	 * not visible until then. Doing it here also ensures
1249a7031f14SMateusz Guzik 	 * we don't race against replace_mm_exe_file().
12506e399cd1SDavidlohr Bueso 	 */
1251fe69d560SDavid Hildenbrand 	retval = set_mm_exe_file(bprm->mm, bprm->file);
1252fe69d560SDavid Hildenbrand 	if (retval)
1253fe69d560SDavid Hildenbrand 		goto out;
12546e399cd1SDavidlohr Bueso 
1255b8a61c9eSEric W. Biederman 	/* If the binary is not readable then enforce mm->dumpable=0 */
1256f87d1c95SEric W. Biederman 	would_dump(bprm, bprm->file);
1257b8a61c9eSEric W. Biederman 	if (bprm->have_execfd)
1258b8a61c9eSEric W. Biederman 		would_dump(bprm, bprm->executable);
1259f87d1c95SEric W. Biederman 
12601da177e4SLinus Torvalds 	/*
12611da177e4SLinus Torvalds 	 * Release all of the old mmap stuff
12621da177e4SLinus Torvalds 	 */
12633c77f845SOleg Nesterov 	acct_arg_size(bprm, 0);
12641da177e4SLinus Torvalds 	retval = exec_mmap(bprm->mm);
12651da177e4SLinus Torvalds 	if (retval)
1266fd8328beSAl Viro 		goto out;
12671da177e4SLinus Torvalds 
1268a9208e42SKees Cook 	bprm->mm = NULL;
12697ab02af4SLinus Torvalds 
12702b5f9dadSAndrei Vagin 	retval = exec_task_namespaces();
12712b5f9dadSAndrei Vagin 	if (retval)
12722b5f9dadSAndrei Vagin 		goto out_unlock;
12732b5f9dadSAndrei Vagin 
1274ccf0fa6bSEric W. Biederman #ifdef CONFIG_POSIX_TIMERS
1275e362359aSThadeu Lima de Souza Cascardo 	spin_lock_irq(&me->sighand->siglock);
1276e362359aSThadeu Lima de Souza Cascardo 	posix_cpu_timers_exit(me);
1277e362359aSThadeu Lima de Souza Cascardo 	spin_unlock_irq(&me->sighand->siglock);
1278d5b36a4dSOleg Nesterov 	exit_itimers(me);
1279ccf0fa6bSEric W. Biederman 	flush_itimer_signals();
1280ccf0fa6bSEric W. Biederman #endif
1281ccf0fa6bSEric W. Biederman 
1282ccf0fa6bSEric W. Biederman 	/*
1283ccf0fa6bSEric W. Biederman 	 * Make the signal table private.
1284ccf0fa6bSEric W. Biederman 	 */
1285ccf0fa6bSEric W. Biederman 	retval = unshare_sighand(me);
1286ccf0fa6bSEric W. Biederman 	if (retval)
128789826cceSEric W. Biederman 		goto out_unlock;
1288ccf0fa6bSEric W. Biederman 
12891b2552cbSEric W. Biederman 	me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC |
1290b88fae64SZhang Yi 					PF_NOFREEZE | PF_NO_SETAFFINITY);
12917ab02af4SLinus Torvalds 	flush_thread();
12922ca7be7dSEric W. Biederman 	me->personality &= ~bprm->per_clear;
12937ab02af4SLinus Torvalds 
12941446e1dfSGabriel Krisman Bertazi 	clear_syscall_work_syscall_user_dispatch(me);
12951446e1dfSGabriel Krisman Bertazi 
1296613cc2b6SAleksa Sarai 	/*
1297613cc2b6SAleksa Sarai 	 * We have to apply CLOEXEC before we change whether the process is
1298613cc2b6SAleksa Sarai 	 * dumpable (in setup_new_exec) to avoid a race with a process in userspace
1299613cc2b6SAleksa Sarai 	 * trying to access the should-be-closed file descriptors of a process
1300613cc2b6SAleksa Sarai 	 * undergoing exec(2).
1301613cc2b6SAleksa Sarai 	 */
13022ca7be7dSEric W. Biederman 	do_close_on_exec(me->files);
1303df9e4d2cSEric W. Biederman 
1304df9e4d2cSEric W. Biederman 	if (bprm->secureexec) {
1305df9e4d2cSEric W. Biederman 		/* Make sure parent cannot signal privileged process. */
1306df9e4d2cSEric W. Biederman 		me->pdeath_signal = 0;
1307df9e4d2cSEric W. Biederman 
1308df9e4d2cSEric W. Biederman 		/*
1309df9e4d2cSEric W. Biederman 		 * For secureexec, reset the stack limit to sane default to
1310df9e4d2cSEric W. Biederman 		 * avoid bad behavior from the prior rlimits. This has to
1311df9e4d2cSEric W. Biederman 		 * happen before arch_pick_mmap_layout(), which examines
1312df9e4d2cSEric W. Biederman 		 * RLIMIT_STACK, but after the point of no return to avoid
1313df9e4d2cSEric W. Biederman 		 * needing to clean up the change on failure.
1314df9e4d2cSEric W. Biederman 		 */
1315df9e4d2cSEric W. Biederman 		if (bprm->rlim_stack.rlim_cur > _STK_LIM)
1316df9e4d2cSEric W. Biederman 			bprm->rlim_stack.rlim_cur = _STK_LIM;
1317df9e4d2cSEric W. Biederman 	}
1318df9e4d2cSEric W. Biederman 
1319df9e4d2cSEric W. Biederman 	me->sas_ss_sp = me->sas_ss_size = 0;
1320df9e4d2cSEric W. Biederman 
1321df9e4d2cSEric W. Biederman 	/*
1322df9e4d2cSEric W. Biederman 	 * Figure out dumpability. Note that this checking only of current
1323df9e4d2cSEric W. Biederman 	 * is wrong, but userspace depends on it. This should be testing
1324df9e4d2cSEric W. Biederman 	 * bprm->secureexec instead.
1325df9e4d2cSEric W. Biederman 	 */
1326df9e4d2cSEric W. Biederman 	if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP ||
1327df9e4d2cSEric W. Biederman 	    !(uid_eq(current_euid(), current_uid()) &&
1328df9e4d2cSEric W. Biederman 	      gid_eq(current_egid(), current_gid())))
1329df9e4d2cSEric W. Biederman 		set_dumpable(current->mm, suid_dumpable);
1330df9e4d2cSEric W. Biederman 	else
1331df9e4d2cSEric W. Biederman 		set_dumpable(current->mm, SUID_DUMP_USER);
1332df9e4d2cSEric W. Biederman 
1333df9e4d2cSEric W. Biederman 	perf_event_exec();
1334543841d1SKees Cook 
1335543841d1SKees Cook 	/*
1336543841d1SKees Cook 	 * If the original filename was empty, alloc_bprm() made up a path
1337543841d1SKees Cook 	 * that will probably not be useful to admins running ps or similar.
1338543841d1SKees Cook 	 * Let's fix it up to be something reasonable.
1339543841d1SKees Cook 	 */
1340543841d1SKees Cook 	if (bprm->comm_from_dentry) {
1341543841d1SKees Cook 		/*
1342543841d1SKees Cook 		 * Hold RCU lock to keep the name from being freed behind our back.
1343543841d1SKees Cook 		 * Use acquire semantics to make sure the terminating NUL from
1344543841d1SKees Cook 		 * __d_alloc() is seen.
1345543841d1SKees Cook 		 *
1346543841d1SKees Cook 		 * Note, we're deliberately sloppy here. We don't need to care about
1347543841d1SKees Cook 		 * detecting a concurrent rename and just want a terminated name.
1348543841d1SKees Cook 		 */
1349543841d1SKees Cook 		rcu_read_lock();
1350543841d1SKees Cook 		__set_task_comm(me, smp_load_acquire(&bprm->file->f_path.dentry->d_name.name),
1351543841d1SKees Cook 				true);
1352543841d1SKees Cook 		rcu_read_unlock();
1353543841d1SKees Cook 	} else {
1354df9e4d2cSEric W. Biederman 		__set_task_comm(me, kbasename(bprm->filename), true);
1355543841d1SKees Cook 	}
1356df9e4d2cSEric W. Biederman 
1357df9e4d2cSEric W. Biederman 	/* An exec changes our domain. We are no longer part of the thread
1358df9e4d2cSEric W. Biederman 	   group */
1359df9e4d2cSEric W. Biederman 	WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1);
1360df9e4d2cSEric W. Biederman 	flush_signal_handlers(me, 0);
1361df9e4d2cSEric W. Biederman 
1362905ae01cSAlexey Gladkov 	retval = set_cred_ucounts(bprm->cred);
1363905ae01cSAlexey Gladkov 	if (retval < 0)
1364905ae01cSAlexey Gladkov 		goto out_unlock;
1365905ae01cSAlexey Gladkov 
1366df9e4d2cSEric W. Biederman 	/*
1367df9e4d2cSEric W. Biederman 	 * install the new credentials for this executable
1368df9e4d2cSEric W. Biederman 	 */
1369df9e4d2cSEric W. Biederman 	security_bprm_committing_creds(bprm);
1370df9e4d2cSEric W. Biederman 
1371df9e4d2cSEric W. Biederman 	commit_creds(bprm->cred);
1372df9e4d2cSEric W. Biederman 	bprm->cred = NULL;
1373df9e4d2cSEric W. Biederman 
1374df9e4d2cSEric W. Biederman 	/*
1375df9e4d2cSEric W. Biederman 	 * Disable monitoring for regular users
1376df9e4d2cSEric W. Biederman 	 * when executing setuid binaries. Must
1377df9e4d2cSEric W. Biederman 	 * wait until new credentials are committed
1378df9e4d2cSEric W. Biederman 	 * by commit_creds() above
1379df9e4d2cSEric W. Biederman 	 */
1380df9e4d2cSEric W. Biederman 	if (get_dumpable(me->mm) != SUID_DUMP_USER)
1381df9e4d2cSEric W. Biederman 		perf_event_exit_task(me);
1382df9e4d2cSEric W. Biederman 	/*
1383df9e4d2cSEric W. Biederman 	 * cred_guard_mutex must be held at least to this point to prevent
1384df9e4d2cSEric W. Biederman 	 * ptrace_attach() from altering our determination of the task's
1385df9e4d2cSEric W. Biederman 	 * credentials; any time after this it may be unlocked.
1386df9e4d2cSEric W. Biederman 	 */
1387df9e4d2cSEric W. Biederman 	security_bprm_committed_creds(bprm);
1388b8a61c9eSEric W. Biederman 
1389b8a61c9eSEric W. Biederman 	/* Pass the opened binary to the interpreter. */
1390b8a61c9eSEric W. Biederman 	if (bprm->have_execfd) {
1391b8a61c9eSEric W. Biederman 		retval = get_unused_fd_flags(0);
1392b8a61c9eSEric W. Biederman 		if (retval < 0)
1393b8a61c9eSEric W. Biederman 			goto out_unlock;
1394b8a61c9eSEric W. Biederman 		fd_install(retval, bprm->executable);
1395b8a61c9eSEric W. Biederman 		bprm->executable = NULL;
1396b8a61c9eSEric W. Biederman 		bprm->execfd = retval;
1397b8a61c9eSEric W. Biederman 	}
1398221af7f8SLinus Torvalds 	return 0;
1399221af7f8SLinus Torvalds 
140089826cceSEric W. Biederman out_unlock:
1401f7cfd871SEric W. Biederman 	up_write(&me->signal->exec_update_lock);
140284c39ec5SBernd Edlinger 	if (!bprm->cred)
140384c39ec5SBernd Edlinger 		mutex_unlock(&me->signal->cred_guard_mutex);
140484c39ec5SBernd Edlinger 
1405221af7f8SLinus Torvalds out:
1406221af7f8SLinus Torvalds 	return retval;
1407221af7f8SLinus Torvalds }
14082388777aSEric W. Biederman EXPORT_SYMBOL(begin_new_exec);
1409221af7f8SLinus Torvalds 
would_dump(struct linux_binprm * bprm,struct file * file)14101b5d783cSAl Viro void would_dump(struct linux_binprm *bprm, struct file *file)
14111b5d783cSAl Viro {
1412f84df2a6SEric W. Biederman 	struct inode *inode = file_inode(file);
14134609e1f1SChristian Brauner 	struct mnt_idmap *idmap = file_mnt_idmap(file);
14144609e1f1SChristian Brauner 	if (inode_permission(idmap, inode, MAY_READ) < 0) {
1415f84df2a6SEric W. Biederman 		struct user_namespace *old, *user_ns;
14161b5d783cSAl Viro 		bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
1417f84df2a6SEric W. Biederman 
1418f84df2a6SEric W. Biederman 		/* Ensure mm->user_ns contains the executable */
1419f84df2a6SEric W. Biederman 		user_ns = old = bprm->mm->user_ns;
1420f84df2a6SEric W. Biederman 		while ((user_ns != &init_user_ns) &&
14219452e93eSChristian Brauner 		       !privileged_wrt_inode_uidgid(user_ns, idmap, inode))
1422f84df2a6SEric W. Biederman 			user_ns = user_ns->parent;
1423f84df2a6SEric W. Biederman 
1424f84df2a6SEric W. Biederman 		if (old != user_ns) {
1425f84df2a6SEric W. Biederman 			bprm->mm->user_ns = get_user_ns(user_ns);
1426f84df2a6SEric W. Biederman 			put_user_ns(old);
1427f84df2a6SEric W. Biederman 		}
1428f84df2a6SEric W. Biederman 	}
14291b5d783cSAl Viro }
14301b5d783cSAl Viro EXPORT_SYMBOL(would_dump);
14311b5d783cSAl Viro 
setup_new_exec(struct linux_binprm * bprm)1432221af7f8SLinus Torvalds void setup_new_exec(struct linux_binprm * bprm)
1433221af7f8SLinus Torvalds {
1434df9e4d2cSEric W. Biederman 	/* Setup things that can depend upon the personality */
14357d503febSEric W. Biederman 	struct task_struct *me = current;
143646d98eb4SKees Cook 
14377d503febSEric W. Biederman 	arch_pick_mmap_layout(me->mm, &bprm->rlim_stack);
1438d6e71144SAlan Cox 
1439e9ea1e7fSKyle Huey 	arch_setup_new_exec();
14401da177e4SLinus Torvalds 
14410551fbd2SBenjamin Herrenschmidt 	/* Set the new mm task size. We have to do that late because it may
14420551fbd2SBenjamin Herrenschmidt 	 * depend on TIF_32BIT which is only updated in flush_thread() on
14430551fbd2SBenjamin Herrenschmidt 	 * some architectures like powerpc
14440551fbd2SBenjamin Herrenschmidt 	 */
14457d503febSEric W. Biederman 	me->mm->task_size = TASK_SIZE;
1446f7cfd871SEric W. Biederman 	up_write(&me->signal->exec_update_lock);
14477d503febSEric W. Biederman 	mutex_unlock(&me->signal->cred_guard_mutex);
14481da177e4SLinus Torvalds }
1449221af7f8SLinus Torvalds EXPORT_SYMBOL(setup_new_exec);
14501da177e4SLinus Torvalds 
1451b8383831SKees Cook /* Runs immediately before start_thread() takes over. */
finalize_exec(struct linux_binprm * bprm)1452b8383831SKees Cook void finalize_exec(struct linux_binprm *bprm)
1453b8383831SKees Cook {
1454c31dbb14SKees Cook 	/* Store any stack rlimit changes before starting thread. */
1455c31dbb14SKees Cook 	task_lock(current->group_leader);
1456c31dbb14SKees Cook 	current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack;
1457c31dbb14SKees Cook 	task_unlock(current->group_leader);
1458b8383831SKees Cook }
1459b8383831SKees Cook EXPORT_SYMBOL(finalize_exec);
1460b8383831SKees Cook 
14611da177e4SLinus Torvalds /*
1462a2a8474cSOleg Nesterov  * Prepare credentials and lock ->cred_guard_mutex.
146396ecee29SEric W. Biederman  * setup_new_exec() commits the new creds and drops the lock.
14643d742d4bSRandy Dunlap  * Or, if exec fails before, free_bprm() should release ->cred
1465a2a8474cSOleg Nesterov  * and unlock.
1466a2a8474cSOleg Nesterov  */
prepare_bprm_creds(struct linux_binprm * bprm)14674addd264SChanho Min static int prepare_bprm_creds(struct linux_binprm *bprm)
1468a2a8474cSOleg Nesterov {
14699b1bf12dSKOSAKI Motohiro 	if (mutex_lock_interruptible(&current->signal->cred_guard_mutex))
1470a2a8474cSOleg Nesterov 		return -ERESTARTNOINTR;
1471a2a8474cSOleg Nesterov 
1472a2a8474cSOleg Nesterov 	bprm->cred = prepare_exec_creds();
1473a2a8474cSOleg Nesterov 	if (likely(bprm->cred))
1474a2a8474cSOleg Nesterov 		return 0;
1475a2a8474cSOleg Nesterov 
14769b1bf12dSKOSAKI Motohiro 	mutex_unlock(&current->signal->cred_guard_mutex);
1477a2a8474cSOleg Nesterov 	return -ENOMEM;
1478a2a8474cSOleg Nesterov }
1479a2a8474cSOleg Nesterov 
/*
 * Matches do_open_execat(): re-allows write access to @file and drops the
 * reference.  A NULL @file is accepted and ignored.
 */
static void do_close_execat(struct file *file)
{
	if (file) {
		exe_file_allow_write_access(file);
		fput(file);
	}
}
1488bdd8f624SKees Cook 
free_bprm(struct linux_binprm * bprm)1489c4ad8f98SLinus Torvalds static void free_bprm(struct linux_binprm *bprm)
1490a2a8474cSOleg Nesterov {
1491f18ac551SEric W. Biederman 	if (bprm->mm) {
1492f18ac551SEric W. Biederman 		acct_arg_size(bprm, 0);
1493f18ac551SEric W. Biederman 		mmput(bprm->mm);
1494f18ac551SEric W. Biederman 	}
1495a2a8474cSOleg Nesterov 	free_arg_pages(bprm);
1496a2a8474cSOleg Nesterov 	if (bprm->cred) {
1497af7bb0d2SOleg Nesterov 		/* in case exec fails before de_thread() succeeds */
1498af7bb0d2SOleg Nesterov 		current->fs->in_exec = 0;
14999b1bf12dSKOSAKI Motohiro 		mutex_unlock(&current->signal->cred_guard_mutex);
1500a2a8474cSOleg Nesterov 		abort_creds(bprm->cred);
1501a2a8474cSOleg Nesterov 	}
1502bdd8f624SKees Cook 	do_close_execat(bprm->file);
1503b8a61c9eSEric W. Biederman 	if (bprm->executable)
1504b8a61c9eSEric W. Biederman 		fput(bprm->executable);
1505b66c5984SKees Cook 	/* If a binfmt changed the interp, free it. */
1506b66c5984SKees Cook 	if (bprm->interp != bprm->filename)
1507b66c5984SKees Cook 		kfree(bprm->interp);
150860d9ad1dSEric W. Biederman 	kfree(bprm->fdpath);
1509a2a8474cSOleg Nesterov 	kfree(bprm);
1510a2a8474cSOleg Nesterov }
1511a2a8474cSOleg Nesterov 
alloc_bprm(int fd,struct filename * filename,int flags)1512978ffcbfSLinus Torvalds static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int flags)
15130a8f36ebSEric W. Biederman {
1514978ffcbfSLinus Torvalds 	struct linux_binprm *bprm;
1515978ffcbfSLinus Torvalds 	struct file *file;
151660d9ad1dSEric W. Biederman 	int retval = -ENOMEM;
1517978ffcbfSLinus Torvalds 
1518978ffcbfSLinus Torvalds 	file = do_open_execat(fd, filename, flags);
1519978ffcbfSLinus Torvalds 	if (IS_ERR(file))
1520978ffcbfSLinus Torvalds 		return ERR_CAST(file);
1521978ffcbfSLinus Torvalds 
1522978ffcbfSLinus Torvalds 	bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
1523978ffcbfSLinus Torvalds 	if (!bprm) {
1524bdd8f624SKees Cook 		do_close_execat(file);
1525978ffcbfSLinus Torvalds 		return ERR_PTR(-ENOMEM);
1526978ffcbfSLinus Torvalds 	}
1527978ffcbfSLinus Torvalds 
1528978ffcbfSLinus Torvalds 	bprm->file = file;
152960d9ad1dSEric W. Biederman 
153060d9ad1dSEric W. Biederman 	if (fd == AT_FDCWD || filename->name[0] == '/') {
153160d9ad1dSEric W. Biederman 		bprm->filename = filename->name;
153260d9ad1dSEric W. Biederman 	} else {
1533543841d1SKees Cook 		if (filename->name[0] == '\0') {
153460d9ad1dSEric W. Biederman 			bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
1535543841d1SKees Cook 			bprm->comm_from_dentry = 1;
1536543841d1SKees Cook 		} else {
153760d9ad1dSEric W. Biederman 			bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
153860d9ad1dSEric W. Biederman 						  fd, filename->name);
1539543841d1SKees Cook 		}
154060d9ad1dSEric W. Biederman 		if (!bprm->fdpath)
154160d9ad1dSEric W. Biederman 			goto out_free;
154260d9ad1dSEric W. Biederman 
1543978ffcbfSLinus Torvalds 		/*
1544978ffcbfSLinus Torvalds 		 * Record that a name derived from an O_CLOEXEC fd will be
1545978ffcbfSLinus Torvalds 		 * inaccessible after exec.  This allows the code in exec to
1546978ffcbfSLinus Torvalds 		 * choose to fail when the executable is not mmaped into the
1547978ffcbfSLinus Torvalds 		 * interpreter and an open file descriptor is not passed to
1548978ffcbfSLinus Torvalds 		 * the interpreter.  This makes for a better user experience
1549978ffcbfSLinus Torvalds 		 * than having the interpreter start and then immediately fail
1550978ffcbfSLinus Torvalds 		 * when it finds the executable is inaccessible.
1551978ffcbfSLinus Torvalds 		 */
1552978ffcbfSLinus Torvalds 		if (get_close_on_exec(fd))
1553978ffcbfSLinus Torvalds 			bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
1554978ffcbfSLinus Torvalds 
155560d9ad1dSEric W. Biederman 		bprm->filename = bprm->fdpath;
155660d9ad1dSEric W. Biederman 	}
155760d9ad1dSEric W. Biederman 	bprm->interp = bprm->filename;
1558f18ac551SEric W. Biederman 
1559a5874fdeSMickaël Salaün 	/*
1560a5874fdeSMickaël Salaün 	 * At this point, security_file_open() has already been called (with
1561a5874fdeSMickaël Salaün 	 * __FMODE_EXEC) and access control checks for AT_EXECVE_CHECK will
1562a5874fdeSMickaël Salaün 	 * stop just after the security_bprm_creds_for_exec() call in
1563a5874fdeSMickaël Salaün 	 * bprm_execve().  Indeed, the kernel should not try to parse the
1564a5874fdeSMickaël Salaün 	 * content of the file with exec_binprm() nor change the calling
1565a5874fdeSMickaël Salaün 	 * thread, which means that the following security functions will not
1566a5874fdeSMickaël Salaün 	 * be called:
1567a5874fdeSMickaël Salaün 	 * - security_bprm_check()
1568a5874fdeSMickaël Salaün 	 * - security_bprm_creds_from_file()
1569a5874fdeSMickaël Salaün 	 * - security_bprm_committing_creds()
1570a5874fdeSMickaël Salaün 	 * - security_bprm_committed_creds()
1571a5874fdeSMickaël Salaün 	 */
1572a5874fdeSMickaël Salaün 	bprm->is_check = !!(flags & AT_EXECVE_CHECK);
1573a5874fdeSMickaël Salaün 
1574f18ac551SEric W. Biederman 	retval = bprm_mm_init(bprm);
1575978ffcbfSLinus Torvalds 	if (!retval)
15760a8f36ebSEric W. Biederman 		return bprm;
157760d9ad1dSEric W. Biederman 
157860d9ad1dSEric W. Biederman out_free:
157960d9ad1dSEric W. Biederman 	free_bprm(bprm);
158060d9ad1dSEric W. Biederman 	return ERR_PTR(retval);
15810a8f36ebSEric W. Biederman }
15820a8f36ebSEric W. Biederman 
bprm_change_interp(const char * interp,struct linux_binprm * bprm)1583c2315c18SOleg Nesterov int bprm_change_interp(const char *interp, struct linux_binprm *bprm)
1584b66c5984SKees Cook {
1585b66c5984SKees Cook 	/* If a binfmt changed the interp, free it first. */
1586b66c5984SKees Cook 	if (bprm->interp != bprm->filename)
1587b66c5984SKees Cook 		kfree(bprm->interp);
1588b66c5984SKees Cook 	bprm->interp = kstrdup(interp, GFP_KERNEL);
1589b66c5984SKees Cook 	if (!bprm->interp)
1590b66c5984SKees Cook 		return -ENOMEM;
1591b66c5984SKees Cook 	return 0;
1592b66c5984SKees Cook }
1593b66c5984SKees Cook EXPORT_SYMBOL(bprm_change_interp);
1594b66c5984SKees Cook 
1595a2a8474cSOleg Nesterov /*
1596a6f76f23SDavid Howells  * determine how safe it is to execute the proposed program
15979b1bf12dSKOSAKI Motohiro  * - the caller must hold ->cred_guard_mutex to protect against
1598c2e1f2e3SKees Cook  *   PTRACE_ATTACH or seccomp thread-sync
1599a6f76f23SDavid Howells  */
check_unsafe_exec(struct linux_binprm * bprm)16009e00cdb0SOleg Nesterov static void check_unsafe_exec(struct linux_binprm *bprm)
1601a6f76f23SDavid Howells {
16020bf2f3aeSDavid Howells 	struct task_struct *p = current, *t;
1603f1191b50SAl Viro 	unsigned n_fs;
1604a6f76f23SDavid Howells 
16059227dd2aSEric W. Biederman 	if (p->ptrace)
16064b9d33e6STejun Heo 		bprm->unsafe |= LSM_UNSAFE_PTRACE;
1607a6f76f23SDavid Howells 
1608259e5e6cSAndy Lutomirski 	/*
1609259e5e6cSAndy Lutomirski 	 * This isn't strictly necessary, but it makes it harder for LSMs to
1610259e5e6cSAndy Lutomirski 	 * mess up.
1611259e5e6cSAndy Lutomirski 	 */
16121d4457f9SKees Cook 	if (task_no_new_privs(current))
1613259e5e6cSAndy Lutomirski 		bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
1614259e5e6cSAndy Lutomirski 
1615275498a9SKees Cook 	/*
1616275498a9SKees Cook 	 * If another task is sharing our fs, we cannot safely
1617275498a9SKees Cook 	 * suid exec because the differently privileged task
1618275498a9SKees Cook 	 * will be able to manipulate the current directory, etc.
1619275498a9SKees Cook 	 * It would be nice to force an unshare instead...
1620af7bb0d2SOleg Nesterov 	 *
1621af7bb0d2SOleg Nesterov 	 * Otherwise we set fs->in_exec = 1 to deny clone(CLONE_FS)
1622af7bb0d2SOleg Nesterov 	 * from another sub-thread until de_thread() succeeds, this
1623af7bb0d2SOleg Nesterov 	 * state is protected by cred_guard_mutex we hold.
1624275498a9SKees Cook 	 */
16250bf2f3aeSDavid Howells 	n_fs = 1;
16262a4419b5SNick Piggin 	spin_lock(&p->fs->lock);
1627437f7fdbSOleg Nesterov 	rcu_read_lock();
162861a7a5e2SOleg Nesterov 	for_other_threads(p, t) {
16290bf2f3aeSDavid Howells 		if (t->fs == p->fs)
16300bf2f3aeSDavid Howells 			n_fs++;
16310bf2f3aeSDavid Howells 	}
1632437f7fdbSOleg Nesterov 	rcu_read_unlock();
16330bf2f3aeSDavid Howells 
163490383cc0SKees Cook 	/* "users" and "in_exec" locked for copy_fs() */
16359e00cdb0SOleg Nesterov 	if (p->fs->users > n_fs)
1636a6f76f23SDavid Howells 		bprm->unsafe |= LSM_UNSAFE_SHARE;
16379e00cdb0SOleg Nesterov 	else
1638498052bbSAl Viro 		p->fs->in_exec = 1;
16392a4419b5SNick Piggin 	spin_unlock(&p->fs->lock);
1640a6f76f23SDavid Howells }
1641a6f76f23SDavid Howells 
bprm_fill_uid(struct linux_binprm * bprm,struct file * file)164256305aa9SEric W. Biederman static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
16438b01fc86SJann Horn {
164456305aa9SEric W. Biederman 	/* Handle suid and sgid on files */
1645e67fe633SChristian Brauner 	struct mnt_idmap *idmap;
1646e6ae4381SAl Viro 	struct inode *inode = file_inode(file);
16478b01fc86SJann Horn 	unsigned int mode;
1648a2bd096fSChristian Brauner 	vfsuid_t vfsuid;
1649a2bd096fSChristian Brauner 	vfsgid_t vfsgid;
1650f50733b4SKees Cook 	int err;
16518b01fc86SJann Horn 
165256305aa9SEric W. Biederman 	if (!mnt_may_suid(file->f_path.mnt))
16538b01fc86SJann Horn 		return;
16548b01fc86SJann Horn 
16558b01fc86SJann Horn 	if (task_no_new_privs(current))
16568b01fc86SJann Horn 		return;
16578b01fc86SJann Horn 
16588b01fc86SJann Horn 	mode = READ_ONCE(inode->i_mode);
16598b01fc86SJann Horn 	if (!(mode & (S_ISUID|S_ISGID)))
16608b01fc86SJann Horn 		return;
16618b01fc86SJann Horn 
1662e67fe633SChristian Brauner 	idmap = file_mnt_idmap(file);
16631ab29965SChristian Brauner 
16648b01fc86SJann Horn 	/* Be careful if suid/sgid is set */
16655955102cSAl Viro 	inode_lock(inode);
16668b01fc86SJann Horn 
1667f50733b4SKees Cook 	/* Atomically reload and check mode/uid/gid now that lock held. */
16688b01fc86SJann Horn 	mode = inode->i_mode;
1669e67fe633SChristian Brauner 	vfsuid = i_uid_into_vfsuid(idmap, inode);
1670e67fe633SChristian Brauner 	vfsgid = i_gid_into_vfsgid(idmap, inode);
1671f50733b4SKees Cook 	err = inode_permission(idmap, inode, MAY_EXEC);
16725955102cSAl Viro 	inode_unlock(inode);
16738b01fc86SJann Horn 
1674f50733b4SKees Cook 	/* Did the exec bit vanish out from under us? Give up. */
1675f50733b4SKees Cook 	if (err)
1676f50733b4SKees Cook 		return;
1677f50733b4SKees Cook 
16788b01fc86SJann Horn 	/* We ignore suid/sgid if there are no mappings for them in the ns */
1679a2bd096fSChristian Brauner 	if (!vfsuid_has_mapping(bprm->cred->user_ns, vfsuid) ||
1680a2bd096fSChristian Brauner 	    !vfsgid_has_mapping(bprm->cred->user_ns, vfsgid))
16818b01fc86SJann Horn 		return;
16828b01fc86SJann Horn 
16838b01fc86SJann Horn 	if (mode & S_ISUID) {
16848b01fc86SJann Horn 		bprm->per_clear |= PER_CLEAR_ON_SETID;
1685a2bd096fSChristian Brauner 		bprm->cred->euid = vfsuid_into_kuid(vfsuid);
16868b01fc86SJann Horn 	}
16878b01fc86SJann Horn 
16888b01fc86SJann Horn 	if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
16898b01fc86SJann Horn 		bprm->per_clear |= PER_CLEAR_ON_SETID;
1690a2bd096fSChristian Brauner 		bprm->cred->egid = vfsgid_into_kgid(vfsgid);
16918b01fc86SJann Horn 	}
16928b01fc86SJann Horn }
16938b01fc86SJann Horn 
1694a6f76f23SDavid Howells /*
169556305aa9SEric W. Biederman  * Compute brpm->cred based upon the final binary.
169656305aa9SEric W. Biederman  */
bprm_creds_from_file(struct linux_binprm * bprm)169756305aa9SEric W. Biederman static int bprm_creds_from_file(struct linux_binprm *bprm)
169856305aa9SEric W. Biederman {
169956305aa9SEric W. Biederman 	/* Compute creds based on which file? */
170056305aa9SEric W. Biederman 	struct file *file = bprm->execfd_creds ? bprm->executable : bprm->file;
170156305aa9SEric W. Biederman 
170256305aa9SEric W. Biederman 	bprm_fill_uid(bprm, file);
170356305aa9SEric W. Biederman 	return security_bprm_creds_from_file(bprm, file);
170456305aa9SEric W. Biederman }
170556305aa9SEric W. Biederman 
170656305aa9SEric W. Biederman /*
17071da177e4SLinus Torvalds  * Fill the binprm structure from the inode.
170856305aa9SEric W. Biederman  * Read the first BINPRM_BUF_SIZE bytes
1709a6f76f23SDavid Howells  *
1710a6f76f23SDavid Howells  * This may be called multiple times for binary chains (scripts for example).
17111da177e4SLinus Torvalds  */
prepare_binprm(struct linux_binprm * bprm)17128b72ca90SEric W. Biederman static int prepare_binprm(struct linux_binprm *bprm)
17131da177e4SLinus Torvalds {
1714bdd1d2d3SChristoph Hellwig 	loff_t pos = 0;
17151da177e4SLinus Torvalds 
17161da177e4SLinus Torvalds 	memset(bprm->buf, 0, BINPRM_BUF_SIZE);
1717bdd1d2d3SChristoph Hellwig 	return kernel_read(bprm->file, bprm->buf, BINPRM_BUF_SIZE, &pos);
17181da177e4SLinus Torvalds }
17191da177e4SLinus Torvalds 
17204fc75ff4SNick Piggin /*
17214fc75ff4SNick Piggin  * Arguments are '\0' separated strings found at the location bprm->p
17224fc75ff4SNick Piggin  * points to; chop off the first by relocating brpm->p to right after
17234fc75ff4SNick Piggin  * the first '\0' encountered.
17244fc75ff4SNick Piggin  */
remove_arg_zero(struct linux_binprm * bprm)1725b6a2fea3SOllie Wild int remove_arg_zero(struct linux_binprm *bprm)
17261da177e4SLinus Torvalds {
17271da177e4SLinus Torvalds 	unsigned long offset;
17281da177e4SLinus Torvalds 	char *kaddr;
17291da177e4SLinus Torvalds 	struct page *page;
17301da177e4SLinus Torvalds 
1731b6a2fea3SOllie Wild 	if (!bprm->argc)
1732b6a2fea3SOllie Wild 		return 0;
17331da177e4SLinus Torvalds 
1734b6a2fea3SOllie Wild 	do {
1735b6a2fea3SOllie Wild 		offset = bprm->p & ~PAGE_MASK;
1736b6a2fea3SOllie Wild 		page = get_arg_page(bprm, bprm->p, 0);
1737725d5026SKees Cook 		if (!page)
1738725d5026SKees Cook 			return -EFAULT;
17393a608cfeSFabio M. De Francesco 		kaddr = kmap_local_page(page);
17404fc75ff4SNick Piggin 
1741b6a2fea3SOllie Wild 		for (; offset < PAGE_SIZE && kaddr[offset];
1742b6a2fea3SOllie Wild 				offset++, bprm->p++)
1743b6a2fea3SOllie Wild 			;
17444fc75ff4SNick Piggin 
17453a608cfeSFabio M. De Francesco 		kunmap_local(kaddr);
1746b6a2fea3SOllie Wild 		put_arg_page(page);
1747b6a2fea3SOllie Wild 	} while (offset == PAGE_SIZE);
17484fc75ff4SNick Piggin 
1749b6a2fea3SOllie Wild 	bprm->p++;
17501da177e4SLinus Torvalds 	bprm->argc--;
1751b6a2fea3SOllie Wild 
1752725d5026SKees Cook 	return 0;
17531da177e4SLinus Torvalds }
17541da177e4SLinus Torvalds EXPORT_SYMBOL(remove_arg_zero);
17551da177e4SLinus Torvalds 
17561da177e4SLinus Torvalds /*
17571da177e4SLinus Torvalds  * cycle the list of binary formats handler, until one recognizes the image
17581da177e4SLinus Torvalds  */
search_binary_handler(struct linux_binprm * bprm)1759bc2bf338SEric W. Biederman static int search_binary_handler(struct linux_binprm *bprm)
17601da177e4SLinus Torvalds {
17611da177e4SLinus Torvalds 	struct linux_binfmt *fmt;
1762cb7b6b1cSOleg Nesterov 	int retval;
17631da177e4SLinus Torvalds 
17648b72ca90SEric W. Biederman 	retval = prepare_binprm(bprm);
17658b72ca90SEric W. Biederman 	if (retval < 0)
17668b72ca90SEric W. Biederman 		return retval;
1767d7402698SKees Cook 
17681da177e4SLinus Torvalds 	retval = security_bprm_check(bprm);
17691da177e4SLinus Torvalds 	if (retval)
17701da177e4SLinus Torvalds 		return retval;
17711da177e4SLinus Torvalds 
17721da177e4SLinus Torvalds 	read_lock(&binfmt_lock);
1773e4dc1b14SAlexey Dobriyan 	list_for_each_entry(fmt, &formats, lh) {
17741da177e4SLinus Torvalds 		if (!try_module_get(fmt->module))
17751da177e4SLinus Torvalds 			continue;
17761da177e4SLinus Torvalds 		read_unlock(&binfmt_lock);
1777d53ddd01SAlexey Dobriyan 
177892eaa565SOleg Nesterov 		retval = fmt->load_binary(bprm);
1779d53ddd01SAlexey Dobriyan 
17801da177e4SLinus Torvalds 		read_lock(&binfmt_lock);
17811da177e4SLinus Torvalds 		put_binfmt(fmt);
1782bc2bf338SEric W. Biederman 		if (bprm->point_of_no_return || (retval != -ENOEXEC)) {
178319d860a1SAl Viro 			read_unlock(&binfmt_lock);
178419d860a1SAl Viro 			return retval;
178519d860a1SAl Viro 		}
17861da177e4SLinus Torvalds 	}
17871da177e4SLinus Torvalds 	read_unlock(&binfmt_lock);
1788cb7b6b1cSOleg Nesterov 
1789fa1bdca9SNir Lichtman 	return -ENOEXEC;
17901da177e4SLinus Torvalds }
17911da177e4SLinus Torvalds 
1792275498a9SKees Cook /* binfmt handlers will call back into begin_new_exec() on success. */
exec_binprm(struct linux_binprm * bprm)17935d1baf3bSOleg Nesterov static int exec_binprm(struct linux_binprm *bprm)
17945d1baf3bSOleg Nesterov {
17955d1baf3bSOleg Nesterov 	pid_t old_pid, old_vpid;
1796bc2bf338SEric W. Biederman 	int ret, depth;
17975d1baf3bSOleg Nesterov 
17985d1baf3bSOleg Nesterov 	/* Need to fetch pid before load_binary changes it */
17995d1baf3bSOleg Nesterov 	old_pid = current->pid;
18005d1baf3bSOleg Nesterov 	rcu_read_lock();
18015d1baf3bSOleg Nesterov 	old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
18025d1baf3bSOleg Nesterov 	rcu_read_unlock();
18035d1baf3bSOleg Nesterov 
1804bc2bf338SEric W. Biederman 	/* This allows 4 levels of binfmt rewrites before failing hard. */
1805bc2bf338SEric W. Biederman 	for (depth = 0;; depth++) {
1806bc2bf338SEric W. Biederman 		struct file *exec;
1807bc2bf338SEric W. Biederman 		if (depth > 5)
1808bc2bf338SEric W. Biederman 			return -ELOOP;
1809bc2bf338SEric W. Biederman 
18105d1baf3bSOleg Nesterov 		ret = search_binary_handler(bprm);
1811bc2bf338SEric W. Biederman 		if (ret < 0)
1812bc2bf338SEric W. Biederman 			return ret;
1813bc2bf338SEric W. Biederman 		if (!bprm->interpreter)
1814bc2bf338SEric W. Biederman 			break;
1815bc2bf338SEric W. Biederman 
1816bc2bf338SEric W. Biederman 		exec = bprm->file;
1817bc2bf338SEric W. Biederman 		bprm->file = bprm->interpreter;
1818bc2bf338SEric W. Biederman 		bprm->interpreter = NULL;
1819bc2bf338SEric W. Biederman 
18200357ef03SAmir Goldstein 		exe_file_allow_write_access(exec);
1821bc2bf338SEric W. Biederman 		if (unlikely(bprm->have_execfd)) {
1822bc2bf338SEric W. Biederman 			if (bprm->executable) {
1823bc2bf338SEric W. Biederman 				fput(exec);
1824bc2bf338SEric W. Biederman 				return -ENOEXEC;
1825bc2bf338SEric W. Biederman 			}
1826bc2bf338SEric W. Biederman 			bprm->executable = exec;
1827bc2bf338SEric W. Biederman 		} else
1828bc2bf338SEric W. Biederman 			fput(exec);
1829bc2bf338SEric W. Biederman 	}
1830bc2bf338SEric W. Biederman 
18319410d228SRichard Guy Briggs 	audit_bprm(bprm);
18325d1baf3bSOleg Nesterov 	trace_sched_process_exec(current, old_pid, bprm);
18335d1baf3bSOleg Nesterov 	ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
18349beb266fSOleg Nesterov 	proc_exec_connector(current);
1835bc2bf338SEric W. Biederman 	return 0;
18365d1baf3bSOleg Nesterov }
18375d1baf3bSOleg Nesterov 
bprm_execve(struct linux_binprm * bprm)1838978ffcbfSLinus Torvalds static int bprm_execve(struct linux_binprm *bprm)
18390c9cdff0SEric W. Biederman {
18400c9cdff0SEric W. Biederman 	int retval;
18410c9cdff0SEric W. Biederman 
18420c9cdff0SEric W. Biederman 	retval = prepare_bprm_creds(bprm);
18430c9cdff0SEric W. Biederman 	if (retval)
1844b6043501SEric W. Biederman 		return retval;
18450c9cdff0SEric W. Biederman 
1846275498a9SKees Cook 	/*
1847275498a9SKees Cook 	 * Check for unsafe execution states before exec_binprm(), which
1848275498a9SKees Cook 	 * will call back into begin_new_exec(), into bprm_creds_from_file(),
1849275498a9SKees Cook 	 * where setuid-ness is evaluated.
1850275498a9SKees Cook 	 */
18510c9cdff0SEric W. Biederman 	check_unsafe_exec(bprm);
18520c9cdff0SEric W. Biederman 	current->in_execve = 1;
1853af7f588dSMathieu Desnoyers 	sched_mm_cid_before_execve(current);
18540c9cdff0SEric W. Biederman 
18550c9cdff0SEric W. Biederman 	sched_exec();
18560c9cdff0SEric W. Biederman 
18570c9cdff0SEric W. Biederman 	/* Set the unchanging part of bprm->cred */
18580c9cdff0SEric W. Biederman 	retval = security_bprm_creds_for_exec(bprm);
1859a5874fdeSMickaël Salaün 	if (retval || bprm->is_check)
18600c9cdff0SEric W. Biederman 		goto out;
18610c9cdff0SEric W. Biederman 
18620c9cdff0SEric W. Biederman 	retval = exec_binprm(bprm);
18630c9cdff0SEric W. Biederman 	if (retval < 0)
18640c9cdff0SEric W. Biederman 		goto out;
18650c9cdff0SEric W. Biederman 
1866af7f588dSMathieu Desnoyers 	sched_mm_cid_after_execve(current);
1867*169eae77SMathieu Desnoyers 	rseq_execve(current);
18680c9cdff0SEric W. Biederman 	/* execve succeeded */
18690c9cdff0SEric W. Biederman 	current->in_execve = 0;
1870fd593511SBeau Belgrave 	user_events_execve(current);
18710c9cdff0SEric W. Biederman 	acct_update_integrals(current);
18720c9cdff0SEric W. Biederman 	task_numa_free(current, false);
18730c9cdff0SEric W. Biederman 	return retval;
18740c9cdff0SEric W. Biederman 
18750c9cdff0SEric W. Biederman out:
18760c9cdff0SEric W. Biederman 	/*
18773d742d4bSRandy Dunlap 	 * If past the point of no return ensure the code never
18780c9cdff0SEric W. Biederman 	 * returns to the userspace process.  Use an existing fatal
18790c9cdff0SEric W. Biederman 	 * signal if present otherwise terminate the process with
18800c9cdff0SEric W. Biederman 	 * SIGSEGV.
18810c9cdff0SEric W. Biederman 	 */
18820c9cdff0SEric W. Biederman 	if (bprm->point_of_no_return && !fatal_signal_pending(current))
1883e21294a7SEric W. Biederman 		force_fatal_sig(SIGSEGV);
18840c9cdff0SEric W. Biederman 
1885af7f588dSMathieu Desnoyers 	sched_mm_cid_after_execve(current);
1886*169eae77SMathieu Desnoyers 	rseq_set_notify_resume(current);
18870c9cdff0SEric W. Biederman 	current->in_execve = 0;
18880c9cdff0SEric W. Biederman 
18890c9cdff0SEric W. Biederman 	return retval;
18900c9cdff0SEric W. Biederman }
18910c9cdff0SEric W. Biederman 
do_execveat_common(int fd,struct filename * filename,struct user_arg_ptr argv,struct user_arg_ptr envp,int flags)189225cf336dSEric W. Biederman static int do_execveat_common(int fd, struct filename *filename,
1893ba2d0162SOleg Nesterov 			      struct user_arg_ptr argv,
189451f39a1fSDavid Drysdale 			      struct user_arg_ptr envp,
189525cf336dSEric W. Biederman 			      int flags)
18961da177e4SLinus Torvalds {
18971da177e4SLinus Torvalds 	struct linux_binprm *bprm;
18981da177e4SLinus Torvalds 	int retval;
189972fa5997SVasiliy Kulikov 
1900c4ad8f98SLinus Torvalds 	if (IS_ERR(filename))
1901c4ad8f98SLinus Torvalds 		return PTR_ERR(filename);
1902c4ad8f98SLinus Torvalds 
190372fa5997SVasiliy Kulikov 	/*
190472fa5997SVasiliy Kulikov 	 * We move the actual failure in case of RLIMIT_NPROC excess from
190572fa5997SVasiliy Kulikov 	 * set*uid() to execve() because too many poorly written programs
190672fa5997SVasiliy Kulikov 	 * don't check setuid() return code.  Here we additionally recheck
190772fa5997SVasiliy Kulikov 	 * whether NPROC limit is still exceeded.
190872fa5997SVasiliy Kulikov 	 */
190972fa5997SVasiliy Kulikov 	if ((current->flags & PF_NPROC_EXCEEDED) &&
1910de399236SAlexey Gladkov 	    is_rlimit_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
191172fa5997SVasiliy Kulikov 		retval = -EAGAIN;
191272fa5997SVasiliy Kulikov 		goto out_ret;
191372fa5997SVasiliy Kulikov 	}
191472fa5997SVasiliy Kulikov 
191572fa5997SVasiliy Kulikov 	/* We're below the limit (still or again), so we don't want to make
191672fa5997SVasiliy Kulikov 	 * further execve() calls fail. */
191772fa5997SVasiliy Kulikov 	current->flags &= ~PF_NPROC_EXCEEDED;
19181da177e4SLinus Torvalds 
1919978ffcbfSLinus Torvalds 	bprm = alloc_bprm(fd, filename, flags);
19200a8f36ebSEric W. Biederman 	if (IS_ERR(bprm)) {
19210a8f36ebSEric W. Biederman 		retval = PTR_ERR(bprm);
19220a8f36ebSEric W. Biederman 		goto out_ret;
19230a8f36ebSEric W. Biederman 	}
19240a8f36ebSEric W. Biederman 
1925d8b9cd54SEric W. Biederman 	retval = count(argv, MAX_ARG_STRINGS);
1926d8b9cd54SEric W. Biederman 	if (retval < 0)
1927d8b9cd54SEric W. Biederman 		goto out_free;
1928d8b9cd54SEric W. Biederman 	bprm->argc = retval;
1929d8b9cd54SEric W. Biederman 
1930d8b9cd54SEric W. Biederman 	retval = count(envp, MAX_ARG_STRINGS);
1931d8b9cd54SEric W. Biederman 	if (retval < 0)
1932d8b9cd54SEric W. Biederman 		goto out_free;
1933d8b9cd54SEric W. Biederman 	bprm->envc = retval;
1934d8b9cd54SEric W. Biederman 
1935d8b9cd54SEric W. Biederman 	retval = bprm_stack_limits(bprm);
1936655c16a8SOleg Nesterov 	if (retval < 0)
19370c9cdff0SEric W. Biederman 		goto out_free;
19381da177e4SLinus Torvalds 
1939986db2d1SChristoph Hellwig 	retval = copy_string_kernel(bprm->filename, bprm);
19401da177e4SLinus Torvalds 	if (retval < 0)
19410c9cdff0SEric W. Biederman 		goto out_free;
19421da177e4SLinus Torvalds 	bprm->exec = bprm->p;
19430c9cdff0SEric W. Biederman 
19441da177e4SLinus Torvalds 	retval = copy_strings(bprm->envc, envp, bprm);
19451da177e4SLinus Torvalds 	if (retval < 0)
19460c9cdff0SEric W. Biederman 		goto out_free;
19471da177e4SLinus Torvalds 
19481da177e4SLinus Torvalds 	retval = copy_strings(bprm->argc, argv, bprm);
19491da177e4SLinus Torvalds 	if (retval < 0)
19500c9cdff0SEric W. Biederman 		goto out_free;
19511da177e4SLinus Torvalds 
1952dcd46d89SKees Cook 	/*
1953dcd46d89SKees Cook 	 * When argv is empty, add an empty string ("") as argv[0] to
1954dcd46d89SKees Cook 	 * ensure confused userspace programs that start processing
1955dcd46d89SKees Cook 	 * from argv[1] won't end up walking envp. See also
1956dcd46d89SKees Cook 	 * bprm_stack_limits().
1957dcd46d89SKees Cook 	 */
1958dcd46d89SKees Cook 	if (bprm->argc == 0) {
1959dcd46d89SKees Cook 		retval = copy_string_kernel("", bprm);
1960dcd46d89SKees Cook 		if (retval < 0)
1961dcd46d89SKees Cook 			goto out_free;
1962dcd46d89SKees Cook 		bprm->argc = 1;
19634188fc31S[email protected] 
19644188fc31S[email protected] 		pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n",
19654188fc31S[email protected] 			     current->comm, bprm->filename);
1966dcd46d89SKees Cook 	}
1967dcd46d89SKees Cook 
1968978ffcbfSLinus Torvalds 	retval = bprm_execve(bprm);
1969a6f76f23SDavid Howells out_free:
197008a6fac1SAl Viro 	free_bprm(bprm);
19711da177e4SLinus Torvalds 
19721da177e4SLinus Torvalds out_ret:
1973c4ad8f98SLinus Torvalds 	putname(filename);
19741da177e4SLinus Torvalds 	return retval;
19751da177e4SLinus Torvalds }
19761da177e4SLinus Torvalds 
kernel_execve(const char * kernel_filename,const char * const * argv,const char * const * envp)1977be619f7fSEric W. Biederman int kernel_execve(const char *kernel_filename,
1978be619f7fSEric W. Biederman 		  const char *const *argv, const char *const *envp)
1979be619f7fSEric W. Biederman {
1980be619f7fSEric W. Biederman 	struct filename *filename;
1981be619f7fSEric W. Biederman 	struct linux_binprm *bprm;
1982be619f7fSEric W. Biederman 	int fd = AT_FDCWD;
1983be619f7fSEric W. Biederman 	int retval;
1984be619f7fSEric W. Biederman 
19851b2552cbSEric W. Biederman 	/* It is non-sense for kernel threads to call execve */
19861b2552cbSEric W. Biederman 	if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
1987343f4c49SEric W. Biederman 		return -EINVAL;
1988343f4c49SEric W. Biederman 
1989be619f7fSEric W. Biederman 	filename = getname_kernel(kernel_filename);
1990be619f7fSEric W. Biederman 	if (IS_ERR(filename))
1991be619f7fSEric W. Biederman 		return PTR_ERR(filename);
1992be619f7fSEric W. Biederman 
1993978ffcbfSLinus Torvalds 	bprm = alloc_bprm(fd, filename, 0);
1994be619f7fSEric W. Biederman 	if (IS_ERR(bprm)) {
1995be619f7fSEric W. Biederman 		retval = PTR_ERR(bprm);
1996be619f7fSEric W. Biederman 		goto out_ret;
1997be619f7fSEric W. Biederman 	}
1998be619f7fSEric W. Biederman 
1999be619f7fSEric W. Biederman 	retval = count_strings_kernel(argv);
2000dcd46d89SKees Cook 	if (WARN_ON_ONCE(retval == 0))
2001dcd46d89SKees Cook 		retval = -EINVAL;
2002be619f7fSEric W. Biederman 	if (retval < 0)
2003be619f7fSEric W. Biederman 		goto out_free;
2004be619f7fSEric W. Biederman 	bprm->argc = retval;
2005be619f7fSEric W. Biederman 
2006be619f7fSEric W. Biederman 	retval = count_strings_kernel(envp);
2007be619f7fSEric W. Biederman 	if (retval < 0)
2008be619f7fSEric W. Biederman 		goto out_free;
2009be619f7fSEric W. Biederman 	bprm->envc = retval;
2010be619f7fSEric W. Biederman 
2011be619f7fSEric W. Biederman 	retval = bprm_stack_limits(bprm);
2012be619f7fSEric W. Biederman 	if (retval < 0)
2013be619f7fSEric W. Biederman 		goto out_free;
2014be619f7fSEric W. Biederman 
2015be619f7fSEric W. Biederman 	retval = copy_string_kernel(bprm->filename, bprm);
2016be619f7fSEric W. Biederman 	if (retval < 0)
2017be619f7fSEric W. Biederman 		goto out_free;
2018be619f7fSEric W. Biederman 	bprm->exec = bprm->p;
2019be619f7fSEric W. Biederman 
2020be619f7fSEric W. Biederman 	retval = copy_strings_kernel(bprm->envc, envp, bprm);
2021be619f7fSEric W. Biederman 	if (retval < 0)
2022be619f7fSEric W. Biederman 		goto out_free;
2023be619f7fSEric W. Biederman 
2024be619f7fSEric W. Biederman 	retval = copy_strings_kernel(bprm->argc, argv, bprm);
2025be619f7fSEric W. Biederman 	if (retval < 0)
2026be619f7fSEric W. Biederman 		goto out_free;
2027be619f7fSEric W. Biederman 
2028978ffcbfSLinus Torvalds 	retval = bprm_execve(bprm);
2029be619f7fSEric W. Biederman out_free:
2030be619f7fSEric W. Biederman 	free_bprm(bprm);
2031be619f7fSEric W. Biederman out_ret:
2032be619f7fSEric W. Biederman 	putname(filename);
2033be619f7fSEric W. Biederman 	return retval;
2034be619f7fSEric W. Biederman }
2035be619f7fSEric W. Biederman 
/*
 * Native execve(): wrap the user pointers in user_arg_ptr and resolve
 * @filename relative to AT_FDCWD with no flags.
 */
static int do_execve(struct filename *filename,
	const char __user *const __user *__argv,
	const char __user *const __user *__envp)
{
	return do_execveat_common(AT_FDCWD, filename,
			(struct user_arg_ptr){ .ptr.native = __argv },
			(struct user_arg_ptr){ .ptr.native = __envp },
			0);
}
204451f39a1fSDavid Drysdale 
/*
 * Native execveat(): wrap the user pointers in user_arg_ptr and pass
 * @fd and @flags through to do_execveat_common().
 */
static int do_execveat(int fd, struct filename *filename,
		const char __user *const __user *__argv,
		const char __user *const __user *__envp,
		int flags)
{
	return do_execveat_common(fd, filename,
			(struct user_arg_ptr){ .ptr.native = __argv },
			(struct user_arg_ptr){ .ptr.native = __envp },
			flags);
}
2055ba2d0162SOleg Nesterov 
20560e028465SOleg Nesterov #ifdef CONFIG_COMPAT
/*
 * Compat execve(): same as the native variant, but the vectors are
 * marked is_compat and carried in the compat member of user_arg_ptr.
 */
static int compat_do_execve(struct filename *filename,
	const compat_uptr_t __user *__argv,
	const compat_uptr_t __user *__envp)
{
	return do_execveat_common(AT_FDCWD, filename,
			(struct user_arg_ptr){
				.is_compat = true,
				.ptr.compat = __argv,
			},
			(struct user_arg_ptr){
				.is_compat = true,
				.ptr.compat = __envp,
			},
			0);
}
207151f39a1fSDavid Drysdale 
/*
 * Compat execveat(): the vectors are marked is_compat and carried in
 * the compat member of user_arg_ptr; @fd and @flags pass through.
 */
static int compat_do_execveat(int fd, struct filename *filename,
			      const compat_uptr_t __user *__argv,
			      const compat_uptr_t __user *__envp,
			      int flags)
{
	return do_execveat_common(fd, filename,
			(struct user_arg_ptr){
				.is_compat = true,
				.ptr.compat = __argv,
			},
			(struct user_arg_ptr){
				.is_compat = true,
				.ptr.compat = __envp,
			},
			flags);
}
20870e028465SOleg Nesterov #endif
20880e028465SOleg Nesterov 
set_binfmt(struct linux_binfmt * new)2089964ee7dfSOleg Nesterov void set_binfmt(struct linux_binfmt *new)
20901da177e4SLinus Torvalds {
2091801460d0SHiroshi Shimamoto 	struct mm_struct *mm = current->mm;
20921da177e4SLinus Torvalds 
2093801460d0SHiroshi Shimamoto 	if (mm->binfmt)
2094801460d0SHiroshi Shimamoto 		module_put(mm->binfmt->module);
2095801460d0SHiroshi Shimamoto 
2096801460d0SHiroshi Shimamoto 	mm->binfmt = new;
2097964ee7dfSOleg Nesterov 	if (new)
2098964ee7dfSOleg Nesterov 		__module_get(new->module);
20991da177e4SLinus Torvalds }
21001da177e4SLinus Torvalds EXPORT_SYMBOL(set_binfmt);
21011da177e4SLinus Torvalds 
21026c5d5238SKawai, Hidehiro /*
21037288e118SOleg Nesterov  * set_dumpable stores three-value SUID_DUMP_* into mm->flags.
21046c5d5238SKawai, Hidehiro  */
set_dumpable(struct mm_struct * mm,int value)21056c5d5238SKawai, Hidehiro void set_dumpable(struct mm_struct *mm, int value)
21066c5d5238SKawai, Hidehiro {
21077288e118SOleg Nesterov 	if (WARN_ON((unsigned)value > SUID_DUMP_ROOT))
21087288e118SOleg Nesterov 		return;
21097288e118SOleg Nesterov 
211026e15225SVineet Gupta 	set_mask_bits(&mm->flags, MMF_DUMPABLE_MASK, value);
21116c5d5238SKawai, Hidehiro }
21126c5d5238SKawai, Hidehiro 
/*
 * execve() system call entry.  The result of getname() is passed on
 * without checking; do_execve() receives it as-is.
 */
SYSCALL_DEFINE3(execve,
		const char __user *, filename,
		const char __user *const __user *, argv,
		const char __user *const __user *, envp)
{
	struct filename *name = getname(filename);

	return do_execve(name, argv, envp);
}
212051f39a1fSDavid Drysdale 
/*
 * execveat() system call entry.  The path is fetched with
 * getname_uflags() so that @flags takes part in the name lookup, and
 * the result is passed on to do_execveat() without checking.
 */
SYSCALL_DEFINE5(execveat,
		int, fd, const char __user *, filename,
		const char __user *const __user *, argv,
		const char __user *const __user *, envp,
		int, flags)
{
	struct filename *name = getname_uflags(filename, flags);

	return do_execveat(fd, name, argv, envp, flags);
}
213151f39a1fSDavid Drysdale 
213238b983b3SAl Viro #ifdef CONFIG_COMPAT
/*
 * Compat execve() system call entry; forwards to compat_do_execve()
 * with the unchecked result of getname().
 */
COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
	const compat_uptr_t __user *, argv,
	const compat_uptr_t __user *, envp)
{
	struct filename *name = getname(filename);

	return compat_do_execve(name, argv, envp);
}
213951f39a1fSDavid Drysdale 
/*
 * Compat execveat() system call entry; forwards to compat_do_execveat()
 * with the unchecked result of getname_uflags().
 */
COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
		       const char __user *, filename,
		       const compat_uptr_t __user *, argv,
		       const compat_uptr_t __user *, envp,
		       int,  flags)
{
	struct filename *name = getname_uflags(filename, flags);

	return compat_do_execveat(fd, name, argv, envp, flags);
}
215038b983b3SAl Viro #endif
215166ad3986SLuis Chamberlain 
215266ad3986SLuis Chamberlain #ifdef CONFIG_SYSCTL
215366ad3986SLuis Chamberlain 
/*
 * sysctl handler: delegate to proc_dointvec_minmax() and, when that
 * succeeds, run validate_coredump_safety().  A non-zero result from
 * proc_dointvec_minmax() is returned unchanged.
 */
static int proc_dointvec_minmax_coredump(const struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	int error;

	error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (error)
		return error;

	validate_coredump_safety();
	return 0;
}
216366ad3986SLuis Chamberlain 
21641751f872SJoel Granados static const struct ctl_table fs_exec_sysctls[] = {
216566ad3986SLuis Chamberlain 	{
216666ad3986SLuis Chamberlain 		.procname	= "suid_dumpable",
216766ad3986SLuis Chamberlain 		.data		= &suid_dumpable,
216866ad3986SLuis Chamberlain 		.maxlen		= sizeof(int),
216966ad3986SLuis Chamberlain 		.mode		= 0644,
217066ad3986SLuis Chamberlain 		.proc_handler	= proc_dointvec_minmax_coredump,
217166ad3986SLuis Chamberlain 		.extra1		= SYSCTL_ZERO,
217266ad3986SLuis Chamberlain 		.extra2		= SYSCTL_TWO,
217366ad3986SLuis Chamberlain 	},
217466ad3986SLuis Chamberlain };
217566ad3986SLuis Chamberlain 
init_fs_exec_sysctls(void)217666ad3986SLuis Chamberlain static int __init init_fs_exec_sysctls(void)
217766ad3986SLuis Chamberlain {
217866ad3986SLuis Chamberlain 	register_sysctl_init("fs", fs_exec_sysctls);
217966ad3986SLuis Chamberlain 	return 0;
218066ad3986SLuis Chamberlain }
218166ad3986SLuis Chamberlain 
218266ad3986SLuis Chamberlain fs_initcall(init_fs_exec_sysctls);
218366ad3986SLuis Chamberlain #endif /* CONFIG_SYSCTL */
218460371f43SKees Cook 
218560371f43SKees Cook #ifdef CONFIG_EXEC_KUNIT_TEST
2186b6f5ee4dSKees Cook #include "tests/exec_kunit.c"
218760371f43SKees Cook #endif
2188