// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/exec.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 */

/*
 * #!-checking implemented by tytso.
 */
/*
 * Demand-loading implemented 01.12.91 - no need to read anything but
 * the header into memory. The inode of the executable is put into
 * "current->executable", and page faults do the actual loading. Clean.
 *
 * Once more I can proudly say that linux stood up to being changed: it
 * was less than 2 hours work to get demand-loading completely implemented.
 *
 * Demand loading changed July 1993 by Eric Youngdale. Use mmap instead,
 * current->executable is only used by the procfs. This allows a dispatch
 * table to check for several different types of binary formats. We keep
 * trying until we recognize the file or we run out of supported binary
 * formats.
 */

#include <linux/kernel_read_file.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/swap.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/coredump.h>
#include <linux/sched/signal.h>
#include <linux/sched/numa_balancing.h>
#include <linux/sched/task.h>
#include <linux/pagemap.h>
#include <linux/perf_event.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/key.h>
#include <linux/personality.h>
#include <linux/binfmts.h>
#include <linux/utsname.h>
#include <linux/pid_namespace.h>
#include <linux/module.h>
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/audit.h>
#include <linux/kmod.h>
#include <linux/fsnotify.h>
#include <linux/fs_struct.h>
#include <linux/oom.h>
#include <linux/compat.h>
#include <linux/vmalloc.h>
#include <linux/io_uring.h>
#include <linux/syscall_user_dispatch.h>
#include <linux/coredump.h>
#include <linux/time_namespace.h>
#include <linux/user_events.h>
#include <linux/rseq.h>
#include <linux/ksm.h>

#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/tlb.h>

#include <trace/events/task.h>
#include "internal.h"

#include <trace/events/sched.h>

static int bprm_creds_from_file(struct linux_binprm *bprm);

int suid_dumpable = 0;

static LIST_HEAD(formats);
static DEFINE_RWLOCK(binfmt_lock);

void __register_binfmt(struct linux_binfmt * fmt, int insert)
{
	write_lock(&binfmt_lock);
	insert ? list_add(&fmt->lh, &formats) :
		 list_add_tail(&fmt->lh, &formats);
	write_unlock(&binfmt_lock);
}

EXPORT_SYMBOL(__register_binfmt);

void unregister_binfmt(struct linux_binfmt * fmt)
{
	write_lock(&binfmt_lock);
	list_del(&fmt->lh);
	write_unlock(&binfmt_lock);
}

EXPORT_SYMBOL(unregister_binfmt);

static inline void put_binfmt(struct linux_binfmt * fmt)
{
	module_put(fmt->module);
}

bool path_noexec(const struct path *path)
{
	return (path->mnt->mnt_flags & MNT_NOEXEC) ||
	       (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
}

#ifdef CONFIG_USELIB
/*
 * Note that a shared library must be both readable and executable for
 * security reasons.
 *
 * Also note that we take the address to load from the file itself.
 */
SYSCALL_DEFINE1(uselib, const char __user *, library)
{
	struct linux_binfmt *fmt;
	struct file *file;
	struct filename *tmp = getname(library);
	int error = PTR_ERR(tmp);
	static const struct open_flags uselib_flags = {
		.open_flag = O_LARGEFILE | O_RDONLY,
		.acc_mode = MAY_READ | MAY_EXEC,
		.intent = LOOKUP_OPEN,
		.lookup_flags = LOOKUP_FOLLOW,
	};

	if (IS_ERR(tmp))
		goto out;

	file = do_filp_open(AT_FDCWD, tmp, &uselib_flags);
	putname(tmp);
	error = PTR_ERR(file);
	if (IS_ERR(file))
		goto out;

	/*
	 * Check do_open_execat() for an explanation.
	 */
	error = -EACCES;
	if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
	    path_noexec(&file->f_path))
		goto exit;

	error = -ENOEXEC;

	read_lock(&binfmt_lock);
	list_for_each_entry(fmt, &formats, lh) {
		if (!fmt->load_shlib)
			continue;
		if (!try_module_get(fmt->module))
			continue;
		read_unlock(&binfmt_lock);
		error = fmt->load_shlib(file);
		read_lock(&binfmt_lock);
		put_binfmt(fmt);
		if (error != -ENOEXEC)
			break;
	}
	read_unlock(&binfmt_lock);
exit:
	fput(file);
out:
	return error;
}
#endif /* #ifdef CONFIG_USELIB */

#ifdef CONFIG_MMU
/*
 * The nascent bprm->mm is not visible until exec_mmap(), but it can
 * use a lot of memory, so account these pages in current->mm temporarily
 * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
 * change the counter back via acct_arg_size(0).
 */
static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
{
	struct mm_struct *mm = current->mm;
	long diff = (long)(pages - bprm->vma_pages);

	if (!mm || !diff)
		return;

	bprm->vma_pages = pages;
	add_mm_counter(mm, MM_ANONPAGES, diff);
}

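/*
 * Pin the page of the temporary stack that backs the argument area at
 * @pos in the nascent mm, expanding the stack VMA ahead of time; @write
 * says whether the page is about to be written (argument copy) or only
 * read.
 */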
static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
		int write)
{
	struct page *page;
	struct vm_area_struct *vma = bprm->vma;
	struct mm_struct *mm = bprm->mm;
	int ret;

	/*
	 * Avoid relying on expanding the stack down in GUP (which
	 * does not work for STACK_GROWSUP anyway), and just do it
	 * ahead of time.
	 */
	if (!mmap_read_lock_maybe_expand(mm, vma, pos, write))
		return NULL;

	/*
	 * We are doing an exec(). 'current' is the process
	 * doing the exec and 'mm' is the new process's mm.
	 */
	ret = get_user_pages_remote(mm, pos, 1,
			write ? FOLL_WRITE : 0,
			&page, NULL);
	mmap_read_unlock(mm);
	if (ret <= 0)
		return NULL;

	if (write)
		acct_arg_size(bprm, vma_pages(vma));

	return page;
}

static void put_arg_page(struct page *page)
{
	put_page(page);
}

static void free_arg_pages(struct linux_binprm *bprm)
{
}

static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
		struct page *page)
{
	flush_cache_page(bprm->vma, pos, page_to_pfn(page));
}

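/*
 * Create the initial single-page stack VMA at STACK_TOP_MAX in the new mm
 * and point bprm->p just below its top; setup_arg_pages() finalizes the
 * stack later.
 */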
static int __bprm_mm_init(struct linux_binprm *bprm)
{
	int err;
	struct vm_area_struct *vma = NULL;
	struct mm_struct *mm = bprm->mm;

	bprm->vma = vma = vm_area_alloc(mm);
	if (!vma)
		return -ENOMEM;
	vma_set_anonymous(vma);

	if (mmap_write_lock_killable(mm)) {
		err = -EINTR;
		goto err_free;
	}

	/*
	 * Need to be called with mmap write lock
	 * held, to avoid race with ksmd.
	 */
	err = ksm_execve(mm);
	if (err)
		goto err_ksm;

	/*
	 * Place the stack at the largest stack address the architecture
	 * supports. Later, we'll move this to an appropriate place. We don't
	 * use STACK_TOP because that can depend on attributes which aren't
	 * configured yet.
	 */
	BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
	vma->vm_end = STACK_TOP_MAX;
	vma->vm_start = vma->vm_end - PAGE_SIZE;
	vm_flags_init(vma, VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP);
	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);

	err = insert_vm_struct(mm, vma);
	if (err)
		goto err;

	mm->stack_vm = mm->total_vm = 1;
	mmap_write_unlock(mm);
	bprm->p = vma->vm_end - sizeof(void *);
	return 0;
err:
	ksm_exit(mm);
err_ksm:
	mmap_write_unlock(mm);
err_free:
	bprm->vma = NULL;
	vm_area_free(vma);
	return err;
}

static bool valid_arg_len(struct linux_binprm *bprm, long len)
{
	return len <= MAX_ARG_STRLEN;
}

#else

static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
{
}

static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
		int write)
{
	struct page *page;

	page = bprm->page[pos / PAGE_SIZE];
	if (!page && write) {
		page = alloc_page(GFP_HIGHUSER|__GFP_ZERO);
		if (!page)
			return NULL;
		bprm->page[pos / PAGE_SIZE] = page;
	}

	return page;
}

static void put_arg_page(struct page *page)
{
}

static void free_arg_page(struct linux_binprm *bprm, int i)
{
	if (bprm->page[i]) {
		__free_page(bprm->page[i]);
		bprm->page[i] = NULL;
	}
}

static void free_arg_pages(struct linux_binprm *bprm)
{
	int i;

	for (i = 0; i < MAX_ARG_PAGES; i++)
		free_arg_page(bprm, i);
}

static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
		struct page *page)
{
}

static int __bprm_mm_init(struct linux_binprm *bprm)
{
	bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
	return 0;
}

static bool valid_arg_len(struct linux_binprm *bprm, long len)
{
	return len <= bprm->p;
}

#endif /* CONFIG_MMU */

/*
 * Create a new mm_struct and populate it with a temporary stack
 * vm_area_struct. We don't have enough context at this point to set the stack
 * flags, permissions, and offset, so we use temporary values. We'll update
 * them later in setup_arg_pages().
 */
static int bprm_mm_init(struct linux_binprm *bprm)
{
	int err;
	struct mm_struct *mm = NULL;

	bprm->mm = mm = mm_alloc();
	err = -ENOMEM;
	if (!mm)
		goto err;

	/* Save current stack limit for all calculations made during exec. */
	task_lock(current->group_leader);
	bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK];
	task_unlock(current->group_leader);

	err = __bprm_mm_init(bprm);
	if (err)
		goto err;

	return 0;

err:
	if (mm) {
		bprm->mm = NULL;
		mmdrop(mm);
	}

	return err;
}

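/*
 * The argv/envp pointer array as passed from userspace, in either the
 * native layout or (with CONFIG_COMPAT) the 32-bit compat layout;
 * get_user_arg_ptr() hides the difference from the callers.
 */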
struct user_arg_ptr {
#ifdef CONFIG_COMPAT
	bool is_compat;
#endif
	union {
		const char __user *const __user *native;
#ifdef CONFIG_COMPAT
		const compat_uptr_t __user *compat;
#endif
	} ptr;
};

static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
{
	const char __user *native;

#ifdef CONFIG_COMPAT
	if (unlikely(argv.is_compat)) {
		compat_uptr_t compat;

		if (get_user(compat, argv.ptr.compat + nr))
			return ERR_PTR(-EFAULT);

		return compat_ptr(compat);
	}
#endif

	if (get_user(native, argv.ptr.native + nr))
		return ERR_PTR(-EFAULT);

	return native;
}

/*
 * count() counts the number of strings in array ARGV.
 */
static int count(struct user_arg_ptr argv, int max)
{
	int i = 0;

	if (argv.ptr.native != NULL) {
		for (;;) {
			const char __user *p = get_user_arg_ptr(argv, i);

			if (!p)
				break;

			if (IS_ERR(p))
				return -EFAULT;

			if (i >= max)
				return -E2BIG;
			++i;

			if (fatal_signal_pending(current))
				return -ERESTARTNOHAND;
			cond_resched();
		}
	}
	return i;
}

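/*
 * Like count() above, but for a NULL-terminated array of kernel strings.
 */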
static int count_strings_kernel(const char *const *argv)
{
	int i;

	if (!argv)
		return 0;

	for (i = 0; argv[i]; ++i) {
		if (i >= MAX_ARG_STRINGS)
			return -E2BIG;
		if (fatal_signal_pending(current))
			return -ERESTARTNOHAND;
		cond_resched();
	}
	return i;
}

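/*
 * On MMU systems, record (and later check) the lowest address that the
 * argument/environment copy may reach on the new stack; on no-MMU there
 * is no such floor, so the check never fires.
 */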
static inline int bprm_set_stack_limit(struct linux_binprm *bprm,
				       unsigned long limit)
{
#ifdef CONFIG_MMU
	/* Avoid a pathological bprm->p. */
	if (bprm->p < limit)
		return -E2BIG;
	bprm->argmin = bprm->p - limit;
#endif
	return 0;
}
static inline bool bprm_hit_stack_limit(struct linux_binprm *bprm)
{
#ifdef CONFIG_MMU
	return bprm->p < bprm->argmin;
#else
	return false;
#endif
}

/*
 * Calculate bprm->argmin from:
 * - _STK_LIM
 * - ARG_MAX
 * - bprm->rlim_stack.rlim_cur
 * - bprm->argc
 * - bprm->envc
 * - bprm->p
 */
static int bprm_stack_limits(struct linux_binprm *bprm)
{
	unsigned long limit, ptr_size;

	/*
	 * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
	 * (whichever is smaller) for the argv+env strings.
	 * This ensures that:
	 *  - the remaining binfmt code will not run out of stack space,
	 *  - the program will have a reasonable amount of stack left
	 *    to work from.
	 */
	limit = _STK_LIM / 4 * 3;
	limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
	/*
	 * We've historically supported up to 32 pages (ARG_MAX)
	 * of argument strings even with small stacks.
	 */
	limit = max_t(unsigned long, limit, ARG_MAX);
	/* Reject totally pathological counts. */
	if (bprm->argc < 0 || bprm->envc < 0)
		return -E2BIG;
	/*
	 * We must account for the size of all the argv and envp pointers to
	 * the argv and envp strings, since they will also take up space in
	 * the stack. They aren't stored until much later when we can't
	 * signal to the parent that the child has run out of stack space.
	 * Instead, calculate it here so it's possible to fail gracefully.
	 *
	 * In the case of argc = 0, make sure there is space for adding an
	 * empty string (which will bump argc to 1), to ensure confused
	 * userspace programs don't start processing from argv[1], thinking
	 * argc can never be 0, to keep them from walking envp by accident.
	 * See do_execveat_common().
	 */
	if (check_add_overflow(max(bprm->argc, 1), bprm->envc, &ptr_size) ||
	    check_mul_overflow(ptr_size, sizeof(void *), &ptr_size))
		return -E2BIG;
	if (limit <= ptr_size)
		return -E2BIG;
	limit -= ptr_size;

	return bprm_set_stack_limit(bprm, limit);
}

/*
 * 'copy_strings()' copies argument/environment strings from the old
 * process's memory to the new process's stack. The call to get_user_pages()
 * ensures the destination page is created and not swapped out.
 */
static int copy_strings(int argc, struct user_arg_ptr argv,
			struct linux_binprm *bprm)
{
	struct page *kmapped_page = NULL;
	char *kaddr = NULL;
	unsigned long kpos = 0;
	int ret;

	while (argc-- > 0) {
		const char __user *str;
		int len;
		unsigned long pos;

		ret = -EFAULT;
		str = get_user_arg_ptr(argv, argc);
		if (IS_ERR(str))
			goto out;

		len = strnlen_user(str, MAX_ARG_STRLEN);
		if (!len)
			goto out;

		ret = -E2BIG;
		if (!valid_arg_len(bprm, len))
			goto out;

		/* We're going to work our way backwards. */
		pos = bprm->p;
		str += len;
		bprm->p -= len;
		if (bprm_hit_stack_limit(bprm))
			goto out;

		while (len > 0) {
			int offset, bytes_to_copy;

			if (fatal_signal_pending(current)) {
				ret = -ERESTARTNOHAND;
				goto out;
			}
			cond_resched();

			offset = pos % PAGE_SIZE;
			if (offset == 0)
				offset = PAGE_SIZE;

			bytes_to_copy = offset;
			if (bytes_to_copy > len)
				bytes_to_copy = len;

			offset -= bytes_to_copy;
			pos -= bytes_to_copy;
			str -= bytes_to_copy;
			len -= bytes_to_copy;

			if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
				struct page *page;

				page = get_arg_page(bprm, pos, 1);
				if (!page) {
					ret = -E2BIG;
					goto out;
				}

				if (kmapped_page) {
					flush_dcache_page(kmapped_page);
					kunmap_local(kaddr);
					put_arg_page(kmapped_page);
				}
				kmapped_page = page;
				kaddr = kmap_local_page(kmapped_page);
				kpos = pos & PAGE_MASK;
				flush_arg_page(bprm, kpos, kmapped_page);
			}
			if (copy_from_user(kaddr+offset, str, bytes_to_copy)) {
				ret = -EFAULT;
				goto out;
			}
		}
	}
	ret = 0;
out:
	if (kmapped_page) {
		flush_dcache_page(kmapped_page);
		kunmap_local(kaddr);
		put_arg_page(kmapped_page);
	}
	return ret;
}

/*
 * Copy an argument/environment string from the kernel to the new
 * process's stack.
 */
int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
{
	int len = strnlen(arg, MAX_ARG_STRLEN) + 1 /* terminating NUL */;
	unsigned long pos = bprm->p;

	if (len == 0)
		return -EFAULT;
	if (!valid_arg_len(bprm, len))
		return -E2BIG;

	/* We're going to work our way backwards. */
	arg += len;
	bprm->p -= len;
	if (bprm_hit_stack_limit(bprm))
		return -E2BIG;

	while (len > 0) {
		unsigned int bytes_to_copy = min_t(unsigned int, len,
				min_not_zero(offset_in_page(pos), PAGE_SIZE));
		struct page *page;

		pos -= bytes_to_copy;
		arg -= bytes_to_copy;
		len -= bytes_to_copy;

		page = get_arg_page(bprm, pos, 1);
		if (!page)
			return -E2BIG;
		flush_arg_page(bprm, pos & PAGE_MASK, page);
		memcpy_to_page(page, offset_in_page(pos), arg, bytes_to_copy);
		put_arg_page(page);
	}

	return 0;
}
EXPORT_SYMBOL(copy_string_kernel);

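/*
 * Like copy_strings(), but the source argv array lives in kernel memory.
 */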
static int copy_strings_kernel(int argc, const char *const *argv,
			       struct linux_binprm *bprm)
{
	while (argc-- > 0) {
		int ret = copy_string_kernel(argv[argc], bprm);
		if (ret < 0)
			return ret;
		if (fatal_signal_pending(current))
			return -ERESTARTNOHAND;
		cond_resched();
	}
	return 0;
}

#ifdef CONFIG_MMU

/*
 * Finalizes the stack vm_area_struct. The flags and permissions are updated,
 * the stack is optionally relocated, and some extra space is added.
 */
int setup_arg_pages(struct linux_binprm *bprm,
		    unsigned long stack_top,
		    int executable_stack)
{
	unsigned long ret;
	unsigned long stack_shift;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = bprm->vma;
	struct vm_area_struct *prev = NULL;
	unsigned long vm_flags;
	unsigned long stack_base;
	unsigned long stack_size;
	unsigned long stack_expand;
	unsigned long rlim_stack;
	struct mmu_gather tlb;
	struct vma_iterator vmi;

#ifdef CONFIG_STACK_GROWSUP
	/* Limit stack size */
	stack_base = bprm->rlim_stack.rlim_max;

	stack_base = calc_max_stack_size(stack_base);

	/* Add space for stack randomization. */
	if (current->flags & PF_RANDOMIZE)
		stack_base += (STACK_RND_MASK << PAGE_SHIFT);

	/* Make sure we didn't let the argument array grow too large. */
	if (vma->vm_end - vma->vm_start > stack_base)
		return -ENOMEM;

	stack_base = PAGE_ALIGN(stack_top - stack_base);

	stack_shift = vma->vm_start - stack_base;
	mm->arg_start = bprm->p - stack_shift;
	bprm->p = vma->vm_end - stack_shift;
#else
	stack_top = arch_align_stack(stack_top);
	stack_top = PAGE_ALIGN(stack_top);

	if (unlikely(stack_top < mmap_min_addr) ||
	    unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr))
		return -ENOMEM;

	stack_shift = vma->vm_end - stack_top;

	bprm->p -= stack_shift;
	mm->arg_start = bprm->p;
#endif

	bprm->exec -= stack_shift;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	vm_flags = VM_STACK_FLAGS;

	/*
	 * Adjust stack execute permissions; explicitly enable for
	 * EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X and leave alone
	 * (arch default) otherwise.
	 */
	if (unlikely(executable_stack == EXSTACK_ENABLE_X))
		vm_flags |= VM_EXEC;
	else if (executable_stack == EXSTACK_DISABLE_X)
		vm_flags &= ~VM_EXEC;
	vm_flags |= mm->def_flags;
	vm_flags |= VM_STACK_INCOMPLETE_SETUP;

	vma_iter_init(&vmi, mm, vma->vm_start);

	tlb_gather_mmu(&tlb, mm);
	ret = mprotect_fixup(&vmi, &tlb, vma, &prev, vma->vm_start, vma->vm_end,
			vm_flags);
	tlb_finish_mmu(&tlb);

	if (ret)
		goto out_unlock;
	BUG_ON(prev != vma);

	if (unlikely(vm_flags & VM_EXEC)) {
		pr_warn_once("process '%pD4' started with executable stack\n",
			     bprm->file);
	}

	/* Move stack pages down in memory. */
	if (stack_shift) {
		/*
		 * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX. Once
		 * the binfmt code determines where the new stack should reside, we shift it to
		 * its final location.
		 */
		ret = relocate_vma_down(vma, stack_shift);
		if (ret)
			goto out_unlock;
	}

	/* mprotect_fixup is overkill to remove the temporary stack flags */
	vm_flags_clear(vma, VM_STACK_INCOMPLETE_SETUP);

	stack_expand = 131072UL; /* randomly 32*4k (or 2*64k) pages */
	stack_size = vma->vm_end - vma->vm_start;
	/*
	 * Align this down to a page boundary as expand_stack
	 * will align it up.
	 */
	rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK;

	stack_expand = min(rlim_stack, stack_size + stack_expand);

#ifdef CONFIG_STACK_GROWSUP
	stack_base = vma->vm_start + stack_expand;
#else
	stack_base = vma->vm_end - stack_expand;
#endif
	current->mm->start_stack = bprm->p;
	ret = expand_stack_locked(vma, stack_base);
	if (ret)
		ret = -EFAULT;

out_unlock:
	mmap_write_unlock(mm);
	return ret;
}
EXPORT_SYMBOL(setup_arg_pages);

#else

/*
 * Transfer the program arguments and environment from the holding pages
 * onto the stack. The provided stack pointer is adjusted accordingly.
 */
int transfer_args_to_stack(struct linux_binprm *bprm,
			   unsigned long *sp_location)
{
	unsigned long index, stop, sp;
	int ret = 0;

	stop = bprm->p >> PAGE_SHIFT;
	sp = *sp_location;

	for (index = MAX_ARG_PAGES - 1; index >= stop; index--) {
		unsigned int offset = index == stop ? bprm->p & ~PAGE_MASK : 0;
		char *src = kmap_local_page(bprm->page[index]) + offset;
		sp -= PAGE_SIZE - offset;
		if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0)
			ret = -EFAULT;
		kunmap_local(src);
		if (ret)
			goto out;
	}

	bprm->exec += *sp_location - MAX_ARG_PAGES * PAGE_SIZE;
	*sp_location = sp;

out:
	return ret;
}
EXPORT_SYMBOL(transfer_args_to_stack);

#endif /* CONFIG_MMU */

/*
 * On success, caller must call do_close_execat() on the returned
 * struct file to close it.
 */
static struct file *do_open_execat(int fd, struct filename *name, int flags)
{
	int err;
	struct file *file __free(fput) = NULL;
	struct open_flags open_exec_flags = {
		.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
		.acc_mode = MAY_EXEC,
		.intent = LOOKUP_OPEN,
		.lookup_flags = LOOKUP_FOLLOW,
	};

	if ((flags &
	     ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH | AT_EXECVE_CHECK)) != 0)
		return ERR_PTR(-EINVAL);
	if (flags & AT_SYMLINK_NOFOLLOW)
		open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW;
	if (flags & AT_EMPTY_PATH)
		open_exec_flags.lookup_flags |= LOOKUP_EMPTY;

	file = do_filp_open(fd, name, &open_exec_flags);
	if (IS_ERR(file))
		return file;

	/*
	 * In the past the regular type check was here. It moved to may_open() in
	 * 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is
	 * an invariant that all non-regular files error out before we get here.
	 */
	if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
	    path_noexec(&file->f_path))
		return ERR_PTR(-EACCES);

	err = exe_file_deny_write_access(file);
	if (err)
		return ERR_PTR(err);

	return no_free_ptr(file);
}

/**
 * open_exec - Open a path name for execution
 *
 * @name: path name to open with the intent of executing it.
 *
 * Returns ERR_PTR on failure or allocated struct file on success.
 *
 * As this is a wrapper for the internal do_open_execat(), callers
 * must call exe_file_allow_write_access() before fput() on release. Also see
 * do_close_execat().
 */
struct file *open_exec(const char *name)
{
	struct filename *filename = getname_kernel(name);
	struct file *f = ERR_CAST(filename);

	if (!IS_ERR(filename)) {
		f = do_open_execat(AT_FDCWD, filename, 0);
		putname(filename);
	}
	return f;
}
EXPORT_SYMBOL(open_exec);

#if defined(CONFIG_BINFMT_FLAT) || defined(CONFIG_BINFMT_ELF_FDPIC)
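/*
 * Read @len bytes of the binary at @pos into memory at @addr and flush the
 * instruction cache over that range; only built when the flat or ELF-FDPIC
 * loaders are enabled.
 */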
ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
{
	ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
	if (res > 0)
		flush_icache_user_range(addr, addr + len);
	return res;
}
EXPORT_SYMBOL(read_code);
#endif

/*
 * Maps the mm_struct mm into the current task struct.
 * On success, this function returns with exec_update_lock
 * held for writing.
 */
static int exec_mmap(struct mm_struct *mm)
{
	struct task_struct *tsk;
	struct mm_struct *old_mm, *active_mm;
	int ret;

	/* Notify parent that we're no longer interested in the old VM */
	tsk = current;
	old_mm = current->mm;
	exec_mm_release(tsk, old_mm);

	ret = down_write_killable(&tsk->signal->exec_update_lock);
	if (ret)
		return ret;

	if (old_mm) {
		/*
		 * If there is a pending fatal signal (perhaps one whose
		 * default action is to create a coredump), get out and die
		 * instead of going through with the exec.
		 */
		ret = mmap_read_lock_killable(old_mm);
		if (ret) {
			up_write(&tsk->signal->exec_update_lock);
			return ret;
		}
	}

	task_lock(tsk);
	membarrier_exec_mmap(mm);

	local_irq_disable();
	active_mm = tsk->active_mm;
	tsk->active_mm = mm;
	tsk->mm = mm;
	mm_init_cid(mm, tsk);
	/*
	 * This prevents preemption while active_mm is being loaded and
	 * it and mm are being updated, which could cause problems for
	 * lazy tlb mm refcounting when these are updated by context
	 * switches. Not all architectures can handle irqs off over
	 * activate_mm yet.
	 */
	if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
		local_irq_enable();
	activate_mm(active_mm, mm);
	if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
		local_irq_enable();
	lru_gen_add_mm(mm);
	task_unlock(tsk);
	lru_gen_use_mm(mm);
	if (old_mm) {
		mmap_read_unlock(old_mm);
		BUG_ON(active_mm != old_mm);
		setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
		mm_update_next_owner(old_mm);
		mmput(old_mm);
		return 0;
	}
	mmdrop_lazy_tlb(active_mm);
	return 0;
}

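/*
 * Kill all other threads in the current thread group and wait for them
 * to exit. If the caller is not the thread group leader, it also takes
 * over the leader's PID and releases the old leader.
 */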
static int de_thread(struct task_struct *tsk)
{
	struct signal_struct *sig = tsk->signal;
	struct sighand_struct *oldsighand = tsk->sighand;
	spinlock_t *lock = &oldsighand->siglock;

	if (thread_group_empty(tsk))
		goto no_thread_group;

	/*
	 * Kill all other threads in the thread group.
	 */
	spin_lock_irq(lock);
	if ((sig->flags & SIGNAL_GROUP_EXIT) || sig->group_exec_task) {
		/*
		 * Another group action in progress, just
		 * return so that the signal is processed.
		 */
		spin_unlock_irq(lock);
		return -EAGAIN;
	}

	sig->group_exec_task = tsk;
	sig->notify_count = zap_other_threads(tsk);
	if (!thread_group_leader(tsk))
		sig->notify_count--;

	while (sig->notify_count) {
		__set_current_state(TASK_KILLABLE);
		spin_unlock_irq(lock);
		schedule();
		if (__fatal_signal_pending(tsk))
			goto killed;
		spin_lock_irq(lock);
	}
	spin_unlock_irq(lock);

	/*
	 * At this point all other threads have exited, all we have to
	 * do is to wait for the thread group leader to become inactive,
	 * and to assume its PID:
	 */
	if (!thread_group_leader(tsk)) {
		struct task_struct *leader = tsk->group_leader;

		for (;;) {
			cgroup_threadgroup_change_begin(tsk);
			write_lock_irq(&tasklist_lock);
			/*
			 * Do this under tasklist_lock to ensure that
			 * exit_notify() can't miss ->group_exec_task
			 */
			sig->notify_count = -1;
			if (likely(leader->exit_state))
				break;
			__set_current_state(TASK_KILLABLE);
			write_unlock_irq(&tasklist_lock);
			cgroup_threadgroup_change_end(tsk);
			schedule();
			if (__fatal_signal_pending(tsk))
				goto killed;
		}

		/*
		 * The only record we have of the real-time age of a
		 * process, regardless of execs it's done, is start_time.
		 * All the past CPU time is accumulated in signal_struct
		 * from sister threads now dead. But in this non-leader
		 * exec, nothing survives from the original leader thread,
		 * whose birth marks the true age of this process now.
		 * When we take on its identity by switching to its PID, we
		 * also take its birthdate (always earlier than our own).
		 */
		tsk->start_time = leader->start_time;
		tsk->start_boottime = leader->start_boottime;

		BUG_ON(!same_thread_group(leader, tsk));
		/*
		 * An exec() starts a new thread group with the
		 * TGID of the previous thread group. Rehash the
		 * two threads with a switched PID, and release
		 * the former thread group leader:
		 */

		/* Become a process group leader with the old leader's pid.
		 * The old leader becomes a thread of this thread group.
		 */
		exchange_tids(tsk, leader);
		transfer_pid(leader, tsk, PIDTYPE_TGID);
		transfer_pid(leader, tsk, PIDTYPE_PGID);
		transfer_pid(leader, tsk, PIDTYPE_SID);

		list_replace_rcu(&leader->tasks, &tsk->tasks);
		list_replace_init(&leader->sibling, &tsk->sibling);

		tsk->group_leader = tsk;
		leader->group_leader = tsk;

		tsk->exit_signal = SIGCHLD;
		leader->exit_signal = -1;

		BUG_ON(leader->exit_state != EXIT_ZOMBIE);
		leader->exit_state = EXIT_DEAD;
		/*
		 * We are going to release_task()->ptrace_unlink() silently,
		 * the tracer can sleep in do_wait(). EXIT_DEAD guarantees
		 * the tracer won't block again waiting for this thread.
		 */
		if (unlikely(leader->ptrace))
			__wake_up_parent(leader, leader->parent);
		write_unlock_irq(&tasklist_lock);
		cgroup_threadgroup_change_end(tsk);

		release_task(leader);
	}

	sig->group_exec_task = NULL;
	sig->notify_count = 0;

no_thread_group:
	/* we have changed execution domain */
	tsk->exit_signal = SIGCHLD;

	BUG_ON(!thread_group_leader(tsk));
	return 0;

killed:
	/* protects against exit_notify() and __exit_signal() */
	read_lock(&tasklist_lock);
	sig->group_exec_task = NULL;
	sig->notify_count = 0;
	read_unlock(&tasklist_lock);
	return -EAGAIN;
}

115102169155SEric W. Biederman
11527a60ef48SEric W. Biederman /*
11537a60ef48SEric W. Biederman * This function makes sure the current process has its own signal table,
11547a60ef48SEric W. Biederman * so that flush_signal_handlers can later reset the handlers without
11557a60ef48SEric W. Biederman * disturbing other processes. (Other processes might share the signal
11567a60ef48SEric W. Biederman * table via the CLONE_SIGHAND option to clone().)
11577a60ef48SEric W. Biederman */
115802169155SEric W. Biederman static int unshare_sighand(struct task_struct *me)
115902169155SEric W. Biederman {
116002169155SEric W. Biederman struct sighand_struct *oldsighand = me->sighand;
1161329f7dbaSOleg Nesterov
1162d036bda7SElena Reshetova if (refcount_read(&oldsighand->count) != 1) {
1163b2c903b8SOleg Nesterov struct sighand_struct *newsighand;
11641da177e4SLinus Torvalds /*
1165b2c903b8SOleg Nesterov * This ->sighand is shared with the CLONE_SIGHAND
1166b2c903b8SOleg Nesterov * but not CLONE_THREAD task, switch to the new one.
11671da177e4SLinus Torvalds */
1168b2c903b8SOleg Nesterov newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
1169b2c903b8SOleg Nesterov if (!newsighand)
1170b2c903b8SOleg Nesterov return -ENOMEM;
1171b2c903b8SOleg Nesterov
1172d036bda7SElena Reshetova refcount_set(&newsighand->count, 1);
11731da177e4SLinus Torvalds
11741da177e4SLinus Torvalds write_lock_irq(&tasklist_lock);
11751da177e4SLinus Torvalds spin_lock(&oldsighand->siglock);
11765bf2fedcSBernd Edlinger memcpy(newsighand->action, oldsighand->action,
11775bf2fedcSBernd Edlinger sizeof(newsighand->action));
117802169155SEric W. Biederman rcu_assign_pointer(me->sighand, newsighand);
11791da177e4SLinus Torvalds spin_unlock(&oldsighand->siglock);
11801da177e4SLinus Torvalds write_unlock_irq(&tasklist_lock);
11811da177e4SLinus Torvalds
1182fba2afaaSDavide Libenzi __cleanup_sighand(oldsighand);
11831da177e4SLinus Torvalds }
11841da177e4SLinus Torvalds return 0;
11851da177e4SLinus Torvalds }
11861da177e4SLinus Torvalds
11876a6d27deSAl Viro /*
11883a3f61ceSKees Cook * This is unlocked -- the string will always be NUL-terminated, but
11893a3f61ceSKees Cook * may show overlapping contents if racing concurrent reads.
11906a6d27deSAl Viro */
119182b89778SAdrian Hunter void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
11921da177e4SLinus Torvalds {
11933a3f61ceSKees Cook size_t len = min(strlen(buf), sizeof(tsk->comm) - 1);
11943a3f61ceSKees Cook
119543d2b113SKAMEZAWA Hiroyuki trace_task_rename(tsk, buf);
11963a3f61ceSKees Cook memcpy(tsk->comm, buf, len);
11973a3f61ceSKees Cook memset(&tsk->comm[len], 0, sizeof(tsk->comm) - len);
119882b89778SAdrian Hunter perf_event_comm(tsk, exec);
11991da177e4SLinus Torvalds }
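
/*
 * Illustrative userspace view of the behaviour above (example only, not
 * built as part of this file): the comm buffer is sizeof(tsk->comm) ==
 * TASK_COMM_LEN bytes, so at most 15 characters survive plus the trailing
 * NUL.  prctl(PR_SET_NAME) reaches this code via set_task_comm().
 *
 *	#include <stdio.h>
 *	#include <sys/prctl.h>
 *
 *	int main(void)
 *	{
 *		char name[16];
 *
 *		prctl(PR_SET_NAME, "an-overly-long-thread-name");
 *		prctl(PR_GET_NAME, name);
 *		printf("%s\n", name);	// prints "an-overly-long-"
 *		return 0;
 *	}
 */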
12001da177e4SLinus Torvalds
1201a9208e42SKees Cook /*
1202a9208e42SKees Cook * Calling this is the point of no return. None of the failures will be
1203a9208e42SKees Cook * seen by userspace since either the process is already taking a fatal
1204a9208e42SKees Cook * signal (via de_thread() or coredump), or will have SEGV raised
120513c432b5SEric W. Biederman * (after exec_mmap()) by search_binary_handler (see below).
1206a9208e42SKees Cook */
12072388777aSEric W. Biederman int begin_new_exec(struct linux_binprm * bprm)
12081da177e4SLinus Torvalds {
12092ca7be7dSEric W. Biederman struct task_struct *me = current;
1210221af7f8SLinus Torvalds int retval;
12111da177e4SLinus Torvalds
121256305aa9SEric W. Biederman /* Once we are committed compute the creds */
121356305aa9SEric W. Biederman retval = bprm_creds_from_file(bprm);
121456305aa9SEric W. Biederman if (retval)
121556305aa9SEric W. Biederman return retval;
121656305aa9SEric W. Biederman
12171da177e4SLinus Torvalds /*
1218c8238994SMarco Elver * This tracepoint marks the point before flushing the old exec where
1219c8238994SMarco Elver * the current task is still unchanged, but errors are fatal (point of
1220c8238994SMarco Elver * no return). The later "sched_process_exec" tracepoint is called after
1221c8238994SMarco Elver * the current task has successfully switched to the new exec.
1222c8238994SMarco Elver */
1223c8238994SMarco Elver trace_sched_prepare_exec(current, bprm);
1224c8238994SMarco Elver
1225c8238994SMarco Elver /*
12266834e0bbSEric W. Biederman * Ensure all future errors are fatal.
12276834e0bbSEric W. Biederman */
12286834e0bbSEric W. Biederman bprm->point_of_no_return = true;
12296834e0bbSEric W. Biederman
1230af7bb0d2SOleg Nesterov /* Make this the only thread in the thread group */
12312ca7be7dSEric W. Biederman retval = de_thread(me);
12321da177e4SLinus Torvalds if (retval)
12331da177e4SLinus Torvalds goto out;
1234af7bb0d2SOleg Nesterov /* see the comment in check_unsafe_exec() */
1235af7bb0d2SOleg Nesterov current->fs->in_exec = 0;
12366e399cd1SDavidlohr Bueso /*
12379ee1206dSEric W. Biederman * Cancel any io_uring activity across execve
12389ee1206dSEric W. Biederman */
12399ee1206dSEric W. Biederman io_uring_task_cancel();
12409ee1206dSEric W. Biederman
1241b6043501SEric W. Biederman /* Ensure the files table is not shared. */
12421f702603SEric W. Biederman retval = unshare_files();
1243b6043501SEric W. Biederman if (retval)
1244b6043501SEric W. Biederman goto out;
1245b6043501SEric W. Biederman
12466e399cd1SDavidlohr Bueso /*
12476e399cd1SDavidlohr Bueso * Must be called _before_ exec_mmap() as bprm->mm is
1248a7031f14SMateusz Guzik * not visible until then. Doing it here also ensures
1249a7031f14SMateusz Guzik * we don't race against replace_mm_exe_file().
12506e399cd1SDavidlohr Bueso */
1251fe69d560SDavid Hildenbrand retval = set_mm_exe_file(bprm->mm, bprm->file);
1252fe69d560SDavid Hildenbrand if (retval)
1253fe69d560SDavid Hildenbrand goto out;
12546e399cd1SDavidlohr Bueso
1255b8a61c9eSEric W. Biederman /* If the binary is not readable then enforce mm->dumpable=0 */
1256f87d1c95SEric W. Biederman would_dump(bprm, bprm->file);
1257b8a61c9eSEric W. Biederman if (bprm->have_execfd)
1258b8a61c9eSEric W. Biederman would_dump(bprm, bprm->executable);
1259f87d1c95SEric W. Biederman
12601da177e4SLinus Torvalds /*
12611da177e4SLinus Torvalds * Release all of the old mmap stuff
12621da177e4SLinus Torvalds */
12633c77f845SOleg Nesterov acct_arg_size(bprm, 0);
12641da177e4SLinus Torvalds retval = exec_mmap(bprm->mm);
12651da177e4SLinus Torvalds if (retval)
1266fd8328beSAl Viro goto out;
12671da177e4SLinus Torvalds
1268a9208e42SKees Cook bprm->mm = NULL;
12697ab02af4SLinus Torvalds
12702b5f9dadSAndrei Vagin retval = exec_task_namespaces();
12712b5f9dadSAndrei Vagin if (retval)
12722b5f9dadSAndrei Vagin goto out_unlock;
12732b5f9dadSAndrei Vagin
1274ccf0fa6bSEric W. Biederman #ifdef CONFIG_POSIX_TIMERS
1275e362359aSThadeu Lima de Souza Cascardo spin_lock_irq(&me->sighand->siglock);
1276e362359aSThadeu Lima de Souza Cascardo posix_cpu_timers_exit(me);
1277e362359aSThadeu Lima de Souza Cascardo spin_unlock_irq(&me->sighand->siglock);
1278d5b36a4dSOleg Nesterov exit_itimers(me);
1279ccf0fa6bSEric W. Biederman flush_itimer_signals();
1280ccf0fa6bSEric W. Biederman #endif
1281ccf0fa6bSEric W. Biederman
1282ccf0fa6bSEric W. Biederman /*
1283ccf0fa6bSEric W. Biederman * Make the signal table private.
1284ccf0fa6bSEric W. Biederman */
1285ccf0fa6bSEric W. Biederman retval = unshare_sighand(me);
1286ccf0fa6bSEric W. Biederman if (retval)
128789826cceSEric W. Biederman goto out_unlock;
1288ccf0fa6bSEric W. Biederman
12891b2552cbSEric W. Biederman me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC |
1290b88fae64SZhang Yi PF_NOFREEZE | PF_NO_SETAFFINITY);
12917ab02af4SLinus Torvalds flush_thread();
12922ca7be7dSEric W. Biederman me->personality &= ~bprm->per_clear;
12937ab02af4SLinus Torvalds
12941446e1dfSGabriel Krisman Bertazi clear_syscall_work_syscall_user_dispatch(me);
12951446e1dfSGabriel Krisman Bertazi
1296613cc2b6SAleksa Sarai /*
1297613cc2b6SAleksa Sarai * We have to apply CLOEXEC before we change whether the process is
1298613cc2b6SAleksa Sarai * dumpable (in setup_new_exec) to avoid a race with a process in userspace
1299613cc2b6SAleksa Sarai * trying to access the should-be-closed file descriptors of a process
1300613cc2b6SAleksa Sarai * undergoing exec(2).
1301613cc2b6SAleksa Sarai */
13022ca7be7dSEric W. Biederman do_close_on_exec(me->files);
1303df9e4d2cSEric W. Biederman
1304df9e4d2cSEric W. Biederman if (bprm->secureexec) {
1305df9e4d2cSEric W. Biederman /* Make sure parent cannot signal privileged process. */
1306df9e4d2cSEric W. Biederman me->pdeath_signal = 0;
1307df9e4d2cSEric W. Biederman
1308df9e4d2cSEric W. Biederman /*
1309df9e4d2cSEric W. Biederman * For secureexec, reset the stack limit to sane default to
1310df9e4d2cSEric W. Biederman * avoid bad behavior from the prior rlimits. This has to
1311df9e4d2cSEric W. Biederman * happen before arch_pick_mmap_layout(), which examines
1312df9e4d2cSEric W. Biederman * RLIMIT_STACK, but after the point of no return to avoid
1313df9e4d2cSEric W. Biederman * needing to clean up the change on failure.
1314df9e4d2cSEric W. Biederman */
1315df9e4d2cSEric W. Biederman if (bprm->rlim_stack.rlim_cur > _STK_LIM)
1316df9e4d2cSEric W. Biederman bprm->rlim_stack.rlim_cur = _STK_LIM;
1317df9e4d2cSEric W. Biederman }
1318df9e4d2cSEric W. Biederman
1319df9e4d2cSEric W. Biederman me->sas_ss_sp = me->sas_ss_size = 0;
1320df9e4d2cSEric W. Biederman
1321df9e4d2cSEric W. Biederman /*
1322df9e4d2cSEric W. Biederman * Figure out dumpability. Note that this checking only of current
1323df9e4d2cSEric W. Biederman * is wrong, but userspace depends on it. This should be testing
1324df9e4d2cSEric W. Biederman * bprm->secureexec instead.
1325df9e4d2cSEric W. Biederman */
1326df9e4d2cSEric W. Biederman if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP ||
1327df9e4d2cSEric W. Biederman !(uid_eq(current_euid(), current_uid()) &&
1328df9e4d2cSEric W. Biederman gid_eq(current_egid(), current_gid())))
1329df9e4d2cSEric W. Biederman set_dumpable(current->mm, suid_dumpable);
1330df9e4d2cSEric W. Biederman else
1331df9e4d2cSEric W. Biederman set_dumpable(current->mm, SUID_DUMP_USER);
1332df9e4d2cSEric W. Biederman
1333df9e4d2cSEric W. Biederman perf_event_exec();
1334543841d1SKees Cook
1335543841d1SKees Cook /*
1336543841d1SKees Cook * If the original filename was empty, alloc_bprm() made up a path
1337543841d1SKees Cook * that will probably not be useful to admins running ps or similar.
1338543841d1SKees Cook * Let's fix it up to be something reasonable.
1339543841d1SKees Cook */
1340543841d1SKees Cook if (bprm->comm_from_dentry) {
1341543841d1SKees Cook /*
1342543841d1SKees Cook * Hold RCU lock to keep the name from being freed behind our back.
1343543841d1SKees Cook * Use acquire semantics to make sure the terminating NUL from
1344543841d1SKees Cook * __d_alloc() is seen.
1345543841d1SKees Cook *
1346543841d1SKees Cook * Note, we're deliberately sloppy here. We don't need to care about
1347543841d1SKees Cook * detecting a concurrent rename and just want a terminated name.
1348543841d1SKees Cook */
1349543841d1SKees Cook rcu_read_lock();
1350543841d1SKees Cook __set_task_comm(me, smp_load_acquire(&bprm->file->f_path.dentry->d_name.name),
1351543841d1SKees Cook true);
1352543841d1SKees Cook rcu_read_unlock();
1353543841d1SKees Cook } else {
1354df9e4d2cSEric W. Biederman __set_task_comm(me, kbasename(bprm->filename), true);
1355543841d1SKees Cook }
1356df9e4d2cSEric W. Biederman
1357df9e4d2cSEric W. Biederman /* An exec changes our domain. We are no longer part of the thread
1358df9e4d2cSEric W. Biederman group */
1359df9e4d2cSEric W. Biederman WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1);
1360df9e4d2cSEric W. Biederman flush_signal_handlers(me, 0);
1361df9e4d2cSEric W. Biederman
1362905ae01cSAlexey Gladkov retval = set_cred_ucounts(bprm->cred);
1363905ae01cSAlexey Gladkov if (retval < 0)
1364905ae01cSAlexey Gladkov goto out_unlock;
1365905ae01cSAlexey Gladkov
1366df9e4d2cSEric W. Biederman /*
1367df9e4d2cSEric W. Biederman * install the new credentials for this executable
1368df9e4d2cSEric W. Biederman */
1369df9e4d2cSEric W. Biederman security_bprm_committing_creds(bprm);
1370df9e4d2cSEric W. Biederman
1371df9e4d2cSEric W. Biederman commit_creds(bprm->cred);
1372df9e4d2cSEric W. Biederman bprm->cred = NULL;
1373df9e4d2cSEric W. Biederman
1374df9e4d2cSEric W. Biederman /*
1375df9e4d2cSEric W. Biederman * Disable monitoring for regular users
1376df9e4d2cSEric W. Biederman * when executing setuid binaries. Must
1377df9e4d2cSEric W. Biederman * wait until new credentials are committed
1378df9e4d2cSEric W. Biederman * by commit_creds() above
1379df9e4d2cSEric W. Biederman */
1380df9e4d2cSEric W. Biederman if (get_dumpable(me->mm) != SUID_DUMP_USER)
1381df9e4d2cSEric W. Biederman perf_event_exit_task(me);
1382df9e4d2cSEric W. Biederman /*
1383df9e4d2cSEric W. Biederman * cred_guard_mutex must be held at least to this point to prevent
1384df9e4d2cSEric W. Biederman * ptrace_attach() from altering our determination of the task's
1385df9e4d2cSEric W. Biederman * credentials; any time after this it may be unlocked.
1386df9e4d2cSEric W. Biederman */
1387df9e4d2cSEric W. Biederman security_bprm_committed_creds(bprm);
1388b8a61c9eSEric W. Biederman
1389b8a61c9eSEric W. Biederman /* Pass the opened binary to the interpreter. */
1390b8a61c9eSEric W. Biederman if (bprm->have_execfd) {
1391b8a61c9eSEric W. Biederman retval = get_unused_fd_flags(0);
1392b8a61c9eSEric W. Biederman if (retval < 0)
1393b8a61c9eSEric W. Biederman goto out_unlock;
1394b8a61c9eSEric W. Biederman fd_install(retval, bprm->executable);
1395b8a61c9eSEric W. Biederman bprm->executable = NULL;
1396b8a61c9eSEric W. Biederman bprm->execfd = retval;
1397b8a61c9eSEric W. Biederman }
1398221af7f8SLinus Torvalds return 0;
1399221af7f8SLinus Torvalds
140089826cceSEric W. Biederman out_unlock:
1401f7cfd871SEric W. Biederman up_write(&me->signal->exec_update_lock);
140284c39ec5SBernd Edlinger if (!bprm->cred)
140384c39ec5SBernd Edlinger mutex_unlock(&me->signal->cred_guard_mutex);
140484c39ec5SBernd Edlinger
1405221af7f8SLinus Torvalds out:
1406221af7f8SLinus Torvalds return retval;
1407221af7f8SLinus Torvalds }
14082388777aSEric W. Biederman EXPORT_SYMBOL(begin_new_exec);
1409221af7f8SLinus Torvalds
14101b5d783cSAl Viro void would_dump(struct linux_binprm *bprm, struct file *file)
14111b5d783cSAl Viro {
1412f84df2a6SEric W. Biederman struct inode *inode = file_inode(file);
14134609e1f1SChristian Brauner struct mnt_idmap *idmap = file_mnt_idmap(file);
14144609e1f1SChristian Brauner if (inode_permission(idmap, inode, MAY_READ) < 0) {
1415f84df2a6SEric W. Biederman struct user_namespace *old, *user_ns;
14161b5d783cSAl Viro bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
1417f84df2a6SEric W. Biederman
1418f84df2a6SEric W. Biederman /* Ensure mm->user_ns contains the executable */
1419f84df2a6SEric W. Biederman user_ns = old = bprm->mm->user_ns;
1420f84df2a6SEric W. Biederman while ((user_ns != &init_user_ns) &&
14219452e93eSChristian Brauner !privileged_wrt_inode_uidgid(user_ns, idmap, inode))
1422f84df2a6SEric W. Biederman user_ns = user_ns->parent;
1423f84df2a6SEric W. Biederman
1424f84df2a6SEric W. Biederman if (old != user_ns) {
1425f84df2a6SEric W. Biederman bprm->mm->user_ns = get_user_ns(user_ns);
1426f84df2a6SEric W. Biederman put_user_ns(old);
1427f84df2a6SEric W. Biederman }
1428f84df2a6SEric W. Biederman }
14291b5d783cSAl Viro }
14301b5d783cSAl Viro EXPORT_SYMBOL(would_dump);
14311b5d783cSAl Viro
1432221af7f8SLinus Torvalds void setup_new_exec(struct linux_binprm * bprm)
1433221af7f8SLinus Torvalds {
1434df9e4d2cSEric W. Biederman /* Setup things that can depend upon the personality */
14357d503febSEric W. Biederman struct task_struct *me = current;
143646d98eb4SKees Cook
14377d503febSEric W. Biederman arch_pick_mmap_layout(me->mm, &bprm->rlim_stack);
1438d6e71144SAlan Cox
1439e9ea1e7fSKyle Huey arch_setup_new_exec();
14401da177e4SLinus Torvalds
14410551fbd2SBenjamin Herrenschmidt /* Set the new mm task size. We have to do that late because it may
14420551fbd2SBenjamin Herrenschmidt * depend on TIF_32BIT which is only updated in flush_thread() on
14430551fbd2SBenjamin Herrenschmidt * some architectures like powerpc
14440551fbd2SBenjamin Herrenschmidt */
14457d503febSEric W. Biederman me->mm->task_size = TASK_SIZE;
1446f7cfd871SEric W. Biederman up_write(&me->signal->exec_update_lock);
14477d503febSEric W. Biederman mutex_unlock(&me->signal->cred_guard_mutex);
14481da177e4SLinus Torvalds }
1449221af7f8SLinus Torvalds EXPORT_SYMBOL(setup_new_exec);
14501da177e4SLinus Torvalds
1451b8383831SKees Cook /* Runs immediately before start_thread() takes over. */
1452b8383831SKees Cook void finalize_exec(struct linux_binprm *bprm)
1453b8383831SKees Cook {
1454c31dbb14SKees Cook /* Store any stack rlimit changes before starting thread. */
1455c31dbb14SKees Cook task_lock(current->group_leader);
1456c31dbb14SKees Cook current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack;
1457c31dbb14SKees Cook task_unlock(current->group_leader);
1458b8383831SKees Cook }
1459b8383831SKees Cook EXPORT_SYMBOL(finalize_exec);
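
/*
 * Expected calling sequence from a binfmt handler, in the spirit of
 * fs/binfmt_elf.c (simplified sketch, error handling elided):
 *
 *	retval = begin_new_exec(bprm);		// point of no return
 *	setup_new_exec(bprm);			// personality-dependent setup
 *	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
 *				 executable_stack);
 *	...					// map the binary, set up the mm
 *	finalize_exec(bprm);			// commit the stack rlimit
 *	start_thread(regs, entry, bprm->p);	// new program takes over
 */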
1460b8383831SKees Cook
14611da177e4SLinus Torvalds /*
1462a2a8474cSOleg Nesterov * Prepare credentials and lock ->cred_guard_mutex.
146396ecee29SEric W. Biederman * setup_new_exec() commits the new creds and drops the lock.
14643d742d4bSRandy Dunlap * Or, if exec fails before, free_bprm() should release ->cred
1465a2a8474cSOleg Nesterov * and unlock.
1466a2a8474cSOleg Nesterov */
14674addd264SChanho Min static int prepare_bprm_creds(struct linux_binprm *bprm)
1468a2a8474cSOleg Nesterov {
14699b1bf12dSKOSAKI Motohiro if (mutex_lock_interruptible(¤t->signal->cred_guard_mutex))
1470a2a8474cSOleg Nesterov return -ERESTARTNOINTR;
1471a2a8474cSOleg Nesterov
1472a2a8474cSOleg Nesterov bprm->cred = prepare_exec_creds();
1473a2a8474cSOleg Nesterov if (likely(bprm->cred))
1474a2a8474cSOleg Nesterov return 0;
1475a2a8474cSOleg Nesterov
14769b1bf12dSKOSAKI Motohiro mutex_unlock(¤t->signal->cred_guard_mutex);
1477a2a8474cSOleg Nesterov return -ENOMEM;
1478a2a8474cSOleg Nesterov }
1479a2a8474cSOleg Nesterov
1480bdd8f624SKees Cook /* Matches do_open_execat() */
1481bdd8f624SKees Cook static void do_close_execat(struct file *file)
1482bdd8f624SKees Cook {
14833b832035SChristian Brauner if (!file)
14843b832035SChristian Brauner return;
14850357ef03SAmir Goldstein exe_file_allow_write_access(file);
1486bdd8f624SKees Cook fput(file);
1487bdd8f624SKees Cook }
1488bdd8f624SKees Cook
1489c4ad8f98SLinus Torvalds static void free_bprm(struct linux_binprm *bprm)
1490a2a8474cSOleg Nesterov {
1491f18ac551SEric W. Biederman if (bprm->mm) {
1492f18ac551SEric W. Biederman acct_arg_size(bprm, 0);
1493f18ac551SEric W. Biederman mmput(bprm->mm);
1494f18ac551SEric W. Biederman }
1495a2a8474cSOleg Nesterov free_arg_pages(bprm);
1496a2a8474cSOleg Nesterov if (bprm->cred) {
1497af7bb0d2SOleg Nesterov /* in case exec fails before de_thread() succeeds */
1498af7bb0d2SOleg Nesterov current->fs->in_exec = 0;
14999b1bf12dSKOSAKI Motohiro mutex_unlock(¤t->signal->cred_guard_mutex);
1500a2a8474cSOleg Nesterov abort_creds(bprm->cred);
1501a2a8474cSOleg Nesterov }
1502bdd8f624SKees Cook do_close_execat(bprm->file);
1503b8a61c9eSEric W. Biederman if (bprm->executable)
1504b8a61c9eSEric W. Biederman fput(bprm->executable);
1505b66c5984SKees Cook /* If a binfmt changed the interp, free it. */
1506b66c5984SKees Cook if (bprm->interp != bprm->filename)
1507b66c5984SKees Cook kfree(bprm->interp);
150860d9ad1dSEric W. Biederman kfree(bprm->fdpath);
1509a2a8474cSOleg Nesterov kfree(bprm);
1510a2a8474cSOleg Nesterov }
1511a2a8474cSOleg Nesterov
1512978ffcbfSLinus Torvalds static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int flags)
15130a8f36ebSEric W. Biederman {
1514978ffcbfSLinus Torvalds struct linux_binprm *bprm;
1515978ffcbfSLinus Torvalds struct file *file;
151660d9ad1dSEric W. Biederman int retval = -ENOMEM;
1517978ffcbfSLinus Torvalds
1518978ffcbfSLinus Torvalds file = do_open_execat(fd, filename, flags);
1519978ffcbfSLinus Torvalds if (IS_ERR(file))
1520978ffcbfSLinus Torvalds return ERR_CAST(file);
1521978ffcbfSLinus Torvalds
1522978ffcbfSLinus Torvalds bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
1523978ffcbfSLinus Torvalds if (!bprm) {
1524bdd8f624SKees Cook do_close_execat(file);
1525978ffcbfSLinus Torvalds return ERR_PTR(-ENOMEM);
1526978ffcbfSLinus Torvalds }
1527978ffcbfSLinus Torvalds
1528978ffcbfSLinus Torvalds bprm->file = file;
152960d9ad1dSEric W. Biederman
153060d9ad1dSEric W. Biederman if (fd == AT_FDCWD || filename->name[0] == '/') {
153160d9ad1dSEric W. Biederman bprm->filename = filename->name;
153260d9ad1dSEric W. Biederman } else {
1533543841d1SKees Cook if (filename->name[0] == '\0') {
153460d9ad1dSEric W. Biederman bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
1535543841d1SKees Cook bprm->comm_from_dentry = 1;
1536543841d1SKees Cook } else {
153760d9ad1dSEric W. Biederman bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
153860d9ad1dSEric W. Biederman fd, filename->name);
1539543841d1SKees Cook }
154060d9ad1dSEric W. Biederman if (!bprm->fdpath)
154160d9ad1dSEric W. Biederman goto out_free;
154260d9ad1dSEric W. Biederman
1543978ffcbfSLinus Torvalds /*
1544978ffcbfSLinus Torvalds * Record that a name derived from an O_CLOEXEC fd will be
1545978ffcbfSLinus Torvalds * inaccessible after exec. This allows the code in exec to
1546978ffcbfSLinus Torvalds * choose to fail when the executable is not mmaped into the
1547978ffcbfSLinus Torvalds * interpreter and an open file descriptor is not passed to
1548978ffcbfSLinus Torvalds * the interpreter. This makes for a better user experience
1549978ffcbfSLinus Torvalds * than having the interpreter start and then immediately fail
1550978ffcbfSLinus Torvalds * when it finds the executable is inaccessible.
1551978ffcbfSLinus Torvalds */
1552978ffcbfSLinus Torvalds if (get_close_on_exec(fd))
1553978ffcbfSLinus Torvalds bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
1554978ffcbfSLinus Torvalds
155560d9ad1dSEric W. Biederman bprm->filename = bprm->fdpath;
155660d9ad1dSEric W. Biederman }
155760d9ad1dSEric W. Biederman bprm->interp = bprm->filename;
1558f18ac551SEric W. Biederman
1559a5874fdeSMickaël Salaün /*
1560a5874fdeSMickaël Salaün * At this point, security_file_open() has already been called (with
1561a5874fdeSMickaël Salaün * __FMODE_EXEC) and access control checks for AT_EXECVE_CHECK will
1562a5874fdeSMickaël Salaün * stop just after the security_bprm_creds_for_exec() call in
1563a5874fdeSMickaël Salaün * bprm_execve(). Indeed, the kernel should not try to parse the
1564a5874fdeSMickaël Salaün * content of the file with exec_binprm() nor change the calling
1565a5874fdeSMickaël Salaün * thread, which means that the following security functions will not
1566a5874fdeSMickaël Salaün * be called:
1567a5874fdeSMickaël Salaün * - security_bprm_check()
1568a5874fdeSMickaël Salaün * - security_bprm_creds_from_file()
1569a5874fdeSMickaël Salaün * - security_bprm_committing_creds()
1570a5874fdeSMickaël Salaün * - security_bprm_committed_creds()
1571a5874fdeSMickaël Salaün */
1572a5874fdeSMickaël Salaün bprm->is_check = !!(flags & AT_EXECVE_CHECK);
1573a5874fdeSMickaël Salaün
1574f18ac551SEric W. Biederman retval = bprm_mm_init(bprm);
1575978ffcbfSLinus Torvalds if (!retval)
15760a8f36ebSEric W. Biederman return bprm;
157760d9ad1dSEric W. Biederman
157860d9ad1dSEric W. Biederman out_free:
157960d9ad1dSEric W. Biederman free_bprm(bprm);
158060d9ad1dSEric W. Biederman return ERR_PTR(retval);
15810a8f36ebSEric W. Biederman }
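
/*
 * Illustrative outcomes of the naming rules above (fd values are
 * hypothetical):
 *
 *	execveat(AT_FDCWD, "/bin/true", ...)	-> filename = "/bin/true"
 *	execveat(5, "bin/tool", ...)		-> fdpath = "/dev/fd/5/bin/tool"
 *	execveat(5, "", ..., AT_EMPTY_PATH)	-> fdpath = "/dev/fd/5" and
 *						   comm taken from the dentry name
 */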
15820a8f36ebSEric W. Biederman
1583c2315c18SOleg Nesterov int bprm_change_interp(const char *interp, struct linux_binprm *bprm)
1584b66c5984SKees Cook {
1585b66c5984SKees Cook /* If a binfmt changed the interp, free it first. */
1586b66c5984SKees Cook if (bprm->interp != bprm->filename)
1587b66c5984SKees Cook kfree(bprm->interp);
1588b66c5984SKees Cook bprm->interp = kstrdup(interp, GFP_KERNEL);
1589b66c5984SKees Cook if (!bprm->interp)
1590b66c5984SKees Cook return -ENOMEM;
1591b66c5984SKees Cook return 0;
1592b66c5984SKees Cook }
1593b66c5984SKees Cook EXPORT_SYMBOL(bprm_change_interp);
1594b66c5984SKees Cook
1595a2a8474cSOleg Nesterov /*
1596a6f76f23SDavid Howells * determine how safe it is to execute the proposed program
15979b1bf12dSKOSAKI Motohiro * - the caller must hold ->cred_guard_mutex to protect against
1598c2e1f2e3SKees Cook * PTRACE_ATTACH or seccomp thread-sync
1599a6f76f23SDavid Howells */
16009e00cdb0SOleg Nesterov static void check_unsafe_exec(struct linux_binprm *bprm)
1601a6f76f23SDavid Howells {
16020bf2f3aeSDavid Howells struct task_struct *p = current, *t;
1603f1191b50SAl Viro unsigned n_fs;
1604a6f76f23SDavid Howells
16059227dd2aSEric W. Biederman if (p->ptrace)
16064b9d33e6STejun Heo bprm->unsafe |= LSM_UNSAFE_PTRACE;
1607a6f76f23SDavid Howells
1608259e5e6cSAndy Lutomirski /*
1609259e5e6cSAndy Lutomirski * This isn't strictly necessary, but it makes it harder for LSMs to
1610259e5e6cSAndy Lutomirski * mess up.
1611259e5e6cSAndy Lutomirski */
16121d4457f9SKees Cook if (task_no_new_privs(current))
1613259e5e6cSAndy Lutomirski bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
1614259e5e6cSAndy Lutomirski
1615275498a9SKees Cook /*
1616275498a9SKees Cook * If another task is sharing our fs, we cannot safely
1617275498a9SKees Cook * suid exec because the differently privileged task
1618275498a9SKees Cook * will be able to manipulate the current directory, etc.
1619275498a9SKees Cook * It would be nice to force an unshare instead...
1620af7bb0d2SOleg Nesterov *
1621af7bb0d2SOleg Nesterov * Otherwise we set fs->in_exec = 1 to deny clone(CLONE_FS)
1622af7bb0d2SOleg Nesterov * from another sub-thread until de_thread() succeeds, this
1623af7bb0d2SOleg Nesterov * state is protected by cred_guard_mutex we hold.
1624275498a9SKees Cook */
16250bf2f3aeSDavid Howells n_fs = 1;
16262a4419b5SNick Piggin spin_lock(&p->fs->lock);
1627437f7fdbSOleg Nesterov rcu_read_lock();
162861a7a5e2SOleg Nesterov for_other_threads(p, t) {
16290bf2f3aeSDavid Howells if (t->fs == p->fs)
16300bf2f3aeSDavid Howells n_fs++;
16310bf2f3aeSDavid Howells }
1632437f7fdbSOleg Nesterov rcu_read_unlock();
16330bf2f3aeSDavid Howells
163490383cc0SKees Cook /* "users" and "in_exec" locked for copy_fs() */
16359e00cdb0SOleg Nesterov if (p->fs->users > n_fs)
1636a6f76f23SDavid Howells bprm->unsafe |= LSM_UNSAFE_SHARE;
16379e00cdb0SOleg Nesterov else
1638498052bbSAl Viro p->fs->in_exec = 1;
16392a4419b5SNick Piggin spin_unlock(&p->fs->lock);
1640a6f76f23SDavid Howells }
1641a6f76f23SDavid Howells
164256305aa9SEric W. Biederman static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
16438b01fc86SJann Horn {
164456305aa9SEric W. Biederman /* Handle suid and sgid on files */
1645e67fe633SChristian Brauner struct mnt_idmap *idmap;
1646e6ae4381SAl Viro struct inode *inode = file_inode(file);
16478b01fc86SJann Horn unsigned int mode;
1648a2bd096fSChristian Brauner vfsuid_t vfsuid;
1649a2bd096fSChristian Brauner vfsgid_t vfsgid;
1650f50733b4SKees Cook int err;
16518b01fc86SJann Horn
165256305aa9SEric W. Biederman if (!mnt_may_suid(file->f_path.mnt))
16538b01fc86SJann Horn return;
16548b01fc86SJann Horn
16558b01fc86SJann Horn if (task_no_new_privs(current))
16568b01fc86SJann Horn return;
16578b01fc86SJann Horn
16588b01fc86SJann Horn mode = READ_ONCE(inode->i_mode);
16598b01fc86SJann Horn if (!(mode & (S_ISUID|S_ISGID)))
16608b01fc86SJann Horn return;
16618b01fc86SJann Horn
1662e67fe633SChristian Brauner idmap = file_mnt_idmap(file);
16631ab29965SChristian Brauner
16648b01fc86SJann Horn /* Be careful if suid/sgid is set */
16655955102cSAl Viro inode_lock(inode);
16668b01fc86SJann Horn
1667f50733b4SKees Cook /* Atomically reload and check mode/uid/gid now that lock held. */
16688b01fc86SJann Horn mode = inode->i_mode;
1669e67fe633SChristian Brauner vfsuid = i_uid_into_vfsuid(idmap, inode);
1670e67fe633SChristian Brauner vfsgid = i_gid_into_vfsgid(idmap, inode);
1671f50733b4SKees Cook err = inode_permission(idmap, inode, MAY_EXEC);
16725955102cSAl Viro inode_unlock(inode);
16738b01fc86SJann Horn
1674f50733b4SKees Cook /* Did the exec bit vanish out from under us? Give up. */
1675f50733b4SKees Cook if (err)
1676f50733b4SKees Cook return;
1677f50733b4SKees Cook
16788b01fc86SJann Horn /* We ignore suid/sgid if there are no mappings for them in the ns */
1679a2bd096fSChristian Brauner if (!vfsuid_has_mapping(bprm->cred->user_ns, vfsuid) ||
1680a2bd096fSChristian Brauner !vfsgid_has_mapping(bprm->cred->user_ns, vfsgid))
16818b01fc86SJann Horn return;
16828b01fc86SJann Horn
16838b01fc86SJann Horn if (mode & S_ISUID) {
16848b01fc86SJann Horn bprm->per_clear |= PER_CLEAR_ON_SETID;
1685a2bd096fSChristian Brauner bprm->cred->euid = vfsuid_into_kuid(vfsuid);
16868b01fc86SJann Horn }
16878b01fc86SJann Horn
16888b01fc86SJann Horn if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
16898b01fc86SJann Horn bprm->per_clear |= PER_CLEAR_ON_SETID;
1690a2bd096fSChristian Brauner bprm->cred->egid = vfsgid_into_kgid(vfsgid);
16918b01fc86SJann Horn }
16928b01fc86SJann Horn }
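
/*
 * Illustrative outcome of the checks above: exec of a root-owned mode
 * 04755 binary, on a mount without MNT_NOSUID and by a task that has not
 * set no_new_privs, leaves bprm->cred->euid pointing at the file owner
 * (root) and sets PER_CLEAR_ON_SETID, while bprm->cred->uid keeps the
 * caller's uid.
 */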
16938b01fc86SJann Horn
1694a6f76f23SDavid Howells /*
169556305aa9SEric W. Biederman * Compute bprm->cred based upon the final binary.
169656305aa9SEric W. Biederman */
169756305aa9SEric W. Biederman static int bprm_creds_from_file(struct linux_binprm *bprm)
169856305aa9SEric W. Biederman {
169956305aa9SEric W. Biederman /* Compute creds based on which file? */
170056305aa9SEric W. Biederman struct file *file = bprm->execfd_creds ? bprm->executable : bprm->file;
170156305aa9SEric W. Biederman
170256305aa9SEric W. Biederman bprm_fill_uid(bprm, file);
170356305aa9SEric W. Biederman return security_bprm_creds_from_file(bprm, file);
170456305aa9SEric W. Biederman }
170556305aa9SEric W. Biederman
170656305aa9SEric W. Biederman /*
17071da177e4SLinus Torvalds * Fill the binprm structure from the inode.
170856305aa9SEric W. Biederman * Read the first BINPRM_BUF_SIZE bytes
1709a6f76f23SDavid Howells *
1710a6f76f23SDavid Howells * This may be called multiple times for binary chains (scripts for example).
17111da177e4SLinus Torvalds */
17128b72ca90SEric W. Biederman static int prepare_binprm(struct linux_binprm *bprm)
17131da177e4SLinus Torvalds {
1714bdd1d2d3SChristoph Hellwig loff_t pos = 0;
17151da177e4SLinus Torvalds
17161da177e4SLinus Torvalds memset(bprm->buf, 0, BINPRM_BUF_SIZE);
1717bdd1d2d3SChristoph Hellwig return kernel_read(bprm->file, bprm->buf, BINPRM_BUF_SIZE, &pos);
17181da177e4SLinus Torvalds }
17191da177e4SLinus Torvalds
17204fc75ff4SNick Piggin /*
17214fc75ff4SNick Piggin * Arguments are '\0' separated strings found at the location bprm->p
17224fc75ff4SNick Piggin * points to; chop off the first by relocating bprm->p to right after
17234fc75ff4SNick Piggin * the first '\0' encountered.
17244fc75ff4SNick Piggin */
1725b6a2fea3SOllie Wild int remove_arg_zero(struct linux_binprm *bprm)
17261da177e4SLinus Torvalds {
17271da177e4SLinus Torvalds unsigned long offset;
17281da177e4SLinus Torvalds char *kaddr;
17291da177e4SLinus Torvalds struct page *page;
17301da177e4SLinus Torvalds
1731b6a2fea3SOllie Wild if (!bprm->argc)
1732b6a2fea3SOllie Wild return 0;
17331da177e4SLinus Torvalds
1734b6a2fea3SOllie Wild do {
1735b6a2fea3SOllie Wild offset = bprm->p & ~PAGE_MASK;
1736b6a2fea3SOllie Wild page = get_arg_page(bprm, bprm->p, 0);
1737725d5026SKees Cook if (!page)
1738725d5026SKees Cook return -EFAULT;
17393a608cfeSFabio M. De Francesco kaddr = kmap_local_page(page);
17404fc75ff4SNick Piggin
1741b6a2fea3SOllie Wild for (; offset < PAGE_SIZE && kaddr[offset];
1742b6a2fea3SOllie Wild offset++, bprm->p++)
1743b6a2fea3SOllie Wild ;
17444fc75ff4SNick Piggin
17453a608cfeSFabio M. De Francesco kunmap_local(kaddr);
1746b6a2fea3SOllie Wild put_arg_page(page);
1747b6a2fea3SOllie Wild } while (offset == PAGE_SIZE);
17484fc75ff4SNick Piggin
1749b6a2fea3SOllie Wild bprm->p++;
17501da177e4SLinus Torvalds bprm->argc--;
1751b6a2fea3SOllie Wild
1752725d5026SKees Cook return 0;
17531da177e4SLinus Torvalds }
17541da177e4SLinus Torvalds EXPORT_SYMBOL(remove_arg_zero);
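
/*
 * Sketch of a typical caller, in the spirit of fs/binfmt_script.c
 * (simplified; "#!" parsing and error handling elided, i_name is the
 * interpreter path parsed out of bprm->buf):
 *
 *	remove_arg_zero(bprm);			// drop the old argv[0]
 *	copy_string_kernel(bprm->interp, bprm);	// script path becomes an arg
 *	bprm->argc++;
 *	copy_string_kernel(i_name, bprm);	// interpreter becomes argv[0]
 *	bprm->argc++;
 *	bprm_change_interp(i_name, bprm);	// record the new interp
 *	bprm->interpreter = open_exec(i_name);	// loaded on the next pass
 */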
17551da177e4SLinus Torvalds
17561da177e4SLinus Torvalds /*
17571da177e4SLinus Torvalds * cycle through the list of binary format handlers until one recognizes the image
17581da177e4SLinus Torvalds */
1759bc2bf338SEric W. Biederman static int search_binary_handler(struct linux_binprm *bprm)
17601da177e4SLinus Torvalds {
17611da177e4SLinus Torvalds struct linux_binfmt *fmt;
1762cb7b6b1cSOleg Nesterov int retval;
17631da177e4SLinus Torvalds
17648b72ca90SEric W. Biederman retval = prepare_binprm(bprm);
17658b72ca90SEric W. Biederman if (retval < 0)
17668b72ca90SEric W. Biederman return retval;
1767d7402698SKees Cook
17681da177e4SLinus Torvalds retval = security_bprm_check(bprm);
17691da177e4SLinus Torvalds if (retval)
17701da177e4SLinus Torvalds return retval;
17711da177e4SLinus Torvalds
17721da177e4SLinus Torvalds read_lock(&binfmt_lock);
1773e4dc1b14SAlexey Dobriyan list_for_each_entry(fmt, &formats, lh) {
17741da177e4SLinus Torvalds if (!try_module_get(fmt->module))
17751da177e4SLinus Torvalds continue;
17761da177e4SLinus Torvalds read_unlock(&binfmt_lock);
1777d53ddd01SAlexey Dobriyan
177892eaa565SOleg Nesterov retval = fmt->load_binary(bprm);
1779d53ddd01SAlexey Dobriyan
17801da177e4SLinus Torvalds read_lock(&binfmt_lock);
17811da177e4SLinus Torvalds put_binfmt(fmt);
1782bc2bf338SEric W. Biederman if (bprm->point_of_no_return || (retval != -ENOEXEC)) {
178319d860a1SAl Viro read_unlock(&binfmt_lock);
178419d860a1SAl Viro return retval;
178519d860a1SAl Viro }
17861da177e4SLinus Torvalds }
17871da177e4SLinus Torvalds read_unlock(&binfmt_lock);
1788cb7b6b1cSOleg Nesterov
1789fa1bdca9SNir Lichtman return -ENOEXEC;
17901da177e4SLinus Torvalds }
17911da177e4SLinus Torvalds
1792275498a9SKees Cook /* binfmt handlers will call back into begin_new_exec() on success. */
17935d1baf3bSOleg Nesterov static int exec_binprm(struct linux_binprm *bprm)
17945d1baf3bSOleg Nesterov {
17955d1baf3bSOleg Nesterov pid_t old_pid, old_vpid;
1796bc2bf338SEric W. Biederman int ret, depth;
17975d1baf3bSOleg Nesterov
17985d1baf3bSOleg Nesterov /* Need to fetch pid before load_binary changes it */
17995d1baf3bSOleg Nesterov old_pid = current->pid;
18005d1baf3bSOleg Nesterov rcu_read_lock();
18015d1baf3bSOleg Nesterov old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
18025d1baf3bSOleg Nesterov rcu_read_unlock();
18035d1baf3bSOleg Nesterov
1804bc2bf338SEric W. Biederman /* This allows up to 5 levels of binfmt rewrites before failing hard with -ELOOP. */
1805bc2bf338SEric W. Biederman for (depth = 0;; depth++) {
1806bc2bf338SEric W. Biederman struct file *exec;
1807bc2bf338SEric W. Biederman if (depth > 5)
1808bc2bf338SEric W. Biederman return -ELOOP;
1809bc2bf338SEric W. Biederman
18105d1baf3bSOleg Nesterov ret = search_binary_handler(bprm);
1811bc2bf338SEric W. Biederman if (ret < 0)
1812bc2bf338SEric W. Biederman return ret;
1813bc2bf338SEric W. Biederman if (!bprm->interpreter)
1814bc2bf338SEric W. Biederman break;
1815bc2bf338SEric W. Biederman
1816bc2bf338SEric W. Biederman exec = bprm->file;
1817bc2bf338SEric W. Biederman bprm->file = bprm->interpreter;
1818bc2bf338SEric W. Biederman bprm->interpreter = NULL;
1819bc2bf338SEric W. Biederman
18200357ef03SAmir Goldstein exe_file_allow_write_access(exec);
1821bc2bf338SEric W. Biederman if (unlikely(bprm->have_execfd)) {
1822bc2bf338SEric W. Biederman if (bprm->executable) {
1823bc2bf338SEric W. Biederman fput(exec);
1824bc2bf338SEric W. Biederman return -ENOEXEC;
1825bc2bf338SEric W. Biederman }
1826bc2bf338SEric W. Biederman bprm->executable = exec;
1827bc2bf338SEric W. Biederman } else
1828bc2bf338SEric W. Biederman fput(exec);
1829bc2bf338SEric W. Biederman }
1830bc2bf338SEric W. Biederman
18319410d228SRichard Guy Briggs audit_bprm(bprm);
18325d1baf3bSOleg Nesterov trace_sched_process_exec(current, old_pid, bprm);
18335d1baf3bSOleg Nesterov ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
18349beb266fSOleg Nesterov proc_exec_connector(current);
1835bc2bf338SEric W. Biederman return 0;
18365d1baf3bSOleg Nesterov }
18375d1baf3bSOleg Nesterov
1838978ffcbfSLinus Torvalds static int bprm_execve(struct linux_binprm *bprm)
18390c9cdff0SEric W. Biederman {
18400c9cdff0SEric W. Biederman int retval;
18410c9cdff0SEric W. Biederman
18420c9cdff0SEric W. Biederman retval = prepare_bprm_creds(bprm);
18430c9cdff0SEric W. Biederman if (retval)
1844b6043501SEric W. Biederman return retval;
18450c9cdff0SEric W. Biederman
1846275498a9SKees Cook /*
1847275498a9SKees Cook * Check for unsafe execution states before exec_binprm(), which
1848275498a9SKees Cook * will call back into begin_new_exec(), into bprm_creds_from_file(),
1849275498a9SKees Cook * where setuid-ness is evaluated.
1850275498a9SKees Cook */
18510c9cdff0SEric W. Biederman check_unsafe_exec(bprm);
18520c9cdff0SEric W. Biederman current->in_execve = 1;
1853af7f588dSMathieu Desnoyers sched_mm_cid_before_execve(current);
18540c9cdff0SEric W. Biederman
18550c9cdff0SEric W. Biederman sched_exec();
18560c9cdff0SEric W. Biederman
18570c9cdff0SEric W. Biederman /* Set the unchanging part of bprm->cred */
18580c9cdff0SEric W. Biederman retval = security_bprm_creds_for_exec(bprm);
1859a5874fdeSMickaël Salaün if (retval || bprm->is_check)
18600c9cdff0SEric W. Biederman goto out;
18610c9cdff0SEric W. Biederman
18620c9cdff0SEric W. Biederman retval = exec_binprm(bprm);
18630c9cdff0SEric W. Biederman if (retval < 0)
18640c9cdff0SEric W. Biederman goto out;
18650c9cdff0SEric W. Biederman
1866af7f588dSMathieu Desnoyers sched_mm_cid_after_execve(current);
1867*169eae77SMathieu Desnoyers rseq_execve(current);
18680c9cdff0SEric W. Biederman /* execve succeeded */
18690c9cdff0SEric W. Biederman current->in_execve = 0;
1870fd593511SBeau Belgrave user_events_execve(current);
18710c9cdff0SEric W. Biederman acct_update_integrals(current);
18720c9cdff0SEric W. Biederman task_numa_free(current, false);
18730c9cdff0SEric W. Biederman return retval;
18740c9cdff0SEric W. Biederman
18750c9cdff0SEric W. Biederman out:
18760c9cdff0SEric W. Biederman /*
18773d742d4bSRandy Dunlap * If past the point of no return ensure the code never
18780c9cdff0SEric W. Biederman * returns to the userspace process. Use an existing fatal
18790c9cdff0SEric W. Biederman * signal if present otherwise terminate the process with
18800c9cdff0SEric W. Biederman * SIGSEGV.
18810c9cdff0SEric W. Biederman */
18820c9cdff0SEric W. Biederman if (bprm->point_of_no_return && !fatal_signal_pending(current))
1883e21294a7SEric W. Biederman force_fatal_sig(SIGSEGV);
18840c9cdff0SEric W. Biederman
1885af7f588dSMathieu Desnoyers sched_mm_cid_after_execve(current);
1886*169eae77SMathieu Desnoyers rseq_set_notify_resume(current);
18870c9cdff0SEric W. Biederman current->in_execve = 0;
18880c9cdff0SEric W. Biederman
18890c9cdff0SEric W. Biederman return retval;
18900c9cdff0SEric W. Biederman }
18910c9cdff0SEric W. Biederman
189225cf336dSEric W. Biederman static int do_execveat_common(int fd, struct filename *filename,
1893ba2d0162SOleg Nesterov struct user_arg_ptr argv,
189451f39a1fSDavid Drysdale struct user_arg_ptr envp,
189525cf336dSEric W. Biederman int flags)
18961da177e4SLinus Torvalds {
18971da177e4SLinus Torvalds struct linux_binprm *bprm;
18981da177e4SLinus Torvalds int retval;
189972fa5997SVasiliy Kulikov
1900c4ad8f98SLinus Torvalds if (IS_ERR(filename))
1901c4ad8f98SLinus Torvalds return PTR_ERR(filename);
1902c4ad8f98SLinus Torvalds
190372fa5997SVasiliy Kulikov /*
190472fa5997SVasiliy Kulikov * We move the actual failure in case of RLIMIT_NPROC excess from
190572fa5997SVasiliy Kulikov * set*uid() to execve() because too many poorly written programs
190672fa5997SVasiliy Kulikov * don't check setuid() return code. Here we additionally recheck
190772fa5997SVasiliy Kulikov * whether NPROC limit is still exceeded.
190872fa5997SVasiliy Kulikov */
190972fa5997SVasiliy Kulikov if ((current->flags & PF_NPROC_EXCEEDED) &&
1910de399236SAlexey Gladkov is_rlimit_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
191172fa5997SVasiliy Kulikov retval = -EAGAIN;
191272fa5997SVasiliy Kulikov goto out_ret;
191372fa5997SVasiliy Kulikov }
191472fa5997SVasiliy Kulikov
191572fa5997SVasiliy Kulikov /* We're below the limit (still or again), so we don't want to make
191672fa5997SVasiliy Kulikov * further execve() calls fail. */
191772fa5997SVasiliy Kulikov current->flags &= ~PF_NPROC_EXCEEDED;
19181da177e4SLinus Torvalds
1919978ffcbfSLinus Torvalds bprm = alloc_bprm(fd, filename, flags);
19200a8f36ebSEric W. Biederman if (IS_ERR(bprm)) {
19210a8f36ebSEric W. Biederman retval = PTR_ERR(bprm);
19220a8f36ebSEric W. Biederman goto out_ret;
19230a8f36ebSEric W. Biederman }
19240a8f36ebSEric W. Biederman
1925d8b9cd54SEric W. Biederman retval = count(argv, MAX_ARG_STRINGS);
1926d8b9cd54SEric W. Biederman if (retval < 0)
1927d8b9cd54SEric W. Biederman goto out_free;
1928d8b9cd54SEric W. Biederman bprm->argc = retval;
1929d8b9cd54SEric W. Biederman
1930d8b9cd54SEric W. Biederman retval = count(envp, MAX_ARG_STRINGS);
1931d8b9cd54SEric W. Biederman if (retval < 0)
1932d8b9cd54SEric W. Biederman goto out_free;
1933d8b9cd54SEric W. Biederman bprm->envc = retval;
1934d8b9cd54SEric W. Biederman
1935d8b9cd54SEric W. Biederman retval = bprm_stack_limits(bprm);
1936655c16a8SOleg Nesterov if (retval < 0)
19370c9cdff0SEric W. Biederman goto out_free;
19381da177e4SLinus Torvalds
1939986db2d1SChristoph Hellwig retval = copy_string_kernel(bprm->filename, bprm);
19401da177e4SLinus Torvalds if (retval < 0)
19410c9cdff0SEric W. Biederman goto out_free;
19421da177e4SLinus Torvalds bprm->exec = bprm->p;
19430c9cdff0SEric W. Biederman
19441da177e4SLinus Torvalds retval = copy_strings(bprm->envc, envp, bprm);
19451da177e4SLinus Torvalds if (retval < 0)
19460c9cdff0SEric W. Biederman goto out_free;
19471da177e4SLinus Torvalds
19481da177e4SLinus Torvalds retval = copy_strings(bprm->argc, argv, bprm);
19491da177e4SLinus Torvalds if (retval < 0)
19500c9cdff0SEric W. Biederman goto out_free;
19511da177e4SLinus Torvalds
1952dcd46d89SKees Cook /*
1953dcd46d89SKees Cook * When argv is empty, add an empty string ("") as argv[0] to
1954dcd46d89SKees Cook * ensure confused userspace programs that start processing
1955dcd46d89SKees Cook * from argv[1] won't end up walking envp. See also
1956dcd46d89SKees Cook * bprm_stack_limits().
1957dcd46d89SKees Cook */
1958dcd46d89SKees Cook if (bprm->argc == 0) {
1959dcd46d89SKees Cook retval = copy_string_kernel("", bprm);
1960dcd46d89SKees Cook if (retval < 0)
1961dcd46d89SKees Cook goto out_free;
1962dcd46d89SKees Cook bprm->argc = 1;
19634188fc31S[email protected]
19644188fc31S[email protected] pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n",
19654188fc31S[email protected] current->comm, bprm->filename);
1966dcd46d89SKees Cook }
1967dcd46d89SKees Cook
1968978ffcbfSLinus Torvalds retval = bprm_execve(bprm);
1969a6f76f23SDavid Howells out_free:
197008a6fac1SAl Viro free_bprm(bprm);
19711da177e4SLinus Torvalds
19721da177e4SLinus Torvalds out_ret:
1973c4ad8f98SLinus Torvalds putname(filename);
19741da177e4SLinus Torvalds return retval;
19751da177e4SLinus Torvalds }
19761da177e4SLinus Torvalds
1977be619f7fSEric W. Biederman int kernel_execve(const char *kernel_filename,
1978be619f7fSEric W. Biederman const char *const *argv, const char *const *envp)
1979be619f7fSEric W. Biederman {
1980be619f7fSEric W. Biederman struct filename *filename;
1981be619f7fSEric W. Biederman struct linux_binprm *bprm;
1982be619f7fSEric W. Biederman int fd = AT_FDCWD;
1983be619f7fSEric W. Biederman int retval;
1984be619f7fSEric W. Biederman
19851b2552cbSEric W. Biederman /* It makes no sense for kernel threads to call execve */
19861b2552cbSEric W. Biederman if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
1987343f4c49SEric W. Biederman return -EINVAL;
1988343f4c49SEric W. Biederman
1989be619f7fSEric W. Biederman filename = getname_kernel(kernel_filename);
1990be619f7fSEric W. Biederman if (IS_ERR(filename))
1991be619f7fSEric W. Biederman return PTR_ERR(filename);
1992be619f7fSEric W. Biederman
1993978ffcbfSLinus Torvalds bprm = alloc_bprm(fd, filename, 0);
1994be619f7fSEric W. Biederman if (IS_ERR(bprm)) {
1995be619f7fSEric W. Biederman retval = PTR_ERR(bprm);
1996be619f7fSEric W. Biederman goto out_ret;
1997be619f7fSEric W. Biederman }
1998be619f7fSEric W. Biederman
1999be619f7fSEric W. Biederman retval = count_strings_kernel(argv);
2000dcd46d89SKees Cook if (WARN_ON_ONCE(retval == 0))
2001dcd46d89SKees Cook retval = -EINVAL;
2002be619f7fSEric W. Biederman if (retval < 0)
2003be619f7fSEric W. Biederman goto out_free;
2004be619f7fSEric W. Biederman bprm->argc = retval;
2005be619f7fSEric W. Biederman
2006be619f7fSEric W. Biederman retval = count_strings_kernel(envp);
2007be619f7fSEric W. Biederman if (retval < 0)
2008be619f7fSEric W. Biederman goto out_free;
2009be619f7fSEric W. Biederman bprm->envc = retval;
2010be619f7fSEric W. Biederman
2011be619f7fSEric W. Biederman retval = bprm_stack_limits(bprm);
2012be619f7fSEric W. Biederman if (retval < 0)
2013be619f7fSEric W. Biederman goto out_free;
2014be619f7fSEric W. Biederman
2015be619f7fSEric W. Biederman retval = copy_string_kernel(bprm->filename, bprm);
2016be619f7fSEric W. Biederman if (retval < 0)
2017be619f7fSEric W. Biederman goto out_free;
2018be619f7fSEric W. Biederman bprm->exec = bprm->p;
2019be619f7fSEric W. Biederman
2020be619f7fSEric W. Biederman retval = copy_strings_kernel(bprm->envc, envp, bprm);
2021be619f7fSEric W. Biederman if (retval < 0)
2022be619f7fSEric W. Biederman goto out_free;
2023be619f7fSEric W. Biederman
2024be619f7fSEric W. Biederman retval = copy_strings_kernel(bprm->argc, argv, bprm);
2025be619f7fSEric W. Biederman if (retval < 0)
2026be619f7fSEric W. Biederman goto out_free;
2027be619f7fSEric W. Biederman
2028978ffcbfSLinus Torvalds retval = bprm_execve(bprm);
2029be619f7fSEric W. Biederman out_free:
2030be619f7fSEric W. Biederman free_bprm(bprm);
2031be619f7fSEric W. Biederman out_ret:
2032be619f7fSEric W. Biederman putname(filename);
2033be619f7fSEric W. Biederman return retval;
2034be619f7fSEric W. Biederman }
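
/*
 * Typical in-kernel usage, an illustrative sketch modelled on
 * run_init_process() in init/main.c and the usermode helper:
 *
 *	static const char *argv_init[] = { "/sbin/init", NULL };
 *	static const char *envp_init[] = { "HOME=/", "TERM=linux", NULL };
 *
 *	ret = kernel_execve(argv_init[0], argv_init, envp_init);
 */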
2035be619f7fSEric W. Biederman
2036be619f7fSEric W. Biederman static int do_execve(struct filename *filename,
2037ba2d0162SOleg Nesterov const char __user *const __user *__argv,
2038da3d4c5fSAl Viro const char __user *const __user *__envp)
2039ba2d0162SOleg Nesterov {
20400e028465SOleg Nesterov struct user_arg_ptr argv = { .ptr.native = __argv };
20410e028465SOleg Nesterov struct user_arg_ptr envp = { .ptr.native = __envp };
204251f39a1fSDavid Drysdale return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
204351f39a1fSDavid Drysdale }
204451f39a1fSDavid Drysdale
2045be619f7fSEric W. Biederman static int do_execveat(int fd, struct filename *filename,
204651f39a1fSDavid Drysdale const char __user *const __user *__argv,
204751f39a1fSDavid Drysdale const char __user *const __user *__envp,
204851f39a1fSDavid Drysdale int flags)
204951f39a1fSDavid Drysdale {
205051f39a1fSDavid Drysdale struct user_arg_ptr argv = { .ptr.native = __argv };
205151f39a1fSDavid Drysdale struct user_arg_ptr envp = { .ptr.native = __envp };
205251f39a1fSDavid Drysdale
205351f39a1fSDavid Drysdale return do_execveat_common(fd, filename, argv, envp, flags);
2054ba2d0162SOleg Nesterov }
2055ba2d0162SOleg Nesterov
20560e028465SOleg Nesterov #ifdef CONFIG_COMPAT
2057c4ad8f98SLinus Torvalds static int compat_do_execve(struct filename *filename,
205838b983b3SAl Viro const compat_uptr_t __user *__argv,
2059d03d26e5SAl Viro const compat_uptr_t __user *__envp)
20600e028465SOleg Nesterov {
20610e028465SOleg Nesterov struct user_arg_ptr argv = {
20620e028465SOleg Nesterov .is_compat = true,
20630e028465SOleg Nesterov .ptr.compat = __argv,
20640e028465SOleg Nesterov };
20650e028465SOleg Nesterov struct user_arg_ptr envp = {
20660e028465SOleg Nesterov .is_compat = true,
20670e028465SOleg Nesterov .ptr.compat = __envp,
20680e028465SOleg Nesterov };
206951f39a1fSDavid Drysdale return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
207051f39a1fSDavid Drysdale }
207151f39a1fSDavid Drysdale
207251f39a1fSDavid Drysdale static int compat_do_execveat(int fd, struct filename *filename,
207351f39a1fSDavid Drysdale const compat_uptr_t __user *__argv,
207451f39a1fSDavid Drysdale const compat_uptr_t __user *__envp,
207551f39a1fSDavid Drysdale int flags)
207651f39a1fSDavid Drysdale {
207751f39a1fSDavid Drysdale struct user_arg_ptr argv = {
207851f39a1fSDavid Drysdale .is_compat = true,
207951f39a1fSDavid Drysdale .ptr.compat = __argv,
208051f39a1fSDavid Drysdale };
208151f39a1fSDavid Drysdale struct user_arg_ptr envp = {
208251f39a1fSDavid Drysdale .is_compat = true,
208351f39a1fSDavid Drysdale .ptr.compat = __envp,
208451f39a1fSDavid Drysdale };
208551f39a1fSDavid Drysdale return do_execveat_common(fd, filename, argv, envp, flags);
20860e028465SOleg Nesterov }
20870e028465SOleg Nesterov #endif
20880e028465SOleg Nesterov
2089964ee7dfSOleg Nesterov void set_binfmt(struct linux_binfmt *new)
20901da177e4SLinus Torvalds {
2091801460d0SHiroshi Shimamoto struct mm_struct *mm = current->mm;
20921da177e4SLinus Torvalds
2093801460d0SHiroshi Shimamoto if (mm->binfmt)
2094801460d0SHiroshi Shimamoto module_put(mm->binfmt->module);
2095801460d0SHiroshi Shimamoto
2096801460d0SHiroshi Shimamoto mm->binfmt = new;
2097964ee7dfSOleg Nesterov if (new)
2098964ee7dfSOleg Nesterov __module_get(new->module);
20991da177e4SLinus Torvalds }
21001da177e4SLinus Torvalds EXPORT_SYMBOL(set_binfmt);
21011da177e4SLinus Torvalds
21026c5d5238SKawai, Hidehiro /*
21037288e118SOleg Nesterov * set_dumpable stores the three-valued SUID_DUMP_* setting into mm->flags.
21046c5d5238SKawai, Hidehiro */
21056c5d5238SKawai, Hidehiro void set_dumpable(struct mm_struct *mm, int value)
21066c5d5238SKawai, Hidehiro {
21077288e118SOleg Nesterov if (WARN_ON((unsigned)value > SUID_DUMP_ROOT))
21087288e118SOleg Nesterov return;
21097288e118SOleg Nesterov
211026e15225SVineet Gupta set_mask_bits(&mm->flags, MMF_DUMPABLE_MASK, value);
21116c5d5238SKawai, Hidehiro }
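
/*
 * Userspace can drive the first two values directly (illustrative):
 *
 *	#include <sys/prctl.h>
 *
 *	prctl(PR_SET_DUMPABLE, 0);	// SUID_DUMP_DISABLE: no core dump,
 *					// /proc/<pid> files owned by root
 *	prctl(PR_SET_DUMPABLE, 1);	// SUID_DUMP_USER: normal dumping
 *
 * SUID_DUMP_ROOT (2) is rejected by prctl(); it is normally applied via
 * the fs.suid_dumpable sysctl registered further down in this file.
 */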
21126c5d5238SKawai, Hidehiro
211338b983b3SAl Viro SYSCALL_DEFINE3(execve,
211438b983b3SAl Viro const char __user *, filename,
211538b983b3SAl Viro const char __user *const __user *, argv,
211638b983b3SAl Viro const char __user *const __user *, envp)
211738b983b3SAl Viro {
2118c4ad8f98SLinus Torvalds return do_execve(getname(filename), argv, envp);
211938b983b3SAl Viro }
212051f39a1fSDavid Drysdale
212151f39a1fSDavid Drysdale SYSCALL_DEFINE5(execveat,
212251f39a1fSDavid Drysdale int, fd, const char __user *, filename,
212351f39a1fSDavid Drysdale const char __user *const __user *, argv,
212451f39a1fSDavid Drysdale const char __user *const __user *, envp,
212551f39a1fSDavid Drysdale int, flags)
212651f39a1fSDavid Drysdale {
212751f39a1fSDavid Drysdale return do_execveat(fd,
21288228e2c3SDmitry Kadashev getname_uflags(filename, flags),
212951f39a1fSDavid Drysdale argv, envp, flags);
213051f39a1fSDavid Drysdale }
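
/*
 * Illustrative userspace use of execveat(2); this pattern is also how
 * glibc implements fexecve(3).  The raw syscall is used in case libc
 * does not provide a wrapper:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *
 *	int fd = open("/bin/true", O_PATH);
 *	char *argv[] = { "true", NULL };
 *	char *envp[] = { NULL };
 *
 *	syscall(__NR_execveat, fd, "", argv, envp, AT_EMPTY_PATH);
 */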
213151f39a1fSDavid Drysdale
213238b983b3SAl Viro #ifdef CONFIG_COMPAT
2133625b1d7eSHeiko Carstens COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
2134625b1d7eSHeiko Carstens const compat_uptr_t __user *, argv,
2135625b1d7eSHeiko Carstens const compat_uptr_t __user *, envp)
213638b983b3SAl Viro {
2137c4ad8f98SLinus Torvalds return compat_do_execve(getname(filename), argv, envp);
213838b983b3SAl Viro }
213951f39a1fSDavid Drysdale
214051f39a1fSDavid Drysdale COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
214151f39a1fSDavid Drysdale const char __user *, filename,
214251f39a1fSDavid Drysdale const compat_uptr_t __user *, argv,
214351f39a1fSDavid Drysdale const compat_uptr_t __user *, envp,
214451f39a1fSDavid Drysdale int, flags)
214551f39a1fSDavid Drysdale {
214651f39a1fSDavid Drysdale return compat_do_execveat(fd,
21478228e2c3SDmitry Kadashev getname_uflags(filename, flags),
214851f39a1fSDavid Drysdale argv, envp, flags);
214951f39a1fSDavid Drysdale }
215038b983b3SAl Viro #endif
215166ad3986SLuis Chamberlain
215266ad3986SLuis Chamberlain #ifdef CONFIG_SYSCTL
215366ad3986SLuis Chamberlain
215478eb4ea2SJoel Granados static int proc_dointvec_minmax_coredump(const struct ctl_table *table, int write,
215566ad3986SLuis Chamberlain void *buffer, size_t *lenp, loff_t *ppos)
215666ad3986SLuis Chamberlain {
215766ad3986SLuis Chamberlain int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
215866ad3986SLuis Chamberlain
215966ad3986SLuis Chamberlain if (!error)
216066ad3986SLuis Chamberlain validate_coredump_safety();
216166ad3986SLuis Chamberlain return error;
216266ad3986SLuis Chamberlain }
216366ad3986SLuis Chamberlain
21641751f872SJoel Granados static const struct ctl_table fs_exec_sysctls[] = {
216566ad3986SLuis Chamberlain {
216666ad3986SLuis Chamberlain .procname = "suid_dumpable",
216766ad3986SLuis Chamberlain .data = &suid_dumpable,
216866ad3986SLuis Chamberlain .maxlen = sizeof(int),
216966ad3986SLuis Chamberlain .mode = 0644,
217066ad3986SLuis Chamberlain .proc_handler = proc_dointvec_minmax_coredump,
217166ad3986SLuis Chamberlain .extra1 = SYSCTL_ZERO,
217266ad3986SLuis Chamberlain .extra2 = SYSCTL_TWO,
217366ad3986SLuis Chamberlain },
217466ad3986SLuis Chamberlain };
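
/*
 * Exposed as /proc/sys/fs/suid_dumpable; the accepted range matches
 * set_dumpable() above: 0 == SUID_DUMP_DISABLE, 1 == SUID_DUMP_USER,
 * 2 == SUID_DUMP_ROOT (dumps of otherwise non-dumpable binaries are
 * intended for a root-only core dump pipe handler).
 */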
217566ad3986SLuis Chamberlain
217666ad3986SLuis Chamberlain static int __init init_fs_exec_sysctls(void)
217766ad3986SLuis Chamberlain {
217866ad3986SLuis Chamberlain register_sysctl_init("fs", fs_exec_sysctls);
217966ad3986SLuis Chamberlain return 0;
218066ad3986SLuis Chamberlain }
218166ad3986SLuis Chamberlain
218266ad3986SLuis Chamberlain fs_initcall(init_fs_exec_sysctls);
218366ad3986SLuis Chamberlain #endif /* CONFIG_SYSCTL */
218460371f43SKees Cook
218560371f43SKees Cook #ifdef CONFIG_EXEC_KUNIT_TEST
2186b6f5ee4dSKees Cook #include "tests/exec_kunit.c"
218760371f43SKees Cook #endif
2188